modified: blueprints/chat.py

modified:   services/llm_service.py
	modified:   services/rag_service.py
	modified:   static/js/chat.js
This commit is contained in:
SimolZimol
2026-05-22 17:27:07 +02:00
parent 3f13f740d6
commit 718e38e9d5
4 changed files with 72 additions and 37 deletions

View File

@@ -47,6 +47,22 @@ def get_messages(session_id):
return jsonify([m.to_dict() for m in session.messages]) return jsonify([m.to_dict() for m in session.messages])
# ── Diagnostics ─────────────────────────────────────────────────────────────
@chat_bp.route("/ping-llm", methods=["GET"])
@login_required
def ping_llm():
    """Diagnostic endpoint: send a tiny prompt to the LLM and report the outcome.

    The JSON response always echoes the configured ``LM_STUDIO_URL``,
    ``LM_STUDIO_MODEL`` and ``LM_STUDIO_EMBEDDING_MODEL`` values (empty
    string when a key is missing from the app config).

    Returns:
        On success, a JSON body with ``status == "ok"`` and the model's
        ``reply``. If anything raises, a JSON body with ``status == "error"``
        and the exception text in ``error``, together with HTTP status 502.
    """
    cfg = current_app.config
    # Collected once so both the success and the failure payload report
    # exactly the same configuration fields, in the same order.
    settings = {
        "url": cfg.get("LM_STUDIO_URL", ""),
        "model": cfg.get("LM_STUDIO_MODEL", ""),
        "embed_model": cfg.get("LM_STUDIO_EMBEDDING_MODEL", ""),
    }
    try:
        answer = llm_service.ask(
            user_message="Reply with exactly: OK",
            context_chunks=[],
            history=[],
        )
        return jsonify({"status": "ok", "reply": answer, **settings})
    except Exception as exc:
        # Deliberately broad: this route exists to surface whatever went
        # wrong while talking to the LLM, so every failure is reported back.
        failure = {"status": "error", "error": str(exc), **settings}
        return jsonify(failure), 502
# ── Main chat ──────────────────────────────────────────────────────────────── # ── Main chat ────────────────────────────────────────────────────────────────
@chat_bp.route("/sessions/<int:session_id>/ask", methods=["POST"]) @chat_bp.route("/sessions/<int:session_id>/ask", methods=["POST"])
@@ -64,31 +80,33 @@ def ask(session_id):
doc_ids = [r["id"] for r in context_refs if r.get("type") == "doc"] doc_ids = [r["id"] for r in context_refs if r.get("type") == "doc"]
url_ids = [r["id"] for r in context_refs if r.get("type") == "url"] url_ids = [r["id"] for r in context_refs if r.get("type") == "url"]
# RAG lookup # RAG lookup — failures are non-fatal (chat continues without context)
chunks = [] chunks = []
if doc_ids: try:
chunks += rag_service.similarity_search( if doc_ids:
query=message, chunks += rag_service.similarity_search(
user_id=current_user.id, query=message,
source_ids=doc_ids, user_id=current_user.id,
source_type="doc", source_ids=doc_ids,
top_k=current_app.config["RAG_TOP_K"], source_type="doc",
) top_k=current_app.config["RAG_TOP_K"],
if url_ids: )
chunks += rag_service.similarity_search( if url_ids:
query=message, chunks += rag_service.similarity_search(
user_id=current_user.id, query=message,
source_ids=url_ids, user_id=current_user.id,
source_type="url", source_ids=url_ids,
top_k=current_app.config["RAG_TOP_K"], source_type="url",
) top_k=current_app.config["RAG_TOP_K"],
# If no specific ids given, search all user context )
if not context_refs: if not context_refs:
chunks = rag_service.similarity_search( chunks = rag_service.similarity_search(
query=message, query=message,
user_id=current_user.id, user_id=current_user.id,
top_k=current_app.config["RAG_TOP_K"], top_k=current_app.config["RAG_TOP_K"],
) )
except Exception as e:
current_app.logger.warning(f"RAG lookup failed, continuing without context: {e}")
# Build history (last 10 messages for context window) # Build history (last 10 messages for context window)
history = [ history = [

View File

@@ -21,13 +21,14 @@ def _get_client():
if provider == "openai": if provider == "openai":
api_key = current_app.config.get("OPENAI_API_KEY", "") api_key = current_app.config.get("OPENAI_API_KEY", "")
_client = openai.OpenAI(api_key=api_key) _client = openai.OpenAI(api_key=api_key, timeout=60.0)
else: else:
# LM Studio or any OpenAI-compatible endpoint # LM Studio or any OpenAI-compatible endpoint
base_url = current_app.config.get("LM_STUDIO_URL", "http://localhost:1234") base_url = current_app.config.get("LM_STUDIO_URL", "http://localhost:1234")
_client = openai.OpenAI( _client = openai.OpenAI(
base_url=f"{base_url.rstrip('/')}/v1", base_url=f"{base_url.rstrip('/')}/v1",
api_key="lm-studio", api_key="lm-studio",
timeout=60.0,
) )
return _client return _client

View File

@@ -20,7 +20,11 @@ class LMStudioEmbeddingFunction(EmbeddingFunction):
"""ChromaDB-compatible embedding function that calls LM Studio's /v1/embeddings.""" """ChromaDB-compatible embedding function that calls LM Studio's /v1/embeddings."""
def __init__(self, base_url: str, model: str):
    """Create the OpenAI-compatible client used for embedding requests.

    Args:
        base_url: Root URL of the LM Studio server. ``/v1`` is appended;
            a trailing slash is stripped first so a value such as
            ``http://localhost:1234/`` does not produce ``//v1``.
        model: Name of the embedding model; stored on the instance.
    """
    self._client = OpenAI(
        # Normalise the same way the chat client does, so both services
        # accept the configured URL with or without a trailing slash.
        base_url=f"{base_url.rstrip('/')}/v1",
        api_key="lm-studio",
        timeout=60.0,
    )
    self._model = model
def __call__(self, input: Documents) -> Embeddings: def __call__(self, input: Documents) -> Embeddings:

View File

@@ -28,20 +28,32 @@ export class Chat {
// Typing indicator // Typing indicator
const typingId = this._appendTyping(); const typingId = this._appendTyping();
const res = await fetch(`/api/chat/sessions/${sessionId}/ask`, { try {
method: 'POST', const res = await fetch(`/api/chat/sessions/${sessionId}/ask`, {
headers: { 'Content-Type': 'application/json' }, method: 'POST',
body: JSON.stringify({ message, context_ids: contextIds }), headers: { 'Content-Type': 'application/json' },
}); body: JSON.stringify({ message, context_ids: contextIds }),
});
this._removeTyping(typingId); this._removeTyping(typingId);
const data = await res.json(); let data;
try {
data = await res.json();
} catch {
this._appendBubble('assistant', 'Server error — could not parse response.', true);
this._scrollBottom();
return;
}
if (!res.ok) { if (!res.ok) {
this._appendBubble('assistant', `Error: ${data.error || 'Unknown error'}`, true); this._appendBubble('assistant', `Error: ${data.error || 'Unknown error'}`, true);
} else { } else {
this._appendBubble('assistant', data.reply); this._appendBubble('assistant', data.reply);
}
} catch (err) {
this._removeTyping(typingId);
this._appendBubble('assistant', `Network error: ${err.message}`, true);
} }
this._scrollBottom(); this._scrollBottom();