diff --git a/blueprints/chat.py b/blueprints/chat.py
index 1ed2f0c..6e5262d 100644
--- a/blueprints/chat.py
+++ b/blueprints/chat.py
@@ -47,6 +47,30 @@ def get_messages(session_id):
     return jsonify([m.to_dict() for m in session.messages])
 
 
+# ── Diagnostics ─────────────────────────────────────────────────────────────
+
+@chat_bp.route("/ping-llm", methods=["GET"])
+@login_required
+def ping_llm():
+    """Test LM Studio connectivity.
+
+    Sends a fixed one-line prompt through ``llm_service.ask`` and echoes the
+    configured URL and model names next to the outcome.
+
+    Returns:
+        JSON with ``status: "ok"`` and the reply, or ``status: "error"`` with
+        the exception text and HTTP 502 when the call raises.
+    """
+    url = current_app.config.get("LM_STUDIO_URL", "")
+    model = current_app.config.get("LM_STUDIO_MODEL", "")
+    embed_model = current_app.config.get("LM_STUDIO_EMBEDDING_MODEL", "")
+    try:
+        reply = llm_service.ask(user_message="Reply with exactly: OK", context_chunks=[], history=[])
+        return jsonify({"status": "ok", "reply": reply, "url": url, "model": model, "embed_model": embed_model})
+    except Exception as e:
+        return jsonify({"status": "error", "error": str(e), "url": url, "model": model, "embed_model": embed_model}), 502
+
+
 # ── Main chat ────────────────────────────────────────────────────────────────
 
 @chat_bp.route("/sessions//ask", methods=["POST"])
@@ -64,31 +88,34 @@ def ask(session_id):
     doc_ids = [r["id"] for r in context_refs if r.get("type") == "doc"]
     url_ids = [r["id"] for r in context_refs if r.get("type") == "url"]
 
-    # RAG lookup
+    # RAG lookup — failures are non-fatal (chat continues without context)
     chunks = []
-    if doc_ids:
-        chunks += rag_service.similarity_search(
-            query=message,
-            user_id=current_user.id,
-            source_ids=doc_ids,
-            source_type="doc",
-            top_k=current_app.config["RAG_TOP_K"],
-        )
-    if url_ids:
-        chunks += rag_service.similarity_search(
-            query=message,
-            user_id=current_user.id,
-            source_ids=url_ids,
-            source_type="url",
-            top_k=current_app.config["RAG_TOP_K"],
-        )
-    # If no specific ids given, search all user context
-    if not context_refs:
-        chunks = rag_service.similarity_search(
-            query=message,
-            user_id=current_user.id,
-            top_k=current_app.config["RAG_TOP_K"],
-        )
+    try:
+        if doc_ids:
+            chunks += rag_service.similarity_search(
+                query=message,
+                user_id=current_user.id,
+                source_ids=doc_ids,
+                source_type="doc",
+                top_k=current_app.config["RAG_TOP_K"],
+            )
+        if url_ids:
+            chunks += rag_service.similarity_search(
+                query=message,
+                user_id=current_user.id,
+                source_ids=url_ids,
+                source_type="url",
+                top_k=current_app.config["RAG_TOP_K"],
+            )
+        # If no specific ids given, search all user context
+        if not context_refs:
+            chunks = rag_service.similarity_search(
+                query=message,
+                user_id=current_user.id,
+                top_k=current_app.config["RAG_TOP_K"],
+            )
+    except Exception as e:
+        current_app.logger.warning("RAG lookup failed, continuing without context: %s", e, exc_info=True)
 
     # Build history (last 10 messages for context window)
     history = [
diff --git a/services/llm_service.py b/services/llm_service.py
index e5ab453..0ae8716 100644
--- a/services/llm_service.py
+++ b/services/llm_service.py
@@ -21,13 +21,14 @@ def _get_client():
 
     if provider == "openai":
         api_key = current_app.config.get("OPENAI_API_KEY", "")
-        _client = openai.OpenAI(api_key=api_key)
+        _client = openai.OpenAI(api_key=api_key, timeout=60.0)
     else:
         # LM Studio or any OpenAI-compatible endpoint
         base_url = current_app.config.get("LM_STUDIO_URL", "http://localhost:1234")
         _client = openai.OpenAI(
             base_url=f"{base_url.rstrip('/')}/v1",
             api_key="lm-studio",
+            timeout=60.0,
         )
 
     return _client
diff --git a/services/rag_service.py b/services/rag_service.py
index 24f4684..9a1da67 100644
--- a/services/rag_service.py
+++ b/services/rag_service.py
@@ -20,7 +20,12 @@ class LMStudioEmbeddingFunction(EmbeddingFunction):
     """ChromaDB-compatible embedding function that calls LM Studio's /v1/embeddings."""
 
     def __init__(self, base_url: str, model: str):
-        self._client = OpenAI(base_url=f"{base_url}/v1", api_key="lm-studio")
+        self._client = OpenAI(
+            # rstrip keeps a configured trailing slash from producing "//v1".
+            base_url=f"{base_url.rstrip('/')}/v1",
+            api_key="lm-studio",
+            timeout=60.0,
+        )
         self._model = model
 
     def __call__(self, input: Documents) -> Embeddings:
diff --git a/static/js/chat.js b/static/js/chat.js
index aa17513..5c09541 100644
--- a/static/js/chat.js
+++ b/static/js/chat.js
@@ -28,20 +28,32 @@ export class Chat {
     // Typing indicator
     const typingId = this._appendTyping();
 
-    const res = await fetch(`/api/chat/sessions/${sessionId}/ask`, {
-      method: 'POST',
-      headers: { 'Content-Type': 'application/json' },
-      body: JSON.stringify({ message, context_ids: contextIds }),
-    });
+    try {
+      const res = await fetch(`/api/chat/sessions/${sessionId}/ask`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ message, context_ids: contextIds }),
+      });
 
-    this._removeTyping(typingId);
+      this._removeTyping(typingId);
 
-    const data = await res.json();
+      let data;
+      try {
+        data = await res.json();
+      } catch {
+        this._appendBubble('assistant', 'Server error — could not parse response.', true);
+        this._scrollBottom();
+        return;
+      }
 
-    if (!res.ok) {
-      this._appendBubble('assistant', `Error: ${data.error || 'Unknown error'}`, true);
-    } else {
-      this._appendBubble('assistant', data.reply);
+      if (!res.ok) {
+        this._appendBubble('assistant', `Error: ${data.error || 'Unknown error'}`, true);
+      } else {
+        this._appendBubble('assistant', data.reply);
+      }
+    } catch (err) {
+      this._removeTyping(typingId);
+      this._appendBubble('assistant', `Network error: ${err.message}`, true);
     }
 
     this._scrollBottom();