""" Abstracted LLM service. Supports AI_PROVIDER=lmstudio (default) or AI_PROVIDER=openai. Both use the openai Python client — only base_url / api_key differ. """ from typing import Optional from flask import current_app _client = None def _get_client(): global _client if _client is not None: return _client import openai provider = current_app.config.get("AI_PROVIDER", "lmstudio").lower() if provider == "openai": api_key = current_app.config.get("OPENAI_API_KEY", "") _client = openai.OpenAI(api_key=api_key, timeout=60.0) else: # LM Studio or any OpenAI-compatible endpoint base_url = current_app.config.get("LM_STUDIO_URL", "http://localhost:1234") _client = openai.OpenAI( base_url=f"{base_url.rstrip('/')}/v1", api_key="lm-studio", timeout=60.0, ) return _client def _get_model() -> str: provider = current_app.config.get("AI_PROVIDER", "lmstudio").lower() if provider == "openai": return current_app.config.get("OPENAI_MODEL", "gpt-4o") return current_app.config.get("LM_STUDIO_MODEL", "local-model") def ask( user_message: str, context_chunks: Optional[list[str]] = None, history: Optional[list[dict]] = None, system_extra: Optional[str] = None, ) -> str: """ Send a message to the LLM with optional RAG context and chat history. Returns the assistant reply as a string. """ client = _get_client() model = _get_model() system_parts = [ "You are a helpful AI assistant. You will be given excerpts from one or more documents " "as context. Synthesize the information from ALL relevant excerpts to give a complete answer. " "The excerpts are ordered by their position in the document.", "If specific information is not contained in the context, say so clearly.", "When the answer spans multiple sections, summarize each relevant part.", ] if context_chunks: context_text = "\n\n---\n\n".join( f"[Excerpt {i+1}]\n{chunk}" for i, chunk in enumerate(context_chunks) ) system_parts.append(f"\n\n## Document excerpts\n\n{context_text}") if system_extra: system_parts.append(system_extra) messages = [{"role": "system", "content": "\n".join(system_parts)}] if history: messages.extend(history) messages.append({"role": "user", "content": user_message}) response = client.chat.completions.create( model=model, messages=messages, temperature=0.7, ) return response.choices[0].message.content.strip() def ask_inline(selected_text: str, question: str) -> str: """ Inline chat: use selected_text directly as context — no RAG lookup. """ system = ( "You are a helpful AI assistant. The user has selected the following text " "and has a question about it. Answer specifically about the selected content." ) messages = [ {"role": "system", "content": system}, { "role": "user", "content": f"## Selected text\n\n{selected_text}\n\n## Question\n\n{question}", }, ] client = _get_client() model = _get_model() response = client.chat.completions.create( model=model, messages=messages, temperature=0.7, ) return response.choices[0].message.content.strip()