"""
Abstracted LLM service.

Supports AI_PROVIDER=lmstudio (default) or AI_PROVIDER=openai.
Both use the openai Python client — only base_url / api_key differ.
"""
from typing import Optional

from flask import current_app

# Request settings shared by every provider and every call.
_REQUEST_TIMEOUT_SECONDS = 60.0
_TEMPERATURE = 0.7

# Lazily-created client, cached for the lifetime of the process.
_client = None


def _get_client():
    """
    Return the shared openai client, creating it on first use.

    The provider is read from the Flask config key AI_PROVIDER. The client is
    built once and cached in the module-level ``_client``, so configuration
    changes made after the first call are not picked up.
    Must be called inside a Flask application context (uses ``current_app``).
    """
    global _client
    if _client is not None:
        return _client

    # Imported lazily so importing this module does not require openai.
    import openai

    config = current_app.config
    provider = config.get("AI_PROVIDER", "lmstudio").lower()

    if provider == "openai":
        _client = openai.OpenAI(
            api_key=config.get("OPENAI_API_KEY", ""),
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
    else:
        # LM Studio or any OpenAI-compatible endpoint
        base_url = config.get("LM_STUDIO_URL", "http://localhost:1234")
        _client = openai.OpenAI(
            base_url=f"{base_url.rstrip('/')}/v1",
            api_key="lm-studio",
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
    return _client


def _get_model() -> str:
    """Return the model name configured for the active AI_PROVIDER."""
    config = current_app.config
    provider = config.get("AI_PROVIDER", "lmstudio").lower()
    if provider == "openai":
        return config.get("OPENAI_MODEL", "gpt-4o")
    return config.get("LM_STUDIO_MODEL", "local-model")


def _complete(messages: list[dict]) -> str:
    """
    Run one chat completion and return the stripped reply text.

    Shared by ask() and ask_inline() so both handle the response the same way.
    Returns an empty string when the first choice carries no text content.
    """
    response = _get_client().chat.completions.create(
        model=_get_model(),
        messages=messages,
        temperature=_TEMPERATURE,
    )
    # The API types message.content as optional; calling .strip() on None
    # would raise AttributeError, so treat a missing body as an empty reply.
    content = response.choices[0].message.content
    return (content or "").strip()


def ask(
    user_message: str,
    context_chunks: Optional[list[str]] = None,
    history: Optional[list[dict]] = None,
    system_extra: Optional[str] = None,
) -> str:
    """
    Send a message to the LLM with optional RAG context and chat history.

    Args:
        user_message: The user's question; sent as the final "user" message.
        context_chunks: Optional text chunks appended to the system prompt
            under a "## Context" heading, separated by "---" rules.
        history: Optional prior chat messages (role/content dicts) inserted
            between the system prompt and the new user message. The list
            itself is not modified.
        system_extra: Optional extra instructions appended to the system
            prompt.

    Returns the assistant reply as a string (empty if the reply has no text).
    """
    system_parts = [
        "You are a helpful AI assistant. Answer questions accurately based on the provided context.",
        "If the context does not contain enough information, say so clearly.",
    ]

    if context_chunks:
        context_text = "\n\n---\n\n".join(context_chunks)
        system_parts.append(f"\n\n## Context\n\n{context_text}")

    if system_extra:
        system_parts.append(system_extra)

    messages = [{"role": "system", "content": "\n".join(system_parts)}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": user_message})

    return _complete(messages)


def ask_inline(selected_text: str, question: str) -> str:
    """
    Inline chat: use selected_text directly as context — no RAG lookup.

    Args:
        selected_text: The text the user highlighted.
        question: The user's question about that text.

    Returns the assistant reply as a string (empty if the reply has no text).
    """
    system = (
        "You are a helpful AI assistant. The user has selected the following text "
        "and has a question about it. Answer specifically about the selected content."
    )
    messages = [
        {"role": "system", "content": system},
        {
            "role": "user",
            "content": f"## Selected text\n\n{selected_text}\n\n## Question\n\n{question}",
        },
    ]
    return _complete(messages)