Files
notes/services/llm_service.py
SimolZimol 9eb0869c50 modified: config.py
modified:   services/llm_service.py
	modified:   services/rag_service.py
2026-05-23 14:15:42 +02:00

116 lines
3.4 KiB
Python

"""
Abstracted LLM service.
Supports AI_PROVIDER=lmstudio (default) or AI_PROVIDER=openai.
Both use the openai Python client — only base_url / api_key differ.
"""
from typing import Optional
from flask import current_app
# Process-wide cache for the OpenAI-compatible client; filled on first use.
_client = None


def _get_client():
    """
    Return the shared OpenAI-compatible client, creating it on first use.

    The provider is read from the Flask config key ``AI_PROVIDER``
    (case-insensitive, default ``"lmstudio"``):

    * ``"openai"`` builds a client using ``OPENAI_API_KEY``.
    * Any other value builds a client pointed at ``LM_STUDIO_URL``
      (default ``http://localhost:1234``) with a placeholder API key.

    The client is cached in the module-level ``_client``, so configuration
    changes made after the first call are not picked up. Reads
    ``current_app.config``, so a Flask application context must be active.
    """
    global _client
    if _client is not None:
        return _client

    # Imported at call time, as in the original layout (presumably to defer
    # loading the dependency until an LLM call is actually made).
    import openai

    provider = current_app.config.get("AI_PROVIDER", "lmstudio").lower()
    if provider == "openai":
        api_key = current_app.config.get("OPENAI_API_KEY", "")
        _client = openai.OpenAI(api_key=api_key, timeout=60.0)
    else:
        # LM Studio or any OpenAI-compatible endpoint
        base_url = current_app.config.get(
            "LM_STUDIO_URL", "http://localhost:1234"
        ).rstrip("/")
        # Accept the URL with or without the "/v1" suffix so the API path is
        # never doubled into ".../v1/v1".
        if not base_url.endswith("/v1"):
            base_url = f"{base_url}/v1"
        _client = openai.OpenAI(
            base_url=base_url,
            api_key="lm-studio",
            timeout=60.0,
        )
    return _client
def _get_model() -> str:
    """
    Return the model name configured for the active provider.

    When ``AI_PROVIDER`` (case-insensitive, default ``"lmstudio"``) is
    ``"openai"`` the value of ``OPENAI_MODEL`` is used (default ``"gpt-4o"``);
    otherwise ``LM_STUDIO_MODEL`` is used (default ``"local-model"``).
    """
    settings = current_app.config
    uses_openai = settings.get("AI_PROVIDER", "lmstudio").lower() == "openai"
    if uses_openai:
        config_key, fallback = "OPENAI_MODEL", "gpt-4o"
    else:
        config_key, fallback = "LM_STUDIO_MODEL", "local-model"
    return settings.get(config_key, fallback)
def ask(
    user_message: str,
    context_chunks: Optional[list[str]] = None,
    history: Optional[list[dict]] = None,
    system_extra: Optional[str] = None,
) -> str:
    """
    Send a message to the LLM with optional RAG context and chat history.

    Args:
        user_message: The current user question; always sent as the last
            message.
        context_chunks: Document excerpts embedded into the system prompt.
            Each is labelled ``[Excerpt N]`` (1-based) in the order given.
        history: Earlier chat messages, inserted unchanged between the
            system prompt and the new user message. Presumably OpenAI-style
            ``{"role": ..., "content": ...}`` dicts — verify against callers.
        system_extra: Extra instructions appended to the end of the system
            prompt, after any excerpts.

    Returns:
        The assistant reply with surrounding whitespace stripped, or an
        empty string when the response carries no text content.
    """
    client = _get_client()
    model = _get_model()

    system_parts = [
        "You are a helpful AI assistant. You will be given excerpts from one or more documents "
        "as context. Synthesize the information from ALL relevant excerpts to give a complete answer. "
        "The excerpts are ordered by their position in the document.",
        "If specific information is not contained in the context, say so clearly.",
        "When the answer spans multiple sections, summarize each relevant part.",
    ]
    if context_chunks:
        context_text = "\n\n---\n\n".join(
            f"[Excerpt {number}]\n{chunk}"
            for number, chunk in enumerate(context_chunks, start=1)
        )
        system_parts.append(f"\n\n## Document excerpts\n\n{context_text}")
    if system_extra:
        system_parts.append(system_extra)

    messages = [{"role": "system", "content": "\n".join(system_parts)}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": user_message})

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.7,
    )
    # `content` is nullable in the Chat Completions schema; fall back to an
    # empty string instead of raising AttributeError on `.strip()`.
    reply = response.choices[0].message.content
    return (reply or "").strip()
def ask_inline(selected_text: str, question: str) -> str:
    """
    Inline chat: use selected_text directly as context — no RAG lookup.

    Args:
        selected_text: The text the user highlighted; placed under a
            "## Selected text" heading in the user message.
        question: The user's question about that text; placed under a
            "## Question" heading in the same message.

    Returns:
        The assistant reply with surrounding whitespace stripped, or an
        empty string when the response carries no text content.
    """
    system = (
        "You are a helpful AI assistant. The user has selected the following text "
        "and has a question about it. Answer specifically about the selected content."
    )
    messages = [
        {"role": "system", "content": system},
        {
            "role": "user",
            "content": f"## Selected text\n\n{selected_text}\n\n## Question\n\n{question}",
        },
    ]

    client = _get_client()
    model = _get_model()
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.7,
    )
    # `content` is nullable in the Chat Completions schema; fall back to an
    # empty string instead of raising AttributeError on `.strip()`.
    reply = response.choices[0].message.content
    return (reply or "").strip()