Files
notes/services/llm_service.py
SimolZimol 718e38e9d5 modified: blueprints/chat.py
modified:   services/llm_service.py
	modified:   services/rag_service.py
	modified:   static/js/chat.js
2026-05-22 17:27:07 +02:00

111 lines
3.0 KiB
Python

"""
Abstracted LLM service.
Supports AI_PROVIDER=lmstudio (default) or AI_PROVIDER=openai.
Both use the openai Python client — only base_url / api_key differ.
"""
from typing import Optional
from flask import current_app
# Module-level cache for the LLM client; None until _get_client() creates it.
_client = None
def _get_client():
    """
    Return the shared OpenAI-compatible client, creating it on first use.

    The client is built from ``current_app.config`` (so an active Flask
    application context is needed on the first call) and stored in the
    module-level ``_client``. Later calls return the cached instance without
    re-reading the configuration.

    Config keys read:
        AI_PROVIDER: ``"openai"`` (case-insensitive) selects the OpenAI API;
            any other value (default ``"lmstudio"``) selects an
            OpenAI-compatible endpoint.
        OPENAI_API_KEY: API key for the ``"openai"`` provider (default ``""``).
        LM_STUDIO_URL: server URL for every other provider (default
            ``"http://localhost:1234"``). It may be given with or without a
            trailing ``/v1``; the suffix is added only when missing.

    Returns:
        The cached ``openai.OpenAI`` client instance.
    """
    global _client
    if _client is not None:
        return _client

    # openai is imported when the first client is built, not at module import.
    import openai

    provider = current_app.config.get("AI_PROVIDER", "lmstudio").lower()
    if provider == "openai":
        api_key = current_app.config.get("OPENAI_API_KEY", "")
        _client = openai.OpenAI(api_key=api_key, timeout=60.0)
    else:
        # LM Studio or any OpenAI-compatible endpoint
        base_url = current_app.config.get(
            "LM_STUDIO_URL", "http://localhost:1234"
        ).rstrip("/")
        # A URL configured as ".../v1" must not become ".../v1/v1", which
        # would make every request miss the API routes.
        if not base_url.endswith("/v1"):
            base_url = f"{base_url}/v1"
        _client = openai.OpenAI(
            base_url=base_url,
            # NOTE(review): fixed placeholder key; presumably not validated
            # by the local server — confirm.
            api_key="lm-studio",
            timeout=60.0,
        )
    return _client
def _get_model() -> str:
    """
    Return the model name configured for the active provider.

    When AI_PROVIDER is "openai" (compared case-insensitively) the value of
    OPENAI_MODEL is returned, defaulting to "gpt-4o". For any other provider
    the value of LM_STUDIO_MODEL is returned, defaulting to "local-model".
    """
    config = current_app.config
    uses_openai = config.get("AI_PROVIDER", "lmstudio").lower() == "openai"
    if uses_openai:
        key, fallback = "OPENAI_MODEL", "gpt-4o"
    else:
        key, fallback = "LM_STUDIO_MODEL", "local-model"
    return config.get(key, fallback)
def ask(
    user_message: str,
    context_chunks: Optional[list[str]] = None,
    history: Optional[list[dict]] = None,
    system_extra: Optional[str] = None,
) -> str:
    """
    Send a message to the LLM with optional RAG context and chat history.

    The request is assembled as: one system message, then ``history`` (if
    any) unchanged, then ``user_message`` as the final user turn.

    Args:
        user_message: The new user turn to send.
        context_chunks: Text chunks appended to the system prompt under a
            "## Context" heading, separated by "---" rules. Skipped when
            empty or None.
        history: Prior chat messages, inserted as-is between the system
            prompt and the new user message. Skipped when empty or None.
        system_extra: Extra instructions appended to the end of the system
            prompt. Skipped when empty or None.

    Returns:
        The assistant reply with surrounding whitespace stripped, or an empty
        string when the response carries no text content.
    """
    client = _get_client()
    model = _get_model()

    system_parts = [
        "You are a helpful AI assistant. Answer questions accurately based on the provided context.",
        "If the context does not contain enough information, say so clearly.",
    ]
    if context_chunks:
        context_text = "\n\n---\n\n".join(context_chunks)
        system_parts.append(f"\n\n## Context\n\n{context_text}")
    if system_extra:
        system_parts.append(system_extra)

    messages = [{"role": "system", "content": "\n".join(system_parts)}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": user_message})

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.7,
    )
    # The API declares message.content as optional; calling .strip() on None
    # would raise AttributeError, so treat a missing body as an empty reply.
    content = response.choices[0].message.content
    return (content or "").strip()
def ask_inline(selected_text: str, question: str) -> str:
    """
    Inline chat: use selected_text directly as context — no RAG lookup.

    Sends a fixed system prompt plus a single user message that contains the
    selected text and the question under separate markdown headings.

    Args:
        selected_text: The text the user highlighted; embedded verbatim in
            the user message under "## Selected text".
        question: The user's question about that text; embedded under
            "## Question".

    Returns:
        The assistant reply with surrounding whitespace stripped, or an empty
        string when the response carries no text content.
    """
    system = (
        "You are a helpful AI assistant. The user has selected the following text "
        "and has a question about it. Answer specifically about the selected content."
    )
    messages = [
        {"role": "system", "content": system},
        {
            "role": "user",
            "content": f"## Selected text\n\n{selected_text}\n\n## Question\n\n{question}",
        },
    ]

    client = _get_client()
    model = _get_model()
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.7,
    )
    # The API declares message.content as optional; calling .strip() on None
    # would raise AttributeError, so treat a missing body as an empty reply.
    content = response.choices[0].message.content
    return (content or "").strip()