modified: services/llm_service.py modified: services/rag_service.py modified: static/js/chat.js
111 lines
3.0 KiB
Python
111 lines
3.0 KiB
Python
"""
|
|
Abstracted LLM service.

Supports AI_PROVIDER=lmstudio (default) or AI_PROVIDER=openai.
Both use the openai Python client — only base_url, api_key and the model name differ.
|
|
"""
|
|
|
|
from typing import Optional
|
|
from flask import current_app
|
|
|
|
# Module-level cache for the OpenAI-compatible client. Stays None until
# _get_client() builds the client on first use; later calls reuse it.
_client = None
|
|
|
|
|
|
def _get_client():
    """
    Return the shared OpenAI-compatible client, building it on first use.

    The client is built once from ``current_app.config`` and cached in the
    module-level ``_client``. Later calls return the cached instance, so
    config changes made after the first call are not picked up.

    Config keys read:
        AI_PROVIDER: ``"openai"`` (case-insensitive) selects the OpenAI API;
            any other value (default ``"lmstudio"``) selects an
            OpenAI-compatible endpoint.
        OPENAI_API_KEY: API key for the ``"openai"`` provider (default ``""``).
        LM_STUDIO_URL: endpoint root for the compatible provider (default
            ``"http://localhost:1234"``). A trailing slash and an existing
            ``/v1`` suffix are both accepted.

    Returns:
        The cached ``openai.OpenAI`` client instance.
    """
    global _client
    if _client is not None:
        return _client

    # Deferred import: openai is only needed once a client is actually built.
    import openai

    provider = current_app.config.get("AI_PROVIDER", "lmstudio").lower()

    if provider == "openai":
        api_key = current_app.config.get("OPENAI_API_KEY", "")
        _client = openai.OpenAI(api_key=api_key, timeout=60.0)
    else:
        # LM Studio or any OpenAI-compatible endpoint
        base_url = current_app.config.get("LM_STUDIO_URL", "http://localhost:1234")
        api_root = base_url.rstrip("/")
        # Accept both "http://host:1234" and "http://host:1234/v1" so the
        # API prefix is never doubled into ".../v1/v1".
        if not api_root.endswith("/v1"):
            api_root = f"{api_root}/v1"
        _client = openai.OpenAI(
            base_url=api_root,
            api_key="lm-studio",
            timeout=60.0,
        )

    return _client
|
|
|
|
|
|
def _get_model() -> str:
    """
    Return the model name configured for the active provider.

    Reads ``AI_PROVIDER`` from ``current_app.config`` (default ``"lmstudio"``,
    compared case-insensitively). For ``"openai"`` the result is
    ``OPENAI_MODEL`` (default ``"gpt-4o"``); for anything else it is
    ``LM_STUDIO_MODEL`` (default ``"local-model"``).
    """
    config = current_app.config
    uses_openai = config.get("AI_PROVIDER", "lmstudio").lower() == "openai"

    # Pick the config key together with its fallback, then do one lookup.
    if uses_openai:
        key, fallback = "OPENAI_MODEL", "gpt-4o"
    else:
        key, fallback = "LM_STUDIO_MODEL", "local-model"
    return config.get(key, fallback)
|
|
|
|
|
|
def ask(
    user_message: str,
    context_chunks: Optional[list[str]] = None,
    history: Optional[list[dict]] = None,
    system_extra: Optional[str] = None,
) -> str:
    """
    Send a message to the LLM with optional RAG context and chat history.

    The request is built as: one system message, then ``history`` (if any),
    then ``user_message`` as the final user turn.

    Args:
        user_message: The new user turn; always sent as the last message.
        context_chunks: Text chunks to ground the answer. When non-empty they
            are joined with ``---`` separators and appended to the system
            prompt under a ``## Context`` heading.
        history: Earlier messages, inserted unchanged between the system
            prompt and ``user_message``. Presumably OpenAI-style
            ``{"role": ..., "content": ...}`` dicts — confirm with callers.
        system_extra: Extra text appended to the end of the system prompt.

    Returns:
        The assistant reply with surrounding whitespace stripped, or an empty
        string when the response carries no text content.
    """
    client = _get_client()
    model = _get_model()

    system_parts = [
        "You are a helpful AI assistant. Answer questions accurately based on the provided context.",
        "If the context does not contain enough information, say so clearly.",
    ]

    if context_chunks:
        context_text = "\n\n---\n\n".join(context_chunks)
        system_parts.append(f"\n\n## Context\n\n{context_text}")

    if system_extra:
        system_parts.append(system_extra)

    messages = [{"role": "system", "content": "\n".join(system_parts)}]

    if history:
        messages.extend(history)

    messages.append({"role": "user", "content": user_message})

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.7,
    )

    # ``content`` is optional in the chat-completions schema, so guard against
    # None before stripping.
    reply = response.choices[0].message.content
    return (reply or "").strip()
|
|
|
|
|
|
def ask_inline(selected_text: str, question: str) -> str:
    """
    Inline chat: use selected_text directly as context — no RAG lookup.

    Sends a fixed system prompt plus one user message that embeds the
    selection under ``## Selected text`` and the question under
    ``## Question``.

    Args:
        selected_text: The text the user highlighted; sent verbatim.
        question: The user's question about that text.

    Returns:
        The assistant reply with surrounding whitespace stripped, or an empty
        string when the response carries no text content.
    """
    system = (
        "You are a helpful AI assistant. The user has selected the following text "
        "and has a question about it. Answer specifically about the selected content."
    )
    messages = [
        {"role": "system", "content": system},
        {
            "role": "user",
            "content": f"## Selected text\n\n{selected_text}\n\n## Question\n\n{question}",
        },
    ]

    client = _get_client()
    model = _get_model()

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.7,
    )

    # ``content`` is optional in the chat-completions schema, so guard against
    # None before stripping.
    reply = response.choices[0].message.content
    return (reply or "").strip()
|