# notes/services/llm_service.py

"""
Abstracted LLM service.
Supports AI_PROVIDER=lmstudio (default) or AI_PROVIDER=openai.
Both use the openai Python client — only base_url / api_key differ.
"""

from typing import Optional
from flask import current_app

# Cached client instance: populated by the first _get_client() call and
# returned as-is by every later call.
_client = None


def _get_client():
    """
    Return the shared OpenAI-compatible client, creating it on first use.

    The provider is read from the ``AI_PROVIDER`` config key (default
    ``"lmstudio"``, compared case-insensitively):

    * ``"openai"`` builds a client from ``OPENAI_API_KEY``.
    * Any other value builds a client pointed at ``LM_STUDIO_URL``
      (default ``http://localhost:1234``) with a placeholder API key.
      The URL may be configured with or without a trailing ``/v1``.

    The client is stored in the module-level ``_client`` and reused by
    later calls, so config changes made after the first call are not
    picked up. Reads ``current_app.config``.
    """
    global _client
    if _client is not None:
        return _client

    # Imported lazily so the dependency is only loaded when a client is built.
    import openai

    provider = current_app.config.get("AI_PROVIDER", "lmstudio").lower()

    if provider == "openai":
        api_key = current_app.config.get("OPENAI_API_KEY", "")
        _client = openai.OpenAI(api_key=api_key, timeout=60.0)
    else:
        # LM Studio or any OpenAI-compatible endpoint
        base_url = current_app.config.get("LM_STUDIO_URL", "http://localhost:1234")
        base_url = base_url.rstrip("/")
        # Only add the API prefix when it is missing; otherwise a URL that
        # was configured as ".../v1" would become ".../v1/v1".
        if not base_url.endswith("/v1"):
            base_url = f"{base_url}/v1"
        _client = openai.OpenAI(
            base_url=base_url,
            api_key="lm-studio",
            timeout=60.0,
        )

    return _client


def _get_model() -> str:
    """
    Return the model name configured for the active provider.

    Uses ``OPENAI_MODEL`` (default ``"gpt-4o"``) when ``AI_PROVIDER`` is
    ``"openai"``, otherwise ``LM_STUDIO_MODEL`` (default ``"local-model"``).
    """
    config = current_app.config
    if config.get("AI_PROVIDER", "lmstudio").lower() != "openai":
        return config.get("LM_STUDIO_MODEL", "local-model")
    return config.get("OPENAI_MODEL", "gpt-4o")


def ask(
    user_message: str,
    context_chunks: Optional[list[str]] = None,
    history: Optional[list[dict]] = None,
    system_extra: Optional[str] = None,
) -> str:
    """
    Send a message to the LLM with optional RAG context and chat history.

    Args:
        user_message: The user's question; sent as the final "user" message.
        context_chunks: Document excerpts appended to the system prompt as
            numbered "[Excerpt N]" sections. Skipped when empty or None.
        history: Earlier messages inserted, unchanged, between the system
            prompt and the user message (presumably dicts with "role" and
            "content" keys; confirm against callers).
        system_extra: Extra instructions appended to the end of the system
            prompt. Skipped when empty or None.

    Returns:
        The assistant reply with surrounding whitespace stripped, or an
        empty string when the response carries no text content.
    """
    client = _get_client()
    model = _get_model()

    system_parts = [
        "You are a helpful AI assistant. You will be given excerpts from one or more documents "
        "as context. Synthesize the information from ALL relevant excerpts to give a complete answer. "
        "The excerpts are ordered by their position in the document.",
        "If specific information is not contained in the context, say so clearly.",
        "When the answer spans multiple sections, summarize each relevant part.",
    ]

    if context_chunks:
        context_text = "\n\n---\n\n".join(
            f"[Excerpt {i+1}]\n{chunk}" for i, chunk in enumerate(context_chunks)
        )
        system_parts.append(f"\n\n## Document excerpts\n\n{context_text}")

    if system_extra:
        system_parts.append(system_extra)

    messages = [{"role": "system", "content": "\n".join(system_parts)}]

    if history:
        messages.extend(history)

    messages.append({"role": "user", "content": user_message})

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.7,
    )

    # The API allows a message with no text content (content is nullable);
    # treat that as an empty reply instead of failing on None.strip().
    content = response.choices[0].message.content
    return (content or "").strip()


def ask_inline(selected_text: str, question: str) -> str:
    """
    Inline chat: use selected_text directly as context — no RAG lookup.

    Args:
        selected_text: The text the user selected; embedded in the user
            message under a "## Selected text" heading.
        question: The user's question about that text; embedded under a
            "## Question" heading.

    Returns:
        The assistant reply with surrounding whitespace stripped, or an
        empty string when the response carries no text content.
    """
    system = (
        "You are a helpful AI assistant. The user has selected the following text "
        "and has a question about it. Answer specifically about the selected content."
    )
    messages = [
        {"role": "system", "content": system},
        {
            "role": "user",
            "content": f"## Selected text\n\n{selected_text}\n\n## Question\n\n{question}",
        },
    ]

    client = _get_client()
    model = _get_model()

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.7,
    )

    # The API allows a message with no text content (content is nullable);
    # treat that as an empty reply instead of failing on None.strip().
    content = response.choices[0].message.content
    return (content or "").strip()