# notes/services/llm_service.py

"""
Abstracted LLM service.
Supports AI_PROVIDER=lmstudio (default) or AI_PROVIDER=openai.
Both use the openai Python client — only base_url / api_key differ.
"""

from typing import Optional
from flask import current_app

# Module-level cache for the OpenAI-compatible client. Stays None until the
# first call to _get_client(), which builds the client and stores it here.
_client = None


def _get_client():
    """
    Return the shared OpenAI-compatible client, creating it on first use.

    The provider is read from ``AI_PROVIDER`` in the Flask app config
    (case-insensitive, default ``lmstudio``):

    - ``openai``: the client is built with ``OPENAI_API_KEY``.
    - anything else: the client targets ``LM_STUDIO_URL`` (default
      ``http://localhost:1234``) with a ``/v1`` path suffix.

    Config keys that are present but ``None``/empty are treated as unset.
    An ``LM_STUDIO_URL`` that already ends in ``/v1`` is not given a second
    ``/v1`` suffix.

    The client is cached in the module-level ``_client``; once created it is
    returned as-is, so later config changes are not picked up.
    """
    global _client
    if _client is not None:
        return _client

    # Local import: the openai package is only loaded when a client is built.
    import openai

    # `or` instead of a .get() default so a key explicitly set to None/""
    # falls back too, rather than crashing on None.lower().
    provider = (current_app.config.get("AI_PROVIDER") or "lmstudio").lower()

    if provider == "openai":
        api_key = current_app.config.get("OPENAI_API_KEY", "")
        _client = openai.OpenAI(api_key=api_key, timeout=60.0)
    else:
        # LM Studio or any OpenAI-compatible endpoint
        base_url = current_app.config.get("LM_STUDIO_URL") or "http://localhost:1234"
        # Reduce to the server root first so URLs configured with or without
        # a trailing "/v1" (or slash) both end up with exactly one "/v1".
        root = base_url.rstrip("/").removesuffix("/v1").rstrip("/")
        _client = openai.OpenAI(
            base_url=f"{root}/v1",
            # Fixed placeholder key; presumably not validated by a local
            # server -- TODO confirm for other OpenAI-compatible endpoints.
            api_key="lm-studio",
            timeout=60.0,
        )

    return _client


def _get_model() -> str:
    """
    Return the model name configured for the active provider.

    Reads ``AI_PROVIDER`` from the Flask app config (case-insensitive,
    default ``lmstudio``). For ``openai`` the model comes from
    ``OPENAI_MODEL`` (default ``gpt-4o``); for any other provider it comes
    from ``LM_STUDIO_MODEL`` (default ``local-model``).

    Keys that are present but ``None``/empty are treated as unset, so the
    result is always a non-empty string.
    """
    config = current_app.config

    # `or` instead of a .get() default: also covers keys set to None/"".
    provider = (config.get("AI_PROVIDER") or "lmstudio").lower()
    if provider == "openai":
        return config.get("OPENAI_MODEL") or "gpt-4o"
    return config.get("LM_STUDIO_MODEL") or "local-model"


def ask(
    user_message: str,
    context_chunks: Optional[list[str]] = None,
    history: Optional[list[dict]] = None,
    system_extra: Optional[str] = None,
) -> str:
    """
    Send a message to the LLM with optional RAG context and chat history.

    The request is assembled as:

    1. one system message: the base instructions, then (if given) the
       context chunks joined by ``---`` separators under a ``## Context``
       heading, then ``system_extra``;
    2. the ``history`` entries, passed through unchanged (presumably
       ``{"role": ..., "content": ...}`` dicts -- verify against callers);
    3. the user message.

    Args:
        user_message: The user's current question.
        context_chunks: Text snippets to expose to the model as context.
        history: Prior chat messages inserted before the user message.
        system_extra: Additional system-prompt text appended last.

    Returns:
        The assistant reply with surrounding whitespace stripped, or an
        empty string when the response carries no text content.
    """
    client = _get_client()
    model = _get_model()

    system_parts = [
        "You are a helpful AI assistant. Answer questions accurately based on the provided context.",
        "If the context does not contain enough information, say so clearly.",
    ]

    if context_chunks:
        context_text = "\n\n---\n\n".join(context_chunks)
        system_parts.append(f"\n\n## Context\n\n{context_text}")

    if system_extra:
        system_parts.append(system_extra)

    messages = [{"role": "system", "content": "\n".join(system_parts)}]

    if history:
        messages.extend(history)

    messages.append({"role": "user", "content": user_message})

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.7,
    )

    # The chat completions API allows message.content to be null (for
    # example on a refusal); guard so that case returns "" instead of
    # raising AttributeError on None.strip().
    content = response.choices[0].message.content
    return (content or "").strip()


def ask_inline(selected_text: str, question: str) -> str:
    """
    Inline chat: use selected_text directly as context — no RAG lookup.

    Sends a fixed system prompt plus a single user message that contains the
    selected text under ``## Selected text`` and the question under
    ``## Question``.

    Args:
        selected_text: The text the user highlighted.
        question: The user's question about that text.

    Returns:
        The assistant reply with surrounding whitespace stripped, or an
        empty string when the response carries no text content.
    """
    system = (
        "You are a helpful AI assistant. The user has selected the following text "
        "and has a question about it. Answer specifically about the selected content."
    )
    messages = [
        {"role": "system", "content": system},
        {
            "role": "user",
            "content": f"## Selected text\n\n{selected_text}\n\n## Question\n\n{question}",
        },
    ]

    client = _get_client()
    model = _get_model()

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.7,
    )

    # The chat completions API allows message.content to be null; guard so
    # that case returns "" instead of raising AttributeError on None.strip().
    content = response.choices[0].message.content
    return (content or "").strip()