modified: .env.example

modified: config.py modified: docker-compose.yml modified: requirements.txt modified: services/rag_service.py
2026-05-22 16:45:13 +02:00
parent b5a7abddd5
commit 9df6c67a29
5 changed files with 31 additions and 35 deletions
--- a/services/rag_service.py
+++ b/services/rag_service.py
@@ -1,25 +1,31 @@
 """
-RAG service using ChromaDB + sentence-transformers.
+RAG service using ChromaDB + LM Studio's /v1/embeddings endpoint.
+No local ML libraries (torch, sentence-transformers, onnxruntime) needed —
+embeddings are generated by the same LM Studio instance used for chat.
 Each chunk is stored with metadata: user_id, source_id, source_type (doc|url).
 """

-import os
 import re
 from typing import Optional
+
+from chromadb import EmbeddingFunction, Documents, Embeddings
 from flask import current_app
+from openai import OpenAI

 _chroma_client = None
 _collection = None
-_embedder = None


-def _get_embedder():
-    global _embedder
-    if _embedder is None:
-        from sentence_transformers import SentenceTransformer
-        cache = current_app.config.get("TRANSFORMERS_CACHE", ".cache")
-        _embedder = SentenceTransformer("all-MiniLM-L6-v2", cache_folder=cache)
-    return _embedder
+class LMStudioEmbeddingFunction(EmbeddingFunction):
+    """ChromaDB embedding function that embeds documents via LM Studio's OpenAI-compatible /v1/embeddings."""
+
+    def __init__(self, base_url: str, model: str):
+        self._client = OpenAI(base_url=f"{base_url.rstrip('/')}/v1", api_key="lm-studio")  # no "//v1" if URL ends in "/"
+        self._model = model
+
+    def __call__(self, input: Documents) -> Embeddings:
+        response = self._client.embeddings.create(model=self._model, input=input)  # one request for the whole batch
+        return [item.embedding for item in response.data]  # one vector per returned item, in response order


 def _get_collection():
@@ -27,16 +33,18 @@ def _get_collection():
    if _collection is None:
        import chromadb
        path = current_app.config["VECTORDB_PATH"]
+        base_url = current_app.config["LM_STUDIO_URL"]
+        model = current_app.config["LM_STUDIO_EMBEDDING_MODEL"]
        _chroma_client = chromadb.PersistentClient(path=path)
        _collection = _chroma_client.get_or_create_collection(
            name="ki_context",
+            embedding_function=LMStudioEmbeddingFunction(base_url, model),
            metadata={"hnsw:space": "cosine"},
        )
    return _collection


 def chunk_text(text: str, chunk_size: int, overlap: int) -> list[str]:
-    """Split text into overlapping word-based chunks."""
    words = text.split()
    chunks = []
    start = 0
@@ -55,26 +63,22 @@ def index_source(
    chunk_size: int = 500,
    chunk_overlap: int = 50,
 ):
-    """Chunk, embed and store text in ChromaDB. Replaces existing chunks for this source."""
+    """Chunk, embed via LM Studio and store in ChromaDB. Replaces existing chunks."""
    collection = _get_collection()
-    embedder = _get_embedder()

-    # Remove old chunks for this source first
    delete_source(user_id, source_id, source_type)

    chunks = chunk_text(text, chunk_size, chunk_overlap)
    if not chunks:
        return

-    embeddings = embedder.encode(chunks, show_progress_bar=False).tolist()
-
    ids = [f"{source_type}_{source_id}_chunk_{i}" for i in range(len(chunks))]
    metadatas = [
        {"user_id": str(user_id), "source_id": str(source_id), "source_type": source_type}
        for _ in chunks
    ]

-    collection.add(documents=chunks, embeddings=embeddings, ids=ids, metadatas=metadatas)
+    collection.add(documents=chunks, ids=ids, metadatas=metadatas)


 def delete_source(user_id: int, source_id: int, source_type: str):
@@ -101,17 +105,9 @@ def similarity_search(
    source_type: Optional[str] = None,
    top_k: int = 5,
 ) -> list[str]:
-    """
-    Search for relevant chunks.
-    Optionally filter by specific source_ids and/or source_type.
-    Returns list of chunk texts.
-    """
+    """Search for relevant chunks via LM Studio embeddings."""
    collection = _get_collection()
-    embedder = _get_embedder()

-    query_embedding = embedder.encode([query], show_progress_bar=False).tolist()[0]
-
-    # Build where filter
    conditions = [{"user_id": {"$eq": str(user_id)}}]

    if source_ids is not None and len(source_ids) > 0:
@@ -126,7 +122,7 @@ def similarity_search(

    try:
        results = collection.query(
-            query_embeddings=[query_embedding],
+            query_texts=[query],
            n_results=top_k,
            where=where,
        )