From 9df6c67a29578208fafc85e8f1946c9ac8f454a7 Mon Sep 17 00:00:00 2001
From: SimolZimol <70102430+SimolZimol@users.noreply.github.com>
Date: Fri, 22 May 2026 16:45:13 +0200
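Subject: [PATCH] Use LM Studio /v1/embeddings for RAG embeddings

Replace the local sentence-transformers embedder with LM Studio's
/v1/embeddings endpoint, add the LM_STUDIO_EMBEDDING_MODEL setting,
drop the TRANSFORMERS_CACHE setting and the hf_cache volume, and fall
back to SQLite when DATABASE_URI is empty or contains no "://".

Modified: .env.example, config.py, docker-compose.yml,
requirements.txt, services/rag_service.py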

---
 .env.example            |  2 ++
 config.py               | 11 ++++-----
 docker-compose.yml      |  2 --
 requirements.txt        |  1 -
 services/rag_service.py | 50 +++++++++++++++++++----------------------
 5 files changed, 31 insertions(+), 35 deletions(-)

diff --git a/.env.example b/.env.example
index 58a7383..d3aafe7 100644
--- a/.env.example
+++ b/.env.example
@@ -16,6 +16,8 @@ AI_PROVIDER=lmstudio
 # On Linux/Docker: use host.docker.internal to reach the host
 LM_STUDIO_URL=http://host.docker.internal:1234
 LM_STUDIO_MODEL=local-model
+# Model used for RAG embeddings — can be the same as LM_STUDIO_MODEL or a dedicated embedding model
+LM_STUDIO_EMBEDDING_MODEL=local-model
 
 # OpenAI (only needed when AI_PROVIDER=openai)
 # OPENAI_API_KEY=sk-...
diff --git a/config.py b/config.py
index bab966c..7f3f129 100644
--- a/config.py
+++ b/config.py
@@ -8,16 +8,16 @@ BASE_DIR = os.path.abspath(os.path.dirname(__file__))
 
 class Config:
     SECRET_KEY = os.environ.get("SECRET_KEY", "change-me-in-production")
-    SQLALCHEMY_DATABASE_URI = os.environ.get(
-        "DATABASE_URI", f"sqlite:///{os.path.join(BASE_DIR, 'app.db')}"
+    _db_uri = os.environ.get("DATABASE_URI", "")
+    _default_uri = f"sqlite:///{os.path.join(BASE_DIR, 'app.db')}"
+    # Fall back to SQLite if DATABASE_URI is empty or contains no "://" (not URL-shaped)
+    SQLALCHEMY_DATABASE_URI = (
+        _db_uri if _db_uri and "://" in _db_uri else _default_uri
     )
     SQLALCHEMY_TRACK_MODIFICATIONS = False
 
     UPLOAD_FOLDER = os.environ.get("UPLOAD_FOLDER", os.path.join(BASE_DIR, "uploads"))
     VECTORDB_PATH = os.environ.get("VECTORDB_PATH", os.path.join(BASE_DIR, "vectordb"))
-    TRANSFORMERS_CACHE = os.environ.get(
-        "TRANSFORMERS_CACHE", os.path.join(BASE_DIR, ".cache")
-    )
 
     ALLOWED_EXTENSIONS = {"pdf", "txt", "docx", "md"}
     MAX_CONTENT_LENGTH = 50 * 1024 * 1024  # 50 MB
@@ -26,6 +26,7 @@ class Config:
     AI_PROVIDER = os.environ.get("AI_PROVIDER", "lmstudio")
     LM_STUDIO_URL = os.environ.get("LM_STUDIO_URL", "http://localhost:1234")
     LM_STUDIO_MODEL = os.environ.get("LM_STUDIO_MODEL", "local-model")
+    LM_STUDIO_EMBEDDING_MODEL = os.environ.get("LM_STUDIO_EMBEDDING_MODEL", "local-model")
     OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
     OPENAI_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o")
 
diff --git a/docker-compose.yml b/docker-compose.yml
index 910dab8..8b99dd3 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -9,7 +9,6 @@ services:
     volumes:
       - uploads:/app/uploads
       - vectordb:/app/vectordb
-      - hf_cache:/app/.cache
     healthcheck:
       test: ["CMD", "python", "-c",
              "import urllib.request; urllib.request.urlopen('http://localhost:5000/auth/login')"]
@@ -21,4 +20,3 @@ services:
 volumes:
   uploads:
   vectordb:
-  hf_cache:
diff --git a/requirements.txt b/requirements.txt
index 837a6d0..40e9173 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,7 +9,6 @@ python-docx==1.1.2
 markdown==3.6
 beautifulsoup4==4.12.3
 requests==2.32.3
-sentence-transformers==3.0.1
 chromadb==0.5.3
 openai==1.35.3
 gunicorn==22.0.0
diff --git a/services/rag_service.py b/services/rag_service.py
index 524f4a1..24f4684 100644
--- a/services/rag_service.py
+++ b/services/rag_service.py
@@ -1,25 +1,31 @@
 """
-RAG service using ChromaDB + sentence-transformers.
+RAG service using ChromaDB + LM Studio's /v1/embeddings endpoint.
+No local ML libraries (torch, sentence-transformers, onnxruntime) needed —
+embeddings are generated by the same LM Studio instance used for chat.
 Each chunk is stored with metadata: user_id, source_id, source_type (doc|url).
 """
 
-import os
 import re
 from typing import Optional
+
+from chromadb import EmbeddingFunction, Documents, Embeddings
 from flask import current_app
+from openai import OpenAI
 
 _chroma_client = None
 _collection = None
-_embedder = None
 
 
-def _get_embedder():
-    global _embedder
-    if _embedder is None:
-        from sentence_transformers import SentenceTransformer
-        cache = current_app.config.get("TRANSFORMERS_CACHE", ".cache")
-        _embedder = SentenceTransformer("all-MiniLM-L6-v2", cache_folder=cache)
-    return _embedder
+class LMStudioEmbeddingFunction(EmbeddingFunction):
+    """ChromaDB embedding function using LM Studio's /v1/embeddings (base_url may end in "/")."""
+
+    def __init__(self, base_url: str, model: str):
+        self._client = OpenAI(base_url=f"{base_url.rstrip('/')}/v1", api_key="lm-studio")
+        self._model = model
+
+    def __call__(self, input: Documents) -> Embeddings:
+        response = self._client.embeddings.create(model=self._model, input=input)
+        return [item.embedding for item in response.data]
 
 
 def _get_collection():
@@ -27,16 +33,18 @@ def _get_collection():
     if _collection is None:
         import chromadb
         path = current_app.config["VECTORDB_PATH"]
+        base_url = current_app.config["LM_STUDIO_URL"]
+        model = current_app.config["LM_STUDIO_EMBEDDING_MODEL"]
         _chroma_client = chromadb.PersistentClient(path=path)
         _collection = _chroma_client.get_or_create_collection(
             name="ki_context",
+            embedding_function=LMStudioEmbeddingFunction(base_url, model),
             metadata={"hnsw:space": "cosine"},
         )
     return _collection
 
 
 def chunk_text(text: str, chunk_size: int, overlap: int) -> list[str]:
-    """Split text into overlapping word-based chunks."""
     words = text.split()
     chunks = []
     start = 0
@@ -55,26 +63,22 @@ def index_source(
     chunk_size: int = 500,
     chunk_overlap: int = 50,
 ):
-    """Chunk, embed and store text in ChromaDB. Replaces existing chunks for this source."""
+    """Chunk, embed via LM Studio and store in ChromaDB. Replaces existing chunks."""
     collection = _get_collection()
-    embedder = _get_embedder()
 
-    # Remove old chunks for this source first
     delete_source(user_id, source_id, source_type)
 
     chunks = chunk_text(text, chunk_size, chunk_overlap)
     if not chunks:
         return
 
-    embeddings = embedder.encode(chunks, show_progress_bar=False).tolist()
-
     ids = [f"{source_type}_{source_id}_chunk_{i}" for i in range(len(chunks))]
     metadatas = [
         {"user_id": str(user_id), "source_id": str(source_id), "source_type": source_type}
         for _ in chunks
     ]
 
-    collection.add(documents=chunks, embeddings=embeddings, ids=ids, metadatas=metadatas)
+    collection.add(documents=chunks, ids=ids, metadatas=metadatas)
 
 
 def delete_source(user_id: int, source_id: int, source_type: str):
@@ -101,17 +105,9 @@ def similarity_search(
     source_type: Optional[str] = None,
     top_k: int = 5,
 ) -> list[str]:
-    """
-    Search for relevant chunks.
-    Optionally filter by specific source_ids and/or source_type.
-    Returns list of chunk texts.
-    """
+    """Search for relevant chunks via LM Studio embeddings."""
     collection = _get_collection()
-    embedder = _get_embedder()
 
-    query_embedding = embedder.encode([query], show_progress_bar=False).tolist()[0]
-
-    # Build where filter
     conditions = [{"user_id": {"$eq": str(user_id)}}]
 
     if source_ids is not None and len(source_ids) > 0:
@@ -126,7 +122,7 @@ def similarity_search(
 
     try:
         results = collection.query(
-            query_embeddings=[query_embedding],
+            query_texts=[query],
             n_results=top_k,
             where=where,
         )