modified: config.py modified: docker-compose.yml modified: requirements.txt modified: services/rag_service.py
132 lines
4.0 KiB
Python
132 lines
4.0 KiB
Python
"""
|
|
RAG service using ChromaDB + LM Studio's /v1/embeddings endpoint.
|
|
No local ML libraries (torch, sentence-transformers, onnxruntime) needed —
|
|
embeddings are generated by the same LM Studio instance used for chat.
|
|
Each chunk is stored with metadata: user_id, source_id, source_type (doc|url).
|
|
"""
|
|
|
|
import re
|
|
from typing import Optional
|
|
|
|
from chromadb import EmbeddingFunction, Documents, Embeddings
|
|
from flask import current_app
|
|
from openai import OpenAI
|
|
|
|
# Module-level cache for the ChromaDB client and collection. Both start as
# None and are assigned by _get_collection() the first time it runs.
_chroma_client = None
_collection = None
|
|
|
|
|
|
class LMStudioEmbeddingFunction(EmbeddingFunction):
    """Embedding function for ChromaDB that delegates to LM Studio.

    Requests go through an OpenAI client pointed at ``<base_url>/v1`` and use
    its ``embeddings.create`` call.
    """

    def __init__(self, base_url: str, model: str):
        """Build the client used for embedding requests.

        Args:
            base_url: Server root URL; ``/v1`` is appended to it as-is.
            model: Model name passed along with every embedding request.
        """
        api_root = f"{base_url}/v1"
        self._model = model
        # The key is a fixed placeholder string, not read from configuration.
        self._client = OpenAI(base_url=api_root, api_key="lm-studio")

    def __call__(self, input: Documents) -> Embeddings:
        """Embed ``input`` and return the embedding of each response item."""
        reply = self._client.embeddings.create(input=input, model=self._model)
        vectors = []
        for entry in reply.data:
            vectors.append(entry.embedding)
        return vectors
|
|
|
|
|
|
def _get_collection():
    """Return the cached ChromaDB collection, building it on first use.

    On the first call this reads ``VECTORDB_PATH``, ``LM_STUDIO_URL`` and
    ``LM_STUDIO_EMBEDDING_MODEL`` from ``current_app.config``, opens a
    persistent client at that path and gets or creates the ``ki_context``
    collection. Later calls return the object stored in the module globals.
    """
    global _chroma_client, _collection

    # Already initialised: reuse the cached collection.
    if _collection is not None:
        return _collection

    import chromadb

    settings = current_app.config
    storage_dir = settings["VECTORDB_PATH"]
    lm_studio_url = settings["LM_STUDIO_URL"]
    embedding_model = settings["LM_STUDIO_EMBEDDING_MODEL"]

    _chroma_client = chromadb.PersistentClient(path=storage_dir)
    embedder = LMStudioEmbeddingFunction(lm_studio_url, embedding_model)
    _collection = _chroma_client.get_or_create_collection(
        name="ki_context",
        embedding_function=embedder,
        metadata={"hnsw:space": "cosine"},
    )
    return _collection
|
|
|
|
|
|
def chunk_text(text: str, chunk_size: int, overlap: int) -> list[str]:
    """Split ``text`` into chunks of whitespace-separated words.

    Words come from ``str.split()``, so any run of whitespace is a separator
    and each chunk is its words re-joined with single spaces. Consecutive
    chunks start ``chunk_size - overlap`` words apart.

    Args:
        text: Text to split. Empty or whitespace-only text yields ``[]``.
        chunk_size: Maximum number of words per chunk; must be at least 1.
        overlap: Number of words shared by neighbouring chunks; must be
            smaller than ``chunk_size``. A negative value is accepted and
            leaves gaps of skipped words between chunks.

    Returns:
        The chunks in document order. The last chunk may be shorter than
        ``chunk_size``.

    Raises:
        ValueError: If ``chunk_size`` is below 1 or ``overlap`` is not
            smaller than ``chunk_size``. Such values would give a window that
            never advances, which previously looped forever.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be at least 1, got {chunk_size}")
    if overlap >= chunk_size:
        raise ValueError(
            f"overlap ({overlap}) must be smaller than chunk_size ({chunk_size})"
        )

    step = chunk_size - overlap
    words = text.split()
    # Every window holds at least one word, so no chunk can be empty.
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), step)
    ]
|
|
|
|
|
|
def index_source(
    text: str,
    user_id: int,
    source_id: int,
    source_type: str,  # "doc" | "url"
    chunk_size: int = 500,
    chunk_overlap: int = 50,
):
    """Re-index one source: delete its old chunks, then chunk and store ``text``.

    Existing chunks for the (user, source, type) triple are removed first.
    If the text produces no chunks, nothing is added afterwards.

    Args:
        text: Full text of the source.
        user_id: Owner of the source; stored as a string in the metadata.
        source_id: Identifier of the source; stored as a string in the
            metadata and used in the chunk ids.
        source_type: Kind of source ("doc" or "url").
        chunk_size: Words per chunk, passed to ``chunk_text``.
        chunk_overlap: Overlapping words between chunks, passed to
            ``chunk_text``.
    """
    store = _get_collection()

    # Remove the previous chunks before adding the new ones.
    delete_source(user_id, source_id, source_type)

    pieces = chunk_text(text, chunk_size, chunk_overlap)
    if not pieces:
        return

    chunk_ids = []
    chunk_meta = []
    for position, _piece in enumerate(pieces):
        chunk_ids.append(f"{source_type}_{source_id}_chunk_{position}")
        # Each chunk gets its own metadata dict with identical content.
        chunk_meta.append(
            {
                "user_id": str(user_id),
                "source_id": str(source_id),
                "source_type": source_type,
            }
        )

    store.add(documents=pieces, ids=chunk_ids, metadatas=chunk_meta)
|
|
|
|
|
|
def delete_source(user_id: int, source_id: int, source_type: str):
    """Remove all chunks belonging to a source from ChromaDB.

    Deletion is best-effort: an error raised by the collection's ``delete``
    call is logged and then suppressed, so this function does not propagate
    it to the caller.

    Args:
        user_id: Owner of the source; compared as a string.
        source_id: Identifier of the source; compared as a string.
        source_type: Kind of source ("doc" or "url").
    """
    collection = _get_collection()

    # All three metadata fields must match for a chunk to be removed.
    source_filter = {
        "$and": [
            {"user_id": {"$eq": str(user_id)}},
            {"source_id": {"$eq": str(source_id)}},
            {"source_type": {"$eq": source_type}},
        ]
    }

    try:
        collection.delete(where=source_filter)
    except Exception:
        # Still swallowed so callers are unaffected, but no longer silent:
        # a failed delete can leave stale chunks behind.
        current_app.logger.warning(
            "Could not delete chunks for %s %s (user %s)",
            source_type,
            source_id,
            user_id,
            exc_info=True,
        )
|
|
|
|
|
|
def similarity_search(
    query: str,
    user_id: int,
    source_ids: Optional[list[int]] = None,
    source_type: Optional[str] = None,
    top_k: int = 5,
) -> list[str]:
    """Search the collection for chunks relevant to ``query``.

    Results are always restricted to ``user_id``. They are further narrowed
    by ``source_ids`` and ``source_type`` when those are given and non-empty.

    Args:
        query: Text to search for.
        user_id: Owner whose chunks are searched; compared as a string.
        source_ids: Optional source ids to restrict to. ``None`` or an empty
            list applies no source restriction.
        source_type: Optional source kind ("doc" or "url") to restrict to.
        top_k: Number of results requested from the collection.

    Returns:
        The documents of the first (only) query result, or ``[]`` when there
        are none. Any error during the query is logged and also yields ``[]``.
    """
    collection = _get_collection()

    conditions = [{"user_id": {"$eq": str(user_id)}}]
    if source_ids:
        conditions.append(
            {"source_id": {"$in": [str(sid) for sid in source_ids]}}
        )
    if source_type:
        conditions.append({"source_type": {"$eq": source_type}})

    # "$and" is only used when there is more than one condition.
    where = {"$and": conditions} if len(conditions) > 1 else conditions[0]

    try:
        results = collection.query(
            query_texts=[query],
            n_results=top_k,
            where=where,
        )
        documents = results["documents"]
        return documents[0] if documents else []
    except Exception:
        # Return value is unchanged; the log makes a failed query
        # distinguishable from a search with no matches.
        current_app.logger.warning(
            "Similarity search failed for user %s", user_id, exc_info=True
        )
        return []
|