"""
RAG service using ChromaDB + LM Studio's /v1/embeddings endpoint.

No local ML libraries (torch, sentence-transformers, onnxruntime) needed —
embeddings are generated by the same LM Studio instance used for chat.

Each chunk is stored with metadata: user_id, source_id, source_type (doc|url).
"""
import logging
import re
from typing import Optional

from chromadb import EmbeddingFunction, Documents, Embeddings
from flask import current_app
from openai import OpenAI

_logger = logging.getLogger(__name__)

# Lazily created singletons; populated on the first _get_collection() call.
_chroma_client = None
_collection = None


class LMStudioEmbeddingFunction(EmbeddingFunction):
    """ChromaDB-compatible embedding function that calls LM Studio's /v1/embeddings."""

    def __init__(self, base_url: str, model: str):
        """Create an OpenAI-compatible client pointed at ``<base_url>/v1``.

        Args:
            base_url: Root URL of the LM Studio server (without ``/v1``).
            model: Name of the embedding model to request.
        """
        # Strip a trailing slash so a configured "http://host:1234/" does not
        # turn into "http://host:1234//v1".
        self._client = OpenAI(
            base_url=f"{base_url.rstrip('/')}/v1",
            api_key="lm-studio",
        )
        self._model = model

    # NOTE: the parameter is called `input` because that is the name used by
    # ChromaDB's EmbeddingFunction interface; do not rename it even though it
    # shadows the builtin.
    def __call__(self, input: Documents) -> Embeddings:
        """Embed ``input`` and return one vector per document."""
        response = self._client.embeddings.create(model=self._model, input=input)
        return [item.embedding for item in response.data]


def _get_collection():
    """Return the shared ChromaDB collection, creating it on first use.

    Reads ``VECTORDB_PATH``, ``LM_STUDIO_URL`` and ``LM_STUDIO_EMBEDDING_MODEL``
    from the Flask app config, so it must be called inside an application
    context. The client and collection are cached in module globals.
    """
    global _chroma_client, _collection
    if _collection is None:
        import chromadb

        config = current_app.config
        path = config["VECTORDB_PATH"]
        base_url = config["LM_STUDIO_URL"]
        model = config["LM_STUDIO_EMBEDDING_MODEL"]

        _chroma_client = chromadb.PersistentClient(path=path)
        _collection = _chroma_client.get_or_create_collection(
            name="ki_context",
            embedding_function=LMStudioEmbeddingFunction(base_url, model),
            metadata={"hnsw:space": "cosine"},
        )
    return _collection


def chunk_text(text: str, chunk_size: int, overlap: int) -> list[str]:
    """Split ``text`` into overlapping chunks of whitespace-separated words.

    Consecutive chunks start ``chunk_size - overlap`` words apart and each
    holds at most ``chunk_size`` words, joined by single spaces.

    Args:
        text: Text to split; any run of whitespace separates words.
        chunk_size: Maximum number of words per chunk; must be positive.
        overlap: Number of words shared by neighbouring chunks; must satisfy
            ``0 <= overlap < chunk_size``.

    Returns:
        The chunks in document order; an empty list when ``text`` contains
        no words.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range. Without
            this check a step of zero or less would never advance the window
            and the function would loop forever.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    words = text.split()
    step = chunk_size - overlap
    # Every window starts inside `words`, so no chunk can be empty.
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), step)
    ]


def index_source(
    text: str,
    user_id: int,
    source_id: int,
    source_type: str,  # "doc" | "url"
    chunk_size: int = 500,
    chunk_overlap: int = 50,
) -> None:
    """Chunk, embed via LM Studio and store in ChromaDB. Replaces existing chunks.

    Args:
        text: Full text of the source.
        user_id: Owner of the source; stored as string metadata.
        source_id: Identifier of the source; stored as string metadata and
            used in the chunk ids.
        source_type: Kind of source ("doc" or "url").
        chunk_size: Maximum words per chunk.
        chunk_overlap: Words shared between neighbouring chunks.

    Raises:
        ValueError: If ``chunk_size`` / ``chunk_overlap`` are invalid
            (see ``chunk_text``).
    """
    # Chunk first: invalid parameters must fail before the existing chunks
    # of this source are removed.
    chunks = chunk_text(text, chunk_size, chunk_overlap)

    collection = _get_collection()
    delete_source(user_id, source_id, source_type)
    if not chunks:
        return

    ids = [f"{source_type}_{source_id}_chunk_{i}" for i in range(len(chunks))]
    metadatas = [
        {
            "user_id": str(user_id),
            "source_id": str(source_id),
            "source_type": source_type,
        }
        for _ in chunks
    ]
    collection.add(documents=chunks, ids=ids, metadatas=metadatas)


def delete_source(user_id: int, source_id: int, source_type: str) -> None:
    """Remove all chunks belonging to a source from ChromaDB.

    Deletion is best-effort: a failure is logged and not propagated.
    """
    collection = _get_collection()
    try:
        collection.delete(
            where={
                "$and": [
                    {"user_id": {"$eq": str(user_id)}},
                    {"source_id": {"$eq": str(source_id)}},
                    {"source_type": {"$eq": source_type}},
                ]
            }
        )
    except Exception:
        # Keep the best-effort contract, but leave a trace instead of
        # swallowing the error silently.
        _logger.exception(
            "Failed to delete chunks for user_id=%s source_id=%s source_type=%s",
            user_id,
            source_id,
            source_type,
        )


def similarity_search(
    query: str,
    user_id: int,
    source_ids: Optional[list[int]] = None,
    source_type: Optional[str] = None,
    top_k: int = 5,
) -> list[str]:
    """Search for relevant chunks via LM Studio embeddings.

    Args:
        query: Text to search for.
        user_id: Only chunks stored for this user are considered.
        source_ids: Optional restriction to these sources. ``None`` and an
            empty list both mean "no source filter".
        source_type: Optional restriction to one source type.
        top_k: Maximum number of chunks to return.

    Returns:
        The matching chunk texts, or an empty list when nothing matches or
        the query fails (failures are logged).
    """
    collection = _get_collection()

    conditions = [{"user_id": {"$eq": str(user_id)}}]
    if source_ids:
        conditions.append(
            {"source_id": {"$in": [str(sid) for sid in source_ids]}}
        )
    if source_type:
        conditions.append({"source_type": {"$eq": source_type}})

    # A single condition is passed bare; "$and" is only used for two or more.
    where = {"$and": conditions} if len(conditions) > 1 else conditions[0]

    try:
        results = collection.query(
            query_texts=[query],
            n_results=top_k,
            where=where,
        )
        return results["documents"][0] if results["documents"] else []
    except Exception:
        # Callers rely on getting a list back; log so that backend failures
        # are distinguishable from "no hits".
        _logger.exception("Similarity search failed for user_id=%s", user_id)
        return []