modified: .env.example
modified: config.py modified: docker-compose.yml modified: requirements.txt modified: services/rag_service.py
This commit is contained in:
@@ -1,25 +1,31 @@
|
||||
"""
|
||||
RAG service using ChromaDB + sentence-transformers.
|
||||
RAG service using ChromaDB + LM Studio's /v1/embeddings endpoint.
|
||||
No local ML libraries (torch, sentence-transformers, onnxruntime) needed —
|
||||
embeddings are generated by the same LM Studio instance used for chat.
|
||||
Each chunk is stored with metadata: user_id, source_id, source_type (doc|url).
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
from chromadb import EmbeddingFunction, Documents, Embeddings
|
||||
from flask import current_app
|
||||
from openai import OpenAI
|
||||
|
||||
_chroma_client = None
|
||||
_collection = None
|
||||
_embedder = None
|
||||
|
||||
|
||||
def _get_embedder():
    """Return the cached SentenceTransformer, creating it on first call.

    The "all-MiniLM-L6-v2" model is instantiated once and stored in the
    module-level ``_embedder``. Its cache folder is read from the Flask
    config key ``TRANSFORMERS_CACHE`` and defaults to ".cache".
    """
    global _embedder
    if _embedder is not None:
        return _embedder

    # Imported here so sentence_transformers is only loaded when a model
    # actually has to be built.
    from sentence_transformers import SentenceTransformer

    cache_dir = current_app.config.get("TRANSFORMERS_CACHE", ".cache")
    _embedder = SentenceTransformer("all-MiniLM-L6-v2", cache_folder=cache_dir)
    return _embedder
|
||||
class LMStudioEmbeddingFunction(EmbeddingFunction):
    """ChromaDB-compatible embedding function that calls LM Studio's /v1/embeddings."""

    def __init__(self, base_url: str, model: str):
        """Create the OpenAI-compatible client used to request embeddings.

        Args:
            base_url: Root URL of the LM Studio server, without the ``/v1``
                suffix. A trailing slash is tolerated.
            model: Name of the embedding model to request.
        """
        # Drop trailing slashes so "http://host:1234/" does not turn into
        # "http://host:1234//v1" once the API prefix is appended.
        root = base_url.rstrip("/")
        # NOTE(review): "lm-studio" looks like a placeholder key; presumably
        # LM Studio does not validate it — confirm.
        self._client = OpenAI(base_url=f"{root}/v1", api_key="lm-studio")
        self._model = model

    def __call__(self, input: Documents) -> Embeddings:
        """Embed ``input`` with a single request and return the vectors.

        Returns one embedding per item of the response's ``data`` list, in
        the order the server returned them.
        """
        response = self._client.embeddings.create(model=self._model, input=input)
        return [item.embedding for item in response.data]
|
||||
|
||||
|
||||
def _get_collection():
|
||||
@@ -27,16 +33,18 @@ def _get_collection():
|
||||
if _collection is None:
|
||||
import chromadb
|
||||
path = current_app.config["VECTORDB_PATH"]
|
||||
base_url = current_app.config["LM_STUDIO_URL"]
|
||||
model = current_app.config["LM_STUDIO_EMBEDDING_MODEL"]
|
||||
_chroma_client = chromadb.PersistentClient(path=path)
|
||||
_collection = _chroma_client.get_or_create_collection(
|
||||
name="ki_context",
|
||||
embedding_function=LMStudioEmbeddingFunction(base_url, model),
|
||||
metadata={"hnsw:space": "cosine"},
|
||||
)
|
||||
return _collection
|
||||
|
||||
|
||||
def chunk_text(text: str, chunk_size: int, overlap: int) -> list[str]:
|
||||
"""Split text into overlapping word-based chunks."""
|
||||
words = text.split()
|
||||
chunks = []
|
||||
start = 0
|
||||
@@ -55,26 +63,22 @@ def index_source(
|
||||
chunk_size: int = 500,
|
||||
chunk_overlap: int = 50,
|
||||
):
|
||||
"""Chunk, embed and store text in ChromaDB. Replaces existing chunks for this source."""
|
||||
"""Chunk, embed via LM Studio and store in ChromaDB. Replaces existing chunks."""
|
||||
collection = _get_collection()
|
||||
embedder = _get_embedder()
|
||||
|
||||
# Remove old chunks for this source first
|
||||
delete_source(user_id, source_id, source_type)
|
||||
|
||||
chunks = chunk_text(text, chunk_size, chunk_overlap)
|
||||
if not chunks:
|
||||
return
|
||||
|
||||
embeddings = embedder.encode(chunks, show_progress_bar=False).tolist()
|
||||
|
||||
ids = [f"{source_type}_{source_id}_chunk_{i}" for i in range(len(chunks))]
|
||||
metadatas = [
|
||||
{"user_id": str(user_id), "source_id": str(source_id), "source_type": source_type}
|
||||
for _ in chunks
|
||||
]
|
||||
|
||||
collection.add(documents=chunks, embeddings=embeddings, ids=ids, metadatas=metadatas)
|
||||
collection.add(documents=chunks, ids=ids, metadatas=metadatas)
|
||||
|
||||
|
||||
def delete_source(user_id: int, source_id: int, source_type: str):
|
||||
@@ -101,17 +105,9 @@ def similarity_search(
|
||||
source_type: Optional[str] = None,
|
||||
top_k: int = 5,
|
||||
) -> list[str]:
|
||||
"""
|
||||
Search for relevant chunks.
|
||||
Optionally filter by specific source_ids and/or source_type.
|
||||
Returns list of chunk texts.
|
||||
"""
|
||||
"""Search for relevant chunks via LM Studio embeddings."""
|
||||
collection = _get_collection()
|
||||
embedder = _get_embedder()
|
||||
|
||||
query_embedding = embedder.encode([query], show_progress_bar=False).tolist()[0]
|
||||
|
||||
# Build where filter
|
||||
conditions = [{"user_id": {"$eq": str(user_id)}}]
|
||||
|
||||
if source_ids is not None and len(source_ids) > 0:
|
||||
@@ -126,7 +122,7 @@ def similarity_search(
|
||||
|
||||
try:
|
||||
results = collection.query(
|
||||
query_embeddings=[query_embedding],
|
||||
query_texts=[query],
|
||||
n_results=top_k,
|
||||
where=where,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user