modified: blueprints/chat.py

modified:   services/rag_service.py
This commit is contained in:
SimolZimol
2026-05-23 14:36:36 +02:00
parent 9eb0869c50
commit 215936691a
2 changed files with 44 additions and 8 deletions

View File

@@ -190,6 +190,16 @@ def _fetch_neighbor_docs(chunk_id: str, collection) -> list[tuple[str, str]]:
return neighbors
def _build_where(user_id: int, source_ids=None, source_type=None) -> dict:
"""Build a ChromaDB where-filter for user/source scoping."""
conditions = [{"user_id": {"$eq": str(user_id)}}]
if source_ids is not None and len(source_ids) > 0:
conditions.append({"source_id": {"$in": [str(sid) for sid in source_ids]}})
if source_type:
conditions.append({"source_type": {"$eq": source_type}})
return {"$and": conditions} if len(conditions) > 1 else conditions[0]
def similarity_search(
query: str,
user_id: int,
@@ -199,13 +209,7 @@ def similarity_search(
) -> list[str]:
"""Multi-query search with neighbor expansion and reading-order sorting."""
collection = _get_collection()
conditions = [{"user_id": {"$eq": str(user_id)}}]
if source_ids is not None and len(source_ids) > 0:
conditions.append({"source_id": {"$in": [str(sid) for sid in source_ids]}})
if source_type:
conditions.append({"source_type": {"$eq": source_type}})
where = {"$and": conditions} if len(conditions) > 1 else conditions[0]
where = _build_where(user_id, source_ids, source_type)
# Generate multiple queries for broader recall
queries = _expand_query(query)
@@ -228,7 +232,7 @@ def similarity_search(
dists = (results.get("distances") or [[]])[0]
ids = (results.get("ids") or [[]])[0]
for doc, dist, doc_id in zip(docs, dists, ids):
if doc_id not in seen_ids and dist < 0.65:
if doc_id not in seen_ids and dist < 0.80:
seen_ids.add(doc_id)
ranked.append((dist, doc_id, doc))
except Exception: