modified: blueprints/chat.py
modified: services/rag_service.py
This commit is contained in:
@@ -190,6 +190,16 @@ def _fetch_neighbor_docs(chunk_id: str, collection) -> list[tuple[str, str]]:
|
||||
return neighbors
|
||||
|
||||
|
||||
def _build_where(user_id: int, source_ids=None, source_type=None) -> dict:
|
||||
"""Build a ChromaDB where-filter for user/source scoping."""
|
||||
conditions = [{"user_id": {"$eq": str(user_id)}}]
|
||||
if source_ids is not None and len(source_ids) > 0:
|
||||
conditions.append({"source_id": {"$in": [str(sid) for sid in source_ids]}})
|
||||
if source_type:
|
||||
conditions.append({"source_type": {"$eq": source_type}})
|
||||
return {"$and": conditions} if len(conditions) > 1 else conditions[0]
|
||||
|
||||
|
||||
def similarity_search(
|
||||
query: str,
|
||||
user_id: int,
|
||||
@@ -199,13 +209,7 @@ def similarity_search(
|
||||
) -> list[str]:
|
||||
"""Multi-query search with neighbor expansion and reading-order sorting."""
|
||||
collection = _get_collection()
|
||||
|
||||
conditions = [{"user_id": {"$eq": str(user_id)}}]
|
||||
if source_ids is not None and len(source_ids) > 0:
|
||||
conditions.append({"source_id": {"$in": [str(sid) for sid in source_ids]}})
|
||||
if source_type:
|
||||
conditions.append({"source_type": {"$eq": source_type}})
|
||||
where = {"$and": conditions} if len(conditions) > 1 else conditions[0]
|
||||
where = _build_where(user_id, source_ids, source_type)
|
||||
|
||||
# Generate multiple queries for broader recall
|
||||
queries = _expand_query(query)
|
||||
@@ -228,7 +232,7 @@ def similarity_search(
|
||||
dists = (results.get("distances") or [[]])[0]
|
||||
ids = (results.get("ids") or [[]])[0]
|
||||
for doc, dist, doc_id in zip(docs, dists, ids):
|
||||
if doc_id not in seen_ids and dist < 0.65:
|
||||
if doc_id not in seen_ids and dist < 0.80:
|
||||
seen_ids.add(doc_id)
|
||||
ranked.append((dist, doc_id, doc))
|
||||
except Exception:
|
||||
|
||||
Reference in New Issue
Block a user