Fix score preservation in chat sources

rex993 · claude · rex993 · commit a5e3434e79f4 · 2025-06-10T00:34:12.000+10:00
Preserve similarity scores from ChunkSource objects when enriching sources via batch_retrieve_chunks. Previously, the scores were lost because get_chunks_by_id returns chunks with a hardcoded score of 0.0. Changes: - Create a score mapping, keyed by (document_id, chunk_number), from the original ChunkSource objects; - Apply the preserved scores to the retrieved chunks; - Sort the chunks by score in descending order for consistent relevance ordering. Fixes an issue where the chat page showed 0.0 scores while the search page showed the correct similarity scores. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
diff --git a/core/services/document_service.py b/core/services/document_service.py
@@ -563,6 +563,24 @@ async def batch_retrieve_chunks(
             logger.error(f"Error during parallel chunk retrieval: {e}", exc_info=True)
             return []
 
+        # Create a mapping of original scores from ChunkSource objects (O(n) time)
+        score_map = {
+            (source.document_id, source.chunk_number): source.score 
+            for source in authorized_sources 
+            if source.score is not None
+        }
+        
+        # Apply original scores to the retrieved chunks (O(m) time with O(1) lookups)
+        for chunk in chunks:
+            key = (chunk.document_id, chunk.chunk_number)
+            if key in score_map:
+                chunk.score = score_map[key]
+                logger.debug(f"Restored score {chunk.score} for chunk {key}")
+
+        # Sort chunks by score in descending order (highest score first)
+        chunks.sort(key=lambda x: x.score, reverse=True)
+        logger.debug(f"Sorted {len(chunks)} chunks by score")
+
         # Convert to chunk results
         results = await self._create_chunk_results(auth, chunks)
         logger.info(f"Batch retrieved {len(results)} chunks out of {len(chunk_ids)} requested")