From d9892dba6f79f1366165bdccb8e6d7d1d191d1e0 Mon Sep 17 00:00:00 2001 From: kempersc Date: Mon, 15 Dec 2025 10:31:39 +0100 Subject: [PATCH] fix: handle single-vector Qdrant collections and multi-collection embedding dimensions - Fixed _vector_search() to check uses_named_vectors() before adding 'using' parameter - Fixed _person_vector_search() to detect person collection vector size and use appropriate model - Resolves 'Not existing vector name error: openai_1536' for single-vector collections - Resolves embedding dimension mismatch between heritage_custodians (1536-dim) and heritage_persons (384-dim) --- src/glam_extractor/api/hybrid_retriever.py | 45 ++++++++++++++++------ 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/src/glam_extractor/api/hybrid_retriever.py b/src/glam_extractor/api/hybrid_retriever.py index 771ce0664d..03e087d426 100644 --- a/src/glam_extractor/api/hybrid_retriever.py +++ b/src/glam_extractor/api/hybrid_retriever.py @@ -838,11 +838,16 @@ class HybridRetriever: "with_payload": True, } - # Add named vector 'using' if in multi-embedding mode - if self.use_multi_embedding and using: - search_params["using"] = using - elif self.use_multi_embedding and self._selected_multi_model: - search_params["using"] = self._selected_multi_model.value + # Add named vector 'using' ONLY if collection actually has named vectors + # Single-vector collections will error with "Not existing vector name" otherwise + if self.use_multi_embedding and self.multi_retriever: + uses_named = self.multi_retriever.uses_named_vectors(self.collection_name) + if uses_named: + if using: + search_params["using"] = using + elif self._selected_multi_model: + search_params["using"] = self._selected_multi_model.value + # else: single-vector collection, don't add 'using' parameter results = self.qdrant_client.query_points(**search_params) @@ -1139,7 +1144,20 @@ class HybridRetriever: """ from qdrant_client.http import models - query_vector = self._get_embedding(query, using=using) + # Check person collection vector size and use appropriate model + person_vector_size = self._get_person_collection_vector_size() + person_model = using + + if person_vector_size == 384 and not using: + # Person collection uses MiniLM (384-dim), override model selection + person_model = "minilm_384" + logger.info(f"Person collection uses 384-dim vectors, using MiniLM model") + elif person_vector_size == 1536 and not using: + person_model = "openai_1536" + elif person_vector_size == 768 and not using: + person_model = "bge_768" + + query_vector = self._get_embedding(query, using=person_model) try: # Build query parameters @@ -1150,11 +1168,16 @@ class HybridRetriever: "with_payload": True, } - # Add named vector 'using' if in multi-embedding mode - if self.use_multi_embedding and using: - search_params["using"] = using - elif self.use_multi_embedding and self._selected_multi_model: - search_params["using"] = self._selected_multi_model.value + # Add named vector 'using' ONLY if collection actually has named vectors + # Single-vector collections will error with "Not existing vector name" otherwise + if self.use_multi_embedding and self.multi_retriever: + uses_named = self.multi_retriever.uses_named_vectors("heritage_persons") + if uses_named: + if using: + search_params["using"] = using + elif self._selected_multi_model: + search_params["using"] = self._selected_multi_model.value + # else: single-vector collection, don't add 'using' parameter # Add schema-aware filters if provided if filter_conditions: