diff --git a/src/glam_extractor/api/hybrid_retriever.py b/src/glam_extractor/api/hybrid_retriever.py index 771ce0664d..03e087d426 100644 --- a/src/glam_extractor/api/hybrid_retriever.py +++ b/src/glam_extractor/api/hybrid_retriever.py @@ -838,11 +838,16 @@ class HybridRetriever: "with_payload": True, } - # Add named vector 'using' if in multi-embedding mode - if self.use_multi_embedding and using: - search_params["using"] = using - elif self.use_multi_embedding and self._selected_multi_model: - search_params["using"] = self._selected_multi_model.value + # Add named vector 'using' ONLY if collection actually has named vectors + # Single-vector collections will error with "Not existing vector name" otherwise + if self.use_multi_embedding and self.multi_retriever: + uses_named = self.multi_retriever.uses_named_vectors(self.collection_name) + if uses_named: + if using: + search_params["using"] = using + elif self._selected_multi_model: + search_params["using"] = self._selected_multi_model.value + # else: single-vector collection, don't add 'using' parameter results = self.qdrant_client.query_points(**search_params) @@ -1139,7 +1144,20 @@ class HybridRetriever: """ from qdrant_client.http import models - query_vector = self._get_embedding(query, using=using) + # Check person collection vector size and use appropriate model + person_vector_size = self._get_person_collection_vector_size() + person_model = using + + if person_vector_size == 384 and not using: + # Person collection uses MiniLM (384-dim), override model selection + person_model = "minilm_384" + logger.info(f"Person collection uses 384-dim vectors, using MiniLM model") + elif person_vector_size == 1536 and not using: + person_model = "openai_1536" + elif person_vector_size == 768 and not using: + person_model = "bge_768" + + query_vector = self._get_embedding(query, using=person_model) try: # Build query parameters @@ -1150,11 +1168,16 @@ class HybridRetriever: "with_payload": True, } - # Add named vector 'using' if in multi-embedding mode - if self.use_multi_embedding and using: - search_params["using"] = using - elif self.use_multi_embedding and self._selected_multi_model: - search_params["using"] = self._selected_multi_model.value + # Add named vector 'using' ONLY if collection actually has named vectors + # Single-vector collections will error with "Not existing vector name" otherwise + if self.use_multi_embedding and self.multi_retriever: + uses_named = self.multi_retriever.uses_named_vectors("heritage_persons") + if uses_named: + if using: + search_params["using"] = using + elif self._selected_multi_model: + search_params["using"] = self._selected_multi_model.value + # else: single-vector collection, don't add 'using' parameter # Add schema-aware filters if provided if filter_conditions: