fix: handle single-vector Qdrant collections and multi-collection embedding dimensions

- Fixed _vector_search() to check uses_named_vectors() before adding the 'using' parameter
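- Fixed _person_vector_search() to detect the person collection's vector size and, when no model is passed explicitly, select the matching embedding model (384 -> minilm_384, 768 -> bge_768, 1536 -> openai_1536)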
- Resolves 'Not existing vector name error: openai_1536' for single-vector collections
- Resolves embedding dimension mismatch between heritage_custodians (1536-dim) and heritage_persons (384-dim)
This commit is contained in:
kempersc 2025-12-15 10:31:39 +01:00
parent 31bbce13e6
commit d9892dba6f

View file

@ -838,11 +838,16 @@ class HybridRetriever:
"with_payload": True,
}
# Add named vector 'using' if in multi-embedding mode
if self.use_multi_embedding and using:
search_params["using"] = using
elif self.use_multi_embedding and self._selected_multi_model:
search_params["using"] = self._selected_multi_model.value
# Add named vector 'using' ONLY if collection actually has named vectors
# Single-vector collections will error with "Not existing vector name" otherwise
if self.use_multi_embedding and self.multi_retriever:
uses_named = self.multi_retriever.uses_named_vectors(self.collection_name)
if uses_named:
if using:
search_params["using"] = using
elif self._selected_multi_model:
search_params["using"] = self._selected_multi_model.value
# else: single-vector collection, don't add 'using' parameter
results = self.qdrant_client.query_points(**search_params)
@ -1139,7 +1144,20 @@ class HybridRetriever:
"""
from qdrant_client.http import models
query_vector = self._get_embedding(query, using=using)
# Check person collection vector size and use appropriate model
person_vector_size = self._get_person_collection_vector_size()
person_model = using
if person_vector_size == 384 and not using:
# Person collection uses MiniLM (384-dim), override model selection
person_model = "minilm_384"
logger.info(f"Person collection uses 384-dim vectors, using MiniLM model")
elif person_vector_size == 1536 and not using:
person_model = "openai_1536"
elif person_vector_size == 768 and not using:
person_model = "bge_768"
query_vector = self._get_embedding(query, using=person_model)
try:
# Build query parameters
@ -1150,11 +1168,16 @@ class HybridRetriever:
"with_payload": True,
}
# Add named vector 'using' if in multi-embedding mode
if self.use_multi_embedding and using:
search_params["using"] = using
elif self.use_multi_embedding and self._selected_multi_model:
search_params["using"] = self._selected_multi_model.value
# Add named vector 'using' ONLY if collection actually has named vectors
# Single-vector collections will error with "Not existing vector name" otherwise
if self.use_multi_embedding and self.multi_retriever:
uses_named = self.multi_retriever.uses_named_vectors("heritage_persons")
if uses_named:
if using:
search_params["using"] = using
elif self._selected_multi_model:
search_params["using"] = self._selected_multi_model.value
# else: single-vector collection, don't add 'using' parameter
# Add schema-aware filters if provided
if filter_conditions: