"""Dynamic context template selection based on query classification.

This module bridges HeritageQueryRouter output to specificity filtering by:

1. Mapping query intent (geographic, statistical, etc.) to context templates
2. Considering entity_type (person, institution, both) for refinement
3. Optionally using target_custodian_type for institution-specific contexts

This enables automatic selection of the most relevant schema context for each
query.
"""

from __future__ import annotations

import logging
from dataclasses import dataclass, field
from typing import Optional, TYPE_CHECKING

from .models import ContextTemplate, INSTITUTION_TYPE_TO_CONTEXT

if TYPE_CHECKING:
    from dspy import Prediction

logger = logging.getLogger(__name__)


# Query intent to context template mapping
# Maps HeritageQueryIntent.intent values to ContextTemplates
INTENT_TO_CONTEXT_MAP: dict[str, ContextTemplate] = {
    # Geographic intent → location browsing
    "geographic": ContextTemplate.LOCATION_BROWSE,
    # Statistical intent → general (counts across types)
    "statistical": ContextTemplate.GENERAL_HERITAGE,
    # Relational intent → organizational relationships
    "relational": ContextTemplate.ORGANIZATIONAL_CHANGE,
    # Temporal intent → organizational history
    "temporal": ContextTemplate.ORGANIZATIONAL_CHANGE,
    # Entity lookup → identifier-based search
    "entity_lookup": ContextTemplate.IDENTIFIER_LOOKUP,
    # Comparative intent → general (comparing across types)
    "comparative": ContextTemplate.GENERAL_HERITAGE,
    # Exploration intent → general discovery
    "exploration": ContextTemplate.GENERAL_HERITAGE,
}

# Entity type to context template mapping
# When entity_type is known, this can override or refine the intent-based context
ENTITY_TYPE_TO_CONTEXT_MAP: dict[str, ContextTemplate] = {
    "person": ContextTemplate.PERSON_RESEARCH,
    # "institution" uses intent-based mapping
    # "both" uses general heritage
}

# Default thresholds per context template
# Lower threshold = more selective (fewer classes)
# Higher threshold = more inclusive (more classes)
DEFAULT_THRESHOLDS: dict[ContextTemplate, float] = {
    # Person queries benefit from focused context
    ContextTemplate.PERSON_RESEARCH: 0.45,
    # Identifier lookups need core classes
    ContextTemplate.IDENTIFIER_LOOKUP: 0.40,
    # Institution-type searches are moderately selective
    ContextTemplate.ARCHIVE_SEARCH: 0.50,
    ContextTemplate.MUSEUM_SEARCH: 0.50,
    ContextTemplate.LIBRARY_SEARCH: 0.50,
    # Collection and location need broader context
    ContextTemplate.COLLECTION_DISCOVERY: 0.55,
    ContextTemplate.LOCATION_BROWSE: 0.55,
    # Organizational change needs temporal classes
    ContextTemplate.ORGANIZATIONAL_CHANGE: 0.55,
    # Digital platform is specialized
    ContextTemplate.DIGITAL_PLATFORM: 0.50,
    # General heritage is most inclusive
    ContextTemplate.GENERAL_HERITAGE: 0.60,
}

# Threshold used for any template that has no entry in the threshold table.
_FALLBACK_THRESHOLD: float = 0.5


def _strip_if_str(value):
    """Return *value* with surrounding whitespace removed if it is a string.

    Non-string values (including ``None``) are returned unchanged.
    """
    return value.strip() if isinstance(value, str) else value


@dataclass
class ContextSelectionResult:
    """Result of dynamic context selection.

    Attributes:
        template: Selected context template
        threshold: Recommended threshold for this context
        selection_reason: Human-readable explanation
        intent: Original query intent (if available)
        entity_type: Original entity type (if available)
        custodian_type: Target custodian type (if available)
    """

    template: ContextTemplate
    threshold: float
    selection_reason: str
    intent: Optional[str] = None
    entity_type: Optional[str] = None
    custodian_type: Optional[str] = None

    def __str__(self) -> str:
        return (
            f"ContextSelection({self.template.value}, "
            f"threshold={self.threshold}, "
            f"reason='{self.selection_reason}')"
        )


class DynamicContextSelector:
    """Selects appropriate context template based on query classification.

    This class integrates with HeritageQueryRouter output to automatically
    determine the best context template and threshold for specificity
    filtering.

    Usage:
        selector = DynamicContextSelector()

        # From HeritageQueryRouter prediction
        router_result = router.forward(question)
        selection = selector.select_from_prediction(router_result)

        # Or manually specify parameters
        selection = selector.select(
            intent="geographic",
            entity_type="institution",
            custodian_type="A"  # Archive
        )

        # Use with specificity-aware signatures
        sig = get_schema_aware_sparql_signature(
            context_template=selection.template.value,
            threshold=selection.threshold
        )
    """

    def __init__(
        self,
        intent_map: Optional[dict[str, ContextTemplate]] = None,
        entity_type_map: Optional[dict[str, ContextTemplate]] = None,
        custodian_type_map: Optional[dict[str, ContextTemplate]] = None,
        default_thresholds: Optional[dict[ContextTemplate, float]] = None,
        global_threshold_override: Optional[float] = None,
    ):
        """Initialize the selector.

        Each mapping argument replaces the corresponding module-level default
        only when it is not ``None``; an explicitly supplied empty mapping is
        honoured (it disables that lookup stage) rather than being silently
        swapped for the default.

        Args:
            intent_map: Override intent → context mapping
            entity_type_map: Override entity_type → context mapping
            custodian_type_map: Override custodian_type → context mapping
            default_thresholds: Override per-template thresholds
            global_threshold_override: If set, use this threshold for all
                templates
        """
        # `is None` rather than `or`: an empty dict is a legitimate override.
        self._intent_map = (
            INTENT_TO_CONTEXT_MAP if intent_map is None else intent_map
        )
        self._entity_type_map = (
            ENTITY_TYPE_TO_CONTEXT_MAP
            if entity_type_map is None
            else entity_type_map
        )
        self._custodian_type_map = (
            INSTITUTION_TYPE_TO_CONTEXT
            if custodian_type_map is None
            else custodian_type_map
        )
        self._default_thresholds = (
            DEFAULT_THRESHOLDS
            if default_thresholds is None
            else default_thresholds
        )
        self._global_threshold = global_threshold_override

    def select(
        self,
        intent: Optional[str] = None,
        entity_type: Optional[str] = None,
        custodian_type: Optional[str] = None,
        threshold_override: Optional[float] = None,
    ) -> ContextSelectionResult:
        """Select context template based on query classification.

        Selection priority:
        1. entity_type found in the entity-type map (by default only
           "person" → PERSON_RESEARCH) — highest priority
        2. entity_type="institution" with a custodian_type (A/M/L/etc.)
           → type-specific context, falling back to intent, then
           GENERAL_HERITAGE
        3. intent → intent-based context
        4. entity_type="both" → GENERAL_HERITAGE
        5. Fallback → GENERAL_HERITAGE

        Threshold priority: ``threshold_override`` argument, then the
        selector's global override, then the per-template table.

        Args:
            intent: Query intent from HeritageQueryRouter
            entity_type: "person", "institution", or "both"
            custodian_type: GLAMORCUBESFIXPHDNT single-letter code
                (matched case-insensitively)
            threshold_override: Override the default threshold

        Returns:
            ContextSelectionResult with template, threshold, and reasoning
        """
        template, reason = self._choose_template(
            intent, entity_type, custodian_type
        )
        # Lazy %-formatting: the message is only built if DEBUG is enabled.
        logger.debug("Context selection: %s", reason)

        if threshold_override is not None:
            threshold = threshold_override
        else:
            threshold = self.get_threshold_for_template(template)

        return ContextSelectionResult(
            template=template,
            threshold=threshold,
            selection_reason=reason,
            intent=intent,
            entity_type=entity_type,
            custodian_type=custodian_type,
        )

    def _choose_template(
        self,
        intent: Optional[str],
        entity_type: Optional[str],
        custodian_type: Optional[str],
    ) -> tuple[ContextTemplate, str]:
        """Apply the priority rules and return ``(template, reason)``."""
        intent_known = bool(intent) and intent in self._intent_map

        # Priority 1: entity types with a dedicated template (e.g. "person").
        # Uses the configurable map so an `entity_type_map` override passed to
        # the constructor actually takes effect.
        if isinstance(entity_type, str) and entity_type in self._entity_type_map:
            template = self._entity_type_map[entity_type]
            return template, f"entity_type='{entity_type}' → {template.name}"

        # Priority 2: Custodian type refinement for institutions
        if entity_type == "institution" and custodian_type:
            custodian_upper = custodian_type.upper()
            if custodian_upper in self._custodian_type_map:
                template = self._custodian_type_map[custodian_upper]
                return (
                    template,
                    f"custodian_type='{custodian_upper}' → {template.value}",
                )
            if intent_known:
                template = self._intent_map[intent]
                return (
                    template,
                    f"intent='{intent}' (custodian_type unknown) → {template.value}",
                )
            return (
                ContextTemplate.GENERAL_HERITAGE,
                "Unknown custodian_type, no intent → GENERAL_HERITAGE",
            )

        # Priority 3: Intent-based mapping
        if intent_known:
            template = self._intent_map[intent]
            return template, f"intent='{intent}' → {template.value}"

        # Priority 4: entity_type="both" uses general
        if entity_type == "both":
            return (
                ContextTemplate.GENERAL_HERITAGE,
                "entity_type='both' → GENERAL_HERITAGE",
            )

        # Fallback
        return (
            ContextTemplate.GENERAL_HERITAGE,
            "No classification available → GENERAL_HERITAGE",
        )

    def select_from_prediction(
        self,
        prediction: "Prediction",
        threshold_override: Optional[float] = None,
    ) -> ContextSelectionResult:
        """Select context from HeritageQueryRouter prediction.

        Extracts intent, entity_type, and target_custodian_type from the
        prediction object and calls select(). Attributes missing from the
        prediction are treated as ``None``.

        String fields have surrounding whitespace stripped before use, and a
        custodian type of "UNKNOWN" (any letter case) is treated as absent.
        NOTE(review): this assumes router fields are plain strings that may
        carry stray whitespace/casing — confirm against the router signature.

        Args:
            prediction: Output from HeritageQueryRouter.forward()
            threshold_override: Override the default threshold

        Returns:
            ContextSelectionResult with template, threshold, and reasoning
        """
        intent = _strip_if_str(getattr(prediction, "intent", None))
        entity_type = _strip_if_str(getattr(prediction, "entity_type", None))
        custodian_type = _strip_if_str(
            getattr(prediction, "target_custodian_type", None)
        )

        # Normalize "UNKNOWN" to None
        if isinstance(custodian_type, str) and custodian_type.upper() == "UNKNOWN":
            custodian_type = None

        return self.select(
            intent=intent,
            entity_type=entity_type,
            custodian_type=custodian_type,
            threshold_override=threshold_override,
        )

    def get_threshold_for_template(self, template: ContextTemplate) -> float:
        """Get the effective threshold for a context template.

        Args:
            template: Context template

        Returns:
            The selector's global threshold override if one was configured,
            otherwise the per-template threshold, otherwise 0.5.
        """
        if self._global_threshold is not None:
            return self._global_threshold
        return self._default_thresholds.get(template, _FALLBACK_THRESHOLD)

    def get_all_thresholds(self) -> dict[str, float]:
        """Get all default thresholds by template name.

        Reports the per-template table only; the global threshold override,
        if any, is deliberately not applied here (use
        get_threshold_for_template() for the effective value).

        Returns:
            Dict mapping each ContextTemplate's value to its threshold
            (0.5 for templates without an explicit entry)
        """
        return {
            template.value: self._default_thresholds.get(
                template, _FALLBACK_THRESHOLD
            )
            for template in ContextTemplate
        }


# Singleton instance
_selector_instance: Optional[DynamicContextSelector] = None


def get_dynamic_context_selector() -> DynamicContextSelector:
    """Get singleton selector instance.

    The instance is created with default mappings on first call and reused
    afterwards.
    """
    global _selector_instance
    if _selector_instance is None:
        _selector_instance = DynamicContextSelector()
    return _selector_instance


def select_context_for_query(
    intent: Optional[str] = None,
    entity_type: Optional[str] = None,
    custodian_type: Optional[str] = None,
    threshold_override: Optional[float] = None,
) -> ContextSelectionResult:
    """Convenience function for context selection.

    This is the primary entry point for dynamic context selection. It
    delegates to the shared default selector's select() method.

    Args:
        intent: Query intent from HeritageQueryRouter
        entity_type: "person", "institution", or "both"
        custodian_type: GLAMORCUBESFIXPHDNT single-letter code
        threshold_override: Override the default threshold

    Returns:
        ContextSelectionResult with template, threshold, and reasoning

    Example:
        from backend.rag.specificity import select_context_for_query

        # After query routing
        result = select_context_for_query(
            intent="geographic",
            entity_type="institution",
            custodian_type="A"
        )
        # result.template = ContextTemplate.ARCHIVE_SEARCH
        # result.threshold = 0.5
    """
    return get_dynamic_context_selector().select(
        intent=intent,
        entity_type=entity_type,
        custodian_type=custodian_type,
        threshold_override=threshold_override,
    )