""" Tests for backend.rag.specificity.context_selector module. This module tests the dynamic context template selection system, which: - Maps query intent (geographic, statistical, etc.) to context templates - Prioritizes entity_type for person queries - Refines context based on custodian_type (A/M/L/etc.) - Provides per-template threshold defaults - Integrates with HeritageQueryRouter predictions Coverage: - DynamicContextSelector.select() with various combinations - Selection priority ordering (person > custodian > intent > fallback) - select_from_prediction() with mock router output - Threshold override behavior - Custom map injection via constructor - Singleton and convenience function behavior """ from __future__ import annotations from dataclasses import dataclass from typing import Optional import pytest from backend.rag.specificity.context_selector import ( INTENT_TO_CONTEXT_MAP, ENTITY_TYPE_TO_CONTEXT_MAP, DEFAULT_THRESHOLDS, ContextSelectionResult, DynamicContextSelector, get_dynamic_context_selector, select_context_for_query, ) from backend.rag.specificity.models import ( ContextTemplate, INSTITUTION_TYPE_TO_CONTEXT, ) # ============================================================================= # Fixtures # ============================================================================= @pytest.fixture def selector() -> DynamicContextSelector: """Create a fresh DynamicContextSelector instance.""" return DynamicContextSelector() @pytest.fixture def custom_intent_map() -> dict[str, ContextTemplate]: """Custom intent map for testing constructor injection.""" return { "custom_intent": ContextTemplate.COLLECTION_DISCOVERY, "geographic": ContextTemplate.MUSEUM_SEARCH, # Override default } @pytest.fixture def custom_thresholds() -> dict[ContextTemplate, float]: """Custom thresholds for testing constructor injection.""" return { ContextTemplate.PERSON_RESEARCH: 0.30, # Lower than default ContextTemplate.GENERAL_HERITAGE: 0.80, # Higher than default } @dataclass class MockPrediction: """Mock HeritageQueryRouter prediction for testing.""" intent: Optional[str] = None entity_type: Optional[str] = None target_custodian_type: Optional[str] = None # Additional fields that might be present sources: list[str] | None = None entities: list[str] | None = None language: str = "nl" # ============================================================================= # Test: Intent to Context Mapping # ============================================================================= class TestIntentMapping: """Test basic intent to context template mapping.""" def test_geographic_intent_maps_to_location_browse( self, selector: DynamicContextSelector ): """Geographic intent should map to LOCATION_BROWSE.""" result = selector.select(intent="geographic") assert result.template == ContextTemplate.LOCATION_BROWSE assert "geographic" in result.selection_reason def test_statistical_intent_maps_to_general( self, selector: DynamicContextSelector ): """Statistical intent should map to GENERAL_HERITAGE.""" result = selector.select(intent="statistical") assert result.template == ContextTemplate.GENERAL_HERITAGE def test_relational_intent_maps_to_organizational_change( self, selector: DynamicContextSelector ): """Relational intent should map to ORGANIZATIONAL_CHANGE.""" result = selector.select(intent="relational") assert result.template == ContextTemplate.ORGANIZATIONAL_CHANGE def test_temporal_intent_maps_to_organizational_change( self, selector: DynamicContextSelector ): """Temporal intent should map to ORGANIZATIONAL_CHANGE.""" result = selector.select(intent="temporal") assert result.template == ContextTemplate.ORGANIZATIONAL_CHANGE def test_entity_lookup_intent_maps_to_identifier_lookup( self, selector: DynamicContextSelector ): """Entity lookup intent should map to IDENTIFIER_LOOKUP.""" result = selector.select(intent="entity_lookup") assert result.template == ContextTemplate.IDENTIFIER_LOOKUP def test_comparative_intent_maps_to_general( self, selector: DynamicContextSelector ): """Comparative intent should map to GENERAL_HERITAGE.""" result = selector.select(intent="comparative") assert result.template == ContextTemplate.GENERAL_HERITAGE def test_exploration_intent_maps_to_general( self, selector: DynamicContextSelector ): """Exploration intent should map to GENERAL_HERITAGE.""" result = selector.select(intent="exploration") assert result.template == ContextTemplate.GENERAL_HERITAGE def test_unknown_intent_falls_back_to_general( self, selector: DynamicContextSelector ): """Unknown intent should fall back to GENERAL_HERITAGE.""" result = selector.select(intent="nonexistent_intent") assert result.template == ContextTemplate.GENERAL_HERITAGE # ============================================================================= # Test: Entity Type Priority # ============================================================================= class TestEntityTypePriority: """Test that entity_type takes priority over other factors.""" def test_person_entity_type_takes_priority_over_intent( self, selector: DynamicContextSelector ): """Person entity_type should override intent mapping.""" result = selector.select( intent="geographic", # Would normally be LOCATION_BROWSE entity_type="person", # But this takes priority ) assert result.template == ContextTemplate.PERSON_RESEARCH assert "person" in result.selection_reason def test_person_entity_type_takes_priority_over_custodian_type( self, selector: DynamicContextSelector ): """Person entity_type should override custodian_type.""" result = selector.select( entity_type="person", custodian_type="A", # Would be ARCHIVE_SEARCH for institutions ) assert result.template == ContextTemplate.PERSON_RESEARCH def test_both_entity_type_uses_general_heritage( self, selector: DynamicContextSelector ): """entity_type='both' should use GENERAL_HERITAGE.""" result = selector.select(entity_type="both") assert result.template == ContextTemplate.GENERAL_HERITAGE assert "both" in result.selection_reason def test_institution_entity_type_allows_intent_mapping( self, selector: DynamicContextSelector ): """entity_type='institution' should fall through to intent mapping.""" result = selector.select( intent="geographic", entity_type="institution", ) assert result.template == ContextTemplate.LOCATION_BROWSE # ============================================================================= # Test: Custodian Type Refinement # ============================================================================= class TestCustodianTypeRefinement: """Test custodian type (GLAMORCUBESFIXPHDNT) refinement for institutions.""" def test_archive_custodian_type_maps_to_archive_search( self, selector: DynamicContextSelector ): """Custodian type 'A' should map to ARCHIVE_SEARCH.""" result = selector.select( entity_type="institution", custodian_type="A", ) assert result.template == ContextTemplate.ARCHIVE_SEARCH assert "A" in result.selection_reason def test_museum_custodian_type_maps_to_museum_search( self, selector: DynamicContextSelector ): """Custodian type 'M' should map to MUSEUM_SEARCH.""" result = selector.select( entity_type="institution", custodian_type="M", ) assert result.template == ContextTemplate.MUSEUM_SEARCH def test_library_custodian_type_maps_to_library_search( self, selector: DynamicContextSelector ): """Custodian type 'L' should map to LIBRARY_SEARCH.""" result = selector.select( entity_type="institution", custodian_type="L", ) assert result.template == ContextTemplate.LIBRARY_SEARCH def test_lowercase_custodian_type_is_normalized( self, selector: DynamicContextSelector ): """Lowercase custodian types should be normalized to uppercase.""" result = selector.select( entity_type="institution", custodian_type="m", # lowercase ) assert result.template == ContextTemplate.MUSEUM_SEARCH def test_custodian_type_without_entity_type_uses_intent( self, selector: DynamicContextSelector ): """Custodian type alone (without entity_type) should use intent mapping.""" result = selector.select( intent="geographic", custodian_type="A", # No entity_type specified ) # Should fall through to intent mapping since entity_type != "institution" assert result.template == ContextTemplate.LOCATION_BROWSE def test_unknown_custodian_type_falls_back_to_intent( self, selector: DynamicContextSelector ): """Unknown custodian type should fall back to intent mapping.""" result = selector.select( intent="geographic", entity_type="institution", custodian_type="Z", # Unknown ) assert result.template == ContextTemplate.LOCATION_BROWSE # ============================================================================= # Test: Threshold Selection # ============================================================================= class TestThresholdSelection: """Test threshold selection per context template.""" def test_person_research_has_lower_threshold( self, selector: DynamicContextSelector ): """PERSON_RESEARCH should have threshold 0.45.""" result = selector.select(entity_type="person") assert result.threshold == 0.45 def test_identifier_lookup_has_lowest_threshold( self, selector: DynamicContextSelector ): """IDENTIFIER_LOOKUP should have threshold 0.40.""" result = selector.select(intent="entity_lookup") assert result.threshold == 0.40 def test_archive_search_has_medium_threshold( self, selector: DynamicContextSelector ): """ARCHIVE_SEARCH should have threshold 0.50.""" result = selector.select( entity_type="institution", custodian_type="A", ) assert result.threshold == 0.50 def test_general_heritage_has_highest_default_threshold( self, selector: DynamicContextSelector ): """GENERAL_HERITAGE should have threshold 0.60.""" result = selector.select() # Fallback assert result.threshold == 0.60 def test_threshold_override_takes_precedence( self, selector: DynamicContextSelector ): """Explicit threshold_override should override default.""" result = selector.select( entity_type="person", threshold_override=0.75, ) assert result.threshold == 0.75 def test_global_threshold_override_applies_to_all(self): """Global threshold override should apply to all selections.""" selector = DynamicContextSelector(global_threshold_override=0.99) result1 = selector.select(entity_type="person") result2 = selector.select(intent="geographic") assert result1.threshold == 0.99 assert result2.threshold == 0.99 def test_get_threshold_for_template( self, selector: DynamicContextSelector ): """get_threshold_for_template() should return correct values.""" assert selector.get_threshold_for_template(ContextTemplate.PERSON_RESEARCH) == 0.45 assert selector.get_threshold_for_template(ContextTemplate.GENERAL_HERITAGE) == 0.60 def test_get_all_thresholds_returns_dict( self, selector: DynamicContextSelector ): """get_all_thresholds() should return dict with all templates.""" thresholds = selector.get_all_thresholds() assert isinstance(thresholds, dict) assert "person_research" in thresholds assert "general_heritage" in thresholds assert thresholds["person_research"] == 0.45 # ============================================================================= # Test: select_from_prediction() # ============================================================================= class TestSelectFromPrediction: """Test selection from HeritageQueryRouter prediction objects.""" def test_select_from_prediction_extracts_intent( self, selector: DynamicContextSelector ): """Should extract intent from prediction.""" prediction = MockPrediction(intent="geographic") result = selector.select_from_prediction(prediction) assert result.template == ContextTemplate.LOCATION_BROWSE assert result.intent == "geographic" def test_select_from_prediction_extracts_entity_type( self, selector: DynamicContextSelector ): """Should extract entity_type from prediction.""" prediction = MockPrediction(entity_type="person") result = selector.select_from_prediction(prediction) assert result.template == ContextTemplate.PERSON_RESEARCH assert result.entity_type == "person" def test_select_from_prediction_extracts_custodian_type( self, selector: DynamicContextSelector ): """Should extract target_custodian_type from prediction.""" prediction = MockPrediction( entity_type="institution", target_custodian_type="A", ) result = selector.select_from_prediction(prediction) assert result.template == ContextTemplate.ARCHIVE_SEARCH assert result.custodian_type == "A" def test_select_from_prediction_normalizes_unknown_custodian_type( self, selector: DynamicContextSelector ): """Should treat 'UNKNOWN' custodian_type as None.""" prediction = MockPrediction( intent="geographic", entity_type="institution", target_custodian_type="UNKNOWN", ) result = selector.select_from_prediction(prediction) # Should fall through to intent mapping assert result.template == ContextTemplate.LOCATION_BROWSE assert result.custodian_type is None def test_select_from_prediction_with_threshold_override( self, selector: DynamicContextSelector ): """Should apply threshold_override.""" prediction = MockPrediction(entity_type="person") result = selector.select_from_prediction(prediction, threshold_override=0.33) assert result.threshold == 0.33 def test_select_from_prediction_handles_missing_attributes( self, selector: DynamicContextSelector ): """Should handle predictions with missing attributes gracefully.""" # Create object with no relevant attributes @dataclass class MinimalPrediction: sparql: str = "SELECT * WHERE { ?s ?p ?o }" prediction = MinimalPrediction() result = selector.select_from_prediction(prediction) # Should fall back to GENERAL_HERITAGE assert result.template == ContextTemplate.GENERAL_HERITAGE # ============================================================================= # Test: Custom Map Injection # ============================================================================= class TestCustomMapInjection: """Test constructor injection of custom maps.""" def test_custom_intent_map( self, custom_intent_map: dict[str, ContextTemplate] ): """Should use custom intent map.""" selector = DynamicContextSelector(intent_map=custom_intent_map) # Custom intent result = selector.select(intent="custom_intent") assert result.template == ContextTemplate.COLLECTION_DISCOVERY # Overridden default result = selector.select(intent="geographic") assert result.template == ContextTemplate.MUSEUM_SEARCH def test_custom_thresholds( self, custom_thresholds: dict[ContextTemplate, float] ): """Should use custom thresholds.""" selector = DynamicContextSelector(default_thresholds=custom_thresholds) result = selector.select(entity_type="person") assert result.threshold == 0.30 # Custom threshold def test_custom_entity_type_map(self): """Should use custom entity type map for non-person types. Note: entity_type="person" is hardcoded to PERSON_RESEARCH as highest priority and cannot be overridden via custom map. This is by design to ensure person queries always get the focused person context. """ # Person is hardcoded and cannot be overridden selector = DynamicContextSelector(entity_type_map={"person": ContextTemplate.ARCHIVE_SEARCH}) result = selector.select(entity_type="person") # Still PERSON_RESEARCH because it's hardcoded as Priority 1 assert result.template == ContextTemplate.PERSON_RESEARCH def test_custom_custodian_type_map(self): """Should use custom custodian type map.""" custom_map = { "A": ContextTemplate.DIGITAL_PLATFORM, # Override "X": ContextTemplate.COLLECTION_DISCOVERY, # New } selector = DynamicContextSelector(custodian_type_map=custom_map) result = selector.select(entity_type="institution", custodian_type="A") assert result.template == ContextTemplate.DIGITAL_PLATFORM # ============================================================================= # Test: Singleton and Convenience Function # ============================================================================= class TestSingletonAndConvenience: """Test singleton pattern and convenience function.""" def test_get_dynamic_context_selector_returns_same_instance(self): """Should return the same singleton instance.""" selector1 = get_dynamic_context_selector() selector2 = get_dynamic_context_selector() assert selector1 is selector2 def test_select_context_for_query_works(self): """Convenience function should work correctly.""" result = select_context_for_query( intent="geographic", entity_type="institution", custodian_type="A", ) assert isinstance(result, ContextSelectionResult) assert result.template == ContextTemplate.ARCHIVE_SEARCH def test_select_context_for_query_with_threshold_override(self): """Convenience function should support threshold override.""" result = select_context_for_query( entity_type="person", threshold_override=0.25, ) assert result.threshold == 0.25 # ============================================================================= # Test: ContextSelectionResult # ============================================================================= class TestContextSelectionResult: """Test ContextSelectionResult dataclass.""" def test_str_representation(self, selector: DynamicContextSelector): """__str__ should provide readable output.""" result = selector.select(entity_type="person") str_repr = str(result) assert "person_research" in str_repr assert "0.45" in str_repr assert "reason=" in str_repr def test_result_contains_all_input_values( self, selector: DynamicContextSelector ): """Result should contain all input values.""" result = selector.select( intent="geographic", entity_type="institution", custodian_type="A", ) assert result.intent == "geographic" assert result.entity_type == "institution" assert result.custodian_type == "A" # ============================================================================= # Test: Selection Priority Order # ============================================================================= class TestSelectionPriorityOrder: """Test that selection priority is: person > custodian > intent > fallback.""" def test_priority_order_person_beats_all( self, selector: DynamicContextSelector ): """Person should beat custodian_type and intent.""" result = selector.select( intent="geographic", entity_type="person", custodian_type="M", ) assert result.template == ContextTemplate.PERSON_RESEARCH def test_priority_order_custodian_beats_intent( self, selector: DynamicContextSelector ): """Custodian type should beat intent for institutions.""" result = selector.select( intent="geographic", # Would be LOCATION_BROWSE entity_type="institution", custodian_type="M", # But this wins for institutions ) assert result.template == ContextTemplate.MUSEUM_SEARCH def test_priority_order_intent_used_when_no_custodian( self, selector: DynamicContextSelector ): """Intent should be used when no valid custodian type.""" result = selector.select( intent="geographic", entity_type="institution", # No custodian_type ) assert result.template == ContextTemplate.LOCATION_BROWSE def test_priority_order_fallback_when_nothing_specified( self, selector: DynamicContextSelector ): """Should fall back to GENERAL_HERITAGE when nothing specified.""" result = selector.select() assert result.template == ContextTemplate.GENERAL_HERITAGE # ============================================================================= # Test: Module Constants # ============================================================================= class TestModuleConstants: """Test that module-level constants are properly defined.""" def test_intent_to_context_map_has_expected_intents(self): """INTENT_TO_CONTEXT_MAP should have all expected intents.""" expected_intents = { "geographic", "statistical", "relational", "temporal", "entity_lookup", "comparative", "exploration", } assert expected_intents == set(INTENT_TO_CONTEXT_MAP.keys()) def test_entity_type_to_context_map_has_person(self): """ENTITY_TYPE_TO_CONTEXT_MAP should have 'person' mapping.""" assert "person" in ENTITY_TYPE_TO_CONTEXT_MAP assert ENTITY_TYPE_TO_CONTEXT_MAP["person"] == ContextTemplate.PERSON_RESEARCH def test_default_thresholds_covers_all_templates(self): """DEFAULT_THRESHOLDS should cover all context templates.""" for template in ContextTemplate: assert template in DEFAULT_THRESHOLDS, f"Missing threshold for {template}" def test_default_thresholds_in_valid_range(self): """All default thresholds should be between 0 and 1.""" for template, threshold in DEFAULT_THRESHOLDS.items(): assert 0.0 <= threshold <= 1.0, f"Invalid threshold {threshold} for {template}" # ============================================================================= # Test: Edge Cases # ============================================================================= class TestEdgeCases: """Test edge cases and error handling.""" def test_none_values_handled_gracefully( self, selector: DynamicContextSelector ): """Should handle None values without error.""" result = selector.select( intent=None, entity_type=None, custodian_type=None, ) assert result.template == ContextTemplate.GENERAL_HERITAGE def test_empty_string_intent_treated_as_unknown( self, selector: DynamicContextSelector ): """Empty string intent should fall back to general.""" result = selector.select(intent="") assert result.template == ContextTemplate.GENERAL_HERITAGE def test_whitespace_custodian_type_handled( self, selector: DynamicContextSelector ): """Whitespace custodian type should fall back to intent.""" result = selector.select( intent="geographic", entity_type="institution", custodian_type=" ", ) # " ".upper() = " " which is not in the map assert result.template == ContextTemplate.LOCATION_BROWSE