"""
Tests for backend.rag.specificity.lookup module.

This module tests the specificity score lookup from LinkML schema annotations:
- Loading scores from YAML files
- Threshold filtering via get_classes_for_template()
- Default scores for unannotated classes
- Template-specific score overrides
- SpecificityScore.passes_threshold() method
- reload() functionality
- Error handling for malformed YAML

Coverage:
- SpecificityLookup class
- SpecificityScore dataclass
- get_specificity_lookup() singleton
- get_classes_for_template_cached() function
"""

from __future__ import annotations

import tempfile
from pathlib import Path
from unittest.mock import patch

import pytest
import yaml

from backend.rag.specificity.lookup import (
    SpecificityLookup,
    get_classes_for_template_cached,
    get_specificity_lookup,
)
from backend.rag.specificity.models import (
    ContextTemplate,
    SpecificityScore,
)


# =============================================================================
# Fixtures
# =============================================================================


@pytest.fixture
def temp_schema_dir(tmp_path: Path) -> Path:
    """Create a temporary schema directory with an empty ``modules/classes`` tree.

    Returns the schema root (``tmp_path``), not the ``classes`` sub-directory.
    """
    classes_dir = tmp_path / "modules" / "classes"
    classes_dir.mkdir(parents=True)
    return tmp_path


@pytest.fixture
def sample_class_with_full_annotations() -> dict:
    """Sample class YAML with full specificity annotations."""
    return {
        "classes": {
            "FullyAnnotatedClass": {
                "description": "A class with complete specificity annotations",
                "annotations": {
                    "specificity_score": 0.75,
                    "specificity_rationale": "Fairly specific to archival contexts",
                    "template_specificity": {
                        "archive_search": 0.2,
                        "museum_search": 0.9,
                        "library_search": 0.8,
                        "general_heritage": 0.5,
                    },
                },
            }
        }
    }


@pytest.fixture
def sample_class_with_base_score_only() -> dict:
    """Sample class YAML with only base specificity score."""
    return {
        "classes": {
            "BaseScoreOnlyClass": {
                "description": "A class with only base specificity score",
                "annotations": {
                    "specificity_score": 0.4,
                    "specificity_rationale": "Moderately broad relevance",
                },
            }
        }
    }


@pytest.fixture
def sample_class_without_annotations() -> dict:
    """Sample class YAML without specificity annotations."""
    return {
        "classes": {
            "UnannotatedClass": {
                "description": "A class without specificity annotations",
                "slots": ["slot_one", "slot_two"],
            }
        }
    }


@pytest.fixture
def sample_class_with_invalid_score() -> dict:
    """Sample class YAML with invalid specificity score."""
    return {
        "classes": {
            "InvalidScoreClass": {
                "description": "A class with invalid specificity score",
                "annotations": {
                    "specificity_score": "not_a_number",
                    "specificity_rationale": "This should fail",
                },
            }
        }
    }


@pytest.fixture
def sample_class_with_invalid_template() -> dict:
    """Sample class YAML with invalid template name in template_specificity."""
    return {
        "classes": {
            "InvalidTemplateClass": {
                "description": "A class with invalid template specificity",
                "annotations": {
                    "specificity_score": 0.5,
                    "template_specificity": {
                        "archive_search": 0.3,
                        "invalid_template_name": 0.7,  # Invalid
                    },
                },
            }
        }
    }


@pytest.fixture
def multiple_classes_yaml() -> dict:
    """Sample YAML with multiple classes for threshold testing.

    None of the classes overrides ``general_heritage``, so tests that filter
    on that template exercise the base scores (0.2 / 0.5 / 0.8).
    """
    return {
        "classes": {
            "BroadClass": {
                "description": "Broadly relevant class",
                "annotations": {
                    "specificity_score": 0.2,
                    "specificity_rationale": "Universal class",
                    "template_specificity": {
                        "archive_search": 0.1,
                        "museum_search": 0.1,
                    },
                },
            },
            "ModerateClass": {
                "description": "Moderately specific class",
                "annotations": {
                    "specificity_score": 0.5,
                    "specificity_rationale": "Average specificity",
                    "template_specificity": {
                        "archive_search": 0.3,
                        "museum_search": 0.7,
                    },
                },
            },
            "NarrowClass": {
                "description": "Narrowly specific class",
                "annotations": {
                    "specificity_score": 0.8,
                    "specificity_rationale": "Highly specific",
                    "template_specificity": {
                        "archive_search": 0.9,
                        "museum_search": 0.2,
                    },
                },
            },
        }
    }


def create_yaml_file(directory: Path, filename: str, content: dict) -> Path:
    """Write ``content`` as YAML to ``<directory>/modules/classes/<filename>``.

    The ``modules/classes`` sub-directory is created if missing, and an
    existing file with the same name is overwritten.

    Args:
        directory: Schema root directory.
        filename: Name of the YAML file to create.
        content: Plain Python data to serialize.

    Returns:
        Path of the written file.
    """
    classes_dir = directory / "modules" / "classes"
    classes_dir.mkdir(parents=True, exist_ok=True)
    file_path = classes_dir / filename
    with open(file_path, "w", encoding="utf-8") as f:
        # safe_dump only emits standard YAML tags; the fixtures here are plain
        # dicts/strings/numbers/None, so the output matches yaml.dump.
        yaml.safe_dump(content, f)
    return file_path


# =============================================================================
# Test: SpecificityScore Dataclass
# =============================================================================


class TestSpecificityScore:
    """Test the SpecificityScore dataclass."""

    def test_get_score_returns_base_score_when_no_template_override(self):
        """get_score() should return base_score when no template override exists."""
        score = SpecificityScore(
            class_name="TestClass",
            base_score=0.6,
            template_scores={},
        )

        assert score.get_score(ContextTemplate.ARCHIVE_SEARCH) == 0.6
        assert score.get_score(ContextTemplate.MUSEUM_SEARCH) == 0.6
        assert score.get_score(ContextTemplate.GENERAL_HERITAGE) == 0.6

    def test_get_score_returns_template_score_when_override_exists(self):
        """get_score() should return template-specific score when override exists."""
        score = SpecificityScore(
            class_name="TestClass",
            base_score=0.6,
            template_scores={
                ContextTemplate.ARCHIVE_SEARCH: 0.2,
                ContextTemplate.MUSEUM_SEARCH: 0.9,
            },
        )

        assert score.get_score(ContextTemplate.ARCHIVE_SEARCH) == 0.2
        assert score.get_score(ContextTemplate.MUSEUM_SEARCH) == 0.9
        assert score.get_score(ContextTemplate.LIBRARY_SEARCH) == 0.6  # Falls back to base

    def test_passes_threshold_returns_true_when_score_below_threshold(self):
        """passes_threshold() should return True when score <= threshold."""
        score = SpecificityScore(
            class_name="TestClass",
            base_score=0.4,
            template_scores={},
        )

        assert score.passes_threshold(ContextTemplate.ARCHIVE_SEARCH, 0.6) is True
        assert score.passes_threshold(ContextTemplate.ARCHIVE_SEARCH, 0.5) is True
        assert score.passes_threshold(ContextTemplate.ARCHIVE_SEARCH, 0.4) is True  # Equal

    def test_passes_threshold_returns_false_when_score_above_threshold(self):
        """passes_threshold() should return False when score > threshold."""
        score = SpecificityScore(
            class_name="TestClass",
            base_score=0.8,
            template_scores={},
        )

        assert score.passes_threshold(ContextTemplate.ARCHIVE_SEARCH, 0.6) is False
        assert score.passes_threshold(ContextTemplate.ARCHIVE_SEARCH, 0.79) is False

    def test_passes_threshold_with_template_override(self):
        """passes_threshold() should use template-specific score for comparison."""
        score = SpecificityScore(
            class_name="TestClass",
            base_score=0.8,  # Would fail threshold of 0.6
            template_scores={
                ContextTemplate.ARCHIVE_SEARCH: 0.2,  # Would pass
            },
        )

        # Archive uses override (0.2), passes threshold 0.6
        assert score.passes_threshold(ContextTemplate.ARCHIVE_SEARCH, 0.6) is True
        # Museum uses base (0.8), fails threshold 0.6
        assert score.passes_threshold(ContextTemplate.MUSEUM_SEARCH, 0.6) is False

    def test_rationale_is_optional(self):
        """rationale field should be optional."""
        score = SpecificityScore(
            class_name="TestClass",
            base_score=0.5,
        )

        assert score.rationale is None

    def test_template_scores_defaults_to_empty_dict(self):
        """template_scores should default to empty dict."""
        score = SpecificityScore(
            class_name="TestClass",
            base_score=0.5,
        )

        assert score.template_scores == {}


# =============================================================================
# Test: SpecificityLookup Loading from YAML
# =============================================================================


class TestSpecificityLookupLoading:
    """Test loading specificity scores from YAML files."""

    def test_load_class_with_full_annotations(
        self,
        temp_schema_dir: Path,
        sample_class_with_full_annotations: dict,
    ):
        """Should correctly load class with full specificity annotations."""
        create_yaml_file(
            temp_schema_dir,
            "FullyAnnotatedClass.yaml",
            sample_class_with_full_annotations,
        )

        lookup = SpecificityLookup(schema_dir=temp_schema_dir)
        score = lookup.get_score("FullyAnnotatedClass")

        assert score.class_name == "FullyAnnotatedClass"
        assert score.base_score == 0.75
        assert score.rationale == "Fairly specific to archival contexts"
        assert score.template_scores[ContextTemplate.ARCHIVE_SEARCH] == 0.2
        assert score.template_scores[ContextTemplate.MUSEUM_SEARCH] == 0.9
        assert score.template_scores[ContextTemplate.LIBRARY_SEARCH] == 0.8
        assert score.template_scores[ContextTemplate.GENERAL_HERITAGE] == 0.5

    def test_load_class_with_base_score_only(
        self,
        temp_schema_dir: Path,
        sample_class_with_base_score_only: dict,
    ):
        """Should correctly load class with only base specificity score."""
        create_yaml_file(
            temp_schema_dir,
            "BaseScoreOnlyClass.yaml",
            sample_class_with_base_score_only,
        )

        lookup = SpecificityLookup(schema_dir=temp_schema_dir)
        score = lookup.get_score("BaseScoreOnlyClass")

        assert score.class_name == "BaseScoreOnlyClass"
        assert score.base_score == 0.4
        assert score.rationale == "Moderately broad relevance"
        assert score.template_scores == {}  # No template overrides

    def test_load_class_without_annotations_uses_default(
        self,
        temp_schema_dir: Path,
        sample_class_without_annotations: dict,
    ):
        """Should use default score for classes without annotations."""
        create_yaml_file(
            temp_schema_dir,
            "UnannotatedClass.yaml",
            sample_class_without_annotations,
        )

        lookup = SpecificityLookup(schema_dir=temp_schema_dir, default_score=0.5)
        score = lookup.get_score("UnannotatedClass")

        assert score.class_name == "UnannotatedClass"
        assert score.base_score == 0.5  # Default
        # rationale is optional on SpecificityScore; fail with a clear assertion
        # instead of an AttributeError from None.lower().
        assert score.rationale is not None
        assert "default" in score.rationale.lower() or "No specificity" in score.rationale

    def test_custom_default_score(self, temp_schema_dir: Path):
        """Should use custom default_score parameter."""
        lookup = SpecificityLookup(schema_dir=temp_schema_dir, default_score=0.7)
        score = lookup.get_score("NonExistentClass")

        assert score.base_score == 0.7

    def test_nonexistent_class_returns_default(self, temp_schema_dir: Path):
        """Should return default score for classes not in schema."""
        lookup = SpecificityLookup(schema_dir=temp_schema_dir, default_score=0.5)
        score = lookup.get_score("ClassNotInSchema")

        assert score.class_name == "ClassNotInSchema"
        assert score.base_score == 0.5
        # Guard the optional field before calling a str method on it.
        assert score.rationale is not None
        assert "not found" in score.rationale.lower()

    def test_invalid_score_is_skipped(
        self,
        temp_schema_dir: Path,
        sample_class_with_invalid_score: dict,
    ):
        """Should skip classes with invalid (non-numeric) specificity scores."""
        create_yaml_file(
            temp_schema_dir,
            "InvalidScoreClass.yaml",
            sample_class_with_invalid_score,
        )

        lookup = SpecificityLookup(schema_dir=temp_schema_dir)
        scores = lookup.get_all_scores()

        # Invalid score class should not be in loaded scores
        assert "InvalidScoreClass" not in scores

    def test_invalid_template_name_is_skipped(
        self,
        temp_schema_dir: Path,
        sample_class_with_invalid_template: dict,
    ):
        """Should skip invalid template names in template_specificity."""
        create_yaml_file(
            temp_schema_dir,
            "InvalidTemplateClass.yaml",
            sample_class_with_invalid_template,
        )

        lookup = SpecificityLookup(schema_dir=temp_schema_dir)
        score = lookup.get_score("InvalidTemplateClass")

        # Valid template should be loaded
        assert score.template_scores.get(ContextTemplate.ARCHIVE_SEARCH) == 0.3
        # Invalid template should NOT be loaded
        assert len(score.template_scores) == 1  # Only the valid one

    def test_empty_yaml_file_is_handled(self, temp_schema_dir: Path):
        """Should handle empty YAML files gracefully."""
        classes_dir = temp_schema_dir / "modules" / "classes"
        classes_dir.mkdir(parents=True, exist_ok=True)

        empty_file = classes_dir / "Empty.yaml"
        empty_file.write_text("")

        lookup = SpecificityLookup(schema_dir=temp_schema_dir)
        scores = lookup.get_all_scores()

        # Should not crash, just have no scores from empty file
        assert isinstance(scores, dict)

    def test_nonexistent_schema_dir_handled(self, tmp_path: Path):
        """Should handle nonexistent schema directory gracefully."""
        fake_dir = tmp_path / "nonexistent"

        lookup = SpecificityLookup(schema_dir=fake_dir)
        scores = lookup.get_all_scores()

        assert scores == {}


# =============================================================================
# Test: Threshold Filtering
# =============================================================================


class TestThresholdFiltering:
    """Test filtering classes by specificity threshold."""

    def test_get_classes_for_template_filters_by_threshold(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """Should return only classes that pass the threshold."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)

        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        # Threshold 0.6: BroadClass (0.2) and ModerateClass (0.5) pass
        classes = lookup.get_classes_for_template(
            ContextTemplate.GENERAL_HERITAGE,
            threshold=0.6,
        )

        assert "BroadClass" in classes
        assert "ModerateClass" in classes
        assert "NarrowClass" not in classes

    def test_get_classes_for_template_uses_template_specific_scores(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """Should use template-specific scores when filtering."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)

        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        # For ARCHIVE_SEARCH with threshold 0.5:
        # BroadClass: 0.1 (pass), ModerateClass: 0.3 (pass), NarrowClass: 0.9 (fail)
        classes = lookup.get_classes_for_template(
            ContextTemplate.ARCHIVE_SEARCH,
            threshold=0.5,
        )

        assert "BroadClass" in classes
        assert "ModerateClass" in classes
        assert "NarrowClass" not in classes

        # For MUSEUM_SEARCH with threshold 0.5:
        # BroadClass: 0.1 (pass), ModerateClass: 0.7 (fail), NarrowClass: 0.2 (pass)
        classes = lookup.get_classes_for_template(
            ContextTemplate.MUSEUM_SEARCH,
            threshold=0.5,
        )

        assert "BroadClass" in classes
        assert "ModerateClass" not in classes
        assert "NarrowClass" in classes

    def test_get_classes_for_template_returns_sorted_list(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """Should return classes sorted alphabetically."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)

        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        # All pass with high threshold
        classes = lookup.get_classes_for_template(
            ContextTemplate.GENERAL_HERITAGE,
            threshold=1.0,
        )

        # An empty result would satisfy the ordering check vacuously, so pin
        # the expected size first.
        assert len(classes) == 3
        assert classes == sorted(classes)

    def test_get_classes_for_template_with_zero_threshold(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """Threshold 0.0 should return empty or only score=0 classes."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)

        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        classes = lookup.get_classes_for_template(
            ContextTemplate.GENERAL_HERITAGE,
            threshold=0.0,
        )

        # No classes have score 0, so should be empty
        assert classes == []

    def test_get_classes_for_template_with_one_threshold(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """Threshold 1.0 should return all classes."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)

        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        classes = lookup.get_classes_for_template(
            ContextTemplate.GENERAL_HERITAGE,
            threshold=1.0,
        )

        assert len(classes) == 3
        assert "BroadClass" in classes
        assert "ModerateClass" in classes
        assert "NarrowClass" in classes


# =============================================================================
# Test: get_filtered_scores()
# =============================================================================


class TestGetFilteredScores:
    """Test the get_filtered_scores() method."""

    def test_get_filtered_scores_returns_dict_of_scores(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """Should return dict of class name → SpecificityScore for passing classes."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)

        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        scores = lookup.get_filtered_scores(
            ContextTemplate.GENERAL_HERITAGE,
            threshold=0.6,
        )

        assert "BroadClass" in scores
        assert "ModerateClass" in scores
        assert "NarrowClass" not in scores

        # Verify score objects are correct
        assert isinstance(scores["BroadClass"], SpecificityScore)
        assert scores["BroadClass"].base_score == 0.2


# =============================================================================
# Test: get_all_scores()
# =============================================================================


class TestGetAllScores:
    """Test the get_all_scores() method."""

    def test_get_all_scores_returns_copy(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """Should return a copy, not the internal dict."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)

        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        scores1 = lookup.get_all_scores()
        scores2 = lookup.get_all_scores()

        # Should be equal but not the same object
        assert scores1 == scores2
        assert scores1 is not scores2


# =============================================================================
# Test: reload()
# =============================================================================


class TestReload:
    """Test the reload() method."""

    def test_reload_clears_cache(
        self,
        temp_schema_dir: Path,
        sample_class_with_base_score_only: dict,
    ):
        """reload() should clear cached scores and reload from disk."""
        create_yaml_file(
            temp_schema_dir,
            "BaseScoreOnlyClass.yaml",
            sample_class_with_base_score_only,
        )

        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        # Initial load
        score1 = lookup.get_score("BaseScoreOnlyClass")
        assert score1.base_score == 0.4

        # Modify file
        modified_yaml = {
            "classes": {
                "BaseScoreOnlyClass": {
                    "annotations": {
                        "specificity_score": 0.9,  # Changed
                    },
                }
            }
        }
        create_yaml_file(temp_schema_dir, "BaseScoreOnlyClass.yaml", modified_yaml)

        # Without reload, should still see old value (cached)
        score2 = lookup.get_score("BaseScoreOnlyClass")
        assert score2.base_score == 0.4

        # After reload, should see new value
        lookup.reload()
        score3 = lookup.get_score("BaseScoreOnlyClass")
        assert score3.base_score == 0.9


# =============================================================================
# Test: Caching Behavior
# =============================================================================


class TestCachingBehavior:
    """Test that scores are cached after first load."""

    def test_scores_are_cached(
        self,
        temp_schema_dir: Path,
        sample_class_with_base_score_only: dict,
    ):
        """Scores should be cached after first load."""
        create_yaml_file(
            temp_schema_dir,
            "BaseScoreOnlyClass.yaml",
            sample_class_with_base_score_only,
        )

        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        # Force load
        lookup.get_all_scores()

        # Internal cache should not be None
        # NOTE: deliberately inspects the private ``_scores`` attribute.
        assert lookup._scores is not None


# =============================================================================
# Test: Singleton Instance
# =============================================================================


class TestSingletonInstance:
    """Test the singleton getter function."""

    def test_get_specificity_lookup_returns_instance(self):
        """get_specificity_lookup() should return a SpecificityLookup."""
        lookup = get_specificity_lookup()
        assert isinstance(lookup, SpecificityLookup)

    def test_singleton_returns_same_instance(self):
        """Multiple calls should return the same instance."""
        lookup1 = get_specificity_lookup()
        lookup2 = get_specificity_lookup()

        assert lookup1 is lookup2


# =============================================================================
# Test: get_classes_for_template_cached()
# =============================================================================


class TestGetClassesForTemplateCached:
    """Test the cached function for getting classes by template."""

    @pytest.fixture(autouse=True)
    def _isolated_cache(self):
        """Clear the function's cache before and after every test in this class.

        Clearing afterwards as well keeps results cached here from leaking
        into tests that run later in the same session.
        """
        get_classes_for_template_cached.cache_clear()
        yield
        get_classes_for_template_cached.cache_clear()

    def test_cached_function_returns_tuple(self):
        """Cached function should return tuple (for hashability)."""
        result = get_classes_for_template_cached("general_heritage", 0.6)

        assert isinstance(result, tuple)

    def test_cached_function_handles_invalid_template(self):
        """Should fall back to GENERAL_HERITAGE for invalid template string."""
        # Invalid template name
        result = get_classes_for_template_cached("invalid_template_name", 0.6)

        # Should not crash, falls back to GENERAL_HERITAGE
        assert isinstance(result, tuple)


# =============================================================================
# Test: Multiple Classes in Single File
# =============================================================================


class TestMultipleClassesInSingleFile:
    """Test handling of YAML files with multiple class definitions."""

    def test_load_multiple_classes_from_single_file(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """Should load all classes from a single YAML file."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)

        lookup = SpecificityLookup(schema_dir=temp_schema_dir)
        scores = lookup.get_all_scores()

        assert "BroadClass" in scores
        assert "ModerateClass" in scores
        assert "NarrowClass" in scores
        assert len(scores) == 3


# =============================================================================
# Test: Edge Cases
# =============================================================================


class TestEdgeCases:
    """Test edge cases and error handling."""

    def test_class_with_empty_annotations_dict(self, temp_schema_dir: Path):
        """Should handle class with empty annotations dict."""
        yaml_content = {
            "classes": {
                "EmptyAnnotationsClass": {
                    "description": "Has empty annotations",
                    "annotations": {},
                }
            }
        }
        create_yaml_file(temp_schema_dir, "EmptyAnnotations.yaml", yaml_content)

        lookup = SpecificityLookup(schema_dir=temp_schema_dir, default_score=0.5)
        score = lookup.get_score("EmptyAnnotationsClass")

        # Should use default score
        assert score.base_score == 0.5

    def test_class_with_null_class_def(self, temp_schema_dir: Path):
        """Should handle class with null definition."""
        yaml_content = {
            "classes": {
                "NullClass": None,
            }
        }
        create_yaml_file(temp_schema_dir, "NullClass.yaml", yaml_content)

        lookup = SpecificityLookup(schema_dir=temp_schema_dir)
        scores = lookup.get_all_scores()

        # Should not crash, class with null def is skipped
        assert "NullClass" not in scores

    def test_template_specificity_not_a_dict(self, temp_schema_dir: Path):
        """Should handle template_specificity that isn't a dict."""
        yaml_content = {
            "classes": {
                "BadTemplateClass": {
                    "annotations": {
                        "specificity_score": 0.5,
                        "template_specificity": "not_a_dict",  # Invalid type
                    }
                }
            }
        }
        create_yaml_file(temp_schema_dir, "BadTemplate.yaml", yaml_content)

        lookup = SpecificityLookup(schema_dir=temp_schema_dir)
        score = lookup.get_score("BadTemplateClass")

        # Should load base score, skip invalid template_specificity
        assert score.base_score == 0.5
        assert score.template_scores == {}

    def test_score_as_string_is_converted(self, temp_schema_dir: Path):
        """Should convert string score to float."""
        yaml_content = {
            "classes": {
                "StringScoreClass": {
                    "annotations": {
                        "specificity_score": "0.75",  # String, not float
                    }
                }
            }
        }
        create_yaml_file(temp_schema_dir, "StringScore.yaml", yaml_content)

        lookup = SpecificityLookup(schema_dir=temp_schema_dir)
        score = lookup.get_score("StringScoreClass")

        assert score.base_score == 0.75
        assert isinstance(score.base_score, float)

    def test_score_as_integer_is_converted(self, temp_schema_dir: Path):
        """Should convert integer score to float."""
        yaml_content = {
            "classes": {
                "IntScoreClass": {
                    "annotations": {
                        "specificity_score": 1,  # Integer, not float
                    }
                }
            }
        }
        create_yaml_file(temp_schema_dir, "IntScore.yaml", yaml_content)

        lookup = SpecificityLookup(schema_dir=temp_schema_dir)
        score = lookup.get_score("IntScoreClass")

        assert score.base_score == 1.0
        assert isinstance(score.base_score, float)