glam/tests/rag/test_specificity_lookup.py
kempersc 11983014bb Enhance specificity scoring system integration with existing infrastructure
- Updated documentation to clarify integration points with existing components in the RAG pipeline and DSPy framework.
- Added detailed mapping of SPARQL templates to context templates for improved specificity filtering.
- Implemented wrapper patterns around existing classifiers to extend functionality without duplication.
- Introduced new tests for the SpecificityAwareClassifier and SPARQLToContextMapper to ensure proper integration and functionality.
- Enhanced the CustodianRDFConverter to include ISO country and subregion codes from GHCID for better geospatial data handling.
2026-01-05 17:37:49 +01:00

839 lines
29 KiB
Python

"""
Tests for backend.rag.specificity.lookup module.
This module tests the specificity score lookup from LinkML schema annotations:
- Loading scores from YAML files
- Threshold filtering via get_classes_for_template()
- Default scores for unannotated classes
- Template-specific score overrides
- SpecificityScore.passes_threshold() method
- reload() functionality
- Error handling for malformed YAML
Coverage:
- SpecificityLookup class
- SpecificityScore dataclass
- get_specificity_lookup() singleton
- get_classes_for_template_cached() function
"""
from __future__ import annotations
import tempfile
from pathlib import Path
from unittest.mock import patch
import pytest
import yaml
from backend.rag.specificity.lookup import (
SpecificityLookup,
get_classes_for_template_cached,
get_specificity_lookup,
)
from backend.rag.specificity.models import (
ContextTemplate,
SpecificityScore,
)
# =============================================================================
# Fixtures
# =============================================================================
@pytest.fixture
def temp_schema_dir(tmp_path: Path) -> Path:
    """Provide a schema root that already contains ``modules/classes``.

    The value handed to the test is the root directory (``tmp_path``),
    not the ``classes`` sub-directory created inside it.
    """
    (tmp_path / "modules" / "classes").mkdir(parents=True)
    return tmp_path
@pytest.fixture
def sample_class_with_full_annotations() -> dict:
    """Schema dict for one class carrying every specificity annotation.

    The class has a base score, a rationale, and a per-template override
    for four template names.
    """
    per_template = {
        "archive_search": 0.2,
        "museum_search": 0.9,
        "library_search": 0.8,
        "general_heritage": 0.5,
    }
    annotations = {
        "specificity_score": 0.75,
        "specificity_rationale": "Fairly specific to archival contexts",
        "template_specificity": per_template,
    }
    class_def = {
        "description": "A class with complete specificity annotations",
        "annotations": annotations,
    }
    return {"classes": {"FullyAnnotatedClass": class_def}}
@pytest.fixture
def sample_class_with_base_score_only() -> dict:
    """Schema dict for one class that has a base score but no overrides."""
    annotations = {
        "specificity_score": 0.4,
        "specificity_rationale": "Moderately broad relevance",
    }
    class_def = {
        "description": "A class with only base specificity score",
        "annotations": annotations,
    }
    return {"classes": {"BaseScoreOnlyClass": class_def}}
@pytest.fixture
def sample_class_without_annotations() -> dict:
    """Schema dict for one class that has no ``annotations`` key at all."""
    class_def = {
        "description": "A class without specificity annotations",
        "slots": ["slot_one", "slot_two"],
    }
    return {"classes": {"UnannotatedClass": class_def}}
@pytest.fixture
def sample_class_with_invalid_score() -> dict:
    """Schema dict for one class whose base score is a non-numeric string."""
    annotations = {
        "specificity_score": "not_a_number",
        "specificity_rationale": "This should fail",
    }
    class_def = {
        "description": "A class with invalid specificity score",
        "annotations": annotations,
    }
    return {"classes": {"InvalidScoreClass": class_def}}
@pytest.fixture
def sample_class_with_invalid_template() -> dict:
    """Schema dict mixing one known and one unknown template name.

    ``archive_search`` is a template name used elsewhere in this module;
    ``invalid_template_name`` is deliberately bogus.
    """
    per_template = {
        "archive_search": 0.3,
        "invalid_template_name": 0.7,  # Invalid
    }
    annotations = {
        "specificity_score": 0.5,
        "template_specificity": per_template,
    }
    class_def = {
        "description": "A class with invalid template specificity",
        "annotations": annotations,
    }
    return {"classes": {"InvalidTemplateClass": class_def}}
@pytest.fixture
def multiple_classes_yaml() -> dict:
    """Schema dict with three classes spanning low, middle and high scores.

    Base scores are 0.2 / 0.5 / 0.8. Each class also overrides the
    ``archive_search`` and ``museum_search`` templates, so the threshold
    tests can see different outcomes per template.
    """

    def _class_entry(description, base, rationale, archive, museum):
        # All three classes share this exact shape; only the values differ.
        return {
            "description": description,
            "annotations": {
                "specificity_score": base,
                "specificity_rationale": rationale,
                "template_specificity": {
                    "archive_search": archive,
                    "museum_search": museum,
                },
            },
        }

    return {
        "classes": {
            "BroadClass": _class_entry(
                "Broadly relevant class", 0.2, "Universal class", 0.1, 0.1
            ),
            "ModerateClass": _class_entry(
                "Moderately specific class", 0.5, "Average specificity", 0.3, 0.7
            ),
            "NarrowClass": _class_entry(
                "Narrowly specific class", 0.8, "Highly specific", 0.9, 0.2
            ),
        }
    }
def create_yaml_file(directory: Path, filename: str, content: dict) -> Path:
    """Write *content* as YAML to ``<directory>/modules/classes/<filename>``.

    The ``modules/classes`` directory is created if it is missing, and an
    existing file with the same name is overwritten.

    Returns:
        The path of the file that was written.
    """
    target_dir = directory.joinpath("modules", "classes")
    target_dir.mkdir(parents=True, exist_ok=True)

    destination = target_dir / filename
    with destination.open("w", encoding="utf-8") as handle:
        yaml.dump(content, handle)
    return destination
# =============================================================================
# Test: SpecificityScore Dataclass
# =============================================================================
class TestSpecificityScore:
    """Behaviour of the ``SpecificityScore`` dataclass in isolation."""

    def test_get_score_returns_base_score_when_no_template_override(self):
        """With no overrides, every template resolves to the base score."""
        subject = SpecificityScore(
            class_name="TestClass",
            base_score=0.6,
            template_scores={},
        )

        for template in (
            ContextTemplate.ARCHIVE_SEARCH,
            ContextTemplate.MUSEUM_SEARCH,
            ContextTemplate.GENERAL_HERITAGE,
        ):
            assert subject.get_score(template) == 0.6

    def test_get_score_returns_template_score_when_override_exists(self):
        """An override wins for its template; other templates use the base."""
        overrides = {
            ContextTemplate.ARCHIVE_SEARCH: 0.2,
            ContextTemplate.MUSEUM_SEARCH: 0.9,
        }
        subject = SpecificityScore(
            class_name="TestClass",
            base_score=0.6,
            template_scores=overrides,
        )

        assert subject.get_score(ContextTemplate.ARCHIVE_SEARCH) == 0.2
        assert subject.get_score(ContextTemplate.MUSEUM_SEARCH) == 0.9
        # No override for the library template, so the base score applies.
        assert subject.get_score(ContextTemplate.LIBRARY_SEARCH) == 0.6

    def test_passes_threshold_returns_true_when_score_below_threshold(self):
        """A score at or below the threshold passes (the bound is inclusive)."""
        subject = SpecificityScore(
            class_name="TestClass",
            base_score=0.4,
            template_scores={},
        )

        # The last threshold equals the score, covering the boundary case.
        for threshold in (0.6, 0.5, 0.4):
            assert (
                subject.passes_threshold(ContextTemplate.ARCHIVE_SEARCH, threshold)
                is True
            )

    def test_passes_threshold_returns_false_when_score_above_threshold(self):
        """A score strictly above the threshold does not pass."""
        subject = SpecificityScore(
            class_name="TestClass",
            base_score=0.8,
            template_scores={},
        )

        for threshold in (0.6, 0.79):
            assert (
                subject.passes_threshold(ContextTemplate.ARCHIVE_SEARCH, threshold)
                is False
            )

    def test_passes_threshold_with_template_override(self):
        """The threshold check uses the template override when one exists."""
        subject = SpecificityScore(
            class_name="TestClass",
            base_score=0.8,  # Alone, this would fail a 0.6 threshold
            template_scores={
                ContextTemplate.ARCHIVE_SEARCH: 0.2,  # Passes a 0.6 threshold
            },
        )

        archive_ok = subject.passes_threshold(ContextTemplate.ARCHIVE_SEARCH, 0.6)
        museum_ok = subject.passes_threshold(ContextTemplate.MUSEUM_SEARCH, 0.6)

        # Archive resolves to the override (0.2) and passes.
        assert archive_ok is True
        # Museum has no override, resolves to the base (0.8) and fails.
        assert museum_ok is False

    def test_rationale_is_optional(self):
        """Omitting ``rationale`` leaves it as ``None``."""
        subject = SpecificityScore(class_name="TestClass", base_score=0.5)
        assert subject.rationale is None

    def test_template_scores_defaults_to_empty_dict(self):
        """Omitting ``template_scores`` yields an empty dict."""
        subject = SpecificityScore(class_name="TestClass", base_score=0.5)
        assert subject.template_scores == {}
# =============================================================================
# Test: SpecificityLookup Loading from YAML
# =============================================================================
class TestSpecificityLookupLoading:
    """Loading of specificity scores from YAML class files.

    Each test writes one or more YAML files under a temporary schema root,
    builds a ``SpecificityLookup`` on that root and inspects what it
    returns.
    """

    def test_load_class_with_full_annotations(
        self,
        temp_schema_dir: Path,
        sample_class_with_full_annotations: dict,
    ):
        """Base score, rationale and all template overrides are loaded."""
        create_yaml_file(
            temp_schema_dir,
            "FullyAnnotatedClass.yaml",
            sample_class_with_full_annotations,
        )
        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        score = lookup.get_score("FullyAnnotatedClass")

        assert score.class_name == "FullyAnnotatedClass"
        assert score.base_score == 0.75
        assert score.rationale == "Fairly specific to archival contexts"
        assert score.template_scores[ContextTemplate.ARCHIVE_SEARCH] == 0.2
        assert score.template_scores[ContextTemplate.MUSEUM_SEARCH] == 0.9
        assert score.template_scores[ContextTemplate.LIBRARY_SEARCH] == 0.8
        assert score.template_scores[ContextTemplate.GENERAL_HERITAGE] == 0.5

    def test_load_class_with_base_score_only(
        self,
        temp_schema_dir: Path,
        sample_class_with_base_score_only: dict,
    ):
        """A class with only a base score loads with no template overrides."""
        create_yaml_file(
            temp_schema_dir,
            "BaseScoreOnlyClass.yaml",
            sample_class_with_base_score_only,
        )
        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        score = lookup.get_score("BaseScoreOnlyClass")

        assert score.class_name == "BaseScoreOnlyClass"
        assert score.base_score == 0.4
        assert score.rationale == "Moderately broad relevance"
        assert score.template_scores == {}  # No template overrides

    def test_load_class_without_annotations_uses_default(
        self,
        temp_schema_dir: Path,
        sample_class_without_annotations: dict,
    ):
        """A class without annotations gets the default score and a rationale."""
        create_yaml_file(
            temp_schema_dir,
            "UnannotatedClass.yaml",
            sample_class_without_annotations,
        )
        lookup = SpecificityLookup(schema_dir=temp_schema_dir, default_score=0.5)

        score = lookup.get_score("UnannotatedClass")

        assert score.class_name == "UnannotatedClass"
        assert score.base_score == 0.5  # Default
        # ``rationale`` is optional on SpecificityScore; check for None first
        # so a missing rationale fails as an assertion, not an AttributeError.
        rationale = score.rationale
        assert rationale is not None
        assert "default" in rationale.lower() or "No specificity" in rationale

    def test_custom_default_score(self, temp_schema_dir: Path):
        """The ``default_score`` argument is used for unknown classes."""
        lookup = SpecificityLookup(schema_dir=temp_schema_dir, default_score=0.7)

        score = lookup.get_score("NonExistentClass")

        assert score.base_score == 0.7

    def test_nonexistent_class_returns_default(self, temp_schema_dir: Path):
        """A class absent from the schema gets the default and a 'not found' note."""
        lookup = SpecificityLookup(schema_dir=temp_schema_dir, default_score=0.5)

        score = lookup.get_score("ClassNotInSchema")

        assert score.class_name == "ClassNotInSchema"
        assert score.base_score == 0.5
        # Check for None first so a missing rationale fails as an assertion.
        rationale = score.rationale
        assert rationale is not None
        assert "not found" in rationale.lower()

    def test_invalid_score_is_skipped(
        self,
        temp_schema_dir: Path,
        sample_class_with_invalid_score: dict,
    ):
        """A class with a non-numeric score is absent from the loaded scores."""
        create_yaml_file(
            temp_schema_dir,
            "InvalidScoreClass.yaml",
            sample_class_with_invalid_score,
        )
        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        scores = lookup.get_all_scores()

        # Invalid score class should not be in loaded scores
        assert "InvalidScoreClass" not in scores

    def test_invalid_template_name_is_skipped(
        self,
        temp_schema_dir: Path,
        sample_class_with_invalid_template: dict,
    ):
        """Unknown template names are dropped; valid ones are kept."""
        create_yaml_file(
            temp_schema_dir,
            "InvalidTemplateClass.yaml",
            sample_class_with_invalid_template,
        )
        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        score = lookup.get_score("InvalidTemplateClass")

        # Valid template should be loaded
        assert score.template_scores.get(ContextTemplate.ARCHIVE_SEARCH) == 0.3
        # Invalid template should NOT be loaded
        assert len(score.template_scores) == 1  # Only the valid one

    def test_empty_yaml_file_is_handled(self, temp_schema_dir: Path):
        """An empty YAML file does not break loading."""
        classes_dir = temp_schema_dir / "modules" / "classes"
        classes_dir.mkdir(parents=True, exist_ok=True)
        empty_file = classes_dir / "Empty.yaml"
        # Explicit encoding for consistency with create_yaml_file().
        empty_file.write_text("", encoding="utf-8")
        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        scores = lookup.get_all_scores()

        # Should not crash, just have no scores from empty file
        assert isinstance(scores, dict)

    def test_nonexistent_schema_dir_handled(self, tmp_path: Path):
        """A schema directory that does not exist yields no scores."""
        fake_dir = tmp_path / "nonexistent"
        lookup = SpecificityLookup(schema_dir=fake_dir)

        scores = lookup.get_all_scores()

        assert scores == {}
# =============================================================================
# Test: Threshold Filtering
# =============================================================================
class TestThresholdFiltering:
    """Selection of classes by specificity threshold for a given template."""

    def test_get_classes_for_template_filters_by_threshold(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """Only classes whose score passes the threshold are returned."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)
        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        # At threshold 0.6 the 0.2 and 0.5 base scores pass; 0.8 does not.
        selected = lookup.get_classes_for_template(
            ContextTemplate.GENERAL_HERITAGE,
            threshold=0.6,
        )

        assert "BroadClass" in selected
        assert "ModerateClass" in selected
        assert "NarrowClass" not in selected

    def test_get_classes_for_template_uses_template_specific_scores(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """Filtering uses the per-template override, not the base score."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)
        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        # ARCHIVE_SEARCH overrides: Broad 0.1, Moderate 0.3, Narrow 0.9.
        for_archive = lookup.get_classes_for_template(
            ContextTemplate.ARCHIVE_SEARCH,
            threshold=0.5,
        )
        assert "BroadClass" in for_archive
        assert "ModerateClass" in for_archive
        assert "NarrowClass" not in for_archive

        # MUSEUM_SEARCH overrides: Broad 0.1, Moderate 0.7, Narrow 0.2.
        for_museum = lookup.get_classes_for_template(
            ContextTemplate.MUSEUM_SEARCH,
            threshold=0.5,
        )
        assert "BroadClass" in for_museum
        assert "ModerateClass" not in for_museum
        assert "NarrowClass" in for_museum

    def test_get_classes_for_template_returns_sorted_list(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """The returned class names are in sorted order."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)
        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        # Threshold 1.0 lets every class through, so the ordering is visible.
        selected = lookup.get_classes_for_template(
            ContextTemplate.GENERAL_HERITAGE,
            threshold=1.0,
        )

        assert selected == sorted(selected)

    def test_get_classes_for_template_with_zero_threshold(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """Threshold 0.0 admits only classes that score exactly zero."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)
        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        selected = lookup.get_classes_for_template(
            ContextTemplate.GENERAL_HERITAGE,
            threshold=0.0,
        )

        # The fixture has no zero-score class, so nothing is selected.
        assert selected == []

    def test_get_classes_for_template_with_one_threshold(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """Threshold 1.0 admits all three fixture classes."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)
        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        selected = lookup.get_classes_for_template(
            ContextTemplate.GENERAL_HERITAGE,
            threshold=1.0,
        )

        assert len(selected) == 3
        for expected_name in ("BroadClass", "ModerateClass", "NarrowClass"):
            assert expected_name in selected
# =============================================================================
# Test: get_filtered_scores()
# =============================================================================
class TestGetFilteredScores:
    """Checks for ``SpecificityLookup.get_filtered_scores()``."""

    def test_get_filtered_scores_returns_dict_of_scores(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """Passing classes map by name to their ``SpecificityScore``."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)
        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        passing = lookup.get_filtered_scores(
            ContextTemplate.GENERAL_HERITAGE,
            threshold=0.6,
        )

        assert "BroadClass" in passing
        assert "ModerateClass" in passing
        assert "NarrowClass" not in passing

        # The mapped value is the score object itself, not a bare number.
        broad = passing["BroadClass"]
        assert isinstance(broad, SpecificityScore)
        assert passing["BroadClass"].base_score == 0.2
# =============================================================================
# Test: get_all_scores()
# =============================================================================
class TestGetAllScores:
    """Checks for ``SpecificityLookup.get_all_scores()``."""

    def test_get_all_scores_returns_copy(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """Two calls return equal but distinct dict objects."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)
        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        first = lookup.get_all_scores()
        second = lookup.get_all_scores()

        # Same contents, different object identity.
        assert first == second
        assert first is not second
# =============================================================================
# Test: reload()
# =============================================================================
class TestReload:
    """Checks for ``SpecificityLookup.reload()``."""

    def test_reload_clears_cache(
        self,
        temp_schema_dir: Path,
        sample_class_with_base_score_only: dict,
    ):
        """On-disk changes are visible only after ``reload()`` is called."""
        create_yaml_file(
            temp_schema_dir,
            "BaseScoreOnlyClass.yaml",
            sample_class_with_base_score_only,
        )
        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        # First read loads the original value.
        before_edit = lookup.get_score("BaseScoreOnlyClass")
        assert before_edit.base_score == 0.4

        # Overwrite the same file with a different score.
        rewritten = {
            "classes": {
                "BaseScoreOnlyClass": {
                    "annotations": {
                        "specificity_score": 0.9,  # Changed
                    },
                }
            }
        }
        create_yaml_file(temp_schema_dir, "BaseScoreOnlyClass.yaml", rewritten)

        # Before reload() the lookup still serves the old value.
        stale = lookup.get_score("BaseScoreOnlyClass")
        assert stale.base_score == 0.4

        # After reload() the new value is picked up.
        lookup.reload()
        fresh = lookup.get_score("BaseScoreOnlyClass")
        assert fresh.base_score == 0.9
# =============================================================================
# Test: Caching Behavior
# =============================================================================
class TestCachingBehavior:
    """Checks that loaded scores are retained on the lookup instance."""

    def test_scores_are_cached(
        self,
        temp_schema_dir: Path,
        sample_class_with_base_score_only: dict,
    ):
        """After a first load, the private ``_scores`` attribute is set."""
        create_yaml_file(
            temp_schema_dir,
            "BaseScoreOnlyClass.yaml",
            sample_class_with_base_score_only,
        )
        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        # Trigger a load; the returned value is not needed here.
        lookup.get_all_scores()

        # NOTE: this reads a private attribute of SpecificityLookup.
        assert lookup._scores is not None
# =============================================================================
# Test: Singleton Instance
# =============================================================================
class TestSingletonInstance:
    """Checks for the module-level ``get_specificity_lookup()`` accessor."""

    def test_get_specificity_lookup_returns_instance(self):
        """The accessor returns a ``SpecificityLookup`` object."""
        assert isinstance(get_specificity_lookup(), SpecificityLookup)

    def test_singleton_returns_same_instance(self):
        """Consecutive calls return the very same object."""
        first = get_specificity_lookup()
        second = get_specificity_lookup()
        assert first is second
# =============================================================================
# Test: get_classes_for_template_cached()
# =============================================================================
class TestGetClassesForTemplateCached:
    """Checks for ``get_classes_for_template_cached()``."""

    def test_cached_function_returns_tuple(self):
        """The cached function returns a tuple, which is hashable."""
        # Start from an empty cache so the call is actually computed.
        get_classes_for_template_cached.cache_clear()

        outcome = get_classes_for_template_cached("general_heritage", 0.6)

        assert isinstance(outcome, tuple)

    def test_cached_function_handles_invalid_template(self):
        """An unknown template string does not raise.

        The intended behaviour is a fallback to GENERAL_HERITAGE; only the
        return type is asserted here.
        """
        get_classes_for_template_cached.cache_clear()

        # This string is not a template name used anywhere in this module.
        outcome = get_classes_for_template_cached("invalid_template_name", 0.6)

        # Reaching this point means no exception was raised.
        assert isinstance(outcome, tuple)
# =============================================================================
# Test: Multiple Classes in Single File
# =============================================================================
class TestMultipleClassesInSingleFile:
    """Checks for YAML files that define more than one class."""

    def test_load_multiple_classes_from_single_file(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """Every class defined in the single file is loaded."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)
        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        loaded = lookup.get_all_scores()

        for expected_name in ("BroadClass", "ModerateClass", "NarrowClass"):
            assert expected_name in loaded
        assert len(loaded) == 3
# =============================================================================
# Test: Edge Cases
# =============================================================================
class TestEdgeCases:
    """Unusual or malformed class definitions."""

    def test_class_with_empty_annotations_dict(self, temp_schema_dir: Path):
        """An empty ``annotations`` mapping results in the default score."""
        schema = {
            "classes": {
                "EmptyAnnotationsClass": {
                    "description": "Has empty annotations",
                    "annotations": {},
                }
            }
        }
        create_yaml_file(temp_schema_dir, "EmptyAnnotations.yaml", schema)
        lookup = SpecificityLookup(schema_dir=temp_schema_dir, default_score=0.5)

        result = lookup.get_score("EmptyAnnotationsClass")

        # Nothing was annotated, so the configured default applies.
        assert result.base_score == 0.5

    def test_class_with_null_class_def(self, temp_schema_dir: Path):
        """A class whose definition is ``None`` is not loaded."""
        schema = {"classes": {"NullClass": None}}
        create_yaml_file(temp_schema_dir, "NullClass.yaml", schema)
        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        loaded = lookup.get_all_scores()

        # Loading must not raise, and the null entry is left out.
        assert "NullClass" not in loaded

    def test_template_specificity_not_a_dict(self, temp_schema_dir: Path):
        """A non-mapping ``template_specificity`` is ignored; the base score loads."""
        annotations = {
            "specificity_score": 0.5,
            "template_specificity": "not_a_dict",  # Invalid type
        }
        schema = {"classes": {"BadTemplateClass": {"annotations": annotations}}}
        create_yaml_file(temp_schema_dir, "BadTemplate.yaml", schema)
        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        result = lookup.get_score("BadTemplateClass")

        # Base score is kept; the malformed overrides yield an empty mapping.
        assert result.base_score == 0.5
        assert result.template_scores == {}

    def test_score_as_string_is_converted(self, temp_schema_dir: Path):
        """A numeric string score is loaded as a float."""
        annotations = {
            "specificity_score": "0.75",  # String, not float
        }
        schema = {"classes": {"StringScoreClass": {"annotations": annotations}}}
        create_yaml_file(temp_schema_dir, "StringScore.yaml", schema)
        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        result = lookup.get_score("StringScoreClass")

        assert result.base_score == 0.75
        assert isinstance(result.base_score, float)

    def test_score_as_integer_is_converted(self, temp_schema_dir: Path):
        """An integer score is loaded as a float."""
        annotations = {
            "specificity_score": 1,  # Integer, not float
        }
        schema = {"classes": {"IntScoreClass": {"annotations": annotations}}}
        create_yaml_file(temp_schema_dir, "IntScore.yaml", schema)
        lookup = SpecificityLookup(schema_dir=temp_schema_dir)

        result = lookup.get_score("IntScoreClass")

        assert result.base_score == 1.0
        assert isinstance(result.base_score, float)