- Updated documentation to clarify integration points with existing components in the RAG pipeline and DSPy framework. - Added detailed mapping of SPARQL templates to context templates for improved specificity filtering. - Implemented wrapper patterns around existing classifiers to extend functionality without duplication. - Introduced new tests for the SpecificityAwareClassifier and SPARQLToContextMapper to ensure proper integration and functionality. - Enhanced the CustodianRDFConverter to include ISO country and subregion codes from GHCID for better geospatial data handling.
839 lines
29 KiB
Python
839 lines
29 KiB
Python
"""
Tests for backend.rag.specificity.lookup module.

This module tests the specificity score lookup from LinkML schema annotations:
- Loading scores from YAML files
- Threshold filtering via get_classes_for_template()
- Default scores for unannotated classes
- Template-specific score overrides
- SpecificityScore.passes_threshold() method
- reload() functionality
- Error handling for malformed YAML

Coverage:
- SpecificityLookup class
- SpecificityScore dataclass
- get_specificity_lookup() singleton
- get_classes_for_template_cached() function
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import tempfile
|
|
from pathlib import Path
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
import yaml
|
|
|
|
from backend.rag.specificity.lookup import (
|
|
SpecificityLookup,
|
|
get_classes_for_template_cached,
|
|
get_specificity_lookup,
|
|
)
|
|
from backend.rag.specificity.models import (
|
|
ContextTemplate,
|
|
SpecificityScore,
|
|
)
|
|
|
|
|
|
# =============================================================================
|
|
# Fixtures
|
|
# =============================================================================
|
|
|
|
|
|
@pytest.fixture
def temp_schema_dir(tmp_path: Path) -> Path:
    """Provide a schema root that already contains a ``modules/classes`` folder.

    The per-test ``tmp_path`` itself is returned (not the nested folder), so
    the result can be handed to ``SpecificityLookup(schema_dir=...)`` as-is.
    No class files are written here; tests add them via ``create_yaml_file``.
    """
    (tmp_path / "modules" / "classes").mkdir(parents=True)
    return tmp_path
|
|
|
|
|
|
@pytest.fixture
def sample_class_with_full_annotations() -> dict:
    """Return a schema dict whose single class carries every annotation.

    ``FullyAnnotatedClass`` has a base score, a rationale, and per-template
    scores for four template names.
    """
    per_template = {
        "archive_search": 0.2,
        "museum_search": 0.9,
        "library_search": 0.8,
        "general_heritage": 0.5,
    }
    annotations = {
        "specificity_score": 0.75,
        "specificity_rationale": "Fairly specific to archival contexts",
        "template_specificity": per_template,
    }
    class_def = {
        "description": "A class with complete specificity annotations",
        "annotations": annotations,
    }
    return {"classes": {"FullyAnnotatedClass": class_def}}
|
|
|
|
|
|
@pytest.fixture
def sample_class_with_base_score_only() -> dict:
    """Return a schema dict whose class has a base score and rationale only.

    ``BaseScoreOnlyClass`` deliberately has no ``template_specificity`` entry.
    """
    annotations = {
        "specificity_score": 0.4,
        "specificity_rationale": "Moderately broad relevance",
    }
    class_def = {
        "description": "A class with only base specificity score",
        "annotations": annotations,
    }
    return {"classes": {"BaseScoreOnlyClass": class_def}}
|
|
|
|
|
|
@pytest.fixture
def sample_class_without_annotations() -> dict:
    """Return a schema dict whose class has no ``annotations`` key at all.

    ``UnannotatedClass`` only declares a description and two slots.
    """
    class_def = {
        "description": "A class without specificity annotations",
        "slots": ["slot_one", "slot_two"],
    }
    return {"classes": {"UnannotatedClass": class_def}}
|
|
|
|
|
|
@pytest.fixture
def sample_class_with_invalid_score() -> dict:
    """Return a schema dict whose class has a non-numeric specificity score.

    The score of ``InvalidScoreClass`` is the string ``"not_a_number"``.
    """
    annotations = {
        "specificity_score": "not_a_number",
        "specificity_rationale": "This should fail",
    }
    class_def = {
        "description": "A class with invalid specificity score",
        "annotations": annotations,
    }
    return {"classes": {"InvalidScoreClass": class_def}}
|
|
|
|
|
|
@pytest.fixture
def sample_class_with_invalid_template() -> dict:
    """Return a schema dict mixing a known and an unknown template name.

    ``InvalidTemplateClass`` lists ``archive_search`` alongside
    ``invalid_template_name`` under ``template_specificity``.
    """
    per_template = {
        "archive_search": 0.3,
        # Deliberately not a real template name.
        "invalid_template_name": 0.7,
    }
    annotations = {
        "specificity_score": 0.5,
        "template_specificity": per_template,
    }
    class_def = {
        "description": "A class with invalid template specificity",
        "annotations": annotations,
    }
    return {"classes": {"InvalidTemplateClass": class_def}}
|
|
|
|
|
|
@pytest.fixture
def multiple_classes_yaml() -> dict:
    """Return a schema dict with three classes of increasing base score.

    ``BroadClass`` (0.2), ``ModerateClass`` (0.5) and ``NarrowClass`` (0.8)
    each also define ``archive_search`` and ``museum_search`` overrides; no
    class defines an override for any other template.
    """

    def build(description, base, rationale, archive, museum):
        # Every call creates fresh dicts, so nothing is shared between classes.
        return {
            "description": description,
            "annotations": {
                "specificity_score": base,
                "specificity_rationale": rationale,
                "template_specificity": {
                    "archive_search": archive,
                    "museum_search": museum,
                },
            },
        }

    return {
        "classes": {
            "BroadClass": build(
                "Broadly relevant class", 0.2, "Universal class", 0.1, 0.1
            ),
            "ModerateClass": build(
                "Moderately specific class", 0.5, "Average specificity", 0.3, 0.7
            ),
            "NarrowClass": build(
                "Narrowly specific class", 0.8, "Highly specific", 0.9, 0.2
            ),
        }
    }
|
|
|
|
|
|
def create_yaml_file(directory: Path, filename: str, content: dict) -> Path:
    """Dump *content* as YAML to ``<directory>/modules/classes/<filename>``.

    The ``modules/classes`` folder is created when missing, and a file that
    already exists under the same name is overwritten.

    Returns:
        The path of the file that was written.
    """
    target = directory / "modules" / "classes" / filename
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w", encoding="utf-8") as stream:
        yaml.dump(content, stream)
    return target
|
|
|
|
|
|
# =============================================================================
|
|
# Test: SpecificityScore Dataclass
|
|
# =============================================================================
|
|
|
|
|
|
class TestSpecificityScore:
    """Unit tests for the SpecificityScore dataclass."""

    def test_get_score_returns_base_score_when_no_template_override(self):
        """Without overrides, get_score() yields the base score for any template."""
        subject = SpecificityScore(
            class_name="TestClass", base_score=0.6, template_scores={}
        )
        for template in (
            ContextTemplate.ARCHIVE_SEARCH,
            ContextTemplate.MUSEUM_SEARCH,
            ContextTemplate.GENERAL_HERITAGE,
        ):
            assert subject.get_score(template) == 0.6

    def test_get_score_returns_template_score_when_override_exists(self):
        """get_score() prefers a template override and otherwise uses the base."""
        overrides = {
            ContextTemplate.ARCHIVE_SEARCH: 0.2,
            ContextTemplate.MUSEUM_SEARCH: 0.9,
        }
        subject = SpecificityScore(
            class_name="TestClass", base_score=0.6, template_scores=overrides
        )
        assert subject.get_score(ContextTemplate.ARCHIVE_SEARCH) == 0.2
        assert subject.get_score(ContextTemplate.MUSEUM_SEARCH) == 0.9
        # No override for the library template, so the base score applies.
        assert subject.get_score(ContextTemplate.LIBRARY_SEARCH) == 0.6

    def test_passes_threshold_returns_true_when_score_below_threshold(self):
        """passes_threshold() is True for score <= threshold (equality included)."""
        subject = SpecificityScore(
            class_name="TestClass", base_score=0.4, template_scores={}
        )
        # The last threshold equals the score exactly.
        for threshold in (0.6, 0.5, 0.4):
            assert (
                subject.passes_threshold(ContextTemplate.ARCHIVE_SEARCH, threshold)
                is True
            )

    def test_passes_threshold_returns_false_when_score_above_threshold(self):
        """passes_threshold() is False when the score exceeds the threshold."""
        subject = SpecificityScore(
            class_name="TestClass", base_score=0.8, template_scores={}
        )
        for threshold in (0.6, 0.79):
            assert (
                subject.passes_threshold(ContextTemplate.ARCHIVE_SEARCH, threshold)
                is False
            )

    def test_passes_threshold_with_template_override(self):
        """passes_threshold() compares against the template-specific score."""
        subject = SpecificityScore(
            class_name="TestClass",
            base_score=0.8,  # On its own this exceeds the 0.6 threshold.
            template_scores={ContextTemplate.ARCHIVE_SEARCH: 0.2},
        )
        # Archive is overridden to 0.2 and therefore passes at 0.6.
        archive_ok = subject.passes_threshold(ContextTemplate.ARCHIVE_SEARCH, 0.6)
        assert archive_ok is True
        # Museum has no override; the base score of 0.8 fails at 0.6.
        museum_ok = subject.passes_threshold(ContextTemplate.MUSEUM_SEARCH, 0.6)
        assert museum_ok is False

    def test_rationale_is_optional(self):
        """Omitting rationale leaves it as None."""
        subject = SpecificityScore(class_name="TestClass", base_score=0.5)
        assert subject.rationale is None

    def test_template_scores_defaults_to_empty_dict(self):
        """Omitting template_scores leaves it as an empty dict."""
        subject = SpecificityScore(class_name="TestClass", base_score=0.5)
        assert subject.template_scores == {}
|
|
|
|
|
|
# =============================================================================
|
|
# Test: SpecificityLookup Loading from YAML
|
|
# =============================================================================
|
|
|
|
|
|
class TestSpecificityLookupLoading:
    """Tests for how SpecificityLookup reads scores from YAML class files."""

    def test_load_class_with_full_annotations(
        self,
        temp_schema_dir: Path,
        sample_class_with_full_annotations: dict,
    ):
        """Base score, rationale and all four template scores are loaded."""
        create_yaml_file(
            temp_schema_dir,
            "FullyAnnotatedClass.yaml",
            sample_class_with_full_annotations,
        )

        loaded = SpecificityLookup(schema_dir=temp_schema_dir).get_score(
            "FullyAnnotatedClass"
        )

        assert loaded.class_name == "FullyAnnotatedClass"
        assert loaded.base_score == 0.75
        assert loaded.rationale == "Fairly specific to archival contexts"
        expected_by_template = {
            ContextTemplate.ARCHIVE_SEARCH: 0.2,
            ContextTemplate.MUSEUM_SEARCH: 0.9,
            ContextTemplate.LIBRARY_SEARCH: 0.8,
            ContextTemplate.GENERAL_HERITAGE: 0.5,
        }
        for template, expected in expected_by_template.items():
            assert loaded.template_scores[template] == expected

    def test_load_class_with_base_score_only(
        self,
        temp_schema_dir: Path,
        sample_class_with_base_score_only: dict,
    ):
        """A class with only a base score loads with no template overrides."""
        create_yaml_file(
            temp_schema_dir,
            "BaseScoreOnlyClass.yaml",
            sample_class_with_base_score_only,
        )

        loaded = SpecificityLookup(schema_dir=temp_schema_dir).get_score(
            "BaseScoreOnlyClass"
        )

        assert loaded.class_name == "BaseScoreOnlyClass"
        assert loaded.base_score == 0.4
        assert loaded.rationale == "Moderately broad relevance"
        # The fixture defines no template_specificity section.
        assert loaded.template_scores == {}

    def test_load_class_without_annotations_uses_default(
        self,
        temp_schema_dir: Path,
        sample_class_without_annotations: dict,
    ):
        """An unannotated class is reported with the configured default score."""
        create_yaml_file(
            temp_schema_dir,
            "UnannotatedClass.yaml",
            sample_class_without_annotations,
        )

        finder = SpecificityLookup(schema_dir=temp_schema_dir, default_score=0.5)
        loaded = finder.get_score("UnannotatedClass")

        assert loaded.class_name == "UnannotatedClass"
        assert loaded.base_score == 0.5  # The default passed above.
        rationale = loaded.rationale
        assert "default" in rationale.lower() or "No specificity" in rationale

    def test_custom_default_score(self, temp_schema_dir: Path):
        """The default_score argument is used for a class that is not defined."""
        finder = SpecificityLookup(schema_dir=temp_schema_dir, default_score=0.7)

        assert finder.get_score("NonExistentClass").base_score == 0.7

    def test_nonexistent_class_returns_default(self, temp_schema_dir: Path):
        """An unknown class gets the default score and a 'not found' rationale."""
        finder = SpecificityLookup(schema_dir=temp_schema_dir, default_score=0.5)
        loaded = finder.get_score("ClassNotInSchema")

        assert loaded.class_name == "ClassNotInSchema"
        assert loaded.base_score == 0.5
        assert "not found" in loaded.rationale.lower()

    def test_invalid_score_is_skipped(
        self,
        temp_schema_dir: Path,
        sample_class_with_invalid_score: dict,
    ):
        """A class whose score is not numeric is absent from the loaded scores."""
        create_yaml_file(
            temp_schema_dir,
            "InvalidScoreClass.yaml",
            sample_class_with_invalid_score,
        )

        all_scores = SpecificityLookup(schema_dir=temp_schema_dir).get_all_scores()

        assert "InvalidScoreClass" not in all_scores

    def test_invalid_template_name_is_skipped(
        self,
        temp_schema_dir: Path,
        sample_class_with_invalid_template: dict,
    ):
        """Unknown template names are dropped while valid ones are kept."""
        create_yaml_file(
            temp_schema_dir,
            "InvalidTemplateClass.yaml",
            sample_class_with_invalid_template,
        )

        loaded = SpecificityLookup(schema_dir=temp_schema_dir).get_score(
            "InvalidTemplateClass"
        )

        # The recognised template survives ...
        assert loaded.template_scores.get(ContextTemplate.ARCHIVE_SEARCH) == 0.3
        # ... and it is the only entry, so the unknown name was not loaded.
        assert len(loaded.template_scores) == 1

    def test_empty_yaml_file_is_handled(self, temp_schema_dir: Path):
        """A zero-byte YAML file does not prevent loading."""
        classes_dir = temp_schema_dir / "modules" / "classes"
        classes_dir.mkdir(parents=True, exist_ok=True)
        (classes_dir / "Empty.yaml").write_text("")

        all_scores = SpecificityLookup(schema_dir=temp_schema_dir).get_all_scores()

        # Reaching this point without an exception is the main check.
        assert isinstance(all_scores, dict)

    def test_nonexistent_schema_dir_handled(self, tmp_path: Path):
        """A schema directory that does not exist yields no scores."""
        missing_dir = tmp_path / "nonexistent"

        all_scores = SpecificityLookup(schema_dir=missing_dir).get_all_scores()

        assert all_scores == {}
|
|
|
|
|
|
# =============================================================================
|
|
# Test: Threshold Filtering
|
|
# =============================================================================
|
|
|
|
|
|
class TestThresholdFiltering:
    """Tests for get_classes_for_template() threshold filtering."""

    def test_get_classes_for_template_filters_by_threshold(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """Only classes whose score is within the threshold are returned."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)
        finder = SpecificityLookup(schema_dir=temp_schema_dir)

        # At 0.6, BroadClass (0.2) and ModerateClass (0.5) pass; NarrowClass
        # (0.8) does not.
        selected = finder.get_classes_for_template(
            ContextTemplate.GENERAL_HERITAGE, threshold=0.6
        )

        assert "BroadClass" in selected
        assert "ModerateClass" in selected
        assert "NarrowClass" not in selected

    def test_get_classes_for_template_uses_template_specific_scores(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """Filtering honours the per-template override scores."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)
        finder = SpecificityLookup(schema_dir=temp_schema_dir)

        # ARCHIVE_SEARCH at 0.5:
        # BroadClass 0.1 (pass), ModerateClass 0.3 (pass), NarrowClass 0.9 (fail)
        for_archive = finder.get_classes_for_template(
            ContextTemplate.ARCHIVE_SEARCH, threshold=0.5
        )
        assert "BroadClass" in for_archive
        assert "ModerateClass" in for_archive
        assert "NarrowClass" not in for_archive

        # MUSEUM_SEARCH at 0.5:
        # BroadClass 0.1 (pass), ModerateClass 0.7 (fail), NarrowClass 0.2 (pass)
        for_museum = finder.get_classes_for_template(
            ContextTemplate.MUSEUM_SEARCH, threshold=0.5
        )
        assert "BroadClass" in for_museum
        assert "ModerateClass" not in for_museum
        assert "NarrowClass" in for_museum

    def test_get_classes_for_template_returns_sorted_list(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """The returned class names are in sorted order."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)
        finder = SpecificityLookup(schema_dir=temp_schema_dir)

        # A threshold of 1.0 lets every fixture class through.
        selected = finder.get_classes_for_template(
            ContextTemplate.GENERAL_HERITAGE, threshold=1.0
        )

        assert selected == sorted(selected)

    def test_get_classes_for_template_with_zero_threshold(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """A threshold of 0.0 admits only classes scored exactly 0."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)
        finder = SpecificityLookup(schema_dir=temp_schema_dir)

        selected = finder.get_classes_for_template(
            ContextTemplate.GENERAL_HERITAGE, threshold=0.0
        )

        # None of the fixture classes has a score of 0.
        assert selected == []

    def test_get_classes_for_template_with_one_threshold(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """A threshold of 1.0 admits all three fixture classes."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)
        finder = SpecificityLookup(schema_dir=temp_schema_dir)

        selected = finder.get_classes_for_template(
            ContextTemplate.GENERAL_HERITAGE, threshold=1.0
        )

        assert len(selected) == 3
        assert "BroadClass" in selected
        assert "ModerateClass" in selected
        assert "NarrowClass" in selected
|
|
|
|
|
|
# =============================================================================
|
|
# Test: get_filtered_scores()
|
|
# =============================================================================
|
|
|
|
|
|
class TestGetFilteredScores:
    """Tests for SpecificityLookup.get_filtered_scores()."""

    def test_get_filtered_scores_returns_dict_of_scores(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """Passing classes are returned as a name -> SpecificityScore mapping."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)
        finder = SpecificityLookup(schema_dir=temp_schema_dir)

        passing = finder.get_filtered_scores(
            ContextTemplate.GENERAL_HERITAGE, threshold=0.6
        )

        assert "BroadClass" in passing
        assert "ModerateClass" in passing
        assert "NarrowClass" not in passing

        # The mapping values are full score objects, not bare numbers.
        broad = passing["BroadClass"]
        assert isinstance(broad, SpecificityScore)
        assert passing["BroadClass"].base_score == 0.2
|
|
|
|
|
|
# =============================================================================
|
|
# Test: get_all_scores()
|
|
# =============================================================================
|
|
|
|
|
|
class TestGetAllScores:
    """Tests for SpecificityLookup.get_all_scores()."""

    def test_get_all_scores_returns_copy(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """Each call returns an equal but distinct dict object."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)
        finder = SpecificityLookup(schema_dir=temp_schema_dir)

        first = finder.get_all_scores()
        second = finder.get_all_scores()

        # Same contents, different identity.
        assert first == second
        assert first is not second
|
|
|
|
|
|
# =============================================================================
|
|
# Test: reload()
|
|
# =============================================================================
|
|
|
|
|
|
class TestReload:
    """Tests for SpecificityLookup.reload()."""

    def test_reload_clears_cache(
        self,
        temp_schema_dir: Path,
        sample_class_with_base_score_only: dict,
    ):
        """A changed file is only picked up after reload() is called."""
        class_file = "BaseScoreOnlyClass.yaml"
        create_yaml_file(temp_schema_dir, class_file, sample_class_with_base_score_only)
        finder = SpecificityLookup(schema_dir=temp_schema_dir)

        # First read reflects the original file.
        before_change = finder.get_score("BaseScoreOnlyClass")
        assert before_change.base_score == 0.4

        # Overwrite the file on disk with a different score.
        rewritten = {
            "classes": {
                "BaseScoreOnlyClass": {
                    "annotations": {
                        "specificity_score": 0.9,
                    },
                }
            }
        }
        create_yaml_file(temp_schema_dir, class_file, rewritten)

        # The lookup still serves the previously loaded value ...
        still_cached = finder.get_score("BaseScoreOnlyClass")
        assert still_cached.base_score == 0.4

        # ... until it is told to reload.
        finder.reload()
        after_reload = finder.get_score("BaseScoreOnlyClass")
        assert after_reload.base_score == 0.9
|
|
|
|
|
|
# =============================================================================
|
|
# Test: Caching Behavior
|
|
# =============================================================================
|
|
|
|
|
|
class TestCachingBehavior:
    """Tests that loaded scores are retained on the lookup instance."""

    def test_scores_are_cached(
        self,
        temp_schema_dir: Path,
        sample_class_with_base_score_only: dict,
    ):
        """After a first load, the private ``_scores`` attribute is populated."""
        create_yaml_file(
            temp_schema_dir,
            "BaseScoreOnlyClass.yaml",
            sample_class_with_base_score_only,
        )
        finder = SpecificityLookup(schema_dir=temp_schema_dir)

        # Trigger loading; the returned value itself is not needed.
        finder.get_all_scores()

        assert finder._scores is not None
|
|
|
|
|
|
# =============================================================================
|
|
# Test: Singleton Instance
|
|
# =============================================================================
|
|
|
|
|
|
class TestSingletonInstance:
    """Tests for the get_specificity_lookup() accessor."""

    def test_get_specificity_lookup_returns_instance(self):
        """The accessor hands back a SpecificityLookup object."""
        assert isinstance(get_specificity_lookup(), SpecificityLookup)

    def test_singleton_returns_same_instance(self):
        """Two consecutive calls return the very same object."""
        first = get_specificity_lookup()
        second = get_specificity_lookup()
        assert first is second
|
|
|
|
|
|
# =============================================================================
|
|
# Test: get_classes_for_template_cached()
|
|
# =============================================================================
|
|
|
|
|
|
class TestGetClassesForTemplateCached:
    """Tests for the get_classes_for_template_cached() function."""

    def test_cached_function_returns_tuple(self):
        """The cached function returns a tuple (a hashable container)."""
        # Start from an empty cache so the call below is actually evaluated.
        get_classes_for_template_cached.cache_clear()

        outcome = get_classes_for_template_cached("general_heritage", 0.6)

        assert isinstance(outcome, tuple)

    def test_cached_function_handles_invalid_template(self):
        """An unrecognised template string still produces a tuple.

        The intended behaviour is a fallback to GENERAL_HERITAGE; this test
        only checks that the call succeeds and returns a tuple.
        """
        get_classes_for_template_cached.cache_clear()

        # This string is not a valid template name.
        outcome = get_classes_for_template_cached("invalid_template_name", 0.6)

        assert isinstance(outcome, tuple)
|
|
|
|
|
|
# =============================================================================
|
|
# Test: Multiple Classes in Single File
|
|
# =============================================================================
|
|
|
|
|
|
class TestMultipleClassesInSingleFile:
    """Tests for YAML files that define more than one class."""

    def test_load_multiple_classes_from_single_file(
        self,
        temp_schema_dir: Path,
        multiple_classes_yaml: dict,
    ):
        """All three classes in one file are loaded, and nothing else."""
        create_yaml_file(temp_schema_dir, "MultipleClasses.yaml", multiple_classes_yaml)

        all_scores = SpecificityLookup(schema_dir=temp_schema_dir).get_all_scores()

        assert "BroadClass" in all_scores
        assert "ModerateClass" in all_scores
        assert "NarrowClass" in all_scores
        assert len(all_scores) == 3
|
|
|
|
|
|
# =============================================================================
|
|
# Test: Edge Cases
|
|
# =============================================================================
|
|
|
|
|
|
class TestEdgeCases:
    """Tests for unusual or malformed class definitions."""

    @staticmethod
    def _write_single_class(schema_dir: Path, filename: str, class_name: str, class_def):
        """Write a schema file that defines exactly one class."""
        create_yaml_file(schema_dir, filename, {"classes": {class_name: class_def}})

    def test_class_with_empty_annotations_dict(self, temp_schema_dir: Path):
        """An empty ``annotations`` dict results in the default score."""
        self._write_single_class(
            temp_schema_dir,
            "EmptyAnnotations.yaml",
            "EmptyAnnotationsClass",
            {
                "description": "Has empty annotations",
                "annotations": {},
            },
        )

        finder = SpecificityLookup(schema_dir=temp_schema_dir, default_score=0.5)
        loaded = finder.get_score("EmptyAnnotationsClass")

        # Nothing was annotated, so the default passed above applies.
        assert loaded.base_score == 0.5

    def test_class_with_null_class_def(self, temp_schema_dir: Path):
        """A class whose definition is null is left out of the loaded scores."""
        self._write_single_class(temp_schema_dir, "NullClass.yaml", "NullClass", None)

        all_scores = SpecificityLookup(schema_dir=temp_schema_dir).get_all_scores()

        # Loading must succeed, and the null class must be absent.
        assert "NullClass" not in all_scores

    def test_template_specificity_not_a_dict(self, temp_schema_dir: Path):
        """A non-dict ``template_specificity`` is ignored; the base score loads."""
        self._write_single_class(
            temp_schema_dir,
            "BadTemplate.yaml",
            "BadTemplateClass",
            {
                "annotations": {
                    "specificity_score": 0.5,
                    # A string where a mapping is expected.
                    "template_specificity": "not_a_dict",
                }
            },
        )

        loaded = SpecificityLookup(schema_dir=temp_schema_dir).get_score(
            "BadTemplateClass"
        )

        assert loaded.base_score == 0.5
        assert loaded.template_scores == {}

    def test_score_as_string_is_converted(self, temp_schema_dir: Path):
        """A numeric string score is loaded as a float."""
        self._write_single_class(
            temp_schema_dir,
            "StringScore.yaml",
            "StringScoreClass",
            {
                "annotations": {
                    "specificity_score": "0.75",  # Quoted, so YAML keeps a string.
                }
            },
        )

        loaded = SpecificityLookup(schema_dir=temp_schema_dir).get_score(
            "StringScoreClass"
        )

        assert loaded.base_score == 0.75
        assert isinstance(loaded.base_score, float)

    def test_score_as_integer_is_converted(self, temp_schema_dir: Path):
        """An integer score is loaded as a float."""
        self._write_single_class(
            temp_schema_dir,
            "IntScore.yaml",
            "IntScoreClass",
            {
                "annotations": {
                    "specificity_score": 1,  # An int rather than a float.
                }
            },
        )

        loaded = SpecificityLookup(schema_dir=temp_schema_dir).get_score(
            "IntScoreClass"
        )

        assert loaded.base_score == 1.0
        assert isinstance(loaded.base_score, float)
|