- Updated documentation to clarify integration points with existing components in the RAG pipeline and DSPy framework. - Added detailed mapping of SPARQL templates to context templates for improved specificity filtering. - Implemented wrapper patterns around existing classifiers to extend functionality without duplication. - Introduced new tests for the SpecificityAwareClassifier and SPARQLToContextMapper to ensure proper integration and functionality. - Enhanced the CustodianRDFConverter to include ISO country and subregion codes from GHCID for better geospatial data handling.
323 lines
12 KiB
Python
323 lines
12 KiB
Python
"""
|
|
Integration tests for specificity score system with real schema files.
|
|
|
|
These tests verify that the specificity lookup correctly loads and filters
|
|
annotations from the actual LinkML schema YAML files.
|
|
"""
|
|
|
|
import pytest
|
|
from pathlib import Path
|
|
|
|
from backend.rag.specificity import (
|
|
ContextTemplate,
|
|
SpecificityLookup,
|
|
SpecificityScore,
|
|
SPARQLToContextMapper,
|
|
SpecificityAwareClassifier,
|
|
get_specificity_lookup,
|
|
)
|
|
|
|
|
|
# Real schema directory
|
|
# Located relative to this file: three directory levels up, then schemas/20251121/linkml.
SCHEMA_DIR = Path(__file__).parent.parent.parent.joinpath("schemas", "20251121", "linkml")
|
|
|
|
|
|
class TestRealSchemaLoading:
    """Checks on the specificity scores loaded from the real schema directory."""

    @pytest.fixture
    def lookup(self) -> SpecificityLookup:
        """Build a ``SpecificityLookup`` over ``SCHEMA_DIR``."""
        schema_lookup = SpecificityLookup(SCHEMA_DIR)
        return schema_lookup

    def test_loads_many_classes(self, lookup: SpecificityLookup):
        """At least 300 classes must come back from ``get_all_scores``."""
        loaded_count = len(lookup.get_all_scores())
        # We expect ~600+ classes from 300+ files
        assert loaded_count >= 300, f"Only {loaded_count} classes loaded"

    def test_core_classes_have_low_scores(self, lookup: SpecificityLookup):
        """Each listed core class must have ``base_score`` of at most 0.35."""
        for core_name in ("Custodian", "CustodianIdentifier", "Collection", "CustodianPlace"):
            base = lookup.get_score(core_name).base_score
            assert base <= 0.35, (
                f"{core_name} should have low base_score (<=0.35), got {base}"
            )

    def test_type_classes_have_moderate_to_high_scores(self, lookup: SpecificityLookup):
        """Each listed type/taxonomy class must have ``base_score`` of at least 0.5."""
        for type_name in ("MuseumType",):
            base = lookup.get_score(type_name).base_score
            assert base >= 0.5, (
                f"{type_name} should have base_score (>=0.5), got {base}"
            )

    def test_all_classes_have_annotations(self, lookup: SpecificityLookup):
        """No more than 5% of loaded classes may carry the default rationale."""
        scores_by_class = lookup.get_all_scores()

        # Classes whose rationale equals this text are counted as unannotated.
        default_rationale = "No specificity annotation (using default)"
        unannotated = []
        for class_name, class_score in scores_by_class.items():
            if class_score.rationale == default_rationale:
                unannotated.append(class_name)

        # Allow some tolerance for new classes added without annotations
        tolerance_percent = 5  # 5% tolerance
        allowed = len(scores_by_class) * (tolerance_percent / 100)

        assert len(unannotated) <= allowed, (
            f"{len(unannotated)} classes missing annotations: "
            f"{unannotated[:10]}..."
        )

    def test_template_scores_are_populated(self, lookup: SpecificityLookup):
        """At least 90% of loaded classes must have non-empty ``template_scores``."""
        scores_by_class = lookup.get_all_scores()

        populated = []
        for class_name, class_score in scores_by_class.items():
            if class_score.template_scores:
                populated.append(class_name)

        # Most classes should have template scores
        required = len(scores_by_class) * 0.9  # 90% minimum
        assert len(populated) >= required, (
            f"Only {len(populated)}/{len(scores_by_class)} have template scores"
        )
|
|
|
|
|
|
class TestTemplateFiltering:
    """Test filtering classes by context template."""

    @pytest.fixture
    def lookup(self) -> SpecificityLookup:
        """Build a ``SpecificityLookup`` over ``SCHEMA_DIR``."""
        return SpecificityLookup(SCHEMA_DIR)

    def test_archive_search_includes_archive_specific_classes(self, lookup: SpecificityLookup):
        """Archive search should include archive-related classes."""
        classes = lookup.get_classes_for_template(
            ContextTemplate.ARCHIVE_SEARCH,
            threshold=0.6
        )

        # CustodianArchive has archive_search=0.25, should pass
        assert "CustodianArchive" in classes, "CustodianArchive should pass archive_search threshold"

    def test_museum_search_includes_core_classes(self, lookup: SpecificityLookup):
        """Museum search should include core classes."""
        classes = lookup.get_classes_for_template(
            ContextTemplate.MUSEUM_SEARCH,
            threshold=0.6
        )

        assert "Custodian" in classes, "Core Custodian should always pass"

    def test_general_heritage_includes_most_classes(self, lookup: SpecificityLookup):
        """General heritage template should include most classes at threshold 0.6."""
        classes = lookup.get_classes_for_template(
            ContextTemplate.GENERAL_HERITAGE,
            threshold=0.6
        )

        all_scores = lookup.get_all_scores()
        # Guard the division below: an empty load should fail with a readable
        # assertion message rather than a ZeroDivisionError.
        assert all_scores, "No classes loaded; cannot compute general heritage coverage"
        coverage = len(classes) / len(all_scores)

        # Most classes have base_score 0.5, so should pass 0.6 threshold
        assert coverage >= 0.8, f"General heritage should cover 80%+ classes, got {coverage:.1%}"

    def test_threshold_affects_class_count(self, lookup: SpecificityLookup):
        """Stricter thresholds should return fewer classes."""
        template = ContextTemplate.ARCHIVE_SEARCH

        classes_60 = lookup.get_classes_for_template(template, threshold=0.6)
        classes_40 = lookup.get_classes_for_template(template, threshold=0.4)
        classes_20 = lookup.get_classes_for_template(template, threshold=0.2)

        # Class counts must not grow as the threshold value goes down.
        assert len(classes_60) >= len(classes_40) >= len(classes_20), (
            "Stricter thresholds should return fewer classes: "
            f"0.6={len(classes_60)}, 0.4={len(classes_40)}, 0.2={len(classes_20)}"
        )

    def test_filtered_scores_returns_score_objects(self, lookup: SpecificityLookup):
        """get_filtered_scores should return SpecificityScore objects."""
        filtered = lookup.get_filtered_scores(
            ContextTemplate.COLLECTION_DISCOVERY,
            threshold=0.5
        )

        assert len(filtered) > 0, "Should return some filtered scores"

        # Every value must be a SpecificityScore whose class_name matches its key.
        for class_name, score in filtered.items():
            assert isinstance(score, SpecificityScore)
            assert score.class_name == class_name
|
|
|
|
|
|
class TestSPARQLToContextMapping:
    """Checks that SPARQL template IDs resolve to the expected context templates."""

    @pytest.fixture
    def mapper(self) -> SPARQLToContextMapper:
        """Provide a default-constructed ``SPARQLToContextMapper``."""
        fresh_mapper = SPARQLToContextMapper()
        return fresh_mapper

    def test_known_templates_map_correctly(self, mapper: SPARQLToContextMapper):
        """Each known SPARQL template ID must map to its expected context template."""
        # Use the actual template IDs from the mapper
        expected_pairs = (
            ("list_institutions_by_type_city", ContextTemplate.LOCATION_BROWSE),
            ("list_institutions_by_type_region", ContextTemplate.LOCATION_BROWSE),
            ("find_person_by_role", ContextTemplate.PERSON_RESEARCH),
            ("list_collections_by_type", ContextTemplate.COLLECTION_DISCOVERY),
            ("find_institution_by_identifier", ContextTemplate.IDENTIFIER_LOOKUP),
        )

        for template_id, wanted in expected_pairs:
            actual = mapper.map(template_id)
            assert actual == wanted, f"{template_id} should map to {wanted}, got {actual}"

    def test_institution_type_refinement_for_refinable_templates(self, mapper: SPARQLToContextMapper):
        """An ``institution_type`` slot must refine the mapping of a refinable template."""
        # Only refinable templates support institution_type refinement
        # list_institutions_by_type_city IS refinable
        refinements = (
            ("A", ContextTemplate.ARCHIVE_SEARCH),
            ("M", ContextTemplate.MUSEUM_SEARCH),
        )
        for type_code, wanted in refinements:
            actual = mapper.map(
                "list_institutions_by_type_city",
                slots={"institution_type": type_code}
            )
            assert actual == wanted

    def test_unknown_template_returns_general(self, mapper: SPARQLToContextMapper):
        """An unrecognised SPARQL template ID must map to ``GENERAL_HERITAGE``."""
        fallback = mapper.map("unknown_template_xyz")
        assert fallback == ContextTemplate.GENERAL_HERITAGE
|
|
|
|
|
|
class TestSpecificityAwareClassifier:
    """Checks the convenience methods exposed by ``SpecificityAwareClassifier``."""

    @staticmethod
    def _build_classifier() -> SpecificityAwareClassifier:
        """Construct a classifier over the real schema directory with threshold 0.6."""
        # Lookup is created before the mapper, matching the construction order
        # each test previously used inline.
        schema_lookup = SpecificityLookup(SCHEMA_DIR)
        template_mapper = SPARQLToContextMapper()
        return SpecificityAwareClassifier(
            mapper=template_mapper,
            lookup=schema_lookup,
            default_threshold=0.6,
        )

    def test_classifier_uses_lookup_directly(self):
        """``get_filtered_classes`` must return a non-empty result containing ``Custodian``."""
        wrapper = self._build_classifier()

        # Test direct filtering via convenience method
        selected = wrapper.get_filtered_classes(
            ContextTemplate.LOCATION_BROWSE,
            threshold=0.6
        )

        assert len(selected) > 0
        assert "Custodian" in selected  # Core class should always be included

    def test_classifier_context_template_mapping(self):
        """``get_context_template`` must resolve the archive slot to ``ARCHIVE_SEARCH``."""
        wrapper = self._build_classifier()

        # Test mapping via convenience method
        resolved = wrapper.get_context_template(
            "list_institutions_by_type_city",
            slots={"institution_type": "A"}
        )

        assert resolved == ContextTemplate.ARCHIVE_SEARCH
|
|
|
|
|
|
class TestEndToEndPipeline:
    """Walks the mapper and the lookup together, from SPARQL template to class list."""

    def test_archive_query_pipeline(self):
        """Archive slot: map to ``ARCHIVE_SEARCH``, then check the filtered class list."""
        # 1. Start with a SPARQL template and slots
        template_id = "list_institutions_by_type_city"
        slot_values = {"institution_type": "A"}

        # 2. Map to context template
        resolved = SPARQLToContextMapper().map(template_id, slot_values)
        assert resolved == ContextTemplate.ARCHIVE_SEARCH

        # 3. Get filtered classes
        schema_lookup = SpecificityLookup(SCHEMA_DIR)
        selected = schema_lookup.get_classes_for_template(resolved, threshold=0.6)

        # 4. Verify expected classes are included
        for required in ("Custodian", "CustodianArchive", "Collection"):
            assert required in selected

        # 5. Verify class count is reasonable (not too many, not too few)
        selected_count = len(selected)
        assert 50 <= selected_count <= 700, f"Unexpected class count: {selected_count}"

    def test_museum_query_pipeline(self):
        """Museum slot: map to ``MUSEUM_SEARCH``, then check ``Custodian`` is present."""
        template_id = "list_institutions_by_type_city"
        slot_values = {"institution_type": "M"}

        resolved = SPARQLToContextMapper().map(template_id, slot_values)
        assert resolved == ContextTemplate.MUSEUM_SEARCH

        schema_lookup = SpecificityLookup(SCHEMA_DIR)
        selected = schema_lookup.get_classes_for_template(resolved, threshold=0.6)

        # Core classes should be included
        assert "Custodian" in selected

    def test_person_research_pipeline(self):
        """Role slot: map to ``PERSON_RESEARCH``, then check the expected classes."""
        template_id = "find_person_by_role"
        slot_values = {"role": "Director"}

        resolved = SPARQLToContextMapper().map(template_id, slot_values)
        assert resolved == ContextTemplate.PERSON_RESEARCH

        schema_lookup = SpecificityLookup(SCHEMA_DIR)
        selected = schema_lookup.get_classes_for_template(resolved, threshold=0.6)

        # Person-related classes should be included
        assert "PersonObservation" in selected
        assert "Custodian" in selected  # Core class always included
|
|
|
|
|
|
class TestSingletonBehavior:
    """Checks on the object returned by ``get_specificity_lookup``."""

    def test_get_specificity_lookup_returns_same_instance(self):
        """Two consecutive calls must return the identical object."""
        first = get_specificity_lookup()
        second = get_specificity_lookup()

        assert first is second

    def test_singleton_uses_default_schema_dir(self):
        """The shared lookup must expose more than 100 scored classes."""
        # Verify it loaded classes (means it found the schema dir)
        loaded = get_specificity_lookup().get_all_scores()
        assert len(loaded) > 100, "Singleton should load real schema files"
|