glam/tests/rag/test_specificity_mapper.py
kempersc 11983014bb Enhance specificity scoring system integration with existing infrastructure
- Updated documentation to clarify integration points with existing components in the RAG pipeline and DSPy framework.
- Added detailed mapping of SPARQL templates to context templates for improved specificity filtering.
- Implemented wrapper patterns around existing classifiers to extend functionality without duplication.
- Introduced new tests for the SpecificityAwareClassifier and SPARQLToContextMapper to ensure proper integration and functionality.
- Enhanced the CustodianRDFConverter to include ISO country and subregion codes from GHCID for better geospatial data handling.
2026-01-05 17:37:49 +01:00

687 lines
26 KiB
Python

"""
Tests for backend.rag.specificity.mapper module.
This module tests the SPARQL template to context template mapper, which:
- Maps SPARQL template IDs (from TemplateClassifier) to context templates
- Refines mappings based on institution_type slot values
- Provides reverse lookup (context → SPARQL templates)
Coverage:
- Basic SPARQL → context mapping
- Institution type refinement (A→archive_search, M→museum_search, etc.)
- Unknown template fallback to general_heritage
- _should_refine_by_institution_type() logic
- get_sparql_templates_for_context() reverse lookup
- Custom map injection via constructor
"""
from __future__ import annotations
import pytest
from backend.rag.specificity.mapper import (
SPARQL_TO_CONTEXT_MAP,
SPARQLToContextMapper,
get_sparql_to_context_mapper,
)
from backend.rag.specificity.models import (
ContextTemplate,
INSTITUTION_TYPE_TO_CONTEXT,
)
# =============================================================================
# Fixtures
# =============================================================================
@pytest.fixture
def mapper() -> SPARQLToContextMapper:
    """Provide a SPARQLToContextMapper built with no constructor arguments."""
    default_mapper = SPARQLToContextMapper()
    return default_mapper
@pytest.fixture
def custom_sparql_map() -> dict[str, ContextTemplate]:
    """Provide a two-entry SPARQL map used to exercise constructor injection."""
    return dict(
        custom_template_one=ContextTemplate.ARCHIVE_SEARCH,
        custom_template_two=ContextTemplate.MUSEUM_SEARCH,
    )
@pytest.fixture
def custom_type_map() -> dict[str, ContextTemplate]:
    """Provide an institution-type map used to exercise constructor injection."""
    type_overrides: dict[str, ContextTemplate] = {}
    # "X" is a type code invented for these tests.
    type_overrides["X"] = ContextTemplate.COLLECTION_DISCOVERY
    # "A" is pointed at a non-default context so an override can be detected.
    type_overrides["A"] = ContextTemplate.PERSON_RESEARCH
    return type_overrides
# =============================================================================
# Test: Basic SPARQL → Context Mapping
# =============================================================================
class TestBasicSPARQLMapping:
    """Test basic SPARQL template ID to context template mapping.

    Each loop-based assertion carries a failure message naming the template
    ID under test, so a failing case can be identified from the report alone.
    """

    def test_location_based_queries_map_to_location_browse(
        self, mapper: SPARQLToContextMapper
    ):
        """Location-based SPARQL templates should map to LOCATION_BROWSE."""
        location_templates = [
            "list_institutions_by_type_city",
            "list_institutions_by_type_region",
            "list_institutions_by_type_country",
            "list_all_institutions_in_city",
            "compare_locations",
            "count_institutions_by_type_location",
        ]
        for template_id in location_templates:
            result = mapper.map(template_id)
            assert result == ContextTemplate.LOCATION_BROWSE, (
                f"Expected {template_id} → LOCATION_BROWSE, got {result}"
            )

    def test_identifier_queries_map_to_identifier_lookup(
        self, mapper: SPARQLToContextMapper
    ):
        """Identifier-based SPARQL templates should map to IDENTIFIER_LOOKUP."""
        identifier_templates = [
            "find_institution_by_name",
            "find_institution_by_identifier",
        ]
        for template_id in identifier_templates:
            result = mapper.map(template_id)
            assert result == ContextTemplate.IDENTIFIER_LOOKUP, (
                f"Expected {template_id} → IDENTIFIER_LOOKUP, got {result}"
            )

    def test_person_queries_map_to_person_research(
        self, mapper: SPARQLToContextMapper
    ):
        """Person-related SPARQL templates should map to PERSON_RESEARCH."""
        person_templates = [
            "find_person_by_role",
            "find_people_at_institution",
            "list_staff_by_role_category",
        ]
        for template_id in person_templates:
            result = mapper.map(template_id)
            assert result == ContextTemplate.PERSON_RESEARCH, (
                f"Expected {template_id} → PERSON_RESEARCH, got {result}"
            )

    def test_collection_queries_map_to_collection_discovery(
        self, mapper: SPARQLToContextMapper
    ):
        """Collection-related SPARQL templates should map to COLLECTION_DISCOVERY."""
        collection_templates = [
            "list_collections_by_type",
            "find_collections_by_subject",
        ]
        for template_id in collection_templates:
            result = mapper.map(template_id)
            assert result == ContextTemplate.COLLECTION_DISCOVERY, (
                f"Expected {template_id} → COLLECTION_DISCOVERY, got {result}"
            )

    def test_digital_platform_queries_map_correctly(
        self, mapper: SPARQLToContextMapper
    ):
        """Digital platform SPARQL templates should map to DIGITAL_PLATFORM."""
        platform_templates = [
            "find_digital_platforms",
            "list_platform_integrations",
        ]
        for template_id in platform_templates:
            result = mapper.map(template_id)
            assert result == ContextTemplate.DIGITAL_PLATFORM, (
                f"Expected {template_id} → DIGITAL_PLATFORM, got {result}"
            )

    def test_organizational_queries_map_correctly(
        self, mapper: SPARQLToContextMapper
    ):
        """The founding-date template should map to ORGANIZATIONAL_CHANGE."""
        result = mapper.map("find_institutions_by_founding_date")
        assert result == ContextTemplate.ORGANIZATIONAL_CHANGE, (
            "Expected find_institutions_by_founding_date → "
            f"ORGANIZATIONAL_CHANGE, got {result}"
        )

    def test_general_queries_map_to_general_heritage(
        self, mapper: SPARQLToContextMapper
    ):
        """General SPARQL templates should map to GENERAL_HERITAGE."""
        general_templates = [
            "count_institutions_by_type",
            "find_custodians_by_budget_threshold",
            "none",
        ]
        for template_id in general_templates:
            result = mapper.map(template_id)
            assert result == ContextTemplate.GENERAL_HERITAGE, (
                f"Expected {template_id} → GENERAL_HERITAGE, got {result}"
            )

    def test_all_map_entries_have_valid_context_templates(self):
        """All entries in SPARQL_TO_CONTEXT_MAP should have valid ContextTemplate values."""
        for sparql_id, context in SPARQL_TO_CONTEXT_MAP.items():
            assert isinstance(context, ContextTemplate), (
                f"SPARQL_TO_CONTEXT_MAP['{sparql_id}'] = {context} is not a ContextTemplate"
            )
# =============================================================================
# Test: Unknown Template Fallback
# =============================================================================
class TestUnknownTemplateFallback:
    """Verify that unrecognised SPARQL template IDs resolve to GENERAL_HERITAGE."""

    def test_unknown_template_returns_general_heritage(
        self, mapper: SPARQLToContextMapper
    ):
        """IDs assumed absent from the default map (including "") fall back."""
        fallback = ContextTemplate.GENERAL_HERITAGE
        for template_id in (
            "unknown_template",
            "not_in_map",
            "",
            "some_random_string",
            "list_institutions_by_unknown_criteria",
        ):
            result = mapper.map(template_id)
            assert result == fallback, (
                f"Expected unknown template '{template_id}' → GENERAL_HERITAGE, got {result}"
            )

    def test_none_template_maps_to_general_heritage(
        self, mapper: SPARQLToContextMapper
    ):
        """The literal template ID 'none' resolves to GENERAL_HERITAGE."""
        assert mapper.map("none") == ContextTemplate.GENERAL_HERITAGE
# =============================================================================
# Test: Institution Type Refinement
# =============================================================================
class TestInstitutionTypeRefinement:
    """Test refinement of context templates based on institution_type slot.

    Every case queries the ``list_institutions_by_type_city`` template. The
    unknown/empty/missing-slot cases expect its unrefined result,
    LOCATION_BROWSE.
    """

    # Template ID shared by every case in this class.
    _TEMPLATE_ID = "list_institutions_by_type_city"

    @staticmethod
    def _map_with_type(
        mapper: SPARQLToContextMapper, institution_type: str
    ) -> ContextTemplate:
        """Map the shared template with the given institution_type slot value."""
        return mapper.map(
            TestInstitutionTypeRefinement._TEMPLATE_ID,
            slots={"institution_type": institution_type},
        )

    def test_archive_type_refines_to_archive_search(
        self, mapper: SPARQLToContextMapper
    ):
        """Institution type 'A' (archive) should refine to ARCHIVE_SEARCH."""
        result = self._map_with_type(mapper, "A")
        assert result == ContextTemplate.ARCHIVE_SEARCH

    def test_museum_type_refines_to_museum_search(
        self, mapper: SPARQLToContextMapper
    ):
        """Institution type 'M' (museum) should refine to MUSEUM_SEARCH."""
        result = self._map_with_type(mapper, "M")
        assert result == ContextTemplate.MUSEUM_SEARCH

    def test_gallery_type_refines_to_museum_search(
        self, mapper: SPARQLToContextMapper
    ):
        """Institution type 'G' (gallery) should refine to MUSEUM_SEARCH."""
        result = self._map_with_type(mapper, "G")
        assert result == ContextTemplate.MUSEUM_SEARCH

    def test_library_type_refines_to_library_search(
        self, mapper: SPARQLToContextMapper
    ):
        """Institution type 'L' (library) should refine to LIBRARY_SEARCH."""
        result = self._map_with_type(mapper, "L")
        assert result == ContextTemplate.LIBRARY_SEARCH

    def test_research_type_refines_to_library_search(
        self, mapper: SPARQLToContextMapper
    ):
        """Institution type 'R' (research center) should refine to LIBRARY_SEARCH."""
        result = self._map_with_type(mapper, "R")
        assert result == ContextTemplate.LIBRARY_SEARCH

    def test_digital_type_refines_to_digital_platform(
        self, mapper: SPARQLToContextMapper
    ):
        """Institution type 'D' (digital platform) should refine to DIGITAL_PLATFORM."""
        result = self._map_with_type(mapper, "D")
        assert result == ContextTemplate.DIGITAL_PLATFORM

    def test_holy_site_type_refines_to_archive_search(
        self, mapper: SPARQLToContextMapper
    ):
        """Institution type 'H' (holy site) should refine to ARCHIVE_SEARCH."""
        result = self._map_with_type(mapper, "H")
        assert result == ContextTemplate.ARCHIVE_SEARCH

    def test_education_type_refines_to_general_heritage(
        self, mapper: SPARQLToContextMapper
    ):
        """Institution type 'E' (education provider) should refine to GENERAL_HERITAGE."""
        result = self._map_with_type(mapper, "E")
        assert result == ContextTemplate.GENERAL_HERITAGE

    def test_lowercase_institution_type_is_normalized(
        self, mapper: SPARQLToContextMapper
    ):
        """A lowercase 'a' should give the same result as uppercase 'A'."""
        result = self._map_with_type(mapper, "a")
        assert result == ContextTemplate.ARCHIVE_SEARCH

    def test_full_name_institution_types(self, mapper: SPARQLToContextMapper):
        """Full name institution types should also work."""
        test_cases = [
            ("ARCHIVE", ContextTemplate.ARCHIVE_SEARCH),
            ("MUSEUM", ContextTemplate.MUSEUM_SEARCH),
            ("GALLERY", ContextTemplate.MUSEUM_SEARCH),
            ("LIBRARY", ContextTemplate.LIBRARY_SEARCH),
            ("RESEARCH_CENTER", ContextTemplate.LIBRARY_SEARCH),
            ("DIGITAL_PLATFORM", ContextTemplate.DIGITAL_PLATFORM),
        ]
        for inst_type, expected in test_cases:
            result = self._map_with_type(mapper, inst_type)
            # The separator keeps the type name and expected value readable;
            # without it the two values are rendered fused together.
            assert result == expected, (
                f"Expected {inst_type} → {expected}, got {result}"
            )

    def test_unknown_institution_type_keeps_base_context(
        self, mapper: SPARQLToContextMapper
    ):
        """Unknown institution types should not change the base context."""
        result = self._map_with_type(mapper, "UNKNOWN_TYPE")
        # Should return base mapping (LOCATION_BROWSE), not a refined one.
        assert result == ContextTemplate.LOCATION_BROWSE

    def test_empty_institution_type_keeps_base_context(
        self, mapper: SPARQLToContextMapper
    ):
        """Empty institution type should not change the base context."""
        result = self._map_with_type(mapper, "")
        assert result == ContextTemplate.LOCATION_BROWSE

    def test_missing_institution_type_slot_keeps_base_context(
        self, mapper: SPARQLToContextMapper
    ):
        """Missing institution_type slot should not change the base context."""
        # Slots are present but carry no institution_type key at all.
        result = mapper.map(
            self._TEMPLATE_ID,
            slots={"other_slot": "value"},
        )
        assert result == ContextTemplate.LOCATION_BROWSE
# =============================================================================
# Test: _should_refine_by_institution_type()
# =============================================================================
class TestShouldRefineByInstitutionType:
    """Exercise the mapper's _should_refine_by_institution_type() predicate."""

    def test_refinable_templates(self, mapper: SPARQLToContextMapper):
        """These template IDs are expected to be eligible for type refinement."""
        for template_id in (
            "list_institutions_by_type_city",
            "list_institutions_by_type_region",
            "list_institutions_by_type_country",
            "count_institutions_by_type_location",
            "find_institutions_by_founding_date",
        ):
            is_refinable = mapper._should_refine_by_institution_type(template_id)
            assert is_refinable, (
                f"Expected {template_id} to be refinable"
            )

    def test_non_refinable_templates(self, mapper: SPARQLToContextMapper):
        """These template IDs are expected to be excluded from type refinement."""
        for template_id in (
            "find_person_by_role",
            "find_people_at_institution",
            "list_collections_by_type",
            "find_institution_by_name",
            "find_digital_platforms",
            "count_institutions_by_type",
            "none",
        ):
            is_refinable = mapper._should_refine_by_institution_type(template_id)
            assert not is_refinable, (
                f"Expected {template_id} to NOT be refinable"
            )

    def test_non_refinable_template_ignores_institution_type(
        self, mapper: SPARQLToContextMapper
    ):
        """An institution_type slot must not alter a non-refinable template's result."""
        # find_person_by_role is expected to stay PERSON_RESEARCH even though
        # an archive type code is supplied.
        archive_slots = {"institution_type": "A"}
        assert (
            mapper.map("find_person_by_role", slots=archive_slots)
            == ContextTemplate.PERSON_RESEARCH
        )
# =============================================================================
# Test: get_sparql_templates_for_context() Reverse Lookup
# =============================================================================
class TestGetSPARQLTemplatesForContext:
    """Check the context template → SPARQL template IDs reverse lookup."""

    def test_location_browse_returns_expected_templates(
        self, mapper: SPARQLToContextMapper
    ):
        """LOCATION_BROWSE must include (at least) the location template IDs."""
        templates = mapper.get_sparql_templates_for_context(
            ContextTemplate.LOCATION_BROWSE
        )
        # Membership is checked per ID, so extra entries are tolerated here.
        for exp in (
            "list_institutions_by_type_city",
            "list_institutions_by_type_region",
            "list_institutions_by_type_country",
            "list_all_institutions_in_city",
            "compare_locations",
            "count_institutions_by_type_location",
        ):
            assert exp in templates, f"Expected {exp} in LOCATION_BROWSE templates"

    def test_person_research_returns_expected_templates(
        self, mapper: SPARQLToContextMapper
    ):
        """PERSON_RESEARCH must yield exactly the person template IDs."""
        found = mapper.get_sparql_templates_for_context(
            ContextTemplate.PERSON_RESEARCH
        )
        assert set(found) == {
            "find_person_by_role",
            "find_people_at_institution",
            "list_staff_by_role_category",
        }

    def test_collection_discovery_returns_expected_templates(
        self, mapper: SPARQLToContextMapper
    ):
        """COLLECTION_DISCOVERY must yield exactly the collection template IDs."""
        found = mapper.get_sparql_templates_for_context(
            ContextTemplate.COLLECTION_DISCOVERY
        )
        assert set(found) == {
            "list_collections_by_type",
            "find_collections_by_subject",
        }

    def test_general_heritage_returns_expected_templates(
        self, mapper: SPARQLToContextMapper
    ):
        """GENERAL_HERITAGE must yield exactly the general template IDs."""
        found = mapper.get_sparql_templates_for_context(
            ContextTemplate.GENERAL_HERITAGE
        )
        assert set(found) == {
            "count_institutions_by_type",
            "find_custodians_by_budget_threshold",
            "none",
        }

    def test_unused_context_returns_empty_list(
        self, mapper: SPARQLToContextMapper
    ):
        """Contexts without a direct SPARQL mapping must yield an empty list.

        ARCHIVE_SEARCH, MUSEUM_SEARCH and LIBRARY_SEARCH are expected to be
        reachable only through institution type refinement, never through a
        direct SPARQL mapping.
        """
        refinement_only = (
            ContextTemplate.ARCHIVE_SEARCH,
            ContextTemplate.MUSEUM_SEARCH,
            ContextTemplate.LIBRARY_SEARCH,
        )
        for template in refinement_only:
            templates = mapper.get_sparql_templates_for_context(template)
            assert templates == [], (
                f"Expected {template} to have no direct SPARQL mappings"
            )
# =============================================================================
# Test: get_all_context_templates()
# =============================================================================
class TestGetAllContextTemplates:
    """Check what get_all_context_templates() reports."""

    def test_returns_all_ten_templates(self, mapper: SPARQLToContextMapper):
        """The returned collection must have exactly 10 entries."""
        all_templates = mapper.get_all_context_templates()
        assert len(all_templates) == 10

    def test_returns_all_expected_templates(self, mapper: SPARQLToContextMapper):
        """The returned values, compared as a set, must match the ten known templates."""
        reported = set(mapper.get_all_context_templates())
        assert reported == {
            ContextTemplate.ARCHIVE_SEARCH,
            ContextTemplate.MUSEUM_SEARCH,
            ContextTemplate.LIBRARY_SEARCH,
            ContextTemplate.COLLECTION_DISCOVERY,
            ContextTemplate.PERSON_RESEARCH,
            ContextTemplate.LOCATION_BROWSE,
            ContextTemplate.IDENTIFIER_LOOKUP,
            ContextTemplate.ORGANIZATIONAL_CHANGE,
            ContextTemplate.DIGITAL_PLATFORM,
            ContextTemplate.GENERAL_HERITAGE,
        }
# =============================================================================
# Test: Custom Map Injection via Constructor
# =============================================================================
class TestCustomMapInjection:
    """Check that maps passed to the constructor drive the mapper's results."""

    def test_custom_sparql_map(
        self, custom_sparql_map: dict[str, ContextTemplate]
    ):
        """An injected SPARQL map is used in place of the default one."""
        injected = SPARQLToContextMapper(sparql_map=custom_sparql_map)

        # Entries from the injected map resolve as declared.
        archive_result = injected.map("custom_template_one")
        assert archive_result == ContextTemplate.ARCHIVE_SEARCH
        museum_result = injected.map("custom_template_two")
        assert museum_result == ContextTemplate.MUSEUM_SEARCH

        # A template ID from the default map is now unknown, so it is
        # expected to fall back to GENERAL_HERITAGE.
        default_result = injected.map("list_institutions_by_type_city")
        assert default_result == ContextTemplate.GENERAL_HERITAGE

    def test_custom_type_refinement_map(
        self, custom_type_map: dict[str, ContextTemplate]
    ):
        """An injected type map controls refinement while the SPARQL map stays default."""
        injected = SPARQLToContextMapper(type_refinement_map=custom_type_map)
        template_id = "list_institutions_by_type_city"

        # The invented "X" code refines according to the injected map.
        assert (
            injected.map(template_id, slots={"institution_type": "X"})
            == ContextTemplate.COLLECTION_DISCOVERY
        )

        # "A" follows the injected value rather than ARCHIVE_SEARCH.
        assert (
            injected.map(template_id, slots={"institution_type": "A"})
            == ContextTemplate.PERSON_RESEARCH
        )

    def test_both_custom_maps(
        self,
        custom_sparql_map: dict[str, ContextTemplate],
        custom_type_map: dict[str, ContextTemplate],
    ):
        """Injecting both maps at once keeps each one effective."""
        injected = SPARQLToContextMapper(
            sparql_map=custom_sparql_map,
            type_refinement_map=custom_type_map,
        )

        # Plain lookup goes through the injected SPARQL map.
        assert injected.map("custom_template_one") == ContextTemplate.ARCHIVE_SEARCH

        # custom_template_one is expected not to be a refinable template, so
        # the "X" type code must leave its result untouched.
        with_type = injected.map(
            "custom_template_one",
            slots={"institution_type": "X"},
        )
        assert with_type == ContextTemplate.ARCHIVE_SEARCH
# =============================================================================
# Test: Singleton Instance
# =============================================================================
class TestSingletonInstance:
    """Check the module-level get_sparql_to_context_mapper() accessor."""

    def test_get_sparql_to_context_mapper_returns_instance(self):
        """The accessor must hand back a SPARQLToContextMapper object."""
        assert isinstance(get_sparql_to_context_mapper(), SPARQLToContextMapper)

    def test_singleton_returns_same_instance(self):
        """Two consecutive calls must yield the identical object."""
        first = get_sparql_to_context_mapper()
        second = get_sparql_to_context_mapper()
        assert first is second
# =============================================================================
# Test: INSTITUTION_TYPE_TO_CONTEXT Mapping Coverage
# =============================================================================
class TestInstitutionTypeMapping:
    """Test the INSTITUTION_TYPE_TO_CONTEXT mapping coverage."""

    def test_all_single_letter_codes_have_mappings(self):
        """All common single-letter codes should have mappings."""
        expected_codes = ["A", "M", "G", "L", "R", "D", "E", "H"]
        for code in expected_codes:
            assert code in INSTITUTION_TYPE_TO_CONTEXT, (
                f"Missing mapping for single-letter code: {code}"
            )

    def test_all_full_name_codes_have_mappings(self):
        """All listed full-name codes should have mappings."""
        full_names = [
            "ARCHIVE",
            "MUSEUM",
            "GALLERY",
            "LIBRARY",
            "RESEARCH_CENTER",
            "DIGITAL_PLATFORM",
            "EDUCATION_PROVIDER",
            "HOLY_SACRED_SITE",
        ]
        for name in full_names:
            assert name in INSTITUTION_TYPE_TO_CONTEXT, (
                f"Missing mapping for full name: {name}"
            )

    def test_single_letter_and_full_name_map_consistently(self):
        """Single-letter and full-name codes should map to same context."""
        # NOTE(review): "H"/"HOLY_SACRED_SITE" is covered by the two presence
        # tests in this class but is not paired here — confirm whether that
        # omission is intentional before adding it.
        pairs = [
            ("A", "ARCHIVE"),
            ("M", "MUSEUM"),
            ("G", "GALLERY"),
            ("L", "LIBRARY"),
            ("R", "RESEARCH_CENTER"),
            ("D", "DIGITAL_PLATFORM"),
            ("E", "EDUCATION_PROVIDER"),
        ]
        for letter, full in pairs:
            letter_context = INSTITUTION_TYPE_TO_CONTEXT[letter]
            full_context = INSTITUTION_TYPE_TO_CONTEXT[full]
            # Separators keep each code distinguishable from its mapped value
            # in the failure report.
            assert letter_context == full_context, (
                f"Inconsistent mapping: {letter} → {letter_context}, "
                f"{full} → {full_context}"
            )
# =============================================================================
# Test: Edge Cases
# =============================================================================
class TestEdgeCases:
    """Check that unusual slot inputs leave the base mapping intact."""

    def test_none_slots_does_not_crash(self, mapper: SPARQLToContextMapper):
        """slots=None must be accepted and yield the base context."""
        assert (
            mapper.map("list_institutions_by_type_city", slots=None)
            == ContextTemplate.LOCATION_BROWSE
        )

    def test_empty_slots_does_not_crash(self, mapper: SPARQLToContextMapper):
        """An empty slots dict must be accepted and yield the base context."""
        assert (
            mapper.map("list_institutions_by_type_city", slots={})
            == ContextTemplate.LOCATION_BROWSE
        )

    def test_whitespace_institution_type_handled(
        self, mapper: SPARQLToContextMapper
    ):
        """A whitespace-only institution_type must yield the base context."""
        # Presumably the mapper uppercases but does not strip the value
        # (per the original author's note — not visible here); the expected
        # outcome is the unrefined base context.
        blank_slots = {"institution_type": " "}
        outcome = mapper.map("list_institutions_by_type_city", slots=blank_slots)
        assert outcome == ContextTemplate.LOCATION_BROWSE

    def test_numeric_institution_type_handled(
        self, mapper: SPARQLToContextMapper
    ):
        """A digit-only institution_type string must yield the base context."""
        numeric_slots = {"institution_type": "123"}
        outcome = mapper.map("list_institutions_by_type_city", slots=numeric_slots)
        assert outcome == ContextTemplate.LOCATION_BROWSE