# glam/tests/rag/test_specificity_integration.py

"""
Integration tests for specificity score system with real schema files.

These tests verify that the specificity lookup correctly loads and filters
annotations from the actual LinkML schema YAML files.
"""

import pytest
from pathlib import Path

from backend.rag.specificity import (
    ContextTemplate,
    SpecificityLookup,
    SpecificityScore,
    SPARQLToContextMapper,
    SpecificityAwareClassifier,
    get_specificity_lookup,
)


# Directory holding the real LinkML schema files, located relative to the
# third ancestor directory of this test module.
SCHEMA_DIR = Path(__file__).parent.parent.parent.joinpath("schemas", "20251121", "linkml")


class TestRealSchemaLoading:
    """Test loading specificity scores from real schema files.

    Each test receives a fresh ``SpecificityLookup`` built over ``SCHEMA_DIR``
    via the ``lookup`` fixture.
    """

    @pytest.fixture
    def lookup(self) -> SpecificityLookup:
        """Create lookup with real schema directory."""
        return SpecificityLookup(SCHEMA_DIR)

    def test_loads_many_classes(self, lookup: SpecificityLookup):
        """Verify a substantial number of classes are loaded."""
        all_scores = lookup.get_all_scores()
        # We expect ~600+ classes from 300+ files; 300 is the accepted minimum.
        assert len(all_scores) >= 300, f"Only {len(all_scores)} classes loaded"

    def test_core_classes_have_low_scores(self, lookup: SpecificityLookup):
        """Core classes should have low specificity (broadly relevant)."""
        core_classes = ["Custodian", "CustodianIdentifier", "Collection", "CustodianPlace"]

        for class_name in core_classes:
            score = lookup.get_score(class_name)
            assert score.base_score <= 0.35, (
                f"{class_name} should have low base_score (<=0.35), got {score.base_score}"
            )

    def test_type_classes_have_moderate_to_high_scores(self, lookup: SpecificityLookup):
        """Type/taxonomy classes should have moderate to high specificity."""
        type_classes = ["MuseumType"]

        for class_name in type_classes:
            score = lookup.get_score(class_name)
            assert score.base_score >= 0.5, (
                f"{class_name} should have base_score (>=0.5), got {score.base_score}"
            )

    def test_all_classes_have_annotations(self, lookup: SpecificityLookup):
        """All loaded classes should have specificity annotations."""
        all_scores = lookup.get_all_scores()

        # Guard against a vacuous pass: with zero loaded classes the tolerance
        # check below degenerates to ``0 <= 0`` and verifies nothing.
        assert len(all_scores) > 0, "No classes loaded from schema directory"

        without_annotation = [
            name for name, score in all_scores.items()
            if score.rationale == "No specificity annotation (using default)"
        ]

        # Allow some tolerance for new classes added without annotations
        tolerance_percent = 5  # 5% tolerance
        max_without = len(all_scores) * (tolerance_percent / 100)

        assert len(without_annotation) <= max_without, (
            f"{len(without_annotation)} classes missing annotations: "
            f"{without_annotation[:10]}..."
        )

    def test_template_scores_are_populated(self, lookup: SpecificityLookup):
        """Classes should have template-specific scores."""
        all_scores = lookup.get_all_scores()

        # Guard against a vacuous pass: with zero loaded classes the coverage
        # check below degenerates to ``0 >= 0`` and verifies nothing.
        assert len(all_scores) > 0, "No classes loaded from schema directory"

        with_template_scores = [
            name for name, score in all_scores.items()
            if score.template_scores
        ]

        # Most classes should have template scores
        min_expected = len(all_scores) * 0.9  # 90% minimum
        assert len(with_template_scores) >= min_expected, (
            f"Only {len(with_template_scores)}/{len(all_scores)} have template scores"
        )


class TestTemplateFiltering:
    """Test filtering classes by context template.

    Each test receives a fresh ``SpecificityLookup`` built over ``SCHEMA_DIR``
    via the ``lookup`` fixture.
    """

    @pytest.fixture
    def lookup(self) -> SpecificityLookup:
        """Create lookup with real schema directory."""
        return SpecificityLookup(SCHEMA_DIR)

    def test_archive_search_includes_archive_specific_classes(self, lookup: SpecificityLookup):
        """Archive search should include archive-related classes."""
        classes = lookup.get_classes_for_template(
            ContextTemplate.ARCHIVE_SEARCH,
            threshold=0.6
        )

        # CustodianArchive has archive_search=0.25, should pass
        assert "CustodianArchive" in classes, "CustodianArchive should pass archive_search threshold"

    def test_museum_search_includes_core_classes(self, lookup: SpecificityLookup):
        """Museum search should include core classes."""
        classes = lookup.get_classes_for_template(
            ContextTemplate.MUSEUM_SEARCH,
            threshold=0.6
        )

        assert "Custodian" in classes, "Core Custodian should always pass"

    def test_general_heritage_includes_most_classes(self, lookup: SpecificityLookup):
        """General heritage template should include most classes at threshold 0.6."""
        classes = lookup.get_classes_for_template(
            ContextTemplate.GENERAL_HERITAGE,
            threshold=0.6
        )

        all_scores = lookup.get_all_scores()
        # Fail with a clear message instead of a ZeroDivisionError when the
        # schema directory yields no classes at all.
        assert len(all_scores) > 0, "No classes loaded from schema directory"
        coverage = len(classes) / len(all_scores)

        # Most classes have base_score 0.5, so should pass 0.6 threshold
        assert coverage >= 0.8, f"General heritage should cover 80%+ classes, got {coverage:.1%}"

    def test_threshold_affects_class_count(self, lookup: SpecificityLookup):
        """Stricter thresholds should return fewer classes."""
        template = ContextTemplate.ARCHIVE_SEARCH

        classes_60 = lookup.get_classes_for_template(template, threshold=0.6)
        classes_40 = lookup.get_classes_for_template(template, threshold=0.4)
        classes_20 = lookup.get_classes_for_template(template, threshold=0.2)

        # Lower threshold values are the stricter ones here, so the counts
        # must be non-increasing from 0.6 down to 0.2.
        assert len(classes_60) >= len(classes_40) >= len(classes_20), (
            f"Stricter thresholds should return fewer classes: "
            f"0.6={len(classes_60)}, 0.4={len(classes_40)}, 0.2={len(classes_20)}"
        )

    def test_filtered_scores_returns_score_objects(self, lookup: SpecificityLookup):
        """get_filtered_scores should return SpecificityScore objects."""
        filtered = lookup.get_filtered_scores(
            ContextTemplate.COLLECTION_DISCOVERY,
            threshold=0.5
        )

        assert len(filtered) > 0, "Should return some filtered scores"

        # Every entry must be keyed by the class name its score carries.
        for class_name, score in filtered.items():
            assert isinstance(score, SpecificityScore)
            assert score.class_name == class_name


class TestSPARQLToContextMapping:
    """Check how SPARQL template IDs are translated into context templates."""

    @pytest.fixture
    def mapper(self) -> SPARQLToContextMapper:
        """Provide a default-constructed mapper for each test."""
        return SPARQLToContextMapper()

    def test_known_templates_map_correctly(self, mapper: SPARQLToContextMapper):
        """Each known SPARQL template ID must resolve to its expected context."""
        # Template IDs are the ones the mapper itself uses; order is preserved.
        expected_pairs = [
            ("list_institutions_by_type_city", ContextTemplate.LOCATION_BROWSE),
            ("list_institutions_by_type_region", ContextTemplate.LOCATION_BROWSE),
            ("find_person_by_role", ContextTemplate.PERSON_RESEARCH),
            ("list_collections_by_type", ContextTemplate.COLLECTION_DISCOVERY),
            ("find_institution_by_identifier", ContextTemplate.IDENTIFIER_LOOKUP),
        ]

        for sparql_id, expected in expected_pairs:
            result = mapper.map(sparql_id)
            assert result == expected, f"{sparql_id} should map to {expected}, got {result}"

    def test_institution_type_refinement_for_refinable_templates(self, mapper: SPARQLToContextMapper):
        """An institution_type slot should refine the context of a refinable template."""
        # list_institutions_by_type_city is a refinable template, so the
        # institution type code in the slots decides the resulting context.
        refinements = (
            ("A", ContextTemplate.ARCHIVE_SEARCH),
            ("M", ContextTemplate.MUSEUM_SEARCH),
        )

        for type_code, refined_context in refinements:
            mapped = mapper.map(
                "list_institutions_by_type_city",
                slots={"institution_type": type_code},
            )
            assert mapped == refined_context

    def test_unknown_template_returns_general(self, mapper: SPARQLToContextMapper):
        """An unrecognised SPARQL template ID should fall back to general heritage."""
        assert mapper.map("unknown_template_xyz") == ContextTemplate.GENERAL_HERITAGE


class TestSpecificityAwareClassifier:
    """Exercise the classifier wrapper that layers specificity filtering on top."""

    @staticmethod
    def _build_classifier() -> SpecificityAwareClassifier:
        """Assemble a classifier over the real schema with a 0.6 default threshold."""
        schema_lookup = SpecificityLookup(SCHEMA_DIR)
        template_mapper = SPARQLToContextMapper()
        return SpecificityAwareClassifier(
            mapper=template_mapper,
            lookup=schema_lookup,
            default_threshold=0.6,
        )

    def test_classifier_uses_lookup_directly(self):
        """get_filtered_classes should return lookup-filtered classes for a template."""
        classifier = self._build_classifier()

        # Filter directly through the classifier's convenience method.
        filtered_names = classifier.get_filtered_classes(
            ContextTemplate.LOCATION_BROWSE,
            threshold=0.6,
        )

        assert len(filtered_names) > 0
        # The core Custodian class is expected to survive the filter.
        assert "Custodian" in filtered_names

    def test_classifier_context_template_mapping(self):
        """get_context_template should map a SPARQL template plus slots to a context."""
        classifier = self._build_classifier()

        # Map through the classifier's convenience method.
        resolved = classifier.get_context_template(
            "list_institutions_by_type_city",
            slots={"institution_type": "A"},
        )

        assert resolved == ContextTemplate.ARCHIVE_SEARCH


class TestEndToEndPipeline:
    """Walk the whole chain: SPARQL template -> context template -> filtered classes."""

    def test_archive_query_pipeline(self):
        """An archive-typed institution query should yield the archive class set."""
        # Steps 1-2: map the SPARQL template and its slots to a context template.
        context = SPARQLToContextMapper().map(
            "list_institutions_by_type_city",
            {"institution_type": "A"},
        )
        assert context == ContextTemplate.ARCHIVE_SEARCH

        # Step 3: filter the real schema classes for that context.
        classes = SpecificityLookup(SCHEMA_DIR).get_classes_for_template(
            context, threshold=0.6
        )

        # Step 4: the expected classes must all be present.
        for required in ("Custodian", "CustodianArchive", "Collection"):
            assert required in classes

        # Step 5: the result size must stay within a plausible range.
        assert 50 <= len(classes) <= 700, f"Unexpected class count: {len(classes)}"

    def test_museum_query_pipeline(self):
        """A museum-typed institution query should still include the core class."""
        context = SPARQLToContextMapper().map(
            "list_institutions_by_type_city",
            {"institution_type": "M"},
        )
        assert context == ContextTemplate.MUSEUM_SEARCH

        classes = SpecificityLookup(SCHEMA_DIR).get_classes_for_template(
            context, threshold=0.6
        )

        # The core Custodian class is expected in the result.
        assert "Custodian" in classes

    def test_person_research_pipeline(self):
        """A person-by-role query should yield person-related and core classes."""
        context = SPARQLToContextMapper().map(
            "find_person_by_role",
            {"role": "Director"},
        )
        assert context == ContextTemplate.PERSON_RESEARCH

        classes = SpecificityLookup(SCHEMA_DIR).get_classes_for_template(
            context, threshold=0.6
        )

        # Person-related class first, then the core class.
        for required in ("PersonObservation", "Custodian"):
            assert required in classes


class TestSingletonBehavior:
    """Check the module-level accessor that hands out a shared lookup."""

    def test_get_specificity_lookup_returns_same_instance(self):
        """Two consecutive calls must return the very same object."""
        first = get_specificity_lookup()
        second = get_specificity_lookup()

        assert second is first

    def test_singleton_uses_default_schema_dir(self):
        """The shared lookup, created without arguments, should hold real classes."""
        shared_lookup = get_specificity_lookup()

        # A populated score table means the schema directory was found.
        loaded_count = len(shared_lookup.get_all_scores())
        assert loaded_count > 100, "Singleton should load real schema files"