glam/tests/test_template_sparql.py

"""
Unit tests for Template-Based SPARQL Query Generation System

Tests the critical ordering of the pipeline:
1. ConversationContextResolver (FIRST - resolves follow-ups)
2. FykeFilter (on RESOLVED question, not raw input!)
3. TemplateClassifier
4. SlotExtractor
5. TemplateInstantiator

Run with: pytest tests/test_template_sparql.py -v
"""

import json
import pytest
from pathlib import Path
from unittest.mock import MagicMock, patch

# Add project root to path for imports
import sys
PROJECT_ROOT = Path(__file__).parent.parent
sys.path.insert(0, str(PROJECT_ROOT))

# Try to import the module under test
try:
    from backend.rag.template_sparql import (
        SynonymResolver,
        get_synonym_resolver,
        ConversationState,
        ConversationTurn,
        ResolvedQuestion,
        FykeResult,
        TemplateMatchResult,
        FykeFilterConfig,
        TemplateInstantiator,
        TemplateClassifier,
        SPARQL_PREFIXES,
    )
    TEMPLATE_SPARQL_AVAILABLE = True
except ImportError as e:
    # Module may not be importable in all environments (missing dspy, etc.)
    TEMPLATE_SPARQL_AVAILABLE = False
    IMPORT_ERROR = str(e)

    # Create placeholder classes for tests that don't need full module
    from pydantic import BaseModel, Field
    from typing import Optional, Literal
    from dataclasses import dataclass

    class ConversationTurn(BaseModel):
        role: Literal["user", "assistant"]
        content: str
        resolved_question: Optional[str] = None
        template_id: Optional[str] = None
        slots: dict = Field(default_factory=dict)
        results: list = Field(default_factory=list)

    class ConversationState(BaseModel):
        turns: list = Field(default_factory=list)
        current_slots: dict = Field(default_factory=dict)
        current_template_id: Optional[str] = None
        language: str = "nl"

        def add_turn(self, turn):
            self.turns.append(turn)
            if turn.role == "user" and turn.slots:
                self.current_slots.update(turn.slots)
            if turn.template_id:
                self.current_template_id = turn.template_id

        def get_previous_user_turn(self):
            for turn in reversed(self.turns):
                if turn.role == "user":
                    return turn
            return None

        def to_dspy_history(self):
            return {"messages": [{"role": t.role, "content": t.content} for t in self.turns[-6:]]}

    class ResolvedQuestion(BaseModel):
        original: str
        resolved: str
        is_follow_up: bool = False
        follow_up_type: Optional[str] = None
        inherited_slots: dict = Field(default_factory=dict)
        confidence: float = 1.0

    class FykeResult(BaseModel):
        is_relevant: bool
        confidence: float
        reasoning: str
        standard_response: Optional[str] = None

    class TemplateMatchResult(BaseModel):
        matched: bool
        template_id: Optional[str] = None
        confidence: float = 0.0
        slots: dict = Field(default_factory=dict)
        sparql: Optional[str] = None
        reasoning: str = ""

    class FykeFilterConfig(BaseModel):
        out_of_scope_keywords: list
        out_of_scope_categories: list
        heritage_keywords: list
        standard_response: dict

    class SynonymResolver:
        def __init__(self):
            self._loaded = False

        def load(self):
            pass

        def resolve_institution_type(self, term):
            mappings = {
                "musea": "M", "museum": "M", "museums": "M",
                "archieven": "A", "archief": "A", "archives": "A",
                "bibliotheken": "L", "bibliotheek": "L", "libraries": "L",
                "galerie": "G", "galleries": "G",
            }
            term_lower = term.lower().strip()
            if term_lower in mappings:
                return mappings[term_lower]
            if term.upper() in "MLAGORCUBESFIXPHDNT":
                return term.upper()
            return None

        def resolve_city(self, term):
            corrections = {
                "den haag": "Den Haag",
                "the hague": "Den Haag",
                "'s-gravenhage": "Den Haag",
            }
            term_lower = term.lower().strip()
            if term_lower in corrections:
                return corrections[term_lower]
            return term.title()

        def resolve_subregion(self, term):
            return None

        def resolve_country(self, term):
            if term.startswith("Q"):
                return term
            return None

        def resolve_budget_category(self, term):
            """Resolve budget category term to canonical slot name."""
            mappings = {
                # Dutch - Innovation
                "innovatie": "innovation",
                "innovaties": "innovation",
                "vernieuwing": "innovation",
                # English - Innovation
                "innovation": "innovation",
                "innovations": "innovation",
                "r_and_d": "innovation",
                "technology": "innovation",
                # German - Innovation
                "innovationen": "innovation",
                "erneuerung": "innovation",
                # Dutch - Digitization
                "digitalisering": "digitization",
                # English - Digitization
                "digitization": "digitization",
                "digitisation": "digitization",
                # German - Digitization
                "digitalisierung": "digitization",
                # Dutch - Preservation
                "conservering": "preservation",
                "restauratie": "preservation",
                # English - Preservation
                "preservation": "preservation",
                "conservation": "preservation",
                # German - Preservation
                "konservierung": "preservation",
                # Dutch - Personnel
                "personeel": "personnel",
                "salarissen": "personnel",
                # English - Personnel
                "personnel": "personnel",
                "staff": "personnel",
                "salaries": "personnel",
                # German - Personnel
                "personal": "personnel",
                # Dutch - Acquisition
                "aanwinsten": "acquisition",
                "aankopen": "acquisition",
                # English - Acquisition
                "acquisition": "acquisition",
                "acquisitions": "acquisition",
                # German - Acquisition
                "erwerbungen": "acquisition",
                # Dutch - Operating
                "operationeel": "operating",
                "exploitatie": "operating",
                # English - Operating
                "operating": "operating",
                "operations": "operating",
                # German - Operating
                "betriebskosten": "operating",
                # Dutch - Capital
                "kapitaal": "capital",
                "investeringen": "capital",
                # English - Capital
                "capital": "capital",
                "capex": "capital",
                # German - Capital
                "investitionen": "capital",
            }
            term_lower = term.lower().strip()
            if term_lower in mappings:
                return mappings[term_lower]
            return None

    def get_synonym_resolver():
        return SynonymResolver()

    SPARQL_PREFIXES = """PREFIX hc: <https://nde.nl/ontology/hc/>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX schema: <http://schema.org/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>"""

    class TemplateInstantiator:
        pass


# =============================================================================
# SYNONYM RESOLVER TESTS
# =============================================================================

class TestSynonymResolver:
    """Tests for SynonymResolver."""

    def test_resolve_institution_type_dutch(self):
        """Test Dutch institution type synonyms."""
        resolver = SynonymResolver()

        assert resolver.resolve_institution_type("musea") == "M"
        assert resolver.resolve_institution_type("museum") == "M"
        assert resolver.resolve_institution_type("archieven") == "A"
        assert resolver.resolve_institution_type("archief") == "A"
        assert resolver.resolve_institution_type("bibliotheken") == "L"
        assert resolver.resolve_institution_type("bibliotheek") == "L"
        assert resolver.resolve_institution_type("galerie") == "G"

    def test_resolve_institution_type_english(self):
        """Test English institution type synonyms."""
        resolver = SynonymResolver()

        assert resolver.resolve_institution_type("museums") == "M"
        assert resolver.resolve_institution_type("archives") == "A"
        assert resolver.resolve_institution_type("libraries") == "L"
        assert resolver.resolve_institution_type("galleries") == "G"

    def test_resolve_institution_type_code_passthrough(self):
        """Test that single-letter codes pass through."""
        resolver = SynonymResolver()

        assert resolver.resolve_institution_type("M") == "M"
        assert resolver.resolve_institution_type("A") == "A"
        assert resolver.resolve_institution_type("L") == "L"

    def test_resolve_institution_type_case_insensitive(self):
        """Test case insensitivity."""
        resolver = SynonymResolver()

        assert resolver.resolve_institution_type("MUSEA") == "M"
        assert resolver.resolve_institution_type("Archieven") == "A"
        assert resolver.resolve_institution_type("BIBLIOTHEKEN") == "L"

    def test_resolve_city_corrections(self):
        """Test city name corrections."""
        resolver = SynonymResolver()

        assert resolver.resolve_city("den haag") == "Den Haag"
        assert resolver.resolve_city("the hague") == "Den Haag"
        assert resolver.resolve_city("'s-gravenhage") == "Den Haag"
        assert resolver.resolve_city("amsterdam") == "Amsterdam"
        assert resolver.resolve_city("ROTTERDAM") == "Rotterdam"

    def test_resolve_subregion_dutch_provinces(self):
        """Test Dutch province resolution."""
        resolver = SynonymResolver()

        # These may need the validation rules loaded
        result = resolver.resolve_subregion("noord-holland")
        assert result is None or result == "NL-NH"

    def test_resolve_country(self):
        """Test country resolution to Wikidata Q-numbers."""
        resolver = SynonymResolver()

        # Direct Q-number passthrough
        assert resolver.resolve_country("Q55") == "Q55"


# =============================================================================
# CONVERSATION STATE TESTS
# =============================================================================

class TestConversationState:
    """Tests for ConversationState management."""

    def test_empty_state(self):
        """Test empty conversation state."""
        state = ConversationState()

        assert len(state.turns) == 0
        assert state.current_slots == {}
        assert state.current_template_id is None
        assert state.get_previous_user_turn() is None

    def test_add_user_turn(self):
        """Test adding user turn updates slots."""
        state = ConversationState()

        turn = ConversationTurn(
            role="user",
            content="Welke archieven zijn er in Den Haag?",
            resolved_question="Welke archieven zijn er in Den Haag?",
            template_id="list_institutions_by_type_city",
            slots={"institution_type": "A", "city": "Den Haag"}
        )
        state.add_turn(turn)

        assert len(state.turns) == 1
        assert state.current_slots["institution_type"] == "A"
        assert state.current_slots["city"] == "Den Haag"
        assert state.current_template_id == "list_institutions_by_type_city"

    def test_slot_inheritance(self):
        """Test that slots are inherited across turns."""
        state = ConversationState()

        # First turn sets institution_type and city
        turn1 = ConversationTurn(
            role="user",
            content="Welke archieven zijn er in Den Haag?",
            slots={"institution_type": "A", "city": "Den Haag"}
        )
        state.add_turn(turn1)

        # Second turn only changes city
        turn2 = ConversationTurn(
            role="user",
            content="En in Enschede?",
            slots={"city": "Enschede"}  # institution_type inherited
        )
        state.add_turn(turn2)

        # institution_type should still be A
        assert state.current_slots["institution_type"] == "A"
        assert state.current_slots["city"] == "Enschede"

    def test_get_previous_user_turn(self):
        """Test getting previous user turn."""
        state = ConversationState()

        user_turn = ConversationTurn(role="user", content="Question?")
        assistant_turn = ConversationTurn(role="assistant", content="Answer.")

        state.add_turn(user_turn)
        state.add_turn(assistant_turn)

        prev = state.get_previous_user_turn()
        assert prev is not None
        assert prev.content == "Question?"

    def test_to_dspy_history(self):
        """Test conversion to DSPy History."""
        state = ConversationState()

        state.add_turn(ConversationTurn(role="user", content="Q1"))
        state.add_turn(ConversationTurn(role="assistant", content="A1"))
        state.add_turn(ConversationTurn(role="user", content="Q2"))

        history = state.to_dspy_history()

        # Handle both real DSPy History and mock dict
        if isinstance(history, dict):
            messages = history.get("messages", [])
        else:
            messages = history.messages

        assert len(messages) == 3
        assert messages[0]["role"] == "user"
        assert messages[0]["content"] == "Q1"


# =============================================================================
# FYKE FILTER TESTS (Critical: Must operate on RESOLVED question!)
# =============================================================================

class TestFykeFilter:
    """Tests for FykeFilter.

    CRITICAL: These tests verify that the Fyke filter operates on RESOLVED
    questions, not raw input. Short follow-ups like "En in Enschede?" should
    NOT be filtered when they resolve to valid heritage questions.
    """

    def test_fyke_config_loads(self):
        """Test that Fyke config loads properly."""
        config = FykeFilterConfig(
            out_of_scope_keywords=["tandpasta", "supermarkt"],
            out_of_scope_categories=["shopping"],
            heritage_keywords=["museum", "archief"],
            standard_response={"nl": "Ik help met erfgoed.", "en": "I help with heritage."}
        )

        assert "tandpasta" in config.out_of_scope_keywords
        assert "museum" in config.heritage_keywords

    def test_heritage_keywords_pass(self):
        """Test that heritage keywords are detected as relevant."""
        config = FykeFilterConfig(
            out_of_scope_keywords=[],
            out_of_scope_categories=[],
            # Include plural forms for Dutch (musea, archieven, bibliotheken)
            heritage_keywords=["museum", "musea", "archief", "archieven", "bibliotheek", "bibliotheken"],
            standard_response={}
        )

        # Simulating Fyke logic (without DSPy call)
        question = "Welke musea zijn er in Amsterdam?"
        is_relevant = any(kw in question.lower() for kw in config.heritage_keywords)

        assert is_relevant is True

    def test_out_of_scope_blocked(self):
        """Test that out-of-scope keywords are blocked."""
        config = FykeFilterConfig(
            out_of_scope_keywords=["tandpasta", "supermarkt", "restaurant"],
            out_of_scope_categories=[],
            heritage_keywords=["museum"],
            standard_response={"nl": "Ik help met erfgoed."}
        )

        question = "Waar kan ik tandpasta kopen?"
        is_blocked = any(kw in question.lower() for kw in config.out_of_scope_keywords)

        assert is_blocked is True

    def test_resolved_follow_up_passes(self):
        """CRITICAL: Resolved follow-ups should pass the filter.

        Raw: "En in Enschede?" (would be ambiguous)
        Resolved: "Welke archieven zijn er in Enschede?" (clearly relevant)

        The Fyke filter MUST see the resolved question.
        """
        config = FykeFilterConfig(
            out_of_scope_keywords=["tandpasta"],
            out_of_scope_categories=[],
            heritage_keywords=["archieven", "musea", "bibliotheken"],
            standard_response={}
        )

        # This is what the Fyke filter should see (RESOLVED question)
        resolved_question = "Welke archieven zijn er in Enschede?"
        is_relevant = any(kw in resolved_question.lower() for kw in config.heritage_keywords)

        assert is_relevant is True

    def test_short_follow_up_without_resolution_would_fail(self):
        """Demonstrate why ConversationContextResolver must run FIRST.

        If we passed raw "En in Enschede?" to Fyke without resolution,
        it wouldn't match any heritage keywords.
        """
        config = FykeFilterConfig(
            out_of_scope_keywords=[],
            out_of_scope_categories=[],
            # Include plural forms for Dutch
            heritage_keywords=["museum", "musea", "archief", "archieven", "bibliotheek", "bibliotheken", "galerie", "galerijen"],
            standard_response={}
        )

        # Raw follow-up without resolution
        raw_question = "En in Enschede?"
        would_match_heritage = any(kw in raw_question.lower() for kw in config.heritage_keywords)

        # This demonstrates the problem - raw follow-up doesn't match!
        assert would_match_heritage is False

        # But after resolution, it would:
        resolved_question = "Welke archieven zijn er in Enschede?"
        matches_after_resolution = any(kw in resolved_question.lower() for kw in config.heritage_keywords)
        assert matches_after_resolution is True


# =============================================================================
# TEMPLATE INSTANTIATOR TESTS
# =============================================================================

class TestTemplateInstantiator:
    """Tests for TemplateInstantiator."""

    def test_simple_template_render(self):
        """Test basic template rendering."""
        instantiator = TemplateInstantiator()

        # Mock a simple template
        from jinja2 import Environment, BaseLoader
        env = Environment(loader=BaseLoader())
        template_str = """{{ prefixes }}
SELECT ?institution ?name WHERE {
  ?institution hc:institutionType "{{ institution_type }}" ;
               schema:addressLocality "{{ city }}" .
}"""

        template = env.from_string(template_str)
        result = template.render(
            prefixes=SPARQL_PREFIXES,
            institution_type="A",
            city="Den Haag"
        )

        assert 'hc:institutionType "A"' in result
        assert 'schema:addressLocality "Den Haag"' in result
        assert "PREFIX hc:" in result

    def test_prefixes_included(self):
        """Test that SPARQL prefixes are included."""
        assert "PREFIX hc: <https://nde.nl/ontology/hc/>" in SPARQL_PREFIXES
        assert "PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>" in SPARQL_PREFIXES
        assert "PREFIX schema: <http://schema.org/>" in SPARQL_PREFIXES


# =============================================================================
# PIPELINE ORDERING TESTS
# =============================================================================

class TestPipelineOrdering:
    """Tests to verify correct pipeline ordering.

    CRITICAL: The pipeline MUST follow this order:
    1. ConversationContextResolver (resolve follow-ups)
    2. FykeFilter (on RESOLVED question)
    3. TemplateClassifier
    4. SlotExtractor
    5. TemplateInstantiator
    """

    def test_follow_up_flow(self):
        """Test complete flow for a follow-up question.

        Scenario:
        Turn 1: "Welke archieven zijn er in Den Haag?"
        Turn 2: "En in Enschede?"

        Expected flow:
        1. ConversationContextResolver: "En in Enschede?" → "Welke archieven zijn er in Enschede?"
        2. FykeFilter: "Welke archieven zijn er in Enschede?" → PASS (contains "archieven")
        3. TemplateClassifier: → list_institutions_by_type_city
        4. SlotExtractor: → {institution_type: "A", city: "Enschede"}
        5. TemplateInstantiator: → SPARQL query
        """
        # Step 1: Simulate context resolution
        raw_question = "En in Enschede?"
        previous_slots = {"institution_type": "A", "city": "Den Haag"}

        # The resolved question should carry over the institution type
        resolved = ResolvedQuestion(
            original=raw_question,
            resolved="Welke archieven zijn er in Enschede?",
            is_follow_up=True,
            follow_up_type="location_swap",
            inherited_slots={"institution_type": "A"},
            confidence=0.95
        )

        # Step 2: Fyke should pass the RESOLVED question
        heritage_keywords = ["archieven", "musea", "bibliotheken"]
        passes_fyke = any(kw in resolved.resolved.lower() for kw in heritage_keywords)
        assert passes_fyke is True

        # Step 3: Template should match
        expected_template = "list_institutions_by_type_city"

        # Step 4: Slots should include inherited + new
        expected_slots = {
            "institution_type": "A",  # Inherited
            "city": "Enschede"  # New
        }

        # This test documents the expected flow
        assert resolved.is_follow_up is True
        assert resolved.inherited_slots["institution_type"] == "A"

    def test_count_follow_up_flow(self):
        """Test flow for count follow-up.

        Turn 1: "Welke musea zijn er in Amsterdam?" (returns list)
        Turn 2: "Hoeveel?" (count follow-up)

        Expected:
        1. Resolve: "Hoeveel?" → "Hoeveel musea zijn er in Amsterdam?"
        2. Fyke: PASS (resolved contains "musea")
        3. Template: count_institutions_by_type_location
        """
        raw = "Hoeveel?"
        previous_slots = {"institution_type": "M", "city": "Amsterdam"}

        # After resolution
        resolved = ResolvedQuestion(
            original=raw,
            resolved="Hoeveel musea zijn er in Amsterdam?",
            is_follow_up=True,
            follow_up_type="count_from_list",
            inherited_slots={"institution_type": "M", "city": "Amsterdam"},
            confidence=0.9
        )

        # Fyke should pass
        heritage_keywords = ["musea"]
        passes = any(kw in resolved.resolved.lower() for kw in heritage_keywords)
        assert passes is True

        # Template should change to count variant
        assert resolved.follow_up_type == "count_from_list"


# =============================================================================
# GOLDEN TEST CASES
# =============================================================================

class TestGoldenCases:
    """Golden test cases that must always pass."""

    @pytest.mark.parametrize("question,expected_template,expected_slots", [
        (
            "Welke musea zijn er in Amsterdam?",
            "list_institutions_by_type_city",
            {"institution_type": "M", "city": "Amsterdam"}
        ),
        (
            "Welke archieven zijn er in Den Haag?",
            "list_institutions_by_type_city",
            {"institution_type": "A", "city": "Den Haag"}
        ),
        (
            "Hoeveel bibliotheken zijn er in Rotterdam?",
            "count_institutions_by_type_location",
            {"institution_type": "L", "city": "Rotterdam"}  # Changed from location
        ),
        (
            "What museums are in Amsterdam?",
            "list_institutions_by_type_city",
            {"institution_type": "M", "city": "Amsterdam"}
        ),
    ])
    def test_golden_question_parsing(self, question, expected_template, expected_slots):
        """Test that golden questions parse to expected templates and slots.

        Note: This is a structural test. Full DSPy integration tests
        require a running LLM backend.
        """
        # This documents expected behavior
        # Full test would use the actual pipeline
        assert expected_template is not None
        assert "institution_type" in expected_slots or expected_slots == {}

    @pytest.mark.parametrize("raw_follow_up,previous_question,expected_resolved", [
        (
            "En in Enschede?",
            "Welke archieven zijn er in Den Haag?",
            "Welke archieven zijn er in Enschede?"
        ),
        (
            "En de musea?",
            "Welke archieven zijn er in Amsterdam?",
            "Welke musea zijn er in Amsterdam?"
        ),
        (
            "Hoeveel?",
            "Welke bibliotheken zijn er in Utrecht?",
            "Hoeveel bibliotheken zijn er in Utrecht?"
        ),
    ])
    def test_golden_follow_up_resolution(self, raw_follow_up, previous_question, expected_resolved):
        """Test that follow-ups resolve correctly.

        These document expected ConversationContextResolver behavior.
        """
        # This documents expected behavior
        assert raw_follow_up != expected_resolved
        assert len(expected_resolved) > len(raw_follow_up)


# =============================================================================
# BUDGET CATEGORY TESTS
# =============================================================================

class TestBudgetCategoryResolution:
    """Tests for budget category synonym resolution.

    These tests verify that multilingual budget/expense category terms
    are correctly resolved to canonical slot names for financial queries.

    Example competency question:
    "Which Custodians spend more than 5000 euros on innovations in 2024?"
    """

    def test_resolve_budget_category_dutch_innovation(self):
        """Test Dutch innovation budget terms."""
        resolver = SynonymResolver()

        # Dutch terms for innovation
        assert resolver.resolve_budget_category("innovatie") == "innovation"
        assert resolver.resolve_budget_category("innovaties") == "innovation"
        assert resolver.resolve_budget_category("vernieuwing") == "innovation"

    def test_resolve_budget_category_english_innovation(self):
        """Test English innovation budget terms."""
        resolver = SynonymResolver()

        assert resolver.resolve_budget_category("innovation") == "innovation"
        assert resolver.resolve_budget_category("innovations") == "innovation"
        assert resolver.resolve_budget_category("r_and_d") == "innovation"
        assert resolver.resolve_budget_category("technology") == "innovation"

    def test_resolve_budget_category_german_innovation(self):
        """Test German innovation budget terms."""
        resolver = SynonymResolver()

        assert resolver.resolve_budget_category("innovationen") == "innovation"
        assert resolver.resolve_budget_category("erneuerung") == "innovation"

    def test_resolve_budget_category_digitization(self):
        """Test digitization budget terms in multiple languages."""
        resolver = SynonymResolver()

        # Dutch
        assert resolver.resolve_budget_category("digitalisering") == "digitization"
        # English (US)
        assert resolver.resolve_budget_category("digitization") == "digitization"
        # English (UK)
        assert resolver.resolve_budget_category("digitisation") == "digitization"
        # German
        assert resolver.resolve_budget_category("digitalisierung") == "digitization"

    def test_resolve_budget_category_preservation(self):
        """Test preservation/conservation budget terms."""
        resolver = SynonymResolver()

        # Dutch
        assert resolver.resolve_budget_category("conservering") == "preservation"
        assert resolver.resolve_budget_category("restauratie") == "preservation"
        # English
        assert resolver.resolve_budget_category("preservation") == "preservation"
        assert resolver.resolve_budget_category("conservation") == "preservation"
        # German
        assert resolver.resolve_budget_category("konservierung") == "preservation"

    def test_resolve_budget_category_personnel(self):
        """Test personnel/staff budget terms."""
        resolver = SynonymResolver()

        # Dutch
        assert resolver.resolve_budget_category("personeel") == "personnel"
        assert resolver.resolve_budget_category("salarissen") == "personnel"
        # English
        assert resolver.resolve_budget_category("personnel") == "personnel"
        assert resolver.resolve_budget_category("staff") == "personnel"
        assert resolver.resolve_budget_category("salaries") == "personnel"
        # German
        assert resolver.resolve_budget_category("personal") == "personnel"

    def test_resolve_budget_category_acquisition(self):
        """Test acquisition/collection development budget terms."""
        resolver = SynonymResolver()

        # Dutch
        assert resolver.resolve_budget_category("aanwinsten") == "acquisition"
        assert resolver.resolve_budget_category("aankopen") == "acquisition"
        # English
        assert resolver.resolve_budget_category("acquisition") == "acquisition"
        assert resolver.resolve_budget_category("acquisitions") == "acquisition"
        # German
        assert resolver.resolve_budget_category("erwerbungen") == "acquisition"

    def test_resolve_budget_category_operating(self):
        """Test operating/running costs budget terms."""
        resolver = SynonymResolver()

        # Dutch
        assert resolver.resolve_budget_category("operationeel") == "operating"
        assert resolver.resolve_budget_category("exploitatie") == "operating"
        # English
        assert resolver.resolve_budget_category("operating") == "operating"
        assert resolver.resolve_budget_category("operations") == "operating"
        # German
        assert resolver.resolve_budget_category("betriebskosten") == "operating"

    def test_resolve_budget_category_capital(self):
        """Test capital/investment budget terms."""
        resolver = SynonymResolver()

        # Dutch
        assert resolver.resolve_budget_category("kapitaal") == "capital"
        assert resolver.resolve_budget_category("investeringen") == "capital"
        # English
        assert resolver.resolve_budget_category("capital") == "capital"
        assert resolver.resolve_budget_category("capex") == "capital"
        # German
        assert resolver.resolve_budget_category("investitionen") == "capital"

    def test_resolve_budget_category_case_insensitive(self):
        """Test case insensitivity for budget categories."""
        resolver = SynonymResolver()

        assert resolver.resolve_budget_category("INNOVATIE") == "innovation"
        assert resolver.resolve_budget_category("Digitalisering") == "digitization"
        assert resolver.resolve_budget_category("PRESERVATION") == "preservation"

    def test_resolve_budget_category_unknown_returns_none(self):
        """Test that unknown terms return None."""
        resolver = SynonymResolver()

        assert resolver.resolve_budget_category("tandpasta") is None
        assert resolver.resolve_budget_category("xyz123") is None
        assert resolver.resolve_budget_category("") is None


# =============================================================================
# BUDGET THRESHOLD TEMPLATE TESTS
# =============================================================================

class TestBudgetThresholdTemplate:
    """Tests for the find_custodians_by_budget_threshold template.

    This template answers competency questions like:
    "Which Custodians spend more than 5000 euros on innovations in 2024?"
    """

    @pytest.mark.parametrize("question,expected_slots", [
        (
            "Welke instellingen geven meer dan 5000 euro uit aan innovatie?",
            {"budget_category": "innovation", "amount": 5000, "comparison": ">"}
        ),
        (
            "Which museums spend more than 10000 on digitization in 2024?",
            {"budget_category": "digitization", "amount": 10000, "institution_type": "M", "year": 2024}
        ),
        (
            "Welke archieven hebben een personeelsbudget van meer dan 100000 euro?",
            {"budget_category": "personnel", "amount": 100000, "institution_type": "A"}
        ),
    ])
    def test_budget_threshold_slot_extraction(self, question, expected_slots):
        """Test that budget threshold questions extract correct slots.

        Note: This documents expected behavior. Full extraction requires
        the DSPy SlotExtractor component.
        """
        # This documents expected behavior
        assert "budget_category" in expected_slots
        assert "amount" in expected_slots

    def test_budget_template_exists_in_config(self):
        """Verify the budget threshold template is defined."""
        templates_path = PROJECT_ROOT / "data" / "sparql_templates.yaml"

        if templates_path.exists():
            import yaml
            with open(templates_path) as f:
                data = yaml.safe_load(f)

            # Templates are under the "templates" key
            templates = data.get("templates", {})
            assert "find_custodians_by_budget_threshold" in templates
            template = templates["find_custodians_by_budget_threshold"]
            assert template.get("id") == "find_custodians_by_budget_threshold"

    def test_budget_category_slot_type_defined(self):
        """Verify budget_category slot type is defined in templates."""
        templates_path = PROJECT_ROOT / "data" / "sparql_templates.yaml"

        if templates_path.exists():
            import yaml
            with open(templates_path) as f:
                data = yaml.safe_load(f)

            # Slot types are under "_slot_types" key (with underscore prefix)
            slot_types = data.get("_slot_types", {})
            assert "budget_category" in slot_types

            budget_category = slot_types["budget_category"]
            assert "synonyms" in budget_category
            assert "innovatie" in budget_category["synonyms"]
            assert budget_category["synonyms"]["innovatie"] == "innovation"


# =============================================================================
# PATTERN-BASED TEMPLATE MATCHING TESTS
# =============================================================================

class TestPatternBasedMatching:
    """Tests for pattern-based template matching (fast fallback before LLM).

    The _match_by_patterns() method provides deterministic matching using
    question_patterns defined in sparql_templates.yaml, avoiding LLM calls
    for well-defined query structures.
    """

    def test_exact_budget_pattern_match(self):
        """Test exact match for budget threshold query."""
        classifier = TemplateClassifier()
        templates = classifier._load_templates()

        question = "Welke instellingen geven meer dan 5000 euro uit aan innovatie?"
        result = classifier._match_by_patterns(question, templates)

        assert result is not None, "Pattern match should succeed"
        assert result.matched is True
        assert result.template_id == "find_custodians_by_budget_threshold"
        assert result.confidence >= 0.9

    def test_english_budget_pattern_match(self):
        """Test English budget threshold query pattern."""
        classifier = TemplateClassifier()
        templates = classifier._load_templates()

        question = "Which custodians spend more than 10000 on digitization?"
        result = classifier._match_by_patterns(question, templates)

        assert result is not None
        assert result.template_id == "find_custodians_by_budget_threshold"
        assert result.confidence >= 0.9

    def test_list_institutions_pattern_match(self):
        """Test pattern match for list institutions query."""
        classifier = TemplateClassifier()
        templates = classifier._load_templates()

        question = "Welke archieven zijn er in Amsterdam?"
        result = classifier._match_by_patterns(question, templates)

        assert result is not None
        assert result.template_id == "list_institutions_by_type_city"
        assert result.confidence >= 0.9

    def test_pattern_match_case_insensitive(self):
        """Test that pattern matching is case-insensitive."""
        classifier = TemplateClassifier()
        templates = classifier._load_templates()

        # Uppercase version of a pattern
        question = "WELKE INSTELLINGEN GEVEN MEER DAN 5000 EURO UIT AAN INNOVATIE?"
        result = classifier._match_by_patterns(question, templates)

        assert result is not None
        assert result.template_id == "find_custodians_by_budget_threshold"

    def test_pattern_match_returns_none_for_unknown(self):
        """Test that unknown patterns return None."""
        classifier = TemplateClassifier()
        templates = classifier._load_templates()

        # Use a truly unrelated question that won't match any heritage patterns
        question = "Hoe laat vertrekt de trein naar Utrecht?"  # "What time does the train to Utrecht leave?"
        result = classifier._match_by_patterns(question, templates)

        assert result is None, "Unrelated question should not match any pattern"

    def test_forward_uses_pattern_match_before_llm(self):
        """Test that forward() uses pattern matching before falling back to LLM."""
        classifier = TemplateClassifier()

        # A question that exactly matches a pattern should return quickly
        # without needing LLM (tested by checking the reasoning)
        question = "Welke instellingen geven meer dan 5000 euro uit aan innovatie?"
        result = classifier.forward(question)

        assert result.matched is True
        assert result.template_id == "find_custodians_by_budget_threshold"
        assert "Pattern match" in result.reasoning  # Indicates pattern was used, not LLM


# =============================================================================
# INTEGRATION SMOKE TEST
# =============================================================================

class TestIntegrationSmoke:
    """Smoke tests for integration (require templates file)."""

    def test_templates_file_exists(self):
        """Verify templates YAML exists."""
        templates_path = PROJECT_ROOT / "data" / "sparql_templates.yaml"

        # May not exist in CI
        if templates_path.exists():
            import yaml
            with open(templates_path) as f:
                data = yaml.safe_load(f)

            assert "templates" in data
            assert len(data["templates"]) >= 10  # We defined 10 templates
            assert "fyke_filter" in data
            assert "follow_up_patterns" in data

    def test_validation_rules_file_exists(self):
        """Verify validation rules JSON exists."""
        validation_path = PROJECT_ROOT / "data" / "validation" / "sparql_validation_rules.json"

        if validation_path.exists():
            with open(validation_path) as f:
                data = json.load(f)

            assert "institution_type_mappings" in data
            assert "subregion_mappings" in data


if __name__ == "__main__":
    pytest.main([__file__, "-v"])