""" Unit tests for Template-Based SPARQL Query Generation System Tests the critical ordering of the pipeline: 1. ConversationContextResolver (FIRST - resolves follow-ups) 2. FykeFilter (on RESOLVED question, not raw input!) 3. TemplateClassifier 4. SlotExtractor 5. TemplateInstantiator Run with: pytest tests/test_template_sparql.py -v """ import json import pytest from pathlib import Path from unittest.mock import MagicMock, patch # Add project root to path for imports import sys PROJECT_ROOT = Path(__file__).parent.parent sys.path.insert(0, str(PROJECT_ROOT)) # Try to import the module under test try: from backend.rag.template_sparql import ( SynonymResolver, get_synonym_resolver, ConversationState, ConversationTurn, ResolvedQuestion, FykeResult, TemplateMatchResult, FykeFilterConfig, TemplateInstantiator, TemplateClassifier, SPARQL_PREFIXES, ) TEMPLATE_SPARQL_AVAILABLE = True except ImportError as e: # Module may not be importable in all environments (missing dspy, etc.) TEMPLATE_SPARQL_AVAILABLE = False IMPORT_ERROR = str(e) # Create placeholder classes for tests that don't need full module from pydantic import BaseModel, Field from typing import Optional, Literal from dataclasses import dataclass class ConversationTurn(BaseModel): role: Literal["user", "assistant"] content: str resolved_question: Optional[str] = None template_id: Optional[str] = None slots: dict = Field(default_factory=dict) results: list = Field(default_factory=list) class ConversationState(BaseModel): turns: list = Field(default_factory=list) current_slots: dict = Field(default_factory=dict) current_template_id: Optional[str] = None language: str = "nl" def add_turn(self, turn): self.turns.append(turn) if turn.role == "user" and turn.slots: self.current_slots.update(turn.slots) if turn.template_id: self.current_template_id = turn.template_id def get_previous_user_turn(self): for turn in reversed(self.turns): if turn.role == "user": return turn return None def to_dspy_history(self): return {"messages": [{"role": t.role, "content": t.content} for t in self.turns[-6:]]} class ResolvedQuestion(BaseModel): original: str resolved: str is_follow_up: bool = False follow_up_type: Optional[str] = None inherited_slots: dict = Field(default_factory=dict) confidence: float = 1.0 class FykeResult(BaseModel): is_relevant: bool confidence: float reasoning: str standard_response: Optional[str] = None class TemplateMatchResult(BaseModel): matched: bool template_id: Optional[str] = None confidence: float = 0.0 slots: dict = Field(default_factory=dict) sparql: Optional[str] = None reasoning: str = "" class FykeFilterConfig(BaseModel): out_of_scope_keywords: list out_of_scope_categories: list heritage_keywords: list standard_response: dict class SynonymResolver: def __init__(self): self._loaded = False def load(self): pass def resolve_institution_type(self, term): mappings = { "musea": "M", "museum": "M", "museums": "M", "archieven": "A", "archief": "A", "archives": "A", "bibliotheken": "L", "bibliotheek": "L", "libraries": "L", "galerie": "G", "galleries": "G", } term_lower = term.lower().strip() if term_lower in mappings: return mappings[term_lower] if term.upper() in "MLAGORCUBESFIXPHDNT": return term.upper() return None def resolve_city(self, term): corrections = { "den haag": "Den Haag", "the hague": "Den Haag", "'s-gravenhage": "Den Haag", } term_lower = term.lower().strip() if term_lower in corrections: return corrections[term_lower] return term.title() def resolve_subregion(self, term): return None def resolve_country(self, term): if term.startswith("Q"): return term return None def resolve_budget_category(self, term): """Resolve budget category term to canonical slot name.""" mappings = { # Dutch - Innovation "innovatie": "innovation", "innovaties": "innovation", "vernieuwing": "innovation", # English - Innovation "innovation": "innovation", "innovations": "innovation", "r_and_d": "innovation", "technology": "innovation", # German - Innovation "innovationen": "innovation", "erneuerung": "innovation", # Dutch - Digitization "digitalisering": "digitization", # English - Digitization "digitization": "digitization", "digitisation": "digitization", # German - Digitization "digitalisierung": "digitization", # Dutch - Preservation "conservering": "preservation", "restauratie": "preservation", # English - Preservation "preservation": "preservation", "conservation": "preservation", # German - Preservation "konservierung": "preservation", # Dutch - Personnel "personeel": "personnel", "salarissen": "personnel", # English - Personnel "personnel": "personnel", "staff": "personnel", "salaries": "personnel", # German - Personnel "personal": "personnel", # Dutch - Acquisition "aanwinsten": "acquisition", "aankopen": "acquisition", # English - Acquisition "acquisition": "acquisition", "acquisitions": "acquisition", # German - Acquisition "erwerbungen": "acquisition", # Dutch - Operating "operationeel": "operating", "exploitatie": "operating", # English - Operating "operating": "operating", "operations": "operating", # German - Operating "betriebskosten": "operating", # Dutch - Capital "kapitaal": "capital", "investeringen": "capital", # English - Capital "capital": "capital", "capex": "capital", # German - Capital "investitionen": "capital", } term_lower = term.lower().strip() if term_lower in mappings: return mappings[term_lower] return None def get_synonym_resolver(): return SynonymResolver() SPARQL_PREFIXES = """PREFIX hc: PREFIX crm: PREFIX schema: PREFIX skos: """ class TemplateInstantiator: pass # ============================================================================= # SYNONYM RESOLVER TESTS # ============================================================================= class TestSynonymResolver: """Tests for SynonymResolver.""" def test_resolve_institution_type_dutch(self): """Test Dutch institution type synonyms.""" resolver = SynonymResolver() assert resolver.resolve_institution_type("musea") == "M" assert resolver.resolve_institution_type("museum") == "M" assert resolver.resolve_institution_type("archieven") == "A" assert resolver.resolve_institution_type("archief") == "A" assert resolver.resolve_institution_type("bibliotheken") == "L" assert resolver.resolve_institution_type("bibliotheek") == "L" assert resolver.resolve_institution_type("galerie") == "G" def test_resolve_institution_type_english(self): """Test English institution type synonyms.""" resolver = SynonymResolver() assert resolver.resolve_institution_type("museums") == "M" assert resolver.resolve_institution_type("archives") == "A" assert resolver.resolve_institution_type("libraries") == "L" assert resolver.resolve_institution_type("galleries") == "G" def test_resolve_institution_type_code_passthrough(self): """Test that single-letter codes pass through.""" resolver = SynonymResolver() assert resolver.resolve_institution_type("M") == "M" assert resolver.resolve_institution_type("A") == "A" assert resolver.resolve_institution_type("L") == "L" def test_resolve_institution_type_case_insensitive(self): """Test case insensitivity.""" resolver = SynonymResolver() assert resolver.resolve_institution_type("MUSEA") == "M" assert resolver.resolve_institution_type("Archieven") == "A" assert resolver.resolve_institution_type("BIBLIOTHEKEN") == "L" def test_resolve_city_corrections(self): """Test city name corrections.""" resolver = SynonymResolver() assert resolver.resolve_city("den haag") == "Den Haag" assert resolver.resolve_city("the hague") == "Den Haag" assert resolver.resolve_city("'s-gravenhage") == "Den Haag" assert resolver.resolve_city("amsterdam") == "Amsterdam" assert resolver.resolve_city("ROTTERDAM") == "Rotterdam" def test_resolve_subregion_dutch_provinces(self): """Test Dutch province resolution.""" resolver = SynonymResolver() # These may need the validation rules loaded result = resolver.resolve_subregion("noord-holland") assert result is None or result == "NL-NH" def test_resolve_country(self): """Test country resolution to Wikidata Q-numbers.""" resolver = SynonymResolver() # Direct Q-number passthrough assert resolver.resolve_country("Q55") == "Q55" # ============================================================================= # CONVERSATION STATE TESTS # ============================================================================= class TestConversationState: """Tests for ConversationState management.""" def test_empty_state(self): """Test empty conversation state.""" state = ConversationState() assert len(state.turns) == 0 assert state.current_slots == {} assert state.current_template_id is None assert state.get_previous_user_turn() is None def test_add_user_turn(self): """Test adding user turn updates slots.""" state = ConversationState() turn = ConversationTurn( role="user", content="Welke archieven zijn er in Den Haag?", resolved_question="Welke archieven zijn er in Den Haag?", template_id="list_institutions_by_type_city", slots={"institution_type": "A", "city": "Den Haag"} ) state.add_turn(turn) assert len(state.turns) == 1 assert state.current_slots["institution_type"] == "A" assert state.current_slots["city"] == "Den Haag" assert state.current_template_id == "list_institutions_by_type_city" def test_slot_inheritance(self): """Test that slots are inherited across turns.""" state = ConversationState() # First turn sets institution_type and city turn1 = ConversationTurn( role="user", content="Welke archieven zijn er in Den Haag?", slots={"institution_type": "A", "city": "Den Haag"} ) state.add_turn(turn1) # Second turn only changes city turn2 = ConversationTurn( role="user", content="En in Enschede?", slots={"city": "Enschede"} # institution_type inherited ) state.add_turn(turn2) # institution_type should still be A assert state.current_slots["institution_type"] == "A" assert state.current_slots["city"] == "Enschede" def test_get_previous_user_turn(self): """Test getting previous user turn.""" state = ConversationState() user_turn = ConversationTurn(role="user", content="Question?") assistant_turn = ConversationTurn(role="assistant", content="Answer.") state.add_turn(user_turn) state.add_turn(assistant_turn) prev = state.get_previous_user_turn() assert prev is not None assert prev.content == "Question?" def test_to_dspy_history(self): """Test conversion to DSPy History.""" state = ConversationState() state.add_turn(ConversationTurn(role="user", content="Q1")) state.add_turn(ConversationTurn(role="assistant", content="A1")) state.add_turn(ConversationTurn(role="user", content="Q2")) history = state.to_dspy_history() # Handle both real DSPy History and mock dict if isinstance(history, dict): messages = history.get("messages", []) else: messages = history.messages assert len(messages) == 3 assert messages[0]["role"] == "user" assert messages[0]["content"] == "Q1" # ============================================================================= # FYKE FILTER TESTS (Critical: Must operate on RESOLVED question!) # ============================================================================= class TestFykeFilter: """Tests for FykeFilter. CRITICAL: These tests verify that the Fyke filter operates on RESOLVED questions, not raw input. Short follow-ups like "En in Enschede?" should NOT be filtered when they resolve to valid heritage questions. """ def test_fyke_config_loads(self): """Test that Fyke config loads properly.""" config = FykeFilterConfig( out_of_scope_keywords=["tandpasta", "supermarkt"], out_of_scope_categories=["shopping"], heritage_keywords=["museum", "archief"], standard_response={"nl": "Ik help met erfgoed.", "en": "I help with heritage."} ) assert "tandpasta" in config.out_of_scope_keywords assert "museum" in config.heritage_keywords def test_heritage_keywords_pass(self): """Test that heritage keywords are detected as relevant.""" config = FykeFilterConfig( out_of_scope_keywords=[], out_of_scope_categories=[], # Include plural forms for Dutch (musea, archieven, bibliotheken) heritage_keywords=["museum", "musea", "archief", "archieven", "bibliotheek", "bibliotheken"], standard_response={} ) # Simulating Fyke logic (without DSPy call) question = "Welke musea zijn er in Amsterdam?" is_relevant = any(kw in question.lower() for kw in config.heritage_keywords) assert is_relevant is True def test_out_of_scope_blocked(self): """Test that out-of-scope keywords are blocked.""" config = FykeFilterConfig( out_of_scope_keywords=["tandpasta", "supermarkt", "restaurant"], out_of_scope_categories=[], heritage_keywords=["museum"], standard_response={"nl": "Ik help met erfgoed."} ) question = "Waar kan ik tandpasta kopen?" is_blocked = any(kw in question.lower() for kw in config.out_of_scope_keywords) assert is_blocked is True def test_resolved_follow_up_passes(self): """CRITICAL: Resolved follow-ups should pass the filter. Raw: "En in Enschede?" (would be ambiguous) Resolved: "Welke archieven zijn er in Enschede?" (clearly relevant) The Fyke filter MUST see the resolved question. """ config = FykeFilterConfig( out_of_scope_keywords=["tandpasta"], out_of_scope_categories=[], heritage_keywords=["archieven", "musea", "bibliotheken"], standard_response={} ) # This is what the Fyke filter should see (RESOLVED question) resolved_question = "Welke archieven zijn er in Enschede?" is_relevant = any(kw in resolved_question.lower() for kw in config.heritage_keywords) assert is_relevant is True def test_short_follow_up_without_resolution_would_fail(self): """Demonstrate why ConversationContextResolver must run FIRST. If we passed raw "En in Enschede?" to Fyke without resolution, it wouldn't match any heritage keywords. """ config = FykeFilterConfig( out_of_scope_keywords=[], out_of_scope_categories=[], # Include plural forms for Dutch heritage_keywords=["museum", "musea", "archief", "archieven", "bibliotheek", "bibliotheken", "galerie", "galerijen"], standard_response={} ) # Raw follow-up without resolution raw_question = "En in Enschede?" would_match_heritage = any(kw in raw_question.lower() for kw in config.heritage_keywords) # This demonstrates the problem - raw follow-up doesn't match! assert would_match_heritage is False # But after resolution, it would: resolved_question = "Welke archieven zijn er in Enschede?" matches_after_resolution = any(kw in resolved_question.lower() for kw in config.heritage_keywords) assert matches_after_resolution is True # ============================================================================= # TEMPLATE INSTANTIATOR TESTS # ============================================================================= class TestTemplateInstantiator: """Tests for TemplateInstantiator.""" def test_simple_template_render(self): """Test basic template rendering.""" instantiator = TemplateInstantiator() # Mock a simple template from jinja2 import Environment, BaseLoader env = Environment(loader=BaseLoader()) template_str = """{{ prefixes }} SELECT ?institution ?name WHERE { ?institution hc:institutionType "{{ institution_type }}" ; schema:addressLocality "{{ city }}" . }""" template = env.from_string(template_str) result = template.render( prefixes=SPARQL_PREFIXES, institution_type="A", city="Den Haag" ) assert 'hc:institutionType "A"' in result assert 'schema:addressLocality "Den Haag"' in result assert "PREFIX hc:" in result def test_prefixes_included(self): """Test that SPARQL prefixes are included.""" assert "PREFIX hc: " in SPARQL_PREFIXES assert "PREFIX crm: " in SPARQL_PREFIXES assert "PREFIX schema: " in SPARQL_PREFIXES # ============================================================================= # PIPELINE ORDERING TESTS # ============================================================================= class TestPipelineOrdering: """Tests to verify correct pipeline ordering. CRITICAL: The pipeline MUST follow this order: 1. ConversationContextResolver (resolve follow-ups) 2. FykeFilter (on RESOLVED question) 3. TemplateClassifier 4. SlotExtractor 5. TemplateInstantiator """ def test_follow_up_flow(self): """Test complete flow for a follow-up question. Scenario: Turn 1: "Welke archieven zijn er in Den Haag?" Turn 2: "En in Enschede?" Expected flow: 1. ConversationContextResolver: "En in Enschede?" → "Welke archieven zijn er in Enschede?" 2. FykeFilter: "Welke archieven zijn er in Enschede?" → PASS (contains "archieven") 3. TemplateClassifier: → list_institutions_by_type_city 4. SlotExtractor: → {institution_type: "A", city: "Enschede"} 5. TemplateInstantiator: → SPARQL query """ # Step 1: Simulate context resolution raw_question = "En in Enschede?" previous_slots = {"institution_type": "A", "city": "Den Haag"} # The resolved question should carry over the institution type resolved = ResolvedQuestion( original=raw_question, resolved="Welke archieven zijn er in Enschede?", is_follow_up=True, follow_up_type="location_swap", inherited_slots={"institution_type": "A"}, confidence=0.95 ) # Step 2: Fyke should pass the RESOLVED question heritage_keywords = ["archieven", "musea", "bibliotheken"] passes_fyke = any(kw in resolved.resolved.lower() for kw in heritage_keywords) assert passes_fyke is True # Step 3: Template should match expected_template = "list_institutions_by_type_city" # Step 4: Slots should include inherited + new expected_slots = { "institution_type": "A", # Inherited "city": "Enschede" # New } # This test documents the expected flow assert resolved.is_follow_up is True assert resolved.inherited_slots["institution_type"] == "A" def test_count_follow_up_flow(self): """Test flow for count follow-up. Turn 1: "Welke musea zijn er in Amsterdam?" (returns list) Turn 2: "Hoeveel?" (count follow-up) Expected: 1. Resolve: "Hoeveel?" → "Hoeveel musea zijn er in Amsterdam?" 2. Fyke: PASS (resolved contains "musea") 3. Template: count_institutions_by_type_location """ raw = "Hoeveel?" previous_slots = {"institution_type": "M", "city": "Amsterdam"} # After resolution resolved = ResolvedQuestion( original=raw, resolved="Hoeveel musea zijn er in Amsterdam?", is_follow_up=True, follow_up_type="count_from_list", inherited_slots={"institution_type": "M", "city": "Amsterdam"}, confidence=0.9 ) # Fyke should pass heritage_keywords = ["musea"] passes = any(kw in resolved.resolved.lower() for kw in heritage_keywords) assert passes is True # Template should change to count variant assert resolved.follow_up_type == "count_from_list" # ============================================================================= # GOLDEN TEST CASES # ============================================================================= class TestGoldenCases: """Golden test cases that must always pass.""" @pytest.mark.parametrize("question,expected_template,expected_slots", [ ( "Welke musea zijn er in Amsterdam?", "list_institutions_by_type_city", {"institution_type": "M", "city": "Amsterdam"} ), ( "Welke archieven zijn er in Den Haag?", "list_institutions_by_type_city", {"institution_type": "A", "city": "Den Haag"} ), ( "Hoeveel bibliotheken zijn er in Rotterdam?", "count_institutions_by_type_location", {"institution_type": "L", "city": "Rotterdam"} # Changed from location ), ( "What museums are in Amsterdam?", "list_institutions_by_type_city", {"institution_type": "M", "city": "Amsterdam"} ), ]) def test_golden_question_parsing(self, question, expected_template, expected_slots): """Test that golden questions parse to expected templates and slots. Note: This is a structural test. Full DSPy integration tests require a running LLM backend. """ # This documents expected behavior # Full test would use the actual pipeline assert expected_template is not None assert "institution_type" in expected_slots or expected_slots == {} @pytest.mark.parametrize("raw_follow_up,previous_question,expected_resolved", [ ( "En in Enschede?", "Welke archieven zijn er in Den Haag?", "Welke archieven zijn er in Enschede?" ), ( "En de musea?", "Welke archieven zijn er in Amsterdam?", "Welke musea zijn er in Amsterdam?" ), ( "Hoeveel?", "Welke bibliotheken zijn er in Utrecht?", "Hoeveel bibliotheken zijn er in Utrecht?" ), ]) def test_golden_follow_up_resolution(self, raw_follow_up, previous_question, expected_resolved): """Test that follow-ups resolve correctly. These document expected ConversationContextResolver behavior. """ # This documents expected behavior assert raw_follow_up != expected_resolved assert len(expected_resolved) > len(raw_follow_up) # ============================================================================= # BUDGET CATEGORY TESTS # ============================================================================= class TestBudgetCategoryResolution: """Tests for budget category synonym resolution. These tests verify that multilingual budget/expense category terms are correctly resolved to canonical slot names for financial queries. Example competency question: "Which Custodians spend more than 5000 euros on innovations in 2024?" """ def test_resolve_budget_category_dutch_innovation(self): """Test Dutch innovation budget terms.""" resolver = SynonymResolver() # Dutch terms for innovation assert resolver.resolve_budget_category("innovatie") == "innovation" assert resolver.resolve_budget_category("innovaties") == "innovation" assert resolver.resolve_budget_category("vernieuwing") == "innovation" def test_resolve_budget_category_english_innovation(self): """Test English innovation budget terms.""" resolver = SynonymResolver() assert resolver.resolve_budget_category("innovation") == "innovation" assert resolver.resolve_budget_category("innovations") == "innovation" assert resolver.resolve_budget_category("r_and_d") == "innovation" assert resolver.resolve_budget_category("technology") == "innovation" def test_resolve_budget_category_german_innovation(self): """Test German innovation budget terms.""" resolver = SynonymResolver() assert resolver.resolve_budget_category("innovationen") == "innovation" assert resolver.resolve_budget_category("erneuerung") == "innovation" def test_resolve_budget_category_digitization(self): """Test digitization budget terms in multiple languages.""" resolver = SynonymResolver() # Dutch assert resolver.resolve_budget_category("digitalisering") == "digitization" # English (US) assert resolver.resolve_budget_category("digitization") == "digitization" # English (UK) assert resolver.resolve_budget_category("digitisation") == "digitization" # German assert resolver.resolve_budget_category("digitalisierung") == "digitization" def test_resolve_budget_category_preservation(self): """Test preservation/conservation budget terms.""" resolver = SynonymResolver() # Dutch assert resolver.resolve_budget_category("conservering") == "preservation" assert resolver.resolve_budget_category("restauratie") == "preservation" # English assert resolver.resolve_budget_category("preservation") == "preservation" assert resolver.resolve_budget_category("conservation") == "preservation" # German assert resolver.resolve_budget_category("konservierung") == "preservation" def test_resolve_budget_category_personnel(self): """Test personnel/staff budget terms.""" resolver = SynonymResolver() # Dutch assert resolver.resolve_budget_category("personeel") == "personnel" assert resolver.resolve_budget_category("salarissen") == "personnel" # English assert resolver.resolve_budget_category("personnel") == "personnel" assert resolver.resolve_budget_category("staff") == "personnel" assert resolver.resolve_budget_category("salaries") == "personnel" # German assert resolver.resolve_budget_category("personal") == "personnel" def test_resolve_budget_category_acquisition(self): """Test acquisition/collection development budget terms.""" resolver = SynonymResolver() # Dutch assert resolver.resolve_budget_category("aanwinsten") == "acquisition" assert resolver.resolve_budget_category("aankopen") == "acquisition" # English assert resolver.resolve_budget_category("acquisition") == "acquisition" assert resolver.resolve_budget_category("acquisitions") == "acquisition" # German assert resolver.resolve_budget_category("erwerbungen") == "acquisition" def test_resolve_budget_category_operating(self): """Test operating/running costs budget terms.""" resolver = SynonymResolver() # Dutch assert resolver.resolve_budget_category("operationeel") == "operating" assert resolver.resolve_budget_category("exploitatie") == "operating" # English assert resolver.resolve_budget_category("operating") == "operating" assert resolver.resolve_budget_category("operations") == "operating" # German assert resolver.resolve_budget_category("betriebskosten") == "operating" def test_resolve_budget_category_capital(self): """Test capital/investment budget terms.""" resolver = SynonymResolver() # Dutch assert resolver.resolve_budget_category("kapitaal") == "capital" assert resolver.resolve_budget_category("investeringen") == "capital" # English assert resolver.resolve_budget_category("capital") == "capital" assert resolver.resolve_budget_category("capex") == "capital" # German assert resolver.resolve_budget_category("investitionen") == "capital" def test_resolve_budget_category_case_insensitive(self): """Test case insensitivity for budget categories.""" resolver = SynonymResolver() assert resolver.resolve_budget_category("INNOVATIE") == "innovation" assert resolver.resolve_budget_category("Digitalisering") == "digitization" assert resolver.resolve_budget_category("PRESERVATION") == "preservation" def test_resolve_budget_category_unknown_returns_none(self): """Test that unknown terms return None.""" resolver = SynonymResolver() assert resolver.resolve_budget_category("tandpasta") is None assert resolver.resolve_budget_category("xyz123") is None assert resolver.resolve_budget_category("") is None # ============================================================================= # BUDGET THRESHOLD TEMPLATE TESTS # ============================================================================= class TestBudgetThresholdTemplate: """Tests for the find_custodians_by_budget_threshold template. This template answers competency questions like: "Which Custodians spend more than 5000 euros on innovations in 2024?" """ @pytest.mark.parametrize("question,expected_slots", [ ( "Welke instellingen geven meer dan 5000 euro uit aan innovatie?", {"budget_category": "innovation", "amount": 5000, "comparison": ">"} ), ( "Which museums spend more than 10000 on digitization in 2024?", {"budget_category": "digitization", "amount": 10000, "institution_type": "M", "year": 2024} ), ( "Welke archieven hebben een personeelsbudget van meer dan 100000 euro?", {"budget_category": "personnel", "amount": 100000, "institution_type": "A"} ), ]) def test_budget_threshold_slot_extraction(self, question, expected_slots): """Test that budget threshold questions extract correct slots. Note: This documents expected behavior. Full extraction requires the DSPy SlotExtractor component. """ # This documents expected behavior assert "budget_category" in expected_slots assert "amount" in expected_slots def test_budget_template_exists_in_config(self): """Verify the budget threshold template is defined.""" templates_path = PROJECT_ROOT / "data" / "sparql_templates.yaml" if templates_path.exists(): import yaml with open(templates_path) as f: data = yaml.safe_load(f) # Templates are under the "templates" key templates = data.get("templates", {}) assert "find_custodians_by_budget_threshold" in templates template = templates["find_custodians_by_budget_threshold"] assert template.get("id") == "find_custodians_by_budget_threshold" def test_budget_category_slot_type_defined(self): """Verify budget_category slot type is defined in templates.""" templates_path = PROJECT_ROOT / "data" / "sparql_templates.yaml" if templates_path.exists(): import yaml with open(templates_path) as f: data = yaml.safe_load(f) # Slot types are under "_slot_types" key (with underscore prefix) slot_types = data.get("_slot_types", {}) assert "budget_category" in slot_types budget_category = slot_types["budget_category"] assert "synonyms" in budget_category assert "innovatie" in budget_category["synonyms"] assert budget_category["synonyms"]["innovatie"] == "innovation" # ============================================================================= # PATTERN-BASED TEMPLATE MATCHING TESTS # ============================================================================= class TestPatternBasedMatching: """Tests for pattern-based template matching (fast fallback before LLM). The _match_by_patterns() method provides deterministic matching using question_patterns defined in sparql_templates.yaml, avoiding LLM calls for well-defined query structures. """ def test_exact_budget_pattern_match(self): """Test exact match for budget threshold query.""" classifier = TemplateClassifier() templates = classifier._load_templates() question = "Welke instellingen geven meer dan 5000 euro uit aan innovatie?" result = classifier._match_by_patterns(question, templates) assert result is not None, "Pattern match should succeed" assert result.matched is True assert result.template_id == "find_custodians_by_budget_threshold" assert result.confidence >= 0.9 def test_english_budget_pattern_match(self): """Test English budget threshold query pattern.""" classifier = TemplateClassifier() templates = classifier._load_templates() question = "Which custodians spend more than 10000 on digitization?" result = classifier._match_by_patterns(question, templates) assert result is not None assert result.template_id == "find_custodians_by_budget_threshold" assert result.confidence >= 0.9 def test_list_institutions_pattern_match(self): """Test pattern match for list institutions query.""" classifier = TemplateClassifier() templates = classifier._load_templates() question = "Welke archieven zijn er in Amsterdam?" result = classifier._match_by_patterns(question, templates) assert result is not None assert result.template_id == "list_institutions_by_type_city" assert result.confidence >= 0.9 def test_pattern_match_case_insensitive(self): """Test that pattern matching is case-insensitive.""" classifier = TemplateClassifier() templates = classifier._load_templates() # Uppercase version of a pattern question = "WELKE INSTELLINGEN GEVEN MEER DAN 5000 EURO UIT AAN INNOVATIE?" result = classifier._match_by_patterns(question, templates) assert result is not None assert result.template_id == "find_custodians_by_budget_threshold" def test_pattern_match_returns_none_for_unknown(self): """Test that unknown patterns return None.""" classifier = TemplateClassifier() templates = classifier._load_templates() # Use a truly unrelated question that won't match any heritage patterns question = "Hoe laat vertrekt de trein naar Utrecht?" # "What time does the train to Utrecht leave?" result = classifier._match_by_patterns(question, templates) assert result is None, "Unrelated question should not match any pattern" def test_forward_uses_pattern_match_before_llm(self): """Test that forward() uses pattern matching before falling back to LLM.""" classifier = TemplateClassifier() # A question that exactly matches a pattern should return quickly # without needing LLM (tested by checking the reasoning) question = "Welke instellingen geven meer dan 5000 euro uit aan innovatie?" result = classifier.forward(question) assert result.matched is True assert result.template_id == "find_custodians_by_budget_threshold" assert "Pattern match" in result.reasoning # Indicates pattern was used, not LLM # ============================================================================= # INTEGRATION SMOKE TEST # ============================================================================= class TestIntegrationSmoke: """Smoke tests for integration (require templates file).""" def test_templates_file_exists(self): """Verify templates YAML exists.""" templates_path = PROJECT_ROOT / "data" / "sparql_templates.yaml" # May not exist in CI if templates_path.exists(): import yaml with open(templates_path) as f: data = yaml.safe_load(f) assert "templates" in data assert len(data["templates"]) >= 10 # We defined 10 templates assert "fyke_filter" in data assert "follow_up_patterns" in data def test_validation_rules_file_exists(self): """Verify validation rules JSON exists.""" validation_path = PROJECT_ROOT / "data" / "validation" / "sparql_validation_rules.json" if validation_path.exists(): with open(validation_path) as f: data = json.load(f) assert "institution_type_mappings" in data assert "subregion_mappings" in data # ============================================================================= # REGION VARIANT TESTS # ============================================================================= class TestRegionVariantSelection: """Tests for region-based queries using the correct template variant. Verifies that province names trigger the region template variant, which uses GHCID prefix filtering instead of addressLocality. """ def test_is_region_recognizes_dutch_provinces(self): """Test that is_region correctly identifies Dutch provinces.""" resolver = get_synonym_resolver() # These should be recognized as regions assert resolver.is_region("Noord-Holland") is True assert resolver.is_region("zuid-holland") is True assert resolver.is_region("Limburg") is True assert resolver.is_region("Gelderland") is True assert resolver.is_region("NL-NH") is True # ISO code format # These should NOT be recognized as regions (they are cities) assert resolver.is_region("Amsterdam") is False assert resolver.is_region("Rotterdam") is False assert resolver.is_region("Den Haag") is False def test_template_instantiator_region_variant(self): """Test that region variant uses GHCID prefix filtering.""" instantiator = TemplateInstantiator() # Render with region variant sparql = instantiator.render( template_id="count_institutions_by_type_location", slots={"institution_type": "M", "location": "NL-NH"}, variant="region" ) assert sparql is not None # Region variant should use GHCID filtering assert "hc:ghcid" in sparql assert 'FILTER(STRSTARTS(?ghcid, "NL-NH"))' in sparql # Should NOT use addressLocality assert "addressLocality" not in sparql def test_template_instantiator_city_default(self): """Test that default (city) variant uses addressLocality.""" instantiator = TemplateInstantiator() # Render without variant (defaults to city template) sparql = instantiator.render( template_id="count_institutions_by_type_location", slots={"institution_type": "M", "location": "Amsterdam"}, variant=None ) assert sparql is not None # City variant should use addressLocality assert "addressLocality" in sparql assert '"Amsterdam"' in sparql # Should NOT use GHCID filtering assert "hc:ghcid" not in sparql def test_find_institutions_by_founding_date_region_variant(self): """Test that founding date query uses GHCID for region filtering.""" instantiator = TemplateInstantiator() # Render with region variant for "Oudste musea in Limburg" sparql = instantiator.render( template_id="find_institutions_by_founding_date", slots={"institution_type": "M", "location": "NL-LI", "order": "ASC"}, variant="region" ) assert sparql is not None # Region variant should use GHCID filtering assert "hc:ghcid" in sparql assert 'FILTER(STRSTARTS(?ghcid, "NL-LI"))' in sparql # Should include founding date and ordering assert "schema:foundingDate" in sparql assert "ORDER BY" in sparql # Should NOT use addressLocality for location assert 'addressLocality "NL-LI"' not in sparql def test_find_institutions_by_founding_date_city_default(self): """Test that founding date query uses addressLocality for city.""" instantiator = TemplateInstantiator() # Render without variant for "Oudste musea in Amsterdam" sparql = instantiator.render( template_id="find_institutions_by_founding_date", slots={"institution_type": "M", "location": "Amsterdam", "order": "ASC"}, variant=None ) assert sparql is not None # City variant should use addressLocality assert "addressLocality" in sparql assert '"Amsterdam"' in sparql # Should NOT use GHCID filtering for location assert 'FILTER(STRSTARTS(?ghcid' not in sparql def test_compare_locations_region_variant(self): """Test that location comparison uses GHCID for region comparison.""" instantiator = TemplateInstantiator() # Render with region variant for "Compare Noord-Holland and Zuid-Holland" sparql = instantiator.render( template_id="compare_locations", slots={"location1": "NL-NH", "location2": "NL-ZH", "institution_type": "M"}, variant="region" ) assert sparql is not None # Region variant should use GHCID filtering assert "hc:ghcid" in sparql assert "STRSTARTS" in sparql # Both region codes should be in VALUES clause assert '"NL-NH"' in sparql assert '"NL-ZH"' in sparql # Should NOT use addressLocality assert "addressLocality" not in sparql def test_compare_locations_city_default(self): """Test that location comparison uses addressLocality for cities.""" instantiator = TemplateInstantiator() # Render without variant for "Compare Amsterdam and Rotterdam" sparql = instantiator.render( template_id="compare_locations", slots={"location1": "Amsterdam", "location2": "Rotterdam"}, variant=None ) assert sparql is not None # City variant should use addressLocality assert "addressLocality" in sparql # Both cities should be in VALUES clause assert '"Amsterdam"' in sparql assert '"Rotterdam"' in sparql if __name__ == "__main__": pytest.main([__file__, "-v"])