# Test-Driven Development Strategy ## Overview This document defines the TDD approach for implementing the template-based SPARQL system. Tests are organized by component and follow the Red-Green-Refactor cycle. ## Test Organization ``` tests/ template_sparql/ __init__.py test_intent_classifier.py # Intent classification tests test_slot_extractor.py # Slot extraction tests test_template_registry.py # Template registry tests test_template_instantiation.py # Template filling tests test_sparql_validation.py # SPARQL validation tests test_query_pipeline.py # End-to-end pipeline tests test_integration.py # Integration with RAG system conftest.py # Shared fixtures ``` ## Test Categories ### 1. Unit Tests - Intent Classification ```python # tests/template_sparql/test_intent_classifier.py import pytest from backend.rag.template_sparql.intent_classifier import IntentClassifier, QueryIntent class TestIntentClassifier: """Test suite for intent classification.""" @pytest.fixture def classifier(self): """Create classifier instance.""" return IntentClassifier() # ========================================================================= # Location-Based Queries (Class 1) # ========================================================================= @pytest.mark.parametrize("question,expected_template,expected_slots", [ # Dutch - Archives in province ( "Welke archieven zijn er in Drenthe?", "region_institution_search", {"institution_type": "archieven", "province": "Drenthe"} ), # Dutch - Museums in province ( "Welke musea zijn er in Noord-Holland?", "region_institution_search", {"institution_type": "musea", "province": "Noord-Holland"} ), # Dutch - Libraries in province ( "Welke bibliotheken zijn er in Utrecht?", "region_institution_search", {"institution_type": "bibliotheken", "province": "Utrecht"} ), # English variant ( "Archives in Drenthe", "region_institution_search", {"institution_type": "Archives", "province": "Drenthe"} ), # Informal Dutch ( "archieven in drenthe", 
"region_institution_search", {"institution_type": "archieven", "province": "drenthe"} ), ]) def test_location_based_intent(self, classifier, question, expected_template, expected_slots): """Test classification of location-based queries.""" intent = classifier.classify(question) assert intent.template_id == expected_template assert intent.extracted_slots == expected_slots # ========================================================================= # Aggregation Queries (Class 5) # ========================================================================= @pytest.mark.parametrize("question,expected_template", [ ("Hoeveel archieven zijn er in Nederland?", "count_by_type"), ("Hoeveel musea zijn er in Drenthe?", "count_by_type_region"), ("Tel alle bibliotheken", "count_by_type"), ("How many archives are there?", "count_by_type"), ]) def test_aggregation_intent(self, classifier, question, expected_template): """Test classification of aggregation queries.""" intent = classifier.classify(question) assert intent.template_id == expected_template # ========================================================================= # Entity Lookup Queries (Class 3) # ========================================================================= @pytest.mark.parametrize("question,expected_template", [ ("Wat is het Nationaal Archief?", "entity_lookup"), ("Informatie over Rijksmuseum", "entity_lookup"), ("Details van NL-HaNA", "entity_lookup_by_ghcid"), ]) def test_entity_lookup_intent(self, classifier, question, expected_template): """Test classification of entity lookup queries.""" intent = classifier.classify(question) assert intent.template_id == expected_template # ========================================================================= # Fallback Cases # ========================================================================= @pytest.mark.parametrize("question", [ "Wat is de beste manier om een archief te digitaliseren?", "Hoe kan ik toegang krijgen tot de collectie?", "Hello, I have a 
question", ]) def test_fallback_to_llm(self, classifier, question): """Test that unmatched questions fall back to LLM.""" intent = classifier.classify(question) assert intent.template_id == "llm_fallback" class TestIntentClassifierEdgeCases: """Edge cases and error handling.""" @pytest.fixture def classifier(self): return IntentClassifier() def test_empty_question(self, classifier): """Empty question should raise ValueError.""" with pytest.raises(ValueError, match="Question cannot be empty"): classifier.classify("") def test_very_long_question(self, classifier): """Very long questions should still be classified.""" long_question = "Welke archieven " + "zijn er " * 100 + "in Drenthe?" intent = classifier.classify(long_question) # Should still extract the key information assert intent.template_id is not None def test_question_with_special_characters(self, classifier): """Questions with special characters should be handled.""" intent = classifier.classify("Welke archieven zijn er in 's-Hertogenbosch?") assert intent.template_id is not None ``` ### 2. 
Unit Tests - Slot Extraction ```python # tests/template_sparql/test_slot_extractor.py import pytest from backend.rag.template_sparql.slot_extractor import SlotExtractor class TestSlotExtractor: """Test suite for slot value extraction.""" @pytest.fixture def extractor(self): return SlotExtractor() # ========================================================================= # Province Code Extraction # ========================================================================= @pytest.mark.parametrize("input_province,expected_code", [ # Standard Dutch province names ("Drenthe", "NL-DR"), ("Noord-Holland", "NL-NH"), ("Zuid-Holland", "NL-ZH"), ("Noord-Brabant", "NL-NB"), ("Utrecht", "NL-UT"), ("Gelderland", "NL-GE"), ("Limburg", "NL-LI"), ("Overijssel", "NL-OV"), ("Flevoland", "NL-FL"), ("Friesland", "NL-FR"), ("Groningen", "NL-GR"), ("Zeeland", "NL-ZE"), # Case variations ("drenthe", "NL-DR"), ("DRENTHE", "NL-DR"), ("DrEnThE", "NL-DR"), # Alternative spellings ("Fryslan", "NL-FR"), ("Fryslân", "NL-FR"), ]) def test_province_to_code(self, extractor, input_province, expected_code): """Test province name to ISO 3166-2 code conversion.""" result = extractor.extract_province_code(input_province) assert result == expected_code def test_unknown_province(self, extractor): """Unknown province should return None.""" result = extractor.extract_province_code("Atlantis") assert result is None # ========================================================================= # Institution Type Extraction # ========================================================================= @pytest.mark.parametrize("input_type,expected_code", [ # Dutch singular/plural ("archief", "A"), ("archieven", "A"), ("museum", "M"), ("musea", "M"), ("bibliotheek", "L"), ("bibliotheken", "L"), ("galerie", "G"), ("galerijen", "G"), # English ("archive", "A"), ("archives", "A"), ("museum", "M"), ("museums", "M"), ("library", "L"), ("libraries", "L"), ("gallery", "G"), ("galleries", "G"), # Descriptive terms 
("regionaal archief", "A"), ("stadsarchief", "A"), ("rijksmuseum", "M"), ("openbare bibliotheek", "L"), ]) def test_institution_type_to_code(self, extractor, input_type, expected_code): """Test institution type to single-letter code conversion.""" result = extractor.extract_institution_type_code(input_type) assert result == expected_code # ========================================================================= # Full Slot Extraction # ========================================================================= def test_extract_all_slots_location_query(self, extractor): """Test full slot extraction for location query.""" question = "Welke archieven zijn er in Drenthe?" template_slots = { "institution_type_code": {"required": True}, "province_code": {"required": True}, } result = extractor.extract(question, template_slots) assert result["institution_type_code"] == "A" assert result["province_code"] == "NL-DR" def test_extract_with_missing_required_slot(self, extractor): """Missing required slot should raise ValueError.""" question = "Welke zijn er in Nederland?" # No institution type template_slots = { "institution_type_code": {"required": True}, "province_code": {"required": False}, } with pytest.raises(ValueError, match="Missing required slot"): extractor.extract(question, template_slots) class TestSlotExtractorFuzzyMatching: """Test fuzzy matching for slot extraction.""" @pytest.fixture def extractor(self): return SlotExtractor(fuzzy_threshold=80) @pytest.mark.parametrize("input_province,expected_code", [ ("Drent", "NL-DR"), # Partial match ("Dremthe", "NL-DR"), # Typo ("N-Holland", "NL-NH"), # Abbreviation ("Noordholland", "NL-NH"), # Without hyphen ]) def test_fuzzy_province_matching(self, extractor, input_province, expected_code): """Test fuzzy matching for province names.""" result = extractor.extract_province_code(input_province, fuzzy=True) assert result == expected_code ``` ### 3. 
Unit Tests - Template Instantiation

```python
# tests/template_sparql/test_template_instantiation.py
import pytest
from backend.rag.template_sparql.templates import SimpleTemplate, CompositeTemplate, SlotDefinition


class TestSimpleTemplate:
    """Test suite for simple template instantiation."""

    def test_basic_instantiation(self):
        """Test basic template slot filling."""
        template = SimpleTemplate(
            template_id="region_search",
            description="Search by region",
            slots={
                "province_code": SlotDefinition(name="province_code"),
            },
            sparql_template="""
            PREFIX hc: <https://example.org/hc#>  # TODO: restore the real hc namespace IRI (the angle-bracketed IRI was lost in document conversion)
            SELECT ?s WHERE {
              ?s a hc:Custodian .
              FILTER(CONTAINS(STR(?s), "{{province_code}}"))
            }"""
        )
        result = template.instantiate({"province_code": "NL-DR"})
        assert "NL-DR" in result
        assert "{{province_code}}" not in result

    def test_multiple_slots(self):
        """Test template with multiple slots."""
        template = SimpleTemplate(
            template_id="type_region_search",
            description="Search by type and region",
            slots={
                "institution_type_code": SlotDefinition(name="institution_type_code"),
                "province_code": SlotDefinition(name="province_code"),
            },
            sparql_template="""
            SELECT ?s WHERE {
              ?s hcp:institutionType "{{institution_type_code}}" .
FILTER(CONTAINS(STR(?s), "{{province_code}}")) }""" ) result = template.instantiate({ "institution_type_code": "A", "province_code": "NL-DR" }) assert '"A"' in result assert "NL-DR" in result def test_missing_required_slot(self): """Missing required slot should raise ValueError.""" template = SimpleTemplate( template_id="test", description="Test", slots={ "province_code": SlotDefinition(name="province_code", required=True), }, sparql_template="FILTER(CONTAINS(STR(?s), '{{province_code}}'))" ) with pytest.raises(ValueError, match="Missing required slot"): template.instantiate({}) def test_invalid_slot_value(self): """Invalid slot value should raise ValueError.""" template = SimpleTemplate( template_id="test", description="Test", slots={ "institution_type_code": SlotDefinition( name="institution_type_code", valid_values=["A", "M", "L", "G"] ), }, sparql_template='hcp:institutionType "{{institution_type_code}}"' ) with pytest.raises(ValueError, match="Invalid value"): template.instantiate({"institution_type_code": "X"}) def test_optional_slot_not_provided(self): """Optional slot not provided should use default.""" template = SimpleTemplate( template_id="test", description="Test", slots={ "limit": SlotDefinition( name="limit", required=False, default_value="100" ), }, sparql_template="LIMIT {{limit}}" ) result = template.instantiate({}) assert "LIMIT 100" in result class TestCompositeTemplate: """Test suite for composite template instantiation.""" def test_and_composition(self): """Test AND composition of sub-templates.""" type_filter = SimpleTemplate( template_id="type_filter", description="Filter by type", slots={"type": SlotDefinition(name="type")}, sparql_template='?s hcp:institutionType "{{type}}" .' 
) region_filter = SimpleTemplate( template_id="region_filter", description="Filter by region", slots={"region": SlotDefinition(name="region")}, sparql_template='FILTER(CONTAINS(STR(?s), "{{region}}"))' ) composite = CompositeTemplate( template_id="type_region", description="Type and region filter", slots={ "type": SlotDefinition(name="type"), "region": SlotDefinition(name="region"), }, sub_templates=[type_filter, region_filter], join_type="AND" ) result = composite.instantiate({"type": "A", "region": "NL-DR"}) assert '"A"' in result assert "NL-DR" in result ``` ### 4. Integration Tests - Query Pipeline ```python # tests/template_sparql/test_query_pipeline.py import pytest from backend.rag.template_sparql.pipeline import QueryPipeline, QueryContext class TestQueryPipeline: """End-to-end pipeline tests.""" @pytest.fixture def pipeline(self): """Create fully configured pipeline.""" return QueryPipeline.create_default() # ========================================================================= # Successful Query Generation # ========================================================================= @pytest.mark.parametrize("question,expected_contains", [ ( "Welke archieven zijn er in Drenthe?", ['hc:Custodian', '"A"', 'NL-DR'] ), ( "Welke musea zijn er in Noord-Holland?", ['hc:Custodian', '"M"', 'NL-NH'] ), ( "Hoeveel bibliotheken zijn er in Utrecht?", ['COUNT', '"L"', 'NL-UT'] ), ]) def test_successful_query_generation(self, pipeline, question, expected_contains): """Test successful end-to-end query generation.""" context = QueryContext(original_question=question) result = pipeline.process(context) assert result.errors == [] assert result.sparql_query is not None for expected in expected_contains: assert expected in result.sparql_query, \ f"Expected '{expected}' in query:\n{result.sparql_query}" # ========================================================================= # Error Handling # ========================================================================= def 
test_invalid_slot_value_error(self, pipeline):
        """Test error handling for invalid slot values."""
        # Question with unrecognized province
        context = QueryContext(original_question="Welke archieven zijn er in Atlantis?")
        result = pipeline.process(context)

        # Should either fall back to LLM or report error
        assert result.sparql_query is not None or len(result.errors) > 0

    # =========================================================================
    # SPARQL Validation
    # =========================================================================

    def test_generated_sparql_is_valid(self, pipeline):
        """Test that all generated SPARQL queries are syntactically valid."""
        questions = [
            "Welke archieven zijn er in Drenthe?",
            "Welke musea zijn er in Noord-Holland?",
            "Hoeveel bibliotheken zijn er?",
        ]

        # Import hoisted out of the loop: it is loop-invariant
        from glam_extractor.api.sparql_linter import lint_sparql

        for question in questions:
            context = QueryContext(original_question=question)
            result = pipeline.process(context)

            if result.sparql_query:
                # Validate with sparql_linter
                lint_result = lint_sparql(result.sparql_query)
                assert lint_result.valid, \
                    f"Invalid SPARQL for '{question}':\n{result.sparql_query}\nErrors: {lint_result.issues}"


class TestQueryPipelineWithSPARQLEndpoint:
    """Integration tests with actual SPARQL endpoint."""

    @pytest.fixture
    def pipeline(self):
        return QueryPipeline.create_default()

    @pytest.mark.integration
    @pytest.mark.asyncio  # required: without pytest-asyncio's marker this coroutine test is collected but never awaited
    @pytest.mark.parametrize("question,min_results", [
        ("Welke archieven zijn er in Drenthe?", 1),
        ("Welke musea zijn er in Noord-Holland?", 1),
    ])
    async def test_query_returns_results(self, pipeline, question, min_results):
        """Test that generated queries return expected results."""
        import httpx

        context = QueryContext(original_question=question)
        result = pipeline.process(context)

        assert result.sparql_query is not None

        # Execute against SPARQL endpoint
        async with httpx.AsyncClient() as client:
            response = await client.post(
                "https://bronhouder.nl/sparql",
                data={"query": result.sparql_query},
                headers={"Accept":
"application/sparql-results+json"} ) assert response.status_code == 200 data = response.json() results = data.get("results", {}).get("bindings", []) assert len(results) >= min_results, \ f"Expected at least {min_results} results for '{question}'" ``` ### 5. Fixtures and Shared Setup ```python # tests/template_sparql/conftest.py import pytest from pathlib import Path import yaml @pytest.fixture(scope="session") def validation_rules(): """Load SPARQL validation rules.""" rules_path = Path("data/validation/sparql_validation_rules.json") import json with open(rules_path) as f: return json.load(f) @pytest.fixture(scope="session") def template_config(): """Load template configuration.""" config_path = Path("data/templates/sparql_templates.yaml") with open(config_path) as f: return yaml.safe_load(f) @pytest.fixture def sample_questions(): """Sample questions for testing.""" return { "location_archive_drenthe": "Welke archieven zijn er in Drenthe?", "location_museum_nh": "Welke musea zijn er in Noord-Holland?", "count_libraries": "Hoeveel bibliotheken zijn er in Nederland?", "entity_lookup": "Wat is het Nationaal Archief?", "complex_query": "Welke archieven in Drenthe hebben meer dan 1000 items?", } @pytest.fixture def expected_sparql_patterns(): """Expected patterns in generated SPARQL.""" return { "location_archive_drenthe": { "must_contain": ['hc:Custodian', 'hcp:institutionType', '"A"', 'NL-DR'], "must_not_contain": ['crm:', 'cidoc:'], }, "location_museum_nh": { "must_contain": ['hc:Custodian', '"M"', 'NL-NH'], "must_not_contain": ['crm:', 'cidoc:'], }, } ``` ## Test Execution ### Running Tests ```bash # Run all template SPARQL tests pytest tests/template_sparql/ -v # Run with coverage pytest tests/template_sparql/ --cov=backend.rag.template_sparql --cov-report=html # Run only unit tests (fast) pytest tests/template_sparql/ -v -m "not integration" # Run integration tests (requires SPARQL endpoint) pytest tests/template_sparql/ -v -m integration # Run specific test file 
pytest tests/template_sparql/test_intent_classifier.py -v # Run specific test case pytest tests/template_sparql/test_intent_classifier.py::TestIntentClassifier::test_location_based_intent -v ``` ### Test Markers ```python # conftest.py at project root import pytest def pytest_configure(config): config.addinivalue_line("markers", "integration: marks tests as integration tests") config.addinivalue_line("markers", "slow: marks tests as slow") ``` ## Coverage Targets | Component | Target Coverage | |-----------|-----------------| | IntentClassifier | 95% | | SlotExtractor | 95% | | TemplateRegistry | 90% | | Template instantiation | 95% | | QueryPipeline | 85% | | Integration | 80% | ## Test Data Management ### Golden Test Cases Store expected outputs for regression testing: ```yaml # tests/template_sparql/golden/location_queries.yaml test_cases: - id: "archive_drenthe_001" input: question: "Welke archieven zijn er in Drenthe?" language: "nl" expected: template_id: "region_institution_search" slots: institution_type_code: "A" province_code: "NL-DR" sparql_contains: - "hc:Custodian" - 'hcp:institutionType "A"' - 'FILTER(CONTAINS(STR(?' 
- '"NL-DR"'
```

### Loading Golden Cases

```python
@pytest.fixture
def golden_cases():
    """Load golden test cases."""
    import yaml
    from pathlib import Path  # needed: Path is not otherwise in scope in this snippet

    golden_dir = Path("tests/template_sparql/golden")
    cases = {}
    for yaml_file in golden_dir.glob("*.yaml"):
        with open(yaml_file) as f:
            data = yaml.safe_load(f)
            cases.update({
                case["id"]: case
                for case in data.get("test_cases", [])
            })
    return cases


def test_golden_cases(pipeline, golden_cases):
    """Test against golden cases."""
    for case_id, case in golden_cases.items():
        context = QueryContext(original_question=case["input"]["question"])
        result = pipeline.process(context)

        assert result.intent.template_id == case["expected"]["template_id"], \
            f"Case {case_id}: template mismatch"

        for pattern in case["expected"]["sparql_contains"]:
            assert pattern in result.sparql_query, \
                f"Case {case_id}: missing pattern '{pattern}'"
```

## Continuous Integration

```yaml
# .github/workflows/template-sparql-tests.yml
name: Template SPARQL Tests

on:
  push:
    paths:
      - 'backend/rag/template_sparql/**'
      - 'tests/template_sparql/**'
  pull_request:
    paths:
      - 'backend/rag/template_sparql/**'
      - 'tests/template_sparql/**'

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          pip install -e ".[dev]"

      - name: Run unit tests
        run: |
          pytest tests/template_sparql/ -v -m "not integration" --cov=backend.rag.template_sparql

      - name: Upload coverage
        uses: codecov/codecov-action@v4
```