Test-Driven Development Strategy
Overview
This document defines the TDD approach for implementing the template-based SPARQL system. Tests are organized by component and follow the Red-Green-Refactor cycle.
Test Organization
tests/
template_sparql/
__init__.py
test_intent_classifier.py # Intent classification tests
test_slot_extractor.py # Slot extraction tests
test_template_registry.py # Template registry tests
test_template_instantiation.py # Template filling tests
test_sparql_validation.py # SPARQL validation tests
test_query_pipeline.py # End-to-end pipeline tests
test_integration.py # Integration with RAG system
conftest.py # Shared fixtures
Test Categories
1. Unit Tests - Intent Classification
# tests/template_sparql/test_intent_classifier.py
import pytest
from backend.rag.template_sparql.intent_classifier import IntentClassifier, QueryIntent
class TestIntentClassifier:
    """Unit tests for classifying questions into SPARQL query templates."""

    @pytest.fixture
    def classifier(self):
        """Provide a fresh IntentClassifier for each test."""
        return IntentClassifier()

    # -------------------------------------------------------------------------
    # Location-based queries (class 1)
    # -------------------------------------------------------------------------
    @pytest.mark.parametrize("question,expected_template,expected_slots", [
        # Dutch - archives in a province
        (
            "Welke archieven zijn er in Drenthe?",
            "region_institution_search",
            {"institution_type": "archieven", "province": "Drenthe"},
        ),
        # Dutch - museums in a province
        (
            "Welke musea zijn er in Noord-Holland?",
            "region_institution_search",
            {"institution_type": "musea", "province": "Noord-Holland"},
        ),
        # Dutch - libraries in a province
        (
            "Welke bibliotheken zijn er in Utrecht?",
            "region_institution_search",
            {"institution_type": "bibliotheken", "province": "Utrecht"},
        ),
        # English variant
        (
            "Archives in Drenthe",
            "region_institution_search",
            {"institution_type": "Archives", "province": "Drenthe"},
        ),
        # Informal, lower-case Dutch
        (
            "archieven in drenthe",
            "region_institution_search",
            {"institution_type": "archieven", "province": "drenthe"},
        ),
    ])
    def test_location_based_intent(self, classifier, question, expected_template, expected_slots):
        """Location questions select the region template and fill both slots."""
        intent = classifier.classify(question)
        assert intent.template_id == expected_template
        assert intent.extracted_slots == expected_slots

    # -------------------------------------------------------------------------
    # Aggregation queries (class 5)
    # -------------------------------------------------------------------------
    @pytest.mark.parametrize("question,expected_template", [
        ("Hoeveel archieven zijn er in Nederland?", "count_by_type"),
        ("Hoeveel musea zijn er in Drenthe?", "count_by_type_region"),
        ("Tel alle bibliotheken", "count_by_type"),
        ("How many archives are there?", "count_by_type"),
    ])
    def test_aggregation_intent(self, classifier, question, expected_template):
        """Counting questions select the matching count template."""
        intent = classifier.classify(question)
        assert intent.template_id == expected_template

    # -------------------------------------------------------------------------
    # Entity lookup queries (class 3)
    # -------------------------------------------------------------------------
    @pytest.mark.parametrize("question,expected_template", [
        ("Wat is het Nationaal Archief?", "entity_lookup"),
        ("Informatie over Rijksmuseum", "entity_lookup"),
        ("Details van NL-HaNA", "entity_lookup_by_ghcid"),
    ])
    def test_entity_lookup_intent(self, classifier, question, expected_template):
        """Lookup questions select the entity-lookup template (by name or GHC id)."""
        intent = classifier.classify(question)
        assert intent.template_id == expected_template

    # -------------------------------------------------------------------------
    # Fallback cases
    # -------------------------------------------------------------------------
    @pytest.mark.parametrize("question", [
        "Wat is de beste manier om een archief te digitaliseren?",
        "Hoe kan ik toegang krijgen tot de collectie?",
        "Hello, I have a question",
    ])
    def test_fallback_to_llm(self, classifier, question):
        """Questions matching no template route to the LLM fallback."""
        intent = classifier.classify(question)
        assert intent.template_id == "llm_fallback"
class TestIntentClassifierEdgeCases:
    """Edge cases and error handling for the classifier."""

    @pytest.fixture
    def classifier(self):
        return IntentClassifier()

    def test_empty_question(self, classifier):
        """An empty question raises ValueError."""
        with pytest.raises(ValueError, match="Question cannot be empty"):
            classifier.classify("")

    def test_very_long_question(self, classifier):
        """A heavily padded question is still classified."""
        long_question = "Welke archieven " + "zijn er " * 100 + "in Drenthe?"
        intent = classifier.classify(long_question)
        # The key tokens should still be picked up despite the noise.
        assert intent.template_id is not None

    def test_question_with_special_characters(self, classifier):
        """Apostrophes and hyphens (e.g. 's-Hertogenbosch) are handled."""
        intent = classifier.classify("Welke archieven zijn er in 's-Hertogenbosch?")
        assert intent.template_id is not None
2. Unit Tests - Slot Extraction
# tests/template_sparql/test_slot_extractor.py
import pytest
from backend.rag.template_sparql.slot_extractor import SlotExtractor
class TestSlotExtractor:
    """Unit tests for mapping free-text slot values to canonical codes."""

    @pytest.fixture
    def extractor(self):
        """Provide a fresh SlotExtractor for each test."""
        return SlotExtractor()

    # -------------------------------------------------------------------------
    # Province code extraction
    # -------------------------------------------------------------------------
    @pytest.mark.parametrize("input_province,expected_code", [
        # Standard Dutch province names
        ("Drenthe", "NL-DR"),
        ("Noord-Holland", "NL-NH"),
        ("Zuid-Holland", "NL-ZH"),
        ("Noord-Brabant", "NL-NB"),
        ("Utrecht", "NL-UT"),
        ("Gelderland", "NL-GE"),
        ("Limburg", "NL-LI"),
        ("Overijssel", "NL-OV"),
        ("Flevoland", "NL-FL"),
        ("Friesland", "NL-FR"),
        ("Groningen", "NL-GR"),
        ("Zeeland", "NL-ZE"),
        # Case variations
        ("drenthe", "NL-DR"),
        ("DRENTHE", "NL-DR"),
        ("DrEnThE", "NL-DR"),
        # Alternative (Frisian) spellings
        ("Fryslan", "NL-FR"),
        ("Fryslân", "NL-FR"),
    ])
    def test_province_to_code(self, extractor, input_province, expected_code):
        """Province names convert to ISO 3166-2:NL codes, case-insensitively."""
        result = extractor.extract_province_code(input_province)
        assert result == expected_code

    def test_unknown_province(self, extractor):
        """Unknown province names yield None rather than raising."""
        result = extractor.extract_province_code("Atlantis")
        assert result is None

    # -------------------------------------------------------------------------
    # Institution type extraction
    # -------------------------------------------------------------------------
    @pytest.mark.parametrize("input_type,expected_code", [
        # Dutch singular/plural
        ("archief", "A"),
        ("archieven", "A"),
        ("museum", "M"),  # "museum" is identical in Dutch and English
        ("musea", "M"),
        ("bibliotheek", "L"),
        ("bibliotheken", "L"),
        ("galerie", "G"),
        ("galerijen", "G"),
        # English
        ("archive", "A"),
        ("archives", "A"),
        # NOTE: ("museum", "M") deliberately not repeated here — it would
        # duplicate the Dutch entry above and produce duplicated test IDs.
        ("museums", "M"),
        ("library", "L"),
        ("libraries", "L"),
        ("gallery", "G"),
        ("galleries", "G"),
        # Descriptive terms
        ("regionaal archief", "A"),
        ("stadsarchief", "A"),
        ("rijksmuseum", "M"),
        ("openbare bibliotheek", "L"),
    ])
    def test_institution_type_to_code(self, extractor, input_type, expected_code):
        """Institution type words map to the single-letter registry code."""
        result = extractor.extract_institution_type_code(input_type)
        assert result == expected_code

    # -------------------------------------------------------------------------
    # Full slot extraction
    # -------------------------------------------------------------------------
    def test_extract_all_slots_location_query(self, extractor):
        """A location question yields both the type and the province codes."""
        question = "Welke archieven zijn er in Drenthe?"
        template_slots = {
            "institution_type_code": {"required": True},
            "province_code": {"required": True},
        }
        result = extractor.extract(question, template_slots)
        assert result["institution_type_code"] == "A"
        assert result["province_code"] == "NL-DR"

    def test_extract_with_missing_required_slot(self, extractor):
        """A missing required slot raises ValueError with a clear message."""
        question = "Welke zijn er in Nederland?"  # no institution type present
        template_slots = {
            "institution_type_code": {"required": True},
            "province_code": {"required": False},
        }
        with pytest.raises(ValueError, match="Missing required slot"):
            extractor.extract(question, template_slots)
class TestSlotExtractorFuzzyMatching:
    """Fuzzy (approximate) slot matching behaviour."""

    @pytest.fixture
    def extractor(self):
        # Threshold 80 tolerates small typos without over-matching.
        return SlotExtractor(fuzzy_threshold=80)

    @pytest.mark.parametrize("input_province,expected_code", [
        ("Drent", "NL-DR"),         # partial match
        ("Dremthe", "NL-DR"),       # typo
        ("N-Holland", "NL-NH"),     # abbreviation
        ("Noordholland", "NL-NH"),  # missing hyphen
    ])
    def test_fuzzy_province_matching(self, extractor, input_province, expected_code):
        """Near-miss province names still resolve when fuzzy=True."""
        result = extractor.extract_province_code(input_province, fuzzy=True)
        assert result == expected_code
3. Unit Tests - Template Instantiation
# tests/template_sparql/test_template_instantiation.py
import pytest
from backend.rag.template_sparql.templates import SimpleTemplate, CompositeTemplate, SlotDefinition
class TestSimpleTemplate:
    """Unit tests for SimpleTemplate slot filling and validation."""

    def test_basic_instantiation(self):
        """A single slot value is substituted; no placeholder remains."""
        template = SimpleTemplate(
            template_id="region_search",
            description="Search by region",
            slots={
                "province_code": SlotDefinition(name="province_code"),
            },
            sparql_template="""
            PREFIX hc: <https://nde.nl/ontology/hc/class/>
            SELECT ?s WHERE {
              ?s a hc:Custodian .
              FILTER(CONTAINS(STR(?s), "{{province_code}}"))
            }""",
        )
        result = template.instantiate({"province_code": "NL-DR"})
        assert "NL-DR" in result
        assert "{{province_code}}" not in result

    def test_multiple_slots(self):
        """Every slot in a multi-slot template is substituted."""
        template = SimpleTemplate(
            template_id="type_region_search",
            description="Search by type and region",
            slots={
                "institution_type_code": SlotDefinition(name="institution_type_code"),
                "province_code": SlotDefinition(name="province_code"),
            },
            sparql_template="""
            SELECT ?s WHERE {
              ?s hcp:institutionType "{{institution_type_code}}" .
              FILTER(CONTAINS(STR(?s), "{{province_code}}"))
            }""",
        )
        result = template.instantiate({
            "institution_type_code": "A",
            "province_code": "NL-DR",
        })
        assert '"A"' in result
        assert "NL-DR" in result

    def test_missing_required_slot(self):
        """Omitting a required slot raises ValueError."""
        template = SimpleTemplate(
            template_id="test",
            description="Test",
            slots={
                "province_code": SlotDefinition(name="province_code", required=True),
            },
            sparql_template="FILTER(CONTAINS(STR(?s), '{{province_code}}'))",
        )
        with pytest.raises(ValueError, match="Missing required slot"):
            template.instantiate({})

    def test_invalid_slot_value(self):
        """A value outside valid_values raises ValueError."""
        template = SimpleTemplate(
            template_id="test",
            description="Test",
            slots={
                "institution_type_code": SlotDefinition(
                    name="institution_type_code",
                    valid_values=["A", "M", "L", "G"],
                ),
            },
            sparql_template='hcp:institutionType "{{institution_type_code}}"',
        )
        with pytest.raises(ValueError, match="Invalid value"):
            template.instantiate({"institution_type_code": "X"})

    def test_optional_slot_not_provided(self):
        """An omitted optional slot falls back to its default value."""
        template = SimpleTemplate(
            template_id="test",
            description="Test",
            slots={
                "limit": SlotDefinition(
                    name="limit",
                    required=False,
                    default_value="100",
                ),
            },
            sparql_template="LIMIT {{limit}}",
        )
        result = template.instantiate({})
        assert "LIMIT 100" in result
class TestCompositeTemplate:
    """Unit tests for composing simple templates into one query."""

    def test_and_composition(self):
        """AND composition includes the output of every sub-template."""
        type_filter = SimpleTemplate(
            template_id="type_filter",
            description="Filter by type",
            slots={"type": SlotDefinition(name="type")},
            sparql_template='?s hcp:institutionType "{{type}}" .',
        )
        region_filter = SimpleTemplate(
            template_id="region_filter",
            description="Filter by region",
            slots={"region": SlotDefinition(name="region")},
            sparql_template='FILTER(CONTAINS(STR(?s), "{{region}}"))',
        )
        composite = CompositeTemplate(
            template_id="type_region",
            description="Type and region filter",
            slots={
                "type": SlotDefinition(name="type"),
                "region": SlotDefinition(name="region"),
            },
            sub_templates=[type_filter, region_filter],
            join_type="AND",
        )
        result = composite.instantiate({"type": "A", "region": "NL-DR"})
        assert '"A"' in result
        assert "NL-DR" in result
4. Integration Tests - Query Pipeline
# tests/template_sparql/test_query_pipeline.py
import pytest
from backend.rag.template_sparql.pipeline import QueryPipeline, QueryContext
class TestQueryPipeline:
    """End-to-end pipeline tests: question in, SPARQL (or errors) out."""

    @pytest.fixture
    def pipeline(self):
        """Create a fully configured pipeline with default components."""
        return QueryPipeline.create_default()

    # -------------------------------------------------------------------------
    # Successful query generation
    # -------------------------------------------------------------------------
    @pytest.mark.parametrize("question,expected_contains", [
        (
            "Welke archieven zijn er in Drenthe?",
            ['hc:Custodian', '"A"', 'NL-DR'],
        ),
        (
            "Welke musea zijn er in Noord-Holland?",
            ['hc:Custodian', '"M"', 'NL-NH'],
        ),
        (
            "Hoeveel bibliotheken zijn er in Utrecht?",
            ['COUNT', '"L"', 'NL-UT'],
        ),
    ])
    def test_successful_query_generation(self, pipeline, question, expected_contains):
        """Each supported question produces a query containing the key tokens."""
        context = QueryContext(original_question=question)
        result = pipeline.process(context)
        assert result.errors == []
        assert result.sparql_query is not None
        for expected in expected_contains:
            assert expected in result.sparql_query, \
                f"Expected '{expected}' in query:\n{result.sparql_query}"

    # -------------------------------------------------------------------------
    # Error handling
    # -------------------------------------------------------------------------
    def test_invalid_slot_value_error(self, pipeline):
        """An unknown province yields either an LLM fallback query or errors."""
        context = QueryContext(original_question="Welke archieven zijn er in Atlantis?")
        result = pipeline.process(context)
        # Should either fall back to LLM or report error
        assert result.sparql_query is not None or len(result.errors) > 0

    # -------------------------------------------------------------------------
    # SPARQL validation
    # -------------------------------------------------------------------------
    def test_generated_sparql_is_valid(self, pipeline):
        """All generated SPARQL queries must pass the project linter."""
        # Hoisted out of the loop: the import only needs to run once.
        from glam_extractor.api.sparql_linter import lint_sparql

        questions = [
            "Welke archieven zijn er in Drenthe?",
            "Welke musea zijn er in Noord-Holland?",
            "Hoeveel bibliotheken zijn er?",
        ]
        for question in questions:
            context = QueryContext(original_question=question)
            result = pipeline.process(context)
            if result.sparql_query:
                lint_result = lint_sparql(result.sparql_query)
                assert lint_result.valid, \
                    f"Invalid SPARQL for '{question}':\n{result.sparql_query}\nErrors: {lint_result.issues}"
class TestQueryPipelineWithSPARQLEndpoint:
    """Integration tests that execute generated queries against the live endpoint."""

    @pytest.fixture
    def pipeline(self):
        return QueryPipeline.create_default()

    @pytest.mark.integration
    # Without an async marker, pytest collects but does not await async tests;
    # requires the pytest-asyncio plugin (or asyncio_mode = "auto" in config).
    @pytest.mark.asyncio
    @pytest.mark.parametrize("question,min_results", [
        ("Welke archieven zijn er in Drenthe?", 1),
        ("Welke musea zijn er in Noord-Holland?", 1),
    ])
    async def test_query_returns_results(self, pipeline, question, min_results):
        """Generated queries return at least the expected number of bindings."""
        import httpx

        context = QueryContext(original_question=question)
        result = pipeline.process(context)
        assert result.sparql_query is not None
        # Execute against the public SPARQL endpoint.
        async with httpx.AsyncClient() as client:
            response = await client.post(
                "https://bronhouder.nl/sparql",
                data={"query": result.sparql_query},
                headers={"Accept": "application/sparql-results+json"},
            )
            assert response.status_code == 200
            data = response.json()
        results = data.get("results", {}).get("bindings", [])
        assert len(results) >= min_results, \
            f"Expected at least {min_results} results for '{question}'"
5. Fixtures and Shared Setup
# tests/template_sparql/conftest.py
import json
from pathlib import Path

import pytest
import yaml
@pytest.fixture(scope="session")
def validation_rules():
    """Load the SPARQL validation rule set once per test session."""
    rules_path = Path("data/validation/sparql_validation_rules.json")
    with open(rules_path) as f:
        return json.load(f)


@pytest.fixture(scope="session")
def template_config():
    """Load the SPARQL template configuration once per test session."""
    config_path = Path("data/templates/sparql_templates.yaml")
    with open(config_path) as f:
        return yaml.safe_load(f)


@pytest.fixture
def sample_questions():
    """Canonical sample questions, keyed by scenario name."""
    return {
        "location_archive_drenthe": "Welke archieven zijn er in Drenthe?",
        "location_museum_nh": "Welke musea zijn er in Noord-Holland?",
        "count_libraries": "Hoeveel bibliotheken zijn er in Nederland?",
        "entity_lookup": "Wat is het Nationaal Archief?",
        "complex_query": "Welke archieven in Drenthe hebben meer dan 1000 items?",
    }


@pytest.fixture
def expected_sparql_patterns():
    """Patterns that generated SPARQL must (and must not) contain, per scenario."""
    return {
        "location_archive_drenthe": {
            "must_contain": ['hc:Custodian', 'hcp:institutionType', '"A"', 'NL-DR'],
            "must_not_contain": ['crm:', 'cidoc:'],
        },
        "location_museum_nh": {
            "must_contain": ['hc:Custodian', '"M"', 'NL-NH'],
            "must_not_contain": ['crm:', 'cidoc:'],
        },
    }
Test Execution
Running Tests
# Run all template SPARQL tests
pytest tests/template_sparql/ -v
# Run with coverage
pytest tests/template_sparql/ --cov=backend.rag.template_sparql --cov-report=html
# Run only unit tests (fast)
pytest tests/template_sparql/ -v -m "not integration"
# Run integration tests (requires SPARQL endpoint)
pytest tests/template_sparql/ -v -m integration
# Run specific test file
pytest tests/template_sparql/test_intent_classifier.py -v
# Run specific test case
pytest tests/template_sparql/test_intent_classifier.py::TestIntentClassifier::test_location_based_intent -v
Test Markers
# conftest.py at project root
import pytest
def pytest_configure(config):
    """Register custom markers so `-m` selection works without warnings."""
    for marker in (
        "integration: marks tests as integration tests",
        "slow: marks tests as slow",
    ):
        config.addinivalue_line("markers", marker)
Coverage Targets
| Component | Target Coverage |
|---|---|
| IntentClassifier | 95% |
| SlotExtractor | 95% |
| TemplateRegistry | 90% |
| Template instantiation | 95% |
| QueryPipeline | 85% |
| Integration | 80% |
Test Data Management
Golden Test Cases
Store expected outputs for regression testing:
# tests/template_sparql/golden/location_queries.yaml
# Golden regression cases. Each entry records an input question, the template
# and slots the classifier is expected to select, and substrings that must
# appear in the generated SPARQL.
test_cases:
  - id: "archive_drenthe_001"
    input:
      question: "Welke archieven zijn er in Drenthe?"
      language: "nl"  # Dutch-language input
    expected:
      template_id: "region_institution_search"
      slots:
        institution_type_code: "A"   # A = archive
        province_code: "NL-DR"       # ISO 3166-2:NL code for Drenthe
      sparql_contains:
        - "hc:Custodian"
        - 'hcp:institutionType "A"'
        - 'FILTER(CONTAINS(STR(?'
        - '"NL-DR"'
Loading Golden Cases
@pytest.fixture
def golden_cases():
    """Load every golden test case from the golden directory, keyed by case id."""
    import yaml

    golden_dir = Path("tests/template_sparql/golden")
    cases = {}
    for yaml_file in golden_dir.glob("*.yaml"):
        with open(yaml_file) as f:
            data = yaml.safe_load(f)
        for case in data.get("test_cases", []):
            cases[case["id"]] = case
    return cases
def test_golden_cases(pipeline, golden_cases):
    """Run the pipeline against every recorded golden case."""
    for case_id, case in golden_cases.items():
        context = QueryContext(original_question=case["input"]["question"])
        result = pipeline.process(context)
        expected = case["expected"]
        assert result.intent.template_id == expected["template_id"], \
            f"Case {case_id}: template mismatch"
        for pattern in expected["sparql_contains"]:
            assert pattern in result.sparql_query, \
                f"Case {case_id}: missing pattern '{pattern}'"
Continuous Integration
# .github/workflows/template-sparql-tests.yml
# CI workflow: runs the template-SPARQL unit tests whenever the component
# or its tests change, on push and pull request.
name: Template SPARQL Tests
on:
  push:
    paths:
      - 'backend/rag/template_sparql/**'
      - 'tests/template_sparql/**'
  pull_request:
    paths:
      - 'backend/rag/template_sparql/**'
      - 'tests/template_sparql/**'
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
      - name: Install dependencies
        run: |
          pip install -e ".[dev]"
      # Integration tests are excluded in CI: they need a live SPARQL endpoint.
      - name: Run unit tests
        run: |
          pytest tests/template_sparql/ -v -m "not integration" --cov=backend.rag.template_sparql
      - name: Upload coverage
        uses: codecov/codecov-action@v4