# Test-Driven Development Strategy
|
|
|
|
## Overview
|
|
|
|
This document defines the TDD approach for implementing the template-based SPARQL system. Tests are organized by component and follow the Red-Green-Refactor cycle.
|
|
|
|
## Test Organization
|
|
|
|
```
tests/
└── template_sparql/
    ├── __init__.py
    ├── test_intent_classifier.py        # Intent classification tests
    ├── test_slot_extractor.py           # Slot extraction tests
    ├── test_template_registry.py        # Template registry tests
    ├── test_template_instantiation.py   # Template filling tests
    ├── test_sparql_validation.py        # SPARQL validation tests
    ├── test_query_pipeline.py           # End-to-end pipeline tests
    ├── test_integration.py              # Integration with RAG system
    └── conftest.py                      # Shared fixtures
```
|
|
|
|
## Test Categories
|
|
|
|
### 1. Unit Tests - Intent Classification
|
|
|
|
```python
|
|
# tests/template_sparql/test_intent_classifier.py
|
|
|
|
import pytest
|
|
from backend.rag.template_sparql.intent_classifier import IntentClassifier, QueryIntent
|
|
|
|
class TestIntentClassifier:
    """Verify that questions are routed to the expected query template."""

    @pytest.fixture
    def classifier(self):
        """Provide a freshly constructed classifier to each test."""
        return IntentClassifier()

    # -------------------------------------------------------------------------
    # Class 1: location-based queries
    # -------------------------------------------------------------------------

    @pytest.mark.parametrize(
        ("question", "expected_template", "expected_slots"),
        [
            # Dutch: archives in a province
            (
                "Welke archieven zijn er in Drenthe?",
                "region_institution_search",
                {"institution_type": "archieven", "province": "Drenthe"},
            ),
            # Dutch: museums in a province
            (
                "Welke musea zijn er in Noord-Holland?",
                "region_institution_search",
                {"institution_type": "musea", "province": "Noord-Holland"},
            ),
            # Dutch: libraries in a province
            (
                "Welke bibliotheken zijn er in Utrecht?",
                "region_institution_search",
                {"institution_type": "bibliotheken", "province": "Utrecht"},
            ),
            # English phrasing
            (
                "Archives in Drenthe",
                "region_institution_search",
                {"institution_type": "Archives", "province": "Drenthe"},
            ),
            # Informal, all-lowercase Dutch
            (
                "archieven in drenthe",
                "region_institution_search",
                {"institution_type": "archieven", "province": "drenthe"},
            ),
        ],
    )
    def test_location_based_intent(self, classifier, question, expected_template, expected_slots):
        """Location questions select the regional template.

        The expected slot values are spelled exactly as they appear in the
        question (same casing, same plural form).
        """
        classified = classifier.classify(question)

        assert classified.template_id == expected_template
        assert classified.extracted_slots == expected_slots

    # -------------------------------------------------------------------------
    # Class 5: aggregation queries
    # -------------------------------------------------------------------------

    @pytest.mark.parametrize(
        ("question", "expected_template"),
        [
            ("Hoeveel archieven zijn er in Nederland?", "count_by_type"),
            ("Hoeveel musea zijn er in Drenthe?", "count_by_type_region"),
            ("Tel alle bibliotheken", "count_by_type"),
            ("How many archives are there?", "count_by_type"),
        ],
    )
    def test_aggregation_intent(self, classifier, question, expected_template):
        """Counting questions select one of the ``count_*`` templates."""
        classified = classifier.classify(question)
        assert classified.template_id == expected_template

    # -------------------------------------------------------------------------
    # Class 3: entity lookup queries
    # -------------------------------------------------------------------------

    @pytest.mark.parametrize(
        ("question", "expected_template"),
        [
            ("Wat is het Nationaal Archief?", "entity_lookup"),
            ("Informatie over Rijksmuseum", "entity_lookup"),
            ("Details van NL-HaNA", "entity_lookup_by_ghcid"),
        ],
    )
    def test_entity_lookup_intent(self, classifier, question, expected_template):
        """Questions about one named institution select an entity-lookup template."""
        classified = classifier.classify(question)
        assert classified.template_id == expected_template

    # -------------------------------------------------------------------------
    # Fallback
    # -------------------------------------------------------------------------

    @pytest.mark.parametrize(
        "question",
        [
            "Wat is de beste manier om een archief te digitaliseren?",
            "Hoe kan ik toegang krijgen tot de collectie?",
            "Hello, I have a question",
        ],
    )
    def test_fallback_to_llm(self, classifier, question):
        """Questions that match no template are classified as ``llm_fallback``."""
        classified = classifier.classify(question)
        assert classified.template_id == "llm_fallback"
|
|
|
|
|
|
class TestIntentClassifierEdgeCases:
    """Unusual inputs: empty, very long, and punctuation-heavy questions."""

    @pytest.fixture
    def classifier(self):
        """Provide a freshly constructed classifier to each test."""
        return IntentClassifier()

    def test_empty_question(self, classifier):
        """An empty string is rejected with a ``ValueError``."""
        with pytest.raises(ValueError, match="Question cannot be empty"):
            classifier.classify("")

    def test_very_long_question(self, classifier):
        """A question padded with 100 filler phrases still gets a template."""
        padded_question = "".join(["Welke archieven ", "zijn er " * 100, "in Drenthe?"])

        classified = classifier.classify(padded_question)

        # Only checks that some template was chosen, not which one.
        assert classified.template_id is not None

    def test_question_with_special_characters(self, classifier):
        """An apostrophe and hyphen in a place name do not prevent classification."""
        classified = classifier.classify("Welke archieven zijn er in 's-Hertogenbosch?")
        assert classified.template_id is not None
|
|
```
|
|
|
|
### 2. Unit Tests - Slot Extraction
|
|
|
|
```python
|
|
# tests/template_sparql/test_slot_extractor.py
|
|
|
|
import pytest
|
|
from backend.rag.template_sparql.slot_extractor import SlotExtractor
|
|
|
|
class TestSlotExtractor:
    """Checks the mapping from words in a question to normalized slot codes."""

    @pytest.fixture
    def extractor(self):
        """Provide a default-configured extractor to each test."""
        return SlotExtractor()

    # -------------------------------------------------------------------------
    # Province names -> province codes
    # -------------------------------------------------------------------------

    @pytest.mark.parametrize(
        ("input_province", "expected_code"),
        [
            # Standard Dutch province names
            ("Drenthe", "NL-DR"),
            ("Noord-Holland", "NL-NH"),
            ("Zuid-Holland", "NL-ZH"),
            ("Noord-Brabant", "NL-NB"),
            ("Utrecht", "NL-UT"),
            ("Gelderland", "NL-GE"),
            ("Limburg", "NL-LI"),
            ("Overijssel", "NL-OV"),
            ("Flevoland", "NL-FL"),
            ("Friesland", "NL-FR"),
            ("Groningen", "NL-GR"),
            ("Zeeland", "NL-ZE"),
            # Casing must not matter
            ("drenthe", "NL-DR"),
            ("DRENTHE", "NL-DR"),
            ("DrEnThE", "NL-DR"),
            # Alternative spellings
            ("Fryslan", "NL-FR"),
            ("Fryslân", "NL-FR"),
        ],
    )
    def test_province_to_code(self, extractor, input_province, expected_code):
        """Each province name resolves to its ``NL-xx`` code."""
        assert extractor.extract_province_code(input_province) == expected_code

    def test_unknown_province(self, extractor):
        """A name that is not a province yields ``None``."""
        assert extractor.extract_province_code("Atlantis") is None

    # -------------------------------------------------------------------------
    # Institution words -> single-letter type codes
    # -------------------------------------------------------------------------

    @pytest.mark.parametrize(
        ("input_type", "expected_code"),
        [
            # Dutch, singular and plural
            ("archief", "A"),
            ("archieven", "A"),
            ("museum", "M"),
            ("musea", "M"),
            ("bibliotheek", "L"),
            ("bibliotheken", "L"),
            ("galerie", "G"),
            ("galerijen", "G"),
            # English, singular and plural
            ("archive", "A"),
            ("archives", "A"),
            # "museum" is spelled the same in both languages, so it is listed twice
            ("museum", "M"),
            ("museums", "M"),
            ("library", "L"),
            ("libraries", "L"),
            ("gallery", "G"),
            ("galleries", "G"),
            # Compound and descriptive terms
            ("regionaal archief", "A"),
            ("stadsarchief", "A"),
            ("rijksmuseum", "M"),
            ("openbare bibliotheek", "L"),
        ],
    )
    def test_institution_type_to_code(self, extractor, input_type, expected_code):
        """Each institution word resolves to its one-letter type code."""
        assert extractor.extract_institution_type_code(input_type) == expected_code

    # -------------------------------------------------------------------------
    # Whole-question extraction
    # -------------------------------------------------------------------------

    def test_extract_all_slots_location_query(self, extractor):
        """Both required slots are filled from a single location question."""
        slot_spec = {
            "institution_type_code": {"required": True},
            "province_code": {"required": True},
        }

        extracted = extractor.extract("Welke archieven zijn er in Drenthe?", slot_spec)

        assert extracted["institution_type_code"] == "A"
        assert extracted["province_code"] == "NL-DR"

    def test_extract_with_missing_required_slot(self, extractor):
        """A question lacking a required slot is rejected with ``ValueError``."""
        # The question names no institution type, which the spec requires.
        slot_spec = {
            "institution_type_code": {"required": True},
            "province_code": {"required": False},
        }

        with pytest.raises(ValueError, match="Missing required slot"):
            extractor.extract("Welke zijn er in Nederland?", slot_spec)
|
|
|
|
|
|
class TestSlotExtractorFuzzyMatching:
    """Province lookup when the input is only approximately a province name."""

    @pytest.fixture
    def extractor(self):
        """Provide an extractor configured with ``fuzzy_threshold=80``."""
        return SlotExtractor(fuzzy_threshold=80)

    @pytest.mark.parametrize(
        ("input_province", "expected_code"),
        [
            ("Drent", "NL-DR"),         # truncated name
            ("Dremthe", "NL-DR"),       # single-letter typo
            ("N-Holland", "NL-NH"),     # abbreviated prefix
            ("Noordholland", "NL-NH"),  # hyphen dropped
        ],
    )
    def test_fuzzy_province_matching(self, extractor, input_province, expected_code):
        """With ``fuzzy=True`` a near-miss spelling still resolves to the code."""
        matched_code = extractor.extract_province_code(input_province, fuzzy=True)
        assert matched_code == expected_code
|
|
```
|
|
|
|
### 3. Unit Tests - Template Instantiation
|
|
|
|
```python
|
|
# tests/template_sparql/test_template_instantiation.py
|
|
|
|
import pytest
|
|
from backend.rag.template_sparql.templates import SimpleTemplate, CompositeTemplate, SlotDefinition
|
|
|
|
class TestSimpleTemplate:
    """Slot filling, validation and defaults for a single ``SimpleTemplate``."""

    def test_basic_instantiation(self):
        """One slot is substituted and no placeholder is left behind."""
        query_text = """
            PREFIX hc: <https://nde.nl/ontology/hc/class/>
            SELECT ?s WHERE {
                ?s a hc:Custodian .
                FILTER(CONTAINS(STR(?s), "{{province_code}}"))
            }"""
        region_search = SimpleTemplate(
            template_id="region_search",
            description="Search by region",
            slots={"province_code": SlotDefinition(name="province_code")},
            sparql_template=query_text,
        )

        rendered = region_search.instantiate({"province_code": "NL-DR"})

        assert "NL-DR" in rendered
        assert "{{province_code}}" not in rendered

    def test_multiple_slots(self):
        """Two independent slots are both substituted."""
        query_text = """
            SELECT ?s WHERE {
                ?s hcp:institutionType "{{institution_type_code}}" .
                FILTER(CONTAINS(STR(?s), "{{province_code}}"))
            }"""
        type_region_search = SimpleTemplate(
            template_id="type_region_search",
            description="Search by type and region",
            slots={
                "institution_type_code": SlotDefinition(name="institution_type_code"),
                "province_code": SlotDefinition(name="province_code"),
            },
            sparql_template=query_text,
        )

        slot_values = {"institution_type_code": "A", "province_code": "NL-DR"}
        rendered = type_region_search.instantiate(slot_values)

        # The type code is checked including its surrounding quotes.
        assert '"A"' in rendered
        assert "NL-DR" in rendered

    def test_missing_required_slot(self):
        """Instantiating without a required slot raises ``ValueError``."""
        required_only = SimpleTemplate(
            template_id="test",
            description="Test",
            slots={"province_code": SlotDefinition(name="province_code", required=True)},
            sparql_template="FILTER(CONTAINS(STR(?s), '{{province_code}}'))",
        )

        with pytest.raises(ValueError, match="Missing required slot"):
            required_only.instantiate({})

    def test_invalid_slot_value(self):
        """A value outside ``valid_values`` raises ``ValueError``."""
        type_slot = SlotDefinition(
            name="institution_type_code",
            valid_values=["A", "M", "L", "G"],
        )
        restricted = SimpleTemplate(
            template_id="test",
            description="Test",
            slots={"institution_type_code": type_slot},
            sparql_template='hcp:institutionType "{{institution_type_code}}"',
        )

        with pytest.raises(ValueError, match="Invalid value"):
            restricted.instantiate({"institution_type_code": "X"})

    def test_optional_slot_not_provided(self):
        """An omitted optional slot is rendered with its ``default_value``."""
        limit_slot = SlotDefinition(name="limit", required=False, default_value="100")
        with_default = SimpleTemplate(
            template_id="test",
            description="Test",
            slots={"limit": limit_slot},
            sparql_template="LIMIT {{limit}}",
        )

        rendered = with_default.instantiate({})

        assert "LIMIT 100" in rendered
|
|
|
|
|
|
class TestCompositeTemplate:
    """Instantiation of a template assembled from several sub-templates."""

    def test_and_composition(self):
        """With ``join_type="AND"`` the output carries both sub-templates' values."""
        by_type = SimpleTemplate(
            template_id="type_filter",
            description="Filter by type",
            slots={"type": SlotDefinition(name="type")},
            sparql_template='?s hcp:institutionType "{{type}}" .',
        )
        by_region = SimpleTemplate(
            template_id="region_filter",
            description="Filter by region",
            slots={"region": SlotDefinition(name="region")},
            sparql_template='FILTER(CONTAINS(STR(?s), "{{region}}"))',
        )

        # The composite declares every slot its sub-templates consume.
        combined = CompositeTemplate(
            template_id="type_region",
            description="Type and region filter",
            slots={
                "type": SlotDefinition(name="type"),
                "region": SlotDefinition(name="region"),
            },
            sub_templates=[by_type, by_region],
            join_type="AND",
        )

        rendered = combined.instantiate({"type": "A", "region": "NL-DR"})

        assert '"A"' in rendered
        assert "NL-DR" in rendered
|
|
```
|
|
|
|
### 4. Integration Tests - Query Pipeline
|
|
|
|
```python
|
|
# tests/template_sparql/test_query_pipeline.py
|
|
|
|
import pytest
|
|
from backend.rag.template_sparql.pipeline import QueryPipeline, QueryContext
|
|
|
|
class TestQueryPipeline:
    """Question-in, SPARQL-out tests for the default pipeline."""

    @pytest.fixture
    def pipeline(self):
        """Provide the pipeline built by ``QueryPipeline.create_default()``."""
        return QueryPipeline.create_default()

    # -------------------------------------------------------------------------
    # Successful generation
    # -------------------------------------------------------------------------

    @pytest.mark.parametrize(
        ("question", "expected_contains"),
        [
            ("Welke archieven zijn er in Drenthe?", ['hc:Custodian', '"A"', 'NL-DR']),
            ("Welke musea zijn er in Noord-Holland?", ['hc:Custodian', '"M"', 'NL-NH']),
            ("Hoeveel bibliotheken zijn er in Utrecht?", ['COUNT', '"L"', 'NL-UT']),
        ],
    )
    def test_successful_query_generation(self, pipeline, question, expected_contains):
        """A supported question yields an error-free query with every expected fragment."""
        result = pipeline.process(QueryContext(original_question=question))

        assert result.errors == []
        assert result.sparql_query is not None

        for expected in expected_contains:
            assert expected in result.sparql_query, (
                f"Expected '{expected}' in query:\n{result.sparql_query}"
            )

    # -------------------------------------------------------------------------
    # Error handling
    # -------------------------------------------------------------------------

    def test_invalid_slot_value_error(self, pipeline):
        """An unrecognized province must produce either a query or an error."""
        unknown_place = QueryContext(original_question="Welke archieven zijn er in Atlantis?")

        result = pipeline.process(unknown_place)

        # Either outcome is accepted: an LLM-fallback query or a reported error.
        assert result.sparql_query is not None or len(result.errors) > 0

    # -------------------------------------------------------------------------
    # SPARQL validation
    # -------------------------------------------------------------------------

    def test_generated_sparql_is_valid(self, pipeline):
        """Every query that is generated passes the SPARQL linter."""
        for question in (
            "Welke archieven zijn er in Drenthe?",
            "Welke musea zijn er in Noord-Holland?",
            "Hoeveel bibliotheken zijn er?",
        ):
            result = pipeline.process(QueryContext(original_question=question))

            if not result.sparql_query:
                # No query was produced for this question, so there is nothing to lint.
                continue

            # Imported lazily: the linter is only needed once a query exists.
            from glam_extractor.api.sparql_linter import lint_sparql

            lint_result = lint_sparql(result.sparql_query)

            assert lint_result.valid, (
                f"Invalid SPARQL for '{question}':\n{result.sparql_query}\nErrors: {lint_result.issues}"
            )
|
|
|
|
|
|
class TestQueryPipelineWithSPARQLEndpoint:
    """Integration tests that execute generated queries on a live SPARQL endpoint.

    These tests need network access and are marked ``integration`` so they can
    be deselected with ``-m "not integration"``.
    """

    @pytest.fixture
    def pipeline(self):
        """Provide the pipeline built by ``QueryPipeline.create_default()``."""
        return QueryPipeline.create_default()

    @pytest.mark.integration
    @pytest.mark.parametrize("question,min_results", [
        ("Welke archieven zijn er in Drenthe?", 1),
        ("Welke musea zijn er in Noord-Holland?", 1),
    ])
    def test_query_returns_results(self, pipeline, question, min_results):
        """The generated query runs on the endpoint and returns enough rows.

        The test is synchronous on purpose: plain pytest does not execute
        ``async def`` tests without an async plugin or marker, and the only
        awaited operation was a single HTTP request, so a blocking
        ``httpx.Client`` keeps the test runnable everywhere.
        """
        import httpx

        context = QueryContext(original_question=question)
        result = pipeline.process(context)

        assert result.sparql_query is not None

        # Send the query as a form-encoded POST and ask for JSON results.
        with httpx.Client() as client:
            response = client.post(
                "https://bronhouder.nl/sparql",
                data={"query": result.sparql_query},
                headers={"Accept": "application/sparql-results+json"},
            )

        assert response.status_code == 200
        data = response.json()

        # Result rows are read from results.bindings; a missing key counts as zero rows.
        results = data.get("results", {}).get("bindings", [])
        assert len(results) >= min_results, \
            f"Expected at least {min_results} results for '{question}'"
|
|
```
|
|
|
|
### 5. Fixtures and Shared Setup
|
|
|
|
```python
|
|
# tests/template_sparql/conftest.py
|
|
|
|
import pytest
|
|
from pathlib import Path
|
|
import yaml
|
|
|
|
@pytest.fixture(scope="session")
def validation_rules():
    """Load the SPARQL validation rules once per test session.

    Returns:
        The parsed content of ``data/validation/sparql_validation_rules.json``.

    Raises:
        FileNotFoundError: If the rules file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    import json

    # NOTE(review): the path is relative, so it resolves against the current
    # working directory; presumably tests are run from the repository root.
    rules_path = Path("data/validation/sparql_validation_rules.json")

    # Decode explicitly as UTF-8 instead of relying on the platform default.
    with open(rules_path, encoding="utf-8") as f:
        return json.load(f)
|
|
|
|
@pytest.fixture(scope="session")
def template_config():
    """Load the SPARQL template configuration once per test session.

    Returns:
        The parsed content of ``data/templates/sparql_templates.yaml``.

    Raises:
        FileNotFoundError: If the configuration file does not exist.
    """
    # NOTE(review): the path is relative, so it resolves against the current
    # working directory; presumably tests are run from the repository root.
    config_path = Path("data/templates/sparql_templates.yaml")

    # Decode explicitly as UTF-8 instead of relying on the platform default.
    with open(config_path, encoding="utf-8") as f:
        return yaml.safe_load(f)
|
|
|
|
@pytest.fixture
def sample_questions():
    """Return a mapping from a short scenario key to a sample user question."""
    return dict(
        location_archive_drenthe="Welke archieven zijn er in Drenthe?",
        location_museum_nh="Welke musea zijn er in Noord-Holland?",
        count_libraries="Hoeveel bibliotheken zijn er in Nederland?",
        entity_lookup="Wat is het Nationaal Archief?",
        complex_query="Welke archieven in Drenthe hebben meer dan 1000 items?",
    )
|
|
|
|
@pytest.fixture
def expected_sparql_patterns():
    """Return, per scenario key, fragments a generated query must and must not contain."""
    archive_drenthe = {
        "must_contain": ['hc:Custodian', 'hcp:institutionType', '"A"', 'NL-DR'],
        "must_not_contain": ['crm:', 'cidoc:'],
    }
    museum_noord_holland = {
        "must_contain": ['hc:Custodian', '"M"', 'NL-NH'],
        "must_not_contain": ['crm:', 'cidoc:'],
    }

    patterns = {}
    patterns["location_archive_drenthe"] = archive_drenthe
    patterns["location_museum_nh"] = museum_noord_holland
    return patterns
|
|
```
|
|
|
|
## Test Execution
|
|
|
|
### Running Tests
|
|
|
|
```bash
# Run all template SPARQL tests
pytest tests/template_sparql/ -v

# Run with coverage
pytest tests/template_sparql/ --cov=backend.rag.template_sparql --cov-report=html

# Run only unit tests (fast)
pytest tests/template_sparql/ -v -m "not integration"

# Run integration tests (requires SPARQL endpoint)
pytest tests/template_sparql/ -v -m integration

# Run specific test file
pytest tests/template_sparql/test_intent_classifier.py -v

# Run specific test case
pytest tests/template_sparql/test_intent_classifier.py::TestIntentClassifier::test_location_based_intent -v
```
|
|
|
|
### Test Markers
|
|
|
|
```python
|
|
# conftest.py at project root
|
|
import pytest
|
|
|
|
def pytest_configure(config):
    """Register the custom ``integration`` and ``slow`` markers with pytest."""
    marker_descriptions = (
        "integration: marks tests as integration tests",
        "slow: marks tests as slow",
    )
    for description in marker_descriptions:
        config.addinivalue_line("markers", description)
|
|
```
|
|
|
|
## Coverage Targets
|
|
|
|
| Component | Target Coverage |
|-----------|-----------------|
| IntentClassifier | 95% |
| SlotExtractor | 95% |
| TemplateRegistry | 90% |
| Template instantiation | 95% |
| QueryPipeline | 85% |
| Integration | 80% |
|
|
|
|
## Test Data Management
|
|
|
|
### Golden Test Cases
|
|
|
|
Store expected outputs for regression testing:
|
|
|
|
```yaml
# tests/template_sparql/golden/location_queries.yaml
test_cases:
  - id: "archive_drenthe_001"
    input:
      question: "Welke archieven zijn er in Drenthe?"
      language: "nl"
    expected:
      template_id: "region_institution_search"
      slots:
        institution_type_code: "A"
        province_code: "NL-DR"
      sparql_contains:
        - "hc:Custodian"
        - 'hcp:institutionType "A"'
        - 'FILTER(CONTAINS(STR(?'
        - '"NL-DR"'
```
|
|
|
|
### Loading Golden Cases
|
|
|
|
```python
|
|
@pytest.fixture
def golden_cases():
    """Load every golden test case, keyed by its ``id``.

    Reads each ``*.yaml`` file in ``tests/template_sparql/golden`` and merges
    the entries of its top-level ``test_cases`` list into one mapping.

    Returns:
        A dict mapping case id to the case mapping as written in the YAML.
        It is empty when the directory holds no YAML files.

    Raises:
        KeyError: If a listed case has no ``id`` key.
    """
    # Imported here so the fixture works regardless of the module's imports.
    from pathlib import Path

    import yaml

    golden_dir = Path("tests/template_sparql/golden")
    cases = {}

    # Sorted so that, should two files reuse an id, the same one always wins.
    for yaml_file in sorted(golden_dir.glob("*.yaml")):
        # Questions may contain non-ASCII characters (e.g. "Fryslân"), so the
        # encoding is pinned rather than left to the platform default.
        with open(yaml_file, encoding="utf-8") as f:
            # An empty YAML file parses to None; treat it as "no cases".
            data = yaml.safe_load(f) or {}
        cases.update({
            case["id"]: case
            for case in data.get("test_cases", [])
        })
    return cases
|
|
|
|
def test_golden_cases(pipeline, golden_cases):
    """Run every golden case through the pipeline and compare with its expectations.

    For each case the chosen template id must equal ``expected.template_id``
    and the generated query must contain every string listed under
    ``expected.sparql_contains``.

    Args:
        pipeline: Fixture providing the query pipeline under test.
        golden_cases: Fixture mapping case id to the golden case mapping.
    """
    # Without this guard the loop below would pass trivially when no golden
    # files were found (for example when run from the wrong directory).
    assert golden_cases, "No golden cases were loaded"

    for case_id, case in golden_cases.items():
        expected = case["expected"]
        context = QueryContext(original_question=case["input"]["question"])
        result = pipeline.process(context)

        assert result.intent.template_id == expected["template_id"], \
            f"Case {case_id}: template mismatch"

        # Fail with a readable message rather than a TypeError from `in None`.
        assert result.sparql_query is not None, \
            f"Case {case_id}: no SPARQL query generated"

        for pattern in expected["sparql_contains"]:
            assert pattern in result.sparql_query, \
                f"Case {case_id}: missing pattern '{pattern}'"
|
|
```
|
|
|
|
## Continuous Integration
|
|
|
|
```yaml
# .github/workflows/template-sparql-tests.yml
name: Template SPARQL Tests

on:
  push:
    paths:
      - 'backend/rag/template_sparql/**'
      - 'tests/template_sparql/**'
  pull_request:
    paths:
      - 'backend/rag/template_sparql/**'
      - 'tests/template_sparql/**'

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          pip install -e ".[dev]"

      - name: Run unit tests
        run: |
          pytest tests/template_sparql/ -v -m "not integration" --cov=backend.rag.template_sparql

      - name: Upload coverage
        uses: codecov/codecov-action@v4
```
|