# Change summary
#
# Evaluation layers:
#   - Layer 1: 35 unit tests (no LLM required)
#   - Layer 2: 56 DSPy module tests with LLM
#   - Layer 3: 10 integration tests with Oxigraph
#   - Layer 4: Comprehensive evaluation suite
#
# Fixed:
#   - Coordinate queries now use the schema:location -> blank node pattern
#   - Golden query expected intent for location questions
#   - Health check test filtering in Layer 4
#
# Added:
#   - GitHub Actions workflow for CI/CD evaluation
#
# File info: 448 lines, 15 KiB, Python
"""
|
|
Layer 3: Integration Tests for Heritage RAG

These tests verify:
- API endpoint health
- Oxigraph connectivity
- End-to-end query processing
- Sample query responses

Requires:
- Live Oxigraph instance (via SSH tunnel or direct connection)
- ANTHROPIC_API_KEY for LLM queries

Run locally with SSH tunnel:
    ssh -f -N -L 7878:127.0.0.1:7878 root@91.98.224.44
    export OXIGRAPH_ENDPOINT=http://127.0.0.1:7878
    pytest tests/dspy_gitops/test_layer3_integration.py -v
|
|
"""
|
|
|
|
import os
|
|
import time
|
|
from typing import Any
|
|
|
|
import httpx
|
|
import pytest
|
|
|
|
# Configuration: an environment variable, when set, takes precedence; otherwise
# each endpoint falls back to its local default.
# Oxigraph is NOT externally accessible, so the default points at the local end
# of an SSH tunnel.
OXIGRAPH_URL = os.getenv("OXIGRAPH_ENDPOINT", "http://127.0.0.1:7878")
API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8000")
|
|
|
|
|
|
# =============================================================================
|
|
# Oxigraph Connectivity Tests
|
|
# =============================================================================
|
|
|
|
@pytest.mark.layer3
@pytest.mark.requires_oxigraph
class TestOxigraphConnectivity:
    """Test Oxigraph SPARQL endpoint connectivity.

    Every test POSTs a SPARQL query to ``{OXIGRAPH_URL}/query`` and inspects
    the SPARQL JSON results document that comes back.
    """

    @staticmethod
    def _run_query(query: str, timeout: float = 30.0) -> Any:
        """POST ``query`` to the Oxigraph endpoint and return the decoded JSON.

        Args:
            query: SPARQL query text, sent as the ``query`` form field.
            timeout: Request timeout in seconds.

        Returns:
            The decoded JSON body of the response.

        Raises:
            AssertionError: If the endpoint does not answer with HTTP 200.
        """
        response = httpx.post(
            f"{OXIGRAPH_URL}/query",
            data={"query": query},
            headers={"Accept": "application/sparql-results+json"},
            timeout=timeout,
        )
        # Include the status and the start of the body so a failing run shows
        # what the endpoint actually answered.
        assert response.status_code == 200, (
            f"SPARQL request failed with HTTP {response.status_code}: "
            f"{response.text[:200]}"
        )
        return response.json()

    @classmethod
    def _run_count(cls, query: str) -> int:
        """Run an aggregate query and return its ``?count`` value as an int.

        The query must project a single ``?count`` variable; the value is read
        from the first result row.
        """
        data = cls._run_query(query)
        return int(data["results"]["bindings"][0]["count"]["value"])

    def test_oxigraph_health(self):
        """Verify Oxigraph is accessible."""
        # Fetch a single triple as the probe. The previous form,
        # ``SELECT (COUNT(*) ...) ... LIMIT 1``, asked the store to count every
        # triple: LIMIT is applied after aggregation, so it did not bound the
        # work done inside this test's short timeout.
        query = "SELECT ?s WHERE { ?s ?p ?o } LIMIT 1"

        data = self._run_query(query, timeout=10.0)

        assert "results" in data
        assert "bindings" in data["results"]

    def test_oxigraph_triple_count(self):
        """Verify Oxigraph has data loaded."""
        query = "SELECT (COUNT(*) as ?count) WHERE { ?s ?p ?o }"

        count = self._run_count(query)

        # Should have substantial data
        assert count > 100000, f"Expected > 100k triples, got {count}"

    def test_dutch_institutions_exist(self):
        """Verify Dutch institution data is present."""
        query = """
        PREFIX hc: <https://nde.nl/ontology/hc/>
        SELECT (COUNT(DISTINCT ?s) as ?count)
        WHERE { ?s hc:countryCode "NL" . }
        """

        count = self._run_count(query)

        # Should have Dutch institutions
        assert count > 2000, f"Expected > 2000 Dutch institutions, got {count}"

    def test_dutch_institutions_with_coordinates(self):
        """Verify Dutch institutions have coordinate data.

        Note: Coordinates are stored on blank nodes via schema:location,
        NOT directly on the institution subject.
        """
        query = """
        PREFIX hc: <https://nde.nl/ontology/hc/>
        PREFIX schema: <http://schema.org/>
        PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
        SELECT (COUNT(DISTINCT ?s) as ?count)
        WHERE {
            ?s hc:countryCode "NL" .
            ?s schema:location ?loc .
            ?loc geo:lat ?lat .
        }
        """

        count = self._run_count(query)

        # Should have geocoded institutions
        # NOTE(review): this lower bound (2500) is higher than the one used in
        # test_dutch_institutions_exist (2000), although this query matches a
        # subset of those subjects - confirm both thresholds are intended.
        assert count > 2500, f"Expected > 2500 Dutch institutions with coords, got {count}"

    def test_amsterdam_institutions_query(self):
        """Test specific Amsterdam institutions query."""
        # Use hc:settlementName (the actual schema field)
        query = """
        PREFIX hc: <https://nde.nl/ontology/hc/>
        SELECT (COUNT(DISTINCT ?s) as ?count)
        WHERE {
            ?s hc:countryCode "NL" .
            ?s hc:settlementName "Amsterdam" .
        }
        """

        count = self._run_count(query)

        # Amsterdam should have many institutions
        assert count > 100, f"Expected > 100 Amsterdam institutions, got {count}"
        print(f"Found {count} institutions in Amsterdam")
|
|
|
|
|
|
# =============================================================================
|
|
# API Health Tests
|
|
# =============================================================================
|
|
|
|
@pytest.mark.layer3
class TestAPIHealth:
    """Test API endpoint health."""

    @pytest.fixture
    def client(self):
        """Yield an HTTP client bound to ``API_BASE_URL``.

        The client is created inside a ``with`` block so its connections are
        released when the test finishes, instead of being left open for the
        rest of the session.
        """
        with httpx.Client(base_url=API_BASE_URL, timeout=30.0) as http_client:
            yield http_client

    @pytest.mark.skip(reason="API server not always running in CI")
    def test_dspy_rag_health(self, client):
        """Test DSPy RAG health endpoint."""
        response = client.get("/api/dspy/rag/health")

        assert response.status_code == 200
        data = response.json()

        assert data.get("status") == "ok"
        assert "components" in data

    @pytest.mark.skip(reason="API server not always running in CI")
    def test_dspy_rag_training_data(self, client):
        """Test training data endpoint."""
        response = client.get("/api/dspy/rag/training-data")

        assert response.status_code == 200
        data = response.json()

        # Both counters must be reported and be non-zero.
        assert data.get("total_training", 0) > 0
        assert data.get("total_validation", 0) > 0
|
|
|
|
|
|
# =============================================================================
|
|
# Sample Query Tests
|
|
# =============================================================================
|
|
|
|
@pytest.mark.layer3
@pytest.mark.requires_llm
class TestSampleQueries:
    """Test sample queries against live system."""

    # Each entry is posted to the query endpoint. ``expected_intent`` is only
    # printed next to the returned intent; it is not asserted.
    SAMPLE_QUERIES = [
        {
            "question": "Hoeveel musea zijn er in Amsterdam?",
            "language": "nl",
            "expected_intent": "statistical",
        },
        {
            "question": "Waar is het Rijksmuseum gevestigd?",
            "language": "nl",
            "expected_intent": "entity_lookup",
        },
        {
            "question": "How many libraries are in the Netherlands?",
            "language": "en",
            "expected_intent": "statistical",
        },
    ]

    @pytest.fixture
    def async_client(self):
        """Create async HTTP client.

        The client is returned unopened; the consuming test is responsible
        for closing it (for example by using it as an async context manager).
        """
        return httpx.AsyncClient(base_url=API_BASE_URL, timeout=60.0)

    @pytest.mark.skip(reason="API server not always running in CI")
    @pytest.mark.asyncio
    async def test_sample_queries(self, async_client):
        """Test sample queries return valid responses."""
        # Entering the client as a context manager guarantees it is closed
        # even when one of the assertions below fails.
        async with async_client:
            for query in self.SAMPLE_QUERIES:
                # perf_counter is monotonic, so the measured duration cannot
                # be skewed by wall-clock adjustments.
                start = time.perf_counter()

                response = await async_client.post(
                    "/api/dspy/rag/query",
                    json={
                        "question": query["question"],
                        "language": query["language"],
                        "include_visualization": False,
                        "use_agent": False,
                    },
                )

                duration_ms = (time.perf_counter() - start) * 1000

                assert response.status_code == 200, f"Failed for: {query['question']}"
                data = response.json()

                # Verify response structure
                assert "answer" in data
                assert "intent" in data
                assert "confidence" in data

                # Verify answer is not empty
                assert data["answer"], f"Empty answer for: {query['question']}"

                # Log results
                print(f"\nQuery: {query['question'][:50]}...")
                print(f" Intent: {data['intent']} (expected: {query['expected_intent']})")
                print(f" Duration: {duration_ms:.0f}ms")
                print(f" Answer: {data['answer'][:100]}...")
|
|
|
|
|
|
# =============================================================================
|
|
# Direct SPARQL Tests for Heritage Queries
|
|
# =============================================================================
|
|
|
|
@pytest.mark.layer3
@pytest.mark.requires_oxigraph
class TestHeritageSPARQL:
    """Run heritage-specific SPARQL queries straight against Oxigraph.

    The queries follow the hc: ontology as it is used in the data:
    - hc:institutionType holds single-letter codes (M=Museum, L=Library, A=Archive, etc.)
    - hc:settlementName holds city names (NOT hc:city)
    - hc:countryCode holds country codes
    - skos:prefLabel or schema:name hold institution names
    """

    @staticmethod
    def _post_sparql(sparql):
        """POST ``sparql`` to ``{OXIGRAPH_URL}/query`` and return the HTTP response."""
        return httpx.post(
            f"{OXIGRAPH_URL}/query",
            data={"query": sparql},
            headers={"Accept": "application/sparql-results+json"},
            timeout=30.0,
        )

    def test_count_museums_amsterdam(self):
        """Count the museums located in Amsterdam."""
        # Type code "M" stands for Museum.
        sparql = """
        PREFIX hc: <https://nde.nl/ontology/hc/>
        SELECT (COUNT(DISTINCT ?s) as ?count)
        WHERE {
            ?s hc:institutionType "M" .
            ?s hc:countryCode "NL" .
            ?s hc:settlementName "Amsterdam" .
        }
        """

        reply = self._post_sparql(sparql)
        assert reply.status_code == 200

        rows = reply.json()["results"]["bindings"]
        total = int(rows[0]["count"]["value"])

        print(f"Museums in Amsterdam: {total}")
        assert total > 30, f"Expected > 30 Amsterdam museums, got {total}"

    def test_find_rijksmuseum(self):
        """Look up institutions whose name contains "rijksmuseum"."""
        sparql = """
        PREFIX hc: <https://nde.nl/ontology/hc/>
        PREFIX schema: <http://schema.org/>
        SELECT ?s ?name ?city
        WHERE {
            ?s schema:name ?name .
            FILTER(CONTAINS(LCASE(?name), "rijksmuseum"))
            ?s hc:settlementName ?city .
        }
        LIMIT 5
        """

        reply = self._post_sparql(sparql)
        assert reply.status_code == 200

        rows = reply.json()["results"]["bindings"]
        assert len(rows) > 0, "No Rijksmuseum found"

        # The matches are only reported; no particular city is asserted.
        found_names = []
        found_cities = []
        for row in rows:
            found_names.append(row["name"]["value"])
            found_cities.append(row.get("city", {}).get("value", ""))

        print(f"Found: {found_names}")
        print(f"Cities: {found_cities}")

    def test_count_libraries_nl(self):
        """Count the libraries in the Netherlands."""
        # Type code "L" stands for Library.
        sparql = """
        PREFIX hc: <https://nde.nl/ontology/hc/>
        SELECT (COUNT(DISTINCT ?s) as ?count)
        WHERE {
            ?s hc:institutionType "L" .
            ?s hc:countryCode "NL" .
        }
        """

        reply = self._post_sparql(sparql)
        assert reply.status_code == 200

        rows = reply.json()["results"]["bindings"]
        total = int(rows[0]["count"]["value"])

        print(f"Libraries in Netherlands: {total}")
        assert total > 100, f"Expected > 100 libraries, got {total}"

    def test_geographic_query_amsterdam(self):
        """Query institutions inside a bounding box around Amsterdam.

        Coordinates hang off a blank node reached through schema:location
        rather than off the institution subject itself.
        Amsterdam lies at roughly 52.37, 4.89.
        """
        sparql = """
        PREFIX hc: <https://nde.nl/ontology/hc/>
        PREFIX schema: <http://schema.org/>
        PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
        SELECT ?s ?name ?lat ?lon
        WHERE {
            ?s hc:countryCode "NL" .
            ?s schema:name ?name .
            ?s schema:location ?loc .
            ?loc geo:lat ?lat .
            ?loc geo:long ?lon .
            FILTER(
                ?lat > 52.3 &&
                ?lat < 52.4 &&
                ?lon > 4.8 &&
                ?lon < 5.0
            )
        }
        LIMIT 10
        """

        reply = self._post_sparql(sparql)
        assert reply.status_code == 200

        rows = reply.json()["results"]["bindings"]

        print(f"Found {len(rows)} institutions near Amsterdam")
        for row in rows[:5]:
            print(f" - {row.get('name', {}).get('value', 'N/A')}")

        # At least one institution is expected inside the bounding box.
        assert len(rows) > 0, "No institutions found near Amsterdam coordinates"

    def test_institution_type_distribution(self):
        """Check that Dutch institutions span several institution types."""
        sparql = """
        PREFIX hc: <https://nde.nl/ontology/hc/>
        SELECT ?type (COUNT(DISTINCT ?s) as ?count)
        WHERE {
            ?s hc:institutionType ?type .
            ?s hc:countryCode "NL" .
        }
        GROUP BY ?type
        ORDER BY DESC(?count)
        """

        reply = self._post_sparql(sparql)
        assert reply.status_code == 200

        rows = reply.json()["results"]["bindings"]

        # More than five distinct type codes are expected.
        assert len(rows) > 5, f"Expected > 5 institution types, got {len(rows)}"

        # Report the ten most common types.
        print("Institution type distribution (NL):")
        for row in rows[:10]:
            type_code = row["type"]["value"]
            count = row["count"]["value"]
            print(f" {type_code}: {count}")
|
|
|
|
|
|
if __name__ == "__main__":
    # Propagate pytest's exit code so that running this file directly exits
    # non-zero when tests fail, instead of always exiting with status 0.
    raise SystemExit(pytest.main([__file__, "-v", "--tb=short"]))
|