#!/usr/bin/env python3
"""
Test the DSPy Heritage RAG against a live SPARQL endpoint.

Requires an active SSH tunnel (the SPARQL endpoint is reached on localhost:7878):
ssh -f -N -L 7878:localhost:7878 -L 6333:localhost:6333 root@91.98.224.44
"""
import json
import os
import sys
from datetime import datetime
import httpx
# Add project root to path: the directory three levels above this file is
# put first on sys.path so that the `backend` package imported further down
# can be resolved. This must run before that import.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
# Configure DSPy (imported here, after the path tweak above).
import dspy
# Use GPT-4o-mini for fast testing. The LM object built here is handed to
# dspy.configure() below.
# NOTE(review): presumably this makes it the default LM for every DSPy module
# used in this script, and provider credentials come from the environment --
# confirm against the DSPy documentation.
lm = dspy.LM(
model="openai/gpt-4o-mini",
temperature=0.3,
max_tokens=2000,
)
dspy.configure(lm=lm)
from backend.rag.dspy_heritage_rag import (
HeritageRAGPipeline,
HeritageQueryRouter,
HeritageSPARQLGenerator,
HeritageEntityExtractor,
MultiHopHeritageRetriever,
SCHEMA_LOADER_AVAILABLE,
get_schema_aware_sparql_signature,
get_schema_aware_entity_signature,
get_schema_aware_answer_signature,
validate_custodian_type,
)
def test_sparql_endpoint():
    """Check that the SPARQL endpoint on localhost:7878 answers a count query.

    Posts a ``COUNT`` query over ``crm:E39_Actor`` instances and prints the
    number reported by the endpoint.

    Returns:
        bool: True when the endpoint answered with HTTP 200 and a parsable
        count. False on a non-200 status, on a transport error (for example
        when the SSH tunnel is not up), or on an unexpected response body.
    """
    print("\n" + "=" * 60)
    print("Testing SPARQL Endpoint (localhost:7878)")
    print("=" * 60)
    # Count custodians. A PREFIX declaration without an IRI is a SPARQL
    # syntax error, so the namespace is spelled out here.
    # NOTE(review): this is the standard CIDOC-CRM namespace -- confirm it
    # matches the namespace used by the data in the triple store.
    query = """
    PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
    SELECT (COUNT(*) as ?count) WHERE { ?s a crm:E39_Actor }
    """
    try:
        response = httpx.post(
            "http://localhost:7878/query",
            content=query,
            headers={
                "Content-Type": "application/sparql-query",
                "Accept": "application/sparql-results+json",
            },
            timeout=30.0,
        )
    except httpx.HTTPError as exc:
        # Connection refused / timeout: report failure instead of crashing so
        # the caller can print its "is the tunnel up?" hint.
        print(f"✗ Failed: {exc}")
        return False
    if response.status_code != 200:
        print(f"✗ Failed: {response.status_code}")
        return False
    try:
        count = response.json()["results"]["bindings"][0]["count"]["value"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        # Body was not JSON or not shaped like a SPARQL results document.
        print(f"✗ Failed: unexpected response body ({exc!r})")
        return False
    print(f"✓ Connected! Found {count} heritage custodians")
    return True
def test_query_router():
    """Classify a handful of sample questions and print router vs. expected intent."""
    rule = "=" * 60
    print("\n" + rule)
    print("Testing Query Router")
    print(rule)
    # HeritageQueryRouter is a dspy.Module, so it is instantiated directly.
    router = HeritageQueryRouter()
    # Sample question -> intent the router is expected to assign.
    expected_intents = {
        "Hoeveel musea zijn er in Amsterdam?": "statistical",
        "Waar is het Rijksmuseum?": "geographic",
        "Welke archieven hebben WO2 documenten?": "exploration",
        "Wat is de ISIL code van de KB?": "entity_lookup",
        "Vergelijk het Mauritshuis met het Rijksmuseum": "comparative",
    }
    for question, expected in expected_intents.items():
        prediction = router(question=question)
        if prediction.intent == expected:
            status = "✓"
        else:
            status = "✗"
        print(f"{status} '{question[:40]}...' → {prediction.intent} (expected: {expected})")
def test_sparql_generation():
    """Generate SPARQL for sample questions and try each query on the endpoint.

    For every scenario the generated query and its explanation are printed,
    then the query is posted to localhost:7878 and the number of result
    bindings (or the failure) is reported.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Testing SPARQL Generation")
    print(rule)
    sparql_generator = dspy.ChainOfThought(HeritageSPARQLGenerator)
    # (question, intent, entities) triples fed to the generator.
    scenarios = (
        ("Hoeveel musea zijn er in Nederland?", "statistical", ["museum", "Nederland"]),
        ("Geef me alle archieven in Amsterdam", "geographic", ["archief", "Amsterdam"]),
        ("Welke bibliotheken hebben een website?", "exploration", ["bibliotheek", "website"]),
    )
    request_headers = {
        "Content-Type": "application/sparql-query",
        "Accept": "application/sparql-results+json",
    }
    for question, intent, entities in scenarios:
        print(f"\nQuestion: {question}")
        prediction = sparql_generator(
            question=question,
            intent=intent,
            entities=entities,
            context="",
        )
        print(f"SPARQL:\n{prediction.sparql}")
        print(f"Explanation: {prediction.explanation}")
        # Try to execute the generated query; any failure is only reported.
        try:
            reply = httpx.post(
                "http://localhost:7878/query",
                content=prediction.sparql,
                headers=request_headers,
                timeout=30.0,
            )
            if reply.status_code != 200:
                print(f"✗ Query failed: {reply.status_code} - {reply.text[:200]}")
                continue
            rows = reply.json().get("results", {}).get("bindings", [])
            print(f"✓ Query executed successfully, {len(rows)} results")
        except Exception as err:
            print(f"✗ Query error: {err}")
def test_full_pipeline():
    """Run sample questions through the complete HeritageRAGPipeline.

    A saved (optimized) pipeline state is loaded when the model file exists;
    otherwise the default pipeline is used. For every question the intent,
    a possibly truncated answer, the size of the generated SPARQL and the
    visualization type are printed. Errors raised while handling a question
    are reported and do not stop the remaining questions.
    """
    print("\n" + "=" * 60)
    print("Testing Full RAG Pipeline")
    print("=" * 60)
    # Load saved pipeline
    pipeline = HeritageRAGPipeline()
    model_path = "backend/rag/optimized_models/heritage_rag_latest.json"
    if not os.path.exists(model_path):
        # The relative path only resolves when the script is started from the
        # project root. Fall back to the same location anchored at the project
        # root (three levels above this file, as used for sys.path).
        project_root = os.path.dirname(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        )
        anchored_path = os.path.join(project_root, model_path)
        if os.path.exists(anchored_path):
            model_path = anchored_path
    if os.path.exists(model_path):
        print(f"Loading saved model from {model_path}")
        pipeline.load(model_path)
    else:
        print("No saved model found, using default pipeline")
    test_questions = [
        ("Hoeveel musea zijn er in Amsterdam?", "nl"),
        ("What archives are in The Hague?", "en"),
        ("Welke bibliotheken hebben sociale media?", "nl"),
    ]
    for question, language in test_questions:
        print(f"\n{'─'*50}")
        print(f"Q: {question}")
        try:
            result = pipeline(question=question, language=language)
            print(f"Intent: {result.intent}")
            # Keep the console output short: show at most 200 characters.
            if len(result.answer) > 200:
                print(f"Answer: {result.answer[:200]}...")
            else:
                print(f"Answer: {result.answer}")
            if result.sparql:
                print(f"SPARQL generated: {len(result.sparql)} chars")
            if result.visualization:
                print(f"Visualization: {result.visualization.get('type', 'none')}")
        except Exception as e:
            print(f"✗ Pipeline error: {e}")
def run_sample_queries():
    """Run a few illustrative SPARQL queries against the live endpoint.

    Each query is posted to localhost:7878; up to five result rows are
    printed (values truncated to 40 characters), followed by a note on how
    many more rows were returned. HTTP and transport errors are printed and
    do not stop the remaining queries.
    """
    print("\n" + "=" * 60)
    print("Sample Queries Against Live Data")
    print("=" * 60)
    # PREFIX declarations must carry an IRI to be valid SPARQL. These are the
    # standard namespaces for the vocabularies used below.
    # NOTE(review): confirm they match the namespaces used in the triple
    # store (e.g. http vs. https for schema.org).
    queries = [
        ("Museums by country", """
            PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
            PREFIX org: <http://www.w3.org/ns/org#>
            PREFIX schema: <http://schema.org/>
            SELECT ?country (COUNT(?s) as ?count) WHERE {
                ?s a crm:E39_Actor ;
                   org:classification "MUSEUM" ;
                   schema:addressCountry ?country .
            } GROUP BY ?country ORDER BY DESC(?count) LIMIT 10
        """),
        ("Dutch archives with websites", """
            PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
            PREFIX org: <http://www.w3.org/ns/org#>
            PREFIX schema: <http://schema.org/>
            PREFIX foaf: <http://xmlns.com/foaf/0.1/>
            PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
            SELECT ?name ?homepage WHERE {
                ?s a crm:E39_Actor ;
                   org:classification "ARCHIVE" ;
                   schema:addressCountry "NL" ;
                   skos:prefLabel ?name ;
                   foaf:homepage ?homepage .
            } LIMIT 10
        """),
        ("Heritage institutions with social media", """
            PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
            PREFIX foaf: <http://xmlns.com/foaf/0.1/>
            PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
            SELECT ?name (COUNT(?account) as ?social_count) WHERE {
                ?s a crm:E39_Actor ;
                   skos:prefLabel ?name ;
                   foaf:account ?account .
            } GROUP BY ?s ?name ORDER BY DESC(?social_count) LIMIT 10
        """),
    ]
    for name, query in queries:
        print(f"\n{name}:")
        try:
            response = httpx.post(
                "http://localhost:7878/query",
                content=query.strip(),
                headers={
                    "Content-Type": "application/sparql-query",
                    "Accept": "application/sparql-results+json",
                },
                timeout=30.0,
            )
            if response.status_code == 200:
                data = response.json()
                bindings = data.get("results", {}).get("bindings", [])
                # Show only the first five rows to keep the output readable.
                for b in bindings[:5]:
                    vals = [f"{k}={v['value'][:40]}" for k, v in b.items()]
                    print(f" {', '.join(vals)}")
                if len(bindings) > 5:
                    print(f" ... and {len(bindings)-5} more")
            else:
                print(f" Error: {response.status_code}")
        except Exception as e:
            print(f" Error: {e}")
def test_schema_aware_signatures():
    """Exercise the schema-aware signature helpers.

    Skipped entirely when SCHEMA_LOADER_AVAILABLE is false. Otherwise four
    steps are run and reported: (1) the three signature factories,
    (2) custodian type validation for valid and invalid names,
    (3) SPARQL generation with the schema-aware signature, including running
    the query on localhost:7878, and (4) construction of a schema-aware
    MultiHopHeritageRetriever.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Testing Schema-Aware Signatures")
    print(rule)
    print(f"Schema loader available: {SCHEMA_LOADER_AVAILABLE}")
    if not SCHEMA_LOADER_AVAILABLE:
        print("⚠️ Schema loader not available, skipping schema-aware tests")
        return

    # Step 1: every factory should hand back a signature with a docstring.
    print("\n1. Testing signature factories:")
    signature_factories = (
        ("SPARQL", get_schema_aware_sparql_signature),
        ("Entity", get_schema_aware_entity_signature),
        ("Answer", get_schema_aware_answer_signature),
    )
    for label, factory in signature_factories:
        try:
            signature = factory()
            print(f" ✓ {label} signature: {signature.__name__}")
            print(f" Docstring length: {len(signature.__doc__)} chars")
        except Exception as err:
            print(f" ✗ {label} signature failed: {err}")

    # Step 2: custodian type validation.
    print("\n2. Testing custodian type validation:")
    for type_name in ("MUSEUM", "LIBRARY", "ARCHIVE", "GALLERY"):
        accepted = validate_custodian_type(type_name)
        mark = "✓" if accepted else "✗"
        print(f" {mark} validate_custodian_type('{type_name}'): {accepted}")
    for type_name in ("museum", "INVALID_TYPE", "", "123"):
        accepted = validate_custodian_type(type_name)
        # These inputs are expected to be rejected, so a falsy result passes.
        mark = "✗" if accepted else "✓"
        print(f" {mark} validate_custodian_type('{type_name}'): {accepted} (expected: False)")

    # Step 3: schema-aware SPARQL generation, then run the query.
    print("\n3. Testing schema-aware SPARQL generation:")
    try:
        schema_generator = dspy.ChainOfThought(get_schema_aware_sparql_signature())
        prediction = schema_generator(
            question="Hoeveel musea zijn er in Amsterdam?",
            intent="statistical",
            entities=["museum", "Amsterdam"],
            context="",
        )
        print(" ✓ Schema-aware SPARQL generated:")
        print(f" Query length: {len(prediction.sparql)} chars")
        print(f" Explanation: {prediction.explanation[:100]}...")
        reply = httpx.post(
            "http://localhost:7878/query",
            content=prediction.sparql,
            headers={
                "Content-Type": "application/sparql-query",
                "Accept": "application/sparql-results+json",
            },
            timeout=30.0,
        )
        if reply.status_code != 200:
            print(f" ✗ Query failed: {reply.status_code}")
        else:
            rows = reply.json().get("results", {}).get("bindings", [])
            print(f" ✓ Query executed: {len(rows)} results")
    except Exception as err:
        print(f" ✗ Schema-aware SPARQL generation failed: {err}")

    # Step 4: the multi-hop retriever in schema-aware mode.
    print("\n4. Testing MultiHopHeritageRetriever (schema-aware):")
    try:
        schema_retriever = MultiHopHeritageRetriever(max_hops=2, use_schema_aware=True)
        print(f" ✓ Created retriever with use_schema_aware={schema_retriever.use_schema_aware}")
    except Exception as err:
        print(f" ✗ Failed to create schema-aware retriever: {err}")
    print("\nSchema-aware signature tests complete!")
def test_multi_turn_conversation():
    """Exercise a three-turn conversation through the pipeline with dspy.History.

    Turn 1 asks an initial question with an empty history. Turns 2 and 3 are
    follow-ups that receive the question/answer pairs of the earlier turns as
    history. A failure in turn 1 or 2 aborts the test, since the later turns
    build on their answers. The closing summary only claims success, and only
    counts turn 3, when turn 3 actually completed.
    """
    print("\n" + "=" * 60)
    print("Testing Multi-Turn Conversation")
    print("=" * 60)
    from dspy import History

    def _report(result):
        """Print the intent and a (possibly truncated) answer of one turn."""
        print(f" Intent: {result.intent}")
        if len(result.answer) > 150:
            print(f" Answer: {result.answer[:150]}...")
        else:
            print(f" Answer: {result.answer}")

    pipeline = HeritageRAGPipeline()
    # Question/answer pairs of completed turns, passed on as history.
    conversation = []

    # Turn 1: Initial query about museums in Amsterdam
    question1 = "Hoeveel musea zijn er in Amsterdam?"
    print(f"\nTurn 1: {question1}")
    try:
        history1 = History(messages=[])  # Empty history for first turn
        result1 = pipeline(
            question=question1,
            language="nl",
            history=history1,
            include_viz=False,
        )
        _report(result1)
        conversation.append({
            "question": question1,
            "answer": result1.answer,
        })
    except Exception as e:
        print(f" ✗ Turn 1 failed: {e}")
        return

    # Turn 2: Follow-up question (should use context from turn 1)
    question2 = "Welke van deze beheren ook archieven?"
    print(f"\nTurn 2: {question2}")
    print(" (This is a follow-up that refers to 'these' from previous turn)")
    try:
        history2 = History(messages=conversation)
        result2 = pipeline(
            question=question2,
            language="nl",
            history=history2,
            include_viz=False,
        )
        # Show the rewritten question when the pipeline exposes one and it
        # differs from what was asked.
        resolved = getattr(result2, 'resolved_question', None)
        if resolved and resolved != question2:
            print(f" ✓ Query resolved: {resolved[:100]}...")
        _report(result2)
        conversation.append({
            "question": question2,
            "answer": result2.answer,
        })
    except Exception as e:
        print(f" ✗ Turn 2 failed: {e}")
        return

    # Turn 3: Another follow-up
    question3 = "Geef me de websites van de eerste drie"
    print(f"\nTurn 3: {question3}")
    print(" (This refers to 'the first three' from previous results)")
    turn3_ok = False
    try:
        history3 = History(messages=conversation)
        result3 = pipeline(
            question=question3,
            language="nl",
            history=history3,
            include_viz=False,
        )
        _report(result3)
        turn3_ok = True
    except Exception as e:
        print(f" ✗ Turn 3 failed: {e}")

    # Turn 3 is not appended to `conversation`, so it is counted separately
    # and only when it really succeeded.
    if turn3_ok:
        print("\n✓ Multi-turn conversation test complete!")
    else:
        print("\n⚠️ Multi-turn conversation test finished, but turn 3 failed")
    print(f" Total turns: {len(conversation) + (1 if turn3_ok else 0)}")
    print(f" History messages: {len(conversation)}")
def test_query_router_with_history():
    """Check that HeritageQueryRouter resolves a follow-up question via history.

    First routes a stand-alone question, then a follow-up together with a
    one-message history about museums in Den Haag. The follow-up counts as
    resolved when the router's ``resolved_question`` mentions "Den Haag" or
    "musea" (compared case-insensitively). Only a report is printed; nothing
    is returned or asserted.
    """
    print("\n" + "=" * 60)
    print("Testing Query Router with History")
    print("=" * 60)
    from dspy import History
    router = HeritageQueryRouter()

    # Test 1: Initial question (no history)
    q1 = "Toon alle musea in Den Haag"
    print(f"\n1. Initial query: {q1}")
    result1 = router(question=q1, language="nl")
    print(f" Intent: {result1.intent}")
    print(f" Entities: {result1.entities}")
    # `or q1` also covers an attribute that exists but is None/empty, which
    # the getattr default alone would not.
    resolved1 = getattr(result1, 'resolved_question', None) or q1
    print(f" Resolved: {resolved1}")

    # Test 2: Follow-up with history
    q2 = "Welke hebben een bibliotheek?"
    history = History(messages=[
        {"question": q1, "answer": "Ik heb 15 musea gevonden in Den Haag..."}
    ])
    print(f"\n2. Follow-up: {q2}")
    print(" (With history about Den Haag museums)")
    result2 = router(question=q2, language="nl", history=history)
    print(f" Intent: {result2.intent}")
    print(f" Entities: {result2.entities}")
    resolved2 = getattr(result2, 'resolved_question', None) or q2
    print(f" Resolved: {resolved2}")

    # Check if "Den Haag" or "musea" appears in the resolved question.
    # str() guards the comparison against a non-string value.
    resolved_lower = str(resolved2).lower()
    if "den haag" in resolved_lower or "musea" in resolved_lower:
        print(" ✓ Context resolution working - Den Haag/musea referenced")
    else:
        print(" ⚠️ Context may not have been fully resolved")
    print("\n✓ Query router history test complete!")
if __name__ == "__main__":
    # Script entry point: verify the endpoint, then run every test in order.
    rule = "=" * 60
    print(rule)
    print("DSPy Heritage RAG - Live Testing")
    print(f"Started: {datetime.now().isoformat()}")
    print(rule)

    # The SPARQL endpoint is a hard prerequisite; stop early without it.
    endpoint_ok = test_sparql_endpoint()
    if not endpoint_ok:
        tunnel_hint = (
            "\n⚠️ SPARQL endpoint not available!",
            "Make sure SSH tunnel is active:",
            " ssh -f -N -L 7878:localhost:7878 root@91.98.224.44",
        )
        for hint_line in tunnel_hint:
            print(hint_line)
        sys.exit(1)

    # Order: live sample data, DSPy components, schema-aware signatures,
    # multi-turn support, and finally the full pipeline.
    steps = (
        run_sample_queries,
        test_query_router,
        test_sparql_generation,
        test_schema_aware_signatures,
        test_query_router_with_history,
        test_multi_turn_conversation,
        test_full_pipeline,
    )
    for step in steps:
        step()

    print("\n" + rule)
    print("Testing Complete!")
    print(rule)