glam/tests/dspy_gitops/datasets/golden_queries.yaml
kempersc 47e8226595 feat(tests): Complete DSPy GitOps testing framework
- Layer 1: 35 unit tests (no LLM required)
- Layer 2: 56 DSPy module tests with LLM
- Layer 3: 10 integration tests with Oxigraph
- Layer 4: Comprehensive evaluation suite

Fixed:
- Coordinate queries to use schema:location -> blank node pattern
- Golden query expected intent for location questions
- Health check test filtering in Layer 4

Added GitHub Actions workflow for CI/CD evaluation
2026-01-11 20:04:33 +01:00

52 lines
1.4 KiB
YAML

# Golden Test Cases for Heritage RAG
# These tests MUST pass for any release
golden_tests:
  # Dutch count question about museums in Amsterdam.
  - id: "golden_amsterdam_museums"
    question: "Hoeveel musea zijn er in Amsterdam?"
    language: nl
    expected_intent: statistical
    expected_entity_type: institution
    # NOTE(review): this entry uses `min_answer_contains`, while
    # golden_rijksmuseum_location uses `expected_answer_contains`. Confirm the
    # test harness reads both keys; otherwise one of the two substring checks
    # is silently skipped.
    min_answer_contains:
      - "musea"
      - "Amsterdam"
    max_latency_ms: 10000
    priority: critical

  # Dutch location question about the Rijksmuseum.
  - id: "golden_rijksmuseum_location"
    question: "Waar is het Rijksmuseum gevestigd?"
    language: nl
    # Note: geographic and entity_lookup are both valid for location questions
    expected_intent: geographic
    expected_entity_type: institution
    expected_answer_contains:
      - "Amsterdam"
    max_latency_ms: 10000
    priority: critical

  # English count question; no answer-substring expectations are declared.
  - id: "golden_nl_libraries_count"
    question: "How many libraries are there in the Netherlands?"
    language: en
    expected_intent: statistical
    expected_entity_type: institution
    max_latency_ms: 10000
    priority: high

  # Dutch staff lookup; the only entry that declares expected_sources and the
  # only one with a 15000 latency limit instead of 10000.
  - id: "golden_nationaal_archief_staff"
    question: "Wie werkt bij het Nationaal Archief?"
    language: nl
    expected_intent: entity_lookup
    expected_entity_type: person
    expected_sources:
      - oxigraph
    max_latency_ms: 15000
    priority: high

  # Endpoint health check rather than a question: it has `type`, `endpoint`,
  # and `expected_status`, and no question/language/intent fields, so
  # consumers iterating over query tests presumably need to filter on
  # `type` (confirm against the test harness).
  - id: "golden_api_health"
    type: health_check
    endpoint: "/api/dspy/rag/health"
    expected_status: 200
    expected_fields:
      - status
      - components
    priority: critical