# glam/tests/dspy_gitops/datasets/golden_queries.yaml

# Golden Test Cases for Heritage RAG
# These tests MUST pass for any release

golden_tests:
  # Dutch-language counting question ("How many museums are there in
  # Amsterdam?"). Expects the `statistical` intent on `institution` entities.
  - id: "golden_amsterdam_museums"
    question: "Hoeveel musea zijn er in Amsterdam?"
    language: nl
    expected_intent: statistical
    expected_entity_type: institution
    # NOTE(review): this case is keyed `min_answer_contains`, whereas the
    # Rijksmuseum location case in this file uses `expected_answer_contains`.
    # Confirm the harness reads both keys; if it only reads one of them, the
    # substrings under the other key are never checked.
    min_answer_contains:
      - "musea"
      - "Amsterdam"
    # Presumably a response-time budget in milliseconds -- verify in harness.
    max_latency_ms: 10000
    priority: critical

  # Dutch-language location question ("Where is the Rijksmuseum located?").
  # Expects the `geographic` intent on `institution` entities.
  - id: "golden_rijksmuseum_location"
    question: "Waar is het Rijksmuseum gevestigd?"
    language: nl
    # Note: geographic and entity_lookup are both valid for location questions.
    # NOTE(review): only `geographic` is listed below. Confirm the harness
    # also tolerates `entity_lookup`; otherwise a valid classification could
    # fail this case.
    expected_intent: geographic
    expected_entity_type: institution
    # Substring(s) listed for the answer; exact matching rules (case,
    # all-vs-any) are defined by the harness -- TODO confirm.
    expected_answer_contains:
      - "Amsterdam"
    # Presumably a response-time budget in milliseconds -- verify in harness.
    max_latency_ms: 10000
    priority: critical

  # English-language counting question about libraries in the Netherlands.
  # Expects the `statistical` intent on `institution` entities.
  # No answer-substring assertion is listed for this case; only intent,
  # entity type and latency are specified.
  - id: "golden_nl_libraries_count"
    question: "How many libraries are there in the Netherlands?"
    language: en
    expected_intent: statistical
    expected_entity_type: institution
    # Presumably a response-time budget in milliseconds -- verify in harness.
    max_latency_ms: 10000
    priority: high

  # Dutch-language staff question ("Who works at the Nationaal Archief?").
  # Expects the `entity_lookup` intent on `person` entities.
  - id: "golden_nationaal_archief_staff"
    question: "Wie werkt bij het Nationaal Archief?"
    language: nl
    expected_intent: entity_lookup
    expected_entity_type: person
    # Source(s) listed for this case; presumably the backends the answer must
    # draw on -- TODO confirm how the harness compares this list.
    expected_sources:
      - oxigraph
    # Higher value than the 10000 used by the other query cases in this file.
    max_latency_ms: 15000
    priority: high

  # Health-check case: unlike the query cases above it carries a `type` and
  # an `endpoint` instead of `question`/`language`/intent keys, and no
  # `max_latency_ms` is specified.
  - id: "golden_api_health"
    type: health_check
    endpoint: "/api/dspy/rag/health"
    # Expected status value for the endpoint (presumably the HTTP status code).
    expected_status: 200
    # Field names listed for the response; presumably top-level keys of the
    # response body -- verify against the harness.
    expected_fields:
      - status
      - components
    priority: critical