# Source path: glam/data/sparql_templates.yaml
# Snapshot: 2025-12-30 23:19:38 +01:00
#
# 1203 lines
# 45 KiB
# YAML

# SPARQL Query Templates for Heritage Custodian Knowledge Graph
#
# This file defines parameterized SPARQL templates that replace LLM-generated queries
# with deterministic, validated templates. Based on docs/plan/prompt-query_template_mapping/
#
# CRITICAL: Processing Pipeline Order
# ===================================
# 1. User question → ConversationContextResolver (DSPy) → RESOLVED question
# 2. RESOLVED question → FykeFilter (DSPy) → relevant/irrelevant
# ⚠️ FYKE MUST OPERATE ON RESOLVED QUESTION, NOT RAW INPUT!
# "En in Enschede?" resolved to "Welke archieven zijn er in Enschede?" is clearly relevant
# 3. If relevant: resolved question → TemplateClassifier → template_id
# 4. template_id + resolved question → SlotExtractor → slot values
# 5. template + slot values → TemplateInstantiator (Jinja2) → SPARQL query
#
# Slot Value Sources:
# - data/validation/sparql_validation_rules.json (institution_type_mappings, subregion_mappings, etc.)
# - backend/rag/ontology_mapping.py (fuzzy matching, multilingual synonyms)
# Bookkeeping for this template set: its version, creation date, and the
# paths of the schema and validation rules it refers to. All values are
# quoted so the version and date stay strings.
_metadata:
  version: '1.0.0'
  created: '2025-01-06'
  schema_source: 'schemas/20251121/linkml/'
  validation_rules: 'data/validation/sparql_validation_rules.json'
# Namespace declarations shared by the query templates. Each template body
# visible in this file opens with `{{ prefixes }}`.
# NOTE(review): presumably the instantiator fills `prefixes` from this
# literal block - confirm against the loader.
_prefixes: |
  PREFIX hc: <https://nde.nl/ontology/hc/>
  PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
  PREFIX schema: <http://schema.org/>
  PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
  PREFIX org: <http://www.w3.org/ns/org#>
  PREFIX foaf: <http://xmlns.com/foaf/0.1/>
  PREFIX dcterms: <http://purl.org/dc/terms/>
  PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
  PREFIX wd: <http://www.wikidata.org/entity/>
# Slot type definitions with validation sources
_slot_types:
# Maps user-facing institution words (Dutch, English, German and a few
# special categories) onto the single-letter codes listed in `valid_values`.
# The codes stay quoted: a bare N or Y is a boolean for YAML 1.1 loaders.
# NOTE(review): keys ending in `_de` look like they are suffixed only to avoid
# duplicate mapping keys - confirm the consumer strips the suffix before
# comparing against user text.
institution_type:
  description: "Single-letter custodian type code (M, L, A, G, etc.)"
  source: "sparql_validation_rules.json#institution_type_mappings"
  valid_values: ["M", "L", "A", "G", "O", "R", "C", "U", "B", "E", "S", "F", "I", "X", "P", "H", "D", "N", "T"]
  synonyms:
    # --- Dutch ---
    museum: "M"
    musea: "M"
    bibliotheek: "L"
    bibliotheken: "L"
    archief: "A"
    archieven: "A"
    galerie: "G"
    galerij: "G"
    galerijen: "G"

    # --- English ---
    museums: "M"
    library: "L"
    libraries: "L"
    archive: "A"
    archives: "A"
    gallery: "G"
    galleries: "G"

    # --- German ---
    bibliothek_de: "L"
    bibliotheken_de: "L"
    archiv: "A"
    archive_de: "A"
    galerie_de: "G"

    # --- Special types: zoos / botanical gardens (B), places of worship (H) ---
    dierentuin: "B"
    zoo: "B"
    botanische_tuin: "B"
    botanical_garden: "B"
    kerk: "H"
    church: "H"
    moskee: "H"
    mosque: "H"
    synagoge: "H"
    synagogue: "H"
# Maps province/state/region names to ISO 3166-2 style subdivision codes.
# Keys suffixed `_province` / `_state` / `_nl` carry a city or ambiguous name
# alongside the code of the subdivision it is mapped to.
subregion:
  description: "ISO 3166-2 subdivision code (NL-NH, DE-BY, etc.)"
  source: "sparql_validation_rules.json#subregion_mappings"
  synonyms:
    # --- Netherlands ---
    noord-holland: "NL-NH"
    noord_holland: "NL-NH"
    noordholland: "NL-NH"
    amsterdam_province: "NL-NH"
    zuid-holland: "NL-ZH"
    zuid_holland: "NL-ZH"
    zuidholland: "NL-ZH"
    rotterdam_province: "NL-ZH"
    den_haag_province: "NL-ZH"
    the_hague_province: "NL-ZH"
    noord-brabant: "NL-NB"
    brabant: "NL-NB"
    eindhoven_province: "NL-NB"
    gelderland: "NL-GE"
    arnhem_province: "NL-GE"
    nijmegen_province: "NL-GE"
    utrecht_province: "NL-UT"
    overijssel: "NL-OV"
    zwolle_province: "NL-OV"
    enschede_province: "NL-OV"
    limburg_nl: "NL-LI"
    maastricht_province: "NL-LI"
    friesland: "NL-FR"
    frisia: "NL-FR"
    leeuwarden_province: "NL-FR"
    groningen_province: "NL-GR"
    drenthe: "NL-DR"
    assen_province: "NL-DR"
    flevoland: "NL-FL"
    almere_province: "NL-FL"
    lelystad_province: "NL-FL"
    zeeland: "NL-ZE"
    middelburg_province: "NL-ZE"

    # --- Belgium ---
    vlaanderen: "BE-VLG"
    flanders: "BE-VLG"
    antwerpen_province: "BE-VLG"
    gent_province: "BE-VLG"
    brugge_province: "BE-VLG"
    wallonie: "BE-WAL"
    wallonia: "BE-WAL"
    brussel: "BE-BRU"
    brussels: "BE-BRU"
    bruxelles: "BE-BRU"

    # --- Germany ---
    bayern: "DE-BY"
    bavaria: "DE-BY"
    muenchen_province: "DE-BY"
    munich_province: "DE-BY"
    berlin_state: "DE-BE"
    baden_wuerttemberg: "DE-BW"
    stuttgart_province: "DE-BW"
    nordrhein_westfalen: "DE-NW"
    north_rhine_westphalia: "DE-NW"
    koeln_province: "DE-NW"
    cologne_province: "DE-NW"
    duesseldorf_province: "DE-NW"
    sachsen: "DE-SN"
    saxony: "DE-SN"
    dresden_province: "DE-SN"
    hessen: "DE-HE"
    hesse: "DE-HE"
    frankfurt_province: "DE-HE"
# Maps country names and two-letter codes to Wikidata QIDs.
# The slot value is the bare QID: the country-based query templates in this
# file wrap it themselves as <http://www.wikidata.org/entity/{{ country }}>,
# so a full URI here would yield a doubled, invalid IRI. The description is
# worded accordingly.
# `format` is left unchanged because a consumer may key on that value.
# Keep any future two-letter keys quoted if they look boolean to YAML 1.1
# loaders (for example "no" for Norway).
country:
  description: "Wikidata QID of the country (e.g. Q55); templates expand it to the full entity IRI"
  source: "sparql_validation_rules.json#country_mappings"
  format: "wikidata_uri"
  synonyms:
    # Netherlands
    nederland: "Q55"
    netherlands: "Q55"
    holland: "Q55"
    nl: "Q55"
    # Belgium
    belgie: "Q31"
    belgium: "Q31"
    be: "Q31"
    # Germany
    duitsland: "Q183"
    germany: "Q183"
    de_country: "Q183"
    # France
    frankrijk: "Q142"
    france: "Q142"
    fr: "Q142"
    # United Kingdom (the England entries map to the same QID)
    verenigd_koninkrijk: "Q145"
    united_kingdom: "Q145"
    uk: "Q145"
    gb: "Q145"
    engeland: "Q145"
    england: "Q145"
    # United States
    verenigde_staten: "Q30"
    united_states: "Q30"
    usa: "Q30"
    us: "Q30"
    # Japan
    japan: "Q17"
    jp: "Q17"
    # Czech Republic
    tsjechie: "Q213"
    czech_republic: "Q213"
    czechia: "Q213"
    cz: "Q213"
    # Austria
    oostenrijk: "Q40"
    austria: "Q40"
    at: "Q40"
    # Switzerland
    zwitserland: "Q39"
    switzerland: "Q39"
    ch: "Q39"
# Free-text locality name; `source` names fuzzy matching instead of a
# synonym table.
city:
  source: "fuzzy_match"
  description: "City/locality name (string literal)"
# Free-text institution name used for lookups; `source` names fuzzy matching
# instead of a synonym table.
institution_name:
  source: "fuzzy_match"
  description: "Institution name for lookup (string literal)"
# Row cap for result sets, declared with a default of 10 and a maximum of 100.
limit:
  description: "Result limit (integer)"
  max: 100
  default: 10
# Normalises Dutch, English and German budget/expense wording to the canonical
# category names in `valid_values`. The categories external_funding,
# internal_funding and endowment_draw have no synonyms listed here.
budget_category:
  description: "Budget or expense category for financial queries"
  source: "ontology"
  valid_values: ["innovation", "digitization", "preservation", "personnel", "acquisition", "operating", "capital", "external_funding", "internal_funding", "endowment_draw"]
  synonyms:
    # ----- innovation -----
    # Dutch
    innovatie: "innovation"
    innovaties: "innovation"
    vernieuwing: "innovation"
    digital_transformatie: "innovation"
    digitale_transformatie: "innovation"
    r_d: "innovation"
    onderzoek_ontwikkeling: "innovation"
    # English
    innovations: "innovation"
    r_and_d: "innovation"
    research_development: "innovation"
    digital_transformation: "innovation"
    technology: "innovation"
    tech: "innovation"
    # German
    innovationen: "innovation"
    erneuerung: "innovation"

    # ----- digitization -----
    # Dutch
    digitalisering: "digitization"
    digitaliseringsbudget: "digitization"
    digitale_collectie: "digitization"
    # English
    digitisation: "digitization"  # UK spelling
    digital: "digitization"
    scanning: "digitization"
    # German
    digitalisierung: "digitization"

    # ----- preservation -----
    # Dutch
    conservering: "preservation"
    restauratie: "preservation"
    behoud: "preservation"
    onderhoud: "preservation"
    # English
    conservation: "preservation"
    restoration: "preservation"
    maintenance: "preservation"
    # German
    konservierung: "preservation"
    restaurierung: "preservation"

    # ----- personnel -----
    # Dutch
    personeel: "personnel"
    personele_kosten: "personnel"
    salarissen: "personnel"
    medewerkers: "personnel"
    fte: "personnel"
    # English
    staff: "personnel"
    salaries: "personnel"
    employees: "personnel"
    hr: "personnel"
    human_resources: "personnel"
    # German
    personal: "personnel"
    personalkosten: "personnel"
    gehälter: "personnel"

    # ----- acquisition -----
    # Dutch
    aanwinsten: "acquisition"
    aankopen: "acquisition"
    collectie_aankopen: "acquisition"
    verwervingen: "acquisition"
    # English
    acquisitions: "acquisition"
    purchases: "acquisition"
    collection_development: "acquisition"
    # German
    erwerbungen: "acquisition"
    ankäufe: "acquisition"

    # ----- operating -----
    # Dutch
    operationeel: "operating"
    exploitatie: "operating"
    bedrijfskosten: "operating"
    # English
    operations: "operating"
    operational: "operating"
    running_costs: "operating"
    # German
    betriebskosten: "operating"
    betrieb: "operating"

    # ----- capital -----
    # Dutch
    kapitaal: "capital"
    investeringen: "capital"
    bouw: "capital"
    verbouwing: "capital"
    # English
    capex: "capital"
    investments: "capital"
    construction: "capital"
    building: "capital"
    # German
    kapital: "capital"
    investitionen: "capital"
# =============================================================================
# TEMPLATE DEFINITIONS
# =============================================================================
templates:
# ---------------------------------------------------------------------------
# Template 1: List institutions by type and location (city)
# ---------------------------------------------------------------------------
# Returns the IRI, preferred label and (optional) homepage of every custodian
# that has the requested type code and whose schema:addressLocality equals the
# `city` slot. The `{institution_type_nl|en|de}` placeholders in the patterns
# are per-language surface forms; the query itself receives the
# `institution_type` slot.
list_institutions_by_type_city:
  id: "list_institutions_by_type_city"
  description: "List heritage institutions of a specific type in a city"
  intent: ["geographic", "exploration"]
  question_patterns:
    # Dutch - formal
    - "Welke {institution_type_nl} zijn er in {city}?"
    - "Welke {institution_type_nl} heeft {city}?"
    - "Wat zijn de {institution_type_nl} in {city}?"
    - "Geef me de {institution_type_nl} in {city}"
    - "Toon {institution_type_nl} in {city}"
    - "{institution_type_nl} in {city}"
    # Dutch - conversational
    - "Wat voor {institution_type_nl} hebben ze in {city}?"
    - "Wat voor {institution_type_nl} zijn er in {city}?"
    - "Wat voor {institution_type_nl} heeft {city}?"
    - "Geef een overzicht van {institution_type_nl} in {city}"
    - "Geef een overzicht van de {institution_type_nl} in {city}"
    - "Geef een overzicht van alle {institution_type_nl} in {city}"
    - "Ik zoek {institution_type_nl} in {city}"
    - "Zijn er {institution_type_nl} in {city}?"
    - "Heeft {city} {institution_type_nl}?"
    - "Ken je {institution_type_nl} in {city}?"
    - "Welke {institution_type_nl} kan ik vinden in {city}?"
    - "Noem {institution_type_nl} in {city}"
    - "Lijst van {institution_type_nl} in {city}"
    # English
    - "What {institution_type_en} are in {city}?"
    - "Which {institution_type_en} are there in {city}?"
    - "List {institution_type_en} in {city}"
    - "Show me {institution_type_en} in {city}"
    - "{institution_type_en} in {city}"
    - "What kind of {institution_type_en} are in {city}?"
    - "Give me an overview of {institution_type_en} in {city}"
    - "I'm looking for {institution_type_en} in {city}"
    - "Are there {institution_type_en} in {city}?"
    # German
    - "Welche {institution_type_de} gibt es in {city}?"
    - "Welche {institution_type_de} hat {city}?"
    - "Was für {institution_type_de} gibt es in {city}?"
    - "Gib mir eine Übersicht der {institution_type_de} in {city}"
  slots:
    institution_type:
      type: institution_type
      required: true
      examples: ["musea", "archieven", "bibliotheken", "museums", "archives"]
    city:
      type: city
      required: true
      examples: ["Amsterdam", "Den Haag", "Rotterdam", "Utrecht"]
  # The class is written as a full IRI: `hc:class/Custodian` is not a valid
  # SPARQL prefixed name (an unescaped "/" is not allowed in the local part),
  # so the query would be rejected by the parser. The IRI below is what that
  # name would expand to under the `hc:` prefix.
  # LIMIT is emitted only when a truthy `limit` value is supplied.
  sparql_template: |
    {{ prefixes }}
    SELECT DISTINCT ?institution ?name ?website WHERE {
      ?institution a <https://nde.nl/ontology/hc/class/Custodian> ;
          hc:institutionType "{{ institution_type }}" ;
          schema:addressLocality "{{ city }}" ;
          skos:prefLabel ?name .
      OPTIONAL { ?institution foaf:homepage ?website }
    }
    ORDER BY ?name
    {% if limit %}LIMIT {{ limit }}{% endif %}
  examples:
    - question: "Welke musea zijn er in Amsterdam?"
      slots: {institution_type: "M", city: "Amsterdam"}
    - question: "What archives are in The Hague?"
      slots: {institution_type: "A", city: "Den Haag"}
# ---------------------------------------------------------------------------
# Template 2: List institutions by type and province/region
# ---------------------------------------------------------------------------
# Lists institutions of one type whose hc:ghcid string starts with the
# subdivision code in the `region` slot (e.g. "NL-NH"); locality is optional
# output.
list_institutions_by_type_region:
  id: "list_institutions_by_type_region"
  description: "List heritage institutions of a specific type in a province/region"
  intent: ["geographic", "exploration"]
  question_patterns:
    # Dutch - formal
    # The counting question "Hoeveel {institution_type_nl} heeft {region}?" is
    # intentionally not listed here: it asks for a number and is already
    # covered by count_institutions_by_type_location, whose location slot
    # falls back to subregion.
    - "Welke {institution_type_nl} zijn er in {region}?"
    - "{institution_type_nl} in {region}"
    - "Alle {institution_type_nl} in de provincie {region}"
    # Dutch - conversational
    - "Wat voor {institution_type_nl} hebben ze in {region}?"
    - "Wat voor {institution_type_nl} zijn er in {region}?"
    - "Geef een overzicht van {institution_type_nl} in {region}"
    - "Geef een overzicht van de {institution_type_nl} in {region}"
    - "Geef een overzicht van alle {institution_type_nl} in {region}"
    - "Ik zoek {institution_type_nl} in {region}"
    - "Zijn er {institution_type_nl} in {region}?"
    - "Ken je {institution_type_nl} in {region}?"
    - "Welke {institution_type_nl} kan ik vinden in {region}?"
    - "Noem {institution_type_nl} in {region}"
    - "Lijst van {institution_type_nl} in {region}"
    - "{institution_type_nl} in de provincie {region}"
    # English
    - "What {institution_type_en} are in {region}?"
    - "Which {institution_type_en} are there in {region}?"
    - "{institution_type_en} in {region}"
    - "Give me an overview of {institution_type_en} in {region}"
    - "I'm looking for {institution_type_en} in {region}"
  slots:
    institution_type:
      type: institution_type
      required: true
    region:
      type: subregion
      required: true
      examples: ["Noord-Holland", "Gelderland", "Limburg", "Bavaria", "Flanders"]
  # Primary body. The class is a full IRI because `hc:class/Custodian` is not
  # a valid SPARQL prefixed name (unescaped "/" in the local part) and would
  # make the query unparseable.
  sparql_template: |
    {{ prefixes }}
    SELECT DISTINCT ?institution ?name ?city WHERE {
      ?institution a <https://nde.nl/ontology/hc/class/Custodian> ;
          hc:institutionType "{{ institution_type }}" ;
          hc:ghcid ?ghcid ;
          skos:prefLabel ?name .
      FILTER(STRSTARTS(?ghcid, "{{ region }}"))
      OPTIONAL { ?institution schema:addressLocality ?city }
    }
    ORDER BY ?name
    {% if limit %}LIMIT {{ limit }}{% endif %}
  # Alternative body that matches on crm:E39_Actor instead of the hc Custodian
  # class. The former `replace('-', '-')` filter swapped a hyphen for the same
  # hyphen and has been dropped; the rendered query is unchanged.
  sparql_template_alt: |
    {{ prefixes }}
    SELECT DISTINCT ?institution ?name ?city WHERE {
      ?institution a crm:E39_Actor ;
          hc:institutionType "{{ institution_type }}" ;
          hc:ghcid ?ghcid ;
          skos:prefLabel ?name .
      FILTER(STRSTARTS(?ghcid, "{{ region }}"))
      OPTIONAL { ?institution schema:addressLocality ?city }
    }
    ORDER BY ?name
    {% if limit %}LIMIT {{ limit }}{% endif %}
# ---------------------------------------------------------------------------
# Template 3: List institutions by type and country
# ---------------------------------------------------------------------------
# Selects institutions of one type code whose schema:addressCountry is the
# Wikidata entity IRI built from the `country` slot. Locality is optional
# output, and LIMIT is emitted only for a truthy `limit`.
list_institutions_by_type_country:
  id: "list_institutions_by_type_country"
  description: "List heritage institutions of a specific type in a country"
  intent:
    - "geographic"
    - "exploration"
  question_patterns:
    # Dutch
    - "Welke {institution_type_nl} zijn er in {country}?"
    - "Alle {institution_type_nl} in {country}"
    - "{institution_type_nl} in {country}"
    # English
    - "What {institution_type_en} are in {country}?"
    - "List all {institution_type_en} in {country}"
    - "{institution_type_en} in {country}"
  slots:
    institution_type:
      required: true
      type: institution_type
    country:
      required: true
      type: country
      examples:
        - "Nederland"
        - "Belgium"
        - "Germany"
        - "France"
  sparql_template: |
    {{ prefixes }}
    SELECT DISTINCT ?institution ?name ?city WHERE {
      ?institution a crm:E39_Actor ;
          hc:institutionType "{{ institution_type }}" ;
          schema:addressCountry <http://www.wikidata.org/entity/{{ country }}> ;
          skos:prefLabel ?name .
      OPTIONAL { ?institution schema:addressLocality ?city }
    }
    ORDER BY ?name
    {% if limit %}LIMIT {{ limit }}{% endif %}
# ---------------------------------------------------------------------------
# Template 4: Count institutions by type and location
# ---------------------------------------------------------------------------
# Counts distinct institutions of one type in a location. Three query bodies
# are provided: the default matches schema:addressLocality, `_region` matches
# a GHCID prefix, and `_country` matches a Wikidata country entity.
# NOTE(review): presumably the body is chosen according to which of the
# `location` slot's types (city, or a fallback type) resolved - confirm
# against the instantiator.
count_institutions_by_type_location:
  id: "count_institutions_by_type_location"
  description: "Count heritage institutions of a specific type in a location"
  intent:
    - "statistical"
  question_patterns:
    # Dutch - formal
    - "Hoeveel {institution_type_nl} zijn er in {location}?"
    - "Hoeveel {institution_type_nl} heeft {location}?"
    - "Aantal {institution_type_nl} in {location}"
    - "Tel de {institution_type_nl} in {location}"
    # Dutch - conversational
    - "Hoeveel {institution_type_nl} telt {location}?"
    - "Wat is het aantal {institution_type_nl} in {location}?"
    - "Hoeveel {institution_type_nl} kan ik vinden in {location}?"
    - "Kun je tellen hoeveel {institution_type_nl} er in {location} zijn?"
    - "Hoeveel {institution_type_nl} zitten er in {location}?"
    # English
    - "How many {institution_type_en} are in {location}?"
    - "How many {institution_type_en} does {location} have?"
    - "Count of {institution_type_en} in {location}"
    - "Number of {institution_type_en} in {location}"
    - "What's the number of {institution_type_en} in {location}?"
    - "Can you count {institution_type_en} in {location}?"
    # German
    - "Wie viele {institution_type_de} gibt es in {location}?"
    - "Wie viele {institution_type_de} hat {location}?"
    - "Anzahl der {institution_type_de} in {location}"
  slots:
    institution_type:
      required: true
      type: institution_type
    location:
      required: true
      type: city
      fallback_types:
        - subregion
        - country
  # Default body: exact match on the locality name.
  sparql_template: |
    {{ prefixes }}
    SELECT (COUNT(DISTINCT ?institution) AS ?count) WHERE {
      ?institution a crm:E39_Actor ;
          hc:institutionType "{{ institution_type }}" ;
          schema:addressLocality "{{ location }}" .
    }
  # Region body: `location` is used as a prefix of the GHCID string.
  sparql_template_region: |
    {{ prefixes }}
    SELECT (COUNT(DISTINCT ?institution) AS ?count) WHERE {
      ?institution a crm:E39_Actor ;
          hc:institutionType "{{ institution_type }}" ;
          hc:ghcid ?ghcid .
      FILTER(STRSTARTS(?ghcid, "{{ location }}"))
    }
  # Country body: `location` is appended to the Wikidata entity namespace.
  sparql_template_country: |
    {{ prefixes }}
    SELECT (COUNT(DISTINCT ?institution) AS ?count) WHERE {
      ?institution a crm:E39_Actor ;
          hc:institutionType "{{ institution_type }}" ;
          schema:addressCountry <http://www.wikidata.org/entity/{{ location }}> .
    }
# ---------------------------------------------------------------------------
# Template 5: Count all institutions by type (distribution)
# ---------------------------------------------------------------------------
# Takes no slots. Groups every crm:E39_Actor by its hc:institutionType and
# returns the distinct-institution count per type, largest first.
count_institutions_by_type:
  id: "count_institutions_by_type"
  description: "Count institutions grouped by type"
  intent:
    - "statistical"
  question_patterns:
    # Dutch
    - "Hoeveel instellingen per type?"
    - "Verdeling van instellingen per type"
    - "Hoeveel musea, archieven en bibliotheken zijn er?"
    - "Statistieken per instellingstype"
    # English
    - "How many institutions per type?"
    - "Distribution of institutions by type"
    - "Statistics by institution type"
    - "How many museums, archives and libraries are there?"
  slots: {}
  sparql_template: |
    {{ prefixes }}
    SELECT ?type (COUNT(DISTINCT ?institution) AS ?count) WHERE {
      ?institution a crm:E39_Actor ;
          hc:institutionType ?type .
    }
    GROUP BY ?type
    ORDER BY DESC(?count)
# ---------------------------------------------------------------------------
# Template 6: Find institution by name
# ---------------------------------------------------------------------------
# Case-insensitive substring search on skos:prefLabel. Type, locality,
# country, homepage and description are returned when present.
# NOTE(review): `institution_name` is interpolated inside a quoted SPARQL
# literal - confirm that quotes and backslashes are escaped before
# instantiation.
find_institution_by_name:
  id: "find_institution_by_name"
  description: "Find a specific institution by name"
  intent: ["entity_lookup"]
  question_patterns:
    # Dutch - formal
    - "Waar is {institution_name}?"
    - "Informatie over {institution_name}"
    - "Gegevens van {institution_name}"
    - "Wat is {institution_name}?"
    - "Zoek {institution_name}"
    # Dutch - conversational
    - "Geef informatie over {institution_name}"
    - "Geef me informatie over {institution_name}"
    - "Vertel me over {institution_name}"
    - "Wat weet je over {institution_name}?"
    - "Ken je {institution_name}?"
    - "Wat kun je vertellen over {institution_name}?"
    - "Ik zoek informatie over {institution_name}"
    - "Ik wil meer weten over {institution_name}"
    - "Details over {institution_name}"
    - "Geef details over {institution_name}"
    # English
    - "Where is {institution_name}?"
    - "Information about {institution_name}"
    - "What is {institution_name}?"
    - "Find {institution_name}"
    - "Tell me about {institution_name}"
    - "Give me information about {institution_name}"
    - "What do you know about {institution_name}?"
    - "I'm looking for information about {institution_name}"
    # German
    - "Wo ist {institution_name}?"
    - "Informationen über {institution_name}"
    - "Was ist {institution_name}?"
    - "Erzähl mir über {institution_name}"
  slots:
    institution_name:
      type: institution_name
      required: true
      examples: ["Rijksmuseum", "Nationaal Archief", "Koninklijke Bibliotheek"]
  # LIMIT honours a supplied `limit` value and falls back to the previous
  # fixed cap of 10 when `limit` is undefined or falsy (the second argument
  # `true` makes Jinja's default filter treat None/0/"" as missing).
  sparql_template: |
    {{ prefixes }}
    SELECT ?institution ?name ?type ?city ?country ?website ?description WHERE {
      ?institution a crm:E39_Actor ;
          skos:prefLabel ?name .
      FILTER(CONTAINS(LCASE(STR(?name)), LCASE("{{ institution_name }}")))
      OPTIONAL { ?institution hc:institutionType ?type }
      OPTIONAL { ?institution schema:addressLocality ?city }
      OPTIONAL { ?institution schema:addressCountry ?country }
      OPTIONAL { ?institution foaf:homepage ?website }
      OPTIONAL { ?institution schema:description ?description }
    }
    LIMIT {{ limit | default(10, true) }}
# ---------------------------------------------------------------------------
# Template 7: List institutions in city (all types)
# ---------------------------------------------------------------------------
# Lists every crm:E39_Actor whose schema:addressLocality equals `city`,
# regardless of type. Type and homepage are optional output; rows are sorted
# by type, then name.
list_all_institutions_in_city:
  id: "list_all_institutions_in_city"
  description: "List all heritage institutions in a city"
  intent:
    - "geographic"
    - "exploration"
  question_patterns:
    # Dutch
    - "Welke erfgoedinstellingen zijn er in {city}?"
    - "Alle instellingen in {city}"
    - "Erfgoed in {city}"
    - "Wat is er te zien in {city}?"
    - "Culturele instellingen in {city}"
    # English
    - "What heritage institutions are in {city}?"
    - "All institutions in {city}"
    - "Heritage in {city}"
    - "What is there to see in {city}?"
    - "Cultural institutions in {city}"
  slots:
    city:
      required: true
      type: city
  sparql_template: |
    {{ prefixes }}
    SELECT ?institution ?name ?type ?website WHERE {
      ?institution a crm:E39_Actor ;
          schema:addressLocality "{{ city }}" ;
          skos:prefLabel ?name .
      OPTIONAL { ?institution hc:institutionType ?type }
      OPTIONAL { ?institution foaf:homepage ?website }
    }
    ORDER BY ?type ?name
    {% if limit %}LIMIT {{ limit }}{% endif %}
# ---------------------------------------------------------------------------
# Template 8: Find oldest/youngest institutions
# ---------------------------------------------------------------------------
# Orders institutions by schema:foundingDate. Every slot is optional; each
# supplied slot adds one restriction to the query.
# NOTE(review): `location` declares subregion/country fallbacks, but the query
# only matches schema:addressLocality - confirm how non-city values are
# handled.
find_institutions_by_founding_date:
  id: "find_institutions_by_founding_date"
  description: "Find oldest or most recently founded institutions"
  intent: ["temporal", "exploration"]
  question_patterns:
    # Dutch
    - "Wat zijn de oudste {institution_type_nl}?"
    - "Wat is het oudste {institution_type_nl}?"
    - "Oudste {institution_type_nl} in {location}"
    - "Wanneer is {institution_name} opgericht?"
    - "Wat zijn de nieuwste {institution_type_nl}?"
    - "Recent opgerichte {institution_type_nl}"
    # English
    - "What are the oldest {institution_type_en}?"
    - "What is the oldest {institution_type_en}?"
    - "Oldest {institution_type_en} in {location}"
    - "When was {institution_name} founded?"
    - "What are the newest {institution_type_en}?"
    - "Recently founded {institution_type_en}"
  slots:
    institution_type:
      type: institution_type
      required: false
    location:
      type: city
      required: false
      fallback_types: [subregion, country]
    # Declared because the "Wanneer is ... opgericht?" / "When was ... founded?"
    # patterns capture it; without this slot the name was silently ignored and
    # the query returned the oldest institutions overall.
    institution_name:
      type: institution_name
      required: false
    order:
      type: string
      default: "ASC"
      valid_values: ["ASC", "DESC"]
  # `order` and `limit` fall back to ASC and 10 when undefined or falsy, so
  # the rendered query never contains an empty modifier or "LIMIT None".
  # The name filter mirrors the substring match of find_institution_by_name.
  sparql_template: |
    {{ prefixes }}
    SELECT ?institution ?name ?founded ?city WHERE {
      ?institution a crm:E39_Actor ;
          skos:prefLabel ?name ;
          schema:foundingDate ?founded .
      {% if institution_type %}
      ?institution hc:institutionType "{{ institution_type }}" .
      {% endif %}
      {% if location %}
      ?institution schema:addressLocality "{{ location }}" .
      {% endif %}
      {% if institution_name %}
      FILTER(CONTAINS(LCASE(STR(?name)), LCASE("{{ institution_name }}")))
      {% endif %}
      OPTIONAL { ?institution schema:addressLocality ?city }
    }
    ORDER BY {{ order | default("ASC", true) }}(?founded)
    LIMIT {{ limit | default(10, true) }}
# ---------------------------------------------------------------------------
# Template 9: Find institutions with specific identifier (ISIL, etc.)
# ---------------------------------------------------------------------------
# Exact-match lookup of one institution by identifier. Two query bodies are
# defined in this entry: `_isil` matches hc:isil and `_ghcid` matches hc:ghcid.
# NOTE(review): `identifier_type` also accepts "wikidata", but no matching
# query body is visible in this entry - confirm how that value is handled.
find_institution_by_identifier:
  id: "find_institution_by_identifier"
  description: "Find institution by ISIL, GHCID, or other identifier"
  intent:
    - "entity_lookup"
  question_patterns:
    - "Welke instelling heeft ISIL {identifier}?"
    - "Zoek ISIL {identifier}"
    - "GHCID {identifier}"
    - "Institution with ISIL {identifier}"
    - "Find ISIL {identifier}"
  slots:
    identifier:
      required: true
      type: string
      examples:
        - "NL-AmRMA"
        - "NL-HaNA"
        - "DE-1"
    identifier_type:
      type: string
      default: "isil"
      valid_values:
        - "isil"
        - "ghcid"
        - "wikidata"
  # ISIL lookup.
  sparql_template_isil: |
    {{ prefixes }}
    SELECT ?institution ?name ?city ?country ?website WHERE {
      ?institution a crm:E39_Actor ;
          hc:isil "{{ identifier }}" ;
          skos:prefLabel ?name .
      OPTIONAL { ?institution schema:addressLocality ?city }
      OPTIONAL { ?institution schema:addressCountry ?country }
      OPTIONAL { ?institution foaf:homepage ?website }
    }
  # GHCID lookup; identical to the ISIL body apart from the matched property.
  sparql_template_ghcid: |
    {{ prefixes }}
    SELECT ?institution ?name ?city ?country ?website WHERE {
      ?institution a crm:E39_Actor ;
          hc:ghcid "{{ identifier }}" ;
          skos:prefLabel ?name .
      OPTIONAL { ?institution schema:addressLocality ?city }
      OPTIONAL { ?institution schema:addressCountry ?country }
      OPTIONAL { ?institution foaf:homepage ?website }
    }
# ---------------------------------------------------------------------------
# Template 10: Compare institutions in different locations
# ---------------------------------------------------------------------------
# Counts distinct institutions per location for exactly two locality names,
# optionally restricted to one institution type. A location with no matching
# institution produces no row at all.
# NOTE(review): both location slots declare subregion/country fallbacks, but
# the only query body matches schema:addressLocality - confirm how non-city
# values are handled.
compare_locations:
  id: "compare_locations"
  description: "Compare number of institutions between locations"
  intent:
    - "comparative"
    - "statistical"
  question_patterns:
    # Dutch
    - "Vergelijk {location1} en {location2}"
    - "Hoeveel meer {institution_type_nl} heeft {location1} dan {location2}?"
    - "Verschil tussen {location1} en {location2}"
    - "{location1} versus {location2}"
    # English
    - "Compare {location1} and {location2}"
    - "How many more {institution_type_en} does {location1} have than {location2}?"
    - "Difference between {location1} and {location2}"
    - "{location1} vs {location2}"
  slots:
    location1:
      required: true
      type: city
      fallback_types:
        - subregion
        - country
    location2:
      required: true
      type: city
      fallback_types:
        - subregion
        - country
    institution_type:
      required: false
      type: institution_type
  sparql_template: |
    {{ prefixes }}
    SELECT ?location (COUNT(DISTINCT ?institution) AS ?count) WHERE {
      VALUES ?location { "{{ location1 }}" "{{ location2 }}" }
      ?institution a crm:E39_Actor ;
          schema:addressLocality ?location .
      {% if institution_type %}
      ?institution hc:institutionType "{{ institution_type }}" .
      {% endif %}
    }
    GROUP BY ?location
# ---------------------------------------------------------------------------
# Template 11: Find custodians by budget threshold
# ---------------------------------------------------------------------------
find_custodians_by_budget_threshold:
id: "find_custodians_by_budget_threshold"
description: "Find custodians with budget/expense category above or below a threshold"
intent: ["financial", "exploration"]
question_patterns:
# Dutch - Budget (planned) - Standard patterns
- "Welke instellingen besteden meer dan {amount} aan {budget_category}?"
- "Welke instellingen geven meer dan {amount} uit aan {budget_category}?"
- "Welke instellingen hebben een {budget_category}budget van meer dan {amount}?"
- "Welke {institution_type_nl} besteden meer dan {amount} aan {budget_category}?"
- "Instellingen met {budget_category} boven {amount}"
- "Wie geeft meer dan {amount} uit aan {budget_category}?"
# Dutch - Conversational (NEW)
- "Ik zoek {institution_type_nl} met een hoog {budget_category}budget"
- "Ik zoek {institution_type_nl} met een {budget_category}budget boven {amount}"
- "Ik zoek instellingen met een hoog {budget_category}budget"
- "Geef mij een lijst van {institution_type_nl} met een {budget_category}budget boven {amount}"
- "Geef mij een lijst van {institution_type_nl} met een {budget_category}budget boven {amount} euro"
- "Ken je {institution_type_nl} met een hoog {budget_category}budget?"
- "Ken je instellingen die veel uitgeven aan {budget_category}?"
- "Waar vind ik {institution_type_nl} met een groot {budget_category}budget?"
- "{institution_type_nl} met hoge {budget_category}uitgaven"
- "{institution_type_nl} die veel investeren in {budget_category}"
# Dutch - Alternative phrasings (existence/list patterns)
- "Zijn er instellingen die meer dan {amount} uitgeven aan {budget_category}?"
- "Zijn er {institution_type_nl} die meer dan {amount} uitgeven aan {budget_category}?"
- "Zijn er {institution_type_nl} die meer uitgeven dan {amount} aan {budget_category}?"
- "Geef mij instellingen met meer dan {amount} aan {budget_category}"
- "Geef een lijst van instellingen met {budget_category} boven {amount}"
- "Toon instellingen die meer dan {amount} uitgeven aan {budget_category}"
- "Toon alle instellingen die meer dan {amount} uitgeven aan {budget_category}"
- "Welke organisaties besteden meer dan {amount} aan {budget_category}?"
- "Welke organisaties geven meer dan {amount} uit aan {budget_category}?"
# Dutch - With "euro" explicit
- "Welke instellingen geven meer dan {amount} euro uit aan {budget_category}?"
- "Welke instellingen besteden meer dan {amount} euro aan {budget_category}?"
- "Instellingen met een {budget_category}budget van meer dan {amount} euro"
# Dutch - Budget with year
- "Welke instellingen besteden meer dan {amount} aan {budget_category} in {year}?"
- "Hoeveel instellingen geven meer dan {amount} uit aan {budget_category} in {year}?"
# Dutch - Less than
- "Welke instellingen besteden minder dan {amount} aan {budget_category}?"
- "Instellingen met {budget_category} onder {amount}"
- "Zijn er instellingen die minder dan {amount} uitgeven aan {budget_category}?"
- "Toon instellingen die minder dan {amount} uitgeven aan {budget_category}"
# English - Budget (planned)
- "Which custodians spend more than {amount} on {budget_category}?"
- "Which institutions have a {budget_category} budget over {amount}?"
- "Which {institution_type_en} spend more than {amount} on {budget_category}?"
- "Institutions with {budget_category} above {amount}"
- "Who spends more than {amount} on {budget_category}?"
# English - Conversational (NEW)
- "I'm looking for {institution_type_en} with a high {budget_category} budget"
- "I'm looking for {institution_type_en} with a {budget_category} budget over {amount}"
- "I'm looking for institutions with a high {budget_category} budget"
- "Give me a list of {institution_type_en} with a {budget_category} budget over {amount}"
- "Do you know {institution_type_en} with a high {budget_category} budget?"
- "{institution_type_en} with high {budget_category} spending"
- "{institution_type_en} that invest heavily in {budget_category}"
# English - Alternative phrasings
- "Are there institutions that spend more than {amount} on {budget_category}?"
- "Are there {institution_type_en} that spend more than {amount} on {budget_category}?"
- "Show me institutions with {budget_category} over {amount}"
- "List institutions spending more than {amount} on {budget_category}"
- "Give me institutions with {budget_category} budget above {amount}"
- "Which organizations spend more than {amount} on {budget_category}?"
# English - Budget with year
- "Which custodians spend more than {amount} on {budget_category} in {year}?"
- "How many institutions spend more than {amount} on {budget_category} in {year}?"
# English - Less than
- "Which custodians spend less than {amount} on {budget_category}?"
- "Institutions with {budget_category} under {amount}"
- "Are there institutions that spend less than {amount} on {budget_category}?"
# German
- "Welche Institutionen geben mehr als {amount} für {budget_category} aus?"
- "Welche Institutionen haben ein {budget_category}budget über {amount}?"
- "Gibt es Institutionen die mehr als {amount} für {budget_category} ausgeben?"
- "Ich suche {institution_type_de} mit einem hohen {budget_category}budget"
slots:
budget_category:
type: budget_category
required: true
examples: ["innovation", "digitization", "preservation", "personnel", "acquisition"]
amount:
type: decimal
required: true
examples: ["5000", "10000", "50000", "100000"]
year:
type: integer
required: false
examples: ["2024", "2025"]
comparison:
type: string
default: ">"
valid_values: [">", "<", ">=", "<=", "="]
institution_type:
type: institution_type
required: false
source:
type: string
default: "budget"
valid_values: ["budget", "actuals"]
description: "Whether to query Budget (planned) or FinancialStatement (actuals)"
sparql_template: |
{{ prefixes }}
PREFIX frapo: <http://purl.org/cerif/frapo/>
SELECT DISTINCT ?institution ?name ?budget_amount ?fiscal_year WHERE {
?institution a crm:E39_Actor ;
skos:prefLabel ?name .
?budget a hc:class/Budget ;
hc:refers_to_custodian ?institution ;
hc:{{ budget_category }}_budget ?budget_amount .
{% if year %}
?budget hc:fiscal_year_start ?fy_start .
FILTER(YEAR(?fy_start) = {{ year }})
{% endif %}
FILTER(?budget_amount {{ comparison | default(">") }} {{ amount }})
{% if institution_type %}
?institution hc:institutionType "{{ institution_type }}" .
{% endif %}
OPTIONAL {
?budget hc:fiscal_year_start ?fy_start .
BIND(YEAR(?fy_start) AS ?fiscal_year)
}
}
ORDER BY DESC(?budget_amount)
{% if limit %}LIMIT {{ limit }}{% endif %}
# Jinja2 template for the reported-expenses ("actuals") variant of the
# threshold query. Selects custodians whose hc:<budget_category>_expenses
# value satisfies `<comparison> <amount>`, optionally restricted by reporting
# year and institution type, ordered by amount descending.
#
# The FinancialStatement class is written as a full IRI:
# `hc:class/FinancialStatement` is not a legal SPARQL prefixed name because an
# unescaped "/" is not allowed in the local part, so the query would fail to
# parse.
# `default(">", true)` also covers a comparison slot passed as None or "".
sparql_template_actuals: |
  {{ prefixes }}
  PREFIX frapo: <http://purl.org/cerif/frapo/>
  SELECT DISTINCT ?institution ?name ?expense_amount ?reporting_year WHERE {
    ?institution a crm:E39_Actor ;
      skos:prefLabel ?name .
    ?statement a <https://nde.nl/ontology/hc/class/FinancialStatement> ;
      hc:refers_to_custodian ?institution ;
      hc:{{ budget_category }}_expenses ?expense_amount .
    {% if year %}
    ?statement hc:reporting_period_start ?rp_start .
    FILTER(YEAR(?rp_start) = {{ year }})
    {% endif %}
    FILTER(?expense_amount {{ comparison | default(">", true) }} {{ amount }})
    {% if institution_type %}
    ?institution hc:institutionType "{{ institution_type }}" .
    {% endif %}
    OPTIONAL {
      ?statement hc:reporting_period_start ?rp_start .
      BIND(YEAR(?rp_start) AS ?reporting_year)
    }
  }
  ORDER BY DESC(?expense_amount)
  {% if limit %}LIMIT {{ limit }}{% endif %}
# Worked question -> slot-value examples for the budget-threshold template.
examples:
  - question: 'Welke instellingen besteden meer dan 5000 euro aan innovatie in 2024?'
    slots:
      budget_category: 'innovation'
      amount: 5000
      year: 2024
      comparison: '>'
  - question: 'Which custodians spend more than 10000 on digitization?'
    slots:
      budget_category: 'digitization'
      amount: 10000
      comparison: '>'
  - question: 'Institutions with preservation budget above 50000'
    slots:
      budget_category: 'preservation'
      amount: 50000
      comparison: '>'
  - question: 'Which museums spend less than 1000 on innovation?'
    slots:
      budget_category: 'innovation'
      amount: 1000
      comparison: '<'
      institution_type: 'M'
# =============================================================================
# FOLLOW-UP PATTERNS (Conversation Context Resolution)
# =============================================================================
# These patterns help ConversationContextResolver (DSPy) expand elliptical
# follow-up questions BEFORE the Fyke filter runs.
#
# CRITICAL: ConversationContextResolver runs FIRST, then Fyke operates on
# the RESOLVED question. This prevents short follow-ups from being wrongly
# rejected as irrelevant.
#
# Example flow:
# Turn 1: "Welke archieven zijn er in Den Haag?" → lists archives
# Turn 2: "En in Enschede?" (raw input - would be caught by naive Fyke!)
# ↓ ConversationContextResolver
# "Welke archieven zijn er in Enschede?" (resolved - clearly relevant)
# ↓ FykeFilter
# PASS (relevant)
# ↓ TemplateClassifier
# list_institutions_by_type_city
# Follow-up pattern groups. Each group lists surface patterns for one kind of
# elliptical follow-up, plus the strategy used to expand it into a full
# question.
# NOTE(review): some surface forms ('What about {...}?', 'Hoe zit het met
# {...}?', 'En {...}?', '{...}?') occur in more than one group and differ only
# in the placeholder - confirm the resolver disambiguates on the slot type.
follow_up_patterns:
  # New location, everything else carried over from the previous turn.
  location_swap:
    description: 'Same query type, different location'
    patterns:
      - 'En in {new_location}?'
      - 'En {new_location}?'
      - 'What about {new_location}?'
      - 'And in {new_location}?'
      - 'Hoe zit het met {new_location}?'
      - 'In {new_location}?'
      - '{new_location}?'
    slot_inheritance:
      - institution_type
    resolution_strategy: 'inherit_template_swap_location'

  # New institution type, location carried over from the previous turn.
  type_swap:
    description: 'Same location, different institution type'
    patterns:
      - 'En de {new_type}?'
      - 'Hoe zit het met {new_type}?'
      - 'What about {new_type}?'
      - 'And {new_type}?'
      - 'En {new_type}?'
      - '{new_type}?'
    slot_inheritance:
      - city
      - region
      - country
    resolution_strategy: 'inherit_location_swap_type'

  # Turns a previous listing into a count over the same slots.
  count_from_list:
    description: 'Count after listing'
    patterns:
      - 'Hoeveel zijn dat?'
      - 'How many is that?'
      - 'How many are there?'
      - 'Hoeveel?'
      - 'How many?'
      - 'Tel ze'
      - 'Count them'
    transforms_to: 'count_institutions_by_type_location'
    slot_inheritance:
      - institution_type
      - city
      - region
      - country
    resolution_strategy: 'convert_list_to_count'

  # Asks for more information about a named entity.
  details_request:
    description: 'More details about specific result'
    patterns:
      - 'Vertel me meer over {entity}'
      - 'Tell me more about {entity}'
      - 'Meer informatie over {entity}'
      - 'What about {entity}?'
      - 'More about {entity}'
      - 'Details over {entity}'
    transforms_to: 'find_institution_by_name'
    resolution_strategy: 'extract_entity_lookup'

  # Refers to an earlier result by its position in the list.
  ordinal_reference:
    description: 'Reference to result by position'
    patterns:
      - 'De eerste'
      - 'De tweede'
      - 'De derde'
      - 'The first one'
      - 'The second one'
      - 'Number {n}'
      - 'Nummer {n}'
    requires_previous_results: true
    resolution_strategy: 'resolve_ordinal_to_entity'

  # Refers to an earlier result through a pronoun.
  pronoun_reference:
    description: 'Reference using pronouns'
    patterns:
      - 'Wat is hun website?'
      - 'What is their website?'
      - 'Waar zijn ze gevestigd?'
      - 'Where are they located?'
      - 'Wanneer zijn ze opgericht?'
      - 'When were they founded?'
    requires_previous_results: true
    resolution_strategy: 'resolve_pronoun_to_entity'
# =============================================================================
# FYKE FILTER CONFIGURATION
# =============================================================================
# The Fyke filter catches irrelevant questions and returns a standard response.
#
# ⚠️ CRITICAL ORDERING:
# 1. ConversationContextResolver FIRST (expands follow-ups)
# 2. FykeFilter on RESOLVED question (not raw input!)
#
# This prevents relevant follow-ups from being wrongly rejected, e.g.:
# - "En in Enschede?" → resolved to "Welke archieven zijn er in Enschede?" → PASS
# - "Hoeveel?" → resolved to "Hoeveel archieven zijn er in Den Haag?" → PASS
fyke_filter:
# DSPy signature for the relevance classifier.
# Input is the RESOLVED question only, never the raw user turn.
dspy_signature:
  # Fields passed to the classifier.
  inputs:
    resolved_question: 'The fully resolved question (after context resolution)'
    conversation_summary: 'Brief summary of conversation topic'
  # Fields the classifier returns.
  outputs:
    is_relevant: 'boolean - whether question is about heritage institutions'
    confidence: 'float 0-1 - confidence in classification'
    reasoning: 'Brief explanation of relevance decision'
# Hard-coded out-of-scope keywords (checked AFTER context resolution).
# Only terms that are unambiguous in both Dutch and English belong here.
# The bare Dutch word "weer" is deliberately NOT listed: besides "weather" it
# is the everyday adverb "again" ("Is het museum weer open?"), so matching it
# would reject legitimate heritage questions. The unambiguous compounds
# "weerbericht" / "weersverwachting" are used instead.
out_of_scope_keywords:
  - tandpasta
  - toothpaste
  - supermarkt
  - supermarket
  - restaurant
  - hotel
  - weerbericht
  - weersverwachting
  - weather
  - voetbal
  - soccer
  - football
  - recept
  - recipe
  - vliegticket
  - flight
  - politiek
  - politics
  - bitcoin
  - crypto
  - dating
  - tinder
# Categories that are out of scope
# NOTE(review): "financial" appears to overlap with the budget-threshold
# template in this file, which answers questions about what custodians spend.
# Presumably this label means personal finance / investment advice - confirm
# with the classifier that consumes it.
out_of_scope_categories:
- shopping
- travel_booking
- sports
- cooking
- entertainment
- personal_advice
- medical
- legal
- financial
# Keywords that indicate heritage relevance (boost confidence).
# Dutch and English forms are listed side by side, so a question in either
# language gets the same boost for museums, archives, libraries and galleries.
heritage_keywords:
  - museum
  - musea
  - museums
  - archief
  - archieven
  - archive
  - archives
  - bibliotheek
  - bibliotheken
  - library
  - libraries
  - galerie
  - galerijen
  - gallery
  - galleries
  - erfgoed
  - heritage
  - collectie
  - collection
  - tentoonstelling
  - exhibition
  - GLAM
  - cultureel
  - cultural
# Standard responses when question is out of scope
# One entry per two-letter language code (nl, en, de, fr). Each value is a
# literal block scalar (`|`), so its line breaks are kept as written.
# NOTE(review): presumably the entry is chosen by the language of the user's
# question - confirm against the consumer.
standard_response:
nl: |
Ik kan je helpen met vragen over erfgoedinstellingen zoals musea, archieven,
bibliotheken en galerijen in Nederland en daarbuiten. Stel gerust een vraag
over deze onderwerpen!
en: |
I can help you with questions about heritage institutions such as museums,
archives, libraries and galleries in the Netherlands and beyond. Feel free
to ask a question about these topics!
de: |
Ich kann Ihnen bei Fragen zu Kulturerbeinstitutionen wie Museen, Archiven,
Bibliotheken und Galerien in den Niederlanden und darüber hinaus helfen.
Stellen Sie gerne eine Frage zu diesen Themen!
fr: |
Je peux vous aider avec des questions sur les institutions patrimoniales
comme les musées, les archives, les bibliothèques et les galeries aux
Pays-Bas et au-delà. N'hésitez pas à poser une question sur ces sujets!