---
# SPARQL Query Templates for Heritage Custodian Knowledge Graph
#
# This file defines parameterized SPARQL templates that replace LLM-generated queries
# with deterministic, validated templates. Based on docs/plan/prompt-query_template_mapping/
#
# CRITICAL: Processing Pipeline Order
# ===================================
# 1. User question → ConversationContextResolver (DSPy) → RESOLVED question
# 2. RESOLVED question → FykeFilter (DSPy) → relevant/irrelevant
#    ⚠️ FYKE MUST OPERATE ON RESOLVED QUESTION, NOT RAW INPUT!
#    "En in Enschede?" resolved to "Welke archieven zijn er in Enschede?" is clearly relevant
# 3. If relevant: resolved question → TemplateClassifier → template_id
# 4. template_id + resolved question → SlotExtractor → slot values
# 5. template + slot values → TemplateInstantiator (Jinja2) → SPARQL query
#
# Slot Value Sources:
# - data/validation/sparql_validation_rules.json (institution_type_mappings, subregion_mappings, etc.)
# - backend/rag/ontology_mapping.py (fuzzy matching, multilingual synonyms)
|
|
|
|
# File-level bookkeeping: template-set version, creation date, and the
# locations of the schema source and the validation rules.
_metadata:
  # Both values are quoted so they load as strings, not a float / a date.
  version: "1.0.0"
  created: "2025-01-06"

  schema_source: "schemas/20251121/linkml/"
  validation_rules: "data/validation/sparql_validation_rules.json"
|
|
|
|
# Shared PREFIX declarations, stored as one literal block.
# Every sparql_template in this file opens with `{{ prefixes }}`.
# NOTE(review): presumably the instantiator binds that variable to this
# block - confirm against TemplateInstantiator.
_prefixes: |
  PREFIX hc: <https://nde.nl/ontology/hc/>
  PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
  PREFIX schema: <http://schema.org/>
  PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
  PREFIX org: <http://www.w3.org/ns/org#>
  PREFIX foaf: <http://xmlns.com/foaf/0.1/>
  PREFIX dcterms: <http://purl.org/dc/terms/>
  PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
  PREFIX wd: <http://www.wikidata.org/entity/>
|
|
|
|
# Slot type definitions with validation sources
|
|
_slot_types:
|
|
# Slot type: custodian category as a single-letter code.
# `synonyms` maps lower-case surface forms (Dutch, English, German) to a code
# listed in `valid_values`.
institution_type:
  description: "Single-letter custodian type code (M, L, A, G, etc.)"
  source: "sparql_validation_rules.json#institution_type_mappings"
  valid_values: ["M", "L", "A", "G", "O", "R", "C", "U", "B", "E", "S", "F", "I", "X", "P", "H", "D", "N", "T"]
  synonyms:
    # Dutch
    museum: "M"
    musea: "M"
    bibliotheek: "L"
    bibliotheken: "L"
    archief: "A"
    archieven: "A"
    galerie: "G"
    galerij: "G"
    galerijen: "G"
    # English
    museums: "M"
    library: "L"
    libraries: "L"
    archive: "A"
    archives: "A"
    gallery: "G"
    galleries: "G"
    # German
    # The `_de` keys are kept for backward compatibility. The suffix avoids
    # duplicate YAML keys where the German word is spelled like a Dutch or
    # English entry above (bibliotheken, archive, galerie), but a word typed
    # by a user never carries the suffix. German forms that do not collide
    # are therefore also listed bare so a direct lookup can find them.
    bibliothek_de: "L"
    bibliothek: "L"
    bibliotheken_de: "L"
    museen: "M"
    archiv: "A"
    archive_de: "A"
    galerie_de: "G"
    galerien: "G"
    # Special types
    dierentuin: "B"
    zoo: "B"
    botanische_tuin: "B"
    botanical_garden: "B"
    kerk: "H"
    church: "H"
    moskee: "H"
    mosque: "H"
    synagoge: "H"
    synagogue: "H"
|
|
|
|
# Slot type: ISO 3166-2 subdivision code.
# `synonyms` maps lower-case region names, and `<city>_province` aliases, to
# the subdivision code.
subregion:
  description: "ISO 3166-2 subdivision code (NL-NH, DE-BY, etc.)"
  source: "sparql_validation_rules.json#subregion_mappings"
  synonyms:
    # --- Netherlands ---
    # Noord-Holland
    noord-holland: "NL-NH"
    noord_holland: "NL-NH"
    noordholland: "NL-NH"
    amsterdam_province: "NL-NH"
    # Zuid-Holland
    zuid-holland: "NL-ZH"
    zuid_holland: "NL-ZH"
    zuidholland: "NL-ZH"
    rotterdam_province: "NL-ZH"
    den_haag_province: "NL-ZH"
    the_hague_province: "NL-ZH"
    # Noord-Brabant
    noord-brabant: "NL-NB"
    brabant: "NL-NB"
    eindhoven_province: "NL-NB"
    # Gelderland
    gelderland: "NL-GE"
    arnhem_province: "NL-GE"
    nijmegen_province: "NL-GE"
    # Utrecht
    utrecht_province: "NL-UT"
    # Overijssel
    overijssel: "NL-OV"
    zwolle_province: "NL-OV"
    enschede_province: "NL-OV"
    # Limburg (NL)
    limburg_nl: "NL-LI"
    maastricht_province: "NL-LI"
    # Friesland
    friesland: "NL-FR"
    frisia: "NL-FR"
    leeuwarden_province: "NL-FR"
    # Groningen
    groningen_province: "NL-GR"
    # Drenthe
    drenthe: "NL-DR"
    assen_province: "NL-DR"
    # Flevoland
    flevoland: "NL-FL"
    almere_province: "NL-FL"
    lelystad_province: "NL-FL"
    # Zeeland
    zeeland: "NL-ZE"
    middelburg_province: "NL-ZE"

    # --- Belgium ---
    vlaanderen: "BE-VLG"
    flanders: "BE-VLG"
    antwerpen_province: "BE-VLG"
    gent_province: "BE-VLG"
    brugge_province: "BE-VLG"
    wallonie: "BE-WAL"
    wallonia: "BE-WAL"
    brussel: "BE-BRU"
    brussels: "BE-BRU"
    bruxelles: "BE-BRU"

    # --- Germany ---
    bayern: "DE-BY"
    bavaria: "DE-BY"
    muenchen_province: "DE-BY"
    munich_province: "DE-BY"
    berlin_state: "DE-BE"
    baden_wuerttemberg: "DE-BW"
    stuttgart_province: "DE-BW"
    nordrhein_westfalen: "DE-NW"
    north_rhine_westphalia: "DE-NW"
    koeln_province: "DE-NW"
    cologne_province: "DE-NW"
    duesseldorf_province: "DE-NW"
    sachsen: "DE-SN"
    saxony: "DE-SN"
    dresden_province: "DE-SN"
    hessen: "DE-HE"
    hesse: "DE-HE"
    frankfurt_province: "DE-HE"
|
|
|
|
# Slot type: country, identified through Wikidata.
# The synonym values are bare Wikidata QIDs (e.g. "Q55"), not full URIs, even
# though `format` is "wikidata_uri".
# NOTE(review): a future synonym key such as `no`, `on`, `y` or `n` must be
# quoted, because YAML 1.1 loaders read those bare words as booleans.
country:
  description: "Wikidata entity URI for country"
  source: "sparql_validation_rules.json#country_mappings"
  format: "wikidata_uri"
  synonyms:
    # Netherlands
    nederland: "Q55"
    netherlands: "Q55"
    holland: "Q55"
    nl: "Q55"
    # Belgium
    belgie: "Q31"
    belgium: "Q31"
    be: "Q31"
    # Germany
    duitsland: "Q183"
    germany: "Q183"
    de_country: "Q183"
    # France
    frankrijk: "Q142"
    france: "Q142"
    fr: "Q142"
    # United Kingdom
    verenigd_koninkrijk: "Q145"
    united_kingdom: "Q145"
    uk: "Q145"
    gb: "Q145"
    engeland: "Q145"
    england: "Q145"
    # United States
    verenigde_staten: "Q30"
    united_states: "Q30"
    usa: "Q30"
    us: "Q30"
    # Japan
    japan: "Q17"
    jp: "Q17"
    # Czech Republic
    tsjechie: "Q213"
    czech_republic: "Q213"
    czechia: "Q213"
    cz: "Q213"
    # Austria
    oostenrijk: "Q40"
    austria: "Q40"
    at: "Q40"
    # Switzerland
    zwitserland: "Q39"
    switzerland: "Q39"
    ch: "Q39"
|
|
|
|
# Slot type: free-text locality name. It has no synonym table; `source` is
# "fuzzy_match".
city:
  source: "fuzzy_match"
  description: "City/locality name (string literal)"
|
|
|
|
# Slot type: free-text institution name used for lookups. It has no synonym
# table; `source` is "fuzzy_match".
institution_name:
  source: "fuzzy_match"
  description: "Institution name for lookup (string literal)"
|
|
|
|
# Slot type: integer cap on the number of result rows, with a default and a
# maximum.
limit:
  description: "Result limit (integer)"
  max: 100
  default: 10
|
|
|
|
# =============================================================================
|
|
# TEMPLATE DEFINITIONS
|
|
# =============================================================================
|
|
|
|
templates:
|
|
# ---------------------------------------------------------------------------
# Template 1: institutions of one type in one city
# ---------------------------------------------------------------------------
# The query matches an exact schema:addressLocality literal, returns the
# homepage when one exists, and sorts rows by preferred label.
list_institutions_by_type_city:
  id: "list_institutions_by_type_city"
  description: "List heritage institutions of a specific type in a city"
  intent:
    - "geographic"
    - "exploration"

  question_patterns:
    # Dutch
    - "Welke {institution_type_nl} zijn er in {city}?"
    - "Welke {institution_type_nl} heeft {city}?"
    - "Wat zijn de {institution_type_nl} in {city}?"
    - "Geef me de {institution_type_nl} in {city}"
    - "Toon {institution_type_nl} in {city}"
    - "{institution_type_nl} in {city}"
    # English
    - "What {institution_type_en} are in {city}?"
    - "Which {institution_type_en} are there in {city}?"
    - "List {institution_type_en} in {city}"
    - "Show me {institution_type_en} in {city}"
    - "{institution_type_en} in {city}"
    # German
    - "Welche {institution_type_de} gibt es in {city}?"
    - "Welche {institution_type_de} hat {city}?"

  slots:
    institution_type:
      type: institution_type
      required: true
      examples:
        - "musea"
        - "archieven"
        - "bibliotheken"
        - "museums"
        - "archives"
    city:
      type: city
      required: true
      examples:
        - "Amsterdam"
        - "Den Haag"
        - "Rotterdam"
        - "Utrecht"

  # LIMIT is emitted only when a `limit` value is supplied.
  sparql_template: |
    {{ prefixes }}
    SELECT DISTINCT ?institution ?name ?website WHERE {
      ?institution a crm:E39_Actor ;
        hc:institutionType "{{ institution_type }}" ;
        schema:addressLocality "{{ city }}" ;
        skos:prefLabel ?name .
      OPTIONAL { ?institution foaf:homepage ?website }
    }
    ORDER BY ?name
    {% if limit %}LIMIT {{ limit }}{% endif %}

  # Worked examples: question text paired with the slot values after mapping.
  examples:
    - question: "Welke musea zijn er in Amsterdam?"
      slots:
        institution_type: "M"
        city: "Amsterdam"
    - question: "What archives are in The Hague?"
      slots:
        institution_type: "A"
        city: "Den Haag"
|
|
|
|
# ---------------------------------------------------------------------------
# Template 2: List institutions by type and province/region
# ---------------------------------------------------------------------------
# Two query variants are provided:
#   sparql_template      joins through hc:subregion / hc:iso3166_2_code
#   sparql_template_alt  filters on the leading characters of hc:ghcid
list_institutions_by_type_region:
  id: "list_institutions_by_type_region"
  description: "List heritage institutions of a specific type in a province/region"
  intent: ["geographic", "exploration"]

  question_patterns:
    # Dutch
    - "Welke {institution_type_nl} zijn er in {region}?"
    # Was "Hoeveel {institution_type_nl} heeft {region}?". That is a counting
    # question, and the same wording belongs to
    # count_institutions_by_type_location, so keeping it here made this
    # listing template compete for count questions.
    - "Welke {institution_type_nl} heeft {region}?"
    - "{institution_type_nl} in {region}"
    - "Alle {institution_type_nl} in de provincie {region}"
    # English
    - "What {institution_type_en} are in {region}?"
    - "Which {institution_type_en} are there in {region}?"
    - "{institution_type_en} in {region}"

  slots:
    institution_type:
      type: institution_type
      required: true
    region:
      type: subregion
      required: true
      examples: ["Noord-Holland", "Gelderland", "Limburg", "Bavaria", "Flanders"]

  # Primary variant: `region` is compared with the subregion's ISO 3166-2 code.
  sparql_template: |
    {{ prefixes }}
    SELECT DISTINCT ?institution ?name ?city WHERE {
      ?institution a crm:E39_Actor ;
        hc:institutionType "{{ institution_type }}" ;
        hc:subregion ?subregion ;
        skos:prefLabel ?name .
      ?subregion hc:iso3166_2_code "{{ region }}" .
      OPTIONAL { ?institution schema:addressLocality ?city }
    }
    ORDER BY ?name
    {% if limit %}LIMIT {{ limit }}{% endif %}

  # Alternative variant: keeps institutions whose GHCID starts with `region`.
  # The previous `| replace('-', '-')` filter replaced a hyphen with a hyphen
  # and has been dropped; the rendered query text is unchanged.
  # NOTE(review): assumes GHCIDs start with the ISO 3166-2 code - confirm.
  sparql_template_alt: |
    {{ prefixes }}
    SELECT DISTINCT ?institution ?name ?city WHERE {
      ?institution a crm:E39_Actor ;
        hc:institutionType "{{ institution_type }}" ;
        hc:ghcid ?ghcid ;
        skos:prefLabel ?name .
      FILTER(STRSTARTS(?ghcid, "{{ region }}"))
      OPTIONAL { ?institution schema:addressLocality ?city }
    }
    ORDER BY ?name
    {% if limit %}LIMIT {{ limit }}{% endif %}
|
|
|
|
# ---------------------------------------------------------------------------
# Template 3: institutions of one type in one country
# ---------------------------------------------------------------------------
# `country` is interpolated into a http://www.wikidata.org/entity/ IRI, so the
# slot value has to be the bare QID.
list_institutions_by_type_country:
  id: "list_institutions_by_type_country"
  description: "List heritage institutions of a specific type in a country"
  intent:
    - "geographic"
    - "exploration"

  question_patterns:
    # Dutch
    - "Welke {institution_type_nl} zijn er in {country}?"
    - "Alle {institution_type_nl} in {country}"
    - "{institution_type_nl} in {country}"
    # English
    - "What {institution_type_en} are in {country}?"
    - "List all {institution_type_en} in {country}"
    - "{institution_type_en} in {country}"

  slots:
    institution_type:
      type: institution_type
      required: true
    country:
      type: country
      required: true
      examples:
        - "Nederland"
        - "Belgium"
        - "Germany"
        - "France"

  # The city column is optional; LIMIT is emitted only when `limit` is given.
  sparql_template: |
    {{ prefixes }}
    SELECT DISTINCT ?institution ?name ?city WHERE {
      ?institution a crm:E39_Actor ;
        hc:institutionType "{{ institution_type }}" ;
        schema:addressCountry <http://www.wikidata.org/entity/{{ country }}> ;
        skos:prefLabel ?name .
      OPTIONAL { ?institution schema:addressLocality ?city }
    }
    ORDER BY ?name
    {% if limit %}LIMIT {{ limit }}{% endif %}
|
|
|
|
# ---------------------------------------------------------------------------
# Template 4: number of institutions of one type in one location
# ---------------------------------------------------------------------------
# `location` is typed as a city, with subregion and country as fallback types.
# One query variant exists per location kind:
#   sparql_template          exact schema:addressLocality literal
#   sparql_template_region   hc:ghcid starts with the location value
#   sparql_template_country  schema:addressCountry as a Wikidata entity IRI
count_institutions_by_type_location:
  id: "count_institutions_by_type_location"
  description: "Count heritage institutions of a specific type in a location"
  intent:
    - "statistical"

  question_patterns:
    # Dutch
    - "Hoeveel {institution_type_nl} zijn er in {location}?"
    - "Hoeveel {institution_type_nl} heeft {location}?"
    - "Aantal {institution_type_nl} in {location}"
    - "Tel de {institution_type_nl} in {location}"
    # English
    - "How many {institution_type_en} are in {location}?"
    - "How many {institution_type_en} does {location} have?"
    - "Count of {institution_type_en} in {location}"
    - "Number of {institution_type_en} in {location}"

  slots:
    institution_type:
      type: institution_type
      required: true
    location:
      type: city
      required: true
      fallback_types:
        - subregion
        - country

  # City variant.
  sparql_template: |
    {{ prefixes }}
    SELECT (COUNT(DISTINCT ?institution) AS ?count) WHERE {
      ?institution a crm:E39_Actor ;
        hc:institutionType "{{ institution_type }}" ;
        schema:addressLocality "{{ location }}" .
    }

  # Region variant.
  sparql_template_region: |
    {{ prefixes }}
    SELECT (COUNT(DISTINCT ?institution) AS ?count) WHERE {
      ?institution a crm:E39_Actor ;
        hc:institutionType "{{ institution_type }}" ;
        hc:ghcid ?ghcid .
      FILTER(STRSTARTS(?ghcid, "{{ location }}"))
    }

  # Country variant.
  sparql_template_country: |
    {{ prefixes }}
    SELECT (COUNT(DISTINCT ?institution) AS ?count) WHERE {
      ?institution a crm:E39_Actor ;
        hc:institutionType "{{ institution_type }}" ;
        schema:addressCountry <http://www.wikidata.org/entity/{{ location }}> .
    }
|
|
}
|
|
|
|
# ---------------------------------------------------------------------------
# Template 5: distribution of institutions over types
# ---------------------------------------------------------------------------
# Takes no slots. Returns one row per hc:institutionType value with the number
# of distinct institutions, largest group first.
count_institutions_by_type:
  id: "count_institutions_by_type"
  description: "Count institutions grouped by type"
  intent:
    - "statistical"

  question_patterns:
    # Dutch
    - "Hoeveel instellingen per type?"
    - "Verdeling van instellingen per type"
    - "Hoeveel musea, archieven en bibliotheken zijn er?"
    - "Statistieken per instellingstype"
    # English
    - "How many institutions per type?"
    - "Distribution of institutions by type"
    - "Statistics by institution type"
    - "How many museums, archives and libraries are there?"

  # Explicit empty mapping: this template has no slots.
  slots: {}

  sparql_template: |
    {{ prefixes }}
    SELECT ?type (COUNT(DISTINCT ?institution) AS ?count) WHERE {
      ?institution a crm:E39_Actor ;
        hc:institutionType ?type .
    }
    GROUP BY ?type
    ORDER BY DESC(?count)
|
|
|
|
# ---------------------------------------------------------------------------
# Template 6: Find institution by name
# ---------------------------------------------------------------------------
# Case-insensitive substring match of `institution_name` against
# skos:prefLabel. Type, city, country, homepage and description are returned
# when present.
find_institution_by_name:
  id: "find_institution_by_name"
  description: "Find a specific institution by name"
  intent: ["entity_lookup"]

  question_patterns:
    # Dutch
    - "Waar is {institution_name}?"
    - "Informatie over {institution_name}"
    - "Gegevens van {institution_name}"
    - "Wat is {institution_name}?"
    - "Zoek {institution_name}"
    # English
    - "Where is {institution_name}?"
    - "Information about {institution_name}"
    - "What is {institution_name}?"
    - "Find {institution_name}"
    - "Tell me about {institution_name}"

  slots:
    institution_name:
      type: institution_name
      required: true
      examples: ["Rijksmuseum", "Nationaal Archief", "Koninklijke Bibliotheek"]

  # Changes relative to the previous revision:
  # - The name is free text taken from the user's question, so backslashes and
  #   double quotes are escaped before it is placed inside the SPARQL string
  #   literal. A name containing `"` would otherwise end the literal early.
  #   NOTE(review): if TemplateInstantiator already escapes slot values, drop
  #   the two `replace` filters to avoid double escaping.
  # - ORDER BY ?name was added because LIMIT without ORDER BY may return a
  #   different subset on each run.
  # - The row cap honours an optional `limit` value and still defaults to 10.
  sparql_template: |
    {{ prefixes }}
    SELECT ?institution ?name ?type ?city ?country ?website ?description WHERE {
      ?institution a crm:E39_Actor ;
        skos:prefLabel ?name .
      FILTER(CONTAINS(LCASE(STR(?name)), LCASE("{{ institution_name | replace('\\', '\\\\') | replace('"', '\\"') }}")))
      OPTIONAL { ?institution hc:institutionType ?type }
      OPTIONAL { ?institution schema:addressLocality ?city }
      OPTIONAL { ?institution schema:addressCountry ?country }
      OPTIONAL { ?institution foaf:homepage ?website }
      OPTIONAL { ?institution schema:description ?description }
    }
    ORDER BY ?name
    LIMIT {{ limit | default(10) }}
|
|
|
|
# ---------------------------------------------------------------------------
# Template 7: every institution in one city, regardless of type
# ---------------------------------------------------------------------------
# Matches an exact schema:addressLocality literal. Type and homepage are
# optional columns; rows are sorted by type, then by name.
list_all_institutions_in_city:
  id: "list_all_institutions_in_city"
  description: "List all heritage institutions in a city"
  intent:
    - "geographic"
    - "exploration"

  question_patterns:
    # Dutch
    - "Welke erfgoedinstellingen zijn er in {city}?"
    - "Alle instellingen in {city}"
    - "Erfgoed in {city}"
    - "Wat is er te zien in {city}?"
    - "Culturele instellingen in {city}"
    # English
    - "What heritage institutions are in {city}?"
    - "All institutions in {city}"
    - "Heritage in {city}"
    - "What is there to see in {city}?"
    - "Cultural institutions in {city}"

  slots:
    city:
      type: city
      required: true

  # LIMIT is emitted only when a `limit` value is supplied.
  sparql_template: |
    {{ prefixes }}
    SELECT ?institution ?name ?type ?website WHERE {
      ?institution a crm:E39_Actor ;
        schema:addressLocality "{{ city }}" ;
        skos:prefLabel ?name .
      OPTIONAL { ?institution hc:institutionType ?type }
      OPTIONAL { ?institution foaf:homepage ?website }
    }
    ORDER BY ?type ?name
    {% if limit %}LIMIT {{ limit }}{% endif %}
|
|
|
|
# ---------------------------------------------------------------------------
# Template 8: Find oldest/youngest institutions
# ---------------------------------------------------------------------------
# Orders institutions by schema:foundingDate. All filters are optional:
# institution type, institution name (for "when was X founded?"), and city.
find_institutions_by_founding_date:
  id: "find_institutions_by_founding_date"
  description: "Find oldest or most recently founded institutions"
  intent: ["temporal", "exploration"]

  question_patterns:
    # Dutch
    - "Wat zijn de oudste {institution_type_nl}?"
    - "Wat is het oudste {institution_type_nl}?"
    - "Oudste {institution_type_nl} in {location}"
    - "Wanneer is {institution_name} opgericht?"
    - "Wat zijn de nieuwste {institution_type_nl}?"
    - "Recent opgerichte {institution_type_nl}"
    # English
    - "What are the oldest {institution_type_en}?"
    - "What is the oldest {institution_type_en}?"
    - "Oldest {institution_type_en} in {location}"
    - "When was {institution_name} founded?"
    - "What are the newest {institution_type_en}?"
    - "Recently founded {institution_type_en}"

  slots:
    institution_type:
      type: institution_type
      required: false
    # Added: the "Wanneer is ... opgericht?" / "When was ... founded?"
    # patterns capture {institution_name}, but no slot was declared and the
    # query ignored it, so those questions returned the oldest institutions
    # overall instead of the one asked about.
    institution_name:
      type: institution_name
      required: false
    # NOTE(review): fallback_types lists subregion and country, but the query
    # below only matches schema:addressLocality - confirm how a region or
    # country value is meant to be handled.
    location:
      type: city
      required: false
      fallback_types: [subregion, country]
    order:
      type: string
      default: "ASC"
      valid_values: ["ASC", "DESC"]

  # The name filter escapes backslashes and double quotes because the value is
  # free text placed inside a SPARQL string literal.
  # NOTE(review): drop the two `replace` filters if TemplateInstantiator
  # already escapes slot values.
  # `order` falls back to ASC when it is missing or empty, matching the slot's
  # declared default.
  sparql_template: |
    {{ prefixes }}
    SELECT ?institution ?name ?founded ?city WHERE {
      ?institution a crm:E39_Actor ;
        skos:prefLabel ?name ;
        schema:foundingDate ?founded .
      {% if institution_type %}
      ?institution hc:institutionType "{{ institution_type }}" .
      {% endif %}
      {% if institution_name %}
      FILTER(CONTAINS(LCASE(STR(?name)), LCASE("{{ institution_name | replace('\\', '\\\\') | replace('"', '\\"') }}")))
      {% endif %}
      {% if location %}
      ?institution schema:addressLocality "{{ location }}" .
      {% endif %}
      OPTIONAL { ?institution schema:addressLocality ?city }
    }
    ORDER BY {{ order | default('ASC', true) }}(?founded)
    LIMIT {{ limit | default(10) }}
|
|
|
|
# ---------------------------------------------------------------------------
# Template 9: look an institution up by identifier
# ---------------------------------------------------------------------------
# Exact match on an identifier literal, with one query variant per identifier
# kind (hc:isil or hc:ghcid).
# NOTE(review): identifier_type accepts "wikidata", but this block defines no
# sparql_template_wikidata - either add that variant or remove the value.
find_institution_by_identifier:
  id: "find_institution_by_identifier"
  description: "Find institution by ISIL, GHCID, or other identifier"
  intent:
    - "entity_lookup"

  question_patterns:
    - "Welke instelling heeft ISIL {identifier}?"
    - "Zoek ISIL {identifier}"
    - "GHCID {identifier}"
    - "Institution with ISIL {identifier}"
    - "Find ISIL {identifier}"

  slots:
    identifier:
      type: string
      required: true
      examples:
        - "NL-AmRMA"
        - "NL-HaNA"
        - "DE-1"
    identifier_type:
      type: string
      default: "isil"
      valid_values:
        - "isil"
        - "ghcid"
        - "wikidata"

  # ISIL variant.
  sparql_template_isil: |
    {{ prefixes }}
    SELECT ?institution ?name ?city ?country ?website WHERE {
      ?institution a crm:E39_Actor ;
        hc:isil "{{ identifier }}" ;
        skos:prefLabel ?name .
      OPTIONAL { ?institution schema:addressLocality ?city }
      OPTIONAL { ?institution schema:addressCountry ?country }
      OPTIONAL { ?institution foaf:homepage ?website }
    }

  # GHCID variant.
  sparql_template_ghcid: |
    {{ prefixes }}
    SELECT ?institution ?name ?city ?country ?website WHERE {
      ?institution a crm:E39_Actor ;
        hc:ghcid "{{ identifier }}" ;
        skos:prefLabel ?name .
      OPTIONAL { ?institution schema:addressLocality ?city }
      OPTIONAL { ?institution schema:addressCountry ?country }
      OPTIONAL { ?institution foaf:homepage ?website }
    }
|
|
|
|
# ---------------------------------------------------------------------------
# Template 10: compare institution counts between two locations
# ---------------------------------------------------------------------------
# Both location values are bound through VALUES and matched against
# schema:addressLocality; the query returns one count row per location.
# NOTE(review): the slots declare fallback_types [subregion, country], but only
# this locality-based query is defined - confirm how other kinds are handled.
# NOTE(review): a location with no matching institution appears to yield no
# row rather than a zero count - verify that the consumer copes with this.
compare_locations:
  id: "compare_locations"
  description: "Compare number of institutions between locations"
  intent:
    - "comparative"
    - "statistical"

  question_patterns:
    # Dutch
    - "Vergelijk {location1} en {location2}"
    - "Hoeveel meer {institution_type_nl} heeft {location1} dan {location2}?"
    - "Verschil tussen {location1} en {location2}"
    - "{location1} versus {location2}"
    # English
    - "Compare {location1} and {location2}"
    - "How many more {institution_type_en} does {location1} have than {location2}?"
    - "Difference between {location1} and {location2}"
    - "{location1} vs {location2}"

  slots:
    location1:
      type: city
      required: true
      fallback_types:
        - subregion
        - country
    location2:
      type: city
      required: true
      fallback_types:
        - subregion
        - country
    institution_type:
      type: institution_type
      required: false

  # The type restriction is added only when `institution_type` is supplied.
  sparql_template: |
    {{ prefixes }}
    SELECT ?location (COUNT(DISTINCT ?institution) AS ?count) WHERE {
      VALUES ?location { "{{ location1 }}" "{{ location2 }}" }
      ?institution a crm:E39_Actor ;
        schema:addressLocality ?location .
      {% if institution_type %}
      ?institution hc:institutionType "{{ institution_type }}" .
      {% endif %}
    }
    GROUP BY ?location
|
|
|
|
# =============================================================================
# FOLLOW-UP PATTERNS (Conversation Context Resolution)
# =============================================================================
# ConversationContextResolver (DSPy) uses these patterns to expand elliptical
# follow-up questions BEFORE the Fyke filter runs.
#
# CRITICAL: the resolver runs FIRST, and Fyke then sees only the RESOLVED
# question. This prevents false positives on short follow-ups.
#
# Example flow:
#   Turn 1: "Welke archieven zijn er in Den Haag?" → lists archives
#   Turn 2: "En in Enschede?" (raw input - would be caught by naive Fyke!)
#           ↓ ConversationContextResolver
#           "Welke archieven zijn er in Enschede?" (resolved - clearly relevant)
#           ↓ FykeFilter
#           PASS (relevant)
#           ↓ TemplateClassifier
#           list_institutions_by_type_city
#
# NOTE(review): "What about {...}?" appears under location_swap, type_swap and
# details_request, and "Hoe zit het met {...}?" under the first two. Surface
# form alone cannot separate these; presumably the resolver decides by the
# kind of value captured - confirm.

follow_up_patterns:
  # Keep the previous template and institution type; replace the location.
  location_swap:
    description: "Same query type, different location"
    patterns:
      - "En in {new_location}?"
      - "En {new_location}?"
      - "What about {new_location}?"
      - "And in {new_location}?"
      - "Hoe zit het met {new_location}?"
      - "In {new_location}?"
      - "{new_location}?"
    slot_inheritance: [institution_type]
    resolution_strategy: "inherit_template_swap_location"

  # Keep the previous location; replace the institution type.
  type_swap:
    description: "Same location, different institution type"
    patterns:
      - "En de {new_type}?"
      - "Hoe zit het met {new_type}?"
      - "What about {new_type}?"
      - "And {new_type}?"
      - "En {new_type}?"
      - "{new_type}?"
    slot_inheritance: [city, region, country]
    resolution_strategy: "inherit_location_swap_type"

  # Turn the preceding list query into a count query.
  # NOTE(review): the inherited slots are named city/region/country, while the
  # target template declares a single `location` slot - confirm the mapping.
  count_from_list:
    description: "Count after listing"
    patterns:
      - "Hoeveel zijn dat?"
      - "How many is that?"
      - "How many are there?"
      - "Hoeveel?"
      - "How many?"
      - "Tel ze"
      - "Count them"
    transforms_to: "count_institutions_by_type_location"
    slot_inheritance: [institution_type, city, region, country]
    resolution_strategy: "convert_list_to_count"

  # Ask for details about a named entity.
  details_request:
    description: "More details about specific result"
    patterns:
      - "Vertel me meer over {entity}"
      - "Tell me more about {entity}"
      - "Meer informatie over {entity}"
      - "What about {entity}?"
      - "More about {entity}"
      - "Details over {entity}"
    transforms_to: "find_institution_by_name"
    resolution_strategy: "extract_entity_lookup"

  # Refer to an earlier result by its position; needs previous results.
  ordinal_reference:
    description: "Reference to result by position"
    patterns:
      - "De eerste"
      - "De tweede"
      - "De derde"
      - "The first one"
      - "The second one"
      - "Number {n}"
      - "Nummer {n}"
    requires_previous_results: true
    resolution_strategy: "resolve_ordinal_to_entity"

  # Refer to an earlier result with a pronoun; needs previous results.
  pronoun_reference:
    description: "Reference using pronouns"
    patterns:
      - "Wat is hun website?"
      - "What is their website?"
      - "Waar zijn ze gevestigd?"
      - "Where are they located?"
      - "Wanneer zijn ze opgericht?"
      - "When were they founded?"
    requires_previous_results: true
    resolution_strategy: "resolve_pronoun_to_entity"
|
|
|
|
# =============================================================================
# FYKE FILTER CONFIGURATION
# =============================================================================
# The Fyke filter catches irrelevant questions and returns a standard response.
#
# ⚠️ CRITICAL ORDERING:
#   1. ConversationContextResolver FIRST (expands follow-ups)
#   2. FykeFilter on RESOLVED question (not raw input!)
#
# This prevents false positives like:
#   - "En in Enschede?" → resolved to "Welke archieven zijn er in Enschede?" → PASS
#   - "Hoeveel?" → resolved to "Hoeveel archieven zijn er in Den Haag?" → PASS

fyke_filter:
  # DSPy Signature for relevance classification.
  # Operates on RESOLVED question only!
  dspy_signature:
    inputs:
      resolved_question: "The fully resolved question (after context resolution)"
      conversation_summary: "Brief summary of conversation topic"
    outputs:
      is_relevant: "boolean - whether question is about heritage institutions"
      confidence: "float 0-1 - confidence in classification"
      reasoning: "Brief explanation of relevance decision"

  # Hard-coded out-of-scope keywords (checked AFTER context resolution).
  # These are terms that are NEVER relevant to heritage queries.
  #
  # "weer" was removed from this list. It means "weather" but is also the
  # everyday Dutch adverb "again" ("Toon weer de musea in Utrecht"), so it
  # cannot count as never relevant. The unambiguous weather words
  # "weerbericht" and "weersverwachting" replace it.
  out_of_scope_keywords:
    - tandpasta
    - toothpaste
    - supermarkt
    - supermarket
    - restaurant
    - hotel
    - weerbericht
    - weersverwachting
    - weather
    - voetbal
    - soccer
    - football
    - recept
    - recipe
    - vliegticket
    - flight
    - politiek
    - politics
    - bitcoin
    - crypto
    - dating
    - tinder

  # Categories that are out of scope.
  out_of_scope_categories:
    - shopping
    - travel_booking
    - sports
    - cooking
    - entertainment
    - personal_advice
    - medical
    - legal
    - financial

  # Keywords that indicate heritage relevance (boost confidence).
  # The English archive/library/gallery forms were added so English questions
  # get the same boost as the Dutch forms already listed.
  heritage_keywords:
    - museum
    - musea
    - museums
    - archief
    - archieven
    - archive
    - archives
    - bibliotheek
    - bibliotheken
    - library
    - libraries
    - galerie
    - gallery
    - galleries
    - erfgoed
    - heritage
    - collectie
    - collection
    - tentoonstelling
    - exhibition
    - GLAM
    - cultureel
    - cultural

  # Standard responses when question is out of scope, keyed by language code.
  standard_response:
    nl: |
      Ik kan je helpen met vragen over erfgoedinstellingen zoals musea, archieven,
      bibliotheken en galerijen in Nederland en daarbuiten. Stel gerust een vraag
      over deze onderwerpen!
    en: |
      I can help you with questions about heritage institutions such as museums,
      archives, libraries and galleries in the Netherlands and beyond. Feel free
      to ask a question about these topics!
    de: |
      Ich kann Ihnen bei Fragen zu Kulturerbeinstitutionen wie Museen, Archiven,
      Bibliotheken und Galerien in den Niederlanden und darüber hinaus helfen.
      Stellen Sie gerne eine Frage zu diesen Themen!
    fr: |
      Je peux vous aider avec des questions sur les institutions patrimoniales
      comme les musées, les archives, les bibliothèques et les galeries aux
      Pays-Bas et au-delà. N'hésitez pas à poser une question sur ces sujets!
|