# SPARQL Query Templates for Heritage Custodian Knowledge Graph # # This file defines parameterized SPARQL templates that replace LLM-generated queries # with deterministic, validated templates. Based on docs/plan/prompt-query_template_mapping/ # # CRITICAL: Processing Pipeline Order # =================================== # 1. User question → ConversationContextResolver (DSPy) → RESOLVED question # 2. RESOLVED question → FykeFilter (DSPy) → relevant/irrelevant # ⚠️ FYKE MUST OPERATE ON RESOLVED QUESTION, NOT RAW INPUT! # "En in Enschede?" resolved to "Welke archieven zijn er in Enschede?" is clearly relevant # 3. If relevant: resolved question → TemplateClassifier → template_id # 4. template_id + resolved question → SlotExtractor → slot values # 5. template + slot values → TemplateInstantiator (Jinja2) → SPARQL query # # Slot Value Sources: # - data/validation/sparql_validation_rules.json (institution_type_mappings, subregion_mappings, etc.) # - backend/rag/ontology_mapping.py (fuzzy matching, multilingual synonyms) _metadata: version: "1.0.0" created: "2025-01-06" schema_source: "schemas/20251121/linkml/" validation_rules: "data/validation/sparql_validation_rules.json" # Standard SPARQL prefixes used in all templates _prefixes: | PREFIX hc: PREFIX crm: PREFIX schema: PREFIX skos: PREFIX org: PREFIX foaf: PREFIX dcterms: PREFIX xsd: PREFIX wd: # Slot type definitions with validation sources _slot_types: institution_type: description: "Single-letter custodian type code (M, L, A, G, etc.)" source: "sparql_validation_rules.json#institution_type_mappings" valid_values: ["M", "L", "A", "G", "O", "R", "C", "U", "B", "E", "S", "F", "I", "X", "P", "H", "D", "N", "T"] synonyms: # Dutch museum: "M" musea: "M" bibliotheek: "L" bibliotheken: "L" archief: "A" archieven: "A" galerie: "G" galerij: "G" galerijen: "G" # English museums: "M" library: "L" libraries: "L" archive: "A" archives: "A" gallery: "G" galleries: "G" # German bibliothek_de: "L" bibliotheken_de: "L" archiv: "A" 
archive_de: "A" galerie_de: "G" # Special types dierentuin: "B" zoo: "B" botanische_tuin: "B" botanical_garden: "B" kerk: "H" church: "H" moskee: "H" mosque: "H" synagoge: "H" synagogue: "H" subregion: description: "ISO 3166-2 subdivision code (NL-NH, DE-BY, etc.)" source: "sparql_validation_rules.json#subregion_mappings" synonyms: # Netherlands noord-holland: "NL-NH" noord_holland: "NL-NH" noordholland: "NL-NH" amsterdam_province: "NL-NH" zuid-holland: "NL-ZH" zuid_holland: "NL-ZH" zuidholland: "NL-ZH" rotterdam_province: "NL-ZH" den_haag_province: "NL-ZH" the_hague_province: "NL-ZH" noord-brabant: "NL-NB" brabant: "NL-NB" eindhoven_province: "NL-NB" gelderland: "NL-GE" arnhem_province: "NL-GE" nijmegen_province: "NL-GE" utrecht_province: "NL-UT" overijssel: "NL-OV" zwolle_province: "NL-OV" enschede_province: "NL-OV" limburg_nl: "NL-LI" maastricht_province: "NL-LI" friesland: "NL-FR" frisia: "NL-FR" leeuwarden_province: "NL-FR" groningen_province: "NL-GR" drenthe: "NL-DR" assen_province: "NL-DR" flevoland: "NL-FL" almere_province: "NL-FL" lelystad_province: "NL-FL" zeeland: "NL-ZE" middelburg_province: "NL-ZE" # Belgium vlaanderen: "BE-VLG" flanders: "BE-VLG" antwerpen_province: "BE-VLG" gent_province: "BE-VLG" brugge_province: "BE-VLG" wallonie: "BE-WAL" wallonia: "BE-WAL" brussel: "BE-BRU" brussels: "BE-BRU" bruxelles: "BE-BRU" # Germany bayern: "DE-BY" bavaria: "DE-BY" muenchen_province: "DE-BY" munich_province: "DE-BY" berlin_state: "DE-BE" baden_wuerttemberg: "DE-BW" stuttgart_province: "DE-BW" nordrhein_westfalen: "DE-NW" north_rhine_westphalia: "DE-NW" koeln_province: "DE-NW" cologne_province: "DE-NW" duesseldorf_province: "DE-NW" sachsen: "DE-SN" saxony: "DE-SN" dresden_province: "DE-SN" hessen: "DE-HE" hesse: "DE-HE" frankfurt_province: "DE-HE" country: description: "Wikidata entity URI for country" source: "sparql_validation_rules.json#country_mappings" format: "wikidata_uri" synonyms: nederland: "Q55" netherlands: "Q55" holland: "Q55" nl: "Q55" belgie: 
"Q31" belgium: "Q31" be: "Q31" duitsland: "Q183" germany: "Q183" de_country: "Q183" frankrijk: "Q142" france: "Q142" fr: "Q142" verenigd_koninkrijk: "Q145" united_kingdom: "Q145" uk: "Q145" gb: "Q145" engeland: "Q145" england: "Q145" verenigde_staten: "Q30" united_states: "Q30" usa: "Q30" us: "Q30" japan: "Q17" jp: "Q17" tsjechie: "Q213" czech_republic: "Q213" czechia: "Q213" cz: "Q213" oostenrijk: "Q40" austria: "Q40" at: "Q40" zwitserland: "Q39" switzerland: "Q39" ch: "Q39" city: description: "City/locality name (string literal)" source: "fuzzy_match" institution_name: description: "Institution name for lookup (string literal)" source: "fuzzy_match" limit: description: "Result limit (integer)" default: 10 max: 100 budget_category: description: "Budget or expense category for financial queries" source: "ontology" valid_values: ["innovation", "digitization", "preservation", "personnel", "acquisition", "operating", "capital", "external_funding", "internal_funding", "endowment_draw"] synonyms: # Dutch - Innovation innovatie: "innovation" innovaties: "innovation" vernieuwing: "innovation" digital_transformatie: "innovation" digitale_transformatie: "innovation" r_d: "innovation" onderzoek_ontwikkeling: "innovation" # English - Innovation innovations: "innovation" r_and_d: "innovation" research_development: "innovation" digital_transformation: "innovation" technology: "innovation" tech: "innovation" # German - Innovation innovationen: "innovation" erneuerung: "innovation" # Dutch - Digitization digitalisering: "digitization" digitaliseringsbudget: "digitization" digitale_collectie: "digitization" # English - Digitization digitisation: "digitization" # UK spelling digital: "digitization" scanning: "digitization" # German - Digitization digitalisierung: "digitization" # Dutch - Preservation conservering: "preservation" restauratie: "preservation" behoud: "preservation" onderhoud: "preservation" # English - Preservation conservation: "preservation" restoration: 
"preservation" maintenance: "preservation" # German - Preservation konservierung: "preservation" restaurierung: "preservation" # Dutch - Personnel personeel: "personnel" personele_kosten: "personnel" salarissen: "personnel" medewerkers: "personnel" fte: "personnel" # English - Personnel staff: "personnel" salaries: "personnel" employees: "personnel" hr: "personnel" human_resources: "personnel" # German - Personnel personal: "personnel" personalkosten: "personnel" gehälter: "personnel" # Dutch - Acquisition aanwinsten: "acquisition" aankopen: "acquisition" collectie_aankopen: "acquisition" verwervingen: "acquisition" # English - Acquisition acquisitions: "acquisition" purchases: "acquisition" collection_development: "acquisition" # German - Acquisition erwerbungen: "acquisition" ankäufe: "acquisition" # Dutch - Operating operationeel: "operating" exploitatie: "operating" bedrijfskosten: "operating" # English - Operating operations: "operating" operational: "operating" running_costs: "operating" # German - Operating betriebskosten: "operating" betrieb: "operating" # Dutch - Capital kapitaal: "capital" investeringen: "capital" bouw: "capital" verbouwing: "capital" # English - Capital capex: "capital" investments: "capital" construction: "capital" building: "capital" # German - Capital kapital: "capital" investitionen: "capital" # ============================================================================= # TEMPLATE DEFINITIONS # ============================================================================= templates: # --------------------------------------------------------------------------- # Template 1: List institutions by type and location (city) # --------------------------------------------------------------------------- list_institutions_by_type_city: id: "list_institutions_by_type_city" description: "List heritage institutions of a specific type in a city" intent: ["geographic", "exploration"] question_patterns: # Dutch - formal - "Welke 
{institution_type_nl} zijn er in {city}?" - "Welke {institution_type_nl} heeft {city}?" - "Wat zijn de {institution_type_nl} in {city}?" - "Geef me de {institution_type_nl} in {city}" - "Toon {institution_type_nl} in {city}" - "{institution_type_nl} in {city}" # Dutch - conversational - "Wat voor {institution_type_nl} hebben ze in {city}?" - "Wat voor {institution_type_nl} zijn er in {city}?" - "Wat voor {institution_type_nl} heeft {city}?" - "Geef een overzicht van {institution_type_nl} in {city}" - "Geef een overzicht van de {institution_type_nl} in {city}" - "Geef een overzicht van alle {institution_type_nl} in {city}" - "Ik zoek {institution_type_nl} in {city}" - "Zijn er {institution_type_nl} in {city}?" - "Heeft {city} {institution_type_nl}?" - "Ken je {institution_type_nl} in {city}?" - "Welke {institution_type_nl} kan ik vinden in {city}?" - "Noem {institution_type_nl} in {city}" - "Lijst van {institution_type_nl} in {city}" # English - "What {institution_type_en} are in {city}?" - "Which {institution_type_en} are there in {city}?" - "List {institution_type_en} in {city}" - "Show me {institution_type_en} in {city}" - "{institution_type_en} in {city}" - "What kind of {institution_type_en} are in {city}?" - "Give me an overview of {institution_type_en} in {city}" - "I'm looking for {institution_type_en} in {city}" - "Are there {institution_type_en} in {city}?" # German - "Welche {institution_type_de} gibt es in {city}?" - "Welche {institution_type_de} hat {city}?" - "Was für {institution_type_de} gibt es in {city}?" 
- "Gib mir eine Übersicht der {institution_type_de} in {city}" slots: institution_type: type: institution_type required: true examples: ["musea", "archieven", "bibliotheken", "museums", "archives"] city: type: city required: true examples: ["Amsterdam", "Den Haag", "Rotterdam", "Utrecht"] sparql_template: | {{ prefixes }} SELECT DISTINCT ?institution ?name ?website WHERE { ?institution a hc:class/Custodian ; hc:institutionType "{{ institution_type }}" ; schema:addressLocality "{{ city }}" ; skos:prefLabel ?name . OPTIONAL { ?institution foaf:homepage ?website } } ORDER BY ?name {% if limit %}LIMIT {{ limit }}{% endif %} examples: - question: "Welke musea zijn er in Amsterdam?" slots: {institution_type: "M", city: "Amsterdam"} - question: "What archives are in The Hague?" slots: {institution_type: "A", city: "Den Haag"} # --------------------------------------------------------------------------- # Template 2: List institutions by type and province/region # --------------------------------------------------------------------------- list_institutions_by_type_region: id: "list_institutions_by_type_region" description: "List heritage institutions of a specific type in a province/region" intent: ["geographic", "exploration"] question_patterns: # Dutch - formal - "Welke {institution_type_nl} zijn er in {region}?" - "Hoeveel {institution_type_nl} heeft {region}?" - "{institution_type_nl} in {region}" - "Alle {institution_type_nl} in de provincie {region}" # Dutch - conversational - "Wat voor {institution_type_nl} hebben ze in {region}?" - "Wat voor {institution_type_nl} zijn er in {region}?" - "Geef een overzicht van {institution_type_nl} in {region}" - "Geef een overzicht van de {institution_type_nl} in {region}" - "Geef een overzicht van alle {institution_type_nl} in {region}" - "Ik zoek {institution_type_nl} in {region}" - "Zijn er {institution_type_nl} in {region}?" - "Ken je {institution_type_nl} in {region}?" 
- "Welke {institution_type_nl} kan ik vinden in {region}?" - "Noem {institution_type_nl} in {region}" - "Lijst van {institution_type_nl} in {region}" - "{institution_type_nl} in de provincie {region}" # English - "What {institution_type_en} are in {region}?" - "Which {institution_type_en} are there in {region}?" - "{institution_type_en} in {region}" - "Give me an overview of {institution_type_en} in {region}" - "I'm looking for {institution_type_en} in {region}" slots: institution_type: type: institution_type required: true region: type: subregion required: true examples: ["Noord-Holland", "Gelderland", "Limburg", "Bavaria", "Flanders"] sparql_template: | {{ prefixes }} SELECT DISTINCT ?institution ?name ?city WHERE { ?institution a hc:class/Custodian ; hc:institutionType "{{ institution_type }}" ; hc:ghcid ?ghcid ; skos:prefLabel ?name . FILTER(STRSTARTS(?ghcid, "{{ region }}")) OPTIONAL { ?institution schema:addressLocality ?city } } ORDER BY ?name {% if limit %}LIMIT {{ limit }}{% endif %} sparql_template_alt: | {{ prefixes }} SELECT DISTINCT ?institution ?name ?city WHERE { ?institution a crm:E39_Actor ; hc:institutionType "{{ institution_type }}" ; hc:ghcid ?ghcid ; skos:prefLabel ?name . FILTER(STRSTARTS(?ghcid, "{{ region | replace('-', '-') }}")) OPTIONAL { ?institution schema:addressLocality ?city } } ORDER BY ?name {% if limit %}LIMIT {{ limit }}{% endif %} # --------------------------------------------------------------------------- # Template 3: List institutions by type and country # --------------------------------------------------------------------------- list_institutions_by_type_country: id: "list_institutions_by_type_country" description: "List heritage institutions of a specific type in a country" intent: ["geographic", "exploration"] question_patterns: # Dutch - "Welke {institution_type_nl} zijn er in {country}?" 
- "Alle {institution_type_nl} in {country}" - "{institution_type_nl} in {country}" # English - "What {institution_type_en} are in {country}?" - "List all {institution_type_en} in {country}" - "{institution_type_en} in {country}" slots: institution_type: type: institution_type required: true country: type: country required: true examples: ["Nederland", "Belgium", "Germany", "France"] sparql_template: | {{ prefixes }} SELECT DISTINCT ?institution ?name ?city WHERE { ?institution a crm:E39_Actor ; hc:institutionType "{{ institution_type }}" ; schema:addressCountry ; skos:prefLabel ?name . OPTIONAL { ?institution schema:addressLocality ?city } } ORDER BY ?name {% if limit %}LIMIT {{ limit }}{% endif %} # --------------------------------------------------------------------------- # Template 4: Count institutions by type and location # --------------------------------------------------------------------------- count_institutions_by_type_location: id: "count_institutions_by_type_location" description: "Count heritage institutions of a specific type in a location" intent: ["statistical"] question_patterns: # Dutch - formal - "Hoeveel {institution_type_nl} zijn er in {location}?" - "Hoeveel {institution_type_nl} heeft {location}?" - "Aantal {institution_type_nl} in {location}" - "Tel de {institution_type_nl} in {location}" # Dutch - conversational - "Hoeveel {institution_type_nl} telt {location}?" - "Wat is het aantal {institution_type_nl} in {location}?" - "Hoeveel {institution_type_nl} kan ik vinden in {location}?" - "Kun je tellen hoeveel {institution_type_nl} er in {location} zijn?" - "Hoeveel {institution_type_nl} zitten er in {location}?" # English - "How many {institution_type_en} are in {location}?" - "How many {institution_type_en} does {location} have?" - "Count of {institution_type_en} in {location}" - "Number of {institution_type_en} in {location}" - "What's the number of {institution_type_en} in {location}?" 
- "Can you count {institution_type_en} in {location}?" # German - "Wie viele {institution_type_de} gibt es in {location}?" - "Wie viele {institution_type_de} hat {location}?" - "Anzahl der {institution_type_de} in {location}" slots: institution_type: type: institution_type required: true location: type: city required: true fallback_types: [subregion, country] sparql_template: | {{ prefixes }} SELECT (COUNT(DISTINCT ?institution) AS ?count) WHERE { ?institution a crm:E39_Actor ; hc:institutionType "{{ institution_type }}" ; schema:addressLocality "{{ location }}" . } sparql_template_region: | {{ prefixes }} SELECT (COUNT(DISTINCT ?institution) AS ?count) WHERE { ?institution a crm:E39_Actor ; hc:institutionType "{{ institution_type }}" ; hc:ghcid ?ghcid . FILTER(STRSTARTS(?ghcid, "{{ location }}")) } sparql_template_country: | {{ prefixes }} SELECT (COUNT(DISTINCT ?institution) AS ?count) WHERE { ?institution a crm:E39_Actor ; hc:institutionType "{{ institution_type }}" ; schema:addressCountry . } # --------------------------------------------------------------------------- # Template 5: Count all institutions by type (distribution) # --------------------------------------------------------------------------- count_institutions_by_type: id: "count_institutions_by_type" description: "Count institutions grouped by type" intent: ["statistical"] question_patterns: # Dutch - "Hoeveel instellingen per type?" - "Verdeling van instellingen per type" - "Hoeveel musea, archieven en bibliotheken zijn er?" - "Statistieken per instellingstype" # English - "How many institutions per type?" - "Distribution of institutions by type" - "Statistics by institution type" - "How many museums, archives and libraries are there?" slots: {} sparql_template: | {{ prefixes }} SELECT ?type (COUNT(DISTINCT ?institution) AS ?count) WHERE { ?institution a crm:E39_Actor ; hc:institutionType ?type . 
} GROUP BY ?type ORDER BY DESC(?count) # --------------------------------------------------------------------------- # Template 6: Find institution by name # --------------------------------------------------------------------------- find_institution_by_name: id: "find_institution_by_name" description: "Find a specific institution by name" intent: ["entity_lookup"] question_patterns: # Dutch - formal - "Waar is {institution_name}?" - "Informatie over {institution_name}" - "Gegevens van {institution_name}" - "Wat is {institution_name}?" - "Zoek {institution_name}" # Dutch - conversational - "Geef informatie over {institution_name}" - "Geef me informatie over {institution_name}" - "Vertel me over {institution_name}" - "Wat weet je over {institution_name}?" - "Ken je {institution_name}?" - "Wat kun je vertellen over {institution_name}?" - "Ik zoek informatie over {institution_name}" - "Ik wil meer weten over {institution_name}" - "Details over {institution_name}" - "Geef details over {institution_name}" # English - "Where is {institution_name}?" - "Information about {institution_name}" - "What is {institution_name}?" - "Find {institution_name}" - "Tell me about {institution_name}" - "Give me information about {institution_name}" - "What do you know about {institution_name}?" - "I'm looking for information about {institution_name}" # German - "Wo ist {institution_name}?" - "Informationen über {institution_name}" - "Was ist {institution_name}?" - "Erzähl mir über {institution_name}" slots: institution_name: type: institution_name required: true examples: ["Rijksmuseum", "Nationaal Archief", "Koninklijke Bibliotheek"] sparql_template: | {{ prefixes }} SELECT ?institution ?name ?type ?city ?country ?website ?description WHERE { ?institution a crm:E39_Actor ; skos:prefLabel ?name . 
FILTER(CONTAINS(LCASE(STR(?name)), LCASE("{{ institution_name }}"))) OPTIONAL { ?institution hc:institutionType ?type } OPTIONAL { ?institution schema:addressLocality ?city } OPTIONAL { ?institution schema:addressCountry ?country } OPTIONAL { ?institution foaf:homepage ?website } OPTIONAL { ?institution schema:description ?description } } LIMIT 10 # --------------------------------------------------------------------------- # Template 7: List institutions in city (all types) # --------------------------------------------------------------------------- list_all_institutions_in_city: id: "list_all_institutions_in_city" description: "List all heritage institutions in a city" intent: ["geographic", "exploration"] question_patterns: # Dutch - "Welke erfgoedinstellingen zijn er in {city}?" - "Alle instellingen in {city}" - "Erfgoed in {city}" - "Wat is er te zien in {city}?" - "Culturele instellingen in {city}" # English - "What heritage institutions are in {city}?" - "All institutions in {city}" - "Heritage in {city}" - "What is there to see in {city}?" - "Cultural institutions in {city}" slots: city: type: city required: true sparql_template: | {{ prefixes }} SELECT ?institution ?name ?type ?website WHERE { ?institution a crm:E39_Actor ; schema:addressLocality "{{ city }}" ; skos:prefLabel ?name . OPTIONAL { ?institution hc:institutionType ?type } OPTIONAL { ?institution foaf:homepage ?website } } ORDER BY ?type ?name {% if limit %}LIMIT {{ limit }}{% endif %} # --------------------------------------------------------------------------- # Template 8: Find oldest/youngest institutions # --------------------------------------------------------------------------- find_institutions_by_founding_date: id: "find_institutions_by_founding_date" description: "Find oldest or most recently founded institutions" intent: ["temporal", "exploration"] question_patterns: # Dutch - "Wat zijn de oudste {institution_type_nl}?" - "Wat is het oudste {institution_type_nl}?" 
- "Oudste {institution_type_nl} in {location}" - "Wanneer is {institution_name} opgericht?" - "Wat zijn de nieuwste {institution_type_nl}?" - "Recent opgerichte {institution_type_nl}" # English - "What are the oldest {institution_type_en}?" - "What is the oldest {institution_type_en}?" - "Oldest {institution_type_en} in {location}" - "When was {institution_name} founded?" - "What are the newest {institution_type_en}?" - "Recently founded {institution_type_en}" slots: institution_type: type: institution_type required: false location: type: city required: false fallback_types: [subregion, country] order: type: string default: "ASC" valid_values: ["ASC", "DESC"] sparql_template: | {{ prefixes }} SELECT ?institution ?name ?founded ?city WHERE { ?institution a crm:E39_Actor ; skos:prefLabel ?name ; schema:foundingDate ?founded . {% if institution_type %} ?institution hc:institutionType "{{ institution_type }}" . {% endif %} {% if location %} ?institution schema:addressLocality "{{ location }}" . {% endif %} OPTIONAL { ?institution schema:addressLocality ?city } } ORDER BY {{ order }}(?founded) LIMIT {{ limit | default(10) }} # --------------------------------------------------------------------------- # Template 9: Find institutions with specific identifier (ISIL, etc.) # --------------------------------------------------------------------------- find_institution_by_identifier: id: "find_institution_by_identifier" description: "Find institution by ISIL, GHCID, or other identifier" intent: ["entity_lookup"] question_patterns: - "Welke instelling heeft ISIL {identifier}?" 
- "Zoek ISIL {identifier}" - "GHCID {identifier}" - "Institution with ISIL {identifier}" - "Find ISIL {identifier}" slots: identifier: type: string required: true examples: ["NL-AmRMA", "NL-HaNA", "DE-1"] identifier_type: type: string default: "isil" valid_values: ["isil", "ghcid", "wikidata"] sparql_template_isil: | {{ prefixes }} SELECT ?institution ?name ?city ?country ?website WHERE { ?institution a crm:E39_Actor ; hc:isil "{{ identifier }}" ; skos:prefLabel ?name . OPTIONAL { ?institution schema:addressLocality ?city } OPTIONAL { ?institution schema:addressCountry ?country } OPTIONAL { ?institution foaf:homepage ?website } } sparql_template_ghcid: | {{ prefixes }} SELECT ?institution ?name ?city ?country ?website WHERE { ?institution a crm:E39_Actor ; hc:ghcid "{{ identifier }}" ; skos:prefLabel ?name . OPTIONAL { ?institution schema:addressLocality ?city } OPTIONAL { ?institution schema:addressCountry ?country } OPTIONAL { ?institution foaf:homepage ?website } } # --------------------------------------------------------------------------- # Template 10: Compare institutions in different locations # --------------------------------------------------------------------------- compare_locations: id: "compare_locations" description: "Compare number of institutions between locations" intent: ["comparative", "statistical"] question_patterns: # Dutch - "Vergelijk {location1} en {location2}" - "Hoeveel meer {institution_type_nl} heeft {location1} dan {location2}?" - "Verschil tussen {location1} en {location2}" - "{location1} versus {location2}" # English - "Compare {location1} and {location2}" - "How many more {institution_type_en} does {location1} have than {location2}?" 
- "Difference between {location1} and {location2}" - "{location1} vs {location2}" slots: location1: type: city required: true fallback_types: [subregion, country] location2: type: city required: true fallback_types: [subregion, country] institution_type: type: institution_type required: false sparql_template: | {{ prefixes }} SELECT ?location (COUNT(DISTINCT ?institution) AS ?count) WHERE { VALUES ?location { "{{ location1 }}" "{{ location2 }}" } ?institution a crm:E39_Actor ; schema:addressLocality ?location . {% if institution_type %} ?institution hc:institutionType "{{ institution_type }}" . {% endif %} } GROUP BY ?location # --------------------------------------------------------------------------- # Template 11: Find custodians by budget threshold # --------------------------------------------------------------------------- find_custodians_by_budget_threshold: id: "find_custodians_by_budget_threshold" description: "Find custodians with budget/expense category above or below a threshold" intent: ["financial", "exploration"] question_patterns: # Dutch - Budget (planned) - Standard patterns - "Welke instellingen besteden meer dan {amount} aan {budget_category}?" - "Welke instellingen geven meer dan {amount} uit aan {budget_category}?" - "Welke instellingen hebben een {budget_category}budget van meer dan {amount}?" - "Welke {institution_type_nl} besteden meer dan {amount} aan {budget_category}?" - "Instellingen met {budget_category} boven {amount}" - "Wie geeft meer dan {amount} uit aan {budget_category}?" 
# Dutch - Conversational (NEW) - "Ik zoek {institution_type_nl} met een hoog {budget_category}budget" - "Ik zoek {institution_type_nl} met een {budget_category}budget boven {amount}" - "Ik zoek instellingen met een hoog {budget_category}budget" - "Geef mij een lijst van {institution_type_nl} met een {budget_category}budget boven {amount}" - "Geef mij een lijst van {institution_type_nl} met een {budget_category}budget boven {amount} euro" - "Ken je {institution_type_nl} met een hoog {budget_category}budget?" - "Ken je instellingen die veel uitgeven aan {budget_category}?" - "Waar vind ik {institution_type_nl} met een groot {budget_category}budget?" - "{institution_type_nl} met hoge {budget_category}uitgaven" - "{institution_type_nl} die veel investeren in {budget_category}" # Dutch - Alternative phrasings (existence/list patterns) - "Zijn er instellingen die meer dan {amount} uitgeven aan {budget_category}?" - "Zijn er {institution_type_nl} die meer dan {amount} uitgeven aan {budget_category}?" - "Zijn er {institution_type_nl} die meer uitgeven dan {amount} aan {budget_category}?" - "Geef mij instellingen met meer dan {amount} aan {budget_category}" - "Geef een lijst van instellingen met {budget_category} boven {amount}" - "Toon instellingen die meer dan {amount} uitgeven aan {budget_category}" - "Toon alle instellingen die meer dan {amount} uitgeven aan {budget_category}" - "Welke organisaties besteden meer dan {amount} aan {budget_category}?" - "Welke organisaties geven meer dan {amount} uit aan {budget_category}?" # Dutch - With "euro" explicit - "Welke instellingen geven meer dan {amount} euro uit aan {budget_category}?" - "Welke instellingen besteden meer dan {amount} euro aan {budget_category}?" - "Instellingen met een {budget_category}budget van meer dan {amount} euro" # Dutch - Budget with year - "Welke instellingen besteden meer dan {amount} aan {budget_category} in {year}?" 
- "Hoeveel instellingen geven meer dan {amount} uit aan {budget_category} in {year}?" # Dutch - Less than - "Welke instellingen besteden minder dan {amount} aan {budget_category}?" - "Instellingen met {budget_category} onder {amount}" - "Zijn er instellingen die minder dan {amount} uitgeven aan {budget_category}?" - "Toon instellingen die minder dan {amount} uitgeven aan {budget_category}" # English - Budget (planned) - "Which custodians spend more than {amount} on {budget_category}?" - "Which institutions have a {budget_category} budget over {amount}?" - "Which {institution_type_en} spend more than {amount} on {budget_category}?" - "Institutions with {budget_category} above {amount}" - "Who spends more than {amount} on {budget_category}?" # English - Conversational (NEW) - "I'm looking for {institution_type_en} with a high {budget_category} budget" - "I'm looking for {institution_type_en} with a {budget_category} budget over {amount}" - "I'm looking for institutions with a high {budget_category} budget" - "Give me a list of {institution_type_en} with a {budget_category} budget over {amount}" - "Do you know {institution_type_en} with a high {budget_category} budget?" - "{institution_type_en} with high {budget_category} spending" - "{institution_type_en} that invest heavily in {budget_category}" # English - Alternative phrasings - "Are there institutions that spend more than {amount} on {budget_category}?" - "Are there {institution_type_en} that spend more than {amount} on {budget_category}?" - "Show me institutions with {budget_category} over {amount}" - "List institutions spending more than {amount} on {budget_category}" - "Give me institutions with {budget_category} budget above {amount}" - "Which organizations spend more than {amount} on {budget_category}?" # English - Budget with year - "Which custodians spend more than {amount} on {budget_category} in {year}?" - "How many institutions spend more than {amount} on {budget_category} in {year}?" 
# English - Less than - "Which custodians spend less than {amount} on {budget_category}?" - "Institutions with {budget_category} under {amount}" - "Are there institutions that spend less than {amount} on {budget_category}?" # German - "Welche Institutionen geben mehr als {amount} für {budget_category} aus?" - "Welche Institutionen haben ein {budget_category}budget über {amount}?" - "Gibt es Institutionen die mehr als {amount} für {budget_category} ausgeben?" - "Ich suche {institution_type_de} mit einem hohen {budget_category}budget" slots: budget_category: type: budget_category required: true examples: ["innovation", "digitization", "preservation", "personnel", "acquisition"] amount: type: decimal required: true examples: ["5000", "10000", "50000", "100000"] year: type: integer required: false examples: ["2024", "2025"] comparison: type: string default: ">" valid_values: [">", "<", ">=", "<=", "="] institution_type: type: institution_type required: false source: type: string default: "budget" valid_values: ["budget", "actuals"] description: "Whether to query Budget (planned) or FinancialStatement (actuals)" sparql_template: | {{ prefixes }} PREFIX frapo: SELECT DISTINCT ?institution ?name ?budget_amount ?fiscal_year WHERE { ?institution a crm:E39_Actor ; skos:prefLabel ?name . ?budget a hc:class/Budget ; hc:refers_to_custodian ?institution ; hc:{{ budget_category }}_budget ?budget_amount . {% if year %} ?budget hc:fiscal_year_start ?fy_start . FILTER(YEAR(?fy_start) = {{ year }}) {% endif %} FILTER(?budget_amount {{ comparison | default(">") }} {{ amount }}) {% if institution_type %} ?institution hc:institutionType "{{ institution_type }}" . {% endif %} OPTIONAL { ?budget hc:fiscal_year_start ?fy_start . 
BIND(YEAR(?fy_start) AS ?fiscal_year) } } ORDER BY DESC(?budget_amount) {% if limit %}LIMIT {{ limit }}{% endif %} sparql_template_actuals: | {{ prefixes }} PREFIX frapo: SELECT DISTINCT ?institution ?name ?expense_amount ?reporting_year WHERE { ?institution a crm:E39_Actor ; skos:prefLabel ?name . ?statement a hc:class/FinancialStatement ; hc:refers_to_custodian ?institution ; hc:{{ budget_category }}_expenses ?expense_amount . {% if year %} ?statement hc:reporting_period_start ?rp_start . FILTER(YEAR(?rp_start) = {{ year }}) {% endif %} FILTER(?expense_amount {{ comparison | default(">") }} {{ amount }}) {% if institution_type %} ?institution hc:institutionType "{{ institution_type }}" . {% endif %} OPTIONAL { ?statement hc:reporting_period_start ?rp_start . BIND(YEAR(?rp_start) AS ?reporting_year) } } ORDER BY DESC(?expense_amount) {% if limit %}LIMIT {{ limit }}{% endif %} examples: - question: "Welke instellingen besteden meer dan 5000 euro aan innovatie in 2024?" slots: {budget_category: "innovation", amount: 5000, year: 2024, comparison: ">"} - question: "Which custodians spend more than 10000 on digitization?" slots: {budget_category: "digitization", amount: 10000, comparison: ">"} - question: "Institutions with preservation budget above 50000" slots: {budget_category: "preservation", amount: 50000, comparison: ">"} - question: "Which museums spend less than 1000 on innovation?" slots: {budget_category: "innovation", amount: 1000, comparison: "<", institution_type: "M"} # ============================================================================= # FOLLOW-UP PATTERNS (Conversation Context Resolution) # ============================================================================= # These patterns help ConversationContextResolver (DSPy) expand elliptical # follow-up questions BEFORE the Fyke filter runs. # # CRITICAL: ConversationContextResolver runs FIRST, then Fyke operates on # the RESOLVED question. This prevents false positives on short follow-ups. 
#
# Example flow:
#   Turn 1: "Welke archieven zijn er in Den Haag?" → lists archives
#   Turn 2: "En in Enschede?" (raw input - would be caught by naive Fyke!)
#           ↓ ConversationContextResolver
#           "Welke archieven zijn er in Enschede?" (resolved - clearly relevant)
#           ↓ FykeFilter
#           PASS (relevant)
#           ↓ TemplateClassifier
#           list_institutions_by_type_city
#
# Keys used by each pattern group below:
#   description                - human-readable summary of the follow-up kind
#   patterns                   - surface forms; {placeholders} mark the new value
#   slot_inheritance           - slots to carry over from the previous turn
#   transforms_to              - template id the follow-up is rewritten to
#   requires_previous_results  - follow-up points at results of an earlier turn
#   resolution_strategy        - strategy name (implemented by the resolver
#                                code, which is not part of this file)

follow_up_patterns:
  # Keep the previous query shape, replace only the place.
  location_swap:
    description: "Same query type, different location"
    patterns:
      - "En in {new_location}?"
      - "En {new_location}?"
      - "What about {new_location}?"
      - "And in {new_location}?"
      - "Hoe zit het met {new_location}?"
      - "In {new_location}?"
      - "{new_location}?"
    slot_inheritance:
      - institution_type
    resolution_strategy: "inherit_template_swap_location"

  # Keep the previous place, replace only the institution type.
  # NOTE(review): several surface forms here are identical to location_swap
  # ("En {…}?", "What about {…}?", "{…}?"); presumably the resolver decides by
  # the kind of value that fills the placeholder - confirm.
  type_swap:
    description: "Same location, different institution type"
    patterns:
      - "En de {new_type}?"
      - "Hoe zit het met {new_type}?"
      - "What about {new_type}?"
      - "And {new_type}?"
      - "En {new_type}?"
      - "{new_type}?"
    slot_inheritance:
      - city
      - region
      - country
    resolution_strategy: "inherit_location_swap_type"

  # Turn a preceding list query into a count query with the same slots.
  count_from_list:
    description: "Count after listing"
    patterns:
      - "Hoeveel zijn dat?"
      - "How many is that?"
      - "How many are there?"
      - "Hoeveel?"
      - "How many?"
      - "Tel ze"
      - "Count them"
    transforms_to: "count_institutions_by_type_location"
    slot_inheritance:
      - institution_type
      - city
      - region
      - country
    resolution_strategy: "convert_list_to_count"

  # Look up a single named entity.
  # NOTE(review): "What about {entity}?" has the same surface form as the
  # "What about ...?" patterns in location_swap and type_swap - confirm the
  # resolver can tell them apart.
  details_request:
    description: "More details about specific result"
    patterns:
      - "Vertel me meer over {entity}"
      - "Tell me more about {entity}"
      - "Meer informatie over {entity}"
      - "What about {entity}?"
      - "More about {entity}"
      - "Details over {entity}"
    transforms_to: "find_institution_by_name"
    resolution_strategy: "extract_entity_lookup"

  # "The first one", "Nummer 3": positional reference into earlier results.
  ordinal_reference:
    description: "Reference to result by position"
    patterns:
      - "De eerste"
      - "De tweede"
      - "De derde"
      - "The first one"
      - "The second one"
      - "Number {n}"
      - "Nummer {n}"
    requires_previous_results: true
    resolution_strategy: "resolve_ordinal_to_entity"

  # "their", "ze", "hun": pronoun that stands for earlier results.
  pronoun_reference:
    description: "Reference using pronouns"
    patterns:
      - "Wat is hun website?"
      - "What is their website?"
      - "Waar zijn ze gevestigd?"
      - "Where are they located?"
      - "Wanneer zijn ze opgericht?"
      - "When were they founded?"
    requires_previous_results: true
    resolution_strategy: "resolve_pronoun_to_entity"

# =============================================================================
# FYKE FILTER CONFIGURATION
# =============================================================================
# The Fyke filter catches irrelevant questions and returns a standard response.
#
# ⚠️ CRITICAL ORDERING:
# 1. ConversationContextResolver FIRST (expands follow-ups)
# 2. FykeFilter on RESOLVED question (not raw input!)
#
# This prevents false positives like:
# - "En in Enschede?" → resolved to "Welke archieven zijn er in Enschede?" → PASS
# - "Hoeveel?" → resolved to "Hoeveel archieven zijn er in Den Haag?" → PASS

fyke_filter:
  # DSPy Signature for relevance classification
  # Operates on RESOLVED question only!
  dspy_signature:
    inputs:
      resolved_question: "The fully resolved question (after context resolution)"
      conversation_summary: "Brief summary of conversation topic"
    outputs:
      is_relevant: "boolean - whether question is about heritage institutions"
      confidence: "float 0-1 - confidence in classification"
      reasoning: "Brief explanation of relevance decision"

  # Hard-coded out-of-scope keywords (checked AFTER context resolution)
  # These are terms that are NEVER relevant to heritage queries
  #
  # Only unambiguous terms belong here. The bare Dutch word for "weather" is
  # deliberately not listed: it is also the common adverb "again", so matching
  # it would reject ordinary heritage questions. The compound weather terms
  # below cover the same intent without that collision.
  # NOTE(review): hotel, restaurant, voetbal, politiek and flight can occur in
  # names or subjects of heritage institutions - confirm how matches are
  # weighed against heritage_keywords.
  out_of_scope_keywords:
    - tandpasta
    - toothpaste
    - supermarkt
    - supermarket
    - restaurant
    - hotel
    - weerbericht
    - weersverwachting
    - weather
    - voetbal
    - soccer
    - football
    - recept
    - recipe
    - vliegticket
    - flight
    - politiek
    - politics
    - bitcoin
    - crypto
    - dating
    - tinder

  # Categories that are out of scope
  # "personal_finance" rather than a blanket finance category: this file also
  # defines templates for budget and expense questions about institutions,
  # which are in scope.
  out_of_scope_categories:
    - shopping
    - travel_booking
    - sports
    - cooking
    - entertainment
    - personal_advice
    - medical
    - legal
    - personal_finance

  # Keywords that indicate heritage relevance (boost confidence)
  # Covers singular and plural, Dutch and English forms of the four main
  # institution types, plus general heritage vocabulary.
  heritage_keywords:
    - museum
    - musea
    - museums
    - archief
    - archieven
    - archive
    - archives
    - bibliotheek
    - bibliotheken
    - library
    - libraries
    - galerie
    - galerij
    - galerijen
    - gallery
    - galleries
    - erfgoed
    - heritage
    - collectie
    - collection
    - tentoonstelling
    - exhibition
    - GLAM
    - cultureel
    - cultural

  # Standard responses when question is out of scope
  # Keyed by language code; each value is the full reply text.
  standard_response:
    nl: |
      Ik kan je helpen met vragen over erfgoedinstellingen zoals musea, archieven,
      bibliotheken en galerijen in Nederland en daarbuiten.
      Stel gerust een vraag over deze onderwerpen!
    en: |
      I can help you with questions about heritage institutions such as museums,
      archives, libraries and galleries in the Netherlands and beyond.
      Feel free to ask a question about these topics!
    de: |
      Ich kann Ihnen bei Fragen zu Kulturerbeinstitutionen wie Museen, Archiven,
      Bibliotheken und Galerien in den Niederlanden und darüber hinaus helfen.
      Stellen Sie gerne eine Frage zu diesen Themen!
    fr: |
      Je peux vous aider avec des questions sur les institutions patrimoniales comme
      les musées, les archives, les bibliothèques et les galeries aux Pays-Bas et au-delà.
      N'hésitez pas à poser une question sur ces sujets!