---
# SPARQL Query Templates for Heritage Custodian Knowledge Graph
#
# This file defines parameterized SPARQL templates that replace LLM-generated
# queries with deterministic, validated templates.
# Based on docs/plan/prompt-query_template_mapping/
#
# CRITICAL: Processing Pipeline Order
# ===================================
# 1. User question → ConversationContextResolver (DSPy) → RESOLVED question
# 2. RESOLVED question → FykeFilter (DSPy) → relevant/irrelevant
#    ⚠️ FYKE MUST OPERATE ON RESOLVED QUESTION, NOT RAW INPUT!
#    "En in Enschede?" resolved to "Welke archieven zijn er in Enschede?"
#    is clearly relevant
# 3. If relevant: resolved question → TemplateClassifier → template_id
# 4. template_id + resolved question → SlotExtractor → slot values
# 5. template + slot values → TemplateInstantiator (Jinja2) → SPARQL query
#
# Slot Value Sources:
# - data/validation/sparql_validation_rules.json
#   (institution_type_mappings, subregion_mappings, etc.)
# - backend/rag/ontology_mapping.py (fuzzy matching, multilingual synonyms)

_metadata:
  version: "1.0.0"
  created: "2025-01-06"
  schema_source: "schemas/20251121/linkml/"
  validation_rules: "data/validation/sparql_validation_rules.json"

# Standard SPARQL prefixes used in all templates.
# Templates reference this block through the Jinja2 variable `prefixes`.
#
# NOTE(review): every PREFIX declaration had lost its namespace IRI, which
# makes the prologue invalid SPARQL. The IRIs below are the published
# namespaces of the standard vocabularies. Two of them are assumptions that
# must be confirmed against the triple store / LinkML schema:
#   - hc:     project namespace, not recoverable from this file
#   - schema: http:// vs https:// variant
_prefixes: |
  PREFIX hc: <https://nde.nl/ontology/hc/>
  PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
  PREFIX schema: <http://schema.org/>
  PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
  PREFIX org: <http://www.w3.org/ns/org#>
  PREFIX foaf: <http://xmlns.com/foaf/0.1/>
  PREFIX dcterms: <http://purl.org/dc/terms/>
  PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
  PREFIX wd: <http://www.wikidata.org/entity/>

# Slot type definitions with validation sources.
# `synonyms` maps a normalized surface form (key) to the canonical slot value.
_slot_types:
  institution_type:
    description: "Single-letter custodian type code (M, L, A, G, etc.)"
    source: "sparql_validation_rules.json#institution_type_mappings"
    valid_values: ["M", "L", "A", "G", "O", "R", "C", "U", "B", "E", "S", "F", "I", "X", "P", "H", "D", "N", "T"]
    synonyms:
      # Dutch
      museum: "M"
      musea: "M"
      bibliotheek: "L"
      bibliotheken: "L"
      archief: "A"
      archieven: "A"
      galerie: "G"
      galerij: "G"
      galerijen: "G"
      # English
      museums: "M"
      library: "L"
      libraries: "L"
      archive: "A"
      archives: "A"
      gallery: "G"
      galleries: "G"
      # German (the `_de` suffix avoids duplicate keys with Dutch/English forms)
      museen: "M"
      bibliothek_de: "L"
      bibliotheken_de: "L"
      archiv: "A"
      archive_de: "A"
      galerie_de: "G"
      # Special types
      dierentuin: "B"
      zoo: "B"
      botanische_tuin: "B"
      botanical_garden: "B"
      kerk: "H"
      church: "H"
      moskee: "H"
      mosque: "H"
      synagoge: "H"
      synagogue: "H"

  subregion:
    description: "ISO 3166-2 subdivision code (NL-NH, DE-BY, etc.)"
    source: "sparql_validation_rules.json#subregion_mappings"
    synonyms:
      # Netherlands
      noord-holland: "NL-NH"
      noord_holland: "NL-NH"
      noordholland: "NL-NH"
      amsterdam_province: "NL-NH"
      zuid-holland: "NL-ZH"
      zuid_holland: "NL-ZH"
      zuidholland: "NL-ZH"
      rotterdam_province: "NL-ZH"
      den_haag_province: "NL-ZH"
      the_hague_province: "NL-ZH"
      noord-brabant: "NL-NB"
      brabant: "NL-NB"
      eindhoven_province: "NL-NB"
      gelderland: "NL-GE"
      arnhem_province: "NL-GE"
      nijmegen_province: "NL-GE"
      utrecht_province: "NL-UT"
      overijssel: "NL-OV"
      zwolle_province: "NL-OV"
      enschede_province: "NL-OV"
      limburg_nl: "NL-LI"
      maastricht_province: "NL-LI"
      friesland: "NL-FR"
      frisia: "NL-FR"
      leeuwarden_province: "NL-FR"
      groningen_province: "NL-GR"
      drenthe: "NL-DR"
      assen_province: "NL-DR"
      flevoland: "NL-FL"
      almere_province: "NL-FL"
      lelystad_province: "NL-FL"
      zeeland: "NL-ZE"
      middelburg_province: "NL-ZE"
      # Belgium
      vlaanderen: "BE-VLG"
      flanders: "BE-VLG"
      antwerpen_province: "BE-VLG"
      gent_province: "BE-VLG"
      brugge_province: "BE-VLG"
      wallonie: "BE-WAL"
      wallonia: "BE-WAL"
      brussel: "BE-BRU"
      brussels: "BE-BRU"
      bruxelles: "BE-BRU"
      # Germany
      bayern: "DE-BY"
      bavaria: "DE-BY"
      muenchen_province: "DE-BY"
      munich_province: "DE-BY"
      berlin_state: "DE-BE"
      baden_wuerttemberg: "DE-BW"
      stuttgart_province: "DE-BW"
      nordrhein_westfalen: "DE-NW"
      north_rhine_westphalia: "DE-NW"
      koeln_province: "DE-NW"
      cologne_province: "DE-NW"
      duesseldorf_province: "DE-NW"
      sachsen: "DE-SN"
      saxony: "DE-SN"
      dresden_province: "DE-SN"
      hessen: "DE-HE"
      hesse: "DE-HE"
      frankfurt_province: "DE-HE"

  country:
    # Values are Wikidata Q-ids; templates expand them with the `wd:` prefix.
    description: "Wikidata entity URI for country"
    source: "sparql_validation_rules.json#country_mappings"
    format: "wikidata_uri"
    synonyms:
      nederland: "Q55"
      netherlands: "Q55"
      holland: "Q55"
      nl: "Q55"
      belgie: "Q31"
      belgium: "Q31"
      be: "Q31"
      duitsland: "Q183"
      germany: "Q183"
      de_country: "Q183"
      frankrijk: "Q142"
      france: "Q142"
      fr: "Q142"
      verenigd_koninkrijk: "Q145"
      united_kingdom: "Q145"
      uk: "Q145"
      gb: "Q145"
      engeland: "Q145"
      england: "Q145"
      verenigde_staten: "Q30"
      united_states: "Q30"
      usa: "Q30"
      us: "Q30"
      japan: "Q17"
      jp: "Q17"
      tsjechie: "Q213"
      czech_republic: "Q213"
      czechia: "Q213"
      cz: "Q213"
      oostenrijk: "Q40"
      austria: "Q40"
      at: "Q40"
      zwitserland: "Q39"
      switzerland: "Q39"
      ch: "Q39"

  city:
    description: "City/locality name (string literal)"
    source: "fuzzy_match"

  institution_name:
    description: "Institution name for lookup (string literal)"
    source: "fuzzy_match"

  limit:
    description: "Result limit (integer)"
    default: 10
    max: 100

# =============================================================================
# TEMPLATE DEFINITIONS
# =============================================================================
templates:
  # ---------------------------------------------------------------------------
  # Template 1: List institutions by type and location (city)
  # ---------------------------------------------------------------------------
  list_institutions_by_type_city:
    id: "list_institutions_by_type_city"
    description: "List heritage institutions of a specific type in a city"
    intent: ["geographic", "exploration"]
    question_patterns:
      # Dutch
      - "Welke {institution_type_nl} zijn er in {city}?"
      - "Welke {institution_type_nl} heeft {city}?"
      - "Wat zijn de {institution_type_nl} in {city}?"
      - "Geef me de {institution_type_nl} in {city}"
      - "Toon {institution_type_nl} in {city}"
      - "{institution_type_nl} in {city}"
      # English
      - "What {institution_type_en} are in {city}?"
      - "Which {institution_type_en} are there in {city}?"
      - "List {institution_type_en} in {city}"
      - "Show me {institution_type_en} in {city}"
      - "{institution_type_en} in {city}"
      # German
      - "Welche {institution_type_de} gibt es in {city}?"
      - "Welche {institution_type_de} hat {city}?"
    slots:
      institution_type:
        type: institution_type
        required: true
        examples: ["musea", "archieven", "bibliotheken", "museums", "archives"]
      city:
        type: city
        required: true
        examples: ["Amsterdam", "Den Haag", "Rotterdam", "Utrecht"]
    # The "/" in the class name is backslash-escaped: an unescaped "/" is not
    # allowed in the local part of a SPARQL prefixed name. The escaped form
    # resolves to the IRI <hc-namespace>class/Custodian.
    sparql_template: |
      {{ prefixes }}

      SELECT DISTINCT ?institution ?name ?website WHERE {
        ?institution a hc:class\/Custodian ;
                     hc:institutionType "{{ institution_type }}" ;
                     schema:addressLocality "{{ city }}" ;
                     skos:prefLabel ?name .
        OPTIONAL { ?institution foaf:homepage ?website }
      }
      ORDER BY ?name
      {% if limit %}LIMIT {{ limit }}{% endif %}
    examples:
      - question: "Welke musea zijn er in Amsterdam?"
        slots: {institution_type: "M", city: "Amsterdam"}
      - question: "What archives are in The Hague?"
        slots: {institution_type: "A", city: "Den Haag"}

  # ---------------------------------------------------------------------------
  # Template 2: List institutions by type and province/region
  # ---------------------------------------------------------------------------
  list_institutions_by_type_region:
    id: "list_institutions_by_type_region"
    description: "List heritage institutions of a specific type in a province/region"
    intent: ["geographic", "exploration"]
    question_patterns:
      # Dutch
      - "Welke {institution_type_nl} zijn er in {region}?"
      # NOTE(review): "Hoeveel ..." is a count question and also appears in
      # count_institutions_by_type_location; confirm it belongs in a list
      # template.
      - "Hoeveel {institution_type_nl} heeft {region}?"
      - "{institution_type_nl} in {region}"
      - "Alle {institution_type_nl} in de provincie {region}"
      # English
      - "What {institution_type_en} are in {region}?"
      - "Which {institution_type_en} are there in {region}?"
      - "{institution_type_en} in {region}"
    slots:
      institution_type:
        type: institution_type
        required: true
      region:
        type: subregion
        required: true
        examples: ["Noord-Holland", "Gelderland", "Limburg", "Bavaria", "Flanders"]
    # Region membership is tested as a prefix match of the ISO 3166-2 code on
    # the GHCID. "/" in the class name is backslash-escaped (see Template 1).
    sparql_template: |
      {{ prefixes }}

      SELECT DISTINCT ?institution ?name ?city WHERE {
        ?institution a hc:class\/Custodian ;
                     hc:institutionType "{{ institution_type }}" ;
                     hc:ghcid ?ghcid ;
                     skos:prefLabel ?name .
        FILTER(STRSTARTS(?ghcid, "{{ region }}"))
        OPTIONAL { ?institution schema:addressLocality ?city }
      }
      ORDER BY ?name
      {% if limit %}LIMIT {{ limit }}{% endif %}
    # Alternative variant matching on crm:E39_Actor instead of the hc class.
    sparql_template_alt: |
      {{ prefixes }}

      SELECT DISTINCT ?institution ?name ?city WHERE {
        ?institution a crm:E39_Actor ;
                     hc:institutionType "{{ institution_type }}" ;
                     hc:ghcid ?ghcid ;
                     skos:prefLabel ?name .
        FILTER(STRSTARTS(?ghcid, "{{ region }}"))
        OPTIONAL { ?institution schema:addressLocality ?city }
      }
      ORDER BY ?name
      {% if limit %}LIMIT {{ limit }}{% endif %}

  # ---------------------------------------------------------------------------
  # Template 3: List institutions by type and country
  # ---------------------------------------------------------------------------
  list_institutions_by_type_country:
    id: "list_institutions_by_type_country"
    description: "List heritage institutions of a specific type in a country"
    intent: ["geographic", "exploration"]
    question_patterns:
      # Dutch
      - "Welke {institution_type_nl} zijn er in {country}?"
      - "Alle {institution_type_nl} in {country}"
      - "{institution_type_nl} in {country}"
      # English
      - "What {institution_type_en} are in {country}?"
      - "List all {institution_type_en} in {country}"
      - "{institution_type_en} in {country}"
    slots:
      institution_type:
        type: institution_type
        required: true
      country:
        type: country
        required: true
        examples: ["Nederland", "Belgium", "Germany", "France"]
    # `country` is a Wikidata Q-id (see _slot_types.country), so the object of
    # schema:addressCountry is written as wd:<Q-id>.
    sparql_template: |
      {{ prefixes }}

      SELECT DISTINCT ?institution ?name ?city WHERE {
        ?institution a crm:E39_Actor ;
                     hc:institutionType "{{ institution_type }}" ;
                     schema:addressCountry wd:{{ country }} ;
                     skos:prefLabel ?name .
        OPTIONAL { ?institution schema:addressLocality ?city }
      }
      ORDER BY ?name
      {% if limit %}LIMIT {{ limit }}{% endif %}

  # ---------------------------------------------------------------------------
  # Template 4: Count institutions by type and location
  # ---------------------------------------------------------------------------
  count_institutions_by_type_location:
    id: "count_institutions_by_type_location"
    description: "Count heritage institutions of a specific type in a location"
    intent: ["statistical"]
    question_patterns:
      # Dutch
      - "Hoeveel {institution_type_nl} zijn er in {location}?"
      - "Hoeveel {institution_type_nl} heeft {location}?"
      - "Aantal {institution_type_nl} in {location}"
      - "Tel de {institution_type_nl} in {location}"
      # English
      - "How many {institution_type_en} are in {location}?"
      - "How many {institution_type_en} does {location} have?"
      - "Count of {institution_type_en} in {location}"
      - "Number of {institution_type_en} in {location}"
    slots:
      institution_type:
        type: institution_type
        required: true
      location:
        type: city
        required: true
        fallback_types: [subregion, country]
    # One variant per location kind: city (default), region, country.
    sparql_template: |
      {{ prefixes }}

      SELECT (COUNT(DISTINCT ?institution) AS ?count) WHERE {
        ?institution a crm:E39_Actor ;
                     hc:institutionType "{{ institution_type }}" ;
                     schema:addressLocality "{{ location }}" .
      }
    sparql_template_region: |
      {{ prefixes }}

      SELECT (COUNT(DISTINCT ?institution) AS ?count) WHERE {
        ?institution a crm:E39_Actor ;
                     hc:institutionType "{{ institution_type }}" ;
                     hc:ghcid ?ghcid .
        FILTER(STRSTARTS(?ghcid, "{{ location }}"))
      }
    # `location` holds a Wikidata Q-id here (country slot type).
    sparql_template_country: |
      {{ prefixes }}

      SELECT (COUNT(DISTINCT ?institution) AS ?count) WHERE {
        ?institution a crm:E39_Actor ;
                     hc:institutionType "{{ institution_type }}" ;
                     schema:addressCountry wd:{{ location }} .
      }

  # ---------------------------------------------------------------------------
  # Template 5: Count all institutions by type (distribution)
  # ---------------------------------------------------------------------------
  count_institutions_by_type:
    id: "count_institutions_by_type"
    description: "Count institutions grouped by type"
    intent: ["statistical"]
    question_patterns:
      # Dutch
      - "Hoeveel instellingen per type?"
      - "Verdeling van instellingen per type"
      - "Hoeveel musea, archieven en bibliotheken zijn er?"
      - "Statistieken per instellingstype"
      # English
      - "How many institutions per type?"
      - "Distribution of institutions by type"
      - "Statistics by institution type"
      - "How many museums, archives and libraries are there?"
    slots: {}
    sparql_template: |
      {{ prefixes }}

      SELECT ?type (COUNT(DISTINCT ?institution) AS ?count) WHERE {
        ?institution a crm:E39_Actor ;
                     hc:institutionType ?type .
      }
      GROUP BY ?type
      ORDER BY DESC(?count)

  # ---------------------------------------------------------------------------
  # Template 6: Find institution by name
  # ---------------------------------------------------------------------------
  find_institution_by_name:
    id: "find_institution_by_name"
    description: "Find a specific institution by name"
    intent: ["entity_lookup"]
    question_patterns:
      # Dutch
      - "Waar is {institution_name}?"
      - "Informatie over {institution_name}"
      - "Gegevens van {institution_name}"
      - "Wat is {institution_name}?"
      - "Zoek {institution_name}"
      # English
      - "Where is {institution_name}?"
      - "Information about {institution_name}"
      - "What is {institution_name}?"
      - "Find {institution_name}"
      - "Tell me about {institution_name}"
    slots:
      institution_name:
        type: institution_name
        required: true
        examples: ["Rijksmuseum", "Nationaal Archief", "Koninklijke Bibliotheek"]
    # Case-insensitive substring match on the preferred label.
    sparql_template: |
      {{ prefixes }}

      SELECT ?institution ?name ?type ?city ?country ?website ?description WHERE {
        ?institution a crm:E39_Actor ;
                     skos:prefLabel ?name .
        FILTER(CONTAINS(LCASE(STR(?name)), LCASE("{{ institution_name }}")))
        OPTIONAL { ?institution hc:institutionType ?type }
        OPTIONAL { ?institution schema:addressLocality ?city }
        OPTIONAL { ?institution schema:addressCountry ?country }
        OPTIONAL { ?institution foaf:homepage ?website }
        OPTIONAL { ?institution schema:description ?description }
      }
      LIMIT 10

  # ---------------------------------------------------------------------------
  # Template 7: List institutions in city (all types)
  # ---------------------------------------------------------------------------
  list_all_institutions_in_city:
    id: "list_all_institutions_in_city"
    description: "List all heritage institutions in a city"
    intent: ["geographic", "exploration"]
    question_patterns:
      # Dutch
      - "Welke erfgoedinstellingen zijn er in {city}?"
      - "Alle instellingen in {city}"
      - "Erfgoed in {city}"
      - "Wat is er te zien in {city}?"
      - "Culturele instellingen in {city}"
      # English
      - "What heritage institutions are in {city}?"
      - "All institutions in {city}"
      - "Heritage in {city}"
      - "What is there to see in {city}?"
      - "Cultural institutions in {city}"
    slots:
      city:
        type: city
        required: true
    sparql_template: |
      {{ prefixes }}

      SELECT ?institution ?name ?type ?website WHERE {
        ?institution a crm:E39_Actor ;
                     schema:addressLocality "{{ city }}" ;
                     skos:prefLabel ?name .
        OPTIONAL { ?institution hc:institutionType ?type }
        OPTIONAL { ?institution foaf:homepage ?website }
      }
      ORDER BY ?type ?name
      {% if limit %}LIMIT {{ limit }}{% endif %}

  # ---------------------------------------------------------------------------
  # Template 8: Find oldest/youngest institutions
  # ---------------------------------------------------------------------------
  find_institutions_by_founding_date:
    id: "find_institutions_by_founding_date"
    description: "Find oldest or most recently founded institutions"
    intent: ["temporal", "exploration"]
    question_patterns:
      # Dutch
      - "Wat zijn de oudste {institution_type_nl}?"
      - "Wat is het oudste {institution_type_nl}?"
      - "Oudste {institution_type_nl} in {location}"
      - "Wanneer is {institution_name} opgericht?"
      - "Wat zijn de nieuwste {institution_type_nl}?"
      - "Recent opgerichte {institution_type_nl}"
      # English
      - "What are the oldest {institution_type_en}?"
      - "What is the oldest {institution_type_en}?"
      - "Oldest {institution_type_en} in {location}"
      - "When was {institution_name} founded?"
      - "What are the newest {institution_type_en}?"
      - "Recently founded {institution_type_en}"
    slots:
      institution_type:
        type: institution_type
        required: false
      location:
        type: city
        required: false
        fallback_types: [subregion, country]
      # Optional: backs the "{institution_name}" question patterns above.
      # Without it, "When was X founded?" would rank every institution
      # instead of looking up X.
      institution_name:
        type: institution_name
        required: false
      order:
        type: string
        default: "ASC"
        valid_values: ["ASC", "DESC"]
    # All filters are optional; with none set the query ranks every
    # institution that has a founding date. The name filter uses the same
    # case-insensitive substring match as find_institution_by_name.
    sparql_template: |
      {{ prefixes }}

      SELECT ?institution ?name ?founded ?city WHERE {
        ?institution a crm:E39_Actor ;
                     skos:prefLabel ?name ;
                     schema:foundingDate ?founded .
        {% if institution_type %}
        ?institution hc:institutionType "{{ institution_type }}" .
        {% endif %}
        {% if location %}
        ?institution schema:addressLocality "{{ location }}" .
        {% endif %}
        {% if institution_name %}
        FILTER(CONTAINS(LCASE(STR(?name)), LCASE("{{ institution_name }}")))
        {% endif %}
        OPTIONAL { ?institution schema:addressLocality ?city }
      }
      ORDER BY {{ order }}(?founded)
      LIMIT {{ limit | default(10) }}

  # ---------------------------------------------------------------------------
  # Template 9: Find institutions with specific identifier (ISIL, etc.)
  # ---------------------------------------------------------------------------
  find_institution_by_identifier:
    id: "find_institution_by_identifier"
    description: "Find institution by ISIL, GHCID, or other identifier"
    intent: ["entity_lookup"]
    question_patterns:
      - "Welke instelling heeft ISIL {identifier}?"
      - "Zoek ISIL {identifier}"
      - "GHCID {identifier}"
      - "Institution with ISIL {identifier}"
      - "Find ISIL {identifier}"
    slots:
      identifier:
        type: string
        required: true
        examples: ["NL-AmRMA", "NL-HaNA", "DE-1"]
      # NOTE(review): "wikidata" is accepted here but no
      # sparql_template_wikidata variant is defined below; confirm how the
      # instantiator handles that value.
      identifier_type:
        type: string
        default: "isil"
        valid_values: ["isil", "ghcid", "wikidata"]
    sparql_template_isil: |
      {{ prefixes }}

      SELECT ?institution ?name ?city ?country ?website WHERE {
        ?institution a crm:E39_Actor ;
                     hc:isil "{{ identifier }}" ;
                     skos:prefLabel ?name .
        OPTIONAL { ?institution schema:addressLocality ?city }
        OPTIONAL { ?institution schema:addressCountry ?country }
        OPTIONAL { ?institution foaf:homepage ?website }
      }
    sparql_template_ghcid: |
      {{ prefixes }}

      SELECT ?institution ?name ?city ?country ?website WHERE {
        ?institution a crm:E39_Actor ;
                     hc:ghcid "{{ identifier }}" ;
                     skos:prefLabel ?name .
        OPTIONAL { ?institution schema:addressLocality ?city }
        OPTIONAL { ?institution schema:addressCountry ?country }
        OPTIONAL { ?institution foaf:homepage ?website }
      }

  # ---------------------------------------------------------------------------
  # Template 10: Compare institutions in different locations
  # ---------------------------------------------------------------------------
  compare_locations:
    id: "compare_locations"
    description: "Compare number of institutions between locations"
    intent: ["comparative", "statistical"]
    question_patterns:
      # Dutch
      - "Vergelijk {location1} en {location2}"
      - "Hoeveel meer {institution_type_nl} heeft {location1} dan {location2}?"
      - "Verschil tussen {location1} en {location2}"
      - "{location1} versus {location2}"
      # English
      - "Compare {location1} and {location2}"
      - "How many more {institution_type_en} does {location1} have than {location2}?"
      - "Difference between {location1} and {location2}"
      - "{location1} vs {location2}"
    slots:
      location1:
        type: city
        required: true
        fallback_types: [subregion, country]
      location2:
        type: city
        required: true
        fallback_types: [subregion, country]
      institution_type:
        type: institution_type
        required: false
    # Both locations are matched as schema:addressLocality literals.
    sparql_template: |
      {{ prefixes }}

      SELECT ?location (COUNT(DISTINCT ?institution) AS ?count) WHERE {
        VALUES ?location { "{{ location1 }}" "{{ location2 }}" }
        ?institution a crm:E39_Actor ;
                     schema:addressLocality ?location .
        {% if institution_type %}
        ?institution hc:institutionType "{{ institution_type }}" .
        {% endif %}
      }
      GROUP BY ?location

# =============================================================================
# FOLLOW-UP PATTERNS (Conversation Context Resolution)
# =============================================================================
# These patterns help ConversationContextResolver (DSPy) expand elliptical
# follow-up questions BEFORE the Fyke filter runs.
#
# CRITICAL: ConversationContextResolver runs FIRST, then Fyke operates on
# the RESOLVED question. This prevents false positives on short follow-ups.
#
# Example flow:
#   Turn 1: "Welke archieven zijn er in Den Haag?" → lists archives
#   Turn 2: "En in Enschede?" (raw input - would be caught by naive Fyke!)
#           ↓ ConversationContextResolver
#           "Welke archieven zijn er in Enschede?" (resolved - clearly relevant)
#           ↓ FykeFilter
#           PASS (relevant)
#           ↓ TemplateClassifier
#           list_institutions_by_type_city
#
# NOTE(review): "What about {…}?" appears under location_swap, type_swap and
# details_request; the pattern text alone does not disambiguate them.
follow_up_patterns:
  location_swap:
    description: "Same query type, different location"
    patterns:
      - "En in {new_location}?"
      - "En {new_location}?"
      - "What about {new_location}?"
      - "And in {new_location}?"
      - "Hoe zit het met {new_location}?"
      - "In {new_location}?"
      - "{new_location}?"
    slot_inheritance:
      - institution_type
    resolution_strategy: "inherit_template_swap_location"

  type_swap:
    description: "Same location, different institution type"
    patterns:
      - "En de {new_type}?"
      - "Hoe zit het met {new_type}?"
      - "What about {new_type}?"
      - "And {new_type}?"
      - "En {new_type}?"
      - "{new_type}?"
    slot_inheritance:
      - city
      - region
      - country
    resolution_strategy: "inherit_location_swap_type"

  count_from_list:
    description: "Count after listing"
    patterns:
      - "Hoeveel zijn dat?"
      - "How many is that?"
      - "How many are there?"
      - "Hoeveel?"
      - "How many?"
      - "Tel ze"
      - "Count them"
    transforms_to: "count_institutions_by_type_location"
    slot_inheritance:
      - institution_type
      - city
      - region
      - country
    resolution_strategy: "convert_list_to_count"

  details_request:
    description: "More details about specific result"
    patterns:
      - "Vertel me meer over {entity}"
      - "Tell me more about {entity}"
      - "Meer informatie over {entity}"
      - "What about {entity}?"
      - "More about {entity}"
      - "Details over {entity}"
    transforms_to: "find_institution_by_name"
    resolution_strategy: "extract_entity_lookup"

  ordinal_reference:
    description: "Reference to result by position"
    patterns:
      - "De eerste"
      - "De tweede"
      - "De derde"
      - "The first one"
      - "The second one"
      - "Number {n}"
      - "Nummer {n}"
    requires_previous_results: true
    resolution_strategy: "resolve_ordinal_to_entity"

  pronoun_reference:
    description: "Reference using pronouns"
    patterns:
      - "Wat is hun website?"
      - "What is their website?"
      - "Waar zijn ze gevestigd?"
      - "Where are they located?"
      - "Wanneer zijn ze opgericht?"
      - "When were they founded?"
    requires_previous_results: true
    resolution_strategy: "resolve_pronoun_to_entity"

# =============================================================================
# FYKE FILTER CONFIGURATION
# =============================================================================
# The Fyke filter catches irrelevant questions and returns a standard response.
#
# ⚠️ CRITICAL ORDERING:
# 1. ConversationContextResolver FIRST (expands follow-ups)
# 2. FykeFilter on RESOLVED question (not raw input!)
#
# This prevents false positives like:
# - "En in Enschede?" → resolved to "Welke archieven zijn er in Enschede?" → PASS
# - "Hoeveel?" → resolved to "Hoeveel archieven zijn er in Den Haag?" → PASS
fyke_filter:
  # DSPy Signature for relevance classification
  # Operates on RESOLVED question only!
  dspy_signature:
    inputs:
      resolved_question: "The fully resolved question (after context resolution)"
      conversation_summary: "Brief summary of conversation topic"
    outputs:
      is_relevant: "boolean - whether question is about heritage institutions"
      confidence: "float 0-1 - confidence in classification"
      reasoning: "Brief explanation of relevance decision"

  # Hard-coded out-of-scope keywords (checked AFTER context resolution)
  # These are terms that are NEVER relevant to heritage queries
  out_of_scope_keywords:
    - tandpasta
    - toothpaste
    - supermarkt
    - supermarket
    - restaurant
    - hotel
    - weer
    - weather
    - voetbal
    - soccer
    - football
    - recept
    - recipe
    - vliegticket
    - flight
    - politiek
    - politics
    - bitcoin
    - crypto
    - dating
    - tinder

  # Categories that are out of scope
  out_of_scope_categories:
    - shopping
    - travel_booking
    - sports
    - cooking
    - entertainment
    - personal_advice
    - medical
    - legal
    - financial

  # Keywords that indicate heritage relevance (boost confidence)
  heritage_keywords:
    - museum
    - musea
    - archief
    - archieven
    - bibliotheek
    - bibliotheken
    - galerie
    - erfgoed
    - heritage
    - collectie
    - collection
    - tentoonstelling
    - exhibition
    - GLAM
    - cultureel
    - cultural

  # Standard responses when question is out of scope (keyed by language code)
  standard_response:
    nl: |
      Ik kan je helpen met vragen over erfgoedinstellingen zoals musea, archieven,
      bibliotheken en galerijen in Nederland en daarbuiten.
      Stel gerust een vraag over deze onderwerpen!
    en: |
      I can help you with questions about heritage institutions such as museums, archives,
      libraries and galleries in the Netherlands and beyond.
      Feel free to ask a question about these topics!
    de: |
      Ich kann Ihnen bei Fragen zu Kulturerbeinstitutionen wie Museen, Archiven,
      Bibliotheken und Galerien in den Niederlanden und darüber hinaus helfen.
      Stellen Sie gerne eine Frage zu diesen Themen!
    fr: |
      Je peux vous aider avec des questions sur les institutions patrimoniales comme
      les musées, les archives, les bibliothèques et les galeries aux Pays-Bas et au-delà.
      N'hésitez pas à poser une question sur ces sujets!