glam/docs/dspy_rag/07-sparql-templates.md
2025-12-12 12:51:10 +01:00

14 KiB

SPARQL Query Templates for Heritage Custodian Ontology

Overview

This document provides SPARQL query templates for the Heritage Custodian ontology. These templates support both local RDF/TypeDB queries and federated Wikidata queries.

Namespace Prefixes

PREFIX hc: <https://nde.nl/ontology/hc/>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX prov: <http://www.w3.org/ns/prov#>
PREFIX org: <http://www.w3.org/ns/org#>
PREFIX schema: <http://schema.org/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX geof: <http://www.opengis.net/def/function/geosparql/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX rico: <https://www.ica.org/standards/RiC/ontology#>
PREFIX tooi: <https://identifier.overheid.nl/tooi/def/ont/>
PREFIX cpov: <http://data.europa.eu/m8g/>
PREFIX gleif: <https://www.gleif.org/ontology/Base/>

1. Entity Lookup Queries

Get Custodian by GHCID

# Look up a single custodian by GHCID.
# Name and type are required; description and country are returned when present.
SELECT ?custodian ?name ?type ?description ?country
WHERE {
    ?custodian hc:ghcid "NL-NH-AMS-M-RM" .
    ?custodian a hc:Custodian .
    ?custodian skos:prefLabel ?name .
    ?custodian hc:custodian_type ?type .

    OPTIONAL { ?custodian dct:description ?description }
    OPTIONAL { ?custodian hc:country ?country }
}

Get Custodian by Identifier

# Resolve a custodian from an ISIL code stored on one of its identifier nodes
SELECT ?custodian ?name ?ghcid
WHERE {
    # Identifier node carrying the ISIL scheme/value pair
    ?identifier hc:identifier_value "NL-AmRM" .
    ?identifier hc:identifier_scheme "ISIL" .

    # Custodian that owns that identifier
    ?custodian hc:identifiers ?identifier .
    ?custodian a hc:Custodian .
    ?custodian hc:ghcid ?ghcid .
    ?custodian skos:prefLabel ?name .
}

Get Custodian with All Aspects

# Fetch one custodian (by GHCID) together with whichever aspects exist for it.
# Each aspect is optional and must carry the expected aspect class to be returned.
SELECT ?custodian ?name ?legalStatus ?place ?collection ?platform
WHERE {
    ?custodian hc:ghcid "NL-NH-AMS-M-RM" .
    ?custodian a hc:Custodian .
    ?custodian skos:prefLabel ?name .

    # Aspect: legal status
    OPTIONAL {
        ?legalStatus a hc:CustodianLegalStatus .
        ?custodian hc:has_legal_status ?legalStatus .
    }

    # Aspect: place
    OPTIONAL {
        ?place a hc:CustodianPlace .
        ?custodian hc:has_place ?place .
    }

    # Aspect: collection
    OPTIONAL {
        ?collection a hc:CustodianCollection .
        ?custodian hc:manages_collection ?collection .
    }

    # Aspect: digital platform
    OPTIONAL {
        ?platform a hc:DigitalPlatform .
        ?custodian hc:has_platform ?platform .
    }
}

2. Type-Based Queries

Get All Custodians by Type

# List custodians typed hc:MUSEUM with country "NL", sorted by name.
# The settlement of the custodian's place is included when available.
SELECT ?custodian ?name ?city ?ghcid
WHERE {
    ?custodian hc:custodian_type hc:MUSEUM .
    ?custodian hc:country "NL" .
    ?custodian a hc:Custodian .
    ?custodian hc:ghcid ?ghcid .
    ?custodian skos:prefLabel ?name .

    OPTIONAL {
        ?custodian hc:has_place ?location .
        ?location hc:settlement ?city .
    }
}
ORDER BY ?name

Get Custodians by Multiple Types (MIXED)

# Custodians classified as hc:MIXED, one row per value of hc:actual_types
SELECT ?custodian ?name ?types
WHERE {
    ?custodian hc:custodian_type hc:MIXED .
    ?custodian a hc:Custodian .
    ?custodian skos:prefLabel ?name .
    ?custodian hc:actual_types ?types .
}

Count Custodians by Type

# Tally custodians per custodian type, largest group first
SELECT ?type (COUNT(?custodian) AS ?count)
WHERE {
    ?custodian hc:custodian_type ?type .
    ?custodian a hc:Custodian .
}
GROUP BY ?type
ORDER BY DESC(?count)

3. Geographic Queries

Get Custodians by Region

# Custodians whose place has region code "NH" and country "NL" (Noord-Holland),
# sorted by settlement and then by name; settlement is optional.
SELECT ?custodian ?name ?city ?type
WHERE {
    # Restrict on the place first
    ?location hc:country "NL" .
    ?location hc:region_code "NH" .

    ?custodian hc:has_place ?location .
    ?custodian a hc:Custodian .
    ?custodian hc:custodian_type ?type .
    ?custodian skos:prefLabel ?name .

    OPTIONAL { ?location hc:settlement ?city }
}
ORDER BY ?city ?name

Get Custodians Near Location

# Custodians located less than 50 km from Amsterdam, nearest first.
# Requires a GeoSPARQL-capable store and the geof: function prefix in the prologue.
SELECT ?custodian ?name ?distance
WHERE {
    ?custodian a hc:Custodian .
    ?custodian skos:prefLabel ?name .
    ?custodian hc:has_place ?location .

    # WKT serialisation of the place geometry
    ?location geo:hasGeometry ?geometry .
    ?geometry geo:asWKT ?coords .

    # Distance in kilometres to the Amsterdam reference point (lon 4.9, lat 52.37)
    BIND(
        geof:distance(
            ?coords,
            "POINT(4.9 52.37)"^^geo:wktLiteral,
            <http://www.opengis.net/def/uom/OGC/1.0/kilometre>
        ) AS ?distance
    )

    FILTER(?distance < 50)
}
ORDER BY ?distance

Get Custodians by GeoNames ID

# Custodians whose place carries GeoNames ID "2759794" (Amsterdam)
SELECT ?custodian ?name
WHERE {
    ?location hc:geonames_id "2759794" .

    ?custodian hc:has_place ?location .
    ?custodian a hc:Custodian .
    ?custodian skos:prefLabel ?name .
}

4. Relationship Queries

Get Encompassing Body Members

# Members of the network organisation labelled "Netwerk Digitaal Erfgoed", sorted by name.
# A role is returned only when a membership node links the member, the body and a role.
SELECT ?custodian ?name ?type ?role
WHERE {
    ?body skos:prefLabel "Netwerk Digitaal Erfgoed" .
    ?body a hc:NetworkOrganisation .

    ?custodian hc:is_member_of ?body .
    ?custodian hc:custodian_type ?type .
    ?custodian skos:prefLabel ?name .

    OPTIONAL {
        ?membership hc:body ?body .
        ?membership hc:member ?custodian .
        ?membership hc:role ?role .
    }
}
ORDER BY ?name

Get Project Participants

# Participants of the project labelled "NDE Versnellen 2024", with their role
SELECT ?custodian ?name ?role
WHERE {
    ?project a hc:Project ;
             skos:prefLabel "NDE Versnellen 2024" ;
             hc:participating_custodians ?participation .

    # Participation node: who took part and in which role
    ?participation hc:role ?role ;
                   hc:custodian ?custodian .

    ?custodian skos:prefLabel ?name .
}

Get Organizational Hierarchy

# Parent/child pairs: every custodian that is a sub-organization of a labelled parent
SELECT ?parent ?parentName ?child ?childName
WHERE {
    ?child org:subOrganizationOf ?parent .
    ?child a hc:Custodian .
    ?child skos:prefLabel ?childName .
    ?parent skos:prefLabel ?parentName .
}

5. Collection Queries

Get Collections by Subject

# Collections with a subject area mentioning World War II
# (case-insensitive match on "world war", "tweede wereldoorlog" or "1940-1945").
SELECT ?custodian ?custodianName ?collection ?collectionName ?extent
WHERE {
    ?custodian a hc:Custodian .
    ?custodian hc:manages_collection ?collection .
    ?custodian skos:prefLabel ?custodianName .

    ?collection hc:subject_areas ?subject .
    ?collection skos:prefLabel ?collectionName .

    # Lower-case once, then test each keyword against the same value
    BIND(LCASE(?subject) AS ?subjectLower)
    FILTER(
        CONTAINS(?subjectLower, "world war")
        || CONTAINS(?subjectLower, "tweede wereldoorlog")
        || CONTAINS(?subjectLower, "1940-1945")
    )

    OPTIONAL { ?collection hc:extent ?extent }
}

Get Collections by Temporal Coverage

# Collections whose time-span overlaps the 19th century (1800-01-01 .. 1899-12-31).
# Uses the xsd: prefix for the date literals.
SELECT ?custodian ?collection ?startDate ?endDate
WHERE {
    ?custodian a hc:Custodian .
    ?custodian hc:manages_collection ?collection .

    ?collection hc:temporal_extent ?period .
    ?period crm:P82a_begin_of_the_begin ?startDate .
    ?period crm:P82b_end_of_the_end ?endDate .

    # Interval overlap: starts before the century ends AND ends after it begins
    FILTER(?startDate <= "1899-12-31"^^xsd:date)
    FILTER(?endDate >= "1800-01-01"^^xsd:date)
}

Get Digitized Collections

# Collections marked "DIGITIZED", paired with each platform URL of the managing custodian
SELECT ?custodian ?collection ?platform ?accessURL
WHERE {
    ?collection hc:digitization_status "DIGITIZED" .

    ?custodian hc:manages_collection ?collection .
    ?custodian a hc:Custodian .
    ?custodian hc:has_platform ?platform .

    ?platform hc:platform_url ?accessURL .
}

6. Change Event Queries

Get Organizational History

# Change events affecting the custodian with GHCID "NL-NH-HAA-A-NHA", oldest first.
# The event description is included when one exists.
SELECT ?event ?eventType ?date ?description
WHERE {
    ?custodian hc:ghcid "NL-NH-HAA-A-NHA" .
    ?custodian a hc:Custodian .
    ?custodian hc:affected_by_event ?event .

    ?event hc:event_date ?date .
    ?event hc:event_type ?eventType .

    OPTIONAL { ?event dct:description ?description }
}
ORDER BY ?date

Get Mergers

# Merger events in date order: one row per affected organization,
# plus the resulting organization's label when it is recorded.
SELECT ?event ?date ?affectedOrgs ?resultingOrg
WHERE {
    ?event hc:event_type hc:MERGER .
    ?event a hc:ChangeEvent .
    ?event hc:event_date ?date .

    # Organizations that went into the merger
    ?event hc:affected_organization ?affectedOrg .
    ?affectedOrg skos:prefLabel ?affectedOrgs .

    # Organization that came out of the merger
    OPTIONAL {
        ?event hc:resulting_organization ?resultOrg .
        ?resultOrg skos:prefLabel ?resultingOrg .
    }
}
ORDER BY ?date

Get GHCID History

# GHCID history entries of one custodian, ordered by start of validity.
# End of validity and change reason are optional.
SELECT ?ghcid ?validFrom ?validTo ?reason
WHERE {
    ?custodian hc:ghcid "NL-NH-AMS-M-RM" .
    ?custodian a hc:Custodian .
    ?custodian hc:ghcid_history ?historyEntry .

    ?historyEntry hc:valid_from ?validFrom .
    ?historyEntry hc:ghcid ?ghcid .

    OPTIONAL { ?historyEntry hc:valid_to ?validTo }
    OPTIONAL { ?historyEntry hc:reason ?reason }
}
ORDER BY ?validFrom

7. Digital Platform Queries

Get Platforms by Type

# Get all IIIF-enabled platforms together with the owning custodian and its name.
# ?name is projected because the pattern already requires a skos:prefLabel;
# previously the label was matched (restricting results) but never returned.
SELECT ?custodian ?name ?platform ?url WHERE {
    ?custodian a hc:Custodian ;
               skos:prefLabel ?name ;
               hc:has_platform ?platform .

    # Only platforms typed as IIIF endpoints, with their URL
    ?platform hc:platform_type hc:IIIF_ENDPOINT ;
              hc:platform_url ?url .
}

Get Platforms with API Access

# Platforms whose type is a SPARQL endpoint, an OAI-PMH endpoint or a REST API
SELECT ?custodian ?platform ?apiType ?endpoint
WHERE {
    # Accepted machine-access platform types
    VALUES ?apiType { hc:SPARQL_ENDPOINT hc:OAI_PMH_ENDPOINT hc:REST_API }

    ?platform hc:platform_type ?apiType .
    ?platform hc:platform_url ?endpoint .

    ?custodian hc:has_platform ?platform .
    ?custodian a hc:Custodian .
}

8. Wikidata Federation Queries

Enrich Local Entity from Wikidata

# Get Wikidata enrichment (instance-of, coordinates, image) for local custodians
# that carry a Wikidata identifier.
SELECT ?custodian ?name ?wikidataId ?instanceOf ?coord ?image WHERE {
    # Local entity and its stored Wikidata Q-identifier
    ?custodian a hc:Custodian ;
               skos:prefLabel ?name ;
               hc:identifiers ?ident .

    ?ident hc:identifier_scheme "Wikidata" ;
           hc:identifier_value ?wikidataId .

    # Build the entity IRI locally, BEFORE the SERVICE clause. Variables bound
    # outside a SERVICE block are not in scope inside it, so a BIND placed in
    # the remote pattern would see ?wikidataId as unbound, leave ?wdEntity
    # unbound, and ask Wikidata for every wdt:P31 statement it holds.
    BIND(IRI(CONCAT("http://www.wikidata.org/entity/", ?wikidataId)) AS ?wdEntity)

    # Federation to Wikidata, constrained by the already-bound ?wdEntity
    SERVICE <https://query.wikidata.org/sparql> {
        ?wdEntity wdt:P31 ?instanceOf .
        OPTIONAL { ?wdEntity wdt:P625 ?coord }
        OPTIONAL { ?wdEntity wdt:P18 ?image }
    }
}

Find Missing Local Entities in Wikidata

# Museums in the Netherlands known to Wikidata that have no matching
# Wikidata identifier in the local dataset. Uses the rdfs: prefix for labels.
SELECT ?wdEntity ?name ?isil
WHERE {
    SERVICE <https://query.wikidata.org/sparql> {
        ?wdEntity wdt:P17 wd:Q55 .       # country: Netherlands
        ?wdEntity wdt:P31 wd:Q33506 .    # instance of: museum
        ?wdEntity rdfs:label ?name .

        # Keep Dutch and English labels only
        FILTER(LANG(?name) IN ("nl", "en"))

        OPTIONAL { ?wdEntity wdt:P791 ?isil }  # ISIL code
    }

    # Drop entities whose Q-id is already stored on a local custodian
    FILTER NOT EXISTS {
        ?localCustodian a hc:Custodian .
        ?localCustodian hc:identifiers ?localIdent .
        ?localIdent hc:identifier_scheme "Wikidata" .
        ?localIdent hc:identifier_value ?wikidataId .
        FILTER(?wikidataId = REPLACE(STR(?wdEntity), "http://www.wikidata.org/entity/", ""))
    }
}

Cross-Reference Identifiers

# Get identifier cross-references (ISIL, VIAF, ISNI, ROR) from Wikidata for
# local custodians that carry a Wikidata identifier.
SELECT ?custodian ?name ?isil ?viaf ?isni ?ror WHERE {
    ?custodian a hc:Custodian ;
               skos:prefLabel ?name ;
               hc:identifiers ?wdIdent .

    ?wdIdent hc:identifier_scheme "Wikidata" ;
             hc:identifier_value ?wikidataId .

    # Build the entity IRI locally, BEFORE the SERVICE clause. Inside SERVICE
    # ?wikidataId is not in scope, so a BIND there would leave ?wdEntity
    # unbound and the OPTIONALs below would range over all of Wikidata.
    BIND(IRI(CONCAT("http://www.wikidata.org/entity/", ?wikidataId)) AS ?wdEntity)

    SERVICE <https://query.wikidata.org/sparql> {
        OPTIONAL { ?wdEntity wdt:P791 ?isil }   # ISIL
        OPTIONAL { ?wdEntity wdt:P214 ?viaf }   # VIAF
        OPTIONAL { ?wdEntity wdt:P213 ?isni }   # ISNI
        OPTIONAL { ?wdEntity wdt:P6782 ?ror }   # ROR
    }
}

9. Provenance Queries

Get Data by Tier

# Custodians derived from observations in data tier hc:TIER_1_AUTHORITATIVE, with the source
SELECT ?custodian ?name ?source
WHERE {
    ?observation hc:data_tier hc:TIER_1_AUTHORITATIVE .
    ?observation hc:source ?source .

    ?custodian prov:wasDerivedFrom ?observation .
    ?custodian a hc:Custodian .
    ?custodian skos:prefLabel ?name .
}

Get Observation Sources

# Observations recorded for one custodian, most recently retrieved first.
# The confidence score is optional.
SELECT ?observation ?source ?date ?confidence
WHERE {
    ?custodian hc:ghcid "NL-NH-AMS-M-RM" .
    ?custodian a hc:Custodian .
    ?custodian hc:has_observation ?observation .

    ?observation hc:retrieved_on ?date .
    ?observation hc:source_url ?source .

    OPTIONAL { ?observation hc:confidence_score ?confidence }
}
ORDER BY DESC(?date)

Track Reconstruction Activity

# Reconstruction activities that generated one custodian:
# method, start time and each source the activity used.
SELECT ?activity ?method ?date ?usedSources
WHERE {
    ?custodian hc:ghcid "NL-NH-AMS-M-RM" .
    ?custodian a hc:Custodian .
    ?custodian prov:wasGeneratedBy ?activity .

    ?activity a hc:ReconstructionActivity .
    ?activity prov:used ?usedSources .
    ?activity prov:startedAtTime ?date .
    ?activity hc:method ?method .
}

10. Analytics Queries

Institution Distribution by Country and Type

# Number of custodians per (country, type) pair;
# sorted by country, then by descending count within a country.
SELECT ?country ?type (COUNT(?custodian) AS ?count)
WHERE {
    ?custodian hc:custodian_type ?type .
    ?custodian hc:country ?country .
    ?custodian a hc:Custodian .
}
GROUP BY ?country ?type
ORDER BY ?country DESC(?count)

Data Quality Summary

# Per data tier: number of observations and mean confidence score.
# Observations without a confidence score are still counted.
SELECT ?tier
       (COUNT(?observation) AS ?observationCount)
       (AVG(?confidence) AS ?avgConfidence)
WHERE {
    ?observation hc:data_tier ?tier .
    ?observation a hc:CustodianObservation .

    OPTIONAL { ?observation hc:confidence_score ?confidence }
}
GROUP BY ?tier
ORDER BY ?tier

Network Analysis: Most Connected Custodians

# Top 50 custodians by number of outgoing relationship links
# (membership, project participation, sub-organization, partnership).
SELECT ?custodian ?name (COUNT(?related) AS ?connections)
WHERE {
    ?custodian a hc:Custodian .
    ?custodian skos:prefLabel ?name .

    # Path alternation translates to the UNION of the four single-predicate patterns
    ?custodian hc:is_member_of
             | hc:participated_in_project
             | org:subOrganizationOf
             | hc:has_partnership_with
               ?related .
}
GROUP BY ?custodian ?name
ORDER BY DESC(?connections)
LIMIT 50