glam/scripts/add_slot_mappings.py
kempersc 98c42bf272 Fix LinkML URI conflicts and generate RDF outputs
- Fix scope_note → finding_aid_scope_note in FindingAid.yaml
- Remove duplicate wikidata_entity slot from CustodianType.yaml (import instead)
- Remove duplicate rico_record_set_type from class_metadata_slots.yaml
- Fix range types for equals_string compatibility (uriorcurie → string)
- Move class names from close_mappings to see_also in 10 RecordSetTypes files
- Generate all RDF formats: OWL, N-Triples, RDF/XML, N3, JSON-LD context
- Sync schemas to frontend/public/schemas/

Files: 1,151 changed (includes prior CustodianType migration)
2026-01-07 12:32:59 +01:00

2420 lines
66 KiB
Python

#!/usr/bin/env python3
"""
Add semantic mappings to LinkML slot files based on base ontologies.
This script maps slot names to predicates from:
- Schema.org (schema:)
- Dublin Core Terms (dcterms:)
- FOAF (foaf:)
- PROV-O (prov:)
- ORG Ontology (org:)
- SKOS (skos:)
- RiC-O (rico:)
- CIDOC-CRM (crm:)
- BIBFRAME (bf:)
- DCAT (dcat:)
- vCard (vcard:)
The mapping table also uses predicates with the rdf:, rdfs:, owl:, geo:
and adms: prefixes.
Mapping types (per SKOS):
- exact_mappings: Identical meaning (skos:exactMatch)
- close_mappings: Very similar meaning (skos:closeMatch)
- related_mappings: Semantically related (skos:relatedMatch)
- narrow_mappings: Mapped term is more specific than the slot (skos:narrowMatch)
- broad_mappings: Mapped term is more general than the slot (skos:broadMatch)
"""
import os
import re
import yaml
from pathlib import Path
from typing import Dict, List, Optional, Tuple
# Comprehensive predicate mapping table
# Format: slot_name_pattern -> [(mapping_type, predicate), ...]
SLOT_MAPPINGS: Dict[str, List[Tuple[str, str]]] = {
# === NAMES AND LABELS ===
"name": [
("exact_mappings", "schema:name"),
("exact_mappings", "foaf:name"),
("exact_mappings", "rdfs:label"),
],
"preferred_label": [
("exact_mappings", "skos:prefLabel"),
("exact_mappings", "schema:name"),
],
"alternative_names": [
("exact_mappings", "schema:alternateName"),
("exact_mappings", "skos:altLabel"),
],
"display_name": [
("exact_mappings", "rdfs:label"),
("close_mappings", "schema:name"),
],
"full_name": [
("exact_mappings", "foaf:name"),
("close_mappings", "schema:name"),
],
"given_name": [
("exact_mappings", "foaf:givenName"),
("exact_mappings", "schema:givenName"),
],
"family_name": [
("exact_mappings", "foaf:familyName"),
("exact_mappings", "schema:familyName"),
],
"first_name": [
("exact_mappings", "foaf:firstName"),
("close_mappings", "schema:givenName"),
],
"last_name": [
("exact_mappings", "foaf:lastName"),
("close_mappings", "schema:familyName"),
],
"surname": [
("exact_mappings", "foaf:surname"),
("exact_mappings", "schema:familyName"),
],
"base_surname": [
("close_mappings", "foaf:surname"),
("related_mappings", "schema:familyName"),
],
"nickname": [
("exact_mappings", "foaf:nick"),
("close_mappings", "schema:alternateName"),
],
"title": [
("exact_mappings", "dcterms:title"),
("exact_mappings", "schema:title"),
],
"agent_name": [
("exact_mappings", "foaf:name"),
("close_mappings", "prov:label"),
],
# === DESCRIPTIONS ===
"description": [
("exact_mappings", "dcterms:description"),
("exact_mappings", "schema:description"),
("exact_mappings", "rdfs:comment"),
],
"summary": [
("close_mappings", "schema:abstract"),
("close_mappings", "dcterms:abstract"),
],
"abstract": [
("exact_mappings", "dcterms:abstract"),
("exact_mappings", "schema:abstract"),
],
"notes": [
("exact_mappings", "skos:note"),
("close_mappings", "rdfs:comment"),
],
"comment": [
("exact_mappings", "rdfs:comment"),
("exact_mappings", "schema:comment"),
],
"remarks": [
("close_mappings", "skos:note"),
("related_mappings", "rdfs:comment"),
],
# === IDENTIFIERS ===
"identifier": [
("exact_mappings", "dcterms:identifier"),
("exact_mappings", "schema:identifier"),
],
"id": [
("exact_mappings", "dcterms:identifier"),
("close_mappings", "schema:identifier"),
],
"external_id": [
("close_mappings", "dcterms:identifier"),
("close_mappings", "schema:identifier"),
],
"code": [
("close_mappings", "schema:codeValue"),
("related_mappings", "dcterms:identifier"),
],
"accession_number": [
("close_mappings", "schema:identifier"),
("related_mappings", "dcterms:identifier"),
],
"isil_code": [
("narrow_mappings", "dcterms:identifier"),
],
"wikidata_id": [
("narrow_mappings", "dcterms:identifier"),
("related_mappings", "schema:sameAs"),
],
"viaf_id": [
("narrow_mappings", "dcterms:identifier"),
],
# === DATES AND TIMES ===
"date": [
("exact_mappings", "dcterms:date"),
("exact_mappings", "schema:date"),
],
"created_at": [
("exact_mappings", "dcterms:created"),
("exact_mappings", "schema:dateCreated"),
("exact_mappings", "prov:generatedAtTime"),
],
"created": [
("exact_mappings", "dcterms:created"),
("exact_mappings", "schema:dateCreated"),
],
"modified_at": [
("exact_mappings", "dcterms:modified"),
("exact_mappings", "schema:dateModified"),
],
"modified": [
("exact_mappings", "dcterms:modified"),
("exact_mappings", "schema:dateModified"),
],
"updated_at": [
("close_mappings", "dcterms:modified"),
("close_mappings", "schema:dateModified"),
],
"start_date": [
("exact_mappings", "schema:startDate"),
("close_mappings", "prov:startedAtTime"),
],
"end_date": [
("exact_mappings", "schema:endDate"),
("close_mappings", "prov:endedAtTime"),
],
"valid_from": [
("exact_mappings", "schema:validFrom"),
("close_mappings", "dcterms:valid"),
],
"valid_to": [
("exact_mappings", "schema:validThrough"),
("close_mappings", "dcterms:valid"),
],
"birth_date": [
("exact_mappings", "schema:birthDate"),
("exact_mappings", "foaf:birthday"),
],
"death_date": [
("exact_mappings", "schema:deathDate"),
],
"founding_date": [
("exact_mappings", "schema:foundingDate"),
],
"dissolution_date": [
("exact_mappings", "schema:dissolutionDate"),
],
"acquisition_date": [
("close_mappings", "schema:dateCreated"),
("related_mappings", "dcterms:date"),
],
"publication_date": [
("exact_mappings", "schema:datePublished"),
("exact_mappings", "dcterms:issued"),
],
"issued": [
("exact_mappings", "dcterms:issued"),
("exact_mappings", "schema:datePublished"),
],
"event_date": [
("close_mappings", "schema:startDate"),
("related_mappings", "dcterms:date"),
],
"timestamp": [
("close_mappings", "prov:atTime"),
("related_mappings", "dcterms:date"),
],
"follow_up_date": [
("close_mappings", "schema:scheduledTime"),
],
"approval_date": [
("close_mappings", "dcterms:dateAccepted"),
],
"allocation_date": [
("close_mappings", "dcterms:date"),
],
# === TEMPORAL EXPRESSIONS ===
"begin_of_the_begin": [
("related_mappings", "prov:startedAtTime"),
("narrow_mappings", "schema:startDate"),
],
"begin_of_the_end": [
("related_mappings", "prov:endedAtTime"),
],
"end_of_the_begin": [
("related_mappings", "prov:startedAtTime"),
],
"end_of_the_end": [
("related_mappings", "prov:endedAtTime"),
("narrow_mappings", "schema:endDate"),
],
"temporal_coverage": [
("exact_mappings", "dcterms:temporal"),
("exact_mappings", "schema:temporalCoverage"),
],
# === LOCATIONS ===
"location": [
("exact_mappings", "schema:location"),
("exact_mappings", "dcterms:spatial"),
],
"address": [
("exact_mappings", "schema:address"),
("exact_mappings", "vcard:hasAddress"),
],
"city": [
("exact_mappings", "schema:addressLocality"),
("exact_mappings", "vcard:locality"),
],
"country": [
("exact_mappings", "schema:addressCountry"),
("exact_mappings", "vcard:country-name"),
],
"region": [
("exact_mappings", "schema:addressRegion"),
("exact_mappings", "vcard:region"),
],
"postal_code": [
("exact_mappings", "schema:postalCode"),
("exact_mappings", "vcard:postal-code"),
],
"street_address": [
("exact_mappings", "schema:streetAddress"),
("exact_mappings", "vcard:street-address"),
],
"latitude": [
("exact_mappings", "schema:latitude"),
("exact_mappings", "geo:lat"),
],
"longitude": [
("exact_mappings", "schema:longitude"),
("exact_mappings", "geo:long"),
],
"coordinates": [
("close_mappings", "schema:geo"),
("related_mappings", "geo:geometry"),
],
"birth_place": [
("exact_mappings", "schema:birthPlace"),
],
"death_place": [
("exact_mappings", "schema:deathPlace"),
],
"place_of_origin": [
("close_mappings", "schema:birthPlace"),
("related_mappings", "prov:atLocation"),
],
"altitude": [
("exact_mappings", "schema:elevation"),
],
# === ORGANIZATIONS ===
"organization": [
("exact_mappings", "schema:organization"),
("exact_mappings", "org:organization"),
],
"affiliation": [
("exact_mappings", "schema:affiliation"),
("exact_mappings", "org:memberOf"),
],
"member_of": [
("exact_mappings", "org:memberOf"),
("exact_mappings", "schema:memberOf"),
],
"parent_organization": [
("exact_mappings", "schema:parentOrganization"),
("exact_mappings", "org:subOrganizationOf"),
],
"sub_organization": [
("exact_mappings", "schema:subOrganization"),
("exact_mappings", "org:hasSubOrganization"),
],
"department": [
("exact_mappings", "schema:department"),
("close_mappings", "org:hasUnit"),
],
"role": [
("exact_mappings", "org:role"),
("exact_mappings", "schema:roleName"),
],
"job_title": [
("exact_mappings", "schema:jobTitle"),
("close_mappings", "org:role"),
],
"employer": [
("exact_mappings", "schema:worksFor"),
("close_mappings", "org:organization"),
],
"founding_location": [
("exact_mappings", "schema:foundingLocation"),
],
"headquarters": [
("close_mappings", "org:hasRegisteredSite"),
("related_mappings", "schema:location"),
],
# === PEOPLE ===
"person": [
("exact_mappings", "schema:person"),
("exact_mappings", "foaf:Person"),
],
"author": [
("exact_mappings", "schema:author"),
("exact_mappings", "dcterms:creator"),
],
"creator": [
("exact_mappings", "dcterms:creator"),
("exact_mappings", "schema:creator"),
],
"contributor": [
("exact_mappings", "dcterms:contributor"),
("exact_mappings", "schema:contributor"),
],
"publisher": [
("exact_mappings", "dcterms:publisher"),
("exact_mappings", "schema:publisher"),
],
"editor": [
("exact_mappings", "schema:editor"),
("close_mappings", "dcterms:contributor"),
],
"curator": [
("close_mappings", "schema:contributor"),
("related_mappings", "dcterms:contributor"),
],
"gender": [
("exact_mappings", "schema:gender"),
("exact_mappings", "foaf:gender"),
],
"age": [
("exact_mappings", "foaf:age"),
("close_mappings", "schema:age"),
],
"nationality": [
("exact_mappings", "schema:nationality"),
],
"knows": [
("exact_mappings", "foaf:knows"),
("exact_mappings", "schema:knows"),
],
# === CONTACT INFO ===
"email": [
("exact_mappings", "schema:email"),
("exact_mappings", "foaf:mbox"),
],
"telephone": [
("exact_mappings", "schema:telephone"),
("exact_mappings", "foaf:phone"),
],
"phone": [
("exact_mappings", "schema:telephone"),
("exact_mappings", "foaf:phone"),
],
"fax": [
("exact_mappings", "schema:faxNumber"),
],
"homepage": [
("exact_mappings", "foaf:homepage"),
("exact_mappings", "schema:url"),
],
"website": [
("exact_mappings", "schema:url"),
("exact_mappings", "foaf:homepage"),
],
"url": [
("exact_mappings", "schema:url"),
("exact_mappings", "rdfs:seeAlso"),
],
# === COLLECTIONS AND RECORDS ===
"collection": [
("exact_mappings", "schema:collection"),
("close_mappings", "dcterms:isPartOf"),
],
"collection_name": [
("close_mappings", "schema:name"),
("related_mappings", "dcterms:title"),
],
"collection_description": [
("close_mappings", "schema:description"),
("related_mappings", "dcterms:description"),
],
"part_of": [
("exact_mappings", "dcterms:isPartOf"),
("exact_mappings", "schema:isPartOf"),
],
"has_part": [
("exact_mappings", "dcterms:hasPart"),
("exact_mappings", "schema:hasPart"),
],
"extent": [
("exact_mappings", "dcterms:extent"),
("close_mappings", "schema:size"),
],
"format": [
("exact_mappings", "dcterms:format"),
("exact_mappings", "schema:encodingFormat"),
],
"medium": [
("exact_mappings", "dcterms:medium"),
("close_mappings", "schema:material"),
],
"language": [
("exact_mappings", "dcterms:language"),
("exact_mappings", "schema:inLanguage"),
],
"subject": [
("exact_mappings", "dcterms:subject"),
("exact_mappings", "schema:about"),
],
"topic": [
("exact_mappings", "foaf:topic"),
("exact_mappings", "schema:about"),
],
"keywords": [
("exact_mappings", "schema:keywords"),
("close_mappings", "dcterms:subject"),
],
"arrangement": [
("related_mappings", "rico:hasOrganicProvenance"),
],
"arrangement_system": [
("related_mappings", "rico:hasRecordSetType"),
],
"cataloging_standard": [
("related_mappings", "dcterms:conformsTo"),
],
# === RIGHTS AND ACCESS ===
"rights": [
("exact_mappings", "dcterms:rights"),
("exact_mappings", "schema:license"),
],
"license": [
("exact_mappings", "dcterms:license"),
("exact_mappings", "schema:license"),
],
"access_rights": [
("exact_mappings", "dcterms:accessRights"),
],
"access_policy": [
("close_mappings", "dcterms:accessRights"),
],
"access_restrictions": [
("close_mappings", "dcterms:accessRights"),
],
"copyright": [
("exact_mappings", "schema:copyrightHolder"),
("related_mappings", "dcterms:rights"),
],
# === PROVENANCE ===
"source": [
("exact_mappings", "dcterms:source"),
("exact_mappings", "prov:wasDerivedFrom"),
],
"derived_from": [
("exact_mappings", "prov:wasDerivedFrom"),
("exact_mappings", "dcterms:source"),
],
"generated_by": [
("exact_mappings", "prov:wasGeneratedBy"),
],
"attributed_to": [
("exact_mappings", "prov:wasAttributedTo"),
],
"provenance": [
("exact_mappings", "dcterms:provenance"),
("exact_mappings", "prov:wasGeneratedBy"),
],
"extraction_agent": [
("close_mappings", "prov:wasAttributedTo"),
],
"extraction_method": [
("close_mappings", "prov:wasGeneratedBy"),
],
"retrieval_agent": [
("close_mappings", "prov:wasAttributedTo"),
],
# === RELATIONS ===
"related_to": [
("exact_mappings", "dcterms:relation"),
("exact_mappings", "schema:relatedTo"),
],
"references": [
("exact_mappings", "dcterms:references"),
("exact_mappings", "schema:citation"),
],
"replaces": [
("exact_mappings", "dcterms:replaces"),
],
"replaced_by": [
("exact_mappings", "dcterms:isReplacedBy"),
],
"version": [
("exact_mappings", "dcterms:hasVersion"),
("exact_mappings", "schema:version"),
],
"same_as": [
("exact_mappings", "schema:sameAs"),
("exact_mappings", "owl:sameAs"),
],
"see_also": [
("exact_mappings", "rdfs:seeAlso"),
("close_mappings", "dcterms:relation"),
],
# === TYPES AND CATEGORIES ===
"type": [
("exact_mappings", "dcterms:type"),
("exact_mappings", "rdf:type"),
],
"category": [
("exact_mappings", "schema:category"),
("close_mappings", "dcterms:type"),
],
"classification": [
("exact_mappings", "org:classification"),
("close_mappings", "dcterms:type"),
],
"genre": [
("exact_mappings", "schema:genre"),
("close_mappings", "dcterms:type"),
],
# === STATUS ===
"status": [
("exact_mappings", "schema:status"),
("close_mappings", "adms:status"),
],
"active": [
("close_mappings", "schema:status"),
],
"verified": [
("related_mappings", "prov:wasAttributedTo"),
],
# === DIGITAL/TECHNICAL ===
"api_endpoint": [
("close_mappings", "schema:url"),
("related_mappings", "dcat:endpointURL"),
],
"download_url": [
("exact_mappings", "dcat:downloadURL"),
("close_mappings", "schema:url"),
],
"access_url": [
("exact_mappings", "dcat:accessURL"),
],
"media_type": [
("exact_mappings", "dcat:mediaType"),
("exact_mappings", "dcterms:format"),
],
"file_format": [
("exact_mappings", "dcterms:format"),
("exact_mappings", "schema:fileFormat"),
],
"byte_size": [
("exact_mappings", "dcat:byteSize"),
("exact_mappings", "schema:contentSize"),
],
"checksum": [
("exact_mappings", "dcat:checksum"),
],
# === EVENTS ===
"event": [
("exact_mappings", "schema:event"),
],
"event_type": [
("close_mappings", "schema:eventType"),
("related_mappings", "dcterms:type"),
],
"activity": [
("exact_mappings", "prov:Activity"),
],
"activity_type": [
("close_mappings", "dcterms:type"),
],
"change_type": [
("close_mappings", "dcterms:type"),
],
"affected_by_event": [
("close_mappings", "prov:wasInfluencedBy"),
],
"affects_organization": [
("close_mappings", "prov:influenced"),
],
# === IMAGES AND MEDIA ===
"image": [
("exact_mappings", "schema:image"),
("exact_mappings", "foaf:depiction"),
],
"thumbnail": [
("exact_mappings", "schema:thumbnail"),
("exact_mappings", "foaf:thumbnail"),
],
"logo": [
("exact_mappings", "schema:logo"),
("exact_mappings", "foaf:logo"),
],
"photo": [
("exact_mappings", "schema:photo"),
("close_mappings", "foaf:depiction"),
],
# === QUANTITIES ===
"count": [
("close_mappings", "schema:itemCount"),
],
"quantity": [
("exact_mappings", "schema:quantity"),
],
"value": [
("exact_mappings", "rdf:value"),
("exact_mappings", "schema:value"),
],
"unit": [
("exact_mappings", "schema:unitCode"),
],
"price": [
("exact_mappings", "schema:price"),
],
"currency": [
("exact_mappings", "schema:priceCurrency"),
],
# === HERITAGE-SPECIFIC ===
"custodian": [
("close_mappings", "rico:hasOrIsHeldBy"),
],
"custody": [
("close_mappings", "rico:hasOrIsHeldBy"),
],
"provenance_text": [
("exact_mappings", "dcterms:provenance"),
],
"finding_aid": [
("related_mappings", "rico:hasInstantiation"),
],
"material": [
("exact_mappings", "schema:material"),
("exact_mappings", "dcterms:medium"),
],
"technique": [
("close_mappings", "schema:artMedium"),
],
"dimensions": [
("close_mappings", "schema:size"),
],
"condition": [
("related_mappings", "schema:itemCondition"),
],
# === CANONICAL VALUES ===
"canonical_value": [
("close_mappings", "skos:prefLabel"),
],
"normalized_value": [
("close_mappings", "skos:prefLabel"),
],
# === WEB/CLAIMS ===
"source_url": [
("exact_mappings", "schema:url"),
("exact_mappings", "dcterms:source"),
],
"claim_type": [
("close_mappings", "dcterms:type"),
],
"claim_value": [
("close_mappings", "rdf:value"),
],
"xpath": [
("related_mappings", "prov:atLocation"),
],
"confidence_score": [
("related_mappings", "prov:value"),
],
# === MISCELLANEOUS ===
"deliverables": [
("close_mappings", "schema:result"),
("related_mappings", "prov:generated"),
],
"capacity_items": [
("broad_mappings", "schema:maximumAttendeeCapacity"),
],
# === ADDITIONAL HERITAGE-SPECIFIC ===
"access_policy_ref": [
("close_mappings", "dcterms:accessRights"),
],
"acquisition_method": [
("close_mappings", "prov:wasGeneratedBy"),
("related_mappings", "schema:acquiredFrom"),
],
"acquisition_source": [
("exact_mappings", "schema:acquiredFrom"),
("close_mappings", "prov:wasAttributedTo"),
],
"affected_units": [
("close_mappings", "prov:influenced"),
],
"allocates": [
("close_mappings", "prov:generated"),
],
"allocated_by": [
("close_mappings", "prov:wasAttributedTo"),
],
"alternative_observed_names": [
("exact_mappings", "skos:altLabel"),
("close_mappings", "schema:alternateName"),
],
"appellation_language": [
("exact_mappings", "dcterms:language"),
],
"appellation_value": [
("exact_mappings", "rdf:value"),
("close_mappings", "skos:prefLabel"),
],
"appellation_type": [
("close_mappings", "dcterms:type"),
],
"appellations": [
("close_mappings", "skos:altLabel"),
],
"authentication_required": [
("related_mappings", "schema:authenticationType"),
],
"auxiliary_places": [
("close_mappings", "org:hasSite"),
("related_mappings", "schema:location"),
],
"auxiliary_platforms": [
("close_mappings", "dcterms:hasPart"),
],
"change_rationale": [
("close_mappings", "prov:hadReason"),
("related_mappings", "dcterms:description"),
],
"circumstances_of_death": [
("related_mappings", "schema:description"),
],
"collection_focus": [
("exact_mappings", "dcterms:subject"),
("close_mappings", "schema:about"),
],
"collection_of": [
("close_mappings", "dcterms:isPartOf"),
],
"collection_scope": [
("exact_mappings", "dcterms:coverage"),
("close_mappings", "schema:about"),
],
"collection_size": [
("exact_mappings", "schema:numberOfItems"),
("close_mappings", "dcterms:extent"),
],
"collection_type": [
("close_mappings", "dcterms:type"),
],
"collection_type_ref": [
("close_mappings", "dcterms:type"),
],
# === CONTACT AND SOCIAL ===
"contact_email": [
("exact_mappings", "schema:email"),
("exact_mappings", "vcard:hasEmail"),
],
"contact_info": [
("close_mappings", "schema:contactPoint"),
("close_mappings", "vcard:hasAddress"),
],
"contact_name": [
("close_mappings", "schema:name"),
],
"contact_person": [
("exact_mappings", "schema:contactPoint"),
],
"contact_phone": [
("exact_mappings", "schema:telephone"),
("exact_mappings", "vcard:hasTelephone"),
],
"social_media_links": [
("close_mappings", "schema:sameAs"),
],
"social_media_platform": [
("related_mappings", "dcterms:type"),
],
# === DIGITAL PRESENCE ===
"digital_presence": [
("close_mappings", "schema:url"),
],
"digital_platforms": [
("close_mappings", "dcterms:hasPart"),
],
"platform_name": [
("exact_mappings", "schema:name"),
],
"platform_type": [
("close_mappings", "dcterms:type"),
],
"platform_url": [
("exact_mappings", "schema:url"),
],
# === STAFF AND PERSONNEL ===
"staff_count": [
("close_mappings", "schema:numberOfEmployees"),
],
"staff_members": [
("close_mappings", "schema:employee"),
("related_mappings", "org:hasMember"),
],
"staff_role": [
("exact_mappings", "org:role"),
("exact_mappings", "schema:roleName"),
],
"position": [
("exact_mappings", "schema:jobTitle"),
("close_mappings", "org:role"),
],
"position_title": [
("exact_mappings", "schema:jobTitle"),
],
"employment_type": [
("exact_mappings", "schema:employmentType"),
],
# === ENRICHMENT AND PROVENANCE ===
"enrichment_status": [
("related_mappings", "adms:status"),
],
"enrichment_source": [
("close_mappings", "dcterms:source"),
],
"data_source": [
("exact_mappings", "dcterms:source"),
("close_mappings", "prov:wasDerivedFrom"),
],
"data_tier": [
("related_mappings", "dcterms:type"),
],
"extraction_date": [
("exact_mappings", "prov:generatedAtTime"),
("close_mappings", "dcterms:created"),
],
"extraction_notes": [
("close_mappings", "skos:note"),
],
"last_verified": [
("close_mappings", "dcterms:dateAccepted"),
],
"verified_by": [
("close_mappings", "prov:wasAttributedTo"),
],
"retrieved_on": [
("exact_mappings", "prov:generatedAtTime"),
],
"statement_created_at": [
("exact_mappings", "prov:generatedAtTime"),
("close_mappings", "dcterms:created"),
],
"source_archived_at": [
("close_mappings", "prov:generatedAtTime"),
],
# === GEOGRAPHIC ===
"geonames_id": [
("narrow_mappings", "dcterms:identifier"),
],
"wgs84_coordinates": [
("close_mappings", "geo:geometry"),
],
"geo_feature": [
("close_mappings", "geo:Feature"),
],
"admin_level": [
("related_mappings", "schema:addressRegion"),
],
"municipality": [
("close_mappings", "schema:addressLocality"),
],
"province": [
("close_mappings", "schema:addressRegion"),
],
"country_code": [
("close_mappings", "schema:addressCountry"),
],
# === OPENING HOURS AND SCHEDULES ===
"opening_hours": [
("exact_mappings", "schema:openingHours"),
],
"opening_hours_specification": [
("exact_mappings", "schema:openingHoursSpecification"),
],
"temporarily_closed": [
("related_mappings", "schema:publicAccess"),
],
# === ORGANIZATIONAL STRUCTURE ===
"legal_name": [
("exact_mappings", "schema:legalName"),
],
"legal_form": [
("close_mappings", "org:classification"),
("related_mappings", "schema:legalForm"),
],
"legal_status": [
("close_mappings", "org:classification"),
],
"registration_number": [
("close_mappings", "schema:identifier"),
("close_mappings", "org:identifier"),
],
"kvk_number": [
("narrow_mappings", "org:identifier"),
],
"organizational_unit": [
("exact_mappings", "org:hasUnit"),
],
"organizational_structure": [
("close_mappings", "org:organization"),
],
"parent_custodian": [
("exact_mappings", "org:subOrganizationOf"),
("exact_mappings", "schema:parentOrganization"),
],
"sub_custodians": [
("exact_mappings", "org:hasSubOrganization"),
("exact_mappings", "schema:subOrganization"),
],
# === FUNDING AND FINANCIAL ===
"funding_source": [
("close_mappings", "schema:funder"),
],
"funder": [
("exact_mappings", "schema:funder"),
],
"funding_amount": [
("close_mappings", "schema:amount"),
],
"budget": [
("related_mappings", "schema:price"),
],
"annual_budget": [
("related_mappings", "schema:price"),
],
# === SERVICES AND FACILITIES ===
"services": [
("exact_mappings", "schema:availableService"),
],
"facilities": [
("close_mappings", "schema:amenityFeature"),
],
"accessibility": [
("exact_mappings", "schema:accessibilityFeature"),
],
"accessibility_info": [
("close_mappings", "schema:accessibilitySummary"),
],
"public_access": [
("exact_mappings", "schema:publicAccess"),
],
# === PROJECTS AND ACTIVITIES ===
"project_name": [
("exact_mappings", "schema:name"),
],
"project_description": [
("exact_mappings", "schema:description"),
],
"project_status": [
("close_mappings", "schema:status"),
],
"project_start": [
("exact_mappings", "schema:startDate"),
],
"project_end": [
("exact_mappings", "schema:endDate"),
],
"project_url": [
("exact_mappings", "schema:url"),
],
# === EVENTS AND CHANGES ===
"event_description": [
("exact_mappings", "schema:description"),
],
"event_location": [
("exact_mappings", "schema:location"),
],
"predecessor": [
("exact_mappings", "dcterms:replaces"),
("close_mappings", "prov:wasDerivedFrom"),
],
"successor": [
("exact_mappings", "dcterms:isReplacedBy"),
],
"merged_into": [
("close_mappings", "dcterms:isReplacedBy"),
],
"merged_from": [
("close_mappings", "dcterms:replaces"),
],
"split_from": [
("close_mappings", "prov:wasDerivedFrom"),
],
"split_into": [
("close_mappings", "prov:generated"),
],
# === RATINGS AND REVIEWS ===
"rating": [
("exact_mappings", "schema:aggregateRating"),
],
"review_count": [
("close_mappings", "schema:reviewCount"),
],
"reviews": [
("exact_mappings", "schema:review"),
],
# === STANDARDS AND COMPLIANCE ===
"conforms_to": [
("exact_mappings", "dcterms:conformsTo"),
],
"standard_name": [
("close_mappings", "schema:name"),
],
"standard_version": [
("close_mappings", "schema:version"),
],
"certification": [
("close_mappings", "schema:hasCertification"),
],
# === ARCHIVAL-SPECIFIC ===
"fonds": [
("related_mappings", "rico:hasRecordSetType"),
],
"series": [
("related_mappings", "rico:hasRecordSetType"),
],
"finding_aid_url": [
("exact_mappings", "schema:url"),
],
"record_type": [
("close_mappings", "rico:hasRecordSetType"),
],
"record_count": [
("close_mappings", "schema:numberOfItems"),
],
"linear_meters": [
("close_mappings", "dcterms:extent"),
],
# === MUSEUM-SPECIFIC ===
"visitor_count": [
("close_mappings", "schema:attendeeCount"),
],
"exhibition": [
("close_mappings", "schema:event"),
],
"exhibitions": [
("close_mappings", "schema:event"),
],
"current_exhibitions": [
("close_mappings", "schema:event"),
],
"permanent_collection": [
("close_mappings", "schema:collection"),
],
# === LIBRARY-SPECIFIC ===
"catalog_url": [
("exact_mappings", "schema:url"),
],
"holdings": [
("close_mappings", "schema:collection"),
],
"circulation": [
("related_mappings", "schema:availableService"),
],
# === IDENTIFIERS (ADDITIONAL) ===
"orcid": [
("narrow_mappings", "dcterms:identifier"),
],
"ror_id": [
("narrow_mappings", "dcterms:identifier"),
],
"grid_id": [
("narrow_mappings", "dcterms:identifier"),
],
"doi": [
("narrow_mappings", "dcterms:identifier"),
],
"isbn": [
("exact_mappings", "schema:isbn"),
],
"issn": [
("exact_mappings", "schema:issn"),
],
# === WEB CLAIMS (ADDITIONAL) ===
"html_file": [
("related_mappings", "dcterms:source"),
],
"xpath_match_score": [
("related_mappings", "prov:value"),
],
"retrieval_timestamp": [
("exact_mappings", "prov:generatedAtTime"),
],
# === GHCID-SPECIFIC ===
"ghcid_current": [
("narrow_mappings", "dcterms:identifier"),
],
"ghcid_history": [
("related_mappings", "prov:wasRevisionOf"),
],
"ghcid_uuid": [
("narrow_mappings", "dcterms:identifier"),
],
"ghcid_numeric": [
("narrow_mappings", "dcterms:identifier"),
],
# === LINKEDIN-SPECIFIC ===
"linkedin_url": [
("exact_mappings", "schema:sameAs"),
("close_mappings", "foaf:page"),
],
"linkedin_slug": [
("related_mappings", "dcterms:identifier"),
],
"profile_headline": [
("close_mappings", "schema:description"),
],
"profile_summary": [
("close_mappings", "schema:description"),
],
"connections_count": [
("related_mappings", "schema:interactionCount"),
],
# === WIKIDATA-SPECIFIC ===
"wikidata_label": [
("exact_mappings", "skos:prefLabel"),
],
"wikidata_description": [
("exact_mappings", "schema:description"),
],
"wikidata_aliases": [
("exact_mappings", "skos:altLabel"),
],
"sitelinks": [
("close_mappings", "schema:sameAs"),
],
# === GOOGLE MAPS-SPECIFIC ===
"place_id": [
("narrow_mappings", "dcterms:identifier"),
],
"google_maps_url": [
("close_mappings", "schema:sameAs"),
],
"popular_times": [
("related_mappings", "schema:openingHours"),
],
"photo_count": [
("close_mappings", "schema:numberOfItems"),
],
"business_status": [
("close_mappings", "schema:status"),
],
# === ADDITIONAL UNMAPPED SLOTS ===
"confidence": [
("close_mappings", "prov:value"),
("related_mappings", "schema:ratingValue"),
],
"confidence_method": [
("close_mappings", "prov:wasGeneratedBy"),
],
"confidence_value": [
("close_mappings", "prov:value"),
],
"conflict_status": [
("related_mappings", "schema:status"),
],
"conservation_lab": [
("related_mappings", "schema:location"),
],
"contact": [
("exact_mappings", "schema:contactPoint"),
],
"contact_point": [
("exact_mappings", "schema:contactPoint"),
],
"content_hash": [
("exact_mappings", "dcat:checksum"),
],
"cost_usd": [
("close_mappings", "schema:price"),
],
"css_selector": [
("related_mappings", "prov:atLocation"),
],
"curation_activities": [
("close_mappings", "prov:Activity"),
],
"custodial_history": [
("exact_mappings", "dcterms:provenance"),
],
"custodian_names": [
("close_mappings", "skos:prefLabel"),
],
"custodian_observations": [
("related_mappings", "prov:Entity"),
],
"custodians": [
("close_mappings", "schema:organization"),
],
"data_license_policy": [
("close_mappings", "dcterms:license"),
],
"data_service_endpoints": [
("close_mappings", "dcat:endpointURL"),
],
"date_of_incorporation": [
("exact_mappings", "schema:foundingDate"),
],
"definition": [
("exact_mappings", "skos:definition"),
("close_mappings", "rdfs:comment"),
],
"delegation": [
("related_mappings", "prov:actedOnBehalfOf"),
],
"emic_name": [
("close_mappings", "skos:prefLabel"),
("related_mappings", "schema:name"),
],
"endorsed_standards": [
("close_mappings", "dcterms:conformsTo"),
],
"entity_observations": [
("related_mappings", "prov:Entity"),
],
"evidence_strength": [
("close_mappings", "prov:value"),
],
"exclusion_criteria": [
("related_mappings", "schema:description"),
],
"first_observation": [
("close_mappings", "prov:generatedAtTime"),
],
"format_types": [
("close_mappings", "dcterms:format"),
],
"formatted_address": [
("exact_mappings", "schema:address"),
("exact_mappings", "vcard:hasAddress"),
],
# === MORE UNMAPPED SLOTS ===
"geographic_scope": [
("exact_mappings", "dcterms:spatial"),
],
"government_level": [
("related_mappings", "org:classification"),
],
"has_digital_catalog": [
("related_mappings", "schema:url"),
],
"has_finding_aid": [
("related_mappings", "schema:url"),
],
"has_member": [
("exact_mappings", "org:hasMember"),
],
"has_opening_hours": [
("close_mappings", "schema:openingHours"),
],
"heritage_significance": [
("related_mappings", "dcterms:description"),
],
"heritage_type": [
("close_mappings", "dcterms:type"),
],
"heritage_types": [
("close_mappings", "dcterms:type"),
],
"historical_significance": [
("related_mappings", "dcterms:description"),
],
"hours_of_operation": [
("exact_mappings", "schema:openingHours"),
],
"html_content": [
("related_mappings", "schema:text"),
],
"identifier_scheme": [
("related_mappings", "dcterms:type"),
],
"identifier_type": [
("related_mappings", "dcterms:type"),
],
"identifier_value": [
("exact_mappings", "rdf:value"),
],
"identifiers": [
("close_mappings", "dcterms:identifier"),
],
"inclusion_criteria": [
("related_mappings", "schema:description"),
],
"industry": [
("exact_mappings", "schema:industry"),
],
"is_active": [
("close_mappings", "schema:status"),
],
"is_defunct": [
("close_mappings", "schema:status"),
],
"is_part_of": [
("exact_mappings", "dcterms:isPartOf"),
("exact_mappings", "schema:isPartOf"),
],
"is_primary": [
("related_mappings", "rdf:type"),
],
"issue_date": [
("exact_mappings", "dcterms:issued"),
],
"items": [
("close_mappings", "schema:itemListElement"),
],
"known_for": [
("close_mappings", "schema:knowsAbout"),
],
"label": [
("exact_mappings", "rdfs:label"),
("exact_mappings", "skos:prefLabel"),
],
"labels": [
("close_mappings", "skos:altLabel"),
],
"last_modified": [
("exact_mappings", "dcterms:modified"),
],
"last_observation": [
("close_mappings", "prov:generatedAtTime"),
],
"last_updated": [
("exact_mappings", "dcterms:modified"),
],
"latitude_dd": [
("exact_mappings", "schema:latitude"),
],
"legal_entity": [
("close_mappings", "org:FormalOrganization"),
],
"level": [
("related_mappings", "schema:position"),
],
"locations": [
("exact_mappings", "schema:location"),
],
"longitude_dd": [
("exact_mappings", "schema:longitude"),
],
"main_language": [
("exact_mappings", "dcterms:language"),
],
"maintained_by": [
("close_mappings", "schema:maintainer"),
],
"managed_by": [
("close_mappings", "prov:wasAttributedTo"),
],
"mandate": [
("related_mappings", "org:purpose"),
],
"membership_type": [
("close_mappings", "org:role"),
],
"metadata_standard": [
("close_mappings", "dcterms:conformsTo"),
],
"metadata_standards": [
("close_mappings", "dcterms:conformsTo"),
],
"mission": [
("close_mappings", "org:purpose"),
],
"mission_statement": [
("close_mappings", "org:purpose"),
],
"name_type": [
("related_mappings", "dcterms:type"),
],
"name_variant": [
("exact_mappings", "skos:altLabel"),
],
"name_variants": [
("exact_mappings", "skos:altLabel"),
],
"namespace": [
("related_mappings", "schema:url"),
],
"network_affiliations": [
("close_mappings", "org:memberOf"),
],
"note": [
("exact_mappings", "skos:note"),
],
"number_of_employees": [
("exact_mappings", "schema:numberOfEmployees"),
],
"observation_date": [
("close_mappings", "prov:generatedAtTime"),
],
"observation_notes": [
("close_mappings", "skos:note"),
],
"observation_source": [
("close_mappings", "dcterms:source"),
],
"observation_type": [
("related_mappings", "dcterms:type"),
],
"observations": [
("related_mappings", "prov:Entity"),
],
"official_name": [
("exact_mappings", "schema:legalName"),
],
"official_website": [
("exact_mappings", "schema:url"),
("exact_mappings", "foaf:homepage"),
],
"online_catalog": [
("close_mappings", "schema:url"),
],
"operated_by": [
("close_mappings", "schema:provider"),
],
"operating_status": [
("close_mappings", "schema:status"),
],
"operational_since": [
("close_mappings", "schema:foundingDate"),
],
"operators": [
("close_mappings", "schema:provider"),
],
"org_type": [
("close_mappings", "org:classification"),
],
"organization_type": [
("close_mappings", "org:classification"),
],
"original_language": [
("exact_mappings", "dcterms:language"),
],
"other_identifiers": [
("close_mappings", "dcterms:identifier"),
],
"other_names": [
("exact_mappings", "skos:altLabel"),
],
"owned_by": [
("close_mappings", "schema:ownedBy"),
],
"owner": [
("exact_mappings", "schema:ownedBy"),
],
"ownership": [
("close_mappings", "schema:ownedBy"),
],
"page_url": [
("exact_mappings", "schema:url"),
],
"parent": [
("exact_mappings", "schema:parentOrganization"),
],
"parent_id": [
("close_mappings", "dcterms:isPartOf"),
],
"participants": [
("exact_mappings", "schema:participant"),
],
"period": [
("close_mappings", "dcterms:temporal"),
],
"period_covered": [
("exact_mappings", "dcterms:temporal"),
],
"phone_number": [
("exact_mappings", "schema:telephone"),
],
"physical_address": [
("exact_mappings", "schema:address"),
],
"postal_address": [
("exact_mappings", "schema:postalAddress"),
],
"prefix": [
("related_mappings", "schema:honorificPrefix"),
],
"previous_name": [
("close_mappings", "skos:altLabel"),
],
"previous_names": [
("close_mappings", "skos:altLabel"),
],
"primary_type": [
("close_mappings", "dcterms:type"),
],
"priority": [
("related_mappings", "schema:position"),
],
"processing_status": [
("close_mappings", "adms:status"),
],
"profile_url": [
("exact_mappings", "schema:url"),
("exact_mappings", "foaf:page"),
],
"project_id": [
("close_mappings", "dcterms:identifier"),
],
"properties": [
("related_mappings", "schema:additionalProperty"),
],
"provider": [
("exact_mappings", "schema:provider"),
],
"purpose": [
("exact_mappings", "org:purpose"),
],
"qualifier": [
("related_mappings", "skos:note"),
],
"quality_score": [
("close_mappings", "prov:value"),
],
"raw_value": [
("close_mappings", "rdf:value"),
],
"reason": [
("close_mappings", "prov:hadReason"),
],
"record_id": [
("close_mappings", "dcterms:identifier"),
],
"reference_url": [
("exact_mappings", "schema:url"),
],
"registration_country": [
("close_mappings", "schema:addressCountry"),
],
"related": [
("exact_mappings", "dcterms:relation"),
],
"related_entities": [
("close_mappings", "dcterms:relation"),
],
"related_institutions": [
("close_mappings", "dcterms:relation"),
],
"related_organizations": [
("close_mappings", "dcterms:relation"),
],
"relationship_type": [
("related_mappings", "dcterms:type"),
],
"relevance_score": [
("close_mappings", "prov:value"),
],
"request_url": [
("close_mappings", "schema:url"),
],
"resource_type": [
("close_mappings", "dcterms:type"),
],
"response_format": [
("close_mappings", "dcterms:format"),
],
"result": [
("exact_mappings", "schema:result"),
],
"resulting_organization": [
("close_mappings", "prov:generated"),
],
"scope": [
("close_mappings", "dcterms:coverage"),
],
"scope_note": [
("exact_mappings", "skos:scopeNote"),
],
"search_url": [
("close_mappings", "schema:url"),
],
"secondary_type": [
("close_mappings", "dcterms:type"),
],
"section": [
("related_mappings", "dcterms:isPartOf"),
],
"service_type": [
("close_mappings", "dcterms:type"),
],
"short_name": [
("close_mappings", "skos:altLabel"),
],
"skills": [
("close_mappings", "schema:knowsAbout"),
],
"slug": [
("related_mappings", "dcterms:identifier"),
],
"snapshot_date": [
("close_mappings", "prov:generatedAtTime"),
],
"source_file": [
("close_mappings", "dcterms:source"),
],
"source_id": [
("close_mappings", "dcterms:source"),
],
"source_language": [
("close_mappings", "dcterms:language"),
],
"source_name": [
("close_mappings", "dcterms:source"),
],
"source_type": [
("related_mappings", "dcterms:type"),
],
"sources": [
("exact_mappings", "dcterms:source"),
],
"spatial_coverage": [
("exact_mappings", "dcterms:spatial"),
],
"specialization": [
("close_mappings", "schema:knowsAbout"),
],
"species_count": [
("close_mappings", "schema:numberOfItems"),
],
"start_year": [
("close_mappings", "schema:startDate"),
],
"state": [
("close_mappings", "schema:addressRegion"),
],
"storage_type": [
("related_mappings", "dcterms:type"),
],
"street": [
("exact_mappings", "schema:streetAddress"),
],
"subtype": [
("close_mappings", "dcterms:type"),
],
"suffix": [
("related_mappings", "schema:honorificSuffix"),
],
"supported_formats": [
("close_mappings", "dcterms:format"),
],
"synonyms": [
("exact_mappings", "skos:altLabel"),
],
"system_name": [
("close_mappings", "schema:name"),
],
"tags": [
("close_mappings", "schema:keywords"),
],
"target_audience": [
("close_mappings", "schema:audience"),
],
"temporal": [
("exact_mappings", "dcterms:temporal"),
],
"temporal_scope": [
("exact_mappings", "dcterms:temporal"),
],
"text": [
("exact_mappings", "schema:text"),
],
"text_content": [
("exact_mappings", "schema:text"),
],
"time_zone": [
("close_mappings", "schema:timeZone"),
],
"total_count": [
("close_mappings", "schema:numberOfItems"),
],
"type_label": [
("close_mappings", "rdfs:label"),
],
"uri": [
("exact_mappings", "schema:url"),
],
"usage_notes": [
("close_mappings", "skos:note"),
],
"valid_until": [
("exact_mappings", "schema:validThrough"),
],
"validation_status": [
("related_mappings", "adms:status"),
],
"verification_date": [
("close_mappings", "prov:generatedAtTime"),
],
"verification_method": [
("close_mappings", "prov:wasGeneratedBy"),
],
"verification_status": [
("close_mappings", "adms:status"),
],
"vision": [
("close_mappings", "org:purpose"),
],
"web_archive_url": [
("close_mappings", "schema:url"),
],
"web_claims": [
("related_mappings", "prov:Entity"),
],
"web_presence": [
("close_mappings", "schema:url"),
],
"website_url": [
("exact_mappings", "schema:url"),
],
"year_established": [
("close_mappings", "schema:foundingDate"),
],
"year_founded": [
("exact_mappings", "schema:foundingDate"),
],
# === REMAINING 50 UNMAPPED SLOTS ===
"class_metadata_slots": [
("related_mappings", "rdfs:Resource"),
],
"collections_under_responsibility": [
("close_mappings", "rico:hasOrIsHeldBy"),
("related_mappings", "schema:collection"),
],
"deceased": [
("close_mappings", "schema:deathDate"),
],
"encompasses": [
("close_mappings", "dcterms:hasPart"),
("related_mappings", "schema:containsPlace"),
],
"exposes_collections": [
("close_mappings", "schema:collection"),
],
"external_identifiers": [
("exact_mappings", "dcterms:identifier"),
("close_mappings", "schema:identifier"),
],
"extraction_metadata": [
("close_mappings", "prov:Entity"),
],
"finding_aids": [
("close_mappings", "rico:hasInstantiation"),
("related_mappings", "schema:url"),
],
"gender_identity": [
("exact_mappings", "schema:gender"),
],
"generates": [
("exact_mappings", "prov:generated"),
],
"governance_structure": [
("close_mappings", "org:organization"),
],
"holds_record_set_types": [
("close_mappings", "rico:hasRecordSetType"),
],
"humidity_tolerance": [
("related_mappings", "schema:additionalProperty"),
],
"identifier_format_used": [
("close_mappings", "dcterms:conformsTo"),
],
"identifies": [
("close_mappings", "dcterms:identifier"),
],
"initials": [
("close_mappings", "foaf:name"),
("related_mappings", "skos:altLabel"),
],
"jurisdiction": [
("exact_mappings", "schema:areaServed"),
("close_mappings", "dcterms:spatial"),
],
"justification": [
("close_mappings", "prov:hadReason"),
("related_mappings", "skos:note"),
],
"legal_jurisdiction": [
("exact_mappings", "schema:areaServed"),
],
"legal_responsibility_basis": [
("close_mappings", "dcterms:rights"),
],
"linkedin_profile_path": [
("close_mappings", "schema:sameAs"),
],
"method": [
("exact_mappings", "prov:wasGeneratedBy"),
],
"name_authority": [
("close_mappings", "dcterms:source"),
],
"name_validity_period": [
("close_mappings", "dcterms:temporal"),
],
"observation_context": [
("close_mappings", "prov:atLocation"),
],
"offers_donation_schemes": [
("related_mappings", "schema:availableService"),
],
"organizational_change_events": [
("close_mappings", "prov:Activity"),
],
"parent_collection": [
("exact_mappings", "dcterms:isPartOf"),
],
"platform_of": [
("close_mappings", "dcterms:isPartOf"),
],
"portal_data_sources": [
("close_mappings", "dcterms:source"),
],
"powered_by_cms": [
("related_mappings", "schema:softwareVersion"),
],
"preservation_level": [
("related_mappings", "dcterms:type"),
],
"price_currency": [
("exact_mappings", "schema:priceCurrency"),
],
"primary_register": [
("close_mappings", "dcterms:source"),
],
"pronouns": [
("related_mappings", "foaf:name"),
],
"protocol": [
("close_mappings", "dcterms:conformsTo"),
],
"provenance_statement": [
("exact_mappings", "dcterms:provenance"),
],
"response_formats": [
("close_mappings", "dcterms:format"),
],
"resulting_units": [
("close_mappings", "prov:generated"),
],
"rico_equivalent": [
("exact_mappings", "skos:exactMatch"),
],
"role_title": [
("exact_mappings", "schema:roleName"),
("close_mappings", "org:role"),
],
"security_level": [
("related_mappings", "dcterms:accessRights"),
],
"serves_finding_aids": [
("close_mappings", "schema:availableService"),
],
"settlement": [
("close_mappings", "schema:addressLocality"),
],
"source_creator": [
("exact_mappings", "dcterms:creator"),
],
"source_uri": [
("exact_mappings", "dcterms:source"),
],
"sub_collections": [
("exact_mappings", "dcterms:hasPart"),
],
"subregion": [
("close_mappings", "schema:addressRegion"),
],
"supersedes": [
("exact_mappings", "dcterms:replaces"),
],
"temperature_tolerance": [
("related_mappings", "schema:additionalProperty"),
],
"typical_domains": [
("close_mappings", "dcterms:subject"),
],
"typical_technical_features": [
("close_mappings", "schema:featureList"),
],
"unit_affiliation": [
("close_mappings", "org:memberOf"),
],
"used_sources": [
("exact_mappings", "dcterms:source"),
],
"used": [
("close_mappings", "prov:used"),
],
"was_derived_from": [
("exact_mappings", "prov:wasDerivedFrom"),
],
"was_revision_of": [
("exact_mappings", "prov:wasRevisionOf"),
],
# === 4 MISSING CENTRALIZED SLOTS ===
"archive_branches": [
("close_mappings", "org:hasSubOrganization"),
("related_mappings", "schema:subOrganization"),
],
"archive_department_of": [
("close_mappings", "org:subOrganizationOf"),
("related_mappings", "schema:parentOrganization"),
],
"parent_corporation": [
("exact_mappings", "schema:parentOrganization"),
("close_mappings", "org:subOrganizationOf"),
],
"wikidata_entity": [
("exact_mappings", "schema:sameAs"),
("close_mappings", "skos:exactMatch"),
],
}
# Name-shape heuristics that complement the explicit SLOT_MAPPINGS table.
# Each entry is (regex, [(mapping_type, predicate), ...]). get_mappings_for_slot
# tries every regex with re.match and collects the mappings of ALL entries that
# match, so the order below determines the order of the resulting predicates.
# NOTE(review): "schema:organization" and "schema:person" are lowercase here,
# whereas schema.org spells its class names Organization / Person -- confirm
# these CURIEs resolve to the intended terms.
PATTERN_MAPPINGS: List[Tuple[str, List[Tuple[str, str]]]] = [
    # *_date, *_url, *_id, *_name, *_description, *_type, *_code
    (r".*_date$", [("broad_mappings", "dcterms:date")]),
    (r".*_url$", [("broad_mappings", "schema:url")]),
    (r".*_id$", [("broad_mappings", "dcterms:identifier")]),
    (r".*_name$", [("broad_mappings", "rdfs:label")]),
    (r".*_description$", [("broad_mappings", "dcterms:description")]),
    (r".*_type$", [("broad_mappings", "dcterms:type")]),
    (r".*_code$", [("broad_mappings", "dcterms:identifier")]),
    # is_* / has_* prefixes (the only entry that is not a broad mapping)
    (r"^(is|has)_.*$", [("related_mappings", "rdf:type")]),
    # *_at (timestamps), *_by (agents), *_count, *_status, *_source
    (r".*_at$", [("broad_mappings", "prov:atTime")]),
    (r".*_by$", [("broad_mappings", "prov:wasAttributedTo")]),
    (r".*_count$", [("broad_mappings", "schema:numberOfItems")]),
    (r".*_status$", [("broad_mappings", "adms:status")]),
    (r".*_source$", [("broad_mappings", "dcterms:source")]),
    # *_ref / *_reference, *_time, *_timestamp
    (r".*_(ref|reference)$", [("broad_mappings", "dcterms:references")]),
    (r".*_time$", [("broad_mappings", "prov:atTime")]),
    (r".*_timestamp$", [("broad_mappings", "prov:generatedAtTime")]),
    # Contact details: *_address, *_email, *_phone
    (r".*_address$", [("broad_mappings", "schema:address")]),
    (r".*_email$", [("broad_mappings", "schema:email")]),
    (r".*_phone$", [("broad_mappings", "schema:telephone")]),
    # *_note(s), *_label, *_value, *_language, *_format, *_version
    (r".*_notes?$", [("broad_mappings", "skos:note")]),
    (r".*_label$", [("broad_mappings", "rdfs:label")]),
    (r".*_value$", [("broad_mappings", "rdf:value")]),
    (r".*_language$", [("broad_mappings", "dcterms:language")]),
    (r".*_format$", [("broad_mappings", "dcterms:format")]),
    (r".*_version$", [("broad_mappings", "schema:version")]),
    # *_scope, *_coverage, *_extent, *_size
    (r".*_scope$", [("broad_mappings", "dcterms:coverage")]),
    (r".*_coverage$", [("broad_mappings", "dcterms:coverage")]),
    (r".*_extent$", [("broad_mappings", "dcterms:extent")]),
    (r".*_size$", [("broad_mappings", "dcterms:extent")]),
    # *_entity / *_entities, *_observation(s), *_claim(s)
    (r".*_entit(y|ies)$", [("broad_mappings", "prov:Entity")]),
    (r".*_observations?$", [("broad_mappings", "prov:Entity")]),
    (r".*_claims?$", [("broad_mappings", "prov:Entity")]),
    # *_confidence, *_score
    (r".*_confidence$", [("broad_mappings", "prov:value")]),
    (r".*_score$", [("broad_mappings", "prov:value")]),
    # *_platform(s), *_standard(s), *_policy / *_policies, *_method, *_agent
    (r".*_platforms?$", [("broad_mappings", "dcterms:hasPart")]),
    (r".*_standards?$", [("broad_mappings", "dcterms:conformsTo")]),
    (r".*_polic(y|ies)$", [("broad_mappings", "dcterms:rights")]),
    (r".*_method$", [("broad_mappings", "prov:wasGeneratedBy")]),
    (r".*_agent$", [("broad_mappings", "prov:wasAttributedTo")]),
    # date_* / dates_* prefixes, names containing _of_, *_location
    (r"^dates?_.*$", [("broad_mappings", "dcterms:date")]),
    (r".*_of_.*$", [("broad_mappings", "dcterms:relation")]),
    (r".*_location$", [("broad_mappings", "schema:location")]),
    # *_organization(s) / *_organisation(s), *_person(s),
    # *_institution(s), *_custodian(s)
    (r".*_organi[sz]ations?$", [("broad_mappings", "schema:organization")]),
    (r".*_persons?$", [("broad_mappings", "schema:person")]),
    (r".*_institutions?$", [("broad_mappings", "schema:organization")]),
    (r".*_custodians?$", [("broad_mappings", "schema:organization")]),
    # *_area(s), *_items, *_list
    (r".*_areas?$", [("broad_mappings", "dcterms:subject")]),
    (r".*_items$", [("broad_mappings", "schema:itemListElement")]),
    (r".*_list$", [("broad_mappings", "schema:ItemList")]),
    # *_endpoint(s), *_file(s), *_body / *_bodies, *_link(s), *_portal(s)
    (r".*_endpoints?$", [("broad_mappings", "dcat:endpointURL")]),
    (r".*_files?$", [("broad_mappings", "dcterms:source")]),
    (r".*_bod(y|ies)$", [("broad_mappings", "org:Organization")]),
    (r".*_links?$", [("broad_mappings", "schema:url")]),
    (r".*_portals?$", [("broad_mappings", "schema:url")]),
]
def load_yaml(filepath: Path) -> dict:
    """Load a YAML file and return its top-level content.

    Args:
        filepath: Path of the YAML file to read (decoded as UTF-8).

    Returns:
        The parsed document. An empty document, for which
        ``yaml.safe_load`` yields ``None``, is returned as ``{}`` so the
        result honours the declared ``dict`` type and callers can safely
        test ``'slots' in data``.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)
    # safe_load returns None when the stream contains no document at all.
    return {} if data is None else data
def save_yaml(filepath: Path, data: dict) -> None:
    """Serialise *data* as YAML and overwrite *filepath* with the result.

    Args:
        filepath: Destination file; it is opened for writing as UTF-8.
        data: Object to dump.
    """
    dump_options = {
        "default_flow_style": False,  # block style rather than inline {...}
        "allow_unicode": True,        # emit non-ASCII characters unescaped
        "sort_keys": False,           # keep the key order of *data*
        "width": 120,                 # preferred maximum line width
    }
    with open(filepath, 'w', encoding='utf-8') as out:
        yaml.dump(data, out, **dump_options)
def get_slot_name_from_file(filepath: Path) -> Optional[str]:
    """Return the name of the first slot defined in a slot file.

    Args:
        filepath: Path of the slot YAML file.

    Returns:
        The first key of the document's ``slots`` mapping, or ``None`` when
        the document is empty, is not a mapping, or has no (non-empty)
        ``slots`` mapping.
    """
    data = load_yaml(filepath)
    # An empty file parses to None and a malformed one may parse to a list or
    # scalar; treat both as "no slot" instead of raising on the lookup.
    slots = data.get('slots') if isinstance(data, dict) else None
    if not isinstance(slots, dict):
        return None
    # First key in document order; None when the mapping is empty.
    return next(iter(slots), None)
def get_mappings_for_slot(
    slot_name: str,
    slot_mappings: Optional[Dict[str, List[Tuple[str, str]]]] = None,
    pattern_mappings: Optional[List[Tuple[str, List[Tuple[str, str]]]]] = None,
) -> Dict[str, List[str]]:
    """Get appropriate mappings for a slot based on its name.

    Exact-name entries are applied first, then every pattern whose regex
    matches the name. A predicate is assigned to at most one mapping type:
    the first assignment wins, so an exact-name entry takes precedence over a
    pattern entry for the same predicate. Without this, a slot such as
    ``page_url`` would receive ``schema:url`` as both an exact and a broad
    mapping, which is contradictory.

    Args:
        slot_name: Name of the slot to look up.
        slot_mappings: Exact-name table; defaults to ``SLOT_MAPPINGS``.
        pattern_mappings: ``(regex, mappings)`` table; defaults to
            ``PATTERN_MAPPINGS``.

    Returns:
        Mapping type -> list of predicates, containing only the mapping
        types that received at least one predicate.

    Raises:
        KeyError: If a table names a mapping type other than the five
            ``*_mappings`` keys initialised below.
    """
    # Resolve the defaults at call time so the module tables are only needed
    # when the caller does not supply its own.
    if slot_mappings is None:
        slot_mappings = SLOT_MAPPINGS
    if pattern_mappings is None:
        pattern_mappings = PATTERN_MAPPINGS

    mappings: Dict[str, List[str]] = {
        "exact_mappings": [],
        "close_mappings": [],
        "related_mappings": [],
        "narrow_mappings": [],
        "broad_mappings": [],
    }
    assigned = set()  # predicates already placed under some mapping type

    def _assign(mapping_type: str, predicate: str) -> None:
        bucket = mappings[mapping_type]  # KeyError on an unknown mapping type
        if predicate not in assigned:
            assigned.add(predicate)
            bucket.append(predicate)

    # Check direct match first
    for mapping_type, predicate in slot_mappings.get(slot_name, ()):
        _assign(mapping_type, predicate)

    # Check pattern matches; every matching pattern contributes
    for pattern, candidates in pattern_mappings:
        if re.match(pattern, slot_name):
            for mapping_type, predicate in candidates:
                _assign(mapping_type, predicate)

    # Remove empty mapping types
    return {k: v for k, v in mappings.items() if v}
def add_mappings_to_slot_file(filepath: Path, dry_run: bool = False) -> Tuple[bool, str]:
    """Add mappings to the first slot of a slot file if needed.

    Only the first slot defined in the file is processed. A predicate that
    the slot already carries under ANY mapping type is never added again, and
    a predicate is added under at most one mapping type per call.

    Args:
        filepath: Path of the slot YAML file.
        dry_run: When true, compute the additions but do not write the file.

    Returns:
        ``(changed, message)``. ``changed`` is true when at least one
        predicate was (or, in a dry run, would be) added; ``message`` then
        lists exactly the predicates added per mapping type. Otherwise
        ``message`` explains why nothing changed. Any exception is reported
        as ``(False, "Error: ...")`` rather than raised, so that a batch run
        can continue with the next file.
    """
    mapping_types = ("exact_mappings", "close_mappings", "related_mappings",
                     "narrow_mappings", "broad_mappings")
    try:
        data = load_yaml(filepath)

        # An empty document parses to None; treat that (and any non-mapping
        # content) as a file without slots rather than as an error.
        slots = data.get('slots') if isinstance(data, dict) else None
        if not slots or not isinstance(slots, dict):
            return False, "No slots defined"

        slot_name = next(iter(slots))
        slot_def = slots[slot_name]
        if slot_def is None:
            # "slot_name:" with an empty body parses to None.
            slot_def = slots[slot_name] = {}

        # Collect predicates already present under any mapping type.
        seen = set()
        for mapping_type in mapping_types:
            current = slot_def.get(mapping_type)
            if isinstance(current, str):
                seen.add(current)  # a lone scalar, not a list of characters
            elif current:
                seen.update(current)

        # Get new mappings
        new_mappings = get_mappings_for_slot(slot_name)
        if not new_mappings:
            return False, "No mappings found for slot"

        # Add what is genuinely new, remembering it so the same predicate is
        # not written again under a later mapping type.
        added = {}
        for mapping_type, predicates in new_mappings.items():
            fresh = [p for p in predicates if p not in seen]
            if not fresh:
                continue
            target = slot_def.get(mapping_type)
            if not isinstance(target, list):
                # Missing / empty value -> new list; scalar -> one-item list.
                target = [] if target is None else [target]
                slot_def[mapping_type] = target
            target.extend(fresh)
            seen.update(fresh)
            added[mapping_type] = fresh

        if not added:
            return False, "All candidate mappings already present"

        if not dry_run:
            save_yaml(filepath, data)

        return True, f"Added mappings: {added}"

    except Exception as e:
        # Deliberate catch-all: callers classify the outcome by this message.
        return False, f"Error: {str(e)}"
def process_all_slots(slots_dir: Path, dry_run: bool = False) -> dict:
    """Run add_mappings_to_slot_file over every ``*.yaml`` file in a directory.

    Files are visited in sorted path order and each outcome is filed under
    one of four keys of the returned dict:

    - ``"updated"``: ``(file name, message)`` for files reported as changed
    - ``"errors"``: ``(file name, message)`` when the message contains "Error"
    - ``"no_mappings"``: file name only, when the message contains
      "No mappings found"
    - ``"skipped"``: ``(file name, message)`` for everything else

    Args:
        slots_dir: Directory containing the slot YAML files.
        dry_run: Passed through to add_mappings_to_slot_file.
    """
    updated, skipped, errors, no_mappings = [], [], [], []

    for slot_path in sorted(slots_dir.glob("*.yaml")):
        changed, detail = add_mappings_to_slot_file(slot_path, dry_run)
        entry = (slot_path.name, detail)

        if changed:
            updated.append(entry)
            continue
        if "Error" in detail:
            errors.append(entry)
            continue
        if "No mappings found" in detail:
            # Only the name is recorded here; the message is always the same.
            no_mappings.append(slot_path.name)
            continue
        skipped.append(entry)

    return {
        "updated": updated,
        "skipped": skipped,
        "errors": errors,
        "no_mappings": no_mappings,
    }
def _print_name_list(heading: str, names: list, limit: int = 20) -> None:
    """Print *heading*, then up to *limit* names and a count of the rest."""
    print(f"\n{heading}")
    for name in names[:limit]:
        print(f" - {name}")
    if len(names) > limit:
        print(f" ... and {len(names) - limit} more")


def main() -> int:
    """Main entry point.

    Parses the command line, then either processes the single slot file named
    by ``--slot`` or every slot file in ``--slots-dir``, and prints a summary.
    With ``--dry-run`` nothing is written, but the slots that would be
    updated are still listed.

    Returns:
        Process exit status: 0 normally, 1 when the slots directory or the
        requested slot file does not exist.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser(description="Add semantic mappings to LinkML slot files")
    parser.add_argument("--dry-run", action="store_true", help="Don't modify files, just show what would be done")
    # NOTE(review): the default is an absolute path on one developer's
    # machine; on any other checkout --slots-dir has to be given explicitly.
    parser.add_argument("--slots-dir", type=Path,
                        default=Path("/Users/kempersc/apps/glam/schemas/20251121/linkml/modules/slots"),
                        help="Directory containing slot YAML files")
    parser.add_argument("--slot", type=str, help="Process only this specific slot file")
    args = parser.parse_args()

    # Globbing a missing directory yields nothing, which would otherwise be
    # reported as a successful run with all-zero counts.
    if not args.slots_dir.is_dir():
        print(f"Error: Slots directory not found: {args.slots_dir}", file=sys.stderr)
        return 1

    if args.slot:
        filepath = args.slots_dir / f"{args.slot}.yaml"
        if not filepath.exists():
            print(f"Error: Slot file not found: {filepath}", file=sys.stderr)
            return 1
        success, message = add_mappings_to_slot_file(filepath, args.dry_run)
        if success:
            outcome = 'Would update' if args.dry_run else 'Updated'
        else:
            outcome = 'Skipped'
        print(f"{filepath.name}: {outcome} - {message}")
        return 0

    results = process_all_slots(args.slots_dir, args.dry_run)

    print(f"\n{'DRY RUN - ' if args.dry_run else ''}Slot Mapping Results:")
    print("=" * 60)
    print(f"Updated: {len(results['updated'])}")
    print(f"Skipped (already has mappings): {len(results['skipped'])}")
    print(f"No mappings found: {len(results['no_mappings'])}")
    print(f"Errors: {len(results['errors'])}")

    if results['updated']:
        # Shown in dry-run mode too: that list is what a dry run is for.
        heading = "Slots that would be updated:" if args.dry_run else "Updated slots:"
        _print_name_list(heading, [name for name, _ in results['updated']])

    if results['no_mappings']:
        _print_name_list("Slots without mapping definitions:", results['no_mappings'])

    if results['errors']:
        print("\nErrors:")
        for name, msg in results['errors']:
            print(f" - {name}: {msg}")

    return 0


if __name__ == "__main__":
    # Propagate main()'s status so shells and CI can detect a failed run.
    raise SystemExit(main())