- Fix scope_note → finding_aid_scope_note in FindingAid.yaml - Remove duplicate wikidata_entity slot from CustodianType.yaml (import instead) - Remove duplicate rico_record_set_type from class_metadata_slots.yaml - Fix range types for equals_string compatibility (uriorcurie → string) - Move class names from close_mappings to see_also in 10 RecordSetTypes files - Generate all RDF formats: OWL, N-Triples, RDF/XML, N3, JSON-LD context - Sync schemas to frontend/public/schemas/ Files: 1,151 changed (includes prior CustodianType migration)
2420 lines
66 KiB
Python
2420 lines
66 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Add semantic mappings to LinkML slot files based on base ontologies.
|
|
|
|
This script maps slot names to predicates from:
|
|
- Schema.org (schema:)
|
|
- Dublin Core Terms (dcterms:)
|
|
- FOAF (foaf:)
|
|
- PROV-O (prov:)
|
|
- ORG Ontology (org:)
|
|
- SKOS (skos:)
|
|
- RiC-O (rico:)
|
|
- CIDOC-CRM (crm:)
|
|
- BIBFRAME (bf:)
|
|
- DCAT (dcat:)
|
|
- vCard (vcard:)
|
|
|
|
Mapping types (per SKOS):
|
|
- exact_mappings: Identical meaning (skos:exactMatch)
|
|
- close_mappings: Very similar meaning (skos:closeMatch)
|
|
- related_mappings: Semantically related (skos:relatedMatch)
|
|
- narrow_mappings: Mapped term is more specific than the slot (skos:narrowMatch)
|
|
- broad_mappings: Mapped term is more general than the slot (skos:broadMatch)
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import yaml
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional, Tuple
|
|
|
|
# Comprehensive predicate mapping table
|
|
# Format: slot_name_pattern -> [(mapping_type, predicate), ...]
|
|
SLOT_MAPPINGS: Dict[str, List[Tuple[str, str]]] = {
|
|
# === NAMES AND LABELS ===
|
|
"name": [
|
|
("exact_mappings", "schema:name"),
|
|
("exact_mappings", "foaf:name"),
|
|
("exact_mappings", "rdfs:label"),
|
|
],
|
|
"preferred_label": [
|
|
("exact_mappings", "skos:prefLabel"),
|
|
("exact_mappings", "schema:name"),
|
|
],
|
|
"alternative_names": [
|
|
("exact_mappings", "schema:alternateName"),
|
|
("exact_mappings", "skos:altLabel"),
|
|
],
|
|
"display_name": [
|
|
("exact_mappings", "rdfs:label"),
|
|
("close_mappings", "schema:name"),
|
|
],
|
|
"full_name": [
|
|
("exact_mappings", "foaf:name"),
|
|
("close_mappings", "schema:name"),
|
|
],
|
|
"given_name": [
|
|
("exact_mappings", "foaf:givenName"),
|
|
("exact_mappings", "schema:givenName"),
|
|
],
|
|
"family_name": [
|
|
("exact_mappings", "foaf:familyName"),
|
|
("exact_mappings", "schema:familyName"),
|
|
],
|
|
"first_name": [
|
|
("exact_mappings", "foaf:firstName"),
|
|
("close_mappings", "schema:givenName"),
|
|
],
|
|
"last_name": [
|
|
("exact_mappings", "foaf:lastName"),
|
|
("close_mappings", "schema:familyName"),
|
|
],
|
|
"surname": [
|
|
("exact_mappings", "foaf:surname"),
|
|
("exact_mappings", "schema:familyName"),
|
|
],
|
|
"base_surname": [
|
|
("close_mappings", "foaf:surname"),
|
|
("related_mappings", "schema:familyName"),
|
|
],
|
|
"nickname": [
|
|
("exact_mappings", "foaf:nick"),
|
|
("close_mappings", "schema:alternateName"),
|
|
],
|
|
"title": [
|
|
("exact_mappings", "dcterms:title"),
|
|
("exact_mappings", "schema:title"),
|
|
],
|
|
"agent_name": [
|
|
("exact_mappings", "foaf:name"),
|
|
("close_mappings", "prov:label"),
|
|
],
|
|
|
|
# === DESCRIPTIONS ===
|
|
"description": [
|
|
("exact_mappings", "dcterms:description"),
|
|
("exact_mappings", "schema:description"),
|
|
("exact_mappings", "rdfs:comment"),
|
|
],
|
|
"summary": [
|
|
("close_mappings", "schema:abstract"),
|
|
("close_mappings", "dcterms:abstract"),
|
|
],
|
|
"abstract": [
|
|
("exact_mappings", "dcterms:abstract"),
|
|
("exact_mappings", "schema:abstract"),
|
|
],
|
|
"notes": [
|
|
("exact_mappings", "skos:note"),
|
|
("close_mappings", "rdfs:comment"),
|
|
],
|
|
"comment": [
|
|
("exact_mappings", "rdfs:comment"),
|
|
("exact_mappings", "schema:comment"),
|
|
],
|
|
"remarks": [
|
|
("close_mappings", "skos:note"),
|
|
("related_mappings", "rdfs:comment"),
|
|
],
|
|
|
|
# === IDENTIFIERS ===
|
|
"identifier": [
|
|
("exact_mappings", "dcterms:identifier"),
|
|
("exact_mappings", "schema:identifier"),
|
|
],
|
|
"id": [
|
|
("exact_mappings", "dcterms:identifier"),
|
|
("close_mappings", "schema:identifier"),
|
|
],
|
|
"external_id": [
|
|
("close_mappings", "dcterms:identifier"),
|
|
("close_mappings", "schema:identifier"),
|
|
],
|
|
"code": [
|
|
("close_mappings", "schema:codeValue"),
|
|
("related_mappings", "dcterms:identifier"),
|
|
],
|
|
"accession_number": [
|
|
("close_mappings", "schema:identifier"),
|
|
("related_mappings", "dcterms:identifier"),
|
|
],
|
|
"isil_code": [
|
|
("narrow_mappings", "dcterms:identifier"),
|
|
],
|
|
"wikidata_id": [
|
|
("narrow_mappings", "dcterms:identifier"),
|
|
("related_mappings", "schema:sameAs"),
|
|
],
|
|
"viaf_id": [
|
|
("narrow_mappings", "dcterms:identifier"),
|
|
],
|
|
|
|
# === DATES AND TIMES ===
|
|
"date": [
|
|
("exact_mappings", "dcterms:date"),
|
|
("exact_mappings", "schema:date"),
|
|
],
|
|
"created_at": [
|
|
("exact_mappings", "dcterms:created"),
|
|
("exact_mappings", "schema:dateCreated"),
|
|
("exact_mappings", "prov:generatedAtTime"),
|
|
],
|
|
"created": [
|
|
("exact_mappings", "dcterms:created"),
|
|
("exact_mappings", "schema:dateCreated"),
|
|
],
|
|
"modified_at": [
|
|
("exact_mappings", "dcterms:modified"),
|
|
("exact_mappings", "schema:dateModified"),
|
|
],
|
|
"modified": [
|
|
("exact_mappings", "dcterms:modified"),
|
|
("exact_mappings", "schema:dateModified"),
|
|
],
|
|
"updated_at": [
|
|
("close_mappings", "dcterms:modified"),
|
|
("close_mappings", "schema:dateModified"),
|
|
],
|
|
"start_date": [
|
|
("exact_mappings", "schema:startDate"),
|
|
("close_mappings", "prov:startedAtTime"),
|
|
],
|
|
"end_date": [
|
|
("exact_mappings", "schema:endDate"),
|
|
("close_mappings", "prov:endedAtTime"),
|
|
],
|
|
"valid_from": [
|
|
("exact_mappings", "schema:validFrom"),
|
|
("close_mappings", "dcterms:valid"),
|
|
],
|
|
"valid_to": [
|
|
("exact_mappings", "schema:validThrough"),
|
|
("close_mappings", "dcterms:valid"),
|
|
],
|
|
"birth_date": [
|
|
("exact_mappings", "schema:birthDate"),
|
|
("exact_mappings", "foaf:birthday"),
|
|
],
|
|
"death_date": [
|
|
("exact_mappings", "schema:deathDate"),
|
|
],
|
|
"founding_date": [
|
|
("exact_mappings", "schema:foundingDate"),
|
|
],
|
|
"dissolution_date": [
|
|
("exact_mappings", "schema:dissolutionDate"),
|
|
],
|
|
"acquisition_date": [
|
|
("close_mappings", "schema:dateCreated"),
|
|
("related_mappings", "dcterms:date"),
|
|
],
|
|
"publication_date": [
|
|
("exact_mappings", "schema:datePublished"),
|
|
("exact_mappings", "dcterms:issued"),
|
|
],
|
|
"issued": [
|
|
("exact_mappings", "dcterms:issued"),
|
|
("exact_mappings", "schema:datePublished"),
|
|
],
|
|
"event_date": [
|
|
("close_mappings", "schema:startDate"),
|
|
("related_mappings", "dcterms:date"),
|
|
],
|
|
"timestamp": [
|
|
("close_mappings", "prov:atTime"),
|
|
("related_mappings", "dcterms:date"),
|
|
],
|
|
"follow_up_date": [
|
|
("close_mappings", "schema:scheduledTime"),
|
|
],
|
|
"approval_date": [
|
|
("close_mappings", "dcterms:dateAccepted"),
|
|
],
|
|
"allocation_date": [
|
|
("close_mappings", "dcterms:date"),
|
|
],
|
|
|
|
# === TEMPORAL EXPRESSIONS ===
|
|
"begin_of_the_begin": [
|
|
("related_mappings", "prov:startedAtTime"),
|
|
("narrow_mappings", "schema:startDate"),
|
|
],
|
|
"begin_of_the_end": [
|
|
("related_mappings", "prov:endedAtTime"),
|
|
],
|
|
"end_of_the_begin": [
|
|
("related_mappings", "prov:startedAtTime"),
|
|
],
|
|
"end_of_the_end": [
|
|
("related_mappings", "prov:endedAtTime"),
|
|
("narrow_mappings", "schema:endDate"),
|
|
],
|
|
"temporal_coverage": [
|
|
("exact_mappings", "dcterms:temporal"),
|
|
("exact_mappings", "schema:temporalCoverage"),
|
|
],
|
|
|
|
# === LOCATIONS ===
|
|
"location": [
|
|
("exact_mappings", "schema:location"),
|
|
("exact_mappings", "dcterms:spatial"),
|
|
],
|
|
"address": [
|
|
("exact_mappings", "schema:address"),
|
|
("exact_mappings", "vcard:hasAddress"),
|
|
],
|
|
"city": [
|
|
("exact_mappings", "schema:addressLocality"),
|
|
("exact_mappings", "vcard:locality"),
|
|
],
|
|
"country": [
|
|
("exact_mappings", "schema:addressCountry"),
|
|
("exact_mappings", "vcard:country-name"),
|
|
],
|
|
"region": [
|
|
("exact_mappings", "schema:addressRegion"),
|
|
("exact_mappings", "vcard:region"),
|
|
],
|
|
"postal_code": [
|
|
("exact_mappings", "schema:postalCode"),
|
|
("exact_mappings", "vcard:postal-code"),
|
|
],
|
|
"street_address": [
|
|
("exact_mappings", "schema:streetAddress"),
|
|
("exact_mappings", "vcard:street-address"),
|
|
],
|
|
"latitude": [
|
|
("exact_mappings", "schema:latitude"),
|
|
("exact_mappings", "geo:lat"),
|
|
],
|
|
"longitude": [
|
|
("exact_mappings", "schema:longitude"),
|
|
("exact_mappings", "geo:long"),
|
|
],
|
|
"coordinates": [
|
|
("close_mappings", "schema:geo"),
|
|
("related_mappings", "geo:geometry"),
|
|
],
|
|
"birth_place": [
|
|
("exact_mappings", "schema:birthPlace"),
|
|
],
|
|
"death_place": [
|
|
("exact_mappings", "schema:deathPlace"),
|
|
],
|
|
"place_of_origin": [
|
|
("close_mappings", "schema:birthPlace"),
|
|
("related_mappings", "prov:atLocation"),
|
|
],
|
|
"altitude": [
|
|
("exact_mappings", "schema:elevation"),
|
|
],
|
|
|
|
# === ORGANIZATIONS ===
|
|
"organization": [
|
|
("exact_mappings", "schema:organization"),
|
|
("exact_mappings", "org:organization"),
|
|
],
|
|
"affiliation": [
|
|
("exact_mappings", "schema:affiliation"),
|
|
("exact_mappings", "org:memberOf"),
|
|
],
|
|
"member_of": [
|
|
("exact_mappings", "org:memberOf"),
|
|
("exact_mappings", "schema:memberOf"),
|
|
],
|
|
"parent_organization": [
|
|
("exact_mappings", "schema:parentOrganization"),
|
|
("exact_mappings", "org:subOrganizationOf"),
|
|
],
|
|
"sub_organization": [
|
|
("exact_mappings", "schema:subOrganization"),
|
|
("exact_mappings", "org:hasSubOrganization"),
|
|
],
|
|
"department": [
|
|
("exact_mappings", "schema:department"),
|
|
("close_mappings", "org:hasUnit"),
|
|
],
|
|
"role": [
|
|
("exact_mappings", "org:role"),
|
|
("exact_mappings", "schema:roleName"),
|
|
],
|
|
"job_title": [
|
|
("exact_mappings", "schema:jobTitle"),
|
|
("close_mappings", "org:role"),
|
|
],
|
|
"employer": [
|
|
("exact_mappings", "schema:worksFor"),
|
|
("close_mappings", "org:organization"),
|
|
],
|
|
"founding_location": [
|
|
("exact_mappings", "schema:foundingLocation"),
|
|
],
|
|
"headquarters": [
|
|
("close_mappings", "org:hasRegisteredSite"),
|
|
("related_mappings", "schema:location"),
|
|
],
|
|
|
|
# === PEOPLE ===
|
|
"person": [
|
|
("exact_mappings", "schema:person"),
|
|
("exact_mappings", "foaf:Person"),
|
|
],
|
|
"author": [
|
|
("exact_mappings", "schema:author"),
|
|
("exact_mappings", "dcterms:creator"),
|
|
],
|
|
"creator": [
|
|
("exact_mappings", "dcterms:creator"),
|
|
("exact_mappings", "schema:creator"),
|
|
],
|
|
"contributor": [
|
|
("exact_mappings", "dcterms:contributor"),
|
|
("exact_mappings", "schema:contributor"),
|
|
],
|
|
"publisher": [
|
|
("exact_mappings", "dcterms:publisher"),
|
|
("exact_mappings", "schema:publisher"),
|
|
],
|
|
"editor": [
|
|
("exact_mappings", "schema:editor"),
|
|
("close_mappings", "dcterms:contributor"),
|
|
],
|
|
"curator": [
|
|
("close_mappings", "schema:contributor"),
|
|
("related_mappings", "dcterms:contributor"),
|
|
],
|
|
"gender": [
|
|
("exact_mappings", "schema:gender"),
|
|
("exact_mappings", "foaf:gender"),
|
|
],
|
|
"age": [
|
|
("exact_mappings", "foaf:age"),
|
|
("close_mappings", "schema:age"),
|
|
],
|
|
"nationality": [
|
|
("exact_mappings", "schema:nationality"),
|
|
],
|
|
"knows": [
|
|
("exact_mappings", "foaf:knows"),
|
|
("exact_mappings", "schema:knows"),
|
|
],
|
|
|
|
# === CONTACT INFO ===
|
|
"email": [
|
|
("exact_mappings", "schema:email"),
|
|
("exact_mappings", "foaf:mbox"),
|
|
],
|
|
"telephone": [
|
|
("exact_mappings", "schema:telephone"),
|
|
("exact_mappings", "foaf:phone"),
|
|
],
|
|
"phone": [
|
|
("exact_mappings", "schema:telephone"),
|
|
("exact_mappings", "foaf:phone"),
|
|
],
|
|
"fax": [
|
|
("exact_mappings", "schema:faxNumber"),
|
|
],
|
|
"homepage": [
|
|
("exact_mappings", "foaf:homepage"),
|
|
("exact_mappings", "schema:url"),
|
|
],
|
|
"website": [
|
|
("exact_mappings", "schema:url"),
|
|
("exact_mappings", "foaf:homepage"),
|
|
],
|
|
"url": [
|
|
("exact_mappings", "schema:url"),
|
|
("exact_mappings", "rdfs:seeAlso"),
|
|
],
|
|
|
|
# === COLLECTIONS AND RECORDS ===
|
|
"collection": [
|
|
("exact_mappings", "schema:collection"),
|
|
("close_mappings", "dcterms:isPartOf"),
|
|
],
|
|
"collection_name": [
|
|
("close_mappings", "schema:name"),
|
|
("related_mappings", "dcterms:title"),
|
|
],
|
|
"collection_description": [
|
|
("close_mappings", "schema:description"),
|
|
("related_mappings", "dcterms:description"),
|
|
],
|
|
"part_of": [
|
|
("exact_mappings", "dcterms:isPartOf"),
|
|
("exact_mappings", "schema:isPartOf"),
|
|
],
|
|
"has_part": [
|
|
("exact_mappings", "dcterms:hasPart"),
|
|
("exact_mappings", "schema:hasPart"),
|
|
],
|
|
"extent": [
|
|
("exact_mappings", "dcterms:extent"),
|
|
("close_mappings", "schema:size"),
|
|
],
|
|
"format": [
|
|
("exact_mappings", "dcterms:format"),
|
|
("exact_mappings", "schema:encodingFormat"),
|
|
],
|
|
"medium": [
|
|
("exact_mappings", "dcterms:medium"),
|
|
("close_mappings", "schema:material"),
|
|
],
|
|
"language": [
|
|
("exact_mappings", "dcterms:language"),
|
|
("exact_mappings", "schema:inLanguage"),
|
|
],
|
|
"subject": [
|
|
("exact_mappings", "dcterms:subject"),
|
|
("exact_mappings", "schema:about"),
|
|
],
|
|
"topic": [
|
|
("exact_mappings", "foaf:topic"),
|
|
("exact_mappings", "schema:about"),
|
|
],
|
|
"keywords": [
|
|
("exact_mappings", "schema:keywords"),
|
|
("close_mappings", "dcterms:subject"),
|
|
],
|
|
"arrangement": [
|
|
("related_mappings", "rico:hasOrganicProvenance"),
|
|
],
|
|
"arrangement_system": [
|
|
("related_mappings", "rico:hasRecordSetType"),
|
|
],
|
|
"cataloging_standard": [
|
|
("related_mappings", "dcterms:conformsTo"),
|
|
],
|
|
|
|
# === RIGHTS AND ACCESS ===
|
|
"rights": [
|
|
("exact_mappings", "dcterms:rights"),
|
|
("exact_mappings", "schema:license"),
|
|
],
|
|
"license": [
|
|
("exact_mappings", "dcterms:license"),
|
|
("exact_mappings", "schema:license"),
|
|
],
|
|
"access_rights": [
|
|
("exact_mappings", "dcterms:accessRights"),
|
|
],
|
|
"access_policy": [
|
|
("close_mappings", "dcterms:accessRights"),
|
|
],
|
|
"access_restrictions": [
|
|
("close_mappings", "dcterms:accessRights"),
|
|
],
|
|
"copyright": [
|
|
("exact_mappings", "schema:copyrightHolder"),
|
|
("related_mappings", "dcterms:rights"),
|
|
],
|
|
|
|
# === PROVENANCE ===
|
|
"source": [
|
|
("exact_mappings", "dcterms:source"),
|
|
("exact_mappings", "prov:wasDerivedFrom"),
|
|
],
|
|
"derived_from": [
|
|
("exact_mappings", "prov:wasDerivedFrom"),
|
|
("exact_mappings", "dcterms:source"),
|
|
],
|
|
"generated_by": [
|
|
("exact_mappings", "prov:wasGeneratedBy"),
|
|
],
|
|
"attributed_to": [
|
|
("exact_mappings", "prov:wasAttributedTo"),
|
|
],
|
|
"provenance": [
|
|
("exact_mappings", "dcterms:provenance"),
|
|
("exact_mappings", "prov:wasGeneratedBy"),
|
|
],
|
|
"extraction_agent": [
|
|
("close_mappings", "prov:wasAttributedTo"),
|
|
],
|
|
"extraction_method": [
|
|
("close_mappings", "prov:wasGeneratedBy"),
|
|
],
|
|
"retrieval_agent": [
|
|
("close_mappings", "prov:wasAttributedTo"),
|
|
],
|
|
|
|
# === RELATIONS ===
|
|
"related_to": [
|
|
("exact_mappings", "dcterms:relation"),
|
|
("exact_mappings", "schema:relatedTo"),
|
|
],
|
|
"references": [
|
|
("exact_mappings", "dcterms:references"),
|
|
("exact_mappings", "schema:citation"),
|
|
],
|
|
"replaces": [
|
|
("exact_mappings", "dcterms:replaces"),
|
|
],
|
|
"replaced_by": [
|
|
("exact_mappings", "dcterms:isReplacedBy"),
|
|
],
|
|
"version": [
|
|
("exact_mappings", "dcterms:hasVersion"),
|
|
("exact_mappings", "schema:version"),
|
|
],
|
|
"same_as": [
|
|
("exact_mappings", "schema:sameAs"),
|
|
("exact_mappings", "owl:sameAs"),
|
|
],
|
|
"see_also": [
|
|
("exact_mappings", "rdfs:seeAlso"),
|
|
("close_mappings", "dcterms:relation"),
|
|
],
|
|
|
|
# === TYPES AND CATEGORIES ===
|
|
"type": [
|
|
("exact_mappings", "dcterms:type"),
|
|
("exact_mappings", "rdf:type"),
|
|
],
|
|
"category": [
|
|
("exact_mappings", "schema:category"),
|
|
("close_mappings", "dcterms:type"),
|
|
],
|
|
"classification": [
|
|
("exact_mappings", "org:classification"),
|
|
("close_mappings", "dcterms:type"),
|
|
],
|
|
"genre": [
|
|
("exact_mappings", "schema:genre"),
|
|
("close_mappings", "dcterms:type"),
|
|
],
|
|
|
|
# === STATUS ===
|
|
"status": [
|
|
("exact_mappings", "schema:status"),
|
|
("close_mappings", "adms:status"),
|
|
],
|
|
"active": [
|
|
("close_mappings", "schema:status"),
|
|
],
|
|
"verified": [
|
|
("related_mappings", "prov:wasAttributedTo"),
|
|
],
|
|
|
|
# === DIGITAL/TECHNICAL ===
|
|
"api_endpoint": [
|
|
("close_mappings", "schema:url"),
|
|
("related_mappings", "dcat:endpointURL"),
|
|
],
|
|
"download_url": [
|
|
("exact_mappings", "dcat:downloadURL"),
|
|
("close_mappings", "schema:url"),
|
|
],
|
|
"access_url": [
|
|
("exact_mappings", "dcat:accessURL"),
|
|
],
|
|
"media_type": [
|
|
("exact_mappings", "dcat:mediaType"),
|
|
("exact_mappings", "dcterms:format"),
|
|
],
|
|
"file_format": [
|
|
("exact_mappings", "dcterms:format"),
|
|
("exact_mappings", "schema:fileFormat"),
|
|
],
|
|
"byte_size": [
|
|
("exact_mappings", "dcat:byteSize"),
|
|
("exact_mappings", "schema:contentSize"),
|
|
],
|
|
"checksum": [
|
|
("exact_mappings", "dcat:checksum"),
|
|
],
|
|
|
|
# === EVENTS ===
|
|
"event": [
|
|
("exact_mappings", "schema:event"),
|
|
],
|
|
"event_type": [
|
|
("close_mappings", "schema:eventType"),
|
|
("related_mappings", "dcterms:type"),
|
|
],
|
|
"activity": [
|
|
("exact_mappings", "prov:Activity"),
|
|
],
|
|
"activity_type": [
|
|
("close_mappings", "dcterms:type"),
|
|
],
|
|
"change_type": [
|
|
("close_mappings", "dcterms:type"),
|
|
],
|
|
"affected_by_event": [
|
|
("close_mappings", "prov:wasInfluencedBy"),
|
|
],
|
|
"affects_organization": [
|
|
("close_mappings", "prov:influenced"),
|
|
],
|
|
|
|
# === IMAGES AND MEDIA ===
|
|
"image": [
|
|
("exact_mappings", "schema:image"),
|
|
("exact_mappings", "foaf:depiction"),
|
|
],
|
|
"thumbnail": [
|
|
("exact_mappings", "schema:thumbnail"),
|
|
("exact_mappings", "foaf:thumbnail"),
|
|
],
|
|
"logo": [
|
|
("exact_mappings", "schema:logo"),
|
|
("exact_mappings", "foaf:logo"),
|
|
],
|
|
"photo": [
|
|
("exact_mappings", "schema:photo"),
|
|
("close_mappings", "foaf:depiction"),
|
|
],
|
|
|
|
# === QUANTITIES ===
|
|
"count": [
|
|
("close_mappings", "schema:itemCount"),
|
|
],
|
|
"quantity": [
|
|
("exact_mappings", "schema:quantity"),
|
|
],
|
|
"value": [
|
|
("exact_mappings", "rdf:value"),
|
|
("exact_mappings", "schema:value"),
|
|
],
|
|
"unit": [
|
|
("exact_mappings", "schema:unitCode"),
|
|
],
|
|
"price": [
|
|
("exact_mappings", "schema:price"),
|
|
],
|
|
"currency": [
|
|
("exact_mappings", "schema:priceCurrency"),
|
|
],
|
|
|
|
# === HERITAGE-SPECIFIC ===
|
|
"custodian": [
|
|
("close_mappings", "rico:hasOrIsHeldBy"),
|
|
],
|
|
"custody": [
|
|
("close_mappings", "rico:hasOrIsHeldBy"),
|
|
],
|
|
"provenance_text": [
|
|
("exact_mappings", "dcterms:provenance"),
|
|
],
|
|
"finding_aid": [
|
|
("related_mappings", "rico:hasInstantiation"),
|
|
],
|
|
"material": [
|
|
("exact_mappings", "schema:material"),
|
|
("exact_mappings", "dcterms:medium"),
|
|
],
|
|
"technique": [
|
|
("close_mappings", "schema:artMedium"),
|
|
],
|
|
"dimensions": [
|
|
("close_mappings", "schema:size"),
|
|
],
|
|
"condition": [
|
|
("related_mappings", "schema:itemCondition"),
|
|
],
|
|
|
|
# === CANONICAL VALUES ===
|
|
"canonical_value": [
|
|
("close_mappings", "skos:prefLabel"),
|
|
],
|
|
"normalized_value": [
|
|
("close_mappings", "skos:prefLabel"),
|
|
],
|
|
|
|
# === WEB/CLAIMS ===
|
|
"source_url": [
|
|
("exact_mappings", "schema:url"),
|
|
("exact_mappings", "dcterms:source"),
|
|
],
|
|
"claim_type": [
|
|
("close_mappings", "dcterms:type"),
|
|
],
|
|
"claim_value": [
|
|
("close_mappings", "rdf:value"),
|
|
],
|
|
"xpath": [
|
|
("related_mappings", "prov:atLocation"),
|
|
],
|
|
"confidence_score": [
|
|
("related_mappings", "prov:value"),
|
|
],
|
|
|
|
# === MISCELLANEOUS ===
|
|
"deliverables": [
|
|
("close_mappings", "schema:result"),
|
|
("related_mappings", "prov:generated"),
|
|
],
|
|
"capacity_items": [
|
|
("broad_mappings", "schema:maximumAttendeeCapacity"),
|
|
],
|
|
|
|
# === ADDITIONAL HERITAGE-SPECIFIC ===
|
|
"access_policy_ref": [
|
|
("close_mappings", "dcterms:accessRights"),
|
|
],
|
|
"acquisition_method": [
|
|
("close_mappings", "prov:wasGeneratedBy"),
|
|
("related_mappings", "schema:acquiredFrom"),
|
|
],
|
|
"acquisition_source": [
|
|
("exact_mappings", "schema:acquiredFrom"),
|
|
("close_mappings", "prov:wasAttributedTo"),
|
|
],
|
|
"affected_units": [
|
|
("close_mappings", "prov:influenced"),
|
|
],
|
|
"allocates": [
|
|
("close_mappings", "prov:generated"),
|
|
],
|
|
"allocated_by": [
|
|
("close_mappings", "prov:wasAttributedTo"),
|
|
],
|
|
"alternative_observed_names": [
|
|
("exact_mappings", "skos:altLabel"),
|
|
("close_mappings", "schema:alternateName"),
|
|
],
|
|
"appellation_language": [
|
|
("exact_mappings", "dcterms:language"),
|
|
],
|
|
"appellation_value": [
|
|
("exact_mappings", "rdf:value"),
|
|
("close_mappings", "skos:prefLabel"),
|
|
],
|
|
"appellation_type": [
|
|
("close_mappings", "dcterms:type"),
|
|
],
|
|
"appellations": [
|
|
("close_mappings", "skos:altLabel"),
|
|
],
|
|
"authentication_required": [
|
|
("related_mappings", "schema:authenticationType"),
|
|
],
|
|
"auxiliary_places": [
|
|
("close_mappings", "org:hasSite"),
|
|
("related_mappings", "schema:location"),
|
|
],
|
|
"auxiliary_platforms": [
|
|
("close_mappings", "dcterms:hasPart"),
|
|
],
|
|
"change_rationale": [
|
|
("close_mappings", "prov:hadReason"),
|
|
("related_mappings", "dcterms:description"),
|
|
],
|
|
"circumstances_of_death": [
|
|
("related_mappings", "schema:description"),
|
|
],
|
|
"collection_focus": [
|
|
("exact_mappings", "dcterms:subject"),
|
|
("close_mappings", "schema:about"),
|
|
],
|
|
"collection_of": [
|
|
("close_mappings", "dcterms:isPartOf"),
|
|
],
|
|
"collection_scope": [
|
|
("exact_mappings", "dcterms:coverage"),
|
|
("close_mappings", "schema:about"),
|
|
],
|
|
"collection_size": [
|
|
("exact_mappings", "schema:numberOfItems"),
|
|
("close_mappings", "dcterms:extent"),
|
|
],
|
|
"collection_type": [
|
|
("close_mappings", "dcterms:type"),
|
|
],
|
|
"collection_type_ref": [
|
|
("close_mappings", "dcterms:type"),
|
|
],
|
|
|
|
# === CONTACT AND SOCIAL ===
|
|
"contact_email": [
|
|
("exact_mappings", "schema:email"),
|
|
("exact_mappings", "vcard:hasEmail"),
|
|
],
|
|
"contact_info": [
|
|
("close_mappings", "schema:contactPoint"),
|
|
("close_mappings", "vcard:hasAddress"),
|
|
],
|
|
"contact_name": [
|
|
("close_mappings", "schema:name"),
|
|
],
|
|
"contact_person": [
|
|
("exact_mappings", "schema:contactPoint"),
|
|
],
|
|
"contact_phone": [
|
|
("exact_mappings", "schema:telephone"),
|
|
("exact_mappings", "vcard:hasTelephone"),
|
|
],
|
|
"social_media_links": [
|
|
("close_mappings", "schema:sameAs"),
|
|
],
|
|
"social_media_platform": [
|
|
("related_mappings", "dcterms:type"),
|
|
],
|
|
|
|
# === DIGITAL PRESENCE ===
|
|
"digital_presence": [
|
|
("close_mappings", "schema:url"),
|
|
],
|
|
"digital_platforms": [
|
|
("close_mappings", "dcterms:hasPart"),
|
|
],
|
|
"platform_name": [
|
|
("exact_mappings", "schema:name"),
|
|
],
|
|
"platform_type": [
|
|
("close_mappings", "dcterms:type"),
|
|
],
|
|
"platform_url": [
|
|
("exact_mappings", "schema:url"),
|
|
],
|
|
|
|
# === STAFF AND PERSONNEL ===
|
|
"staff_count": [
|
|
("close_mappings", "schema:numberOfEmployees"),
|
|
],
|
|
"staff_members": [
|
|
("close_mappings", "schema:employee"),
|
|
("related_mappings", "org:hasMember"),
|
|
],
|
|
"staff_role": [
|
|
("exact_mappings", "org:role"),
|
|
("exact_mappings", "schema:roleName"),
|
|
],
|
|
"position": [
|
|
("exact_mappings", "schema:jobTitle"),
|
|
("close_mappings", "org:role"),
|
|
],
|
|
"position_title": [
|
|
("exact_mappings", "schema:jobTitle"),
|
|
],
|
|
"employment_type": [
|
|
("exact_mappings", "schema:employmentType"),
|
|
],
|
|
|
|
# === ENRICHMENT AND PROVENANCE ===
|
|
"enrichment_status": [
|
|
("related_mappings", "adms:status"),
|
|
],
|
|
"enrichment_source": [
|
|
("close_mappings", "dcterms:source"),
|
|
],
|
|
"data_source": [
|
|
("exact_mappings", "dcterms:source"),
|
|
("close_mappings", "prov:wasDerivedFrom"),
|
|
],
|
|
"data_tier": [
|
|
("related_mappings", "dcterms:type"),
|
|
],
|
|
"extraction_date": [
|
|
("exact_mappings", "prov:generatedAtTime"),
|
|
("close_mappings", "dcterms:created"),
|
|
],
|
|
"extraction_notes": [
|
|
("close_mappings", "skos:note"),
|
|
],
|
|
"last_verified": [
|
|
("close_mappings", "dcterms:dateAccepted"),
|
|
],
|
|
"verified_by": [
|
|
("close_mappings", "prov:wasAttributedTo"),
|
|
],
|
|
"retrieved_on": [
|
|
("exact_mappings", "prov:generatedAtTime"),
|
|
],
|
|
"statement_created_at": [
|
|
("exact_mappings", "prov:generatedAtTime"),
|
|
("close_mappings", "dcterms:created"),
|
|
],
|
|
"source_archived_at": [
|
|
("close_mappings", "prov:generatedAtTime"),
|
|
],
|
|
|
|
# === GEOGRAPHIC ===
|
|
"geonames_id": [
|
|
("narrow_mappings", "dcterms:identifier"),
|
|
],
|
|
"wgs84_coordinates": [
|
|
("close_mappings", "geo:geometry"),
|
|
],
|
|
"geo_feature": [
|
|
("close_mappings", "geo:Feature"),
|
|
],
|
|
"admin_level": [
|
|
("related_mappings", "schema:addressRegion"),
|
|
],
|
|
"municipality": [
|
|
("close_mappings", "schema:addressLocality"),
|
|
],
|
|
"province": [
|
|
("close_mappings", "schema:addressRegion"),
|
|
],
|
|
"country_code": [
|
|
("close_mappings", "schema:addressCountry"),
|
|
],
|
|
|
|
# === OPENING HOURS AND SCHEDULES ===
|
|
"opening_hours": [
|
|
("exact_mappings", "schema:openingHours"),
|
|
],
|
|
"opening_hours_specification": [
|
|
("exact_mappings", "schema:openingHoursSpecification"),
|
|
],
|
|
"temporarily_closed": [
|
|
("related_mappings", "schema:publicAccess"),
|
|
],
|
|
|
|
# === ORGANIZATIONAL STRUCTURE ===
|
|
"legal_name": [
|
|
("exact_mappings", "schema:legalName"),
|
|
],
|
|
"legal_form": [
|
|
("close_mappings", "org:classification"),
|
|
("related_mappings", "schema:legalForm"),
|
|
],
|
|
"legal_status": [
|
|
("close_mappings", "org:classification"),
|
|
],
|
|
"registration_number": [
|
|
("close_mappings", "schema:identifier"),
|
|
("close_mappings", "org:identifier"),
|
|
],
|
|
"kvk_number": [
|
|
("narrow_mappings", "org:identifier"),
|
|
],
|
|
"organizational_unit": [
|
|
("exact_mappings", "org:hasUnit"),
|
|
],
|
|
"organizational_structure": [
|
|
("close_mappings", "org:organization"),
|
|
],
|
|
"parent_custodian": [
|
|
("exact_mappings", "org:subOrganizationOf"),
|
|
("exact_mappings", "schema:parentOrganization"),
|
|
],
|
|
"sub_custodians": [
|
|
("exact_mappings", "org:hasSubOrganization"),
|
|
("exact_mappings", "schema:subOrganization"),
|
|
],
|
|
|
|
# === FUNDING AND FINANCIAL ===
|
|
"funding_source": [
|
|
("close_mappings", "schema:funder"),
|
|
],
|
|
"funder": [
|
|
("exact_mappings", "schema:funder"),
|
|
],
|
|
"funding_amount": [
|
|
("close_mappings", "schema:amount"),
|
|
],
|
|
"budget": [
|
|
("related_mappings", "schema:price"),
|
|
],
|
|
"annual_budget": [
|
|
("related_mappings", "schema:price"),
|
|
],
|
|
|
|
# === SERVICES AND FACILITIES ===
|
|
"services": [
|
|
("exact_mappings", "schema:availableService"),
|
|
],
|
|
"facilities": [
|
|
("close_mappings", "schema:amenityFeature"),
|
|
],
|
|
"accessibility": [
|
|
("exact_mappings", "schema:accessibilityFeature"),
|
|
],
|
|
"accessibility_info": [
|
|
("close_mappings", "schema:accessibilitySummary"),
|
|
],
|
|
"public_access": [
|
|
("exact_mappings", "schema:publicAccess"),
|
|
],
|
|
|
|
# === PROJECTS AND ACTIVITIES ===
|
|
"project_name": [
|
|
("exact_mappings", "schema:name"),
|
|
],
|
|
"project_description": [
|
|
("exact_mappings", "schema:description"),
|
|
],
|
|
"project_status": [
|
|
("close_mappings", "schema:status"),
|
|
],
|
|
"project_start": [
|
|
("exact_mappings", "schema:startDate"),
|
|
],
|
|
"project_end": [
|
|
("exact_mappings", "schema:endDate"),
|
|
],
|
|
"project_url": [
|
|
("exact_mappings", "schema:url"),
|
|
],
|
|
|
|
# === EVENTS AND CHANGES ===
|
|
"event_description": [
|
|
("exact_mappings", "schema:description"),
|
|
],
|
|
"event_location": [
|
|
("exact_mappings", "schema:location"),
|
|
],
|
|
"predecessor": [
|
|
("exact_mappings", "dcterms:replaces"),
|
|
("close_mappings", "prov:wasDerivedFrom"),
|
|
],
|
|
"successor": [
|
|
("exact_mappings", "dcterms:isReplacedBy"),
|
|
],
|
|
"merged_into": [
|
|
("close_mappings", "dcterms:isReplacedBy"),
|
|
],
|
|
"merged_from": [
|
|
("close_mappings", "dcterms:replaces"),
|
|
],
|
|
"split_from": [
|
|
("close_mappings", "prov:wasDerivedFrom"),
|
|
],
|
|
"split_into": [
|
|
("close_mappings", "prov:generated"),
|
|
],
|
|
|
|
# === RATINGS AND REVIEWS ===
|
|
"rating": [
|
|
("exact_mappings", "schema:aggregateRating"),
|
|
],
|
|
"review_count": [
|
|
("close_mappings", "schema:reviewCount"),
|
|
],
|
|
"reviews": [
|
|
("exact_mappings", "schema:review"),
|
|
],
|
|
|
|
# === STANDARDS AND COMPLIANCE ===
|
|
"conforms_to": [
|
|
("exact_mappings", "dcterms:conformsTo"),
|
|
],
|
|
"standard_name": [
|
|
("close_mappings", "schema:name"),
|
|
],
|
|
"standard_version": [
|
|
("close_mappings", "schema:version"),
|
|
],
|
|
"certification": [
|
|
("close_mappings", "schema:hasCertification"),
|
|
],
|
|
|
|
# === ARCHIVAL-SPECIFIC ===
|
|
"fonds": [
|
|
("related_mappings", "rico:hasRecordSetType"),
|
|
],
|
|
"series": [
|
|
("related_mappings", "rico:hasRecordSetType"),
|
|
],
|
|
"finding_aid_url": [
|
|
("exact_mappings", "schema:url"),
|
|
],
|
|
"record_type": [
|
|
("close_mappings", "rico:hasRecordSetType"),
|
|
],
|
|
"record_count": [
|
|
("close_mappings", "schema:numberOfItems"),
|
|
],
|
|
"linear_meters": [
|
|
("close_mappings", "dcterms:extent"),
|
|
],
|
|
|
|
# === MUSEUM-SPECIFIC ===
|
|
"visitor_count": [
|
|
("close_mappings", "schema:attendeeCount"),
|
|
],
|
|
"exhibition": [
|
|
("close_mappings", "schema:event"),
|
|
],
|
|
"exhibitions": [
|
|
("close_mappings", "schema:event"),
|
|
],
|
|
"current_exhibitions": [
|
|
("close_mappings", "schema:event"),
|
|
],
|
|
"permanent_collection": [
|
|
("close_mappings", "schema:collection"),
|
|
],
|
|
|
|
# === LIBRARY-SPECIFIC ===
|
|
"catalog_url": [
|
|
("exact_mappings", "schema:url"),
|
|
],
|
|
"holdings": [
|
|
("close_mappings", "schema:collection"),
|
|
],
|
|
"circulation": [
|
|
("related_mappings", "schema:availableService"),
|
|
],
|
|
|
|
# === IDENTIFIERS (ADDITIONAL) ===
|
|
"orcid": [
|
|
("narrow_mappings", "dcterms:identifier"),
|
|
],
|
|
"ror_id": [
|
|
("narrow_mappings", "dcterms:identifier"),
|
|
],
|
|
"grid_id": [
|
|
("narrow_mappings", "dcterms:identifier"),
|
|
],
|
|
"doi": [
|
|
("narrow_mappings", "dcterms:identifier"),
|
|
],
|
|
"isbn": [
|
|
("exact_mappings", "schema:isbn"),
|
|
],
|
|
"issn": [
|
|
("exact_mappings", "schema:issn"),
|
|
],
|
|
|
|
# === WEB CLAIMS (ADDITIONAL) ===
|
|
"html_file": [
|
|
("related_mappings", "dcterms:source"),
|
|
],
|
|
"xpath_match_score": [
|
|
("related_mappings", "prov:value"),
|
|
],
|
|
"retrieval_timestamp": [
|
|
("exact_mappings", "prov:generatedAtTime"),
|
|
],
|
|
|
|
# === GHCID-SPECIFIC ===
|
|
"ghcid_current": [
|
|
("narrow_mappings", "dcterms:identifier"),
|
|
],
|
|
"ghcid_history": [
|
|
("related_mappings", "prov:wasRevisionOf"),
|
|
],
|
|
"ghcid_uuid": [
|
|
("narrow_mappings", "dcterms:identifier"),
|
|
],
|
|
"ghcid_numeric": [
|
|
("narrow_mappings", "dcterms:identifier"),
|
|
],
|
|
|
|
# === LINKEDIN-SPECIFIC ===
|
|
"linkedin_url": [
|
|
("exact_mappings", "schema:sameAs"),
|
|
("close_mappings", "foaf:page"),
|
|
],
|
|
"linkedin_slug": [
|
|
("related_mappings", "dcterms:identifier"),
|
|
],
|
|
"profile_headline": [
|
|
("close_mappings", "schema:description"),
|
|
],
|
|
"profile_summary": [
|
|
("close_mappings", "schema:description"),
|
|
],
|
|
"connections_count": [
|
|
("related_mappings", "schema:interactionCount"),
|
|
],
|
|
|
|
# === WIKIDATA-SPECIFIC ===
|
|
"wikidata_label": [
|
|
("exact_mappings", "skos:prefLabel"),
|
|
],
|
|
"wikidata_description": [
|
|
("exact_mappings", "schema:description"),
|
|
],
|
|
"wikidata_aliases": [
|
|
("exact_mappings", "skos:altLabel"),
|
|
],
|
|
"sitelinks": [
|
|
("close_mappings", "schema:sameAs"),
|
|
],
|
|
|
|
# === GOOGLE MAPS-SPECIFIC ===
|
|
"place_id": [
|
|
("narrow_mappings", "dcterms:identifier"),
|
|
],
|
|
"google_maps_url": [
|
|
("close_mappings", "schema:sameAs"),
|
|
],
|
|
"popular_times": [
|
|
("related_mappings", "schema:openingHours"),
|
|
],
|
|
"photo_count": [
|
|
("close_mappings", "schema:numberOfItems"),
|
|
],
|
|
"business_status": [
|
|
("close_mappings", "schema:status"),
|
|
],
|
|
|
|
# === ADDITIONAL UNMAPPED SLOTS ===
|
|
"confidence": [
|
|
("close_mappings", "prov:value"),
|
|
("related_mappings", "schema:ratingValue"),
|
|
],
|
|
"confidence_method": [
|
|
("close_mappings", "prov:wasGeneratedBy"),
|
|
],
|
|
"confidence_value": [
|
|
("close_mappings", "prov:value"),
|
|
],
|
|
"conflict_status": [
|
|
("related_mappings", "schema:status"),
|
|
],
|
|
"conservation_lab": [
|
|
("related_mappings", "schema:location"),
|
|
],
|
|
"contact": [
|
|
("exact_mappings", "schema:contactPoint"),
|
|
],
|
|
"contact_point": [
|
|
("exact_mappings", "schema:contactPoint"),
|
|
],
|
|
"content_hash": [
|
|
("exact_mappings", "dcat:checksum"),
|
|
],
|
|
"cost_usd": [
|
|
("close_mappings", "schema:price"),
|
|
],
|
|
"css_selector": [
|
|
("related_mappings", "prov:atLocation"),
|
|
],
|
|
"curation_activities": [
|
|
("close_mappings", "prov:Activity"),
|
|
],
|
|
"custodial_history": [
|
|
("exact_mappings", "dcterms:provenance"),
|
|
],
|
|
"custodian_names": [
|
|
("close_mappings", "skos:prefLabel"),
|
|
],
|
|
"custodian_observations": [
|
|
("related_mappings", "prov:Entity"),
|
|
],
|
|
"custodians": [
|
|
("close_mappings", "schema:organization"),
|
|
],
|
|
"data_license_policy": [
|
|
("close_mappings", "dcterms:license"),
|
|
],
|
|
"data_service_endpoints": [
|
|
("close_mappings", "dcat:endpointURL"),
|
|
],
|
|
"date_of_incorporation": [
|
|
("exact_mappings", "schema:foundingDate"),
|
|
],
|
|
"definition": [
|
|
("exact_mappings", "skos:definition"),
|
|
("close_mappings", "rdfs:comment"),
|
|
],
|
|
"delegation": [
|
|
("related_mappings", "prov:actedOnBehalfOf"),
|
|
],
|
|
"emic_name": [
|
|
("close_mappings", "skos:prefLabel"),
|
|
("related_mappings", "schema:name"),
|
|
],
|
|
"endorsed_standards": [
|
|
("close_mappings", "dcterms:conformsTo"),
|
|
],
|
|
"entity_observations": [
|
|
("related_mappings", "prov:Entity"),
|
|
],
|
|
"evidence_strength": [
|
|
("close_mappings", "prov:value"),
|
|
],
|
|
"exclusion_criteria": [
|
|
("related_mappings", "schema:description"),
|
|
],
|
|
"first_observation": [
|
|
("close_mappings", "prov:generatedAtTime"),
|
|
],
|
|
"format_types": [
|
|
("close_mappings", "dcterms:format"),
|
|
],
|
|
"formatted_address": [
|
|
("exact_mappings", "schema:address"),
|
|
("exact_mappings", "vcard:hasAddress"),
|
|
],
|
|
|
|
# === MORE UNMAPPED SLOTS ===
|
|
"geographic_scope": [
|
|
("exact_mappings", "dcterms:spatial"),
|
|
],
|
|
"government_level": [
|
|
("related_mappings", "org:classification"),
|
|
],
|
|
"has_digital_catalog": [
|
|
("related_mappings", "schema:url"),
|
|
],
|
|
"has_finding_aid": [
|
|
("related_mappings", "schema:url"),
|
|
],
|
|
"has_member": [
|
|
("exact_mappings", "org:hasMember"),
|
|
],
|
|
"has_opening_hours": [
|
|
("close_mappings", "schema:openingHours"),
|
|
],
|
|
"heritage_significance": [
|
|
("related_mappings", "dcterms:description"),
|
|
],
|
|
"heritage_type": [
|
|
("close_mappings", "dcterms:type"),
|
|
],
|
|
"heritage_types": [
|
|
("close_mappings", "dcterms:type"),
|
|
],
|
|
"historical_significance": [
|
|
("related_mappings", "dcterms:description"),
|
|
],
|
|
"hours_of_operation": [
|
|
("exact_mappings", "schema:openingHours"),
|
|
],
|
|
"html_content": [
|
|
("related_mappings", "schema:text"),
|
|
],
|
|
"identifier_scheme": [
|
|
("related_mappings", "dcterms:type"),
|
|
],
|
|
"identifier_type": [
|
|
("related_mappings", "dcterms:type"),
|
|
],
|
|
"identifier_value": [
|
|
("exact_mappings", "rdf:value"),
|
|
],
|
|
"identifiers": [
|
|
("close_mappings", "dcterms:identifier"),
|
|
],
|
|
"inclusion_criteria": [
|
|
("related_mappings", "schema:description"),
|
|
],
|
|
"industry": [
|
|
("exact_mappings", "schema:industry"),
|
|
],
|
|
"is_active": [
|
|
("close_mappings", "schema:status"),
|
|
],
|
|
"is_defunct": [
|
|
("close_mappings", "schema:status"),
|
|
],
|
|
"is_part_of": [
|
|
("exact_mappings", "dcterms:isPartOf"),
|
|
("exact_mappings", "schema:isPartOf"),
|
|
],
|
|
"is_primary": [
|
|
("related_mappings", "rdf:type"),
|
|
],
|
|
"issue_date": [
|
|
("exact_mappings", "dcterms:issued"),
|
|
],
|
|
"items": [
|
|
("close_mappings", "schema:itemListElement"),
|
|
],
|
|
"known_for": [
|
|
("close_mappings", "schema:knowsAbout"),
|
|
],
|
|
"label": [
|
|
("exact_mappings", "rdfs:label"),
|
|
("exact_mappings", "skos:prefLabel"),
|
|
],
|
|
"labels": [
|
|
("close_mappings", "skos:altLabel"),
|
|
],
|
|
"last_modified": [
|
|
("exact_mappings", "dcterms:modified"),
|
|
],
|
|
"last_observation": [
|
|
("close_mappings", "prov:generatedAtTime"),
|
|
],
|
|
"last_updated": [
|
|
("exact_mappings", "dcterms:modified"),
|
|
],
|
|
"latitude_dd": [
|
|
("exact_mappings", "schema:latitude"),
|
|
],
|
|
"legal_entity": [
|
|
("close_mappings", "org:FormalOrganization"),
|
|
],
|
|
"level": [
|
|
("related_mappings", "schema:position"),
|
|
],
|
|
"locations": [
|
|
("exact_mappings", "schema:location"),
|
|
],
|
|
"longitude_dd": [
|
|
("exact_mappings", "schema:longitude"),
|
|
],
|
|
"main_language": [
|
|
("exact_mappings", "dcterms:language"),
|
|
],
|
|
"maintained_by": [
|
|
("close_mappings", "schema:maintainer"),
|
|
],
|
|
"managed_by": [
|
|
("close_mappings", "prov:wasAttributedTo"),
|
|
],
|
|
"mandate": [
|
|
("related_mappings", "org:purpose"),
|
|
],
|
|
"membership_type": [
|
|
("close_mappings", "org:role"),
|
|
],
|
|
"metadata_standard": [
|
|
("close_mappings", "dcterms:conformsTo"),
|
|
],
|
|
"metadata_standards": [
|
|
("close_mappings", "dcterms:conformsTo"),
|
|
],
|
|
"mission": [
|
|
("close_mappings", "org:purpose"),
|
|
],
|
|
"mission_statement": [
|
|
("close_mappings", "org:purpose"),
|
|
],
|
|
"name_type": [
|
|
("related_mappings", "dcterms:type"),
|
|
],
|
|
"name_variant": [
|
|
("exact_mappings", "skos:altLabel"),
|
|
],
|
|
"name_variants": [
|
|
("exact_mappings", "skos:altLabel"),
|
|
],
|
|
"namespace": [
|
|
("related_mappings", "schema:url"),
|
|
],
|
|
"network_affiliations": [
|
|
("close_mappings", "org:memberOf"),
|
|
],
|
|
"note": [
|
|
("exact_mappings", "skos:note"),
|
|
],
|
|
"number_of_employees": [
|
|
("exact_mappings", "schema:numberOfEmployees"),
|
|
],
|
|
"observation_date": [
|
|
("close_mappings", "prov:generatedAtTime"),
|
|
],
|
|
"observation_notes": [
|
|
("close_mappings", "skos:note"),
|
|
],
|
|
"observation_source": [
|
|
("close_mappings", "dcterms:source"),
|
|
],
|
|
"observation_type": [
|
|
("related_mappings", "dcterms:type"),
|
|
],
|
|
"observations": [
|
|
("related_mappings", "prov:Entity"),
|
|
],
|
|
"official_name": [
|
|
("exact_mappings", "schema:legalName"),
|
|
],
|
|
"official_website": [
|
|
("exact_mappings", "schema:url"),
|
|
("exact_mappings", "foaf:homepage"),
|
|
],
|
|
"online_catalog": [
|
|
("close_mappings", "schema:url"),
|
|
],
|
|
"operated_by": [
|
|
("close_mappings", "schema:provider"),
|
|
],
|
|
"operating_status": [
|
|
("close_mappings", "schema:status"),
|
|
],
|
|
"operational_since": [
|
|
("close_mappings", "schema:foundingDate"),
|
|
],
|
|
"operators": [
|
|
("close_mappings", "schema:provider"),
|
|
],
|
|
"org_type": [
|
|
("close_mappings", "org:classification"),
|
|
],
|
|
"organization_type": [
|
|
("close_mappings", "org:classification"),
|
|
],
|
|
"original_language": [
|
|
("exact_mappings", "dcterms:language"),
|
|
],
|
|
"other_identifiers": [
|
|
("close_mappings", "dcterms:identifier"),
|
|
],
|
|
"other_names": [
|
|
("exact_mappings", "skos:altLabel"),
|
|
],
|
|
"owned_by": [
|
|
("close_mappings", "schema:ownedBy"),
|
|
],
|
|
"owner": [
|
|
("exact_mappings", "schema:ownedBy"),
|
|
],
|
|
"ownership": [
|
|
("close_mappings", "schema:ownedBy"),
|
|
],
|
|
"page_url": [
|
|
("exact_mappings", "schema:url"),
|
|
],
|
|
"parent": [
|
|
("exact_mappings", "schema:parentOrganization"),
|
|
],
|
|
"parent_id": [
|
|
("close_mappings", "dcterms:isPartOf"),
|
|
],
|
|
"participants": [
|
|
("exact_mappings", "schema:participant"),
|
|
],
|
|
"period": [
|
|
("close_mappings", "dcterms:temporal"),
|
|
],
|
|
"period_covered": [
|
|
("exact_mappings", "dcterms:temporal"),
|
|
],
|
|
"phone_number": [
|
|
("exact_mappings", "schema:telephone"),
|
|
],
|
|
"physical_address": [
|
|
("exact_mappings", "schema:address"),
|
|
],
|
|
"postal_address": [
|
|
("exact_mappings", "schema:postalAddress"),
|
|
],
|
|
"prefix": [
|
|
("related_mappings", "schema:honorificPrefix"),
|
|
],
|
|
"previous_name": [
|
|
("close_mappings", "skos:altLabel"),
|
|
],
|
|
"previous_names": [
|
|
("close_mappings", "skos:altLabel"),
|
|
],
|
|
"primary_type": [
|
|
("close_mappings", "dcterms:type"),
|
|
],
|
|
"priority": [
|
|
("related_mappings", "schema:position"),
|
|
],
|
|
"processing_status": [
|
|
("close_mappings", "adms:status"),
|
|
],
|
|
"profile_url": [
|
|
("exact_mappings", "schema:url"),
|
|
("exact_mappings", "foaf:page"),
|
|
],
|
|
"project_id": [
|
|
("close_mappings", "dcterms:identifier"),
|
|
],
|
|
"properties": [
|
|
("related_mappings", "schema:additionalProperty"),
|
|
],
|
|
"provider": [
|
|
("exact_mappings", "schema:provider"),
|
|
],
|
|
"purpose": [
|
|
("exact_mappings", "org:purpose"),
|
|
],
|
|
"qualifier": [
|
|
("related_mappings", "skos:note"),
|
|
],
|
|
"quality_score": [
|
|
("close_mappings", "prov:value"),
|
|
],
|
|
"raw_value": [
|
|
("close_mappings", "rdf:value"),
|
|
],
|
|
"reason": [
|
|
("close_mappings", "prov:hadReason"),
|
|
],
|
|
"record_id": [
|
|
("close_mappings", "dcterms:identifier"),
|
|
],
|
|
"reference_url": [
|
|
("exact_mappings", "schema:url"),
|
|
],
|
|
"registration_country": [
|
|
("close_mappings", "schema:addressCountry"),
|
|
],
|
|
"related": [
|
|
("exact_mappings", "dcterms:relation"),
|
|
],
|
|
"related_entities": [
|
|
("close_mappings", "dcterms:relation"),
|
|
],
|
|
"related_institutions": [
|
|
("close_mappings", "dcterms:relation"),
|
|
],
|
|
"related_organizations": [
|
|
("close_mappings", "dcterms:relation"),
|
|
],
|
|
"relationship_type": [
|
|
("related_mappings", "dcterms:type"),
|
|
],
|
|
"relevance_score": [
|
|
("close_mappings", "prov:value"),
|
|
],
|
|
"request_url": [
|
|
("close_mappings", "schema:url"),
|
|
],
|
|
"resource_type": [
|
|
("close_mappings", "dcterms:type"),
|
|
],
|
|
"response_format": [
|
|
("close_mappings", "dcterms:format"),
|
|
],
|
|
"result": [
|
|
("exact_mappings", "schema:result"),
|
|
],
|
|
"resulting_organization": [
|
|
("close_mappings", "prov:generated"),
|
|
],
|
|
"scope": [
|
|
("close_mappings", "dcterms:coverage"),
|
|
],
|
|
"scope_note": [
|
|
("exact_mappings", "skos:scopeNote"),
|
|
],
|
|
"search_url": [
|
|
("close_mappings", "schema:url"),
|
|
],
|
|
"secondary_type": [
|
|
("close_mappings", "dcterms:type"),
|
|
],
|
|
"section": [
|
|
("related_mappings", "dcterms:isPartOf"),
|
|
],
|
|
"service_type": [
|
|
("close_mappings", "dcterms:type"),
|
|
],
|
|
"short_name": [
|
|
("close_mappings", "skos:altLabel"),
|
|
],
|
|
"skills": [
|
|
("close_mappings", "schema:knowsAbout"),
|
|
],
|
|
"slug": [
|
|
("related_mappings", "dcterms:identifier"),
|
|
],
|
|
"snapshot_date": [
|
|
("close_mappings", "prov:generatedAtTime"),
|
|
],
|
|
"source_file": [
|
|
("close_mappings", "dcterms:source"),
|
|
],
|
|
"source_id": [
|
|
("close_mappings", "dcterms:source"),
|
|
],
|
|
"source_language": [
|
|
("close_mappings", "dcterms:language"),
|
|
],
|
|
"source_name": [
|
|
("close_mappings", "dcterms:source"),
|
|
],
|
|
"source_type": [
|
|
("related_mappings", "dcterms:type"),
|
|
],
|
|
"sources": [
|
|
("exact_mappings", "dcterms:source"),
|
|
],
|
|
"spatial_coverage": [
|
|
("exact_mappings", "dcterms:spatial"),
|
|
],
|
|
"specialization": [
|
|
("close_mappings", "schema:knowsAbout"),
|
|
],
|
|
"species_count": [
|
|
("close_mappings", "schema:numberOfItems"),
|
|
],
|
|
"start_year": [
|
|
("close_mappings", "schema:startDate"),
|
|
],
|
|
"state": [
|
|
("close_mappings", "schema:addressRegion"),
|
|
],
|
|
"storage_type": [
|
|
("related_mappings", "dcterms:type"),
|
|
],
|
|
"street": [
|
|
("exact_mappings", "schema:streetAddress"),
|
|
],
|
|
"subtype": [
|
|
("close_mappings", "dcterms:type"),
|
|
],
|
|
"suffix": [
|
|
("related_mappings", "schema:honorificSuffix"),
|
|
],
|
|
"supported_formats": [
|
|
("close_mappings", "dcterms:format"),
|
|
],
|
|
"synonyms": [
|
|
("exact_mappings", "skos:altLabel"),
|
|
],
|
|
"system_name": [
|
|
("close_mappings", "schema:name"),
|
|
],
|
|
"tags": [
|
|
("close_mappings", "schema:keywords"),
|
|
],
|
|
"target_audience": [
|
|
("close_mappings", "schema:audience"),
|
|
],
|
|
"temporal": [
|
|
("exact_mappings", "dcterms:temporal"),
|
|
],
|
|
"temporal_scope": [
|
|
("exact_mappings", "dcterms:temporal"),
|
|
],
|
|
"text": [
|
|
("exact_mappings", "schema:text"),
|
|
],
|
|
"text_content": [
|
|
("exact_mappings", "schema:text"),
|
|
],
|
|
"time_zone": [
|
|
("close_mappings", "schema:timeZone"),
|
|
],
|
|
"total_count": [
|
|
("close_mappings", "schema:numberOfItems"),
|
|
],
|
|
"type_label": [
|
|
("close_mappings", "rdfs:label"),
|
|
],
|
|
"uri": [
|
|
("exact_mappings", "schema:url"),
|
|
],
|
|
"usage_notes": [
|
|
("close_mappings", "skos:note"),
|
|
],
|
|
"valid_until": [
|
|
("exact_mappings", "schema:validThrough"),
|
|
],
|
|
"validation_status": [
|
|
("related_mappings", "adms:status"),
|
|
],
|
|
"verification_date": [
|
|
("close_mappings", "prov:generatedAtTime"),
|
|
],
|
|
"verification_method": [
|
|
("close_mappings", "prov:wasGeneratedBy"),
|
|
],
|
|
"verification_status": [
|
|
("close_mappings", "adms:status"),
|
|
],
|
|
"vision": [
|
|
("close_mappings", "org:purpose"),
|
|
],
|
|
"web_archive_url": [
|
|
("close_mappings", "schema:url"),
|
|
],
|
|
"web_claims": [
|
|
("related_mappings", "prov:Entity"),
|
|
],
|
|
"web_presence": [
|
|
("close_mappings", "schema:url"),
|
|
],
|
|
"website_url": [
|
|
("exact_mappings", "schema:url"),
|
|
],
|
|
"year_established": [
|
|
("close_mappings", "schema:foundingDate"),
|
|
],
|
|
"year_founded": [
|
|
("exact_mappings", "schema:foundingDate"),
|
|
],
|
|
|
|
# === REMAINING 50 UNMAPPED SLOTS ===
|
|
"class_metadata_slots": [
|
|
("related_mappings", "rdfs:Resource"),
|
|
],
|
|
"collections_under_responsibility": [
|
|
("close_mappings", "rico:hasOrIsHeldBy"),
|
|
("related_mappings", "schema:collection"),
|
|
],
|
|
"deceased": [
|
|
("close_mappings", "schema:deathDate"),
|
|
],
|
|
"encompasses": [
|
|
("close_mappings", "dcterms:hasPart"),
|
|
("related_mappings", "schema:containsPlace"),
|
|
],
|
|
"exposes_collections": [
|
|
("close_mappings", "schema:collection"),
|
|
],
|
|
"external_identifiers": [
|
|
("exact_mappings", "dcterms:identifier"),
|
|
("close_mappings", "schema:identifier"),
|
|
],
|
|
"extraction_metadata": [
|
|
("close_mappings", "prov:Entity"),
|
|
],
|
|
"finding_aids": [
|
|
("close_mappings", "rico:hasInstantiation"),
|
|
("related_mappings", "schema:url"),
|
|
],
|
|
"gender_identity": [
|
|
("exact_mappings", "schema:gender"),
|
|
],
|
|
"generates": [
|
|
("exact_mappings", "prov:generated"),
|
|
],
|
|
"governance_structure": [
|
|
("close_mappings", "org:organization"),
|
|
],
|
|
"holds_record_set_types": [
|
|
("close_mappings", "rico:hasRecordSetType"),
|
|
],
|
|
"humidity_tolerance": [
|
|
("related_mappings", "schema:additionalProperty"),
|
|
],
|
|
"identifier_format_used": [
|
|
("close_mappings", "dcterms:conformsTo"),
|
|
],
|
|
"identifies": [
|
|
("close_mappings", "dcterms:identifier"),
|
|
],
|
|
"initials": [
|
|
("close_mappings", "foaf:name"),
|
|
("related_mappings", "skos:altLabel"),
|
|
],
|
|
"jurisdiction": [
|
|
("exact_mappings", "schema:areaServed"),
|
|
("close_mappings", "dcterms:spatial"),
|
|
],
|
|
"justification": [
|
|
("close_mappings", "prov:hadReason"),
|
|
("related_mappings", "skos:note"),
|
|
],
|
|
"legal_jurisdiction": [
|
|
("exact_mappings", "schema:areaServed"),
|
|
],
|
|
"legal_responsibility_basis": [
|
|
("close_mappings", "dcterms:rights"),
|
|
],
|
|
"linkedin_profile_path": [
|
|
("close_mappings", "schema:sameAs"),
|
|
],
|
|
"method": [
|
|
("exact_mappings", "prov:wasGeneratedBy"),
|
|
],
|
|
"name_authority": [
|
|
("close_mappings", "dcterms:source"),
|
|
],
|
|
"name_validity_period": [
|
|
("close_mappings", "dcterms:temporal"),
|
|
],
|
|
"observation_context": [
|
|
("close_mappings", "prov:atLocation"),
|
|
],
|
|
"offers_donation_schemes": [
|
|
("related_mappings", "schema:availableService"),
|
|
],
|
|
"organizational_change_events": [
|
|
("close_mappings", "prov:Activity"),
|
|
],
|
|
"parent_collection": [
|
|
("exact_mappings", "dcterms:isPartOf"),
|
|
],
|
|
"platform_of": [
|
|
("close_mappings", "dcterms:isPartOf"),
|
|
],
|
|
"portal_data_sources": [
|
|
("close_mappings", "dcterms:source"),
|
|
],
|
|
"powered_by_cms": [
|
|
("related_mappings", "schema:softwareVersion"),
|
|
],
|
|
"preservation_level": [
|
|
("related_mappings", "dcterms:type"),
|
|
],
|
|
"price_currency": [
|
|
("exact_mappings", "schema:priceCurrency"),
|
|
],
|
|
"primary_register": [
|
|
("close_mappings", "dcterms:source"),
|
|
],
|
|
"pronouns": [
|
|
("related_mappings", "foaf:name"),
|
|
],
|
|
"protocol": [
|
|
("close_mappings", "dcterms:conformsTo"),
|
|
],
|
|
"provenance_statement": [
|
|
("exact_mappings", "dcterms:provenance"),
|
|
],
|
|
"response_formats": [
|
|
("close_mappings", "dcterms:format"),
|
|
],
|
|
"resulting_units": [
|
|
("close_mappings", "prov:generated"),
|
|
],
|
|
"rico_equivalent": [
|
|
("exact_mappings", "skos:exactMatch"),
|
|
],
|
|
"role_title": [
|
|
("exact_mappings", "schema:roleName"),
|
|
("close_mappings", "org:role"),
|
|
],
|
|
"security_level": [
|
|
("related_mappings", "dcterms:accessRights"),
|
|
],
|
|
"serves_finding_aids": [
|
|
("close_mappings", "schema:availableService"),
|
|
],
|
|
"settlement": [
|
|
("close_mappings", "schema:addressLocality"),
|
|
],
|
|
"source_creator": [
|
|
("exact_mappings", "dcterms:creator"),
|
|
],
|
|
"source_uri": [
|
|
("exact_mappings", "dcterms:source"),
|
|
],
|
|
"sub_collections": [
|
|
("exact_mappings", "dcterms:hasPart"),
|
|
],
|
|
"subregion": [
|
|
("close_mappings", "schema:addressRegion"),
|
|
],
|
|
"supersedes": [
|
|
("exact_mappings", "dcterms:replaces"),
|
|
],
|
|
"temperature_tolerance": [
|
|
("related_mappings", "schema:additionalProperty"),
|
|
],
|
|
"typical_domains": [
|
|
("close_mappings", "dcterms:subject"),
|
|
],
|
|
"typical_technical_features": [
|
|
("close_mappings", "schema:featureList"),
|
|
],
|
|
"unit_affiliation": [
|
|
("close_mappings", "org:memberOf"),
|
|
],
|
|
"used_sources": [
|
|
("exact_mappings", "dcterms:source"),
|
|
],
|
|
"used": [
|
|
("close_mappings", "prov:used"),
|
|
],
|
|
"was_derived_from": [
|
|
("exact_mappings", "prov:wasDerivedFrom"),
|
|
],
|
|
"was_revision_of": [
|
|
("exact_mappings", "prov:wasRevisionOf"),
|
|
],
|
|
|
|
# === 4 MISSING CENTRALIZED SLOTS ===
|
|
"archive_branches": [
|
|
("close_mappings", "org:hasSubOrganization"),
|
|
("related_mappings", "schema:subOrganization"),
|
|
],
|
|
"archive_department_of": [
|
|
("close_mappings", "org:subOrganizationOf"),
|
|
("related_mappings", "schema:parentOrganization"),
|
|
],
|
|
"parent_corporation": [
|
|
("exact_mappings", "schema:parentOrganization"),
|
|
("close_mappings", "org:subOrganizationOf"),
|
|
],
|
|
"wikidata_entity": [
|
|
("exact_mappings", "schema:sameAs"),
|
|
("close_mappings", "skos:exactMatch"),
|
|
],
|
|
}
|
|
|
|
# Fallback rules keyed on the *shape* of a slot name. Each row is
# (regular expression, mapping type, predicate); the expression is tested
# against the whole slot name from its first character. Rows are kept in
# their historical order because that order decides the order in which
# predicates are appended for a slot matching several rules.
_PATTERN_RULES = [
    (r".*_date$", "broad_mappings", "dcterms:date"),                     # ..._date
    (r".*_url$", "broad_mappings", "schema:url"),                        # ..._url
    (r".*_id$", "broad_mappings", "dcterms:identifier"),                 # ..._id
    (r".*_name$", "broad_mappings", "rdfs:label"),                       # ..._name
    (r".*_description$", "broad_mappings", "dcterms:description"),       # ..._description
    (r".*_type$", "broad_mappings", "dcterms:type"),                     # ..._type
    (r".*_code$", "broad_mappings", "dcterms:identifier"),               # ..._code
    (r"^(is|has)_.*$", "related_mappings", "rdf:type"),                  # is_... / has_...
    (r".*_at$", "broad_mappings", "prov:atTime"),                        # ..._at (timestamps)
    (r".*_by$", "broad_mappings", "prov:wasAttributedTo"),               # ..._by (agents)
    (r".*_count$", "broad_mappings", "schema:numberOfItems"),            # ..._count
    (r".*_status$", "broad_mappings", "adms:status"),                    # ..._status
    (r".*_source$", "broad_mappings", "dcterms:source"),                 # ..._source
    (r".*_(ref|reference)$", "broad_mappings", "dcterms:references"),    # ..._ref / ..._reference
    (r".*_time$", "broad_mappings", "prov:atTime"),                      # ..._time
    (r".*_timestamp$", "broad_mappings", "prov:generatedAtTime"),        # ..._timestamp
    (r".*_address$", "broad_mappings", "schema:address"),                # ..._address
    (r".*_email$", "broad_mappings", "schema:email"),                    # ..._email
    (r".*_phone$", "broad_mappings", "schema:telephone"),                # ..._phone
    (r".*_notes?$", "broad_mappings", "skos:note"),                      # ..._note / ..._notes
    (r".*_label$", "broad_mappings", "rdfs:label"),                      # ..._label
    (r".*_value$", "broad_mappings", "rdf:value"),                       # ..._value
    (r".*_language$", "broad_mappings", "dcterms:language"),             # ..._language
    (r".*_format$", "broad_mappings", "dcterms:format"),                 # ..._format
    (r".*_version$", "broad_mappings", "schema:version"),                # ..._version
    (r".*_scope$", "broad_mappings", "dcterms:coverage"),                # ..._scope
    (r".*_coverage$", "broad_mappings", "dcterms:coverage"),             # ..._coverage
    (r".*_extent$", "broad_mappings", "dcterms:extent"),                 # ..._extent
    (r".*_size$", "broad_mappings", "dcterms:extent"),                   # ..._size
    (r".*_entit(y|ies)$", "broad_mappings", "prov:Entity"),              # ..._entity / ..._entities
    (r".*_observations?$", "broad_mappings", "prov:Entity"),             # ..._observation(s)
    (r".*_claims?$", "broad_mappings", "prov:Entity"),                   # ..._claim(s)
    (r".*_confidence$", "broad_mappings", "prov:value"),                 # ..._confidence
    (r".*_score$", "broad_mappings", "prov:value"),                      # ..._score
    (r".*_platforms?$", "broad_mappings", "dcterms:hasPart"),            # ..._platform(s)
    (r".*_standards?$", "broad_mappings", "dcterms:conformsTo"),         # ..._standard(s)
    (r".*_polic(y|ies)$", "broad_mappings", "dcterms:rights"),           # ..._policy / ..._policies
    (r".*_method$", "broad_mappings", "prov:wasGeneratedBy"),            # ..._method
    (r".*_agent$", "broad_mappings", "prov:wasAttributedTo"),            # ..._agent
    (r"^dates?_.*$", "broad_mappings", "dcterms:date"),                  # date_... / dates_...
    (r".*_of_.*$", "broad_mappings", "dcterms:relation"),                # ..._of_... (relationships)
    (r".*_location$", "broad_mappings", "schema:location"),              # ..._location
    (r".*_organi[sz]ations?$", "broad_mappings", "schema:organization"), # ..._organization(s) / ..._organisation(s)
    (r".*_persons?$", "broad_mappings", "schema:person"),                # ..._person(s)
    (r".*_institutions?$", "broad_mappings", "schema:organization"),     # ..._institution(s)
    (r".*_custodians?$", "broad_mappings", "schema:organization"),       # ..._custodian(s)
    (r".*_areas?$", "broad_mappings", "dcterms:subject"),                # ..._area(s)
    (r".*_items$", "broad_mappings", "schema:itemListElement"),          # ..._items
    (r".*_list$", "broad_mappings", "schema:ItemList"),                  # ..._list
    (r".*_endpoints?$", "broad_mappings", "dcat:endpointURL"),           # ..._endpoint(s)
    (r".*_files?$", "broad_mappings", "dcterms:source"),                 # ..._file(s)
    (r".*_bod(y|ies)$", "broad_mappings", "org:Organization"),           # ..._body / ..._bodies
    (r".*_links?$", "broad_mappings", "schema:url"),                     # ..._link(s)
    (r".*_portals?$", "broad_mappings", "schema:url"),                   # ..._portal(s)
]

# Public table in the shape the rest of the module iterates over:
# [(pattern, [(mapping_type, predicate), ...]), ...]
PATTERN_MAPPINGS: List[Tuple[str, List[Tuple[str, str]]]] = [
    (rule_pattern, [(rule_type, rule_predicate)])
    for rule_pattern, rule_type, rule_predicate in _PATTERN_RULES
]
|
|
|
|
|
|
def load_yaml(filepath: Path) -> dict:
    """Read ``filepath`` as UTF-8 text and parse it with ``yaml.safe_load``.

    Args:
        filepath: Location of the YAML document to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the document. The signature
        advertises ``dict``, but nothing here enforces that.
    """
    with open(filepath, mode='r', encoding='utf-8') as stream:
        parsed = yaml.safe_load(stream)
    return parsed
|
|
|
|
|
|
def save_yaml(filepath: Path, data: dict) -> None:
    """Serialize ``data`` as block-style YAML and write it to ``filepath``.

    The document is rendered to a string *before* the target file is opened.
    Opening with mode ``'w'`` truncates the file immediately, so dumping
    straight into the handle would leave an empty or half-written slot file
    behind if serialization failed part-way through.

    Args:
        filepath: Destination file; overwritten if it already exists.
        data: Structure to serialize. Key order is preserved
            (``sort_keys=False``).

    Raises:
        yaml.YAMLError: If ``data`` cannot be represented; the target file is
            left untouched in that case.
        OSError: If the file cannot be opened or written.
    """
    # With no stream argument yaml.dump returns the rendered document as str.
    rendered = yaml.dump(
        data,
        default_flow_style=False,
        allow_unicode=True,
        sort_keys=False,
        width=120,
    )
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(rendered)
|
|
|
|
|
|
def get_slot_name_from_file(filepath: Path) -> Optional[str]:
    """Return the name of the first slot declared in a slot file.

    Args:
        filepath: Path of the slot YAML file to inspect.

    Returns:
        The first key of the file's top-level ``slots`` mapping, or ``None``
        when the document is not a mapping (for example an empty file), has no
        ``slots`` key, or ``slots`` is empty or not a mapping.
    """
    data = load_yaml(filepath)

    # An empty document loads as None and a list/scalar document has no keys;
    # neither can declare a slot, so report "no slot" instead of raising.
    if not isinstance(data, dict):
        return None

    slots = data.get('slots')
    if not isinstance(slots, dict) or not slots:
        return None

    # Dicts preserve insertion order, so this is the first slot in the file.
    return next(iter(slots))
|
|
|
|
|
|
def get_mappings_for_slot(slot_name: str) -> Dict[str, List[str]]:
    """Collect the ontology mappings that apply to ``slot_name``.

    Curated entries from ``SLOT_MAPPINGS`` are applied first, then every rule
    in ``PATTERN_MAPPINGS`` whose regular expression matches the slot name.

    A predicate is assigned to at most one mapping type. The SKOS mapping
    relations are mutually exclusive for a given pair, so once a curated entry
    (or an earlier pattern rule) has placed a predicate, a later pattern rule
    does not add the same predicate under a different type. Without this,
    ``catalog_url`` would list ``schema:url`` as both an exact and a broad
    mapping.

    Args:
        slot_name: Name of the slot to look up.

    Returns:
        A dict keyed by mapping type (``exact_mappings``, ``close_mappings``,
        ``related_mappings``, ``narrow_mappings``, ``broad_mappings``) holding
        the predicates for that type. Types with no predicates are omitted, so
        an empty dict means nothing is known about the slot.
    """
    mappings: Dict[str, List[str]] = {
        "exact_mappings": [],
        "close_mappings": [],
        "related_mappings": [],
        "narrow_mappings": [],
        "broad_mappings": [],
    }

    # Curated entries are trusted as written; only exact duplicates within a
    # single mapping type are dropped.
    for mapping_type, predicate in SLOT_MAPPINGS.get(slot_name, []):
        if predicate not in mappings[mapping_type]:
            mappings[mapping_type].append(predicate)

    # Every predicate placed so far, regardless of mapping type.
    assigned = {pred for preds in mappings.values() for pred in preds}

    # Pattern rules only fill gaps: they never restate an assigned predicate.
    for pattern, pattern_mappings in PATTERN_MAPPINGS:
        if not re.match(pattern, slot_name):
            continue
        for mapping_type, predicate in pattern_mappings:
            if predicate in assigned:
                continue
            mappings[mapping_type].append(predicate)
            assigned.add(predicate)

    # Drop mapping types that ended up empty.
    return {k: v for k, v in mappings.items() if v}
|
|
|
|
|
|
def add_mappings_to_slot_file(filepath: Path, dry_run: bool = False) -> Tuple[bool, str]:
    """Merge the known ontology mappings into the first slot of a slot file.

    Only the first slot declared under ``slots`` is considered. A predicate
    already present under *any* mapping type, either in the file or added
    earlier in this same call, is never added again under another type.

    Args:
        filepath: Slot YAML file to update.
        dry_run: When true, work out what would change but do not write.

    Returns:
        ``(changed, message)``. ``changed`` is true when at least one predicate
        was (or, in a dry run, would be) added. ``message`` is one of:

        * ``"Added mappings: {...}"`` / ``"Would add mappings: {...}"``,
          listing only the predicates that are new to the file;
        * ``"All mappings already present"``;
        * ``"No slots defined"``;
        * ``"No mappings found for slot"``;
        * ``"Error: <detail>"`` when reading, merging or writing failed.

        ``process_all_slots`` classifies results by the ``Error`` and
        ``No mappings found`` substrings, so those are kept stable.
    """
    mapping_types = ("exact_mappings", "close_mappings", "related_mappings",
                     "narrow_mappings", "broad_mappings")
    try:
        data = load_yaml(filepath)

        # An empty file loads as None; treat it like a file without slots.
        if not isinstance(data, dict) or not data.get('slots'):
            return False, "No slots defined"

        slot_name = next(iter(data['slots']))
        slot_def = data['slots'][slot_name]
        if slot_def is None:
            # "slot_name:" with no body parses to None; give it a mapping so
            # the new keys have somewhere to live.
            slot_def = data['slots'][slot_name] = {}

        # Predicates already recorded in the file, under any mapping type.
        # "or []" tolerates a key that is present but empty (None).
        known = set()
        for mapping_type in mapping_types:
            known.update(slot_def.get(mapping_type) or [])

        candidates = get_mappings_for_slot(slot_name)

        if not candidates:
            return False, "No mappings found for slot"

        # Track what is genuinely new so the message reports the real change.
        added: Dict[str, List[str]] = {}
        for mapping_type, predicates in candidates.items():
            for predicate in predicates:
                if predicate in known:
                    continue
                if not slot_def.get(mapping_type):
                    slot_def[mapping_type] = []
                slot_def[mapping_type].append(predicate)
                added.setdefault(mapping_type, []).append(predicate)
                # Prevents the same predicate landing under a second type.
                known.add(predicate)

        if not added:
            return False, "All mappings already present"

        if dry_run:
            return True, f"Would add mappings: {added}"

        save_yaml(filepath, data)
        return True, f"Added mappings: {added}"

    except Exception as e:
        # Deliberate best-effort boundary: one unreadable or malformed file
        # must not abort a batch run, so the failure is reported to the caller.
        return False, f"Error: {e}"
|
|
|
|
|
|
def process_all_slots(slots_dir: Path, dry_run: bool = False) -> dict:
    """Run ``add_mappings_to_slot_file`` over every ``*.yaml`` file in a directory.

    Files are visited in sorted order and each outcome is filed into one of
    four buckets according to the returned flag and message text.

    Args:
        slots_dir: Directory whose ``*.yaml`` files are processed
            (non-recursive).
        dry_run: Passed through unchanged to ``add_mappings_to_slot_file``.

    Returns:
        A dict with the keys ``updated``, ``skipped``, ``errors`` and
        ``no_mappings``. ``no_mappings`` holds bare file names; the other
        three hold ``(file name, message)`` tuples.
    """
    updated, skipped, errors, no_mappings = [], [], [], []

    for slot_file in sorted(slots_dir.glob("*.yaml")):
        changed, detail = add_mappings_to_slot_file(slot_file, dry_run)
        record = (slot_file.name, detail)

        if changed:
            updated.append(record)
            continue
        # Unsuccessful outcomes are told apart by the wording of the message.
        if "Error" in detail:
            errors.append(record)
            continue
        if "No mappings found" in detail:
            no_mappings.append(slot_file.name)
            continue
        skipped.append(record)

    return {
        "updated": updated,
        "skipped": skipped,
        "errors": errors,
        "no_mappings": no_mappings,
    }
|
|
|
|
|
|
def _print_name_list(title: str, names: List[str], limit: int = 20) -> None:
    """Print ``title`` followed by at most ``limit`` names and a remainder count."""
    print(f"\n{title}")
    for name in names[:limit]:
        print(f" - {name}")
    if len(names) > limit:
        print(f" ... and {len(names) - limit} more")


def main(argv: Optional[List[str]] = None) -> None:
    """Command-line entry point.

    With ``--slot NAME`` only ``<slots-dir>/NAME.yaml`` is processed and a
    one-line result is printed. Otherwise every slot file in the directory is
    processed and a summary is printed. In ``--dry-run`` mode no file is
    written, but the slots that would be updated are still listed.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behaviour.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Add semantic mappings to LinkML slot files")
    parser.add_argument("--dry-run", action="store_true", help="Don't modify files, just show what would be done")
    # NOTE(review): the default is a machine-specific absolute path; callers
    # on other machines must pass --slots-dir explicitly.
    parser.add_argument("--slots-dir", type=Path,
                        default=Path("/Users/kempersc/apps/glam/schemas/20251121/linkml/modules/slots"),
                        help="Directory containing slot YAML files")
    parser.add_argument("--slot", type=str, help="Process only this specific slot file")

    args = parser.parse_args(argv)

    if args.slot:
        filepath = args.slots_dir / f"{args.slot}.yaml"
        if not filepath.exists():
            print(f"Error: Slot file not found: {filepath}")
            return
        success, message = add_mappings_to_slot_file(filepath, args.dry_run)
        print(f"{filepath.name}: {'Updated' if success else 'Skipped'} - {message}")
        return

    results = process_all_slots(args.slots_dir, args.dry_run)

    print(f"\n{'DRY RUN - ' if args.dry_run else ''}Slot Mapping Results:")
    print("=" * 60)
    print(f"Updated: {len(results['updated'])}")
    print(f"Skipped (already has mappings): {len(results['skipped'])}")
    print(f"No mappings found: {len(results['no_mappings'])}")
    print(f"Errors: {len(results['errors'])}")

    if results['updated']:
        # Listed in dry-run mode as well: showing what would change is the
        # whole point of a dry run.
        title = "Slots that would be updated:" if args.dry_run else "Updated slots:"
        _print_name_list(title, [name for name, _ in results['updated']])

    if results['no_mappings']:
        _print_name_list("Slots without mapping definitions:", results['no_mappings'])

    if results['errors']:
        # Errors are never truncated; each one needs attention.
        print("\nErrors:")
        for name, msg in results['errors']:
            print(f" - {name}: {msg}")


if __name__ == "__main__":
    main()
|