#!/usr/bin/env python3 """ Add semantic mappings to LinkML slot files based on base ontologies. This script maps slot names to predicates from: - Schema.org (schema:) - Dublin Core Terms (dcterms:) - FOAF (foaf:) - PROV-O (prov:) - ORG Ontology (org:) - SKOS (skos:) - RiC-O (rico:) - CIDOC-CRM (crm:) - BIBFRAME (bf:) - DCAT (dcat:) - vCard (vcard:) Mapping types (per SKOS): - exact_mappings: Identical meaning (skos:exactMatch) - close_mappings: Very similar meaning (skos:closeMatch) - related_mappings: Semantically related (skos:relatedMatch) - narrow_mappings: More specific (skos:narrowMatch) - broad_mappings: More general (skos:broadMatch) """ import os import re import yaml from pathlib import Path from typing import Dict, List, Optional, Tuple # Comprehensive predicate mapping table # Format: slot_name_pattern -> [(mapping_type, predicate), ...] SLOT_MAPPINGS: Dict[str, List[Tuple[str, str]]] = { # === NAMES AND LABELS === "name": [ ("exact_mappings", "schema:name"), ("exact_mappings", "foaf:name"), ("exact_mappings", "rdfs:label"), ], "preferred_label": [ ("exact_mappings", "skos:prefLabel"), ("exact_mappings", "schema:name"), ], "alternative_names": [ ("exact_mappings", "schema:alternateName"), ("exact_mappings", "skos:altLabel"), ], "display_name": [ ("exact_mappings", "rdfs:label"), ("close_mappings", "schema:name"), ], "full_name": [ ("exact_mappings", "foaf:name"), ("close_mappings", "schema:name"), ], "given_name": [ ("exact_mappings", "foaf:givenName"), ("exact_mappings", "schema:givenName"), ], "family_name": [ ("exact_mappings", "foaf:familyName"), ("exact_mappings", "schema:familyName"), ], "first_name": [ ("exact_mappings", "foaf:firstName"), ("close_mappings", "schema:givenName"), ], "last_name": [ ("exact_mappings", "foaf:lastName"), ("close_mappings", "schema:familyName"), ], "surname": [ ("exact_mappings", "foaf:surname"), ("exact_mappings", "schema:familyName"), ], "base_surname": [ ("close_mappings", "foaf:surname"), ("related_mappings", 
"schema:familyName"), ], "nickname": [ ("exact_mappings", "foaf:nick"), ("close_mappings", "schema:alternateName"), ], "title": [ ("exact_mappings", "dcterms:title"), ("exact_mappings", "schema:title"), ], "agent_name": [ ("exact_mappings", "foaf:name"), ("close_mappings", "prov:label"), ], # === DESCRIPTIONS === "description": [ ("exact_mappings", "dcterms:description"), ("exact_mappings", "schema:description"), ("exact_mappings", "rdfs:comment"), ], "summary": [ ("close_mappings", "schema:abstract"), ("close_mappings", "dcterms:abstract"), ], "abstract": [ ("exact_mappings", "dcterms:abstract"), ("exact_mappings", "schema:abstract"), ], "notes": [ ("exact_mappings", "skos:note"), ("close_mappings", "rdfs:comment"), ], "comment": [ ("exact_mappings", "rdfs:comment"), ("exact_mappings", "schema:comment"), ], "remarks": [ ("close_mappings", "skos:note"), ("related_mappings", "rdfs:comment"), ], # === IDENTIFIERS === "identifier": [ ("exact_mappings", "dcterms:identifier"), ("exact_mappings", "schema:identifier"), ], "id": [ ("exact_mappings", "dcterms:identifier"), ("close_mappings", "schema:identifier"), ], "external_id": [ ("close_mappings", "dcterms:identifier"), ("close_mappings", "schema:identifier"), ], "code": [ ("close_mappings", "schema:codeValue"), ("related_mappings", "dcterms:identifier"), ], "accession_number": [ ("close_mappings", "schema:identifier"), ("related_mappings", "dcterms:identifier"), ], "isil_code": [ ("narrow_mappings", "dcterms:identifier"), ], "wikidata_id": [ ("narrow_mappings", "dcterms:identifier"), ("related_mappings", "schema:sameAs"), ], "viaf_id": [ ("narrow_mappings", "dcterms:identifier"), ], # === DATES AND TIMES === "date": [ ("exact_mappings", "dcterms:date"), ("exact_mappings", "schema:date"), ], "created_at": [ ("exact_mappings", "dcterms:created"), ("exact_mappings", "schema:dateCreated"), ("exact_mappings", "prov:generatedAtTime"), ], "created": [ ("exact_mappings", "dcterms:created"), ("exact_mappings", 
"schema:dateCreated"), ], "modified_at": [ ("exact_mappings", "dcterms:modified"), ("exact_mappings", "schema:dateModified"), ], "modified": [ ("exact_mappings", "dcterms:modified"), ("exact_mappings", "schema:dateModified"), ], "updated_at": [ ("close_mappings", "dcterms:modified"), ("close_mappings", "schema:dateModified"), ], "start_date": [ ("exact_mappings", "schema:startDate"), ("close_mappings", "prov:startedAtTime"), ], "end_date": [ ("exact_mappings", "schema:endDate"), ("close_mappings", "prov:endedAtTime"), ], "valid_from": [ ("exact_mappings", "schema:validFrom"), ("close_mappings", "dcterms:valid"), ], "valid_to": [ ("exact_mappings", "schema:validThrough"), ("close_mappings", "dcterms:valid"), ], "birth_date": [ ("exact_mappings", "schema:birthDate"), ("exact_mappings", "foaf:birthday"), ], "death_date": [ ("exact_mappings", "schema:deathDate"), ], "founding_date": [ ("exact_mappings", "schema:foundingDate"), ], "dissolution_date": [ ("exact_mappings", "schema:dissolutionDate"), ], "acquisition_date": [ ("close_mappings", "schema:dateCreated"), ("related_mappings", "dcterms:date"), ], "publication_date": [ ("exact_mappings", "schema:datePublished"), ("exact_mappings", "dcterms:issued"), ], "issued": [ ("exact_mappings", "dcterms:issued"), ("exact_mappings", "schema:datePublished"), ], "event_date": [ ("close_mappings", "schema:startDate"), ("related_mappings", "dcterms:date"), ], "timestamp": [ ("close_mappings", "prov:atTime"), ("related_mappings", "dcterms:date"), ], "follow_up_date": [ ("close_mappings", "schema:scheduledTime"), ], "approval_date": [ ("close_mappings", "dcterms:dateAccepted"), ], "allocation_date": [ ("close_mappings", "dcterms:date"), ], # === TEMPORAL EXPRESSIONS === "begin_of_the_begin": [ ("related_mappings", "prov:startedAtTime"), ("narrow_mappings", "schema:startDate"), ], "begin_of_the_end": [ ("related_mappings", "prov:endedAtTime"), ], "end_of_the_begin": [ ("related_mappings", "prov:startedAtTime"), ], "end_of_the_end": [ 
("related_mappings", "prov:endedAtTime"), ("narrow_mappings", "schema:endDate"), ], "temporal_coverage": [ ("exact_mappings", "dcterms:temporal"), ("exact_mappings", "schema:temporalCoverage"), ], # === LOCATIONS === "location": [ ("exact_mappings", "schema:location"), ("exact_mappings", "dcterms:spatial"), ], "address": [ ("exact_mappings", "schema:address"), ("exact_mappings", "vcard:hasAddress"), ], "city": [ ("exact_mappings", "schema:addressLocality"), ("exact_mappings", "vcard:locality"), ], "country": [ ("exact_mappings", "schema:addressCountry"), ("exact_mappings", "vcard:country-name"), ], "region": [ ("exact_mappings", "schema:addressRegion"), ("exact_mappings", "vcard:region"), ], "postal_code": [ ("exact_mappings", "schema:postalCode"), ("exact_mappings", "vcard:postal-code"), ], "street_address": [ ("exact_mappings", "schema:streetAddress"), ("exact_mappings", "vcard:street-address"), ], "latitude": [ ("exact_mappings", "schema:latitude"), ("exact_mappings", "geo:lat"), ], "longitude": [ ("exact_mappings", "schema:longitude"), ("exact_mappings", "geo:long"), ], "coordinates": [ ("close_mappings", "schema:geo"), ("related_mappings", "geo:geometry"), ], "birth_place": [ ("exact_mappings", "schema:birthPlace"), ], "death_place": [ ("exact_mappings", "schema:deathPlace"), ], "place_of_origin": [ ("close_mappings", "schema:birthPlace"), ("related_mappings", "prov:atLocation"), ], "altitude": [ ("exact_mappings", "schema:elevation"), ], # === ORGANIZATIONS === "organization": [ ("exact_mappings", "schema:organization"), ("exact_mappings", "org:organization"), ], "affiliation": [ ("exact_mappings", "schema:affiliation"), ("exact_mappings", "org:memberOf"), ], "member_of": [ ("exact_mappings", "org:memberOf"), ("exact_mappings", "schema:memberOf"), ], "parent_organization": [ ("exact_mappings", "schema:parentOrganization"), ("exact_mappings", "org:subOrganizationOf"), ], "sub_organization": [ ("exact_mappings", "schema:subOrganization"), ("exact_mappings", 
"org:hasSubOrganization"), ], "department": [ ("exact_mappings", "schema:department"), ("close_mappings", "org:hasUnit"), ], "role": [ ("exact_mappings", "org:role"), ("exact_mappings", "schema:roleName"), ], "job_title": [ ("exact_mappings", "schema:jobTitle"), ("close_mappings", "org:role"), ], "employer": [ ("exact_mappings", "schema:worksFor"), ("close_mappings", "org:organization"), ], "founding_location": [ ("exact_mappings", "schema:foundingLocation"), ], "headquarters": [ ("close_mappings", "org:hasRegisteredSite"), ("related_mappings", "schema:location"), ], # === PEOPLE === "person": [ ("exact_mappings", "schema:person"), ("exact_mappings", "foaf:Person"), ], "author": [ ("exact_mappings", "schema:author"), ("exact_mappings", "dcterms:creator"), ], "creator": [ ("exact_mappings", "dcterms:creator"), ("exact_mappings", "schema:creator"), ], "contributor": [ ("exact_mappings", "dcterms:contributor"), ("exact_mappings", "schema:contributor"), ], "publisher": [ ("exact_mappings", "dcterms:publisher"), ("exact_mappings", "schema:publisher"), ], "editor": [ ("exact_mappings", "schema:editor"), ("close_mappings", "dcterms:contributor"), ], "curator": [ ("close_mappings", "schema:contributor"), ("related_mappings", "dcterms:contributor"), ], "gender": [ ("exact_mappings", "schema:gender"), ("exact_mappings", "foaf:gender"), ], "age": [ ("exact_mappings", "foaf:age"), ("close_mappings", "schema:age"), ], "nationality": [ ("exact_mappings", "schema:nationality"), ], "knows": [ ("exact_mappings", "foaf:knows"), ("exact_mappings", "schema:knows"), ], # === CONTACT INFO === "email": [ ("exact_mappings", "schema:email"), ("exact_mappings", "foaf:mbox"), ], "telephone": [ ("exact_mappings", "schema:telephone"), ("exact_mappings", "foaf:phone"), ], "phone": [ ("exact_mappings", "schema:telephone"), ("exact_mappings", "foaf:phone"), ], "fax": [ ("exact_mappings", "schema:faxNumber"), ], "homepage": [ ("exact_mappings", "foaf:homepage"), ("exact_mappings", "schema:url"), 
], "website": [ ("exact_mappings", "schema:url"), ("exact_mappings", "foaf:homepage"), ], "url": [ ("exact_mappings", "schema:url"), ("exact_mappings", "rdfs:seeAlso"), ], # === COLLECTIONS AND RECORDS === "collection": [ ("exact_mappings", "schema:collection"), ("close_mappings", "dcterms:isPartOf"), ], "collection_name": [ ("close_mappings", "schema:name"), ("related_mappings", "dcterms:title"), ], "collection_description": [ ("close_mappings", "schema:description"), ("related_mappings", "dcterms:description"), ], "part_of": [ ("exact_mappings", "dcterms:isPartOf"), ("exact_mappings", "schema:isPartOf"), ], "has_part": [ ("exact_mappings", "dcterms:hasPart"), ("exact_mappings", "schema:hasPart"), ], "extent": [ ("exact_mappings", "dcterms:extent"), ("close_mappings", "schema:size"), ], "format": [ ("exact_mappings", "dcterms:format"), ("exact_mappings", "schema:encodingFormat"), ], "medium": [ ("exact_mappings", "dcterms:medium"), ("close_mappings", "schema:material"), ], "language": [ ("exact_mappings", "dcterms:language"), ("exact_mappings", "schema:inLanguage"), ], "subject": [ ("exact_mappings", "dcterms:subject"), ("exact_mappings", "schema:about"), ], "topic": [ ("exact_mappings", "foaf:topic"), ("exact_mappings", "schema:about"), ], "keywords": [ ("exact_mappings", "schema:keywords"), ("close_mappings", "dcterms:subject"), ], "arrangement": [ ("related_mappings", "rico:hasOrganicProvenance"), ], "arrangement_system": [ ("related_mappings", "rico:hasRecordSetType"), ], "cataloging_standard": [ ("related_mappings", "dcterms:conformsTo"), ], # === RIGHTS AND ACCESS === "rights": [ ("exact_mappings", "dcterms:rights"), ("exact_mappings", "schema:license"), ], "license": [ ("exact_mappings", "dcterms:license"), ("exact_mappings", "schema:license"), ], "access_rights": [ ("exact_mappings", "dcterms:accessRights"), ], "access_policy": [ ("close_mappings", "dcterms:accessRights"), ], "access_restrictions": [ ("close_mappings", "dcterms:accessRights"), ], 
"copyright": [ ("exact_mappings", "schema:copyrightHolder"), ("related_mappings", "dcterms:rights"), ], # === PROVENANCE === "source": [ ("exact_mappings", "dcterms:source"), ("exact_mappings", "prov:wasDerivedFrom"), ], "derived_from": [ ("exact_mappings", "prov:wasDerivedFrom"), ("exact_mappings", "dcterms:source"), ], "generated_by": [ ("exact_mappings", "prov:wasGeneratedBy"), ], "attributed_to": [ ("exact_mappings", "prov:wasAttributedTo"), ], "provenance": [ ("exact_mappings", "dcterms:provenance"), ("exact_mappings", "prov:wasGeneratedBy"), ], "extraction_agent": [ ("close_mappings", "prov:wasAttributedTo"), ], "extraction_method": [ ("close_mappings", "prov:wasGeneratedBy"), ], "retrieval_agent": [ ("close_mappings", "prov:wasAttributedTo"), ], # === RELATIONS === "related_to": [ ("exact_mappings", "dcterms:relation"), ("exact_mappings", "schema:relatedTo"), ], "references": [ ("exact_mappings", "dcterms:references"), ("exact_mappings", "schema:citation"), ], "replaces": [ ("exact_mappings", "dcterms:replaces"), ], "replaced_by": [ ("exact_mappings", "dcterms:isReplacedBy"), ], "version": [ ("exact_mappings", "dcterms:hasVersion"), ("exact_mappings", "schema:version"), ], "same_as": [ ("exact_mappings", "schema:sameAs"), ("exact_mappings", "owl:sameAs"), ], "see_also": [ ("exact_mappings", "rdfs:seeAlso"), ("close_mappings", "dcterms:relation"), ], # === TYPES AND CATEGORIES === "type": [ ("exact_mappings", "dcterms:type"), ("exact_mappings", "rdf:type"), ], "category": [ ("exact_mappings", "schema:category"), ("close_mappings", "dcterms:type"), ], "classification": [ ("exact_mappings", "org:classification"), ("close_mappings", "dcterms:type"), ], "genre": [ ("exact_mappings", "schema:genre"), ("close_mappings", "dcterms:type"), ], # === STATUS === "status": [ ("exact_mappings", "schema:status"), ("close_mappings", "adms:status"), ], "active": [ ("close_mappings", "schema:status"), ], "verified": [ ("related_mappings", "prov:wasAttributedTo"), ], # === 
DIGITAL/TECHNICAL === "api_endpoint": [ ("close_mappings", "schema:url"), ("related_mappings", "dcat:endpointURL"), ], "download_url": [ ("exact_mappings", "dcat:downloadURL"), ("close_mappings", "schema:url"), ], "access_url": [ ("exact_mappings", "dcat:accessURL"), ], "media_type": [ ("exact_mappings", "dcat:mediaType"), ("exact_mappings", "dcterms:format"), ], "file_format": [ ("exact_mappings", "dcterms:format"), ("exact_mappings", "schema:fileFormat"), ], "byte_size": [ ("exact_mappings", "dcat:byteSize"), ("exact_mappings", "schema:contentSize"), ], "checksum": [ ("exact_mappings", "dcat:checksum"), ], # === EVENTS === "event": [ ("exact_mappings", "schema:event"), ], "event_type": [ ("close_mappings", "schema:eventType"), ("related_mappings", "dcterms:type"), ], "activity": [ ("exact_mappings", "prov:Activity"), ], "activity_type": [ ("close_mappings", "dcterms:type"), ], "change_type": [ ("close_mappings", "dcterms:type"), ], "affected_by_event": [ ("close_mappings", "prov:wasInfluencedBy"), ], "affects_organization": [ ("close_mappings", "prov:influenced"), ], # === IMAGES AND MEDIA === "image": [ ("exact_mappings", "schema:image"), ("exact_mappings", "foaf:depiction"), ], "thumbnail": [ ("exact_mappings", "schema:thumbnail"), ("exact_mappings", "foaf:thumbnail"), ], "logo": [ ("exact_mappings", "schema:logo"), ("exact_mappings", "foaf:logo"), ], "photo": [ ("exact_mappings", "schema:photo"), ("close_mappings", "foaf:depiction"), ], # === QUANTITIES === "count": [ ("close_mappings", "schema:itemCount"), ], "quantity": [ ("exact_mappings", "schema:quantity"), ], "value": [ ("exact_mappings", "rdf:value"), ("exact_mappings", "schema:value"), ], "unit": [ ("exact_mappings", "schema:unitCode"), ], "price": [ ("exact_mappings", "schema:price"), ], "currency": [ ("exact_mappings", "schema:priceCurrency"), ], # === HERITAGE-SPECIFIC === "custodian": [ ("close_mappings", "rico:hasOrIsHeldBy"), ], "custody": [ ("close_mappings", "rico:hasOrIsHeldBy"), ], 
"provenance_text": [ ("exact_mappings", "dcterms:provenance"), ], "finding_aid": [ ("related_mappings", "rico:hasInstantiation"), ], "material": [ ("exact_mappings", "schema:material"), ("exact_mappings", "dcterms:medium"), ], "technique": [ ("close_mappings", "schema:artMedium"), ], "dimensions": [ ("close_mappings", "schema:size"), ], "condition": [ ("related_mappings", "schema:itemCondition"), ], # === CANONICAL VALUES === "canonical_value": [ ("close_mappings", "skos:prefLabel"), ], "normalized_value": [ ("close_mappings", "skos:prefLabel"), ], # === WEB/CLAIMS === "source_url": [ ("exact_mappings", "schema:url"), ("exact_mappings", "dcterms:source"), ], "claim_type": [ ("close_mappings", "dcterms:type"), ], "claim_value": [ ("close_mappings", "rdf:value"), ], "xpath": [ ("related_mappings", "prov:atLocation"), ], "confidence_score": [ ("related_mappings", "prov:value"), ], # === MISCELLANEOUS === "deliverables": [ ("close_mappings", "schema:result"), ("related_mappings", "prov:generated"), ], "capacity_items": [ ("broad_mappings", "schema:maximumAttendeeCapacity"), ], # === ADDITIONAL HERITAGE-SPECIFIC === "access_policy_ref": [ ("close_mappings", "dcterms:accessRights"), ], "acquisition_method": [ ("close_mappings", "prov:wasGeneratedBy"), ("related_mappings", "schema:acquiredFrom"), ], "acquisition_source": [ ("exact_mappings", "schema:acquiredFrom"), ("close_mappings", "prov:wasAttributedTo"), ], "affected_units": [ ("close_mappings", "prov:influenced"), ], "allocates": [ ("close_mappings", "prov:generated"), ], "allocated_by": [ ("close_mappings", "prov:wasAttributedTo"), ], "alternative_observed_names": [ ("exact_mappings", "skos:altLabel"), ("close_mappings", "schema:alternateName"), ], "appellation_language": [ ("exact_mappings", "dcterms:language"), ], "appellation_value": [ ("exact_mappings", "rdf:value"), ("close_mappings", "skos:prefLabel"), ], "appellation_type": [ ("close_mappings", "dcterms:type"), ], "appellations": [ ("close_mappings", 
"skos:altLabel"), ], "authentication_required": [ ("related_mappings", "schema:authenticationType"), ], "auxiliary_places": [ ("close_mappings", "org:hasSite"), ("related_mappings", "schema:location"), ], "auxiliary_platforms": [ ("close_mappings", "dcterms:hasPart"), ], "change_rationale": [ ("close_mappings", "prov:hadReason"), ("related_mappings", "dcterms:description"), ], "circumstances_of_death": [ ("related_mappings", "schema:description"), ], "collection_focus": [ ("exact_mappings", "dcterms:subject"), ("close_mappings", "schema:about"), ], "collection_of": [ ("close_mappings", "dcterms:isPartOf"), ], "collection_scope": [ ("exact_mappings", "dcterms:coverage"), ("close_mappings", "schema:about"), ], "collection_size": [ ("exact_mappings", "schema:numberOfItems"), ("close_mappings", "dcterms:extent"), ], "collection_type": [ ("close_mappings", "dcterms:type"), ], "collection_type_ref": [ ("close_mappings", "dcterms:type"), ], # === CONTACT AND SOCIAL === "contact_email": [ ("exact_mappings", "schema:email"), ("exact_mappings", "vcard:hasEmail"), ], "contact_info": [ ("close_mappings", "schema:contactPoint"), ("close_mappings", "vcard:hasAddress"), ], "contact_name": [ ("close_mappings", "schema:name"), ], "contact_person": [ ("exact_mappings", "schema:contactPoint"), ], "contact_phone": [ ("exact_mappings", "schema:telephone"), ("exact_mappings", "vcard:hasTelephone"), ], "social_media_links": [ ("close_mappings", "schema:sameAs"), ], "social_media_platform": [ ("related_mappings", "dcterms:type"), ], # === DIGITAL PRESENCE === "digital_presence": [ ("close_mappings", "schema:url"), ], "digital_platforms": [ ("close_mappings", "dcterms:hasPart"), ], "platform_name": [ ("exact_mappings", "schema:name"), ], "platform_type": [ ("close_mappings", "dcterms:type"), ], "platform_url": [ ("exact_mappings", "schema:url"), ], # === STAFF AND PERSONNEL === "staff_count": [ ("close_mappings", "schema:numberOfEmployees"), ], "staff_members": [ ("close_mappings", 
"schema:employee"), ("related_mappings", "org:hasMember"), ], "staff_role": [ ("exact_mappings", "org:role"), ("exact_mappings", "schema:roleName"), ], "position": [ ("exact_mappings", "schema:jobTitle"), ("close_mappings", "org:role"), ], "position_title": [ ("exact_mappings", "schema:jobTitle"), ], "employment_type": [ ("exact_mappings", "schema:employmentType"), ], # === ENRICHMENT AND PROVENANCE === "enrichment_status": [ ("related_mappings", "adms:status"), ], "enrichment_source": [ ("close_mappings", "dcterms:source"), ], "data_source": [ ("exact_mappings", "dcterms:source"), ("close_mappings", "prov:wasDerivedFrom"), ], "data_tier": [ ("related_mappings", "dcterms:type"), ], "extraction_date": [ ("exact_mappings", "prov:generatedAtTime"), ("close_mappings", "dcterms:created"), ], "extraction_notes": [ ("close_mappings", "skos:note"), ], "last_verified": [ ("close_mappings", "dcterms:dateAccepted"), ], "verified_by": [ ("close_mappings", "prov:wasAttributedTo"), ], "retrieved_on": [ ("exact_mappings", "prov:generatedAtTime"), ], "statement_created_at": [ ("exact_mappings", "prov:generatedAtTime"), ("close_mappings", "dcterms:created"), ], "source_archived_at": [ ("close_mappings", "prov:generatedAtTime"), ], # === GEOGRAPHIC === "geonames_id": [ ("narrow_mappings", "dcterms:identifier"), ], "wgs84_coordinates": [ ("close_mappings", "geo:geometry"), ], "geo_feature": [ ("close_mappings", "geo:Feature"), ], "admin_level": [ ("related_mappings", "schema:addressRegion"), ], "municipality": [ ("close_mappings", "schema:addressLocality"), ], "province": [ ("close_mappings", "schema:addressRegion"), ], "country_code": [ ("close_mappings", "schema:addressCountry"), ], # === OPENING HOURS AND SCHEDULES === "opening_hours": [ ("exact_mappings", "schema:openingHours"), ], "opening_hours_specification": [ ("exact_mappings", "schema:openingHoursSpecification"), ], "temporarily_closed": [ ("related_mappings", "schema:publicAccess"), ], # === ORGANIZATIONAL STRUCTURE === 
"legal_name": [ ("exact_mappings", "schema:legalName"), ], "legal_form": [ ("close_mappings", "org:classification"), ("related_mappings", "schema:legalForm"), ], "legal_status": [ ("close_mappings", "org:classification"), ], "registration_number": [ ("close_mappings", "schema:identifier"), ("close_mappings", "org:identifier"), ], "kvk_number": [ ("narrow_mappings", "org:identifier"), ], "organizational_unit": [ ("exact_mappings", "org:hasUnit"), ], "organizational_structure": [ ("close_mappings", "org:organization"), ], "parent_custodian": [ ("exact_mappings", "org:subOrganizationOf"), ("exact_mappings", "schema:parentOrganization"), ], "sub_custodians": [ ("exact_mappings", "org:hasSubOrganization"), ("exact_mappings", "schema:subOrganization"), ], # === FUNDING AND FINANCIAL === "funding_source": [ ("close_mappings", "schema:funder"), ], "funder": [ ("exact_mappings", "schema:funder"), ], "funding_amount": [ ("close_mappings", "schema:amount"), ], "budget": [ ("related_mappings", "schema:price"), ], "annual_budget": [ ("related_mappings", "schema:price"), ], # === SERVICES AND FACILITIES === "services": [ ("exact_mappings", "schema:availableService"), ], "facilities": [ ("close_mappings", "schema:amenityFeature"), ], "accessibility": [ ("exact_mappings", "schema:accessibilityFeature"), ], "accessibility_info": [ ("close_mappings", "schema:accessibilitySummary"), ], "public_access": [ ("exact_mappings", "schema:publicAccess"), ], # === PROJECTS AND ACTIVITIES === "project_name": [ ("exact_mappings", "schema:name"), ], "project_description": [ ("exact_mappings", "schema:description"), ], "project_status": [ ("close_mappings", "schema:status"), ], "project_start": [ ("exact_mappings", "schema:startDate"), ], "project_end": [ ("exact_mappings", "schema:endDate"), ], "project_url": [ ("exact_mappings", "schema:url"), ], # === EVENTS AND CHANGES === "event_description": [ ("exact_mappings", "schema:description"), ], "event_location": [ ("exact_mappings", 
"schema:location"), ], "predecessor": [ ("exact_mappings", "dcterms:replaces"), ("close_mappings", "prov:wasDerivedFrom"), ], "successor": [ ("exact_mappings", "dcterms:isReplacedBy"), ], "merged_into": [ ("close_mappings", "dcterms:isReplacedBy"), ], "merged_from": [ ("close_mappings", "dcterms:replaces"), ], "split_from": [ ("close_mappings", "prov:wasDerivedFrom"), ], "split_into": [ ("close_mappings", "prov:generated"), ], # === RATINGS AND REVIEWS === "rating": [ ("exact_mappings", "schema:aggregateRating"), ], "review_count": [ ("close_mappings", "schema:reviewCount"), ], "reviews": [ ("exact_mappings", "schema:review"), ], # === STANDARDS AND COMPLIANCE === "conforms_to": [ ("exact_mappings", "dcterms:conformsTo"), ], "standard_name": [ ("close_mappings", "schema:name"), ], "standard_version": [ ("close_mappings", "schema:version"), ], "certification": [ ("close_mappings", "schema:hasCertification"), ], # === ARCHIVAL-SPECIFIC === "fonds": [ ("related_mappings", "rico:hasRecordSetType"), ], "series": [ ("related_mappings", "rico:hasRecordSetType"), ], "finding_aid_url": [ ("exact_mappings", "schema:url"), ], "record_type": [ ("close_mappings", "rico:hasRecordSetType"), ], "record_count": [ ("close_mappings", "schema:numberOfItems"), ], "linear_meters": [ ("close_mappings", "dcterms:extent"), ], # === MUSEUM-SPECIFIC === "visitor_count": [ ("close_mappings", "schema:attendeeCount"), ], "exhibition": [ ("close_mappings", "schema:event"), ], "exhibitions": [ ("close_mappings", "schema:event"), ], "current_exhibitions": [ ("close_mappings", "schema:event"), ], "permanent_collection": [ ("close_mappings", "schema:collection"), ], # === LIBRARY-SPECIFIC === "catalog_url": [ ("exact_mappings", "schema:url"), ], "holdings": [ ("close_mappings", "schema:collection"), ], "circulation": [ ("related_mappings", "schema:availableService"), ], # === IDENTIFIERS (ADDITIONAL) === "orcid": [ ("narrow_mappings", "dcterms:identifier"), ], "ror_id": [ ("narrow_mappings", 
"dcterms:identifier"), ], "grid_id": [ ("narrow_mappings", "dcterms:identifier"), ], "doi": [ ("narrow_mappings", "dcterms:identifier"), ], "isbn": [ ("exact_mappings", "schema:isbn"), ], "issn": [ ("exact_mappings", "schema:issn"), ], # === WEB CLAIMS (ADDITIONAL) === "html_file": [ ("related_mappings", "dcterms:source"), ], "xpath_match_score": [ ("related_mappings", "prov:value"), ], "retrieval_timestamp": [ ("exact_mappings", "prov:generatedAtTime"), ], # === GHCID-SPECIFIC === "ghcid_current": [ ("narrow_mappings", "dcterms:identifier"), ], "ghcid_history": [ ("related_mappings", "prov:wasRevisionOf"), ], "ghcid_uuid": [ ("narrow_mappings", "dcterms:identifier"), ], "ghcid_numeric": [ ("narrow_mappings", "dcterms:identifier"), ], # === LINKEDIN-SPECIFIC === "linkedin_url": [ ("exact_mappings", "schema:sameAs"), ("close_mappings", "foaf:page"), ], "linkedin_slug": [ ("related_mappings", "dcterms:identifier"), ], "profile_headline": [ ("close_mappings", "schema:description"), ], "profile_summary": [ ("close_mappings", "schema:description"), ], "connections_count": [ ("related_mappings", "schema:interactionCount"), ], # === WIKIDATA-SPECIFIC === "wikidata_label": [ ("exact_mappings", "skos:prefLabel"), ], "wikidata_description": [ ("exact_mappings", "schema:description"), ], "wikidata_aliases": [ ("exact_mappings", "skos:altLabel"), ], "sitelinks": [ ("close_mappings", "schema:sameAs"), ], # === GOOGLE MAPS-SPECIFIC === "place_id": [ ("narrow_mappings", "dcterms:identifier"), ], "google_maps_url": [ ("close_mappings", "schema:sameAs"), ], "popular_times": [ ("related_mappings", "schema:openingHours"), ], "photo_count": [ ("close_mappings", "schema:numberOfItems"), ], "business_status": [ ("close_mappings", "schema:status"), ], # === ADDITIONAL UNMAPPED SLOTS === "confidence": [ ("close_mappings", "prov:value"), ("related_mappings", "schema:ratingValue"), ], "confidence_method": [ ("close_mappings", "prov:wasGeneratedBy"), ], "confidence_value": [ 
("close_mappings", "prov:value"), ], "conflict_status": [ ("related_mappings", "schema:status"), ], "conservation_lab": [ ("related_mappings", "schema:location"), ], "contact": [ ("exact_mappings", "schema:contactPoint"), ], "contact_point": [ ("exact_mappings", "schema:contactPoint"), ], "content_hash": [ ("exact_mappings", "dcat:checksum"), ], "cost_usd": [ ("close_mappings", "schema:price"), ], "css_selector": [ ("related_mappings", "prov:atLocation"), ], "curation_activities": [ ("close_mappings", "prov:Activity"), ], "custodial_history": [ ("exact_mappings", "dcterms:provenance"), ], "custodian_names": [ ("close_mappings", "skos:prefLabel"), ], "custodian_observations": [ ("related_mappings", "prov:Entity"), ], "custodians": [ ("close_mappings", "schema:organization"), ], "data_license_policy": [ ("close_mappings", "dcterms:license"), ], "data_service_endpoints": [ ("close_mappings", "dcat:endpointURL"), ], "date_of_incorporation": [ ("exact_mappings", "schema:foundingDate"), ], "definition": [ ("exact_mappings", "skos:definition"), ("close_mappings", "rdfs:comment"), ], "delegation": [ ("related_mappings", "prov:actedOnBehalfOf"), ], "emic_name": [ ("close_mappings", "skos:prefLabel"), ("related_mappings", "schema:name"), ], "endorsed_standards": [ ("close_mappings", "dcterms:conformsTo"), ], "entity_observations": [ ("related_mappings", "prov:Entity"), ], "evidence_strength": [ ("close_mappings", "prov:value"), ], "exclusion_criteria": [ ("related_mappings", "schema:description"), ], "first_observation": [ ("close_mappings", "prov:generatedAtTime"), ], "format_types": [ ("close_mappings", "dcterms:format"), ], "formatted_address": [ ("exact_mappings", "schema:address"), ("exact_mappings", "vcard:hasAddress"), ], # === MORE UNMAPPED SLOTS === "geographic_scope": [ ("exact_mappings", "dcterms:spatial"), ], "government_level": [ ("related_mappings", "org:classification"), ], "has_digital_catalog": [ ("related_mappings", "schema:url"), ], "has_finding_aid": [ 
("related_mappings", "schema:url"), ], "has_member": [ ("exact_mappings", "org:hasMember"), ], "has_opening_hours": [ ("close_mappings", "schema:openingHours"), ], "heritage_significance": [ ("related_mappings", "dcterms:description"), ], "heritage_type": [ ("close_mappings", "dcterms:type"), ], "heritage_types": [ ("close_mappings", "dcterms:type"), ], "historical_significance": [ ("related_mappings", "dcterms:description"), ], "hours_of_operation": [ ("exact_mappings", "schema:openingHours"), ], "html_content": [ ("related_mappings", "schema:text"), ], "identifier_scheme": [ ("related_mappings", "dcterms:type"), ], "identifier_type": [ ("related_mappings", "dcterms:type"), ], "identifier_value": [ ("exact_mappings", "rdf:value"), ], "identifiers": [ ("close_mappings", "dcterms:identifier"), ], "inclusion_criteria": [ ("related_mappings", "schema:description"), ], "industry": [ ("exact_mappings", "schema:industry"), ], "is_active": [ ("close_mappings", "schema:status"), ], "is_defunct": [ ("close_mappings", "schema:status"), ], "is_part_of": [ ("exact_mappings", "dcterms:isPartOf"), ("exact_mappings", "schema:isPartOf"), ], "is_primary": [ ("related_mappings", "rdf:type"), ], "issue_date": [ ("exact_mappings", "dcterms:issued"), ], "items": [ ("close_mappings", "schema:itemListElement"), ], "known_for": [ ("close_mappings", "schema:knowsAbout"), ], "label": [ ("exact_mappings", "rdfs:label"), ("exact_mappings", "skos:prefLabel"), ], "labels": [ ("close_mappings", "skos:altLabel"), ], "last_modified": [ ("exact_mappings", "dcterms:modified"), ], "last_observation": [ ("close_mappings", "prov:generatedAtTime"), ], "last_updated": [ ("exact_mappings", "dcterms:modified"), ], "latitude_dd": [ ("exact_mappings", "schema:latitude"), ], "legal_entity": [ ("close_mappings", "org:FormalOrganization"), ], "level": [ ("related_mappings", "schema:position"), ], "locations": [ ("exact_mappings", "schema:location"), ], "longitude_dd": [ ("exact_mappings", "schema:longitude"), 
], "main_language": [ ("exact_mappings", "dcterms:language"), ], "maintained_by": [ ("close_mappings", "schema:maintainer"), ], "managed_by": [ ("close_mappings", "prov:wasAttributedTo"), ], "mandate": [ ("related_mappings", "org:purpose"), ], "membership_type": [ ("close_mappings", "org:role"), ], "metadata_standard": [ ("close_mappings", "dcterms:conformsTo"), ], "metadata_standards": [ ("close_mappings", "dcterms:conformsTo"), ], "mission": [ ("close_mappings", "org:purpose"), ], "mission_statement": [ ("close_mappings", "org:purpose"), ], "name_type": [ ("related_mappings", "dcterms:type"), ], "name_variant": [ ("exact_mappings", "skos:altLabel"), ], "name_variants": [ ("exact_mappings", "skos:altLabel"), ], "namespace": [ ("related_mappings", "schema:url"), ], "network_affiliations": [ ("close_mappings", "org:memberOf"), ], "note": [ ("exact_mappings", "skos:note"), ], "number_of_employees": [ ("exact_mappings", "schema:numberOfEmployees"), ], "observation_date": [ ("close_mappings", "prov:generatedAtTime"), ], "observation_notes": [ ("close_mappings", "skos:note"), ], "observation_source": [ ("close_mappings", "dcterms:source"), ], "observation_type": [ ("related_mappings", "dcterms:type"), ], "observations": [ ("related_mappings", "prov:Entity"), ], "official_name": [ ("exact_mappings", "schema:legalName"), ], "official_website": [ ("exact_mappings", "schema:url"), ("exact_mappings", "foaf:homepage"), ], "online_catalog": [ ("close_mappings", "schema:url"), ], "operated_by": [ ("close_mappings", "schema:provider"), ], "operating_status": [ ("close_mappings", "schema:status"), ], "operational_since": [ ("close_mappings", "schema:foundingDate"), ], "operators": [ ("close_mappings", "schema:provider"), ], "org_type": [ ("close_mappings", "org:classification"), ], "organization_type": [ ("close_mappings", "org:classification"), ], "original_language": [ ("exact_mappings", "dcterms:language"), ], "other_identifiers": [ ("close_mappings", "dcterms:identifier"), ], 
"other_names": [ ("exact_mappings", "skos:altLabel"), ], "owned_by": [ ("close_mappings", "schema:ownedBy"), ], "owner": [ ("exact_mappings", "schema:ownedBy"), ], "ownership": [ ("close_mappings", "schema:ownedBy"), ], "page_url": [ ("exact_mappings", "schema:url"), ], "parent": [ ("exact_mappings", "schema:parentOrganization"), ], "parent_id": [ ("close_mappings", "dcterms:isPartOf"), ], "participants": [ ("exact_mappings", "schema:participant"), ], "period": [ ("close_mappings", "dcterms:temporal"), ], "period_covered": [ ("exact_mappings", "dcterms:temporal"), ], "phone_number": [ ("exact_mappings", "schema:telephone"), ], "physical_address": [ ("exact_mappings", "schema:address"), ], "postal_address": [ ("exact_mappings", "schema:postalAddress"), ], "prefix": [ ("related_mappings", "schema:honorificPrefix"), ], "previous_name": [ ("close_mappings", "skos:altLabel"), ], "previous_names": [ ("close_mappings", "skos:altLabel"), ], "primary_type": [ ("close_mappings", "dcterms:type"), ], "priority": [ ("related_mappings", "schema:position"), ], "processing_status": [ ("close_mappings", "adms:status"), ], "profile_url": [ ("exact_mappings", "schema:url"), ("exact_mappings", "foaf:page"), ], "project_id": [ ("close_mappings", "dcterms:identifier"), ], "properties": [ ("related_mappings", "schema:additionalProperty"), ], "provider": [ ("exact_mappings", "schema:provider"), ], "purpose": [ ("exact_mappings", "org:purpose"), ], "qualifier": [ ("related_mappings", "skos:note"), ], "quality_score": [ ("close_mappings", "prov:value"), ], "raw_value": [ ("close_mappings", "rdf:value"), ], "reason": [ ("close_mappings", "prov:hadReason"), ], "record_id": [ ("close_mappings", "dcterms:identifier"), ], "reference_url": [ ("exact_mappings", "schema:url"), ], "registration_country": [ ("close_mappings", "schema:addressCountry"), ], "related": [ ("exact_mappings", "dcterms:relation"), ], "related_entities": [ ("close_mappings", "dcterms:relation"), ], "related_institutions": [ 
("close_mappings", "dcterms:relation"), ], "related_organizations": [ ("close_mappings", "dcterms:relation"), ], "relationship_type": [ ("related_mappings", "dcterms:type"), ], "relevance_score": [ ("close_mappings", "prov:value"), ], "request_url": [ ("close_mappings", "schema:url"), ], "resource_type": [ ("close_mappings", "dcterms:type"), ], "response_format": [ ("close_mappings", "dcterms:format"), ], "result": [ ("exact_mappings", "schema:result"), ], "resulting_organization": [ ("close_mappings", "prov:generated"), ], "scope": [ ("close_mappings", "dcterms:coverage"), ], "scope_note": [ ("exact_mappings", "skos:scopeNote"), ], "search_url": [ ("close_mappings", "schema:url"), ], "secondary_type": [ ("close_mappings", "dcterms:type"), ], "section": [ ("related_mappings", "dcterms:isPartOf"), ], "service_type": [ ("close_mappings", "dcterms:type"), ], "short_name": [ ("close_mappings", "skos:altLabel"), ], "skills": [ ("close_mappings", "schema:knowsAbout"), ], "slug": [ ("related_mappings", "dcterms:identifier"), ], "snapshot_date": [ ("close_mappings", "prov:generatedAtTime"), ], "source_file": [ ("close_mappings", "dcterms:source"), ], "source_id": [ ("close_mappings", "dcterms:source"), ], "source_language": [ ("close_mappings", "dcterms:language"), ], "source_name": [ ("close_mappings", "dcterms:source"), ], "source_type": [ ("related_mappings", "dcterms:type"), ], "sources": [ ("exact_mappings", "dcterms:source"), ], "spatial_coverage": [ ("exact_mappings", "dcterms:spatial"), ], "specialization": [ ("close_mappings", "schema:knowsAbout"), ], "species_count": [ ("close_mappings", "schema:numberOfItems"), ], "start_year": [ ("close_mappings", "schema:startDate"), ], "state": [ ("close_mappings", "schema:addressRegion"), ], "storage_type": [ ("related_mappings", "dcterms:type"), ], "street": [ ("exact_mappings", "schema:streetAddress"), ], "subtype": [ ("close_mappings", "dcterms:type"), ], "suffix": [ ("related_mappings", "schema:honorificSuffix"), ], 
"supported_formats": [ ("close_mappings", "dcterms:format"), ], "synonyms": [ ("exact_mappings", "skos:altLabel"), ], "system_name": [ ("close_mappings", "schema:name"), ], "tags": [ ("close_mappings", "schema:keywords"), ], "target_audience": [ ("close_mappings", "schema:audience"), ], "temporal": [ ("exact_mappings", "dcterms:temporal"), ], "temporal_scope": [ ("exact_mappings", "dcterms:temporal"), ], "text": [ ("exact_mappings", "schema:text"), ], "text_content": [ ("exact_mappings", "schema:text"), ], "time_zone": [ ("close_mappings", "schema:timeZone"), ], "total_count": [ ("close_mappings", "schema:numberOfItems"), ], "type_label": [ ("close_mappings", "rdfs:label"), ], "uri": [ ("exact_mappings", "schema:url"), ], "usage_notes": [ ("close_mappings", "skos:note"), ], "valid_until": [ ("exact_mappings", "schema:validThrough"), ], "validation_status": [ ("related_mappings", "adms:status"), ], "verification_date": [ ("close_mappings", "prov:generatedAtTime"), ], "verification_method": [ ("close_mappings", "prov:wasGeneratedBy"), ], "verification_status": [ ("close_mappings", "adms:status"), ], "vision": [ ("close_mappings", "org:purpose"), ], "web_archive_url": [ ("close_mappings", "schema:url"), ], "web_claims": [ ("related_mappings", "prov:Entity"), ], "web_presence": [ ("close_mappings", "schema:url"), ], "website_url": [ ("exact_mappings", "schema:url"), ], "year_established": [ ("close_mappings", "schema:foundingDate"), ], "year_founded": [ ("exact_mappings", "schema:foundingDate"), ], # === REMAINING 50 UNMAPPED SLOTS === "class_metadata_slots": [ ("related_mappings", "rdfs:Resource"), ], "collections_under_responsibility": [ ("close_mappings", "rico:hasOrIsHeldBy"), ("related_mappings", "schema:collection"), ], "deceased": [ ("close_mappings", "schema:deathDate"), ], "encompasses": [ ("close_mappings", "dcterms:hasPart"), ("related_mappings", "schema:containsPlace"), ], "exposes_collections": [ ("close_mappings", "schema:collection"), ], 
"external_identifiers": [ ("exact_mappings", "dcterms:identifier"), ("close_mappings", "schema:identifier"), ], "extraction_metadata": [ ("close_mappings", "prov:Entity"), ], "finding_aids": [ ("close_mappings", "rico:hasInstantiation"), ("related_mappings", "schema:url"), ], "gender_identity": [ ("exact_mappings", "schema:gender"), ], "generates": [ ("exact_mappings", "prov:generated"), ], "governance_structure": [ ("close_mappings", "org:organization"), ], "holds_record_set_types": [ ("close_mappings", "rico:hasRecordSetType"), ], "humidity_tolerance": [ ("related_mappings", "schema:additionalProperty"), ], "identifier_format_used": [ ("close_mappings", "dcterms:conformsTo"), ], "identifies": [ ("close_mappings", "dcterms:identifier"), ], "initials": [ ("close_mappings", "foaf:name"), ("related_mappings", "skos:altLabel"), ], "jurisdiction": [ ("exact_mappings", "schema:areaServed"), ("close_mappings", "dcterms:spatial"), ], "justification": [ ("close_mappings", "prov:hadReason"), ("related_mappings", "skos:note"), ], "legal_jurisdiction": [ ("exact_mappings", "schema:areaServed"), ], "legal_responsibility_basis": [ ("close_mappings", "dcterms:rights"), ], "linkedin_profile_path": [ ("close_mappings", "schema:sameAs"), ], "method": [ ("exact_mappings", "prov:wasGeneratedBy"), ], "name_authority": [ ("close_mappings", "dcterms:source"), ], "name_validity_period": [ ("close_mappings", "dcterms:temporal"), ], "observation_context": [ ("close_mappings", "prov:atLocation"), ], "offers_donation_schemes": [ ("related_mappings", "schema:availableService"), ], "organizational_change_events": [ ("close_mappings", "prov:Activity"), ], "parent_collection": [ ("exact_mappings", "dcterms:isPartOf"), ], "platform_of": [ ("close_mappings", "dcterms:isPartOf"), ], "portal_data_sources": [ ("close_mappings", "dcterms:source"), ], "powered_by_cms": [ ("related_mappings", "schema:softwareVersion"), ], "preservation_level": [ ("related_mappings", "dcterms:type"), ], "price_currency": 
[ ("exact_mappings", "schema:priceCurrency"), ], "primary_register": [ ("close_mappings", "dcterms:source"), ], "pronouns": [ ("related_mappings", "foaf:name"), ], "protocol": [ ("close_mappings", "dcterms:conformsTo"), ], "provenance_statement": [ ("exact_mappings", "dcterms:provenance"), ], "response_formats": [ ("close_mappings", "dcterms:format"), ], "resulting_units": [ ("close_mappings", "prov:generated"), ], "rico_equivalent": [ ("exact_mappings", "skos:exactMatch"), ], "role_title": [ ("exact_mappings", "schema:roleName"), ("close_mappings", "org:role"), ], "security_level": [ ("related_mappings", "dcterms:accessRights"), ], "serves_finding_aids": [ ("close_mappings", "schema:availableService"), ], "settlement": [ ("close_mappings", "schema:addressLocality"), ], "source_creator": [ ("exact_mappings", "dcterms:creator"), ], "source_uri": [ ("exact_mappings", "dcterms:source"), ], "sub_collections": [ ("exact_mappings", "dcterms:hasPart"), ], "subregion": [ ("close_mappings", "schema:addressRegion"), ], "supersedes": [ ("exact_mappings", "dcterms:replaces"), ], "temperature_tolerance": [ ("related_mappings", "schema:additionalProperty"), ], "typical_domains": [ ("close_mappings", "dcterms:subject"), ], "typical_technical_features": [ ("close_mappings", "schema:featureList"), ], "unit_affiliation": [ ("close_mappings", "org:memberOf"), ], "used_sources": [ ("exact_mappings", "dcterms:source"), ], "used": [ ("close_mappings", "prov:used"), ], "was_derived_from": [ ("exact_mappings", "prov:wasDerivedFrom"), ], "was_revision_of": [ ("exact_mappings", "prov:wasRevisionOf"), ], # === 4 MISSING CENTRALIZED SLOTS === "archive_branches": [ ("close_mappings", "org:hasSubOrganization"), ("related_mappings", "schema:subOrganization"), ], "archive_department_of": [ ("close_mappings", "org:subOrganizationOf"), ("related_mappings", "schema:parentOrganization"), ], "parent_corporation": [ ("exact_mappings", "schema:parentOrganization"), ("close_mappings", 
# Pattern-based mappings for slots whose names follow a naming convention.
# Format: (regex, [(mapping_type, predicate), ...]).
# Patterns are independent of each other: a slot name may match several of
# them, and the order below is the order in which matches are collected.
#
# NOTE: schema.org type names are capitalised (schema:Organization,
# schema:Person); the lower-case spellings previously used here do not resolve
# to any schema.org term.
PATTERN_MAPPINGS: List[Tuple[str, List[Tuple[str, str]]]] = [
    # Slots ending with _date
    (r".*_date$", [("broad_mappings", "dcterms:date")]),
    # Slots ending with _url
    (r".*_url$", [("broad_mappings", "schema:url")]),
    # Slots ending with _id
    (r".*_id$", [("broad_mappings", "dcterms:identifier")]),
    # Slots ending with _name
    (r".*_name$", [("broad_mappings", "rdfs:label")]),
    # Slots ending with _description
    (r".*_description$", [("broad_mappings", "dcterms:description")]),
    # Slots ending with _type
    (r".*_type$", [("broad_mappings", "dcterms:type")]),
    # Slots ending with _code
    (r".*_code$", [("broad_mappings", "dcterms:identifier")]),
    # Slots starting with is_ or has_
    (r"^(is|has)_.*$", [("related_mappings", "rdf:type")]),
    # Slots ending with _at (timestamps)
    (r".*_at$", [("broad_mappings", "prov:atTime")]),
    # Slots ending with _by (agents)
    (r".*_by$", [("broad_mappings", "prov:wasAttributedTo")]),
    # Slots ending with _count
    (r".*_count$", [("broad_mappings", "schema:numberOfItems")]),
    # Slots ending with _status
    (r".*_status$", [("broad_mappings", "adms:status")]),
    # Slots ending with _source
    (r".*_source$", [("broad_mappings", "dcterms:source")]),
    # Slots ending with _ref or _reference
    (r".*_(ref|reference)$", [("broad_mappings", "dcterms:references")]),
    # Slots ending with _time
    (r".*_time$", [("broad_mappings", "prov:atTime")]),
    # Slots ending with _timestamp
    (r".*_timestamp$", [("broad_mappings", "prov:generatedAtTime")]),
    # Slots ending with _address
    (r".*_address$", [("broad_mappings", "schema:address")]),
    # Slots ending with _email
    (r".*_email$", [("broad_mappings", "schema:email")]),
    # Slots ending with _phone
    (r".*_phone$", [("broad_mappings", "schema:telephone")]),
    # Slots ending with _notes or _note
    (r".*_notes?$", [("broad_mappings", "skos:note")]),
    # Slots ending with _label
    (r".*_label$", [("broad_mappings", "rdfs:label")]),
    # Slots ending with _value
    (r".*_value$", [("broad_mappings", "rdf:value")]),
    # Slots ending with _language
    (r".*_language$", [("broad_mappings", "dcterms:language")]),
    # Slots ending with _format
    (r".*_format$", [("broad_mappings", "dcterms:format")]),
    # Slots ending with _version
    (r".*_version$", [("broad_mappings", "schema:version")]),
    # Slots ending with _scope
    (r".*_scope$", [("broad_mappings", "dcterms:coverage")]),
    # Slots ending with _coverage
    (r".*_coverage$", [("broad_mappings", "dcterms:coverage")]),
    # Slots ending with _extent
    (r".*_extent$", [("broad_mappings", "dcterms:extent")]),
    # Slots ending with _size
    (r".*_size$", [("broad_mappings", "dcterms:extent")]),
    # Slots ending with _entity or _entities
    (r".*_entit(y|ies)$", [("broad_mappings", "prov:Entity")]),
    # Slots ending with _observation or _observations
    (r".*_observations?$", [("broad_mappings", "prov:Entity")]),
    # Slots ending with _claim or _claims
    (r".*_claims?$", [("broad_mappings", "prov:Entity")]),
    # Slots ending with _confidence
    (r".*_confidence$", [("broad_mappings", "prov:value")]),
    # Slots ending with _score
    (r".*_score$", [("broad_mappings", "prov:value")]),
    # Slots ending with _platform or _platforms
    (r".*_platforms?$", [("broad_mappings", "dcterms:hasPart")]),
    # Slots ending with _standard or _standards
    (r".*_standards?$", [("broad_mappings", "dcterms:conformsTo")]),
    # Slots ending with _policy or _policies
    (r".*_polic(y|ies)$", [("broad_mappings", "dcterms:rights")]),
    # Slots ending with _method
    (r".*_method$", [("broad_mappings", "prov:wasGeneratedBy")]),
    # Slots ending with _agent
    (r".*_agent$", [("broad_mappings", "prov:wasAttributedTo")]),
    # Slots starting with date_ or dates_
    (r"^dates?_.*$", [("broad_mappings", "dcterms:date")]),
    # Slots containing _of_ (relationships)
    (r".*_of_.*$", [("broad_mappings", "dcterms:relation")]),
    # Slots ending with _location
    (r".*_location$", [("broad_mappings", "schema:location")]),
    # Slots ending with _organization(s) or _organisation(s)
    (r".*_organi[sz]ations?$", [("broad_mappings", "schema:Organization")]),
    # Slots ending with _person or _persons
    (r".*_persons?$", [("broad_mappings", "schema:Person")]),
    # Slots ending with _institution or _institutions
    (r".*_institutions?$", [("broad_mappings", "schema:Organization")]),
    # Slots ending with _custodian or _custodians
    (r".*_custodians?$", [("broad_mappings", "schema:Organization")]),
    # Slots ending with _area or _areas
    (r".*_areas?$", [("broad_mappings", "dcterms:subject")]),
    # Slots ending with _items
    (r".*_items$", [("broad_mappings", "schema:itemListElement")]),
    # Slots ending with _list
    (r".*_list$", [("broad_mappings", "schema:ItemList")]),
    # Slots ending with _endpoint or _endpoints
    (r".*_endpoints?$", [("broad_mappings", "dcat:endpointURL")]),
    # Slots ending with _file or _files
    (r".*_files?$", [("broad_mappings", "dcterms:source")]),
    # Slots ending with _body or _bodies
    (r".*_bod(y|ies)$", [("broad_mappings", "org:Organization")]),
    # Slots ending with _link or _links
    (r".*_links?$", [("broad_mappings", "schema:url")]),
    # Slots ending with _portal or _portals
    (r".*_portals?$", [("broad_mappings", "schema:url")]),
]


def load_yaml(filepath: Path) -> dict:
    """Load a YAML file.

    Args:
        filepath: Path of the YAML document to read (decoded as UTF-8).

    Returns:
        Whatever ``yaml.safe_load`` produces for the document. For an empty
        document that is ``None`` rather than a dict, so callers must check.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        return yaml.safe_load(f)


def save_yaml(filepath: Path, data: dict) -> None:
    """Save data to a YAML file with proper formatting.

    The file is overwritten. Output uses block style, keeps non-ASCII
    characters as-is, preserves key order and wraps at 120 columns.

    Args:
        filepath: Destination path (written as UTF-8).
        data: Object to serialise with ``yaml.dump``.
    """
    with open(filepath, 'w', encoding='utf-8') as f:
        yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False, width=120)


def get_slot_name_from_file(filepath: Path) -> Optional[str]:
    """Extract the main slot name from a slot file.

    The "main" slot is the first key of the top-level ``slots`` mapping.

    Args:
        filepath: Path of the slot YAML file.

    Returns:
        The first slot name, or ``None`` when the document is empty, is not a
        mapping, or has no non-empty ``slots`` mapping.
    """
    data = load_yaml(filepath)
    # An empty file loads as None and a scalar/list document is not a mapping;
    # neither can contain slots.
    if not isinstance(data, dict):
        return None
    slots = data.get('slots')
    if not isinstance(slots, dict) or not slots:
        return None
    return next(iter(slots))
# The five SKOS-style mapping keys, in the order they are reported.
_MAPPING_TYPES: Tuple[str, ...] = (
    "exact_mappings",
    "close_mappings",
    "related_mappings",
    "narrow_mappings",
    "broad_mappings",
)


def get_mappings_for_slot(
    slot_name: str,
    slot_mappings: Optional[Dict[str, List[Tuple[str, str]]]] = None,
    pattern_mappings: Optional[List[Tuple[str, List[Tuple[str, str]]]]] = None,
) -> Dict[str, List[str]]:
    """Get appropriate mappings for a slot based on its name.

    Direct (exact-name) mappings are collected first, followed by every
    pattern whose regex matches the slot name. A predicate is assigned to at
    most one mapping type: the first assignment wins, so a direct mapping is
    never repeated by a weaker pattern mapping (for example ``schema:url``
    as both exact and broad for ``page_url``).

    Args:
        slot_name: Name of the slot to look up.
        slot_mappings: Direct name -> [(mapping_type, predicate)] table.
            Defaults to the module-level ``SLOT_MAPPINGS``.
        pattern_mappings: List of (regex, [(mapping_type, predicate)]).
            Defaults to the module-level ``PATTERN_MAPPINGS``.

    Returns:
        Dict of mapping type -> list of predicates, containing only the
        mapping types that received at least one predicate.

    Raises:
        KeyError: If a table entry uses an unknown mapping type.
    """
    direct_table = SLOT_MAPPINGS if slot_mappings is None else slot_mappings
    pattern_table = PATTERN_MAPPINGS if pattern_mappings is None else pattern_mappings

    # Direct matches come first so they take priority over pattern matches.
    candidates = list(direct_table.get(slot_name, ()))
    for pattern, targets in pattern_table:
        if re.match(pattern, slot_name):
            candidates.extend(targets)

    mappings: Dict[str, List[str]] = {mapping_type: [] for mapping_type in _MAPPING_TYPES}
    seen = set()
    for mapping_type, predicate in candidates:
        # De-duplicate across *all* mapping types, not just within one.
        if predicate in seen:
            continue
        seen.add(predicate)
        mappings[mapping_type].append(predicate)

    # Remove empty mapping types
    return {mapping_type: predicates for mapping_type, predicates in mappings.items() if predicates}


def add_mappings_to_slot_file(filepath: Path, dry_run: bool = False) -> Tuple[bool, str]:
    """Add mappings to a slot file if needed.

    Only the first slot in the file's ``slots`` mapping is processed.
    Predicates the slot already lists under any mapping type are left alone.

    Args:
        filepath: Path of the slot YAML file.
        dry_run: When true, compute the result but do not write the file.

    Returns:
        ``(changed, message)``. ``changed`` is true when at least one
        predicate was (or, in a dry run, would be) added. ``message`` is one
        of ``"No slots defined"``, ``"No mappings found for slot"``,
        ``"All mappings already present"``, ``"Added mappings: {...}"``
        (listing only what was actually added) or ``"Error: ..."``.
    """
    try:
        data = load_yaml(filepath)
        # An empty file loads as None; treat anything without a non-empty
        # ``slots`` mapping as having no slots.
        slots = data.get('slots') if isinstance(data, dict) else None
        if not isinstance(slots, dict) or not slots:
            return False, "No slots defined"

        slot_name = next(iter(slots))
        slot_def = slots[slot_name]
        if slot_def is None:
            # Slot declared with an empty body ("name:"); give it one to fill.
            slot_def = slots[slot_name] = {}

        # Collect predicates already present under any mapping type.
        existing_mappings = set()
        for mapping_type in _MAPPING_TYPES:
            current = slot_def.get(mapping_type)
            if current is None:
                continue
            # A single scalar value is one predicate, not a sequence of characters.
            existing_mappings.update([current] if isinstance(current, str) else current)

        new_mappings = get_mappings_for_slot(slot_name)
        if not new_mappings:
            return False, "No mappings found for slot"

        # Add only predicates the slot does not have yet, remembering them
        # so the message reports what really changed.
        added: Dict[str, List[str]] = {}
        for mapping_type, predicates in new_mappings.items():
            fresh = [p for p in predicates if p not in existing_mappings]
            if not fresh:
                continue
            bucket = slot_def.get(mapping_type)
            if not isinstance(bucket, list):
                # Normalise a missing/empty/scalar value to a list before extending.
                bucket = slot_def[mapping_type] = [] if bucket is None else [bucket]
            bucket.extend(fresh)
            added[mapping_type] = fresh

        if not added:
            return False, "All mappings already present"

        if not dry_run:
            save_yaml(filepath, data)
        return True, f"Added mappings: {added}"

    except Exception as e:
        # Deliberately broad: this is the per-file boundary of a batch run,
        # and one unreadable or malformed file must not abort the others.
        return False, f"Error: {e}"
def process_all_slots(slots_dir: Path, dry_run: bool = False) -> dict:
    """Process all slot files in the directory.

    Every ``*.yaml`` file directly inside ``slots_dir`` is handled in sorted
    order and filed into one bucket according to the outcome reported by
    ``add_mappings_to_slot_file``.

    Args:
        slots_dir: Directory containing the slot YAML files.
        dry_run: Passed through; when true no file is modified.

    Returns:
        Dict with four lists: ``updated``, ``skipped`` and ``errors`` hold
        ``(file_name, message)`` tuples; ``no_mappings`` holds file names.
    """
    results = {
        "updated": [],
        "skipped": [],
        "errors": [],
        "no_mappings": [],
    }

    for yaml_file in sorted(slots_dir.glob("*.yaml")):
        success, message = add_mappings_to_slot_file(yaml_file, dry_run)
        if success:
            results["updated"].append((yaml_file.name, message))
        # Match on the message prefix: a substring test could misfile a
        # message that merely mentions one of these words.
        elif message.startswith("Error"):
            results["errors"].append((yaml_file.name, message))
        elif message.startswith("No mappings found"):
            results["no_mappings"].append(yaml_file.name)
        else:
            results["skipped"].append((yaml_file.name, message))

    return results


def main() -> int:
    """Main entry point.

    Parses the command line, then processes either a single slot file
    (``--slot``) or every slot file in ``--slots-dir``, printing a summary.

    Returns:
        Process exit status: 1 when the requested slot file or the slots
        directory does not exist, otherwise 0.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Add semantic mappings to LinkML slot files")
    parser.add_argument("--dry-run", action="store_true",
                        help="Don't modify files, just show what would be done")
    # NOTE(review): the default is a machine-specific absolute path; on any
    # other checkout --slots-dir has to be given explicitly.
    parser.add_argument("--slots-dir", type=Path,
                        default=Path("/Users/kempersc/apps/glam/schemas/20251121/linkml/modules/slots"),
                        help="Directory containing slot YAML files")
    parser.add_argument("--slot", type=str, help="Process only this specific slot file")
    args = parser.parse_args()

    if args.slot:
        filepath = args.slots_dir / f"{args.slot}.yaml"
        if not filepath.exists():
            print(f"Error: Slot file not found: {filepath}")
            return 1

        success, message = add_mappings_to_slot_file(filepath, args.dry_run)
        if not success:
            outcome = "Skipped"
        elif args.dry_run:
            outcome = "Would update"
        else:
            outcome = "Updated"
        print(f"{filepath.name}: {outcome} - {message}")
        return 0

    if not args.slots_dir.is_dir():
        # Without this check a wrong directory silently reports all zeros.
        print(f"Error: Slots directory not found: {args.slots_dir}")
        return 1

    results = process_all_slots(args.slots_dir, args.dry_run)

    print(f"\n{'DRY RUN - ' if args.dry_run else ''}Slot Mapping Results:")
    print("=" * 60)
    print(f"Updated: {len(results['updated'])}")
    print(f"Skipped (already has mappings): {len(results['skipped'])}")
    print(f"No mappings found: {len(results['no_mappings'])}")
    print(f"Errors: {len(results['errors'])}")

    # Listed in both modes: a dry run exists precisely to show what would change.
    if results['updated']:
        print("\nSlots that would be updated:" if args.dry_run else "\nUpdated slots:")
        for name, _msg in results['updated'][:20]:
            print(f"  - {name}")
        if len(results['updated']) > 20:
            print(f"  ... and {len(results['updated']) - 20} more")

    if results['no_mappings']:
        print("\nSlots without mapping definitions:")
        for name in results['no_mappings'][:20]:
            print(f"  - {name}")
        if len(results['no_mappings']) > 20:
            print(f"  ... and {len(results['no_mappings']) - 20} more")

    if results['errors']:
        print("\nErrors:")
        for name, msg in results['errors']:
            print(f"  - {name}: {msg}")

    return 0


if __name__ == "__main__":
    raise SystemExit(main())