"""
mappings.py - Data Transformation Mapping Documentation (Python Port)

This module documents how raw YAML/JSON data from custodian files maps to:
1. LinkML schema classes and slots
2. TypeDB entities and attributes
3. RDF triples and predicates

ARCHITECTURE OVERVIEW:
======================

The Heritage Custodian System uses a "hub architecture" where:
- CustodianHub: Abstract entity with only persistent hc_id
- CustodianObservation: Evidence/claims from a specific source
- ReconstructionActivity: Process that generates standardized aspects
- Four aspects: LegalStatus, Name, Place, Collection (independent temporal lifecycles)

Each enrichment block in YAML (google_maps_enrichment, wikidata_enrichment, etc.)
maps to a SEPARATE CustodianObservation with its own provenance.

DATA FLOW:
==========

Raw YAML (data/custodian/*.yaml)
         |
  [Transform Layer]
         |
LinkML Instance Data
         |
  +------+------+
  |      |      |
 RDF  TypeDB  JSON-LD

Ported from: frontend/src/lib/linkml/custodian-data-mappings.ts
Version: 1.0.0
"""

from __future__ import annotations

from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Optional


# ============================================================================
# ENUMERATIONS
# ============================================================================

class TransformationType(str, Enum):
    """Types of data transformations that can occur during mapping.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value (e.g. ``TransformationType.DIRECT == 'direct'``).
    """

    DIRECT = 'direct'  # 1:1 copy, no transformation
    RENAME = 'rename'  # Field name change only
    SPLIT = 'split'  # One source field -> multiple target fields
    MERGE = 'merge'  # Multiple source fields -> one target field
    LOOKUP = 'lookup'  # Enum value lookup or reference resolution
    COMPUTED = 'computed'  # Derived/calculated value
    NESTED = 'nested'  # Nested object mapping (object -> object)
    ARRAY_MAP = 'array_map'  # Array transformation (array -> array with element mapping)
    ARRAY_DIRECT = 'array_direct'  # Direct array copy without element transformation
    FLATTEN = 'flatten'  # Nested structure -> flat structure
    AGGREGATE = 'aggregate'  # Multiple values -> single aggregate
    TEMPORAL = 'temporal'  # Date/time transformation
    URI_CONSTRUCT = 'uri_construct'  # Construct URI from components
    NORMALIZE = 'normalize'  # Normalize/standardize value format
    CONDITIONAL = 'conditional'  # Conditional transformation based on other fields
    NOT_MAPPED = 'not_mapped'  # Ontology property intentionally not mapped


class MappingStatus(str, Enum):
    """Mapping status for ontology coverage documentation."""

    MAPPED = 'mapped'  # Property is fully mapped to HC system
    PARTIAL = 'partial'  # Property is partially mapped
    OUT_OF_SCOPE = 'out_of_scope'  # Property is intentionally not mapped
    FUTURE = 'future'  # Property may be mapped in future versions


class DataTier(str, Enum):
    """Data tier classification (per AGENTS.md)."""

    TIER_1_AUTHORITATIVE = 'TIER_1_AUTHORITATIVE'  # CSV registries (ISIL, Dutch orgs)
    TIER_2_VERIFIED = 'TIER_2_VERIFIED'  # Data from institutional websites, APIs
    TIER_3_CROWD_SOURCED = 'TIER_3_CROWD_SOURCED'  # Wikidata, OpenStreetMap
    TIER_4_INFERRED = 'TIER_4_INFERRED'  # NLP-extracted from conversations


# ============================================================================
# DATACLASSES
# ============================================================================

@dataclass
class FieldExample:
    """Example showing source -> target transformation.

    ``source_value`` and ``target_value`` are required; the TypeDB value and
    the RDF triple are optional illustrations and default to ``None``.
    """

    source_value: Any
    target_value: Any
    typedb_value: Optional[Any] = None
    rdf_triple: Optional[str] = None


@dataclass
class FieldValidation:
    """Validation rules for a field.

    Only ``type`` is required; every other rule defaults to ``None``.
    """

    # 'string' | 'number' | 'boolean' | 'date' | 'uri' | 'enum' | 'array'
    type: str
    pattern: Optional[str] = None
    enum_values: Optional[list[str]] = None
    min_length: Optional[int] = None
    max_length: Optional[int] = None


@dataclass
class FieldMapping:
    """Mapping for a single field from source to target.

    The first five fields have no default and must always be supplied
    (in this order when passed positionally); the rest are optional.
    """

    # JSON path in source YAML (e.g., "google_maps_enrichment.place_id").
    # None for unmapped.
    source_path: Optional[str]
    # Human-readable description of the source field
    source_description: str
    # LinkML class this maps to. None for unmapped ontology properties.
    target_class: Optional[str]
    # LinkML slot name. None for unmapped ontology properties.
    target_slot: Optional[str]
    # Type of transformation applied
    transformation: TransformationType
    # Human-readable explanation of the transformation
    transformation_details: Optional[str] = None
    # TypeDB entity type (snake-case with hyphens)
    typedb_entity: Optional[str] = None
    # TypeDB attribute name. None for unmapped ontology properties.
    typedb_attribute: Optional[str] = None
    # RDF predicate (CURIE format)
    rdf_predicate: Optional[str] = None
    # Whether this field is required
    required: bool = False
    # Example showing source -> target transformation
    example: Optional[FieldExample] = None
    # Related/dependent fields
    related_fields: Optional[list[str]] = None
    # Validation rules
    validation: Optional[FieldValidation] = None
    # Additional notes or comments about this mapping
    notes: Optional[str] = None
    # Mapping status for ontology coverage documentation
    status: Optional[MappingStatus] = None


@dataclass
class Provenance:
    """Provenance information for an enrichment source."""

    source_type: str
    data_tier: DataTier
    api_endpoint: Optional[str] = None
    update_frequency: Optional[str] = None


@dataclass
class EnrichmentSourceMapping:
    """Complete mapping for an enrichment source block."""

    # Source block name in YAML (e.g., "google_maps_enrichment")
    source_block: str
    # Human-readable description
    description: str
    # Primary LinkML class this maps to
    linkml_class: str
    # Primary TypeDB entity
    typedb_entity: str
    # Provenance information
    provenance: Provenance
    # All field mappings for this source.
    # default_factory gives every instance its own list.
    fields: list[FieldMapping] = field(default_factory=list)
    # Extended description with usage notes
    detailed_description: Optional[str] = None
    # Classes that can be generated from this source
    generated_classes: Optional[list[str]] = None
    # Example YAML snippet
    example_yaml: Optional[str] = None


@dataclass
class MappingCategory:
    """Category grouping for mappings in the UI.

    ``name``/``description`` carry the English text and ``name_nl``/
    ``description_nl`` the Dutch text for the same category.
    """

    id: str
    name: str
    name_nl: str
    description: str
    description_nl: str
    icon: str
    # sourceBlock names; default_factory gives every instance its own list.
    sources: list[str] = field(default_factory=list)


# ============================================================================
# MAPPING CATEGORIES (19 Custodian Categories)
# ============================================================================
# NOTE(review): the banner says 19, but the list below defines 23 entries
# (7 base + 4 Phase 1 + 12 Phase 2) - confirm which number is intended.
# The icon and Dutch strings are UTF-8 literals; keep this file saved as UTF-8.

MAPPING_CATEGORIES: list[MappingCategory] = [
    MappingCategory(
        id='identity',
        name='Identity & Identification',
        name_nl='Identiteit & Identificatie',
        description='Core identity fields: GHCID, names, identifiers',
        description_nl='Kernidentiteitsvelden: GHCID, namen, identificatiecodes',
        icon='🪪',
        sources=['ghcid', 'identifiers', 'custodian_name'],
    ),
    MappingCategory(
        id='location',
        name='Location & Geography',
        name_nl='Locatie & Geografie',
        description='Physical location, addresses, coordinates',
        description_nl='Fysieke locatie, adressen, coördinaten',
        icon='📍',
        sources=['location', 'google_maps_enrichment'],
    ),
    MappingCategory(
        id='external',
        name='External Data Sources',
        name_nl='Externe Databronnen',
        description='Enrichment from external APIs and databases',
        description_nl="Verrijking van externe API's en databases",
        icon='🔗',
        sources=['wikidata_enrichment', 'museum_register_enrichment', 'genealogiewerkbalk_enrichment'],
    ),
    MappingCategory(
        id='web',
        name='Web & Digital Presence',
        name_nl='Web & Digitale Aanwezigheid',
        description='Website data, digital platforms, social media',
        description_nl='Websitegegevens, digitale platformen, sociale media',
        icon='🌐',
        sources=['web_enrichment', 'web_claims', 'digital_platforms', 'youtube_enrichment'],
    ),
    MappingCategory(
        id='legal',
        name='Legal & Organization',
        name_nl='Juridisch & Organisatie',
        description='Legal status, organizational structure',
        description_nl='Juridische status, organisatiestructuur',
        icon='⚖️',
        sources=['legal_status', 'original_entry'],
    ),
    MappingCategory(
        id='temporal',
        name='Temporal & Provenance',
        name_nl='Temporeel & Herkomst',
        description='Time spans, data provenance, versioning',
        description_nl='Tijdspannes, data-herkomst, versiebeheer',
        icon='⏱️',
        sources=['timespan', 'provenance'],
    ),
    MappingCategory(
        id='heritage',
        name='Heritage Specific',
        name_nl='Erfgoed Specifiek',
        description='UNESCO, collections, domain-specific data',
        description_nl='UNESCO, collecties, domeinspecifieke gegevens',
        icon='🏛️',
        sources=['unesco_ich_enrichment'],
    ),
    # -------------------------------------------------------------------------
    # PHASE 1 ADDITIONS: Schema Class Coverage Categories
    # -------------------------------------------------------------------------
    MappingCategory(
        id='archive_types',
        name='Archive Types',
        name_nl='Archieftypen',
        description='Specialized archive classification types (97 classes): academic, audiovisual, church, municipal, national, etc.',
        description_nl='Gespecialiseerde archiefclassificatietypen (97 klassen): academisch, audiovisueel, kerkelijk, gemeentelijk, nationaal, etc.',
        icon='📦',
        sources=[
            'archive_type_academic',
            'archive_type_audiovisual',
            'archive_type_church',
            'archive_type_corporate',
            'archive_type_government',
            'archive_type_municipal',
            'archive_type_national',
            'archive_type_regional',
            'archive_type_specialized',
            'archive_type_thematic',
        ],
    ),
    MappingCategory(
        id='organizational_structure',
        name='Organizational Structure',
        name_nl='Organisatiestructuur',
        description='Organizational hierarchy and structure classes (30+ classes): departments, divisions, branches, parent organizations',
        description_nl='Organisatiehiërarchie en structuurklassen (30+ klassen): afdelingen, divisies, vestigingen, moederorganisaties',
        icon='🏢',
        sources=[
            'org_structure_hierarchy',
            'org_structure_administrative',
            'org_structure_subdivision',
        ],
    ),
    MappingCategory(
        id='heritage_cultural',
        name='Heritage & Cultural Sites',
        name_nl='Erfgoed & Culturele Locaties',
        description='World heritage sites, intangible heritage, cultural institutions (15+ classes)',
        description_nl='Werelderfgoedlocaties, immaterieel erfgoed, culturele instellingen (15+ klassen)',
        icon='🗿',
        sources=[
            'heritage_world_sites',
            'heritage_intangible',
            'heritage_national_treasures',
        ],
    ),
    MappingCategory(
        id='classification_types',
        name='Classification Types',
        name_nl='Classificatietypen',
        description='Type classes for custodian classification (32 classes): MuseumType, LibraryType, ArchiveOrganizationType, etc.',
        description_nl='Typeklassen voor bronhouderclassificatie (32 klassen): MuseumType, BibliotheekType, ArchiefOrganisatieType, etc.',
        icon='🏷️',
        sources=[
            'type_classes_glam',
            'type_classes_digital',
            'type_classes_organizational',
        ],
    ),
    # -------------------------------------------------------------------------
    # PHASE 2 ADDITIONS: Remaining Schema Class Coverage Categories
    # -------------------------------------------------------------------------
    MappingCategory(
        id='place_location',
        name='Place & Location',
        name_nl='Plaats & Locatie',
        description='Geographic and spatial location classes (8 classes): settlements, countries, custodian places, feature places',
        description_nl='Geografische en ruimtelijke locatieklassen (8 klassen): nederzettingen, landen, bronhouderplaatsen, kenmerkplaatsen',
        icon='📍',
        sources=[
            'place_geographic',
            'place_custodian_specific',
        ],
    ),
    MappingCategory(
        id='collections',
        name='Collections & Holdings',
        name_nl='Collecties & Bezittingen',
        description='Collection management and holdings classes (6 classes): collections, special collections, collection management systems',
        description_nl='Collectiebeheer en bezitklassen (6 klassen): collecties, bijzondere collecties, collectiebeheersystemen',
        icon='🗃️',
        sources=[
            'collection_core',
            'collection_management',
        ],
    ),
    MappingCategory(
        id='person_staff',
        name='Person & Staff',
        name_nl='Persoon & Personeel',
        description='Person and staff-related classes (9 classes): profiles, connections, work experience, credentials',
        description_nl="Persoon- en personeelgerelateerde klassen (9 klassen): profielen, connecties, werkervaring, diploma's",
        icon='👥',
        sources=[
            'person_profile_extended',
            'person_work_education',
        ],
    ),
    MappingCategory(
        id='digital_api',
        name='Digital & API Services',
        name_nl='Digitaal & API Diensten',
        description='Digital platforms and API endpoint classes (11 classes): web portals, OAI-PMH, search APIs, file APIs',
        description_nl="Digitale platformen en API-eindpuntklassen (11 klassen): webportalen, OAI-PMH, zoek-API's, bestand-API's",
        icon='🔌',
        sources=[
            'digital_platforms_extended',
            'api_endpoints',
        ],
    ),
    MappingCategory(
        id='video_media',
        name='Video & Social Media',
        name_nl='Video & Sociale Media',
        description='Video content and social media classes (11 classes): video annotations, chapters, social media posts/profiles',
        description_nl='Video-inhoud en sociale mediaklassen (11 klassen): video-annotaties, hoofdstukken, sociale media posts/profielen',
        icon='🎬',
        sources=[
            'video_content',
            'social_media_content',
        ],
    ),
    MappingCategory(
        id='legal_admin',
        name='Legal & Administrative',
        name_nl='Juridisch & Administratief',
        description='Legal, policy, and administrative classes (9 classes): access policies, budgets, projects, registration',
        description_nl='Juridische, beleids- en administratieve klassen (9 klassen): toegangsbeleid, budgetten, projecten, registratie',
        icon='⚖️',
        sources=[
            'legal_policies',
            'administrative_records',
        ],
    ),
    MappingCategory(
        id='finding_aids',
        name='Finding Aids & Standards',
        name_nl='Toegangen & Standaarden',
        description='Finding aids, standards, and documentation classes (5 classes): finding aids, source documents, standards',
        description_nl='Toegangen, standaarden en documentatieklassen (5 klassen): toegangen, brondocumenten, standaarden',
        icon='📑',
        sources=[
            'finding_aids_standards',
        ],
    ),
    MappingCategory(
        id='reconstruction',
        name='Reconstruction & Provenance',
        name_nl='Reconstructie & Herkomst',
        description='Entity reconstruction and provenance tracking classes (4 classes): reconstructed entities, activities, agents',
        description_nl='Entiteitsreconstructie en herkomsttrackingklassen (4 klassen): gereconstrueerde entiteiten, activiteiten, agenten',
        icon='🔄',
        sources=[
            'reconstruction_provenance',
        ],
    ),
    MappingCategory(
        id='storage_facilities',
        name='Storage & Facilities',
        name_nl='Opslag & Faciliteiten',
        description='Storage conditions and facility classes (7 classes): storage types, conditions, education centers',
        description_nl='Opslagcondities en faciliteitenklassen (7 klassen): opslagtypen, condities, onderwijscentra',
        icon='🏪',
        sources=[
            'storage_facilities',
        ],
    ),
    MappingCategory(
        id='funding',
        name='Funding & Grants',
        name_nl='Financiering & Subsidies',
        description='Funding and grant-related classes (3 classes): funding agendas, requirements, applications',
        description_nl="Financiering- en subsidieklassen (3 klassen): financieringsagenda's, vereisten, aanvragen",
        icon='💰',
        sources=[
            'funding_grants',
        ],
    ),
    MappingCategory(
        id='language_naming',
        name='Language & Naming',
        name_nl='Taal & Naamgeving',
        description='Language and naming classes (4 classes): language codes, proficiency, appellations',
        description_nl='Taal- en naamgevingsklassen (4 klassen): taalcodes, taalvaardigheid, benamingen',
        icon='🗣️',
        sources=[
            'language_naming',
        ],
    ),
    MappingCategory(
        id='specialized_archives_intl',
        name='Specialized Archives (International)',
        name_nl='Gespecialiseerde Archieven (Internationaal)',
        description='Country-specific specialized archive types (19 classes): German, Swedish, French, Czech archive types',
        description_nl='Landspecifieke gespecialiseerde archieftypen (19 klassen): Duitse, Zweedse, Franse, Tsjechische archieftypen',
        icon='🌍',
        sources=[
            'archives_german',
            'archives_swedish',
            'archives_french',
            'archives_other',
        ],
    ),
]


# ============================================================================
# ENRICHMENT SOURCE MAPPINGS # ============================================================================ ENRICHMENT_MAPPINGS: list[EnrichmentSourceMapping] = [ # ------------------------------------------------------------------------- # GHCID - Global Heritage Custodian Identifier # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='ghcid', description='Global Heritage Custodian Identifier - persistent unique identifier', detailed_description=""" The GHCID is the persistent unique identifier for every heritage custodian. It follows the format: {COUNTRY}-{REGION}-{SETTLEMENT}-{TYPE}-{ABBREV} Example: NL-NH-AMS-M-RM (Rijksmuseum, Amsterdam, Netherlands) GHCIDs are deterministically generated and hashed to multiple UUID formats for different use cases (UUID v5 for primary, UUID v8 for future-proofing). """.strip(), linkml_class='GHCID', typedb_entity='ghcid', provenance=Provenance( source_type='computed', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), fields=[ FieldMapping( source_path='ghcid.ghcid_current', source_description='Current GHCID string', target_class='GHCID', target_slot='ghcid_string', transformation=TransformationType.DIRECT, typedb_entity='ghcid', typedb_attribute='ghcid-string', rdf_predicate='hc:ghcidString', required=True, example=FieldExample( source_value='NL-NH-AMS-M-RM', target_value='NL-NH-AMS-M-RM', rdf_triple=' hc:ghcidString "NL-NH-AMS-M-RM" .', ), ), FieldMapping( source_path='ghcid.ghcid_uuid', source_description='UUID v5 derived from GHCID string', target_class='GHCID', target_slot='ghcid_uuid', transformation=TransformationType.COMPUTED, transformation_details='UUID v5 generated using SHA-1 hash of GHCID string with heritage namespace', typedb_entity='ghcid', typedb_attribute='ghcid-uuid', rdf_predicate='hc:ghcidUuid', required=True, example=FieldExample( source_value='550e8400-e29b-5d4f-a716-446655440000', target_value='550e8400-e29b-5d4f-a716-446655440000', ), ), 
FieldMapping( source_path='ghcid.ghcid_numeric', source_description='64-bit numeric ID for database optimization', target_class='GHCID', target_slot='ghcid_numeric', transformation=TransformationType.COMPUTED, transformation_details='SHA-256 hash truncated to 64-bit integer', typedb_entity='ghcid', typedb_attribute='ghcid-numeric', required=False, ), FieldMapping( source_path='ghcid.location_resolution', source_description='GeoNames resolution metadata', target_class='GHCID', target_slot='location_resolution', transformation=TransformationType.NESTED, transformation_details='Maps to LocationResolution class with GeoNames provenance', typedb_entity='location-resolution', required=False, ), ], example_yaml=""" ghcid: ghcid_current: NL-NH-AMS-M-RM ghcid_uuid: 550e8400-e29b-5d4f-a716-446655440000 ghcid_numeric: 213324328442227739 location_resolution: method: REVERSE_GEOCODE geonames_id: 2759794 geonames_name: Amsterdam settlement_code: AMS admin1_code: "07" region_code: NH """.strip(), ), # ------------------------------------------------------------------------- # Identifiers # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='identifiers', description='External identifiers from various sources', detailed_description=""" Links to external identifier systems including: - ISIL codes (International Standard Identifier for Libraries) - Wikidata QIDs - VIAF (Virtual International Authority File) - KvK numbers (Dutch Chamber of Commerce) - Museum Register numbers - And more... 
""".strip(), linkml_class='Identifier', typedb_entity='identifier', provenance=Provenance( source_type='registry_lookup', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), fields=[ FieldMapping( source_path='identifiers[].identifier_scheme', source_description='Identifier system/scheme name', target_class='Identifier', target_slot='identifier_scheme', transformation=TransformationType.LOOKUP, transformation_details='Maps to IdentifierSchemeEnum', typedb_entity='identifier', typedb_attribute='scheme', rdf_predicate='hc:identifierScheme', required=True, validation=FieldValidation( type='enum', enum_values=['GHCID', 'ISIL', 'Wikidata', 'VIAF', 'KvK', 'MuseumRegister', 'NDE', 'Website'], ), example=FieldExample( source_value='ISIL', target_value='ISIL', ), ), FieldMapping( source_path='identifiers[].identifier_value', source_description='The identifier value/code', target_class='Identifier', target_slot='identifier_value', transformation=TransformationType.DIRECT, typedb_entity='identifier', typedb_attribute='value', rdf_predicate='hc:identifierValue', required=True, example=FieldExample( source_value='NL-AmRM', target_value='NL-AmRM', ), ), FieldMapping( source_path='identifiers[].identifier_url', source_description='URL to the identifier record', target_class='Identifier', target_slot='identifier_url', transformation=TransformationType.DIRECT, typedb_entity='identifier', typedb_attribute='url', rdf_predicate='schema:url', required=False, example=FieldExample( source_value='https://www.wikidata.org/wiki/Q190804', target_value='https://www.wikidata.org/wiki/Q190804', ), ), ], example_yaml=""" identifiers: - identifier_scheme: GHCID identifier_value: NL-NH-AMS-M-RM - identifier_scheme: ISIL identifier_value: NL-AmRM - identifier_scheme: Wikidata identifier_value: Q190804 identifier_url: https://www.wikidata.org/wiki/Q190804 - identifier_scheme: VIAF identifier_value: "148691498" """.strip(), ), # ------------------------------------------------------------------------- # 
Custodian Name # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='custodian_name', description='Consensus name with confidence scoring', detailed_description=""" The canonical name determined through multi-source reconciliation. Includes emic (native language) name with legal form stripped per Rule 8. Confidence scores indicate how many sources agree on each name variant. """.strip(), linkml_class='CustodianName', typedb_entity='custodian-name', provenance=Provenance( source_type='reconciliation', data_tier=DataTier.TIER_2_VERIFIED, ), fields=[ FieldMapping( source_path='custodian_name.consensus_name', source_description='Reconciled canonical name', target_class='CustodianName', target_slot='name_string', transformation=TransformationType.DIRECT, typedb_entity='custodian-name', typedb_attribute='name-string', rdf_predicate='skos:prefLabel', required=True, example=FieldExample( source_value='Rijksmuseum', target_value='Rijksmuseum', rdf_triple=' skos:prefLabel "Rijksmuseum"@nl .', ), ), FieldMapping( source_path='custodian_name.emic_name', source_description='Native language name (legal form stripped)', target_class='CustodianName', target_slot='emic_name', transformation=TransformationType.NORMALIZE, transformation_details='Legal form terms (Stichting, Foundation, etc.) 
removed per Rule 8', typedb_entity='custodian-name', typedb_attribute='emic-name', rdf_predicate='hc:emicName', required=False, example=FieldExample( source_value='Rijksmuseum Amsterdam', target_value='Rijksmuseum Amsterdam', ), ), FieldMapping( source_path='custodian_name.confidence_score', source_description='Confidence in name accuracy (0-1)', target_class='CustodianName', target_slot='confidence', transformation=TransformationType.DIRECT, typedb_entity='custodian-name', typedb_attribute='confidence-score', required=False, validation=FieldValidation(type='number'), example=FieldExample( source_value=0.95, target_value=0.95, ), ), FieldMapping( source_path='custodian_name.alternative_names', source_description='List of alternative/historical names', target_class='CustodianName', target_slot='alternative_names', transformation=TransformationType.ARRAY_DIRECT, typedb_entity='custodian-name', typedb_attribute='alternative-names', rdf_predicate='skos:altLabel', required=False, ), ], example_yaml=""" custodian_name: consensus_name: Rijksmuseum emic_name: Rijksmuseum Amsterdam confidence_score: 0.95 alternative_names: - Rijksmuseum Amsterdam - Netherlands State Museum """.strip(), ), # ------------------------------------------------------------------------- # Google Maps Enrichment # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='google_maps_enrichment', description='Location and business data from Google Maps Places API', detailed_description=""" Rich location data including coordinates, address, opening hours, ratings, reviews, and photos from Google Maps Places API. Each enrichment creates a CustodianObservation with google_maps_api provenance. 
""".strip(), linkml_class='CustodianObservation', typedb_entity='custodian-observation', provenance=Provenance( source_type='google_maps_api', data_tier=DataTier.TIER_2_VERIFIED, api_endpoint='https://maps.googleapis.com/maps/api/place/', update_frequency='On-demand', ), generated_classes=['Place', 'GeoCoordinates', 'OpeningHours'], fields=[ FieldMapping( source_path='google_maps_enrichment.place_id', source_description='Google Maps Place ID', target_class='CustodianObservation', target_slot='external_id', transformation=TransformationType.DIRECT, typedb_entity='custodian-observation', typedb_attribute='external-id', rdf_predicate='schema:identifier', required=True, example=FieldExample( source_value='ChIJ5Ra7we4JxkcRhYVAaq5zQ9U', target_value='ChIJ5Ra7we4JxkcRhYVAaq5zQ9U', ), ), FieldMapping( source_path='google_maps_enrichment.coordinates.latitude', source_description='Latitude coordinate', target_class='GeoCoordinates', target_slot='latitude', transformation=TransformationType.NESTED, typedb_entity='geo-coordinates', typedb_attribute='latitude', rdf_predicate='schema:latitude', required=True, validation=FieldValidation(type='number'), example=FieldExample( source_value=52.3599976, target_value=52.3599976, ), ), FieldMapping( source_path='google_maps_enrichment.coordinates.longitude', source_description='Longitude coordinate', target_class='GeoCoordinates', target_slot='longitude', transformation=TransformationType.NESTED, typedb_entity='geo-coordinates', typedb_attribute='longitude', rdf_predicate='schema:longitude', required=True, validation=FieldValidation(type='number'), example=FieldExample( source_value=4.8852188, target_value=4.8852188, ), ), FieldMapping( source_path='google_maps_enrichment.formatted_address', source_description='Human-readable formatted address', target_class='Place', target_slot='formatted_address', transformation=TransformationType.DIRECT, typedb_entity='place', typedb_attribute='formatted-address', rdf_predicate='schema:address', 
required=False, example=FieldExample( source_value='Museumstraat 1, 1071 XX Amsterdam, Netherlands', target_value='Museumstraat 1, 1071 XX Amsterdam, Netherlands', ), ), FieldMapping( source_path='google_maps_enrichment.rating', source_description='Average rating (1-5)', target_class='CustodianObservation', target_slot='rating', transformation=TransformationType.DIRECT, typedb_entity='custodian-observation', typedb_attribute='rating', rdf_predicate='schema:aggregateRating', required=False, validation=FieldValidation(type='number'), example=FieldExample( source_value=4.6, target_value=4.6, ), ), FieldMapping( source_path='google_maps_enrichment.total_ratings', source_description='Total number of ratings', target_class='CustodianObservation', target_slot='review_count', transformation=TransformationType.DIRECT, typedb_entity='custodian-observation', typedb_attribute='review-count', rdf_predicate='schema:reviewCount', required=False, validation=FieldValidation(type='number'), example=FieldExample( source_value=47832, target_value=47832, ), ), FieldMapping( source_path='google_maps_enrichment.opening_hours', source_description='Weekly opening hours schedule', target_class='OpeningHours', target_slot='opening_hours_specification', transformation=TransformationType.NESTED, transformation_details='Maps to OpeningHoursSpecification array per day', typedb_entity='opening-hours', rdf_predicate='schema:openingHoursSpecification', required=False, ), FieldMapping( source_path='google_maps_enrichment.website', source_description='Official website URL', target_class='CustodianObservation', target_slot='website', transformation=TransformationType.DIRECT, typedb_entity='custodian-observation', typedb_attribute='website', rdf_predicate='schema:url', required=False, validation=FieldValidation(type='uri'), example=FieldExample( source_value='https://www.rijksmuseum.nl/', target_value='https://www.rijksmuseum.nl/', ), ), FieldMapping( source_path='google_maps_enrichment.phone', 
source_description='Phone number', target_class='CustodianObservation', target_slot='telephone', transformation=TransformationType.DIRECT, typedb_entity='custodian-observation', typedb_attribute='telephone', rdf_predicate='schema:telephone', required=False, example=FieldExample( source_value='+31 20 674 7000', target_value='+31 20 674 7000', ), ), ], example_yaml=""" google_maps_enrichment: place_id: ChIJ5Ra7we4JxkcRhYVAaq5zQ9U name: Rijksmuseum coordinates: latitude: 52.3599976 longitude: 4.8852188 formatted_address: Museumstraat 1, 1071 XX Amsterdam, Netherlands rating: 4.6 total_ratings: 47832 website: https://www.rijksmuseum.nl/ phone: +31 20 674 7000 opening_hours: Monday: 9:00 AM - 5:00 PM Tuesday: 9:00 AM - 5:00 PM # ... """.strip(), ), # ------------------------------------------------------------------------- # Wikidata Enrichment # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='wikidata_enrichment', description='Linked data from Wikidata knowledge graph', detailed_description=""" Rich semantic data from Wikidata including: - Multilingual labels and descriptions - Sitelinks to Wikipedia articles - Structured properties (coordinates, founding date, etc.) - Instance-of relationships for type classification Creates a CustodianObservation with wikidata_api provenance. 
""".strip(), linkml_class='CustodianObservation', typedb_entity='custodian-observation', provenance=Provenance( source_type='wikidata_api', data_tier=DataTier.TIER_3_CROWD_SOURCED, api_endpoint='https://www.wikidata.org/wiki/Special:EntityData/', ), generated_classes=['WikidataEntity', 'Sitelink'], fields=[ FieldMapping( source_path='wikidata_enrichment.entity_id', source_description='Wikidata Q-ID', target_class='CustodianObservation', target_slot='external_id', transformation=TransformationType.DIRECT, typedb_entity='custodian-observation', typedb_attribute='external-id', rdf_predicate='schema:identifier', required=True, example=FieldExample( source_value='Q190804', target_value='Q190804', rdf_triple=' schema:identifier "Q190804" .', ), ), FieldMapping( source_path='wikidata_enrichment.labels', source_description='Multilingual labels', target_class='CustodianObservation', target_slot='labels', transformation=TransformationType.NESTED, transformation_details='Language-tagged strings (e.g., {"en": "Rijksmuseum", "nl": "Rijksmuseum"})', typedb_entity='custodian-observation', rdf_predicate='rdfs:label', required=False, ), FieldMapping( source_path='wikidata_enrichment.descriptions', source_description='Multilingual descriptions', target_class='CustodianObservation', target_slot='descriptions', transformation=TransformationType.NESTED, typedb_entity='custodian-observation', rdf_predicate='schema:description', required=False, ), FieldMapping( source_path='wikidata_enrichment.sitelinks', source_description='Links to Wikipedia articles', target_class='Sitelink', target_slot='sitelinks', transformation=TransformationType.ARRAY_MAP, transformation_details='Each sitelink maps to Wikipedia article URL', typedb_entity='sitelink', rdf_predicate='schema:sameAs', required=False, ), FieldMapping( source_path='wikidata_enrichment.instance_of', source_description='Wikidata type classification (P31)', target_class='CustodianObservation', target_slot='instance_of', 
transformation=TransformationType.LOOKUP, transformation_details='Maps Q-ID to CustodianTypeEnum', typedb_entity='custodian-observation', typedb_attribute='wikidata-type', rdf_predicate='wdt:P31', required=False, example=FieldExample( source_value='Q33506', target_value='MUSEUM', ), ), FieldMapping( source_path='wikidata_enrichment.coordinates', source_description='Geographic coordinates from Wikidata (P625)', target_class='GeoCoordinates', target_slot='coordinates', transformation=TransformationType.NESTED, typedb_entity='geo-coordinates', rdf_predicate='wdt:P625', required=False, ), FieldMapping( source_path='wikidata_enrichment.founding_date', source_description='Date of establishment (P571)', target_class='CustodianObservation', target_slot='founding_date', transformation=TransformationType.TEMPORAL, transformation_details='Wikidata time format to ISO 8601', typedb_entity='custodian-observation', typedb_attribute='founding-date', rdf_predicate='wdt:P571', required=False, validation=FieldValidation(type='date'), example=FieldExample( source_value='+1800-01-01T00:00:00Z', target_value='1800-01-01', ), ), ], example_yaml=""" wikidata_enrichment: entity_id: Q190804 labels: en: Rijksmuseum nl: Rijksmuseum de: Rijksmuseum fr: Rijksmuseum descriptions: en: Dutch national museum in Amsterdam nl: Nederlands nationaal museum in Amsterdam sitelinks: enwiki: Rijksmuseum nlwiki: Rijksmuseum dewiki: Rijksmuseum instance_of: id: Q33506 label: museum coordinates: latitude: 52.36 longitude: 4.885 founding_date: "+1800-01-01T00:00:00Z" """.strip(), ), # ------------------------------------------------------------------------- # Location (Canonical) # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='location', description='Normalized canonical location', detailed_description=""" The authoritative location record after reconciliation from multiple sources. 
This is the single source of truth for the custodian's physical location. """.strip(), linkml_class='Place', typedb_entity='place', provenance=Provenance( source_type='reconciliation', data_tier=DataTier.TIER_2_VERIFIED, ), fields=[ FieldMapping( source_path='location.city', source_description='City name', target_class='Place', target_slot='city', transformation=TransformationType.DIRECT, typedb_entity='place', typedb_attribute='city', rdf_predicate='schema:addressLocality', required=True, example=FieldExample( source_value='Amsterdam', target_value='Amsterdam', ), ), FieldMapping( source_path='location.country', source_description='ISO 3166-1 alpha-2 country code', target_class='Place', target_slot='country', transformation=TransformationType.LOOKUP, transformation_details='Maps to CountryCodeEnum', typedb_entity='place', typedb_attribute='country-code', rdf_predicate='schema:addressCountry', required=True, validation=FieldValidation( type='enum', pattern='^[A-Z]{2}$', ), example=FieldExample( source_value='NL', target_value='NL', ), ), FieldMapping( source_path='location.region', source_description='Region/province name', target_class='Place', target_slot='region', transformation=TransformationType.DIRECT, typedb_entity='place', typedb_attribute='region', rdf_predicate='schema:addressRegion', required=False, example=FieldExample( source_value='North Holland', target_value='North Holland', ), ), FieldMapping( source_path='location.postal_code', source_description='Postal/ZIP code', target_class='Place', target_slot='postal_code', transformation=TransformationType.DIRECT, typedb_entity='place', typedb_attribute='postal-code', rdf_predicate='schema:postalCode', required=False, example=FieldExample( source_value='1071 XX', target_value='1071 XX', ), ), FieldMapping( source_path='location.street_address', source_description='Street address', target_class='Place', target_slot='street_address', transformation=TransformationType.DIRECT, typedb_entity='place', 
typedb_attribute='street-address', rdf_predicate='schema:streetAddress', required=False, example=FieldExample( source_value='Museumstraat 1', target_value='Museumstraat 1', ), ), ], example_yaml=""" location: city: Amsterdam country: NL region: North Holland postal_code: 1071 XX street_address: Museumstraat 1 """.strip(), ), # ------------------------------------------------------------------------- # Web Enrichment # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='web_enrichment', description='Archived website data and scraped content', detailed_description=""" Content extracted from the institution's website using web scraping tools. Includes organization details, collections, exhibitions, contact info, etc. All claims must have XPath provenance per Rule 6. """.strip(), linkml_class='WebObservation', typedb_entity='web-observation', provenance=Provenance( source_type='web_scrape', data_tier=DataTier.TIER_2_VERIFIED, ), generated_classes=['WebClaim'], fields=[ FieldMapping( source_path='web_enrichment.source_url', source_description='URL of scraped page', target_class='WebObservation', target_slot='source_url', transformation=TransformationType.DIRECT, typedb_entity='web-observation', typedb_attribute='source-url', rdf_predicate='prov:wasDerivedFrom', required=True, validation=FieldValidation(type='uri'), ), FieldMapping( source_path='web_enrichment.retrieved_on', source_description='Timestamp when page was archived', target_class='WebObservation', target_slot='retrieved_on', transformation=TransformationType.TEMPORAL, typedb_entity='web-observation', typedb_attribute='retrieved-on', rdf_predicate='prov:generatedAtTime', required=True, validation=FieldValidation(type='date'), ), FieldMapping( source_path='web_enrichment.html_file', source_description='Path to archived HTML file', target_class='WebObservation', target_slot='archive_path', transformation=TransformationType.DIRECT, 
typedb_entity='web-observation', typedb_attribute='archive-path', required=False, ), FieldMapping( source_path='web_enrichment.organization_details', source_description='Extracted organization information', target_class='WebObservation', target_slot='extracted_content', transformation=TransformationType.NESTED, typedb_entity='web-observation', required=False, ), ], example_yaml=""" web_enrichment: source_url: https://www.rijksmuseum.nl/en/about-us retrieved_on: "2025-01-15T10:30:00Z" html_file: web/NL-NH-AMS-M-RM/rijksmuseum.nl/about-us.html retrieval_agent: firecrawl organization_details: mission: "To connect people with art and history" established: "1800" collection_size: "1 million objects" """.strip(), ), # ------------------------------------------------------------------------- # Web Claims # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='web_claims', description='Verified claims extracted from websites with XPath provenance', detailed_description=""" Individual facts extracted from web pages with full provenance chain. Each claim MUST have XPath pointer to exact location in archived HTML. Claims without XPath provenance are fabricated and must be removed per Rule 6. 
""".strip(), linkml_class='WebClaim', typedb_entity='web-claim', provenance=Provenance( source_type='web_extraction', data_tier=DataTier.TIER_2_VERIFIED, ), fields=[ FieldMapping( source_path='web_claims[].claim_type', source_description='Type of claim (full_name, email, phone, etc.)', target_class='WebClaim', target_slot='claim_type', transformation=TransformationType.LOOKUP, typedb_entity='web-claim', typedb_attribute='claim-type', rdf_predicate='hc:claimType', required=True, validation=FieldValidation( type='enum', enum_values=['full_name', 'description', 'email', 'phone', 'address', 'opening_hours', 'social_media'], ), ), FieldMapping( source_path='web_claims[].claim_value', source_description='The extracted value', target_class='WebClaim', target_slot='claim_value', transformation=TransformationType.DIRECT, typedb_entity='web-claim', typedb_attribute='claim-value', rdf_predicate='rdf:value', required=True, ), FieldMapping( source_path='web_claims[].xpath', source_description='XPath to element containing value', target_class='WebClaim', target_slot='xpath', transformation=TransformationType.DIRECT, typedb_entity='web-claim', typedb_attribute='xpath', rdf_predicate='hc:xpath', required=True, ), FieldMapping( source_path='web_claims[].source_url', source_description='URL where claim was extracted', target_class='WebClaim', target_slot='source_url', transformation=TransformationType.DIRECT, typedb_entity='web-claim', typedb_attribute='source-url', rdf_predicate='prov:wasDerivedFrom', required=True, validation=FieldValidation(type='uri'), ), FieldMapping( source_path='web_claims[].xpath_match_score', source_description='Match confidence (1.0 = exact)', target_class='WebClaim', target_slot='match_score', transformation=TransformationType.DIRECT, typedb_entity='web-claim', typedb_attribute='match-score', required=False, validation=FieldValidation(type='number'), ), ], example_yaml=""" web_claims: - claim_type: full_name claim_value: Rijksmuseum Amsterdam source_url: 
https://www.rijksmuseum.nl/ retrieved_on: "2025-01-15T10:30:00Z" xpath: /html/body/header/div[1]/a/span html_file: web/NL-NH-AMS-M-RM/rijksmuseum.nl/index.html xpath_match_score: 1.0 """.strip(), ), # ------------------------------------------------------------------------- # Provenance # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='provenance', description='Data lineage and source tracking', detailed_description=""" Metadata about where the data came from, when it was collected, and how confident we are in its accuracy. """.strip(), linkml_class='Provenance', typedb_entity='provenance', provenance=Provenance( source_type='metadata', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), fields=[ FieldMapping( source_path='provenance.data_source', source_description='Origin of the data', target_class='Provenance', target_slot='data_source', transformation=TransformationType.LOOKUP, typedb_entity='provenance', typedb_attribute='data-source', rdf_predicate='prov:wasAttributedTo', required=True, validation=FieldValidation( type='enum', enum_values=['CSV_REGISTRY', 'CONVERSATION_NLP', 'API_ENRICHMENT', 'WEB_SCRAPE', 'MANUAL_ENTRY'], ), ), FieldMapping( source_path='provenance.data_tier', source_description='Data quality tier', target_class='Provenance', target_slot='data_tier', transformation=TransformationType.LOOKUP, typedb_entity='provenance', typedb_attribute='data-tier', rdf_predicate='hc:dataTier', required=True, validation=FieldValidation( type='enum', enum_values=['TIER_1_AUTHORITATIVE', 'TIER_2_VERIFIED', 'TIER_3_CROWD_SOURCED', 'TIER_4_INFERRED'], ), ), FieldMapping( source_path='provenance.extraction_date', source_description='When data was extracted', target_class='Provenance', target_slot='extraction_date', transformation=TransformationType.TEMPORAL, typedb_entity='provenance', typedb_attribute='extraction-date', rdf_predicate='prov:generatedAtTime', required=True, 
validation=FieldValidation(type='date'), ), FieldMapping( source_path='provenance.confidence_score', source_description='Confidence in data accuracy (0-1)', target_class='Provenance', target_slot='confidence_score', transformation=TransformationType.DIRECT, typedb_entity='provenance', typedb_attribute='confidence-score', rdf_predicate='hc:confidenceScore', required=False, validation=FieldValidation(type='number'), ), ], example_yaml=""" provenance: data_source: API_ENRICHMENT data_tier: TIER_2_VERIFIED extraction_date: "2025-01-15T10:30:00Z" extraction_method: "Google Maps Places API + Wikidata SPARQL" confidence_score: 0.92 source_files: - google_maps_enrichment - wikidata_enrichment """.strip(), ), # ------------------------------------------------------------------------- # Timespan # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='timespan', description='Temporal bounds (CIDOC-CRM style)', detailed_description=""" Temporal information following CIDOC-CRM E52 Time-Span pattern. Captures founding date, closure date (if applicable), and temporal bounds. 
""".strip(), linkml_class='TimeSpan', typedb_entity='time-span', provenance=Provenance( source_type='derived', data_tier=DataTier.TIER_2_VERIFIED, ), fields=[ FieldMapping( source_path='timespan.begin_of_the_begin', source_description='Earliest possible start date', target_class='TimeSpan', target_slot='begin_of_the_begin', transformation=TransformationType.TEMPORAL, typedb_entity='time-span', typedb_attribute='begin-of-the-begin', rdf_predicate='crm:P82a_begin_of_the_begin', required=False, validation=FieldValidation(type='date'), ), FieldMapping( source_path='timespan.end_of_the_begin', source_description='Latest possible start date', target_class='TimeSpan', target_slot='end_of_the_begin', transformation=TransformationType.TEMPORAL, typedb_entity='time-span', typedb_attribute='end-of-the-begin', rdf_predicate='crm:P81a_end_of_the_begin', required=False, validation=FieldValidation(type='date'), ), FieldMapping( source_path='timespan.begin_of_the_end', source_description='Earliest possible end date', target_class='TimeSpan', target_slot='begin_of_the_end', transformation=TransformationType.TEMPORAL, typedb_entity='time-span', typedb_attribute='begin-of-the-end', rdf_predicate='crm:P81b_begin_of_the_end', required=False, validation=FieldValidation(type='date'), ), FieldMapping( source_path='timespan.end_of_the_end', source_description='Latest possible end date', target_class='TimeSpan', target_slot='end_of_the_end', transformation=TransformationType.TEMPORAL, typedb_entity='time-span', typedb_attribute='end-of-the-end', rdf_predicate='crm:P82b_end_of_the_end', required=False, validation=FieldValidation(type='date'), ), ], example_yaml=""" timespan: begin_of_the_begin: "1800-01-01" end_of_the_begin: "1800-12-31" # Museum still operating - no end dates """.strip(), ), # ------------------------------------------------------------------------- # Legal Status # ------------------------------------------------------------------------- EnrichmentSourceMapping( 
source_block='legal_status', description='Legal form and organizational structure', detailed_description=""" Legal entity information including legal form (foundation, corporation, etc.), registration numbers, and governing body information. """.strip(), linkml_class='CustodianLegalStatus', typedb_entity='custodian-legal-status', provenance=Provenance( source_type='registry_lookup', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), fields=[ FieldMapping( source_path='legal_status.legal_form', source_description='ISO 20275 legal form code', target_class='CustodianLegalStatus', target_slot='legal_form', transformation=TransformationType.LOOKUP, transformation_details='Maps to ISO 20275 Entity Legal Form codes', typedb_entity='custodian-legal-status', typedb_attribute='legal-form', rdf_predicate='org:classification', required=False, example=FieldExample( source_value='stichting', target_value='NL_STI', # ISO 20275 code ), ), FieldMapping( source_path='legal_status.legal_name', source_description='Full registered name including legal form', target_class='CustodianLegalStatus', target_slot='legal_name', transformation=TransformationType.DIRECT, typedb_entity='custodian-legal-status', typedb_attribute='legal-name', rdf_predicate='schema:legalName', required=False, example=FieldExample( source_value='Stichting Rijksmuseum', target_value='Stichting Rijksmuseum', ), ), FieldMapping( source_path='legal_status.kvk_number', source_description='Dutch Chamber of Commerce number', target_class='CustodianLegalStatus', target_slot='kvk_number', transformation=TransformationType.DIRECT, typedb_entity='custodian-legal-status', typedb_attribute='kvk-number', rdf_predicate='hc:kvkNumber', required=False, validation=FieldValidation( type='string', pattern='^[0-9]{8}$', ), ), ], example_yaml=""" legal_status: legal_form: stichting legal_name: Stichting Rijksmuseum kvk_number: "10205502" registered_address: Amsterdam """.strip(), ), # 
------------------------------------------------------------------------- # Digital Platforms # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='digital_platforms', description='Website and digital platform metadata', detailed_description=""" Information about the institution's digital presence including primary website, collection management systems, discovery portals, and APIs. """.strip(), linkml_class='DigitalPlatform', typedb_entity='digital-platform', provenance=Provenance( source_type='web_discovery', data_tier=DataTier.TIER_2_VERIFIED, ), fields=[ FieldMapping( source_path='digital_platforms[].platform_name', source_description='Name of the platform', target_class='DigitalPlatform', target_slot='platform_name', transformation=TransformationType.DIRECT, typedb_entity='digital-platform', typedb_attribute='platform-name', rdf_predicate='schema:name', required=True, ), FieldMapping( source_path='digital_platforms[].platform_url', source_description='URL of the platform', target_class='DigitalPlatform', target_slot='platform_url', transformation=TransformationType.DIRECT, typedb_entity='digital-platform', typedb_attribute='platform-url', rdf_predicate='schema:url', required=True, validation=FieldValidation(type='uri'), ), FieldMapping( source_path='digital_platforms[].platform_type', source_description='Type of platform', target_class='DigitalPlatform', target_slot='platform_type', transformation=TransformationType.LOOKUP, typedb_entity='digital-platform', typedb_attribute='platform-type', rdf_predicate='hc:platformType', required=False, validation=FieldValidation( type='enum', enum_values=['WEBSITE', 'COLLECTION_PORTAL', 'DISCOVERY_PLATFORM', 'API', 'SOCIAL_MEDIA'], ), ), ], example_yaml=""" digital_platforms: - platform_name: Rijksmuseum Website platform_url: https://www.rijksmuseum.nl/ platform_type: WEBSITE - platform_name: Rijksstudio platform_url: https://www.rijksmuseum.nl/en/rijksstudio 
platform_type: COLLECTION_PORTAL - platform_name: Rijksmuseum API platform_url: https://data.rijksmuseum.nl/ platform_type: API """.strip(), ), # ========================================================================= # ARCHIVE TYPE MAPPINGS # ========================================================================= # ------------------------------------------------------------------------- # archive_type_academic # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='archive_type_academic', description='Academic and research archive types - universities, institutes, learned societies', detailed_description=""" Academic archives serve educational and research institutions. They preserve: - University records and institutional history - Research data and scholarly outputs - Student and faculty records - Scientific collections and specimen documentation Classes: AcademicArchive, UniversityArchive, ScientificArchive, InstitutionalArchive """.strip(), linkml_class='AcademicArchive', typedb_entity='academic-archive', provenance=Provenance( source_type='schema_documentation', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'AcademicArchive', 'UniversityArchive', 'ScientificArchive', 'InstitutionalArchive', 'InstitutionalRepository', ], fields=[ FieldMapping( source_path=None, source_description='Archive classification type for academic institutions', target_class='AcademicArchive', target_slot='archive_type', transformation=TransformationType.LOOKUP, typedb_entity='academic-archive', typedb_attribute='archive-classification', rdf_predicate='hc:archiveType', required=True, status=MappingStatus.MAPPED, notes='Subclass of Archive with specialization for academic/research contexts', ), FieldMapping( source_path='institution_type', source_description='Parent educational institution type', target_class='AcademicArchive', target_slot='parent_institution_type', transformation=TransformationType.LOOKUP, 
typedb_entity='academic-archive', typedb_attribute='parent-institution-type', rdf_predicate='hc:parentInstitutionType', required=False, validation=FieldValidation( type='enum', enum_values=['UNIVERSITY', 'RESEARCH_INSTITUTE', 'ACADEMY', 'LEARNED_SOCIETY'], ), ), ], example_yaml=""" # AcademicArchive instance institution_type: ARCHIVE archive_classification: ACADEMIC parent_institution: University of Amsterdam specializations: - scientific_records - research_data - institutional_history """.strip(), ), # ------------------------------------------------------------------------- # archive_type_audiovisual # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='archive_type_audiovisual', description='Audiovisual archive types - film, sound, television, radio', detailed_description=""" Audiovisual archives preserve time-based media including: - Film and cinema collections (Cinematheque, FilmArchive) - Sound recordings (SoundArchive, MusicArchive, AnimalSoundArchive) - Television and radio broadcasts (TelevisionArchive, RadioArchive) - Media-specific preservation requirements Classes: AudiovisualArchive, FilmArchive, Cinematheque, SoundArchive, MusicArchive, RadioArchive, TelevisionArchive, AnimalSoundArchive, MediaArchive, AdvertisingRadioArchive """.strip(), linkml_class='AudiovisualArchive', typedb_entity='audiovisual-archive', provenance=Provenance( source_type='schema_documentation', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'AudiovisualArchive', 'FilmArchive', 'Cinematheque', 'SoundArchive', 'MusicArchive', 'RadioArchive', 'TelevisionArchive', 'AnimalSoundArchive', 'MediaArchive', 'AdvertisingRadioArchive', ], fields=[ FieldMapping( source_path=None, source_description='Audiovisual archive classification', target_class='AudiovisualArchive', target_slot='archive_type', transformation=TransformationType.LOOKUP, typedb_entity='audiovisual-archive', 
typedb_attribute='archive-classification', rdf_predicate='hc:archiveType', required=True, status=MappingStatus.MAPPED, ), FieldMapping( source_path='media_types', source_description='Types of media held in collection', target_class='AudiovisualArchive', target_slot='media_types', transformation=TransformationType.ARRAY_DIRECT, typedb_entity='audiovisual-archive', typedb_attribute='media-types', rdf_predicate='hc:mediaTypes', required=False, validation=FieldValidation( type='array', enum_values=['FILM', 'VIDEO', 'AUDIO', 'RADIO', 'TELEVISION', 'DIGITAL_MEDIA'], ), ), ], example_yaml=""" # FilmArchive instance (e.g., EYE Filmmuseum) institution_type: ARCHIVE archive_classification: AUDIOVISUAL subtype: FILM_ARCHIVE media_types: - FILM - VIDEO - DIGITAL_MEDIA preservation_formats: - 35mm - 16mm - digital_preservation """.strip(), ), # ------------------------------------------------------------------------- # archive_type_church # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='archive_type_church', description='Religious archive types - church, diocesan, monastery, parish', detailed_description=""" Religious archives document faith communities and their histories: - Church administration and governance (ChurchArchive, DiocesanArchive) - Religious orders and communities (MonasteryArchive, ReligiousArchive) - Parish records and sacramental registers (ParishArchive) - Regional variations (ChurchArchiveSweden, CathedralArchive) Classes: ChurchArchive, DiocesanArchive, MonasteryArchive, ParishArchive, ReligiousArchive, CathedralArchive, ChurchArchiveSweden """.strip(), linkml_class='ChurchArchive', typedb_entity='church-archive', provenance=Provenance( source_type='schema_documentation', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'ChurchArchive', 'DiocesanArchive', 'MonasteryArchive', 'ParishArchive', 'ReligiousArchive', 'CathedralArchive', 'ChurchArchiveSweden', ], fields=[ 
FieldMapping( source_path=None, source_description='Religious archive classification', target_class='ChurchArchive', target_slot='archive_type', transformation=TransformationType.LOOKUP, typedb_entity='church-archive', typedb_attribute='archive-classification', rdf_predicate='hc:archiveType', required=True, status=MappingStatus.MAPPED, ), FieldMapping( source_path='religious_denomination', source_description='Religious denomination or tradition', target_class='ChurchArchive', target_slot='denomination', transformation=TransformationType.DIRECT, typedb_entity='church-archive', typedb_attribute='denomination', rdf_predicate='hc:denomination', required=False, ), ], example_yaml=""" # DiocesanArchive instance institution_type: ARCHIVE archive_classification: CHURCH subtype: DIOCESAN_ARCHIVE religious_denomination: Roman Catholic diocese: Diocese of Haarlem-Amsterdam record_types: - sacramental_registers - parish_records - administrative_correspondence """.strip(), ), # ------------------------------------------------------------------------- # archive_type_corporate # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='archive_type_corporate', description='Corporate and business archive types - company, bank, trade union', detailed_description=""" Corporate archives document business and economic activities: - Company history and governance (CompanyArchives, BankArchive) - Labor organizations (TradeUnionArchive) - Economic documentation (EconomicArchive) - Industry-specific records (FoundationArchive, AssociationArchive) Classes: CompanyArchives, BankArchive, TradeUnionArchive, EconomicArchive, FoundationArchive, AssociationArchive, RegionalEconomicArchive """.strip(), linkml_class='CompanyArchives', typedb_entity='corporate-archive', provenance=Provenance( source_type='schema_documentation', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'CompanyArchives', 'BankArchive', 'TradeUnionArchive', 
'EconomicArchive', 'FoundationArchive', 'AssociationArchive', 'RegionalEconomicArchive', ], fields=[ FieldMapping( source_path=None, source_description='Corporate archive classification', target_class='CompanyArchives', target_slot='archive_type', transformation=TransformationType.LOOKUP, typedb_entity='corporate-archive', typedb_attribute='archive-classification', rdf_predicate='hc:archiveType', required=True, status=MappingStatus.MAPPED, ), FieldMapping( source_path='industry_sector', source_description='Industry sector of the organization', target_class='CompanyArchives', target_slot='industry_sector', transformation=TransformationType.DIRECT, typedb_entity='corporate-archive', typedb_attribute='industry-sector', rdf_predicate='hc:industrySector', required=False, ), ], example_yaml=""" # BankArchive instance institution_type: ARCHIVE archive_classification: CORPORATE subtype: BANK_ARCHIVE company_name: ABN AMRO Historical Archive industry_sector: FINANCIAL_SERVICES parent_organization: ABN AMRO Bank N.V. 
""".strip(), ), # ------------------------------------------------------------------------- # archive_type_government # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='archive_type_government', description='Government archive types - national, state, parliamentary, court', detailed_description=""" Government archives document state activities at all levels: - National archives (NationalArchives) - State/regional government (StateArchives, StateArchivesSection) - Parliamentary records (ParliamentaryArchives) - Judicial records (CourtRecords, NotarialArchive) - Public administration (PublicArchive, GovernmentArchive) - Security and intelligence (SecurityArchives) Classes: NationalArchives, StateArchives, GovernmentArchive, PublicArchive, ParliamentaryArchives, CourtRecords, NotarialArchive, SecurityArchives, CurrentArchive, PublicArchivesInFrance """.strip(), linkml_class='NationalArchives', typedb_entity='government-archive', provenance=Provenance( source_type='schema_documentation', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'NationalArchives', 'StateArchives', 'StateArchivesSection', 'GovernmentArchive', 'PublicArchive', 'ParliamentaryArchives', 'CourtRecords', 'NotarialArchive', 'SecurityArchives', 'CurrentArchive', 'PublicArchivesInFrance', ], fields=[ FieldMapping( source_path=None, source_description='Government archive classification', target_class='NationalArchives', target_slot='archive_type', transformation=TransformationType.LOOKUP, typedb_entity='government-archive', typedb_attribute='archive-classification', rdf_predicate='hc:archiveType', required=True, status=MappingStatus.MAPPED, ), FieldMapping( source_path='jurisdiction_level', source_description='Level of government jurisdiction', target_class='NationalArchives', target_slot='jurisdiction_level', transformation=TransformationType.LOOKUP, typedb_entity='government-archive', typedb_attribute='jurisdiction-level', 
rdf_predicate='hc:jurisdictionLevel', required=False, validation=FieldValidation( type='enum', enum_values=['NATIONAL', 'STATE', 'REGIONAL', 'MUNICIPAL', 'LOCAL'], ), ), ], example_yaml=""" # NationalArchives instance (e.g., Nationaal Archief) institution_type: ARCHIVE archive_classification: GOVERNMENT subtype: NATIONAL_ARCHIVES jurisdiction_level: NATIONAL country: NL official_name: Nationaal Archief legal_mandate: Archiefwet 1995 """.strip(), ), # ------------------------------------------------------------------------- # archive_type_municipal # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='archive_type_municipal', description='Municipal and local government archive types', detailed_description=""" Municipal archives serve local government and community documentation: - City/town archives (MunicipalArchive) - Local government records (LocalGovernmentArchive) - County/district level (CountyRecordOffice, DistrictArchiveGermany) - Local history collections (LocalHistoryArchive) Classes: MunicipalArchive, LocalGovernmentArchive, CountyRecordOffice, DistrictArchiveGermany, LocalHistoryArchive, ComarcalArchive, DistritalArchive """.strip(), linkml_class='MunicipalArchive', typedb_entity='municipal-archive', provenance=Provenance( source_type='schema_documentation', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'MunicipalArchive', 'LocalGovernmentArchive', 'CountyRecordOffice', 'DistrictArchiveGermany', 'LocalHistoryArchive', 'ComarcalArchive', 'DistritalArchive', ], fields=[ FieldMapping( source_path=None, source_description='Municipal archive classification', target_class='MunicipalArchive', target_slot='archive_type', transformation=TransformationType.LOOKUP, typedb_entity='municipal-archive', typedb_attribute='archive-classification', rdf_predicate='hc:archiveType', required=True, status=MappingStatus.MAPPED, ), FieldMapping( source_path='municipality', 
source_description='Municipality served by the archive', target_class='MunicipalArchive', target_slot='municipality', transformation=TransformationType.DIRECT, typedb_entity='municipal-archive', typedb_attribute='municipality', rdf_predicate='hc:municipality', required=False, ), ], example_yaml=""" # MunicipalArchive instance (e.g., Stadsarchief Amsterdam) institution_type: ARCHIVE archive_classification: MUNICIPAL municipality: Amsterdam province: Noord-Holland country: NL services: - reading_room - digitization - genealogy_support """.strip(), ), # ------------------------------------------------------------------------- # archive_type_national # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='archive_type_national', description='National-level archive institutions and systems', detailed_description=""" National archives represent the highest level of archival authority: - Central national repositories (NationalArchives) - Country-specific variations (ArchivesRegionales, Landsarkiv, Fylkesarkiv) - International organization archives (ArchiveOfInternationalOrganization) Classes: NationalArchives, ArchivesRegionales, Landsarkiv, Fylkesarkiv, ArchiveOfInternationalOrganization, RegionalArchivesInIceland """.strip(), linkml_class='NationalArchives', typedb_entity='national-archive', provenance=Provenance( source_type='schema_documentation', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'NationalArchives', 'ArchivesRegionales', 'Landsarkiv', 'Fylkesarkiv', 'ArchiveOfInternationalOrganization', 'RegionalArchivesInIceland', ], fields=[ FieldMapping( source_path=None, source_description='National archive classification', target_class='NationalArchives', target_slot='archive_type', transformation=TransformationType.LOOKUP, typedb_entity='national-archive', typedb_attribute='archive-classification', rdf_predicate='hc:archiveType', required=True, status=MappingStatus.MAPPED, ), FieldMapping( 
source_path='country_code', source_description='ISO 3166-1 alpha-2 country code', target_class='NationalArchives', target_slot='country', transformation=TransformationType.LOOKUP, typedb_entity='national-archive', typedb_attribute='country-code', rdf_predicate='hc:countryCode', required=True, validation=FieldValidation( type='string', pattern='^[A-Z]{2}$', ), ), ], example_yaml=""" # National Archives variation (e.g., Swedish Landsarkiv) institution_type: ARCHIVE archive_classification: NATIONAL subtype: LANDSARKIV country_code: SE region: Gothenburg """.strip(), ), # ------------------------------------------------------------------------- # archive_type_regional # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='archive_type_regional', description='Regional and provincial archive types', detailed_description=""" Regional archives serve geographic areas between national and local: - Provincial archives (ProvincialArchive, ProvincialHistoricalArchive) - Regional administration (RegionalArchive, RegionalStateArchives) - Cantonal systems (CantonalArchive - Switzerland) - Country-specific regional (DepartmentalArchives - France, StateRegionalArchiveCzechia) Classes: RegionalArchive, ProvincialArchive, ProvincialHistoricalArchive, RegionalStateArchives, CantonalArchive, DepartmentalArchives, StateRegionalArchiveCzechia, StateDistrictArchive """.strip(), linkml_class='RegionalArchive', typedb_entity='regional-archive', provenance=Provenance( source_type='schema_documentation', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'RegionalArchive', 'ProvincialArchive', 'ProvincialHistoricalArchive', 'RegionalStateArchives', 'CantonalArchive', 'DepartmentalArchives', 'StateRegionalArchiveCzechia', 'StateDistrictArchive', ], fields=[ FieldMapping( source_path=None, source_description='Regional archive classification', target_class='RegionalArchive', target_slot='archive_type', 
transformation=TransformationType.LOOKUP, typedb_entity='regional-archive', typedb_attribute='archive-classification', rdf_predicate='hc:archiveType', required=True, status=MappingStatus.MAPPED, ), FieldMapping( source_path='region', source_description='Geographic region served', target_class='RegionalArchive', target_slot='region', transformation=TransformationType.DIRECT, typedb_entity='regional-archive', typedb_attribute='region', rdf_predicate='hc:region', required=False, ), ], example_yaml=""" # ProvincialArchive instance (e.g., Brabants Historisch Informatie Centrum) institution_type: ARCHIVE archive_classification: REGIONAL subtype: PROVINCIAL_ARCHIVE province: Noord-Brabant country: NL """.strip(), ), # ------------------------------------------------------------------------- # archive_type_specialized # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='archive_type_specialized', description='Specialized archive types - thematic collections and unique formats', detailed_description=""" Specialized archives focus on specific formats, subjects, or communities: - Format-specific (PhotoArchive, LiteraryArchive, IconographicArchives) - Subject-focused (PoliticalArchive, MilitaryArchive, PerformingArtsArchive) - Community-focused (WomensArchives, LGBTArchive, CommunityArchive) - Institutional (HospitalArchive, SchoolArchive) Classes: PhotoArchive, LiteraryArchive, PoliticalArchive, MilitaryArchive, PerformingArtsArchive, WomensArchives, LGBTArchive, CommunityArchive, HospitalArchive, SchoolArchive, IconographicArchives, ArtArchive, ArchitecturalArchive, NewspaperClippingsArchive """.strip(), linkml_class='SpecializedArchive', typedb_entity='specialized-archive', provenance=Provenance( source_type='schema_documentation', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'SpecializedArchive', 'PhotoArchive', 'LiteraryArchive', 'PoliticalArchive', 'MilitaryArchive', 'PerformingArtsArchive', 
'WomensArchives', 'LGBTArchive', 'CommunityArchive', 'HospitalArchive', 'SchoolArchive', 'IconographicArchives', 'ArtArchive', 'ArchitecturalArchive', 'NewspaperClippingsArchive', 'PressArchive', 'NobilityArchive', ], fields=[ FieldMapping( source_path=None, source_description='Specialized archive classification', target_class='SpecializedArchive', target_slot='archive_type', transformation=TransformationType.LOOKUP, typedb_entity='specialized-archive', typedb_attribute='archive-classification', rdf_predicate='hc:archiveType', required=True, status=MappingStatus.MAPPED, ), FieldMapping( source_path='specialization', source_description='Area of specialization', target_class='SpecializedArchive', target_slot='specialization', transformation=TransformationType.DIRECT, typedb_entity='specialized-archive', typedb_attribute='specialization', rdf_predicate='hc:specialization', required=False, ), ], example_yaml=""" # PhotoArchive instance (e.g., Nederlands Fotomuseum archive) institution_type: ARCHIVE archive_classification: SPECIALIZED subtype: PHOTO_ARCHIVE specialization: photography collection_strengths: - Dutch photography 1840-present - Documentary photography - Press photography """.strip(), ), # ------------------------------------------------------------------------- # archive_type_thematic # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='archive_type_thematic', description='Thematic archive types - digital, web, dark archives, and special collections', detailed_description=""" Thematic archives organized around specific themes or functions: - Digital preservation (DigitalArchive, DarkArchive, WebArchive) - Collection types (CollectingArchives, DepositArchive) - Personal papers (Nachlass, HouseArchive, PersonalCollectionType) - Online archives (OnlineNewsArchive, MailingListArchive) Classes: DigitalArchive, DarkArchive, WebArchive, CollectingArchives, DepositArchive, Nachlass, HouseArchive, 
OnlineNewsArchive, MailingListArchive, ClimateArchive, FreeArchive, PostcustodialArchive """.strip(), linkml_class='DigitalArchive', typedb_entity='thematic-archive', provenance=Provenance( source_type='schema_documentation', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'DigitalArchive', 'DarkArchive', 'WebArchive', 'CollectingArchives', 'DepositArchive', 'Nachlass', 'HouseArchive', 'OnlineNewsArchive', 'MailingListArchive', 'ClimateArchive', 'FreeArchive', 'PostcustodialArchive', 'MuseumArchive', 'ArchivalRepository', 'ArchivalLibrary', ], fields=[ FieldMapping( source_path=None, source_description='Thematic archive classification', target_class='DigitalArchive', target_slot='archive_type', transformation=TransformationType.LOOKUP, typedb_entity='thematic-archive', typedb_attribute='archive-classification', rdf_predicate='hc:archiveType', required=True, status=MappingStatus.MAPPED, ), FieldMapping( source_path='digital_preservation_level', source_description='Level of digital preservation commitment', target_class='DigitalArchive', target_slot='preservation_level', transformation=TransformationType.LOOKUP, typedb_entity='thematic-archive', typedb_attribute='preservation-level', rdf_predicate='hc:preservationLevel', required=False, validation=FieldValidation( type='enum', enum_values=['BIT_LEVEL', 'LOGICAL', 'SEMANTIC', 'FULL'], ), ), ], example_yaml=""" # DarkArchive instance institution_type: ARCHIVE archive_classification: THEMATIC subtype: DARK_ARCHIVE digital_preservation_level: FULL access_policy: RESTRICTED storage_location: offline_vault """.strip(), ), # ========================================================================= # ORGANIZATIONAL STRUCTURE MAPPINGS # ========================================================================= # ------------------------------------------------------------------------- # org_structure_hierarchy # ------------------------------------------------------------------------- EnrichmentSourceMapping( 
source_block='org_structure_hierarchy', description='Organizational hierarchy - parent organizations, subsidiaries, branches', detailed_description=""" Models the hierarchical relationships between organizations: - Parent-child relationships (ParentOrganizationUnit, SubsidiaryOrganization) - Branch locations (OrganizationBranch, BranchOffice) - Encompassing bodies (EncompassingBody) - Networks and associations (ArchiveNetwork, ArchiveAssociation, ConnectionNetwork) Classes: ParentOrganizationUnit, SubsidiaryOrganization, OrganizationBranch, BranchOffice, EncompassingBody, ArchiveNetwork, ArchiveAssociation, ConnectionNetwork, Organization, Institution """.strip(), linkml_class='Organization', typedb_entity='organization', provenance=Provenance( source_type='schema_documentation', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'Organization', 'Institution', 'ParentOrganizationUnit', 'SubsidiaryOrganization', 'OrganizationBranch', 'BranchOffice', 'EncompassingBody', 'ArchiveNetwork', 'ArchiveAssociation', 'ConnectionNetwork', 'CulturalInstitution', 'MemoryInstitution', 'GLAM', ], fields=[ FieldMapping( source_path='parent_organization', source_description='Parent organization identifier or name', target_class='Organization', target_slot='parent_organization', transformation=TransformationType.NESTED, typedb_entity='organization', typedb_attribute='parent-organization', rdf_predicate='org:subOrganizationOf', required=False, ), FieldMapping( source_path='subsidiary_organizations', source_description='List of subsidiary organizations', target_class='Organization', target_slot='subsidiaries', transformation=TransformationType.ARRAY_MAP, typedb_entity='organization', typedb_attribute='subsidiaries', rdf_predicate='org:hasSubOrganization', required=False, ), FieldMapping( source_path='encompassing_body', source_description='Larger organizational body this belongs to', target_class='Organization', target_slot='encompassing_body', 
transformation=TransformationType.NESTED, typedb_entity='organization', typedb_attribute='encompassing-body', rdf_predicate='hc:encompassingBody', required=False, ), ], example_yaml=""" # Organization with hierarchy organization_name: Rijksmuseum Research Library parent_organization: name: Rijksmuseum ghcid: NL-NH-AMS-M-RM encompassing_body: name: Ministry of Education, Culture and Science type: GOVERNMENT_MINISTRY """.strip(), ), # ------------------------------------------------------------------------- # org_structure_administrative # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='org_structure_administrative', description='Administrative units - departments, divisions, offices', detailed_description=""" Models the internal administrative structure of organizations: - Functional divisions (Department, Division) - Administrative units (AdministrativeOffice) - Support functions (ConservationLab, ReadingRoom, GiftShop) - Specialized facilities (ExhibitionSpace, Storage, Warehouse) Classes: Department, Division, AdministrativeOffice, ConservationLab, ReadingRoom, ReadingRoomAnnex, GiftShop, ExhibitionSpace, Storage, Warehouse, PrintRoom, ServiceArea """.strip(), linkml_class='Department', typedb_entity='department', provenance=Provenance( source_type='schema_documentation', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'Department', 'Division', 'AdministrativeOffice', 'ConservationLab', 'ReadingRoom', 'ReadingRoomAnnex', 'GiftShop', 'ExhibitionSpace', 'Storage', 'Warehouse', 'PrintRoom', 'ServiceArea', 'ClosedSpace', 'PublicSpace', 'OutdoorSite', ], fields=[ FieldMapping( source_path='departments', source_description='List of organizational departments', target_class='Department', target_slot='department_name', transformation=TransformationType.ARRAY_MAP, typedb_entity='department', typedb_attribute='department-name', rdf_predicate='org:hasUnit', required=False, ), FieldMapping( 
source_path='facilities', source_description='Physical facilities and spaces', target_class='Department', target_slot='facilities', transformation=TransformationType.ARRAY_MAP, typedb_entity='department', typedb_attribute='facilities', rdf_predicate='hc:hasFacility', required=False, ), ], example_yaml=""" # Department structure departments: - name: Collection Management type: DEPARTMENT staff_count: 25 - name: Conservation Laboratory type: CONSERVATION_LAB specializations: - paper_conservation - photograph_conservation """.strip(), ), # ------------------------------------------------------------------------- # ORGANIZATIONAL STRUCTURE - SUBDIVISION # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='org_structure_subdivision', description='Organizational subdivisions and change events', detailed_description=""" Models organizational subdivisions and structural changes: - Subdivision types (OrganizationalSubdivision) - Organizational structure (OrganizationalStructure) - Change events (OrganizationalChangeEvent) - Contributing and allocating agencies (ContributingAgency, AllocationAgency) Classes: OrganizationalSubdivision, OrganizationalStructure, OrganizationalChangeEvent, ContributingAgency, AllocationAgency, Jurisdiction, StandardsOrganization, RegistrationAuthority """.strip(), linkml_class='OrganizationalSubdivision', typedb_entity='organizational-subdivision', provenance=Provenance( source_type='schema_documentation', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'OrganizationalSubdivision', 'OrganizationalStructure', 'OrganizationalChangeEvent', 'ContributingAgency', 'AllocationAgency', 'Jurisdiction', 'StandardsOrganization', 'RegistrationAuthority', ], fields=[ FieldMapping( source_path='subdivisions', source_description='Organizational subdivisions', target_class='OrganizationalSubdivision', target_slot='subdivision_name', transformation=TransformationType.ARRAY_MAP, 
typedb_entity='organizational-subdivision', typedb_attribute='subdivision-name', rdf_predicate='org:hasUnit', required=False, ), FieldMapping( source_path='change_history', source_description='History of organizational changes', target_class='OrganizationalChangeEvent', target_slot='change_events', transformation=TransformationType.ARRAY_MAP, typedb_entity='organizational-change-event', typedb_attribute='change-events', rdf_predicate='hc:hasChangeEvent', required=False, ), ], example_yaml=""" # Organizational change history change_history: - event_type: MERGER date: "2001-01-01" description: "Merger of Gemeentearchief and Rijksarchief" predecessor_organizations: - name: Gemeentearchief Haarlem - name: Rijksarchief Noord-Holland resulting_organization: name: Noord-Hollands Archief """.strip(), ), # ========================================================================= # PHASE 1: HERITAGE & CULTURAL SITES # ========================================================================= # ------------------------------------------------------------------------- # HERITAGE - WORLD SITES # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='heritage_world_sites', description='UNESCO World Heritage Sites and tentative list entries', detailed_description=""" World Heritage Site designations and related classifications: - Inscribed sites (WorldHeritageSite) - Tentative list entries (TentativeWorldHeritageSite) - Historic buildings and monuments (HistoricBuilding) - Cultural institutions (CulturalInstitution) Classes: WorldHeritageSite, TentativeWorldHeritageSite, HistoricBuilding, CulturalInstitution """.strip(), linkml_class='WorldHeritageSite', typedb_entity='world-heritage-site', provenance=Provenance( source_type='schema_documentation', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'WorldHeritageSite', 'TentativeWorldHeritageSite', 'HistoricBuilding', 'CulturalInstitution', ], fields=[ 
FieldMapping( source_path='unesco_id', source_description='UNESCO World Heritage Site ID', target_class='WorldHeritageSite', target_slot='unesco_id', transformation=TransformationType.DIRECT, typedb_entity='world-heritage-site', typedb_attribute='unesco-id', rdf_predicate='hc:unescoId', required=True, validation=FieldValidation(type='number'), ), FieldMapping( source_path='inscription_year', source_description='Year of UNESCO inscription', target_class='WorldHeritageSite', target_slot='inscription_year', transformation=TransformationType.DIRECT, typedb_entity='world-heritage-site', typedb_attribute='inscription-year', rdf_predicate='hc:inscriptionYear', required=False, validation=FieldValidation(type='number'), ), FieldMapping( source_path='heritage_criteria', source_description='UNESCO selection criteria (i-x)', target_class='WorldHeritageSite', target_slot='criteria', transformation=TransformationType.ARRAY_DIRECT, typedb_entity='world-heritage-site', typedb_attribute='criteria', rdf_predicate='hc:heritageCriteria', required=False, ), ], example_yaml=""" # WorldHeritageSite instance heritage_designation: UNESCO_WORLD_HERITAGE unesco_id: 818 name: Rietveld Schröderhuis inscription_year: 2000 heritage_criteria: - i - ii country: NL """.strip(), ), # ------------------------------------------------------------------------- # HERITAGE - INTANGIBLE # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='heritage_intangible', description='Intangible cultural heritage - traditions, performances, practices', detailed_description=""" UNESCO Intangible Cultural Heritage and related practices: - Intangible heritage forms (IntangibleHeritageForm) - Performances and events (IntangibleHeritagePerformance, IntangibleHeritageEvent) - Groups preserving traditions (IntangibleHeritageGroupType) Classes: IntangibleHeritageForm, IntangibleHeritagePerformance, IntangibleHeritageEvent, IntangibleHeritageGroupType """.strip(),
linkml_class='IntangibleHeritageForm', typedb_entity='intangible-heritage', provenance=Provenance( source_type='schema_documentation', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'IntangibleHeritageForm', 'IntangibleHeritagePerformance', 'IntangibleHeritageEvent', 'IntangibleHeritageGroupType', ], fields=[ FieldMapping( source_path='ich_domain', source_description='UNESCO ICH domain category', target_class='IntangibleHeritageForm', target_slot='domain', transformation=TransformationType.LOOKUP, typedb_entity='intangible-heritage', typedb_attribute='domain', rdf_predicate='hc:ichDomain', required=False, validation=FieldValidation( type='enum', enum_values=[ 'ORAL_TRADITIONS', 'PERFORMING_ARTS', 'SOCIAL_PRACTICES', 'KNOWLEDGE_PRACTICES', 'TRADITIONAL_CRAFTSMANSHIP', ], ), ), FieldMapping( source_path='inscription_list', source_description='UNESCO ICH list type', target_class='IntangibleHeritageForm', target_slot='list_type', transformation=TransformationType.LOOKUP, typedb_entity='intangible-heritage', typedb_attribute='list-type', rdf_predicate='hc:ichListType', required=False, validation=FieldValidation( type='enum', enum_values=['REPRESENTATIVE', 'URGENT_SAFEGUARDING', 'GOOD_PRACTICES'], ), ), ], example_yaml=""" # IntangibleHeritageForm instance heritage_type: INTANGIBLE ich_domain: TRADITIONAL_CRAFTSMANSHIP name: Craft of the miller operating windmills and watermills inscription_list: REPRESENTATIVE inscription_year: 2017 countries: - NL - BE """.strip(), ), # ------------------------------------------------------------------------- # HERITAGE - NATIONAL TREASURES # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='heritage_national_treasures', description='National treasures and designated heritage items', detailed_description=""" National-level heritage designations: - National treasures (NationalTreasure) - Country-specific designations (NationalTreasureOfFrance) - 
Documentation centers (DocumentationCentre, RegionalHistoricCenter) - Research facilities (ResearchCenter, ScientificTechnicAndIndustrialCultureCenter) Classes: NationalTreasure, NationalTreasureOfFrance, DocumentationCentre, RegionalHistoricCenter, ResearchCenter, ScientificTechnicAndIndustrialCultureCenter """.strip(), linkml_class='NationalTreasure', typedb_entity='national-treasure', provenance=Provenance( source_type='schema_documentation', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'NationalTreasure', 'NationalTreasureOfFrance', 'DocumentationCentre', 'RegionalHistoricCenter', 'ResearchCenter', 'ScientificTechnicAndIndustrialCultureCenter', ], fields=[ FieldMapping( source_path='designation_type', source_description='Type of national designation', target_class='NationalTreasure', target_slot='designation_type', transformation=TransformationType.LOOKUP, typedb_entity='national-treasure', typedb_attribute='designation-type', rdf_predicate='hc:designationType', required=True, ), FieldMapping( source_path='designation_date', source_description='Date of official designation', target_class='NationalTreasure', target_slot='designation_date', transformation=TransformationType.TEMPORAL, typedb_entity='national-treasure', typedb_attribute='designation-date', rdf_predicate='hc:designationDate', required=False, validation=FieldValidation(type='date'), ), ], example_yaml=""" # NationalTreasure instance designation_type: RIJKSMONUMENT designation_number: 12345 name: Amsterdam Canal Ring designation_date: "1999-12-01" country: NL """.strip(), ), # ========================================================================= # PHASE 1: CLASSIFICATION TYPES # ========================================================================= # ------------------------------------------------------------------------- # TYPE CLASSES - GLAM INSTITUTIONS # ------------------------------------------------------------------------- EnrichmentSourceMapping( 
source_block='type_classes_glam', description='Classification types for GLAM institutions - museums, libraries, archives, galleries', detailed_description=""" Type classes for classifying heritage custodian institutions: - Core GLAM types (MuseumType, LibraryType, ArchiveOrganizationType, GalleryType) - Bio/nature custodians (BioCustodianType) - Religious/sacred sites (HolySacredSiteType) - Education providers (EducationProviderType) Classes: MuseumType, LibraryType, ArchiveOrganizationType, GalleryType, BioCustodianType, HolySacredSiteType, EducationProviderType, CustodianType, PersonalCollectionType """.strip(), linkml_class='CustodianType', typedb_entity='custodian-type', provenance=Provenance( source_type='schema_documentation', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'CustodianType', 'MuseumType', 'LibraryType', 'ArchiveOrganizationType', 'GalleryType', 'BioCustodianType', 'HolySacredSiteType', 'EducationProviderType', 'PersonalCollectionType', 'FeatureCustodianType', 'TasteScentHeritageType', ], fields=[ FieldMapping( source_path='institution_type', source_description='Primary institution type code', target_class='CustodianType', target_slot='type_code', transformation=TransformationType.LOOKUP, typedb_entity='custodian-type', typedb_attribute='type-code', rdf_predicate='hc:custodianTypeCode', required=True, validation=FieldValidation( type='enum', enum_values=['G', 'L', 'A', 'M', 'O', 'R', 'C', 'U', 'B', 'E', 'S', 'F', 'I', 'X', 'P', 'H', 'D', 'N', 'T'], ), notes='GLAMORCUBESFIXPHDNT taxonomy single-letter codes', ), FieldMapping( source_path='institution_subtype', source_description='Detailed institution subtype', target_class='CustodianType', target_slot='subtype', transformation=TransformationType.LOOKUP, typedb_entity='custodian-type', typedb_attribute='subtype', rdf_predicate='hc:custodianSubtype', required=False, ), ], example_yaml=""" # Custodian type classification institution_type: M # Museum institution_subtype: ART_MUSEUM 
museum_type_details: collection_focus: FINE_ARTS governance: PUBLIC size_category: LARGE """.strip(), ), # ------------------------------------------------------------------------- # TYPE CLASSES - DIGITAL PLATFORMS # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='type_classes_digital', description='Classification types for digital platforms and web presence', detailed_description=""" Type classes for digital infrastructure: - Digital platform types (DigitalPlatformType) - Web portal classifications (WebPortalType, WebPortalTypes) - Social media platforms (SocialMediaPlatformType, SocialMediaPlatformTypes) - Social media content (SocialMediaPostType, SocialMediaPostTypes) - Video content types (VideoAnnotationTypes) Classes: DigitalPlatformType, WebPortalType, WebPortalTypes, SocialMediaPlatformType, SocialMediaPlatformTypes, SocialMediaPostType, SocialMediaPostTypes, VideoAnnotationTypes """.strip(), linkml_class='DigitalPlatformType', typedb_entity='digital-platform-type', provenance=Provenance( source_type='schema_documentation', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'DigitalPlatformType', 'WebPortalType', 'WebPortalTypes', 'SocialMediaPlatformType', 'SocialMediaPlatformTypes', 'SocialMediaPostType', 'SocialMediaPostTypes', 'VideoAnnotationTypes', 'DataServiceEndpointType', ], fields=[ FieldMapping( source_path='platform_type', source_description='Digital platform classification', target_class='DigitalPlatformType', target_slot='platform_category', transformation=TransformationType.LOOKUP, typedb_entity='digital-platform-type', typedb_attribute='platform-category', rdf_predicate='hc:platformCategory', required=True, validation=FieldValidation( type='enum', enum_values=[ 'WEBSITE', 'COLLECTION_PORTAL', 'DISCOVERY_PLATFORM', 'API', 'SOCIAL_MEDIA', 'CMS', 'DAM', ], ), ), ], example_yaml=""" # Digital platform type classification digital_platforms: - platform_type: 
COLLECTION_PORTAL name: Online Collection - platform_type: API name: Data API protocol: REST - platform_type: SOCIAL_MEDIA social_platform_type: INSTAGRAM """.strip(), ), # ------------------------------------------------------------------------- # TYPE CLASSES - ORGANIZATIONAL # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='type_classes_organizational', description='Classification types for organizational structures', detailed_description=""" Type classes for organizational classifications: - Commercial organizations (CommercialOrganizationType) - Non-profit organizations (NonProfitType) - Research organizations (ResearchOrganizationType) - Official institutions (OfficialInstitutionType) - Heritage societies (HeritageSocietyType) - Mixed/unspecified (MixedCustodianType, UnspecifiedType) Classes: CommercialOrganizationType, NonProfitType, ResearchOrganizationType, OfficialInstitutionType, HeritageSocietyType, MixedCustodianType, UnspecifiedType, LegalEntityType """.strip(), linkml_class='LegalEntityType', typedb_entity='legal-entity-type', provenance=Provenance( source_type='schema_documentation', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'CommercialOrganizationType', 'NonProfitType', 'ResearchOrganizationType', 'OfficialInstitutionType', 'HeritageSocietyType', 'MixedCustodianType', 'UnspecifiedType', 'LegalEntityType', 'LegalForm', 'LegalName', ], fields=[ FieldMapping( source_path='legal_form', source_description='Legal form/entity type', target_class='LegalEntityType', target_slot='legal_form_code', transformation=TransformationType.LOOKUP, typedb_entity='legal-entity-type', typedb_attribute='legal-form-code', rdf_predicate='hc:legalFormCode', required=False, notes='ISO 20275 legal form codes or national equivalents', ), FieldMapping( source_path='governance_type', source_description='Governance/ownership type', target_class='LegalEntityType', target_slot='governance_type', 
transformation=TransformationType.LOOKUP, typedb_entity='legal-entity-type', typedb_attribute='governance-type', rdf_predicate='hc:governanceType', required=False, validation=FieldValidation( type='enum', enum_values=['PUBLIC', 'PRIVATE', 'NON_PROFIT', 'MIXED', 'GOVERNMENT'], ), ), ], example_yaml=""" # Legal entity type classification legal_status: legal_form: STICHTING # Dutch foundation legal_form_code: "8888" # ISO 20275 governance_type: NON_PROFIT registration_authority: KVK registration_number: "12345678" """.strip(), ), # ========================================================================= # PHASE 2: PLACE & LOCATION # ========================================================================= # ------------------------------------------------------------------------- # PLACE - GEOGRAPHIC # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='place_geographic', description='Geographic and spatial location entities', detailed_description=""" Geographic location classes representing physical places where heritage custodians are located or operate. Includes settlements (cities/towns), countries, regions, and geospatial coordinates. 
These classes support: - Settlement identification (GeoNames integration) - Country code normalization (ISO 3166-1) - Region/subregion hierarchies - Geospatial coordinates (lat/lon) """.strip(), linkml_class='Settlement', typedb_entity='settlement', provenance=Provenance( source_type='geonames', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'Settlement', 'Country', 'Subregion', 'GeoSpatialPlace', 'FeaturePlace', ], fields=[ FieldMapping( source_path='location.settlement', source_description='Settlement/city name', target_class='Settlement', target_slot='name', transformation=TransformationType.DIRECT, typedb_entity='settlement', typedb_attribute='name', rdf_predicate='schema:name', required=True, ), FieldMapping( source_path='location.country', source_description='Country code (ISO 3166-1)', target_class='Country', target_slot='country_code', transformation=TransformationType.LOOKUP, typedb_entity='country', typedb_attribute='country-code', rdf_predicate='schema:addressCountry', required=True, ), FieldMapping( source_path='location.coordinates', source_description='Geographic coordinates', target_class='GeoSpatialPlace', target_slot='coordinates', transformation=TransformationType.COMPUTED, typedb_entity='geo-spatial-place', typedb_attribute='coordinates', rdf_predicate='geo:hasGeometry', required=False, ), ], example_yaml=""" # Geographic location location: settlement: Amsterdam country: NL region: Noord-Holland coordinates: latitude: 52.3676 longitude: 4.9041 """.strip(), ), # ------------------------------------------------------------------------- # PLACE - CUSTODIAN SPECIFIC # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='place_custodian_specific', description='Custodian-specific place classes', detailed_description=""" Place classes specifically related to heritage custodian operations, including auxiliary places, temporary locations, and custodian-specific place 
designations. Supports modeling: - Primary vs auxiliary locations - Temporary/seasonal locations - Off-site storage locations - Branch/satellite locations """.strip(), linkml_class='CustodianPlace', typedb_entity='custodian-place', provenance=Provenance( source_type='custodian_yaml', data_tier=DataTier.TIER_2_VERIFIED, ), generated_classes=[ 'CustodianPlace', 'AuxiliaryPlace', 'TemporaryLocation', ], fields=[ FieldMapping( source_path='places.primary', source_description='Primary location', target_class='CustodianPlace', target_slot='primary_location', transformation=TransformationType.DIRECT, typedb_entity='custodian-place', typedb_attribute='primary-location', rdf_predicate='hc:primaryLocation', required=True, ), FieldMapping( source_path='places.auxiliary', source_description='Auxiliary/secondary locations', target_class='AuxiliaryPlace', target_slot='auxiliary_locations', transformation=TransformationType.ARRAY_DIRECT, typedb_entity='auxiliary-place', typedb_attribute='location', rdf_predicate='hc:auxiliaryLocation', required=False, ), FieldMapping( source_path='places.temporary', source_description='Temporary location', target_class='TemporaryLocation', target_slot='temporary_location', transformation=TransformationType.DIRECT, typedb_entity='temporary-location', typedb_attribute='location', rdf_predicate='hc:temporaryLocation', required=False, ), ], example_yaml=""" # Custodian places places: primary: address: Museumstraat 1 city: Amsterdam auxiliary: - name: Storage Facility address: Industrieweg 100 temporary: name: Pop-up Exhibition Space valid_from: 2024-06-01 valid_to: 2024-09-30 """.strip(), ), # ========================================================================= # PHASE 2: COLLECTIONS # ========================================================================= # ------------------------------------------------------------------------- # COLLECTION - CORE # ------------------------------------------------------------------------- 
EnrichmentSourceMapping( source_block='collection_core', description='Core collection and holdings classes', detailed_description=""" Core classes for modeling heritage collections and holdings. Collections represent aggregations of objects, documents, or materials managed by a heritage custodian. Supports: - Collection naming and description - Collection types (archival, library, museum, mixed) - Special collections designation - Subject/temporal coverage """.strip(), linkml_class='Collection', typedb_entity='collection', provenance=Provenance( source_type='custodian_yaml', data_tier=DataTier.TIER_2_VERIFIED, ), generated_classes=[ 'Collection', 'CollectionType', 'SpecialCollection', 'CastCollection', 'PhotographCollection', 'CustodianCollection', 'CustodianArchive', 'CustodianAdministration', ], fields=[ FieldMapping( source_path='collections[].name', source_description='Collection name', target_class='Collection', target_slot='collection_name', transformation=TransformationType.DIRECT, typedb_entity='collection', typedb_attribute='name', rdf_predicate='schema:name', required=True, ), FieldMapping( source_path='collections[].type', source_description='Collection type', target_class='CollectionType', target_slot='collection_type', transformation=TransformationType.LOOKUP, typedb_entity='collection-type', typedb_attribute='type', rdf_predicate='hc:collectionType', required=False, ), FieldMapping( source_path='collections[].description', source_description='Collection description', target_class='Collection', target_slot='description', transformation=TransformationType.DIRECT, typedb_entity='collection', typedb_attribute='description', rdf_predicate='schema:description', required=False, ), ], example_yaml=""" # Collection definition collections: - name: Dutch Masters Collection type: MUSEUM_ART description: 17th century Dutch paintings extent: 450 paintings subject_areas: - Dutch Golden Age - Portraiture """.strip(), ), # 
# -------------------------------------------------------------------------
# COLLECTION - MANAGEMENT
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
    # Identity of the enrichment block plus its short and long descriptions.
    source_block='collection_management',
    description='Collection management system classes',
    # The literal is kept exactly as found; .strip() trims the outer padding.
    detailed_description=""" Classes for collection management systems (CMS) used by heritage custodians to catalog, track, and manage their collections. Supports documentation of: - CMS software used (Adlib, TMS, ArchivesSpace, etc.) - System configurations - Integration endpoints """.strip(),
    # Primary LinkML class and TypeDB entity for this block.
    linkml_class='CollectionManagementSystem',
    typedb_entity='collection-management-system',
    provenance=Provenance(
        source_type='custodian_yaml',
        data_tier=DataTier.TIER_2_VERIFIED,
    ),
    generated_classes=[
        'CollectionManagementSystem',
    ],
    # Both mappings share the same target class, TypeDB entity and DIRECT
    # transformation, so only the varying columns are tabulated:
    # source path, description, target slot, TypeDB attribute, RDF predicate,
    # required flag.
    fields=[
        FieldMapping(
            source_path=path,
            source_description=summary,
            target_class='CollectionManagementSystem',
            target_slot=slot,
            transformation=TransformationType.DIRECT,
            typedb_entity='collection-management-system',
            typedb_attribute=attribute,
            rdf_predicate=predicate,
            required=mandatory,
        )
        for path, summary, slot, attribute, predicate, mandatory in (
            (
                'digital_platforms.cms.name', 'CMS name',
                'system_name', 'system-name', 'schema:name', True,
            ),
            (
                'digital_platforms.cms.vendor', 'CMS vendor',
                'vendor', 'vendor', 'schema:manufacturer', False,
            ),
        )
    ],
    # Sample source data for this block; literal kept exactly as found.
    example_yaml=""" # Collection management system digital_platforms: cms: name: Adlib Museum vendor: Axiell version: "7.8" url: https://collection.museum.nl """.strip(),
),
# =========================================================================
# PHASE 2: PERSON & STAFF
# =========================================================================
# -------------------------------------------------------------------------
# PERSON - PROFILE EXTENDED
#
------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='person_profile_extended', description='Extended person profile classes', detailed_description=""" Extended classes for person/staff profiles beyond basic identity. Includes LinkedIn profiles, person connections (professional networks), and web claims. Supports: - LinkedIn profile data integration - Professional network connections - Web-sourced claims about persons - Person name variants and aliases """.strip(), linkml_class='PersonObservation', typedb_entity='person-observation', provenance=Provenance( source_type='linkedin', data_tier=DataTier.TIER_3_CROWD_SOURCED, ), generated_classes=[ 'LinkedInProfile', 'PersonConnection', 'PersonName', 'PersonOrOrganization', 'PersonWebClaim', ], fields=[ FieldMapping( source_path='person.linkedin_url', source_description='LinkedIn profile URL', target_class='LinkedInProfile', target_slot='linkedin_url', transformation=TransformationType.DIRECT, typedb_entity='linkedin-profile', typedb_attribute='profile-url', rdf_predicate='schema:sameAs', required=False, ), FieldMapping( source_path='person.connections', source_description='Professional connections', target_class='PersonConnection', target_slot='connections', transformation=TransformationType.ARRAY_DIRECT, typedb_entity='person-connection', typedb_attribute='connected-person', rdf_predicate='hc:hasConnection', required=False, ), FieldMapping( source_path='person.names', source_description='Person name variants', target_class='PersonName', target_slot='name_variants', transformation=TransformationType.ARRAY_DIRECT, typedb_entity='person-name', typedb_attribute='name-value', rdf_predicate='schema:alternateName', required=False, ), ], example_yaml=""" # Extended person profile person: name: Jan de Vries linkedin_url: https://linkedin.com/in/jandevries names: - value: Jan de Vries type: legal_name - value: J. 
de Vries type: abbreviated connections: - name: Maria Bakker organization: Rijksmuseum """.strip(), ), # ------------------------------------------------------------------------- # PERSON - WORK & EDUCATION # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='person_work_education', description='Work experience and education classes', detailed_description=""" Classes for modeling work experience history and educational credentials of persons associated with heritage custodians. Supports: - Employment history with dates - Role/position tracking - Educational credentials - Skills and certifications """.strip(), linkml_class='WorkExperience', typedb_entity='work-experience', provenance=Provenance( source_type='linkedin', data_tier=DataTier.TIER_3_CROWD_SOURCED, ), generated_classes=[ 'WorkExperience', 'EducationCredential', 'StaffRole', 'StaffRoles', ], fields=[ FieldMapping( source_path='person.experience[].organization', source_description='Employer organization', target_class='WorkExperience', target_slot='organization', transformation=TransformationType.DIRECT, typedb_entity='work-experience', typedb_attribute='organization', rdf_predicate='schema:worksFor', required=True, ), FieldMapping( source_path='person.experience[].role', source_description='Job title/role', target_class='WorkExperience', target_slot='role_title', transformation=TransformationType.DIRECT, typedb_entity='work-experience', typedb_attribute='role-title', rdf_predicate='schema:jobTitle', required=True, ), FieldMapping( source_path='person.education[].institution', source_description='Educational institution', target_class='EducationCredential', target_slot='institution', transformation=TransformationType.DIRECT, typedb_entity='education-credential', typedb_attribute='institution', rdf_predicate='schema:alumniOf', required=True, ), ], example_yaml=""" # Work and education person: experience: - organization: Rijksmuseum role: Senior 
Curator start_date: 2018-03 current: true education: - institution: University of Amsterdam degree: MA Art History graduation_year: 2010 """.strip(), ), # ========================================================================= # PHASE 2: DIGITAL & API SERVICES # ========================================================================= # ------------------------------------------------------------------------- # DIGITAL PLATFORMS - EXTENDED # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='digital_platforms_extended', description='Extended digital platform classes', detailed_description=""" Extended digital platform classes for web portals, virtual libraries, and auxiliary digital presence. Supports heritage custodian online presence beyond primary websites. Includes: - Web portals and discovery interfaces - Virtual/digital-only libraries - Auxiliary digital platforms - Primary digital presence assertions """.strip(), linkml_class='WebPortal', typedb_entity='web-portal', provenance=Provenance( source_type='web_enrichment', data_tier=DataTier.TIER_2_VERIFIED, ), generated_classes=[ 'WebPortal', 'VirtualMapLibrary', 'AuxiliaryDigitalPlatform', 'PrimaryDigitalPresenceAssertion', ], fields=[ FieldMapping( source_path='digital_platforms.portals[].url', source_description='Portal URL', target_class='WebPortal', target_slot='portal_url', transformation=TransformationType.DIRECT, typedb_entity='web-portal', typedb_attribute='url', rdf_predicate='schema:url', required=True, ), FieldMapping( source_path='digital_platforms.portals[].type', source_description='Portal type', target_class='WebPortal', target_slot='portal_type', transformation=TransformationType.LOOKUP, typedb_entity='web-portal', typedb_attribute='portal-type', rdf_predicate='hc:portalType', required=False, ), ], example_yaml=""" # Digital platforms digital_platforms: portals: - url: https://collectie.museum.nl type: DISCOVERY_PORTAL name: Online 
Collection - url: https://maps.museum.nl type: VIRTUAL_MAP name: Interactive Map """.strip(), ), # ------------------------------------------------------------------------- # API ENDPOINTS # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='api_endpoints', description='API and data service endpoint classes', detailed_description=""" Classes for API endpoints and data services exposed by heritage custodians. Supports interoperability documentation for harvesting, searching, and accessing digital content. Includes: - OAI-PMH harvesting endpoints - Search APIs (SRU, OpenSearch) - IIIF Image/Presentation APIs - File download services (EAD, METS) """.strip(), linkml_class='DataServiceEndpoint', typedb_entity='data-service-endpoint', provenance=Provenance( source_type='web_enrichment', data_tier=DataTier.TIER_2_VERIFIED, ), generated_classes=[ 'DataServiceEndpoint', 'OAIPMHEndpoint', 'SearchAPI', 'FileAPI', 'EADDownload', 'METSAPI', 'IIPImageServer', 'InternetOfThings', ], fields=[ FieldMapping( source_path='api_endpoints.oai_pmh', source_description='OAI-PMH endpoint', target_class='OAIPMHEndpoint', target_slot='endpoint_url', transformation=TransformationType.DIRECT, typedb_entity='oai-pmh-endpoint', typedb_attribute='endpoint-url', rdf_predicate='hc:oaiPmhEndpoint', required=False, ), FieldMapping( source_path='api_endpoints.search', source_description='Search API endpoint', target_class='SearchAPI', target_slot='search_url', transformation=TransformationType.DIRECT, typedb_entity='search-api', typedb_attribute='search-url', rdf_predicate='hc:searchEndpoint', required=False, ), FieldMapping( source_path='api_endpoints.iiif', source_description='IIIF Image API', target_class='IIPImageServer', target_slot='iiif_url', transformation=TransformationType.DIRECT, typedb_entity='iip-image-server', typedb_attribute='iiif-url', rdf_predicate='hc:iiifEndpoint', required=False, ), ], example_yaml=""" # API endpoints 
api_endpoints: oai_pmh: https://api.museum.nl/oai search: https://api.museum.nl/search iiif: https://iiif.museum.nl/image/2 ead_download: https://api.museum.nl/ead """.strip(), ), # ========================================================================= # PHASE 2: VIDEO & SOCIAL MEDIA # ========================================================================= # ------------------------------------------------------------------------- # VIDEO CONTENT # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='video_content', description='Video content and annotation classes', detailed_description=""" Classes for video content produced or published by heritage custodians. Supports rich annotation of video content including chapters, transcripts, subtitles, and time-based segments. Includes: - Video posts (YouTube, Vimeo) - Video chapters and segments - Transcripts and subtitles - Audio/text annotations """.strip(), linkml_class='VideoPost', typedb_entity='video-post', provenance=Provenance( source_type='social_media', data_tier=DataTier.TIER_3_CROWD_SOURCED, ), generated_classes=[ 'VideoPost', 'VideoChapter', 'VideoTranscript', 'VideoSubtitle', 'VideoAnnotation', 'VideoAudioAnnotation', 'VideoTextContent', 'VideoTimeSegment', ], fields=[ FieldMapping( source_path='videos[].url', source_description='Video URL', target_class='VideoPost', target_slot='video_url', transformation=TransformationType.DIRECT, typedb_entity='video-post', typedb_attribute='video-url', rdf_predicate='schema:contentUrl', required=True, ), FieldMapping( source_path='videos[].chapters', source_description='Video chapters', target_class='VideoChapter', target_slot='chapters', transformation=TransformationType.ARRAY_DIRECT, typedb_entity='video-chapter', typedb_attribute='chapters', rdf_predicate='schema:hasPart', required=False, ), FieldMapping( source_path='videos[].transcript', source_description='Video transcript', 
target_class='VideoTranscript', target_slot='transcript', transformation=TransformationType.DIRECT, typedb_entity='video-transcript', typedb_attribute='transcript-text', rdf_predicate='schema:transcript', required=False, ), ], example_yaml=""" # Video content videos: - url: https://youtube.com/watch?v=abc123 title: Museum Tour 2024 duration: PT45M30S chapters: - title: Introduction start_time: PT0S - title: Main Gallery start_time: PT5M transcript: language: nl text: "Welkom bij het museum..." """.strip(), ), # ------------------------------------------------------------------------- # SOCIAL MEDIA CONTENT # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='social_media_content', description='Social media content and profile classes', detailed_description=""" Classes for social media presence and content of heritage custodians. Tracks posts, profiles, and engagement across platforms. Includes: - Social media profiles (per platform) - Posts and content items - Engagement metrics """.strip(), linkml_class='SocialMediaProfile', typedb_entity='social-media-profile', provenance=Provenance( source_type='social_media', data_tier=DataTier.TIER_3_CROWD_SOURCED, ), generated_classes=[ 'SocialMediaProfile', 'SocialMediaPost', ], fields=[ FieldMapping( source_path='social_media.profiles[].platform', source_description='Social media platform', target_class='SocialMediaProfile', target_slot='platform', transformation=TransformationType.LOOKUP, typedb_entity='social-media-profile', typedb_attribute='platform', rdf_predicate='hc:socialPlatform', required=True, ), FieldMapping( source_path='social_media.profiles[].url', source_description='Profile URL', target_class='SocialMediaProfile', target_slot='profile_url', transformation=TransformationType.DIRECT, typedb_entity='social-media-profile', typedb_attribute='profile-url', rdf_predicate='schema:url', required=True, ), ], example_yaml=""" # Social media profiles 
social_media: profiles: - platform: TWITTER url: https://twitter.com/rijksmuseum followers: 450000 - platform: INSTAGRAM url: https://instagram.com/rijksmuseum followers: 1200000 """.strip(), ), # ========================================================================= # PHASE 2 SOURCE MAPPINGS: Legal & Administrative # ========================================================================= # ------------------------------------------------------------------------- # legal_policies - Legal and policy classes # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='legal_policies', description='Legal and policy classes', detailed_description=""" Classes for legal policies, access restrictions, and data licensing applicable to heritage custodians and their collections. Includes: - Access policies (reading room, digital) - Data license terms - Legal responsibility collections - Trade register information """.strip(), linkml_class='AccessPolicy', typedb_entity='access-policy', provenance=Provenance( source_type='custodian_yaml', data_tier=DataTier.TIER_2_VERIFIED, ), generated_classes=[ 'AccessPolicy', 'DataLicensePolicy', 'LegalResponsibilityCollection', 'ArticlesOfAssociation', 'TradeRegister', ], fields=[ FieldMapping( source_path='policies.access', source_description='Access policy', target_class='AccessPolicy', target_slot='access_type', transformation=TransformationType.LOOKUP, typedb_entity='access-policy', typedb_attribute='access-type', rdf_predicate='hc:accessPolicy', required=False, ), FieldMapping( source_path='policies.data_license', source_description='Data license', target_class='DataLicensePolicy', target_slot='license', transformation=TransformationType.DIRECT, typedb_entity='data-license-policy', typedb_attribute='license-type', rdf_predicate='schema:license', required=False, ), ], example_yaml=""" # Policies policies: access: type: PUBLIC reading_room: true appointment_required: false 
data_license: CC-BY-4.0 legal_responsibility: type: FOUNDATION articles_url: https://kvk.nl/articles/12345678 """.strip(), ), # ------------------------------------------------------------------------- # administrative_records - Administrative and financial record classes # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='administrative_records', description='Administrative and financial record classes', detailed_description=""" Classes for administrative records including budgets, projects, financial statements, and registration information. Includes: - Budget tracking - Project management - Financial statements - Registration info """.strip(), linkml_class='Budget', typedb_entity='budget', provenance=Provenance( source_type='custodian_yaml', data_tier=DataTier.TIER_2_VERIFIED, ), generated_classes=[ 'Budget', 'Project', 'FinancialStatement', 'RegistrationInfo', 'ConfidenceMeasure', 'ConflictStatus', ], fields=[ FieldMapping( source_path='administration.budget', source_description='Budget information', target_class='Budget', target_slot='annual_budget', transformation=TransformationType.DIRECT, typedb_entity='budget', typedb_attribute='annual-amount', rdf_predicate='hc:annualBudget', required=False, ), FieldMapping( source_path='administration.projects', source_description='Active projects', target_class='Project', target_slot='projects', transformation=TransformationType.ARRAY_DIRECT, typedb_entity='project', typedb_attribute='project-name', rdf_predicate='hc:hasProject', required=False, ), FieldMapping( source_path='administration.registration', source_description='Registration information', target_class='RegistrationInfo', target_slot='registration', transformation=TransformationType.DIRECT, typedb_entity='registration-info', typedb_attribute='registration-number', rdf_predicate='hc:registrationInfo', required=False, ), ], example_yaml=""" # Administrative records administration: budget: 
annual_amount: 5000000 currency: EUR fiscal_year: 2024 projects: - name: Digitization 2024 status: IN_PROGRESS registration: authority: KVK number: "12345678" """.strip(), ), # ========================================================================= # PHASE 2 SOURCE MAPPINGS: Finding Aids & Standards # ========================================================================= # ------------------------------------------------------------------------- # finding_aids_standards - Finding aids, standards, documentation classes # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='finding_aids_standards', description='Finding aids, standards, and source documentation classes', detailed_description=""" Classes for archival finding aids, metadata standards, and source documentation. Essential for archival description and interoperability. Includes: - Finding aids (EAD, PDF, online) - Metadata standards compliance - Source document references - Primary digital presence assertions """.strip(), linkml_class='FindingAid', typedb_entity='finding-aid', provenance=Provenance( source_type='custodian_yaml', data_tier=DataTier.TIER_2_VERIFIED, ), generated_classes=[ 'FindingAid', 'FindingAidType', 'SourceDocument', 'Standard', 'PrimaryDigitalPresenceAssertion', ], fields=[ FieldMapping( source_path='finding_aids[].type', source_description='Finding aid type', target_class='FindingAidType', target_slot='finding_aid_type', transformation=TransformationType.LOOKUP, typedb_entity='finding-aid-type', typedb_attribute='type', rdf_predicate='hc:findingAidType', required=True, ), FieldMapping( source_path='finding_aids[].url', source_description='Finding aid URL', target_class='FindingAid', target_slot='url', transformation=TransformationType.DIRECT, typedb_entity='finding-aid', typedb_attribute='url', rdf_predicate='schema:url', required=False, ), FieldMapping( source_path='standards', source_description='Standards compliance', 
target_class='Standard', target_slot='standards', transformation=TransformationType.ARRAY_DIRECT, typedb_entity='standard', typedb_attribute='standard-name', rdf_predicate='hc:conformsToStandard', required=False, ), ], example_yaml=""" # Finding aids and standards finding_aids: - type: EAD url: https://archive.nl/ead/collection123.xml - type: PDF url: https://archive.nl/guides/collection123.pdf standards: - ISAD(G) - EAD3 - Dublin Core """.strip(), ), # ========================================================================= # PHASE 2 SOURCE MAPPINGS: Reconstruction & Provenance # ========================================================================= # ------------------------------------------------------------------------- # reconstruction_provenance - Entity reconstruction and provenance tracking # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='reconstruction_provenance', description='Entity reconstruction and provenance tracking classes', detailed_description=""" Classes for tracking entity reconstruction activities and provenance chains. Used for modeling how information about heritage custodians is assembled from multiple sources. 
Includes: - Reconstructed entities (from multiple sources) - Reconstruction activities - Reconstruction agents (human/automated) - Timeline events from external sources """.strip(), linkml_class='ReconstructedEntity', typedb_entity='reconstructed-entity', provenance=Provenance( source_type='computed', data_tier=DataTier.TIER_4_INFERRED, ), generated_classes=[ 'ReconstructedEntity', 'ReconstructionActivity', 'ReconstructionAgent', 'CustodianTimelineEvent', ], fields=[ FieldMapping( source_path='reconstruction.sources', source_description='Source documents', target_class='ReconstructedEntity', target_slot='sources', transformation=TransformationType.ARRAY_DIRECT, typedb_entity='reconstructed-entity', typedb_attribute='sources', rdf_predicate='prov:wasDerivedFrom', required=True, ), FieldMapping( source_path='reconstruction.activity', source_description='Reconstruction activity', target_class='ReconstructionActivity', target_slot='activity', transformation=TransformationType.DIRECT, typedb_entity='reconstruction-activity', typedb_attribute='activity-type', rdf_predicate='prov:wasGeneratedBy', required=False, ), FieldMapping( source_path='reconstruction.agent', source_description='Reconstruction agent', target_class='ReconstructionAgent', target_slot='agent', transformation=TransformationType.DIRECT, typedb_entity='reconstruction-agent', typedb_attribute='agent-id', rdf_predicate='prov:wasAttributedTo', required=False, ), ], example_yaml=""" # Reconstruction provenance reconstruction: sources: - type: WIKIDATA id: Q190804 - type: ISIL_REGISTRY id: NL-AmRM activity: type: AUTOMATED_MERGE date: 2024-01-15 agent: type: SYSTEM name: glam-extractor """.strip(), ), # ========================================================================= # PHASE 2 SOURCE MAPPINGS: Storage & Facilities # ========================================================================= # ------------------------------------------------------------------------- # storage_facilities - Storage 
conditions and facility classes # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='storage_facilities', description='Storage conditions and facility classes', detailed_description=""" Classes for physical storage conditions, educational facilities, and specialized spaces within heritage custodian buildings. Includes: - Storage conditions (climate, security) - Storage types (warehouse, vault) - Educational centers - Specialized facilities (libraries, social spaces) """.strip(), linkml_class='StorageCondition', typedb_entity='storage-condition', provenance=Provenance( source_type='custodian_yaml', data_tier=DataTier.TIER_2_VERIFIED, ), generated_classes=[ 'StorageCondition', 'StorageConditionPolicy', 'StorageType', 'EducationCenter', 'PersonalLibrary', 'LocationLibrary', 'SocialSpace', 'CateringPlace', ], fields=[ FieldMapping( source_path='facilities.storage.conditions', source_description='Storage conditions', target_class='StorageCondition', target_slot='climate_control', transformation=TransformationType.DIRECT, typedb_entity='storage-condition', typedb_attribute='climate-control', rdf_predicate='hc:storageCondition', required=False, ), FieldMapping( source_path='facilities.storage.type', source_description='Storage type', target_class='StorageType', target_slot='storage_type', transformation=TransformationType.LOOKUP, typedb_entity='storage-type', typedb_attribute='type', rdf_predicate='hc:storageType', required=False, ), FieldMapping( source_path='facilities.education_center', source_description='Education center', target_class='EducationCenter', target_slot='education_center', transformation=TransformationType.DIRECT, typedb_entity='education-center', typedb_attribute='center-name', rdf_predicate='hc:hasEducationCenter', required=False, ), ], example_yaml=""" # Facilities facilities: storage: type: CLIMATE_CONTROLLED_VAULT conditions: temperature: 18 humidity: 50 security_level: HIGH 
education_center: name: Museum Education Wing capacity: 50 """.strip(), ), # ========================================================================= # PHASE 2 SOURCE MAPPINGS: Funding & Grants # ========================================================================= # ------------------------------------------------------------------------- # funding_grants - Funding, grants, and application classes # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='funding_grants', description='Funding, grants, and application classes', detailed_description=""" Classes for funding sources, grant applications, and financial requirements relevant to heritage custodians. Includes: - Funding agendas - Grant requirements - Application calls """.strip(), linkml_class='FundingAgenda', typedb_entity='funding-agenda', provenance=Provenance( source_type='external_api', data_tier=DataTier.TIER_2_VERIFIED, ), generated_classes=[ 'FundingAgenda', 'FundingRequirement', 'CallForApplication', ], fields=[ FieldMapping( source_path='funding.agendas', source_description='Funding agendas', target_class='FundingAgenda', target_slot='agendas', transformation=TransformationType.ARRAY_DIRECT, typedb_entity='funding-agenda', typedb_attribute='agenda-name', rdf_predicate='hc:fundingAgenda', required=False, ), FieldMapping( source_path='funding.requirements', source_description='Funding requirements', target_class='FundingRequirement', target_slot='requirements', transformation=TransformationType.ARRAY_DIRECT, typedb_entity='funding-requirement', typedb_attribute='requirement', rdf_predicate='hc:fundingRequirement', required=False, ), FieldMapping( source_path='funding.calls', source_description='Open calls for applications', target_class='CallForApplication', target_slot='calls', transformation=TransformationType.ARRAY_DIRECT, typedb_entity='call-for-application', typedb_attribute='call-title', rdf_predicate='hc:openCall', required=False, 
), ], example_yaml=""" # Funding funding: agendas: - name: Heritage Digitization Fund 2024 amount: 500000 currency: EUR requirements: - type: MATCHING_FUNDS percentage: 25 calls: - title: Digital Heritage Innovation deadline: 2024-06-30 """.strip(), ), # ========================================================================= # PHASE 2 SOURCE MAPPINGS: Language & Naming # ========================================================================= # ------------------------------------------------------------------------- # language_naming - Language codes, proficiency, and naming classes # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='language_naming', description='Language codes, proficiency, and naming classes', detailed_description=""" Classes for language handling, proficiency levels, and naming/appellation conventions used in heritage custodian data. Includes: - ISO language codes - Language proficiency levels - Appellations (formal names) - Container (structural) classes """.strip(), linkml_class='LanguageCode', typedb_entity='language-code', provenance=Provenance( source_type='reference_data', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'LanguageCode', 'LanguageProficiency', 'Appellation', 'Container', ], fields=[ FieldMapping( source_path='languages', source_description='Languages used', target_class='LanguageCode', target_slot='languages', transformation=TransformationType.ARRAY_DIRECT, typedb_entity='language-code', typedb_attribute='iso-code', rdf_predicate='schema:inLanguage', required=False, ), FieldMapping( source_path='staff.language_proficiency', source_description='Language proficiency', target_class='LanguageProficiency', target_slot='proficiency', transformation=TransformationType.LOOKUP, typedb_entity='language-proficiency', typedb_attribute='level', rdf_predicate='hc:languageProficiency', required=False, ), FieldMapping( source_path='names.appellations', 
source_description='Formal appellations', target_class='Appellation', target_slot='appellations', transformation=TransformationType.ARRAY_DIRECT, typedb_entity='appellation', typedb_attribute='name-value', rdf_predicate='crm:P1_is_identified_by', required=False, ), ], example_yaml=""" # Language and naming languages: - nl - en - de names: appellations: - value: Rijksmuseum Amsterdam type: OFFICIAL language: nl - value: National Museum of the Netherlands type: TRANSLATION language: en """.strip(), ), # ========================================================================= # PHASE 2 SOURCE MAPPINGS: Specialized Archives (International) # ========================================================================= # ------------------------------------------------------------------------- # archives_german - German-specific archive types # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='archives_german', description='German-specific archive types', detailed_description=""" Archive types specific to German archival tradition and organization. German archives follow a distinctive organizational pattern based on political/administrative regions and specialized functions. 
""".strip(), linkml_class='Verwaltungsarchiv', typedb_entity='verwaltungsarchiv', provenance=Provenance( source_type='isil_registry', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'Verwaltungsarchiv', 'Vereinsarchiv', 'Verlagsarchiv', 'Bildstelle', 'Medienzentrum', 'Personenstandsarchiv', ], fields=[ FieldMapping( source_path='archive_type', source_description='German archive type', target_class='Verwaltungsarchiv', target_slot='archive_type', transformation=TransformationType.LOOKUP, typedb_entity='verwaltungsarchiv', typedb_attribute='type', rdf_predicate='hc:archiveType', required=True, ), ], example_yaml=""" # German archive type archive_type: VERWALTUNGSARCHIV name: Landesarchiv Baden-WΓΌrttemberg """.strip(), ), # ------------------------------------------------------------------------- # archives_swedish - Swedish-specific archive types # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='archives_swedish', description='Swedish-specific archive types', detailed_description=""" Archive types specific to Swedish archival tradition. Swedish archives include national (Riksarkivet), regional (Landsarkiv), and local heritage institutions (HembygdsfΓΆrening). 
""".strip(), linkml_class='Landsarkiv', typedb_entity='landsarkiv', provenance=Provenance( source_type='isil_registry', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'Landsarkiv', 'Foremalarkiv', 'SectorOfArchivesInSweden', 'LocalHeritageInstitutionSweden', ], fields=[ FieldMapping( source_path='archive_type', source_description='Swedish archive type', target_class='Landsarkiv', target_slot='archive_type', transformation=TransformationType.LOOKUP, typedb_entity='landsarkiv', typedb_attribute='type', rdf_predicate='hc:archiveType', required=True, ), ], example_yaml=""" # Swedish archive type archive_type: LANDSARKIV name: Landsarkivet i Uppsala """.strip(), ), # ------------------------------------------------------------------------- # archives_french - French-specific archive types # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='archives_french', description='French-specific archive types', detailed_description=""" Archive types specific to French archival organization. French archives follow a centralized national system with departmental and communal levels. 
""".strip(), linkml_class='FrenchPrivateArchives', typedb_entity='french-private-archives', provenance=Provenance( source_type='isil_registry', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), generated_classes=[ 'FrenchPrivateArchives', 'Conservatoria', ], fields=[ FieldMapping( source_path='archive_type', source_description='French archive type', target_class='FrenchPrivateArchives', target_slot='archive_type', transformation=TransformationType.LOOKUP, typedb_entity='french-private-archives', typedb_attribute='type', rdf_predicate='hc:archiveType', required=True, ), ], example_yaml=""" # French archive type archive_type: ARCHIVES_PRIVEES name: Archives privΓ©es de la famille Rothschild """.strip(), ), # ------------------------------------------------------------------------- # archives_other - Other international specialized archive types # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='archives_other', description='Other international specialized archive types', detailed_description=""" Specialized archive types from other countries including Czech regional archives, Nordic archives, and various thematic archive types. 
# ============================================================================
# PERSON CATEGORIES (9 Categories)
# ============================================================================

# Groupings used to organise person-level source blocks for display.
#
# Each MappingCategory carries:
#   - id:             short machine identifier for the category
#   - name / name_nl: English and Dutch display labels
#   - description / description_nl: English and Dutch one-line summaries
#   - icon:           a single emoji shown next to the label
#   - sources:        names of the source blocks grouped under the category
#                     (presumably matching `source_block` values in
#                     PERSON_MAPPINGS -- verify when adding a new block)
#
# NOTE: the icons and the Dutch labels are literal non-ASCII text. Keep this
# file saved as UTF-8; re-saving it through a legacy 8-bit code page turns
# them into mojibake (e.g. 'Carrière' becomes 'CarriΓ¨re').
PERSON_CATEGORIES: list[MappingCategory] = [
    MappingCategory(
        id='identity',
        name='Identity & Profile',
        name_nl='Identiteit & Profiel',
        description='Core person identity: name, headline, location, connections',
        description_nl='Kern persoonidentiteit: naam, kopregel, locatie, connecties',
        icon='🪪',
        sources=['profile_identity', 'linkedin_profile'],
    ),
    MappingCategory(
        id='career',
        name='Career & Experience',
        name_nl='Carrière & Ervaring',
        description='Work history, positions, organizations',
        description_nl='Werkgeschiedenis, posities, organisaties',
        icon='💼',
        sources=['career_history'],
    ),
    MappingCategory(
        id='education',
        name='Education & Credentials',
        name_nl='Opleiding & Kwalificaties',
        description='Educational background, degrees, institutions',
        description_nl='Opleidingsachtergrond, diploma\'s, instellingen',
        icon='🎓',
        sources=['education'],
    ),
    MappingCategory(
        id='skills',
        name='Skills & Expertise',
        name_nl='Vaardigheden & Expertise',
        description='Professional skills, languages, endorsements',
        description_nl='Professionele vaardigheden, talen, aanbevelingen',
        icon='⚡',
        sources=['skills_expertise'],
    ),
    MappingCategory(
        id='heritage',
        name='Heritage Sector Relevance',
        name_nl='Erfgoedsector Relevantie',
        description='Heritage domain expertise and experience',
        description_nl='Erfgoed domeinexpertise en ervaring',
        icon='🏛️',
        sources=['heritage_relevance', 'heritage_experience'],
    ),
    MappingCategory(
        id='affiliations',
        name='Affiliations & Records',
        name_nl='Affiliaties & Records',
        description='Linked custodians, person records, connections',
        description_nl='Gekoppelde bronhouders, persoonsrecords, connecties',
        icon='🔗',
        sources=['affiliations', 'linked_records'],
    ),
    MappingCategory(
        id='contact',
        name='Contact & Social',
        name_nl='Contact & Sociaal',
        description='Contact information, social media profiles',
        description_nl='Contactgegevens, sociale media profielen',
        icon='📧',
        sources=['contact_data'],
    ),
    MappingCategory(
        id='provenance',
        name='Extraction & Provenance',
        name_nl='Extractie & Herkomst',
        description='Data extraction metadata and web claims',
        description_nl='Data extractie metadata en webclaims',
        icon='📋',
        sources=['extraction_metadata', 'web_claims'],
    ),
    MappingCategory(
        id='pico_ontology',
        name='PiCo Ontology Mapping',
        name_nl='PiCo Ontologie Mapping',
        description='Person in Context (PiCo) ontology alignment',
        description_nl='Person in Context (PiCo) ontologie uitlijning',
        icon='🔬',
        sources=['pico_mapped', 'pico_unmapped'],
    ),
]
------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='profile_identity', description='Core profile identity - name, headline, location', detailed_description=""" Core identity information extracted from LinkedIn profiles. Includes the person's full name, professional headline, location, and current company affiliation. This data forms the foundation of the person entity and is used for display and search purposes across the heritage network. """.strip(), linkml_class='Person', typedb_entity='person', provenance=Provenance( source_type='external_api', data_tier=DataTier.TIER_2_VERIFIED, ), fields=[ FieldMapping( source_path='profile_data.name', source_description='Full name of the person', target_class='Person', target_slot='name', typedb_entity='person', typedb_attribute='person-name', rdf_predicate='foaf:name', transformation=TransformationType.DIRECT, required=True, notes='Primary identifier for the person', ), FieldMapping( source_path='profile_data.headline', source_description='Professional headline/title', target_class='Person', target_slot='headline', typedb_entity='person', typedb_attribute='headline', rdf_predicate='schema:jobTitle', transformation=TransformationType.DIRECT, required=False, notes='Current professional headline from LinkedIn', ), FieldMapping( source_path='profile_data.headline_english', source_description='English translation of headline', target_class='Person', target_slot='headline_english', typedb_entity='person', typedb_attribute='headline-english', rdf_predicate='schema:jobTitle', transformation=TransformationType.DIRECT, required=False, notes='Translated headline for non-English profiles', ), FieldMapping( source_path='profile_data.location', source_description='Geographic location', target_class='Person', target_slot='location', typedb_entity='person', typedb_attribute='location-string', rdf_predicate='schema:address', transformation=TransformationType.DIRECT, 
required=False, notes='Free-text location from LinkedIn', ), FieldMapping( source_path='profile_data.country_code', source_description='ISO country code', target_class='Person', target_slot='country_code', typedb_entity='person', typedb_attribute='country-code', rdf_predicate='schema:addressCountry', transformation=TransformationType.DIRECT, required=False, notes='Two-letter ISO 3166-1 country code', ), FieldMapping( source_path='profile_data.current_company', source_description='Current employer name', target_class='Person', target_slot='current_company', typedb_entity='person', typedb_attribute='current-company', rdf_predicate='schema:worksFor', transformation=TransformationType.DIRECT, required=False, notes='Name of current employer organization', ), FieldMapping( source_path='profile_data.about', source_description='About/summary section', target_class='Person', target_slot='about', typedb_entity='person', typedb_attribute='about', rdf_predicate='schema:description', transformation=TransformationType.DIRECT, required=False, notes='Professional summary from LinkedIn', ), FieldMapping( source_path='profile_data.total_experience', source_description='Total years of experience', target_class='Person', target_slot='total_experience', typedb_entity='person', typedb_attribute='total-experience', rdf_predicate='schema:experienceYears', transformation=TransformationType.DIRECT, required=False, notes='Calculated total professional experience', ), ], generated_classes=['Person', 'PersonProfile'], example_yaml=""" profile_data: name: Iris van Meer headline: Staff member at the Services Department at Nationaal Archief headline_english: Staff member at the Services Department at Nationaal Archief location: The Randstad, Netherlands country_code: NL current_company: Nationaal Archief about: Total Experience: 15 years and 8 months total_experience: 15 years and 8 months """.strip(), ), # ------------------------------------------------------------------------- # LINKEDIN 
PROFILE - URLs and social metrics # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='linkedin_profile', description='LinkedIn profile URLs and social metrics', detailed_description=""" LinkedIn-specific profile data including the profile URL, photo URL, and social metrics like connections and followers count. These fields enable linking back to the source profile and provide insight into the person's professional network reach. """.strip(), linkml_class='Person', typedb_entity='person', provenance=Provenance( source_type='external_api', data_tier=DataTier.TIER_2_VERIFIED, ), fields=[ FieldMapping( source_path='linkedin_profile_url', source_description='LinkedIn profile URL', target_class='Person', target_slot='linkedin_url', typedb_entity='person', typedb_attribute='linkedin-url', rdf_predicate='schema:sameAs', transformation=TransformationType.DIRECT, required=True, notes='Canonical LinkedIn profile URL', ), FieldMapping( source_path='linkedin_photo_url', source_description='LinkedIn profile photo URL', target_class='Person', target_slot='photo_url', typedb_entity='person', typedb_attribute='photo-url', rdf_predicate='schema:image', transformation=TransformationType.DIRECT, required=False, notes='CDN URL for profile photo', ), FieldMapping( source_path='profile_data.connections', source_description='Number of LinkedIn connections', target_class='Person', target_slot='connections', typedb_entity='person', typedb_attribute='connections-count', rdf_predicate='schema:knows', transformation=TransformationType.DIRECT, required=False, notes='First-degree connection count', ), FieldMapping( source_path='profile_data.followers', source_description='Number of followers', target_class='Person', target_slot='followers', typedb_entity='person', typedb_attribute='followers-count', rdf_predicate='schema:followerCount', transformation=TransformationType.DIRECT, required=False, notes='LinkedIn follower count', ), ], 
generated_classes=['Person'], example_yaml=""" linkedin_profile_url: https://www.linkedin.com/in/iris-van-meer-34329131 linkedin_photo_url: https://media.licdn.com/dms/image/v2/... profile_data: connections: 286 followers: 289 """.strip(), ), # ------------------------------------------------------------------------- # CAREER HISTORY - Employment timeline # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='career_history', description='Career history and employment timeline', detailed_description=""" Complete career history extracted from LinkedIn profiles. Each position includes organization, role, dates, duration, location, and organizational metadata like company size and industry. This data is crucial for understanding a person's professional trajectory and their experience in heritage-related roles. """.strip(), linkml_class='CareerPosition', typedb_entity='career-position', provenance=Provenance( source_type='external_api', data_tier=DataTier.TIER_2_VERIFIED, ), fields=[ FieldMapping( source_path='profile_data.career_history[].organization', source_description='Employer organization name', target_class='CareerPosition', target_slot='organization', typedb_entity='career-position', typedb_attribute='organization-name', rdf_predicate='schema:worksFor', transformation=TransformationType.DIRECT, required=True, notes='Name of the employing organization', ), FieldMapping( source_path='profile_data.career_history[].organization_linkedin', source_description='LinkedIn URL for organization', target_class='CareerPosition', target_slot='organization_linkedin', typedb_entity='career-position', typedb_attribute='organization-linkedin-url', rdf_predicate='schema:sameAs', transformation=TransformationType.DIRECT, required=False, notes='LinkedIn company page URL', ), FieldMapping( source_path='profile_data.career_history[].role', source_description='Job title/role', target_class='CareerPosition', 
target_slot='role', typedb_entity='career-position', typedb_attribute='role-title', rdf_predicate='schema:jobTitle', transformation=TransformationType.DIRECT, required=True, notes='Original language job title', ), FieldMapping( source_path='profile_data.career_history[].role_english', source_description='English translation of role', target_class='CareerPosition', target_slot='role_english', typedb_entity='career-position', typedb_attribute='role-title-english', rdf_predicate='schema:jobTitle', transformation=TransformationType.DIRECT, required=False, notes='English translation for non-English titles', ), FieldMapping( source_path='profile_data.career_history[].dates', source_description='Employment date range', target_class='CareerPosition', target_slot='dates', typedb_entity='career-position', typedb_attribute='date-range', rdf_predicate='schema:temporalCoverage', transformation=TransformationType.DIRECT, required=False, notes='Date range string (e.g., "Apr 2014 - Present")', ), FieldMapping( source_path='profile_data.career_history[].duration', source_description='Employment duration', target_class='CareerPosition', target_slot='duration', typedb_entity='career-position', typedb_attribute='duration', rdf_predicate='schema:duration', transformation=TransformationType.DIRECT, required=False, notes='Calculated duration (e.g., "11 years and 7 months")', ), FieldMapping( source_path='profile_data.career_history[].location', source_description='Work location', target_class='CareerPosition', target_slot='location', typedb_entity='career-position', typedb_attribute='work-location', rdf_predicate='schema:workLocation', transformation=TransformationType.DIRECT, required=False, notes='Geographic location of the position', ), FieldMapping( source_path='profile_data.career_history[].current', source_description='Is current position', target_class='CareerPosition', target_slot='current', typedb_entity='career-position', typedb_attribute='is-current', 
rdf_predicate='schema:currentPosition', transformation=TransformationType.DIRECT, required=False, notes='Boolean flag for current employment', ), FieldMapping( source_path='profile_data.career_history[].company_size', source_description='Company employee count range', target_class='CareerPosition', target_slot='company_size', typedb_entity='career-position', typedb_attribute='company-size', rdf_predicate='schema:numberOfEmployees', transformation=TransformationType.DIRECT, required=False, notes='Employee count range (e.g., "201-500 employees")', ), FieldMapping( source_path='profile_data.career_history[].company_founded', source_description='Year company was founded', target_class='CareerPosition', target_slot='company_founded', typedb_entity='career-position', typedb_attribute='company-founded-year', rdf_predicate='schema:foundingDate', transformation=TransformationType.DIRECT, required=False, notes='Founding year of the organization', ), FieldMapping( source_path='profile_data.career_history[].company_type', source_description='Type of company', target_class='CareerPosition', target_slot='company_type', typedb_entity='career-position', typedb_attribute='company-type', rdf_predicate='schema:additionalType', transformation=TransformationType.DIRECT, required=False, notes='Organization type (e.g., "Government Agency")', ), FieldMapping( source_path='profile_data.career_history[].industry', source_description='Industry sector', target_class='CareerPosition', target_slot='industry', typedb_entity='career-position', typedb_attribute='industry', rdf_predicate='schema:industry', transformation=TransformationType.DIRECT, required=False, notes='Industry classification', ), FieldMapping( source_path='profile_data.career_history[].department', source_description='Department within organization', target_class='CareerPosition', target_slot='department', typedb_entity='career-position', typedb_attribute='department', rdf_predicate='schema:department', 
transformation=TransformationType.DIRECT, required=False, notes='Department or division name', ), FieldMapping( source_path='profile_data.career_history[].level', source_description='Seniority level', target_class='CareerPosition', target_slot='level', typedb_entity='career-position', typedb_attribute='seniority-level', rdf_predicate='schema:occupationalCategory', transformation=TransformationType.DIRECT, required=False, notes='Career level (e.g., "Specialist", "Manager")', ), FieldMapping( source_path='profile_data.career_history[].description', source_description='Role description', target_class='CareerPosition', target_slot='description', typedb_entity='career-position', typedb_attribute='role-description', rdf_predicate='schema:description', transformation=TransformationType.DIRECT, required=False, notes='Free-text description of the role', ), ], generated_classes=['CareerPosition', 'Person'], example_yaml=""" profile_data: career_history: - organization: Nationaal Archief organization_linkedin: https://www.linkedin.com/company/nationaal-archief role: Staff Member At The Services Department role_english: Staff Member At The Services Department dates: Apr 2014 - Present duration: 11 years and 7 months location: Den Haag current: true company_size: 201-500 employees company_founded: 1802 company_type: Government Agency industry: Government Administration department: Other level: Specialist description: null """.strip(), ), # ------------------------------------------------------------------------- # EDUCATION - Academic background # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='education', description='Educational background and qualifications', detailed_description=""" Educational history including degrees, institutions, and duration. Links to institution LinkedIn pages when available. This data helps understand the academic foundation and qualifications of heritage professionals. 
""".strip(), linkml_class='Education', typedb_entity='education', provenance=Provenance( source_type='external_api', data_tier=DataTier.TIER_2_VERIFIED, ), fields=[ FieldMapping( source_path='profile_data.education[].institution', source_description='Educational institution name', target_class='Education', target_slot='institution', typedb_entity='education', typedb_attribute='institution-name', rdf_predicate='schema:educationalCredentialAwarded', transformation=TransformationType.DIRECT, required=True, notes='Name of university/school', ), FieldMapping( source_path='profile_data.education[].institution_linkedin', source_description='LinkedIn URL for institution', target_class='Education', target_slot='institution_linkedin', typedb_entity='education', typedb_attribute='institution-linkedin-url', rdf_predicate='schema:sameAs', transformation=TransformationType.DIRECT, required=False, notes='LinkedIn school page URL', ), FieldMapping( source_path='profile_data.education[].degree', source_description='Degree and field of study', target_class='Education', target_slot='degree', typedb_entity='education', typedb_attribute='degree', rdf_predicate='schema:educationalLevel', transformation=TransformationType.DIRECT, required=False, notes='Degree type and major (e.g., "MA, History")', ), FieldMapping( source_path='profile_data.education[].years', source_description='Years attended', target_class='Education', target_slot='years', typedb_entity='education', typedb_attribute='years-attended', rdf_predicate='schema:temporalCoverage', transformation=TransformationType.DIRECT, required=False, notes='Date range (e.g., "2001 - 2007")', ), FieldMapping( source_path='profile_data.education[].duration', source_description='Duration of study', target_class='Education', target_slot='duration', typedb_entity='education', typedb_attribute='study-duration', rdf_predicate='schema:duration', transformation=TransformationType.DIRECT, required=False, notes='Calculated duration (e.g., "6 
years")', ), FieldMapping( source_path='profile_data.education[].country', source_description='Country of institution', target_class='Education', target_slot='country', typedb_entity='education', typedb_attribute='education-country', rdf_predicate='schema:addressCountry', transformation=TransformationType.DIRECT, required=False, notes='ISO country code of institution', ), ], generated_classes=['Education', 'Person'], example_yaml=""" profile_data: education: - institution: Universiteit Utrecht institution_linkedin: https://www.linkedin.com/school/universiteit-utrecht degree: MA, History years: 2001 - 2007 duration: 6 years country: NL """.strip(), ), # ------------------------------------------------------------------------- # SKILLS & EXPERTISE - Professional capabilities # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='skills_expertise', description='Professional skills, languages, and expertise areas', detailed_description=""" Professional skills endorsed on LinkedIn, language proficiencies, and identified expertise areas based on career history analysis. Expertise areas are derived from analyzing the person's complete professional background in the heritage sector. 
""".strip(), linkml_class='Person', typedb_entity='person', provenance=Provenance( source_type='external_api', data_tier=DataTier.TIER_2_VERIFIED, ), fields=[ FieldMapping( source_path='profile_data.skills', source_description='Professional skills list', target_class='Person', target_slot='skills', typedb_entity='person', typedb_attribute='skills', rdf_predicate='schema:knowsAbout', transformation=TransformationType.ARRAY_DIRECT, required=False, notes='LinkedIn-endorsed skills', ), FieldMapping( source_path='profile_data.languages', source_description='Language proficiencies', target_class='Person', target_slot='languages', typedb_entity='person', typedb_attribute='languages', rdf_predicate='schema:knowsLanguage', transformation=TransformationType.ARRAY_DIRECT, required=False, notes='Languages spoken with proficiency levels', ), FieldMapping( source_path='profile_data.expertise_areas', source_description='Identified expertise areas', target_class='Person', target_slot='expertise_areas', typedb_entity='person', typedb_attribute='expertise-areas', rdf_predicate='schema:hasOccupation', transformation=TransformationType.ARRAY_DIRECT, required=False, notes='Derived from career analysis', ), ], generated_classes=['Person'], example_yaml=""" profile_data: skills: - Digital Preservation - Archival Description - Collection Management languages: - Dutch (Native) - English (Professional) expertise_areas: - Archival services - Public services - History research - Library services """.strip(), ), # ------------------------------------------------------------------------- # HERITAGE RELEVANCE - Sector-specific assessment # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='heritage_relevance', description='Heritage sector relevance assessment', detailed_description=""" Assessment of the person's relevance to the heritage sector. 
Includes heritage type classification (GLAMORCUBESFIXPHDNT), current institution, sector role, and years of heritage experience. This provides a quick overview of where the person fits within the heritage ecosystem. """.strip(), linkml_class='HeritageRelevance', typedb_entity='heritage-relevance', provenance=Provenance( source_type='computed', data_tier=DataTier.TIER_3_CROWD_SOURCED, ), fields=[ FieldMapping( source_path='heritage_sector_relevance.heritage_type', source_description='Heritage type code', target_class='HeritageRelevance', target_slot='heritage_type', typedb_entity='heritage-relevance', typedb_attribute='heritage-type-code', rdf_predicate='glam:heritageType', transformation=TransformationType.DIRECT, required=False, notes='Single letter GLAMORCUBESFIXPHDNT code', ), FieldMapping( source_path='heritage_sector_relevance.heritage_type_label', source_description='Heritage type label', target_class='HeritageRelevance', target_slot='heritage_type_label', typedb_entity='heritage-relevance', typedb_attribute='heritage-type-label', rdf_predicate='rdfs:label', transformation=TransformationType.DIRECT, required=False, notes='Human-readable type label (e.g., "Archive")', ), FieldMapping( source_path='heritage_sector_relevance.current_institution', source_description='Current heritage institution', target_class='HeritageRelevance', target_slot='current_institution', typedb_entity='heritage-relevance', typedb_attribute='current-institution', rdf_predicate='schema:worksFor', transformation=TransformationType.DIRECT, required=False, notes='Name of current heritage employer', ), FieldMapping( source_path='heritage_sector_relevance.institution_type', source_description='Type of institution', target_class='HeritageRelevance', target_slot='institution_type', typedb_entity='heritage-relevance', typedb_attribute='institution-type', rdf_predicate='schema:additionalType', transformation=TransformationType.DIRECT, required=False, notes='Classification of the institution', ), 
FieldMapping( source_path='heritage_sector_relevance.sector_role', source_description='Role within heritage sector', target_class='HeritageRelevance', target_slot='sector_role', typedb_entity='heritage-relevance', typedb_attribute='sector-role', rdf_predicate='schema:jobTitle', transformation=TransformationType.DIRECT, required=False, notes='Standardized role classification', ), FieldMapping( source_path='heritage_sector_relevance.years_in_heritage', source_description='Years of heritage experience', target_class='HeritageRelevance', target_slot='years_in_heritage', typedb_entity='heritage-relevance', typedb_attribute='years-in-heritage', rdf_predicate='schema:experienceYears', transformation=TransformationType.DIRECT, required=False, notes='Total years in heritage sector', ), ], generated_classes=['HeritageRelevance', 'Person'], example_yaml=""" heritage_sector_relevance: heritage_type: A heritage_type_label: Archive current_institution: Nationaal Archief institution_type: National Archive sector_role: Services Staff years_in_heritage: 11 """.strip(), ), # ------------------------------------------------------------------------- # HERITAGE EXPERIENCE - Relevant positions # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='heritage_experience', description='Heritage-relevant experience from career history', detailed_description=""" Filtered list of positions that are relevant to the heritage sector. Extracted from full career history with relevance annotations. Includes both current and past positions at heritage institutions with notes explaining their relevance to the GLAM sector. 
""".strip(), linkml_class='HeritageExperience', typedb_entity='heritage-experience', provenance=Provenance( source_type='computed', data_tier=DataTier.TIER_3_CROWD_SOURCED, ), fields=[ FieldMapping( source_path='profile_data.heritage_relevant_experience[].organization', source_description='Heritage organization name', target_class='HeritageExperience', target_slot='organization', typedb_entity='heritage-experience', typedb_attribute='heritage-org-name', rdf_predicate='schema:worksFor', transformation=TransformationType.DIRECT, required=True, notes='Name of heritage institution', ), FieldMapping( source_path='profile_data.heritage_relevant_experience[].role', source_description='Role at heritage organization', target_class='HeritageExperience', target_slot='role', typedb_entity='heritage-experience', typedb_attribute='heritage-role', rdf_predicate='schema:jobTitle', transformation=TransformationType.DIRECT, required=True, notes='Job title at heritage institution', ), FieldMapping( source_path='profile_data.heritage_relevant_experience[].relevance', source_description='Relevance explanation', target_class='HeritageExperience', target_slot='relevance', typedb_entity='heritage-experience', typedb_attribute='relevance-notes', rdf_predicate='schema:description', transformation=TransformationType.DIRECT, required=False, notes='Why this position is heritage-relevant', ), FieldMapping( source_path='profile_data.heritage_relevant_experience[].current', source_description='Is current position', target_class='HeritageExperience', target_slot='current', typedb_entity='heritage-experience', typedb_attribute='is-current-heritage', rdf_predicate='schema:currentPosition', transformation=TransformationType.DIRECT, required=False, notes='Whether this is a current position', ), ], generated_classes=['HeritageExperience', 'Person'], example_yaml=""" profile_data: heritage_relevant_experience: - organization: Nationaal Archief role: Staff Member At The Services Department relevance: 
Public services at National Archives of the Netherlands current: true - organization: University Library Utrecht role: Library Employee relevance: Academic library experience current: false """.strip(), ), # ------------------------------------------------------------------------- # AFFILIATIONS - Custodian connections # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='affiliations', description='Affiliations with heritage custodians', detailed_description=""" Links between the person and heritage custodian institutions. Each affiliation includes the custodian name, slug identifier, role title, and heritage classification. These affiliations enable network analysis across the heritage sector workforce. """.strip(), linkml_class='Affiliation', typedb_entity='affiliation', provenance=Provenance( source_type='computed', data_tier=DataTier.TIER_2_VERIFIED, ), fields=[ FieldMapping( source_path='affiliations[].custodian_name', source_description='Heritage custodian name', target_class='Affiliation', target_slot='custodian_name', typedb_entity='affiliation', typedb_attribute='custodian-name', rdf_predicate='schema:memberOf', transformation=TransformationType.DIRECT, required=True, notes='Name of the heritage institution', ), FieldMapping( source_path='affiliations[].custodian_slug', source_description='Custodian identifier slug', target_class='Affiliation', target_slot='custodian_slug', typedb_entity='affiliation', typedb_attribute='custodian-slug', rdf_predicate='schema:identifier', transformation=TransformationType.DIRECT, required=True, notes='URL-safe identifier for the custodian', ), FieldMapping( source_path='affiliations[].role_title', source_description='Role at custodian', target_class='Affiliation', target_slot='role_title', typedb_entity='affiliation', typedb_attribute='affiliation-role', rdf_predicate='schema:jobTitle', transformation=TransformationType.DIRECT, required=False, notes='Job title at 
this custodian', ), FieldMapping( source_path='affiliations[].heritage_relevant', source_description='Is heritage relevant', target_class='Affiliation', target_slot='heritage_relevant', typedb_entity='affiliation', typedb_attribute='is-heritage-relevant', rdf_predicate='glam:heritageRelevant', transformation=TransformationType.DIRECT, required=False, notes='Whether affiliation is heritage-relevant', ), FieldMapping( source_path='affiliations[].heritage_type', source_description='Heritage type code', target_class='Affiliation', target_slot='heritage_type', typedb_entity='affiliation', typedb_attribute='affiliation-heritage-type', rdf_predicate='glam:heritageType', transformation=TransformationType.DIRECT, required=False, notes='GLAMORCUBESFIXPHDNT type code', ), FieldMapping( source_path='affiliations[].current', source_description='Is current affiliation', target_class='Affiliation', target_slot='current', typedb_entity='affiliation', typedb_attribute='is-current-affiliation', rdf_predicate='schema:currentPosition', transformation=TransformationType.DIRECT, required=False, notes='Whether this is a current affiliation', ), FieldMapping( source_path='affiliations[].observed_on', source_description='Observation timestamp', target_class='Affiliation', target_slot='observed_on', typedb_entity='affiliation', typedb_attribute='observed-on', rdf_predicate='prov:generatedAtTime', transformation=TransformationType.DIRECT, required=False, notes='When this affiliation was observed', ), FieldMapping( source_path='affiliations[].source_url', source_description='Source URL for affiliation', target_class='Affiliation', target_slot='source_url', typedb_entity='affiliation', typedb_attribute='affiliation-source-url', rdf_predicate='prov:wasDerivedFrom', transformation=TransformationType.DIRECT, required=False, notes='URL where affiliation was discovered', ), ], generated_classes=['Affiliation'], example_yaml=""" affiliations: - custodian_name: Nationaal Archief custodian_slug: 
nationaal-archief role_title: Staff member at the Services Department at Nationaal Archief heritage_relevant: true heritage_type: A current: true observed_on: 2025-12-14T11:21:47Z source_url: https://www.linkedin.com/company/nationaal-archief/people/ """.strip(), ), # ------------------------------------------------------------------------- # LINKED RECORDS - Cross-references # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='linked_records', description='Links to related records in the system', detailed_description=""" Cross-references to other records in the heritage data system. Includes links to staff records (parsed from LinkedIn company pages) and custodian records (heritage institution YAML files). These links enable navigation between person profiles and the institutions they work for. """.strip(), linkml_class='LinkedRecords', typedb_entity='linked-records', provenance=Provenance( source_type='computed', data_tier=DataTier.TIER_2_VERIFIED, ), fields=[ FieldMapping( source_path='linked_records.staff_record.file', source_description='Staff record file path', target_class='LinkedRecords', target_slot='staff_record_file', typedb_entity='linked-records', typedb_attribute='staff-record-path', rdf_predicate='prov:wasDerivedFrom', transformation=TransformationType.DIRECT, required=False, notes='Path to parsed staff JSON file', ), FieldMapping( source_path='linked_records.staff_record.staff_id', source_description='Staff record ID', target_class='LinkedRecords', target_slot='staff_id', typedb_entity='linked-records', typedb_attribute='staff-id', rdf_predicate='schema:identifier', transformation=TransformationType.DIRECT, required=False, notes='Unique staff identifier', ), FieldMapping( source_path='linked_records.custodian_record.ghcid', source_description='Custodian GHCID', target_class='LinkedRecords', target_slot='custodian_ghcid', typedb_entity='linked-records', typedb_attribute='linked-ghcid', 
rdf_predicate='glam:ghcid', transformation=TransformationType.DIRECT, required=False, notes='GHCID of linked custodian', ), FieldMapping( source_path='linked_records.custodian_record.notes', source_description='Custodian record notes', target_class='LinkedRecords', target_slot='custodian_notes', typedb_entity='linked-records', typedb_attribute='custodian-notes', rdf_predicate='schema:description', transformation=TransformationType.DIRECT, required=False, notes='Additional notes about the custodian link', ), ], generated_classes=['LinkedRecords'], example_yaml=""" linked_records: staff_record: file: data/custodian/person/affiliated/parsed/nationaal-archief_staff_20251210T155415Z.json staff_id: nationaal-archief_staff_0002_iris_van_meer custodian_record: ghcid: NL-ZH-DHA-A-NA notes: Nationaal Archief, The Hague """.strip(), ), # ------------------------------------------------------------------------- # CONTACT DATA - Contact information # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='contact_data', description='Contact information and communication channels', detailed_description=""" Contact information including email addresses and phone numbers. Emails may be inferred from organizational naming conventions with confidence scores indicating reliability. Also includes profile photo URLs and external lookup service links. 
""".strip(), linkml_class='ContactData', typedb_entity='contact-data', provenance=Provenance( source_type='computed', data_tier=DataTier.TIER_3_CROWD_SOURCED, ), fields=[ FieldMapping( source_path='contact_data.provenance.source', source_description='Contact data source', target_class='ContactData', target_slot='provenance_source', typedb_entity='contact-data', typedb_attribute='contact-source', rdf_predicate='prov:wasAttributedTo', transformation=TransformationType.DIRECT, required=False, notes='How contact data was obtained', ), FieldMapping( source_path='contact_data.emails[].email', source_description='Email address', target_class='ContactData', target_slot='email', typedb_entity='contact-data', typedb_attribute='email-address', rdf_predicate='schema:email', transformation=TransformationType.DIRECT, required=False, notes='Email address (may be inferred)', ), FieldMapping( source_path='contact_data.emails[].type', source_description='Email type', target_class='ContactData', target_slot='email_type', typedb_entity='contact-data', typedb_attribute='email-type', rdf_predicate='schema:contactType', transformation=TransformationType.DIRECT, required=False, notes='Type of email (work, personal)', ), FieldMapping( source_path='contact_data.emails[].confidence', source_description='Email confidence score', target_class='ContactData', target_slot='email_confidence', typedb_entity='contact-data', typedb_attribute='email-confidence', rdf_predicate='prov:confidence', transformation=TransformationType.DIRECT, required=False, notes='Confidence in email accuracy (0-1)', ), FieldMapping( source_path='contact_data.emails[].verified', source_description='Email verification status', target_class='ContactData', target_slot='email_verified', typedb_entity='contact-data', typedb_attribute='email-verified', rdf_predicate='schema:verified', transformation=TransformationType.DIRECT, required=False, notes='Whether email has been verified', ), FieldMapping( 
source_path='contact_data.profile_photo_url', source_description='Profile photo URL', target_class='ContactData', target_slot='profile_photo_url', typedb_entity='contact-data', typedb_attribute='profile-photo', rdf_predicate='schema:image', transformation=TransformationType.DIRECT, required=False, notes='URL to profile photo', ), FieldMapping( source_path='contact_data.rocketreach_url', source_description='RocketReach lookup URL', target_class='ContactData', target_slot='rocketreach_url', typedb_entity='contact-data', typedb_attribute='rocketreach-url', rdf_predicate='schema:sameAs', transformation=TransformationType.DIRECT, required=False, notes='Link to RocketReach profile lookup', ), ], generated_classes=['ContactData'], example_yaml=""" contact_data: provenance: source: LinkedIn profile + Dutch government naming convention source_url: https://www.linkedin.com/in/iris-van-meer-34329131 retrieved_date: 2025-12-14 extraction_method: naming_convention_inference emails: - email: iris.van.meer@nationaalarchief.nl domain: nationaalarchief.nl type: work source: inferred confidence: 0.8 verified: false phones: [] profile_photo_url: https://media.licdn.com/dms/image/v2/... """.strip(), ), # ------------------------------------------------------------------------- # EXTRACTION METADATA - Data provenance # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='extraction_metadata', description='Metadata about how the profile was extracted', detailed_description=""" Provenance information about the data extraction process. Includes source file references, extraction timestamps, methods used, and cost tracking for API calls. This ensures full traceability of data origin and enables reproducibility of the extraction process. 
""".strip(), linkml_class='ExtractionMetadata', typedb_entity='extraction-metadata', provenance=Provenance( source_type='computed', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), fields=[ FieldMapping( source_path='extraction_metadata.source_file', source_description='Source file path', target_class='ExtractionMetadata', target_slot='source_file', typedb_entity='extraction-metadata', typedb_attribute='source-file-path', rdf_predicate='prov:wasDerivedFrom', transformation=TransformationType.DIRECT, required=False, notes='Original source file for extraction', ), FieldMapping( source_path='extraction_metadata.staff_id', source_description='Staff identifier', target_class='ExtractionMetadata', target_slot='staff_id', typedb_entity='extraction-metadata', typedb_attribute='extraction-staff-id', rdf_predicate='schema:identifier', transformation=TransformationType.DIRECT, required=False, notes='Staff ID from source data', ), FieldMapping( source_path='extraction_metadata.extraction_date', source_description='Extraction timestamp', target_class='ExtractionMetadata', target_slot='extraction_date', typedb_entity='extraction-metadata', typedb_attribute='extraction-date', rdf_predicate='prov:generatedAtTime', transformation=TransformationType.DIRECT, required=True, notes='ISO 8601 timestamp of extraction', ), FieldMapping( source_path='extraction_metadata.extraction_method', source_description='Method used for extraction', target_class='ExtractionMetadata', target_slot='extraction_method', typedb_entity='extraction-metadata', typedb_attribute='extraction-method', rdf_predicate='prov:wasGeneratedBy', transformation=TransformationType.DIRECT, required=False, notes='Tool/API used (e.g., exa_crawling_exa)', ), FieldMapping( source_path='extraction_metadata.extraction_agent', source_description='Agent performing extraction', target_class='ExtractionMetadata', target_slot='extraction_agent', typedb_entity='extraction-metadata', typedb_attribute='extraction-agent', 
rdf_predicate='prov:wasAttributedTo', transformation=TransformationType.DIRECT, required=False, notes='AI agent or script name', ), FieldMapping( source_path='extraction_metadata.linkedin_url', source_description='Source LinkedIn URL', target_class='ExtractionMetadata', target_slot='linkedin_url', typedb_entity='extraction-metadata', typedb_attribute='extraction-linkedin-url', rdf_predicate='prov:hadPrimarySource', transformation=TransformationType.DIRECT, required=False, notes='LinkedIn profile URL that was extracted', ), FieldMapping( source_path='extraction_metadata.cost_usd', source_description='Extraction cost in USD', target_class='ExtractionMetadata', target_slot='cost_usd', typedb_entity='extraction-metadata', typedb_attribute='extraction-cost', rdf_predicate='schema:price', transformation=TransformationType.DIRECT, required=False, notes='API cost for extraction', ), FieldMapping( source_path='extraction_metadata.request_id', source_description='API request identifier', target_class='ExtractionMetadata', target_slot='request_id', typedb_entity='extraction-metadata', typedb_attribute='api-request-id', rdf_predicate='schema:identifier', transformation=TransformationType.DIRECT, required=False, notes='Unique request ID for tracing', ), ], generated_classes=['ExtractionMetadata'], example_yaml=""" extraction_metadata: source_file: null staff_id: null extraction_date: 2025-12-13T17:35:24.524090+00:00 extraction_method: exa_crawling_exa extraction_agent: claude-opus-4.5 linkedin_url: https://www.linkedin.com/in/iris-van-meer-34329131 cost_usd: 0 request_id: null """.strip(), ), # ------------------------------------------------------------------------- # WEB CLAIMS - Verifiable claims from web sources # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='web_claims', description='Web claims with provenance tracking', detailed_description=""" Individual claims extracted from web sources with full 
provenance. Each claim includes the claim type, value, source URL, retrieval timestamp, and the agent/tool that performed the extraction. This follows the WebObservation pattern for verifiable data claims. """.strip(), linkml_class='WebClaim', typedb_entity='web-claim', provenance=Provenance( source_type='external_api', data_tier=DataTier.TIER_2_VERIFIED, ), fields=[ FieldMapping( source_path='web_claims[].claim_type', source_description='Type of claim', target_class='WebClaim', target_slot='claim_type', typedb_entity='web-claim', typedb_attribute='claim-type', rdf_predicate='rdf:type', transformation=TransformationType.DIRECT, required=True, notes='Category of claim (e.g., full_name, role_title)', ), FieldMapping( source_path='web_claims[].claim_value', source_description='Value of the claim', target_class='WebClaim', target_slot='claim_value', typedb_entity='web-claim', typedb_attribute='claim-value', rdf_predicate='rdf:value', transformation=TransformationType.DIRECT, required=True, notes='The actual claimed value', ), FieldMapping( source_path='web_claims[].source_url', source_description='URL source of claim', target_class='WebClaim', target_slot='source_url', typedb_entity='web-claim', typedb_attribute='claim-source-url', rdf_predicate='prov:wasDerivedFrom', transformation=TransformationType.DIRECT, required=True, notes='Web page where claim was found', ), FieldMapping( source_path='web_claims[].retrieved_on', source_description='Retrieval timestamp', target_class='WebClaim', target_slot='retrieved_on', typedb_entity='web-claim', typedb_attribute='claim-retrieved-on', rdf_predicate='prov:generatedAtTime', transformation=TransformationType.DIRECT, required=False, notes='When the claim was retrieved', ), FieldMapping( source_path='web_claims[].retrieval_agent', source_description='Agent that retrieved claim', target_class='WebClaim', target_slot='retrieval_agent', typedb_entity='web-claim', typedb_attribute='claim-retrieval-agent', 
rdf_predicate='prov:wasAttributedTo', transformation=TransformationType.DIRECT, required=False, notes='Tool/agent that extracted the claim', ), ], generated_classes=['WebClaim'], example_yaml=""" web_claims: - claim_type: full_name claim_value: Iris van Meer source_url: https://www.linkedin.com/in/iris-van-meer-34329131 retrieved_on: 2025-12-14T11:21:47Z retrieval_agent: linkedin_html_parser - claim_type: role_title claim_value: Staff member at the Services Department at Nationaal Archief source_url: https://www.linkedin.com/in/iris-van-meer-34329131 retrieved_on: 2025-12-14T11:21:47Z retrieval_agent: linkedin_html_parser """.strip(), ), # ------------------------------------------------------------------------- # PICO MAPPED - PiCo ontology properties that ARE mapped # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='pico_mapped', description='PiCo ontology properties mapped to HC person data', detailed_description=""" The Heritage Custodian (HC) system implements a focused subset of the PiCo (Persons in Context) ontology, optimized for tracking heritage sector staff. PiCo was designed for historical vital records (birth/death certificates, marriage records, census data), but HC uses LinkedIn as the primary data source, which provides professional context rather than biographical/genealogical data. This section documents which PiCo properties ARE mapped to HC fields, showing the semantic alignment between the ontologies. 
""".strip(), linkml_class='PersonObservation', typedb_entity='person-observation', provenance=Provenance( source_type='ontology_mapping', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), fields=[ FieldMapping( source_path='profile_data.name', source_description='Full name of the person', target_class='PersonObservation', target_slot='name', typedb_entity='person-observation', typedb_attribute='person-name', rdf_predicate='sdo:name', transformation=TransformationType.DIRECT, required=True, notes='PiCo uses sdo:name (Schema.org) for full names. Directly mapped.', status='mapped', ), FieldMapping( source_path='profile_data.location', source_description='Current geographic location', target_class='PersonObservation', target_slot='location', typedb_entity='person-observation', typedb_attribute='location-string', rdf_predicate='sdo:address', transformation=TransformationType.DIRECT, required=False, notes='PiCo uses sdo:address for location. LinkedIn provides free-text location.', status='mapped', ), FieldMapping( source_path='profile_data.headline', source_description='Current occupation/role', target_class='PersonObservation', target_slot='headline', typedb_entity='person-observation', typedb_attribute='headline', rdf_predicate='sdo:hasOccupation', transformation=TransformationType.DIRECT, required=False, notes='PiCo uses sdo:hasOccupation for job roles. HC captures this via LinkedIn headline.', status='mapped', ), FieldMapping( source_path='extraction_metadata.linkedin_url', source_description='LinkedIn profile URL as primary source', target_class='PersonObservation', target_slot='source_url', typedb_entity='person-observation', typedb_attribute='source-url', rdf_predicate='prov:hadPrimarySource', transformation=TransformationType.DIRECT, required=True, notes='PiCo uses prov:hadPrimarySource for provenance. 
LinkedIn URL serves as source document.', status='mapped', ), FieldMapping( source_path='affiliations[].role_title', source_description='Role at heritage institution', target_class='Affiliation', target_slot='role_title', typedb_entity='person-observation', typedb_attribute='role-title', rdf_predicate='pico:hasRole', transformation=TransformationType.DIRECT, required=False, notes='PiCo uses pico:hasRole with picot_roles thesaurus. HC captures current institutional roles.', status='mapped', ), FieldMapping( source_path='extraction_metadata.extraction_date', source_description='When observation was recorded', target_class='PersonObservation', target_slot='observation_date', typedb_entity='person-observation', typedb_attribute='observation-date', rdf_predicate='prov:generatedAtTime', transformation=TransformationType.DIRECT, required=True, notes='PiCo uses prov:generatedAtTime for temporal provenance. Mapped to extraction timestamp.', status='mapped', ), FieldMapping( source_path='extraction_metadata.extraction_agent', source_description='Agent that performed extraction', target_class='PersonObservation', target_slot='extraction_agent', typedb_entity='person-observation', typedb_attribute='extraction-agent', rdf_predicate='prov:wasAttributedTo', transformation=TransformationType.DIRECT, required=False, notes='PiCo uses prov:wasAttributedTo for agent provenance. Records which tool/agent extracted data.', status='mapped', ), FieldMapping( source_path='profile_data.profile_image_url', source_description='Profile photo URL', target_class='PersonObservation', target_slot='image_url', typedb_entity='person-observation', typedb_attribute='profile-image-url', rdf_predicate='sdo:image', transformation=TransformationType.DIRECT, required=False, notes='PiCo uses sdo:image for visual representation. 
LinkedIn CDN URL stored.', status='mapped', ), ], generated_classes=['PersonObservation', 'Affiliation'], example_yaml=""" # PiCo ontology alignment example # HC PersonObservation β†’ PiCo PersonObservation profile_data: name: "Iris van Meer" # β†’ sdo:name headline: "Staff member at..." # β†’ sdo:hasOccupation location: "The Hague, Netherlands" # β†’ sdo:address profile_image_url: "https://..." # β†’ sdo:image extraction_metadata: linkedin_url: "https://linkedin.com/in/..." # β†’ prov:hadPrimarySource extraction_date: "2025-12-14T11:21:47Z" # β†’ prov:generatedAtTime extraction_agent: "claude-opus-4.5" # β†’ prov:wasAttributedTo affiliations: - role_title: "Archivist" # β†’ pico:hasRole """.strip(), ), # ------------------------------------------------------------------------- # PICO UNMAPPED - PiCo ontology properties intentionally OUT OF SCOPE # ------------------------------------------------------------------------- EnrichmentSourceMapping( source_block='pico_unmapped', description='PiCo ontology properties intentionally not mapped', detailed_description=""" Many PiCo properties are intentionally NOT mapped in the HC system. This is a design decision, not a gap to be filled. **Why these properties are out of scope:** 1. **Data source limitation**: LinkedIn profiles don't contain vital records (birth dates, death dates, marriage records, baptism records). 2. **Use case mismatch**: HC tracks heritage sector workforce, not genealogical reconstruction. Family relationships aren't relevant for institutional staff directories. 3. **Privacy considerations**: Collecting personal biographical data about living individuals raises GDPR concerns. Professional context is appropriate; personal history is not. 4. **Ontology purpose**: PiCo was designed for historical archives processing (civil registration, notarial records). HC serves a different purpose. This documentation ensures transparency about the ontology alignment scope. 
""".strip(), linkml_class='PersonObservation', typedb_entity='person-observation', provenance=Provenance( source_type='ontology_mapping', data_tier=DataTier.TIER_1_AUTHORITATIVE, ), fields=[ # Vital records - not available from LinkedIn FieldMapping( source_path=None, source_description='Birth date', target_class=None, target_slot=None, typedb_entity='person-observation', typedb_attribute=None, rdf_predicate='sdo:birthDate', transformation=TransformationType.NOT_MAPPED, required=False, notes='OUT OF SCOPE: LinkedIn does not provide birth dates. Historical vital records property.', status='out_of_scope', ), FieldMapping( source_path=None, source_description='Death date', target_class=None, target_slot=None, typedb_entity='person-observation', typedb_attribute=None, rdf_predicate='sdo:deathDate', transformation=TransformationType.NOT_MAPPED, required=False, notes='OUT OF SCOPE: LinkedIn profiles are for living professionals. Historical vital records property.', status='out_of_scope', ), FieldMapping( source_path=None, source_description='Birth place', target_class=None, target_slot=None, typedb_entity='person-observation', typedb_attribute=None, rdf_predicate='sdo:birthPlace', transformation=TransformationType.NOT_MAPPED, required=False, notes='OUT OF SCOPE: LinkedIn does not provide birth location. 
Use sdo:address for current location.', status='out_of_scope', ), FieldMapping( source_path=None, source_description='Death place', target_class=None, target_slot=None, typedb_entity='person-observation', typedb_attribute=None, rdf_predicate='sdo:deathPlace', transformation=TransformationType.NOT_MAPPED, required=False, notes='OUT OF SCOPE: LinkedIn profiles are for living professionals.', status='out_of_scope', ), FieldMapping( source_path=None, source_description='Deceased flag', target_class=None, target_slot=None, typedb_entity='person-observation', typedb_attribute=None, rdf_predicate='pico:deceased', transformation=TransformationType.NOT_MAPPED, required=False, notes='OUT OF SCOPE: HC tracks active professionals. Memorial profiles not in scope.', status='out_of_scope', ), FieldMapping( source_path=None, source_description='Age', target_class=None, target_slot=None, typedb_entity='person-observation', typedb_attribute=None, rdf_predicate='pico:hasAge', transformation=TransformationType.NOT_MAPPED, required=False, notes='OUT OF SCOPE: Age not available from LinkedIn. Privacy consideration for living individuals.', status='out_of_scope', ), FieldMapping( source_path=None, source_description='Gender', target_class=None, target_slot=None, typedb_entity='person-observation', typedb_attribute=None, rdf_predicate='sdo:gender', transformation=TransformationType.NOT_MAPPED, required=False, notes='OUT OF SCOPE: Gender not reliably extractable from LinkedIn. Privacy consideration.', status='out_of_scope', ), FieldMapping( source_path=None, source_description='Religion', target_class=None, target_slot=None, typedb_entity='person-observation', typedb_attribute=None, rdf_predicate='pico:hasReligion', transformation=TransformationType.NOT_MAPPED, required=False, notes='OUT OF SCOPE: Religious affiliation not available from LinkedIn. 
Privacy consideration.', status='out_of_scope', ), # Structured name components - partial mapping FieldMapping( source_path=None, source_description='Given name (first name)', target_class=None, target_slot=None, typedb_entity='person-observation', typedb_attribute=None, rdf_predicate='sdo:givenName', transformation=TransformationType.NOT_MAPPED, required=False, notes='PARTIAL: Could be extracted from full name, but not reliably for all cultures. Full name (sdo:name) is used instead.', status='partial', ), FieldMapping( source_path=None, source_description='Family name (surname)', target_class=None, target_slot=None, typedb_entity='person-observation', typedb_attribute=None, rdf_predicate='sdo:familyName', transformation=TransformationType.NOT_MAPPED, required=False, notes='PARTIAL: Could be extracted from full name, but not reliably for all cultures. Full name (sdo:name) is used instead.', status='partial', ), FieldMapping( source_path=None, source_description='Patronym', target_class=None, target_slot=None, typedb_entity='person-observation', typedb_attribute=None, rdf_predicate='pnv:patronym', transformation=TransformationType.NOT_MAPPED, required=False, notes='OUT OF SCOPE: Patronymic names are historical/cultural. Not extractable from LinkedIn.', status='out_of_scope', ), FieldMapping( source_path=None, source_description='Base surname', target_class=None, target_slot=None, typedb_entity='person-observation', typedb_attribute=None, rdf_predicate='pnv:baseSurname', transformation=TransformationType.NOT_MAPPED, required=False, notes='OUT OF SCOPE: Genealogical name component. Not relevant for staff tracking.', status='out_of_scope', ), FieldMapping( source_path=None, source_description='Surname prefix', target_class=None, target_slot=None, typedb_entity='person-observation', typedb_attribute=None, rdf_predicate='pnv:surnamePrefix', transformation=TransformationType.NOT_MAPPED, required=False, notes='OUT OF SCOPE: Dutch tussenvoegsel (van, de, etc.) 
not separately tracked. Full name preserved.', status='out_of_scope', ), FieldMapping( source_path=None, source_description='Initials', target_class=None, target_slot=None, typedb_entity='person-observation', typedb_attribute=None, rdf_predicate='pnv:initials', transformation=TransformationType.NOT_MAPPED, required=False, notes='OUT OF SCOPE: Initials not separately extracted. Full name used.', status='out_of_scope', ), # Family relationships - 40+ properties not mapped FieldMapping( source_path=None, source_description='Parent relationship', target_class=None, target_slot=None, typedb_entity='person-observation', typedb_attribute=None, rdf_predicate='sdo:parent', transformation=TransformationType.NOT_MAPPED, required=False, notes='OUT OF SCOPE: Family relationships not relevant for institutional staff tracking.', status='out_of_scope', ), FieldMapping( source_path=None, source_description='Children relationship', target_class=None, target_slot=None, typedb_entity='person-observation', typedb_attribute=None, rdf_predicate='sdo:children', transformation=TransformationType.NOT_MAPPED, required=False, notes='OUT OF SCOPE: Family relationships not relevant for institutional staff tracking.', status='out_of_scope', ), FieldMapping( source_path=None, source_description='Spouse relationship', target_class=None, target_slot=None, typedb_entity='person-observation', typedb_attribute=None, rdf_predicate='sdo:spouse', transformation=TransformationType.NOT_MAPPED, required=False, notes='OUT OF SCOPE: Family relationships not relevant for institutional staff tracking.', status='out_of_scope', ), FieldMapping( source_path=None, source_description='Sibling relationship', target_class=None, target_slot=None, typedb_entity='person-observation', typedb_attribute=None, rdf_predicate='sdo:sibling', transformation=TransformationType.NOT_MAPPED, required=False, notes='OUT OF SCOPE: Family relationships not relevant for institutional staff tracking.', status='out_of_scope', ), 
FieldMapping( source_path=None, source_description='Extended family (40+ PiCo properties)', target_class=None, target_slot=None, typedb_entity='person-observation', typedb_attribute=None, rdf_predicate='pico:has*', transformation=TransformationType.NOT_MAPPED, required=False, notes='OUT OF SCOPE: PiCo defines 40+ family relationship properties (grandparent, in-law, step-relations, cousins, etc.). None are mapped - HC tracks professional, not familial relationships.', status='out_of_scope', ), # Archival source properties FieldMapping( source_path=None, source_description='Archive component source', target_class=None, target_slot=None, typedb_entity='person-observation', typedb_attribute=None, rdf_predicate='sdo:ArchiveComponent', transformation=TransformationType.NOT_MAPPED, required=False, notes='OUT OF SCOPE: HC uses LinkedIn as source, not archival documents. Web claims serve similar provenance purpose.', status='out_of_scope', ), # Reconstruction properties FieldMapping( source_path=None, source_description='Person reconstruction aggregation', target_class=None, target_slot=None, typedb_entity='person-observation', typedb_attribute=None, rdf_predicate='pico:PersonReconstruction', transformation=TransformationType.NOT_MAPPED, required=False, notes='FUTURE: PersonReconstruction (aggregating multiple observations) not yet implemented. Currently each person has one LinkedIn-based observation.', status='future', ), FieldMapping( source_path=None, source_description='Derived from relationship', target_class=None, target_slot=None, typedb_entity='person-observation', typedb_attribute=None, rdf_predicate='prov:wasDerivedFrom', transformation=TransformationType.NOT_MAPPED, required=False, notes='FUTURE: Cross-observation derivation not implemented. 
Would link reconstructed person to source observations.', status='future', ), ], generated_classes=['PersonObservation'], example_yaml=""" # PiCo properties NOT mapped in HC system # ❌ Vital records (not available from LinkedIn): # - sdo:birthDate, sdo:deathDate # - sdo:birthPlace, sdo:deathPlace # - pico:deceased, pico:hasAge # ❌ Personal characteristics (privacy): # - sdo:gender # - pico:hasReligion # ❌ Genealogical name components: # - pnv:patronym, pnv:baseSurname # - pnv:surnamePrefix, pnv:initials # (HC uses full sdo:name instead) # ❌ Family relationships (40+ properties): # - sdo:parent, sdo:children, sdo:spouse, sdo:sibling # - pico:hasGrandparent, pico:hasGrandchild # - pico:hasParent-in-law, pico:hasSibling-in-law # - pico:hasStepparent, pico:hasStepchild # - pico:hasCousin, pico:hasUncle_Aunt # - pico:hasFosterParent, pico:hasGodparent # ... and many more # ⏳ Future consideration: # - pico:PersonReconstruction (multi-source aggregation) # - prov:wasDerivedFrom (observation linking) """.strip(), ), ] # ============================================================================ # HELPER FUNCTIONS # ============================================================================ def get_mapping_for_source(source_block: str) -> Optional[EnrichmentSourceMapping]: """Get the mapping for a specific source block.""" for mapping in ENRICHMENT_MAPPINGS: if mapping.source_block == source_block: return mapping return None def get_fields_for_class(class_name: str) -> list[FieldMapping]: """Get all field mappings that target a specific LinkML class.""" fields = [] for mapping in ENRICHMENT_MAPPINGS: for field_mapping in mapping.fields: if field_mapping.target_class == class_name: fields.append(field_mapping) return fields def get_sources_for_class(class_name: str) -> list[str]: """Get all source blocks that contribute to a specific LinkML class.""" sources = [] for mapping in ENRICHMENT_MAPPINGS: if mapping.linkml_class == class_name: sources.append(mapping.source_block) 
else: for field_mapping in mapping.fields: if field_mapping.target_class == class_name and mapping.source_block not in sources: sources.append(mapping.source_block) return sources def get_typedb_attribute(class_name: str, slot_name: str) -> Optional[str]: """Get the TypeDB attribute name for a specific class/slot combination.""" for mapping in ENRICHMENT_MAPPINGS: for field_mapping in mapping.fields: if field_mapping.target_class == class_name and field_mapping.target_slot == slot_name: return field_mapping.typedb_attribute return None def get_rdf_predicate(class_name: str, slot_name: str) -> Optional[str]: """Get the RDF predicate for a specific class/slot combination.""" for mapping in ENRICHMENT_MAPPINGS: for field_mapping in mapping.fields: if field_mapping.target_class == class_name and field_mapping.target_slot == slot_name: return field_mapping.rdf_predicate return None def get_category_for_source(source_block: str) -> Optional[MappingCategory]: """Get the category that contains a specific source block.""" for category in MAPPING_CATEGORIES: if source_block in category.sources: return category return None def get_transformation_types() -> list[TransformationType]: """Get all available transformation types.""" return list(TransformationType) def get_person_mapping_for_source(source_block: str) -> Optional[EnrichmentSourceMapping]: """Get the person mapping for a specific source block.""" for mapping in PERSON_MAPPINGS: if mapping.source_block == source_block: return mapping return None def get_person_fields_for_class(class_name: str) -> list[FieldMapping]: """Get all person field mappings that target a specific LinkML class.""" fields = [] for mapping in PERSON_MAPPINGS: for field_mapping in mapping.fields: if field_mapping.target_class == class_name: fields.append(field_mapping) return fields def get_person_sources_for_class(class_name: str) -> list[str]: """Get all person source blocks that contribute to a specific LinkML class.""" sources = [] for mapping 
in PERSON_MAPPINGS: if mapping.linkml_class == class_name: sources.append(mapping.source_block) else: for field_mapping in mapping.fields: if field_mapping.target_class == class_name and mapping.source_block not in sources: sources.append(mapping.source_block) return sources def get_person_category_for_source(source_block: str) -> Optional[MappingCategory]: """Get the person category that contains a specific source block.""" for category in PERSON_CATEGORIES: if source_block in category.sources: return category return None def get_mapping_statistics() -> dict: """Get statistics about the custodian mappings.""" total_fields = sum(len(m.fields) for m in ENRICHMENT_MAPPINGS) required_fields = sum( 1 for m in ENRICHMENT_MAPPINGS for f in m.fields if f.required ) classes = set() for m in ENRICHMENT_MAPPINGS: classes.add(m.linkml_class) for f in m.fields: if f.target_class: classes.add(f.target_class) return { 'total_source_blocks': len(ENRICHMENT_MAPPINGS), 'total_categories': len(MAPPING_CATEGORIES), 'total_fields': total_fields, 'required_fields': required_fields, 'unique_classes': len(classes), 'classes': sorted(classes), } def get_person_mapping_statistics() -> dict: """Get statistics about the person mappings.""" total_fields = sum(len(m.fields) for m in PERSON_MAPPINGS) required_fields = sum( 1 for m in PERSON_MAPPINGS for f in m.fields if f.required ) classes = set() for m in PERSON_MAPPINGS: classes.add(m.linkml_class) for f in m.fields: if f.target_class: classes.add(f.target_class) return { 'total_source_blocks': len(PERSON_MAPPINGS), 'total_categories': len(PERSON_CATEGORIES), 'total_fields': total_fields, 'required_fields': required_fields, 'unique_classes': len(classes), 'classes': sorted(classes), } def get_mappings_for_data_source(data_source: str) -> list[EnrichmentSourceMapping]: """Get all mappings that use a specific data source type.""" return [ m for m in ENRICHMENT_MAPPINGS if m.provenance.source_type == data_source ] def 
get_categories_for_data_source(data_source: str) -> list[MappingCategory]: """Get all categories that contain mappings from a specific data source.""" source_blocks = { m.source_block for m in ENRICHMENT_MAPPINGS if m.provenance.source_type == data_source } return [ c for c in MAPPING_CATEGORIES if any(s in source_blocks for s in c.sources) ] # ============================================================================ # MODULE EXPORTS # ============================================================================ __all__ = [ # Enums 'TransformationType', 'MappingStatus', 'DataTier', # Dataclasses 'FieldExample', 'FieldValidation', 'FieldMapping', 'Provenance', 'EnrichmentSourceMapping', 'MappingCategory', # Data 'MAPPING_CATEGORIES', 'ENRICHMENT_MAPPINGS', 'PERSON_CATEGORIES', 'PERSON_MAPPINGS', # Functions 'get_mapping_for_source', 'get_fields_for_class', 'get_sources_for_class', 'get_typedb_attribute', 'get_rdf_predicate', 'get_category_for_source', 'get_transformation_types', 'get_person_mapping_for_source', 'get_person_fields_for_class', 'get_person_sources_for_class', 'get_person_category_for_source', 'get_mapping_statistics', 'get_person_mapping_statistics', 'get_mappings_for_data_source', 'get_categories_for_data_source', ]