# glam/scripts/sync/mappings.py
# 2026-01-02 02:11:04 +01:00
#
# 6643 lines
# 264 KiB
# Python

"""
mappings.py - Data Transformation Mapping Documentation (Python Port)
This module documents how raw YAML/JSON data from custodian files maps to:
1. LinkML schema classes and slots
2. TypeDB entities and attributes
3. RDF triples and predicates
ARCHITECTURE OVERVIEW:
======================
The Heritage Custodian System uses a "hub architecture" where:
- CustodianHub: Abstract entity with only persistent hc_id
- CustodianObservation: Evidence/claims from a specific source
- ReconstructionActivity: Process that generates standardized aspects
- Four aspects: LegalStatus, Name, Place, Collection (independent temporal lifecycles)
Each enrichment block in YAML (google_maps_enrichment, wikidata_enrichment, etc.)
maps to a SEPARATE CustodianObservation with its own provenance.
DATA FLOW:
==========
    Raw YAML (data/custodian/*.yaml)
               |
        [Transform Layer]
               |
       LinkML Instance Data
               |
        +------+------+
        |      |      |
       RDF  TypeDB  JSON-LD

Ported from: frontend/src/lib/linkml/custodian-data-mappings.ts
Version: 1.0.0
"""
from __future__ import annotations
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Optional
# ============================================================================
# ENUMERATIONS
# ============================================================================
class TransformationType(str, Enum):
    """Kinds of transformation a field mapping applies between source and target.

    Used as the type of ``FieldMapping.transformation``. Members mix in
    ``str``, so each one compares equal to its plain string value
    (``TransformationType.DIRECT == 'direct'``) and can be looked up by that
    value (``TransformationType('direct')``).
    """

    DIRECT = 'direct'                # 1:1 copy, no transformation
    RENAME = 'rename'                # Field name change only
    SPLIT = 'split'                  # One source field -> multiple target fields
    MERGE = 'merge'                  # Multiple source fields -> one target field
    LOOKUP = 'lookup'                # Enum value lookup or reference resolution
    COMPUTED = 'computed'            # Derived/calculated value
    NESTED = 'nested'                # Nested object mapping (object -> object)
    ARRAY_MAP = 'array_map'          # Array -> array, with per-element mapping
    ARRAY_DIRECT = 'array_direct'    # Array copied as-is, elements untouched
    FLATTEN = 'flatten'              # Nested structure -> flat structure
    AGGREGATE = 'aggregate'          # Multiple values -> single aggregate
    TEMPORAL = 'temporal'            # Date/time transformation
    URI_CONSTRUCT = 'uri_construct'  # Construct URI from components
    NORMALIZE = 'normalize'          # Normalize/standardize value format
    CONDITIONAL = 'conditional'      # Depends on the value of other fields
    NOT_MAPPED = 'not_mapped'        # Ontology property intentionally not mapped
class MappingStatus(str, Enum):
    """Coverage status of an ontology property in the mapping documentation.

    Used as the type of ``FieldMapping.status``. Members mix in ``str`` and
    therefore compare equal to their string values.
    """

    MAPPED = 'mapped'              # Property is fully mapped to HC system
    PARTIAL = 'partial'            # Property is partially mapped
    OUT_OF_SCOPE = 'out_of_scope'  # Property is intentionally not mapped
    FUTURE = 'future'              # Property may be mapped in future versions
class DataTier(str, Enum):
    """Data tier classification of a source (per AGENTS.md).

    Used as the type of ``Provenance.data_tier``. Each member's value is
    identical to its name, and members compare equal to that string.
    """

    TIER_1_AUTHORITATIVE = 'TIER_1_AUTHORITATIVE'  # CSV registries (ISIL, Dutch orgs)
    TIER_2_VERIFIED = 'TIER_2_VERIFIED'            # Institutional websites, APIs
    TIER_3_CROWD_SOURCED = 'TIER_3_CROWD_SOURCED'  # Wikidata, OpenStreetMap
    TIER_4_INFERRED = 'TIER_4_INFERRED'            # NLP-extracted from conversations
# ============================================================================
# DATACLASSES
# ============================================================================
@dataclass
class FieldExample:
    """One worked example of a source value and the value it maps to.

    Attached to a mapping through ``FieldMapping.example``.
    """

    # Value as it appears in the source YAML/JSON.
    source_value: Any
    # Value after the transformation has been applied.
    target_value: Any
    # Optional TypeDB-side representation of the value.
    # NOTE(review): no visible mapping sets this - confirm the expected shape.
    typedb_value: Optional[Any] = None
    # Optional RDF statement illustrating the mapping, written as a single
    # "<subject> predicate object ." string.
    rdf_triple: Optional[str] = None
@dataclass
class FieldValidation:
    """Validation rules documented for a single field.

    Attached to a mapping through ``FieldMapping.validation``. Only ``type``
    is required; every other rule defaults to ``None`` (not specified).
    """

    # Expected value kind: 'string' | 'number' | 'boolean' | 'date' | 'uri'
    # | 'enum' | 'array'. The name shadows the builtin but is part of the
    # keyword interface used by existing mappings, so it is kept.
    type: str
    # Pattern the value should match.
    # NOTE(review): presumably a regular expression - confirm against callers.
    pattern: Optional[str] = None
    # Allowed values when ``type`` is 'enum'.
    enum_values: Optional[list[str]] = None
    # Length bounds for the value.
    min_length: Optional[int] = None
    max_length: Optional[int] = None
@dataclass
class FieldMapping:
    """Documentation record for how one source field maps to its targets.

    Describes the source path, the LinkML class/slot it lands in, the kind of
    transformation applied, and the corresponding TypeDB and RDF names.
    The first five fields are required; all others have defaults.
    """

    # JSON path in source YAML (e.g., "google_maps_enrichment.place_id").
    # None for unmapped ontology properties.
    source_path: Optional[str]
    # Human-readable description of the source field.
    source_description: str
    # LinkML class this maps to. None for unmapped ontology properties.
    target_class: Optional[str]
    # LinkML slot name. None for unmapped ontology properties.
    target_slot: Optional[str]
    # Type of transformation applied.
    transformation: TransformationType
    # Human-readable explanation of the transformation.
    transformation_details: Optional[str] = None
    # TypeDB entity type, kebab-case (lowercase words joined by hyphens,
    # e.g. "custodian-observation").
    typedb_entity: Optional[str] = None
    # TypeDB attribute name. None for unmapped ontology properties.
    typedb_attribute: Optional[str] = None
    # RDF predicate in CURIE format (e.g. "schema:url").
    rdf_predicate: Optional[str] = None
    # Whether this field is required.
    required: bool = False
    # Example showing source -> target transformation.
    example: Optional[FieldExample] = None
    # Related/dependent fields.
    related_fields: Optional[list[str]] = None
    # Validation rules.
    validation: Optional[FieldValidation] = None
    # Additional notes or comments about this mapping.
    notes: Optional[str] = None
    # Mapping status for ontology coverage documentation.
    status: Optional[MappingStatus] = None
@dataclass
class Provenance:
    """Provenance information for an enrichment source.

    Attached to a source block through ``EnrichmentSourceMapping.provenance``.
    """

    # Short label for how the data was obtained (e.g. 'computed',
    # 'registry_lookup', 'google_maps_api').
    source_type: str
    # Trust tier of the source.
    data_tier: DataTier
    # Base URL of the API the data comes from, when there is one.
    api_endpoint: Optional[str] = None
    # Free-text refresh cadence (e.g. 'On-demand').
    update_frequency: Optional[str] = None
@dataclass
class EnrichmentSourceMapping:
    """Complete mapping documentation for one enrichment source block.

    Bundles the block-level metadata (name, description, primary LinkML class
    and TypeDB entity, provenance) with the per-field mappings for that block.
    """

    # Source block name in YAML (e.g., "google_maps_enrichment").
    source_block: str
    # Human-readable description.
    description: str
    # Primary LinkML class this maps to.
    linkml_class: str
    # Primary TypeDB entity.
    typedb_entity: str
    # Provenance information.
    provenance: Provenance
    # All field mappings for this source. A default_factory gives every
    # instance its own list instead of one shared mutable default.
    fields: list[FieldMapping] = field(default_factory=list)
    # Extended description with usage notes.
    detailed_description: Optional[str] = None
    # Classes that can be generated from this source.
    generated_classes: Optional[list[str]] = None
    # Example YAML snippet.
    example_yaml: Optional[str] = None
@dataclass
class MappingCategory:
    """Category grouping for mappings in the UI.

    Carries English and Dutch (``*_nl``) display texts plus the names of the
    source blocks that belong to the category.
    """

    # Stable identifier of the category (e.g. 'identity').
    id: str
    # Display name, English and Dutch.
    name: str
    name_nl: str
    # Short description, English and Dutch.
    description: str
    description_nl: str
    # Icon shown with the category (an emoji in the entries of this file).
    icon: str
    # source_block names of the mappings grouped under this category.
    sources: list[str] = field(default_factory=list)
# ============================================================================
# MAPPING CATEGORIES (23 categories grouping source blocks for the UI)
# ============================================================================
# Ordered list of UI categories. Each entry carries English and Dutch display
# texts, an icon, and the names of the source blocks grouped under it.
MAPPING_CATEGORIES: list[MappingCategory] = [
    # ---------------------------------------------------------------------
    # Core categories
    # ---------------------------------------------------------------------
    MappingCategory(
        id='identity',
        name='Identity & Identification',
        name_nl='Identiteit & Identificatie',
        description='Core identity fields: GHCID, names, identifiers',
        description_nl='Kernidentiteitsvelden: GHCID, namen, identificatiecodes',
        icon='🪪',
        sources=[
            'ghcid',
            'identifiers',
            'custodian_name',
        ],
    ),
    MappingCategory(
        id='location',
        name='Location & Geography',
        name_nl='Locatie & Geografie',
        description='Physical location, addresses, coordinates',
        description_nl='Fysieke locatie, adressen, coördinaten',
        icon='📍',
        sources=[
            'location',
            'google_maps_enrichment',
        ],
    ),
    MappingCategory(
        id='external',
        name='External Data Sources',
        name_nl='Externe Databronnen',
        description='Enrichment from external APIs and databases',
        description_nl="Verrijking van externe API's en databases",
        icon='🔗',
        sources=[
            'wikidata_enrichment',
            'museum_register_enrichment',
            'genealogiewerkbalk_enrichment',
        ],
    ),
    MappingCategory(
        id='web',
        name='Web & Digital Presence',
        name_nl='Web & Digitale Aanwezigheid',
        description='Website data, digital platforms, social media',
        description_nl='Websitegegevens, digitale platformen, sociale media',
        icon='🌐',
        sources=[
            'web_enrichment',
            'web_claims',
            'digital_platforms',
            'youtube_enrichment',
        ],
    ),
    MappingCategory(
        id='legal',
        name='Legal & Organization',
        name_nl='Juridisch & Organisatie',
        description='Legal status, organizational structure',
        description_nl='Juridische status, organisatiestructuur',
        icon='⚖️',
        sources=[
            'legal_status',
            'original_entry',
        ],
    ),
    MappingCategory(
        id='temporal',
        name='Temporal & Provenance',
        name_nl='Temporeel & Herkomst',
        description='Time spans, data provenance, versioning',
        description_nl='Tijdspannes, data-herkomst, versiebeheer',
        icon='⏱️',
        sources=[
            'timespan',
            'provenance',
        ],
    ),
    MappingCategory(
        id='heritage',
        name='Heritage Specific',
        name_nl='Erfgoed Specifiek',
        description='UNESCO, collections, domain-specific data',
        description_nl='UNESCO, collecties, domeinspecifieke gegevens',
        icon='🏛️',
        sources=['unesco_ich_enrichment'],
    ),
    # ---------------------------------------------------------------------
    # Phase 1 additions: schema class coverage categories
    # ---------------------------------------------------------------------
    MappingCategory(
        id='archive_types',
        name='Archive Types',
        name_nl='Archieftypen',
        description='Specialized archive classification types (97 classes): academic, audiovisual, church, municipal, national, etc.',
        description_nl='Gespecialiseerde archiefclassificatietypen (97 klassen): academisch, audiovisueel, kerkelijk, gemeentelijk, nationaal, etc.',
        icon='📦',
        sources=[
            'archive_type_academic',
            'archive_type_audiovisual',
            'archive_type_church',
            'archive_type_corporate',
            'archive_type_government',
            'archive_type_municipal',
            'archive_type_national',
            'archive_type_regional',
            'archive_type_specialized',
            'archive_type_thematic',
        ],
    ),
    MappingCategory(
        id='organizational_structure',
        name='Organizational Structure',
        name_nl='Organisatiestructuur',
        description='Organizational hierarchy and structure classes (30+ classes): departments, divisions, branches, parent organizations',
        description_nl='Organisatiehiërarchie en structuurklassen (30+ klassen): afdelingen, divisies, vestigingen, moederorganisaties',
        icon='🏢',
        sources=[
            'org_structure_hierarchy',
            'org_structure_administrative',
            'org_structure_subdivision',
        ],
    ),
    MappingCategory(
        id='heritage_cultural',
        name='Heritage & Cultural Sites',
        name_nl='Erfgoed & Culturele Locaties',
        description='World heritage sites, intangible heritage, cultural institutions (15+ classes)',
        description_nl='Werelderfgoedlocaties, immaterieel erfgoed, culturele instellingen (15+ klassen)',
        icon='🗿',
        sources=[
            'heritage_world_sites',
            'heritage_intangible',
            'heritage_national_treasures',
        ],
    ),
    MappingCategory(
        id='classification_types',
        name='Classification Types',
        name_nl='Classificatietypen',
        description='Type classes for custodian classification (32 classes): MuseumType, LibraryType, ArchiveOrganizationType, etc.',
        description_nl='Typeklassen voor bronhouderclassificatie (32 klassen): MuseumType, BibliotheekType, ArchiefOrganisatieType, etc.',
        icon='🏷️',
        sources=[
            'type_classes_glam',
            'type_classes_digital',
            'type_classes_organizational',
        ],
    ),
    # ---------------------------------------------------------------------
    # Phase 2 additions: remaining schema class coverage categories
    # ---------------------------------------------------------------------
    MappingCategory(
        id='place_location',
        name='Place & Location',
        name_nl='Plaats & Locatie',
        description='Geographic and spatial location classes (8 classes): settlements, countries, custodian places, feature places',
        description_nl='Geografische en ruimtelijke locatieklassen (8 klassen): nederzettingen, landen, bronhouderplaatsen, kenmerkplaatsen',
        icon='📍',
        sources=[
            'place_geographic',
            'place_custodian_specific',
        ],
    ),
    MappingCategory(
        id='collections',
        name='Collections & Holdings',
        name_nl='Collecties & Bezittingen',
        description='Collection management and holdings classes (6 classes): collections, special collections, collection management systems',
        description_nl='Collectiebeheer en bezitklassen (6 klassen): collecties, bijzondere collecties, collectiebeheersystemen',
        icon='🗃️',
        sources=[
            'collection_core',
            'collection_management',
        ],
    ),
    MappingCategory(
        id='person_staff',
        name='Person & Staff',
        name_nl='Persoon & Personeel',
        description='Person and staff-related classes (9 classes): profiles, connections, work experience, credentials',
        description_nl="Persoon- en personeelgerelateerde klassen (9 klassen): profielen, connecties, werkervaring, diploma's",
        icon='👥',
        sources=[
            'person_profile_extended',
            'person_work_education',
        ],
    ),
    MappingCategory(
        id='digital_api',
        name='Digital & API Services',
        name_nl='Digitaal & API Diensten',
        description='Digital platforms and API endpoint classes (11 classes): web portals, OAI-PMH, search APIs, file APIs',
        description_nl="Digitale platformen en API-eindpuntklassen (11 klassen): webportalen, OAI-PMH, zoek-API's, bestand-API's",
        icon='🔌',
        sources=[
            'digital_platforms_extended',
            'api_endpoints',
        ],
    ),
    MappingCategory(
        id='video_media',
        name='Video & Social Media',
        name_nl='Video & Sociale Media',
        description='Video content and social media classes (11 classes): video annotations, chapters, social media posts/profiles',
        description_nl='Video-inhoud en sociale mediaklassen (11 klassen): video-annotaties, hoofdstukken, sociale media posts/profielen',
        icon='🎬',
        sources=[
            'video_content',
            'social_media_content',
        ],
    ),
    MappingCategory(
        id='legal_admin',
        name='Legal & Administrative',
        name_nl='Juridisch & Administratief',
        description='Legal, policy, and administrative classes (9 classes): access policies, budgets, projects, registration',
        description_nl='Juridische, beleids- en administratieve klassen (9 klassen): toegangsbeleid, budgetten, projecten, registratie',
        icon='⚖️',
        sources=[
            'legal_policies',
            'administrative_records',
        ],
    ),
    MappingCategory(
        id='finding_aids',
        name='Finding Aids & Standards',
        name_nl='Toegangen & Standaarden',
        description='Finding aids, standards, and documentation classes (5 classes): finding aids, source documents, standards',
        description_nl='Toegangen, standaarden en documentatieklassen (5 klassen): toegangen, brondocumenten, standaarden',
        icon='📑',
        sources=['finding_aids_standards'],
    ),
    MappingCategory(
        id='reconstruction',
        name='Reconstruction & Provenance',
        name_nl='Reconstructie & Herkomst',
        description='Entity reconstruction and provenance tracking classes (4 classes): reconstructed entities, activities, agents',
        description_nl='Entiteitsreconstructie en herkomsttrackingklassen (4 klassen): gereconstrueerde entiteiten, activiteiten, agenten',
        icon='🔄',
        sources=['reconstruction_provenance'],
    ),
    MappingCategory(
        id='storage_facilities',
        name='Storage & Facilities',
        name_nl='Opslag & Faciliteiten',
        description='Storage conditions and facility classes (7 classes): storage types, conditions, education centers',
        description_nl='Opslagcondities en faciliteitenklassen (7 klassen): opslagtypen, condities, onderwijscentra',
        icon='🏪',
        sources=['storage_facilities'],
    ),
    MappingCategory(
        id='funding',
        name='Funding & Grants',
        name_nl='Financiering & Subsidies',
        description='Funding and grant-related classes (3 classes): funding agendas, requirements, applications',
        description_nl="Financiering- en subsidieklassen (3 klassen): financieringsagenda's, vereisten, aanvragen",
        icon='💰',
        sources=['funding_grants'],
    ),
    MappingCategory(
        id='language_naming',
        name='Language & Naming',
        name_nl='Taal & Naamgeving',
        description='Language and naming classes (4 classes): language codes, proficiency, appellations',
        description_nl='Taal- en naamgevingsklassen (4 klassen): taalcodes, taalvaardigheid, benamingen',
        icon='🗣️',
        sources=['language_naming'],
    ),
    MappingCategory(
        id='specialized_archives_intl',
        name='Specialized Archives (International)',
        name_nl='Gespecialiseerde Archieven (Internationaal)',
        description='Country-specific specialized archive types (19 classes): German, Swedish, French, Czech archive types',
        description_nl='Landspecifieke gespecialiseerde archieftypen (19 klassen): Duitse, Zweedse, Franse, Tsjechische archieftypen',
        icon='🌍',
        sources=[
            'archives_german',
            'archives_swedish',
            'archives_french',
            'archives_other',
        ],
    ),
]
# ============================================================================
# ENRICHMENT SOURCE MAPPINGS
# ============================================================================
ENRICHMENT_MAPPINGS: list[EnrichmentSourceMapping] = [
# -------------------------------------------------------------------------
# GHCID - Global Heritage Custodian Identifier
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='ghcid',
description='Global Heritage Custodian Identifier - persistent unique identifier',
detailed_description="""
The GHCID is the persistent unique identifier for every heritage custodian.
It follows the format: {COUNTRY}-{REGION}-{SETTLEMENT}-{TYPE}-{ABBREV}
Example: NL-NH-AMS-M-RM (Rijksmuseum, Amsterdam, Netherlands)
GHCIDs are deterministically generated and hashed to multiple UUID formats
for different use cases (UUID v5 for primary, UUID v8 for future-proofing).
""".strip(),
linkml_class='GHCID',
typedb_entity='ghcid',
provenance=Provenance(
source_type='computed',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
fields=[
FieldMapping(
source_path='ghcid.ghcid_current',
source_description='Current GHCID string',
target_class='GHCID',
target_slot='ghcid_string',
transformation=TransformationType.DIRECT,
typedb_entity='ghcid',
typedb_attribute='ghcid-string',
rdf_predicate='hc:ghcidString',
required=True,
example=FieldExample(
source_value='NL-NH-AMS-M-RM',
target_value='NL-NH-AMS-M-RM',
rdf_triple='<https://w3id.org/hc/NL-NH-AMS-M-RM> hc:ghcidString "NL-NH-AMS-M-RM" .',
),
),
FieldMapping(
source_path='ghcid.ghcid_uuid',
source_description='UUID v5 derived from GHCID string',
target_class='GHCID',
target_slot='ghcid_uuid',
transformation=TransformationType.COMPUTED,
transformation_details='UUID v5 generated using SHA-1 hash of GHCID string with heritage namespace',
typedb_entity='ghcid',
typedb_attribute='ghcid-uuid',
rdf_predicate='hc:ghcidUuid',
required=True,
example=FieldExample(
source_value='550e8400-e29b-5d4f-a716-446655440000',
target_value='550e8400-e29b-5d4f-a716-446655440000',
),
),
FieldMapping(
source_path='ghcid.ghcid_numeric',
source_description='64-bit numeric ID for database optimization',
target_class='GHCID',
target_slot='ghcid_numeric',
transformation=TransformationType.COMPUTED,
transformation_details='SHA-256 hash truncated to 64-bit integer',
typedb_entity='ghcid',
typedb_attribute='ghcid-numeric',
required=False,
),
FieldMapping(
source_path='ghcid.location_resolution',
source_description='GeoNames resolution metadata',
target_class='GHCID',
target_slot='location_resolution',
transformation=TransformationType.NESTED,
transformation_details='Maps to LocationResolution class with GeoNames provenance',
typedb_entity='location-resolution',
required=False,
),
],
example_yaml="""
ghcid:
ghcid_current: NL-NH-AMS-M-RM
ghcid_uuid: 550e8400-e29b-5d4f-a716-446655440000
ghcid_numeric: 213324328442227739
location_resolution:
method: REVERSE_GEOCODE
geonames_id: 2759794
geonames_name: Amsterdam
settlement_code: AMS
admin1_code: "07"
region_code: NH
""".strip(),
),
# -------------------------------------------------------------------------
# Identifiers
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='identifiers',
description='External identifiers from various sources',
detailed_description="""
Links to external identifier systems including:
- ISIL codes (International Standard Identifier for Libraries)
- Wikidata QIDs
- VIAF (Virtual International Authority File)
- KvK numbers (Dutch Chamber of Commerce)
- Museum Register numbers
- And more...
""".strip(),
linkml_class='Identifier',
typedb_entity='identifier',
provenance=Provenance(
source_type='registry_lookup',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
fields=[
FieldMapping(
source_path='identifiers[].identifier_scheme',
source_description='Identifier system/scheme name',
target_class='Identifier',
target_slot='identifier_scheme',
transformation=TransformationType.LOOKUP,
transformation_details='Maps to IdentifierSchemeEnum',
typedb_entity='identifier',
typedb_attribute='scheme',
rdf_predicate='hc:identifierScheme',
required=True,
validation=FieldValidation(
type='enum',
enum_values=['GHCID', 'ISIL', 'Wikidata', 'VIAF', 'KvK', 'MuseumRegister', 'NDE', 'Website'],
),
example=FieldExample(
source_value='ISIL',
target_value='ISIL',
),
),
FieldMapping(
source_path='identifiers[].identifier_value',
source_description='The identifier value/code',
target_class='Identifier',
target_slot='identifier_value',
transformation=TransformationType.DIRECT,
typedb_entity='identifier',
typedb_attribute='value',
rdf_predicate='hc:identifierValue',
required=True,
example=FieldExample(
source_value='NL-AmRM',
target_value='NL-AmRM',
),
),
FieldMapping(
source_path='identifiers[].identifier_url',
source_description='URL to the identifier record',
target_class='Identifier',
target_slot='identifier_url',
transformation=TransformationType.DIRECT,
typedb_entity='identifier',
typedb_attribute='url',
rdf_predicate='schema:url',
required=False,
example=FieldExample(
source_value='https://www.wikidata.org/wiki/Q190804',
target_value='https://www.wikidata.org/wiki/Q190804',
),
),
],
example_yaml="""
identifiers:
- identifier_scheme: GHCID
identifier_value: NL-NH-AMS-M-RM
- identifier_scheme: ISIL
identifier_value: NL-AmRM
- identifier_scheme: Wikidata
identifier_value: Q190804
identifier_url: https://www.wikidata.org/wiki/Q190804
- identifier_scheme: VIAF
identifier_value: "148691498"
""".strip(),
),
# -------------------------------------------------------------------------
# Custodian Name
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='custodian_name',
description='Consensus name with confidence scoring',
detailed_description="""
The canonical name determined through multi-source reconciliation.
Includes emic (native language) name with legal form stripped per Rule 8.
Confidence scores indicate how many sources agree on each name variant.
""".strip(),
linkml_class='CustodianName',
typedb_entity='custodian-name',
provenance=Provenance(
source_type='reconciliation',
data_tier=DataTier.TIER_2_VERIFIED,
),
fields=[
FieldMapping(
source_path='custodian_name.consensus_name',
source_description='Reconciled canonical name',
target_class='CustodianName',
target_slot='name_string',
transformation=TransformationType.DIRECT,
typedb_entity='custodian-name',
typedb_attribute='name-string',
rdf_predicate='skos:prefLabel',
required=True,
example=FieldExample(
source_value='Rijksmuseum',
target_value='Rijksmuseum',
rdf_triple='<https://w3id.org/hc/NL-NH-AMS-M-RM> skos:prefLabel "Rijksmuseum"@nl .',
),
),
FieldMapping(
source_path='custodian_name.emic_name',
source_description='Native language name (legal form stripped)',
target_class='CustodianName',
target_slot='emic_name',
transformation=TransformationType.NORMALIZE,
transformation_details='Legal form terms (Stichting, Foundation, etc.) removed per Rule 8',
typedb_entity='custodian-name',
typedb_attribute='emic-name',
rdf_predicate='hc:emicName',
required=False,
example=FieldExample(
source_value='Rijksmuseum Amsterdam',
target_value='Rijksmuseum Amsterdam',
),
),
FieldMapping(
source_path='custodian_name.confidence_score',
source_description='Confidence in name accuracy (0-1)',
target_class='CustodianName',
target_slot='confidence',
transformation=TransformationType.DIRECT,
typedb_entity='custodian-name',
typedb_attribute='confidence-score',
required=False,
validation=FieldValidation(type='number'),
example=FieldExample(
source_value=0.95,
target_value=0.95,
),
),
FieldMapping(
source_path='custodian_name.alternative_names',
source_description='List of alternative/historical names',
target_class='CustodianName',
target_slot='alternative_names',
transformation=TransformationType.ARRAY_DIRECT,
typedb_entity='custodian-name',
typedb_attribute='alternative-names',
rdf_predicate='skos:altLabel',
required=False,
),
],
example_yaml="""
custodian_name:
consensus_name: Rijksmuseum
emic_name: Rijksmuseum Amsterdam
confidence_score: 0.95
alternative_names:
- Rijksmuseum Amsterdam
- Netherlands State Museum
""".strip(),
),
# -------------------------------------------------------------------------
# Google Maps Enrichment
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='google_maps_enrichment',
description='Location and business data from Google Maps Places API',
detailed_description="""
Rich location data including coordinates, address, opening hours,
ratings, reviews, and photos from Google Maps Places API.
Each enrichment creates a CustodianObservation with google_maps_api provenance.
""".strip(),
linkml_class='CustodianObservation',
typedb_entity='custodian-observation',
provenance=Provenance(
source_type='google_maps_api',
data_tier=DataTier.TIER_2_VERIFIED,
api_endpoint='https://maps.googleapis.com/maps/api/place/',
update_frequency='On-demand',
),
generated_classes=['Place', 'GeoCoordinates', 'OpeningHours'],
fields=[
FieldMapping(
source_path='google_maps_enrichment.place_id',
source_description='Google Maps Place ID',
target_class='CustodianObservation',
target_slot='external_id',
transformation=TransformationType.DIRECT,
typedb_entity='custodian-observation',
typedb_attribute='external-id',
rdf_predicate='schema:identifier',
required=True,
example=FieldExample(
source_value='ChIJ5Ra7we4JxkcRhYVAaq5zQ9U',
target_value='ChIJ5Ra7we4JxkcRhYVAaq5zQ9U',
),
),
FieldMapping(
source_path='google_maps_enrichment.coordinates.latitude',
source_description='Latitude coordinate',
target_class='GeoCoordinates',
target_slot='latitude',
transformation=TransformationType.NESTED,
typedb_entity='geo-coordinates',
typedb_attribute='latitude',
rdf_predicate='schema:latitude',
required=True,
validation=FieldValidation(type='number'),
example=FieldExample(
source_value=52.3599976,
target_value=52.3599976,
),
),
FieldMapping(
source_path='google_maps_enrichment.coordinates.longitude',
source_description='Longitude coordinate',
target_class='GeoCoordinates',
target_slot='longitude',
transformation=TransformationType.NESTED,
typedb_entity='geo-coordinates',
typedb_attribute='longitude',
rdf_predicate='schema:longitude',
required=True,
validation=FieldValidation(type='number'),
example=FieldExample(
source_value=4.8852188,
target_value=4.8852188,
),
),
FieldMapping(
source_path='google_maps_enrichment.formatted_address',
source_description='Human-readable formatted address',
target_class='Place',
target_slot='formatted_address',
transformation=TransformationType.DIRECT,
typedb_entity='place',
typedb_attribute='formatted-address',
rdf_predicate='schema:address',
required=False,
example=FieldExample(
source_value='Museumstraat 1, 1071 XX Amsterdam, Netherlands',
target_value='Museumstraat 1, 1071 XX Amsterdam, Netherlands',
),
),
FieldMapping(
source_path='google_maps_enrichment.rating',
source_description='Average rating (1-5)',
target_class='CustodianObservation',
target_slot='rating',
transformation=TransformationType.DIRECT,
typedb_entity='custodian-observation',
typedb_attribute='rating',
rdf_predicate='schema:aggregateRating',
required=False,
validation=FieldValidation(type='number'),
example=FieldExample(
source_value=4.6,
target_value=4.6,
),
),
FieldMapping(
source_path='google_maps_enrichment.total_ratings',
source_description='Total number of ratings',
target_class='CustodianObservation',
target_slot='review_count',
transformation=TransformationType.DIRECT,
typedb_entity='custodian-observation',
typedb_attribute='review-count',
rdf_predicate='schema:reviewCount',
required=False,
validation=FieldValidation(type='number'),
example=FieldExample(
source_value=47832,
target_value=47832,
),
),
FieldMapping(
source_path='google_maps_enrichment.opening_hours',
source_description='Weekly opening hours schedule',
target_class='OpeningHours',
target_slot='opening_hours_specification',
transformation=TransformationType.NESTED,
transformation_details='Maps to OpeningHoursSpecification array per day',
typedb_entity='opening-hours',
rdf_predicate='schema:openingHoursSpecification',
required=False,
),
FieldMapping(
source_path='google_maps_enrichment.website',
source_description='Official website URL',
target_class='CustodianObservation',
target_slot='website',
transformation=TransformationType.DIRECT,
typedb_entity='custodian-observation',
typedb_attribute='website',
rdf_predicate='schema:url',
required=False,
validation=FieldValidation(type='uri'),
example=FieldExample(
source_value='https://www.rijksmuseum.nl/',
target_value='https://www.rijksmuseum.nl/',
),
),
FieldMapping(
source_path='google_maps_enrichment.phone',
source_description='Phone number',
target_class='CustodianObservation',
target_slot='telephone',
transformation=TransformationType.DIRECT,
typedb_entity='custodian-observation',
typedb_attribute='telephone',
rdf_predicate='schema:telephone',
required=False,
example=FieldExample(
source_value='+31 20 674 7000',
target_value='+31 20 674 7000',
),
),
],
example_yaml="""
google_maps_enrichment:
place_id: ChIJ5Ra7we4JxkcRhYVAaq5zQ9U
name: Rijksmuseum
coordinates:
latitude: 52.3599976
longitude: 4.8852188
formatted_address: Museumstraat 1, 1071 XX Amsterdam, Netherlands
rating: 4.6
total_ratings: 47832
website: https://www.rijksmuseum.nl/
phone: +31 20 674 7000
opening_hours:
Monday: 9:00 AM - 5:00 PM
Tuesday: 9:00 AM - 5:00 PM
# ...
""".strip(),
),
# -------------------------------------------------------------------------
# Wikidata Enrichment
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='wikidata_enrichment',
description='Linked data from Wikidata knowledge graph',
detailed_description="""
Rich semantic data from Wikidata including:
- Multilingual labels and descriptions
- Sitelinks to Wikipedia articles
- Structured properties (coordinates, founding date, etc.)
- Instance-of relationships for type classification
Creates a CustodianObservation with wikidata_api provenance.
""".strip(),
linkml_class='CustodianObservation',
typedb_entity='custodian-observation',
provenance=Provenance(
source_type='wikidata_api',
data_tier=DataTier.TIER_3_CROWD_SOURCED,
api_endpoint='https://www.wikidata.org/wiki/Special:EntityData/',
),
generated_classes=['WikidataEntity', 'Sitelink'],
fields=[
FieldMapping(
source_path='wikidata_enrichment.entity_id',
source_description='Wikidata Q-ID',
target_class='CustodianObservation',
target_slot='external_id',
transformation=TransformationType.DIRECT,
typedb_entity='custodian-observation',
typedb_attribute='external-id',
rdf_predicate='schema:identifier',
required=True,
example=FieldExample(
source_value='Q190804',
target_value='Q190804',
rdf_triple='<https://w3id.org/hc/obs/Q190804> schema:identifier "Q190804" .',
),
),
FieldMapping(
source_path='wikidata_enrichment.labels',
source_description='Multilingual labels',
target_class='CustodianObservation',
target_slot='labels',
transformation=TransformationType.NESTED,
transformation_details='Language-tagged strings (e.g., {"en": "Rijksmuseum", "nl": "Rijksmuseum"})',
typedb_entity='custodian-observation',
rdf_predicate='rdfs:label',
required=False,
),
FieldMapping(
source_path='wikidata_enrichment.descriptions',
source_description='Multilingual descriptions',
target_class='CustodianObservation',
target_slot='descriptions',
transformation=TransformationType.NESTED,
typedb_entity='custodian-observation',
rdf_predicate='schema:description',
required=False,
),
FieldMapping(
source_path='wikidata_enrichment.sitelinks',
source_description='Links to Wikipedia articles',
target_class='Sitelink',
target_slot='sitelinks',
transformation=TransformationType.ARRAY_MAP,
transformation_details='Each sitelink maps to Wikipedia article URL',
typedb_entity='sitelink',
rdf_predicate='schema:sameAs',
required=False,
),
FieldMapping(
source_path='wikidata_enrichment.instance_of',
source_description='Wikidata type classification (P31)',
target_class='CustodianObservation',
target_slot='instance_of',
transformation=TransformationType.LOOKUP,
transformation_details='Maps Q-ID to CustodianTypeEnum',
typedb_entity='custodian-observation',
typedb_attribute='wikidata-type',
rdf_predicate='wdt:P31',
required=False,
example=FieldExample(
source_value='Q33506',
target_value='MUSEUM',
),
),
FieldMapping(
source_path='wikidata_enrichment.coordinates',
source_description='Geographic coordinates from Wikidata (P625)',
target_class='GeoCoordinates',
target_slot='coordinates',
transformation=TransformationType.NESTED,
typedb_entity='geo-coordinates',
rdf_predicate='wdt:P625',
required=False,
),
FieldMapping(
source_path='wikidata_enrichment.founding_date',
source_description='Date of establishment (P571)',
target_class='CustodianObservation',
target_slot='founding_date',
transformation=TransformationType.TEMPORAL,
transformation_details='Wikidata time format to ISO 8601',
typedb_entity='custodian-observation',
typedb_attribute='founding-date',
rdf_predicate='wdt:P571',
required=False,
validation=FieldValidation(type='date'),
example=FieldExample(
source_value='+1800-01-01T00:00:00Z',
target_value='1800-01-01',
),
),
],
example_yaml="""
wikidata_enrichment:
entity_id: Q190804
labels:
en: Rijksmuseum
nl: Rijksmuseum
de: Rijksmuseum
fr: Rijksmuseum
descriptions:
en: Dutch national museum in Amsterdam
nl: Nederlands nationaal museum in Amsterdam
sitelinks:
enwiki: Rijksmuseum
nlwiki: Rijksmuseum
dewiki: Rijksmuseum
instance_of:
id: Q33506
label: museum
coordinates:
latitude: 52.36
longitude: 4.885
founding_date: "+1800-01-01T00:00:00Z"
""".strip(),
),
# -------------------------------------------------------------------------
# Location (Canonical)
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
# Canonical `location` block -> LinkML `Place` / TypeDB `place`.
# Declared as reconciled, TIER_2_VERIFIED data. Only `city` and `country`
# are required; region, postal code and street address are optional.
source_block='location',
description='Normalized canonical location',
detailed_description="""
The authoritative location record after reconciliation from multiple sources.
This is the single source of truth for the custodian's physical location.
""".strip(),
linkml_class='Place',
typedb_entity='place',
provenance=Provenance(
source_type='reconciliation',
data_tier=DataTier.TIER_2_VERIFIED,
),
fields=[
# city: copied verbatim into schema:addressLocality.
FieldMapping(
source_path='location.city',
source_description='City name',
target_class='Place',
target_slot='city',
transformation=TransformationType.DIRECT,
typedb_entity='place',
typedb_attribute='city',
rdf_predicate='schema:addressLocality',
required=True,
example=FieldExample(
source_value='Amsterdam',
target_value='Amsterdam',
),
),
# country: two uppercase letters (see pattern). The LinkML slot is
# `country` while the TypeDB attribute is named `country-code`.
FieldMapping(
source_path='location.country',
source_description='ISO 3166-1 alpha-2 country code',
target_class='Place',
target_slot='country',
transformation=TransformationType.LOOKUP,
transformation_details='Maps to CountryCodeEnum',
typedb_entity='place',
typedb_attribute='country-code',
rdf_predicate='schema:addressCountry',
required=True,
validation=FieldValidation(
type='enum',
pattern='^[A-Z]{2}$',
),
example=FieldExample(
source_value='NL',
target_value='NL',
),
),
# region: optional, copied verbatim.
FieldMapping(
source_path='location.region',
source_description='Region/province name',
target_class='Place',
target_slot='region',
transformation=TransformationType.DIRECT,
typedb_entity='place',
typedb_attribute='region',
rdf_predicate='schema:addressRegion',
required=False,
example=FieldExample(
source_value='North Holland',
target_value='North Holland',
),
),
# postal_code: optional, copied verbatim; no format validation is declared.
FieldMapping(
source_path='location.postal_code',
source_description='Postal/ZIP code',
target_class='Place',
target_slot='postal_code',
transformation=TransformationType.DIRECT,
typedb_entity='place',
typedb_attribute='postal-code',
rdf_predicate='schema:postalCode',
required=False,
example=FieldExample(
source_value='1071 XX',
target_value='1071 XX',
),
),
# street_address: optional, copied verbatim.
FieldMapping(
source_path='location.street_address',
source_description='Street address',
target_class='Place',
target_slot='street_address',
transformation=TransformationType.DIRECT,
typedb_entity='place',
typedb_attribute='street-address',
rdf_predicate='schema:streetAddress',
required=False,
example=FieldExample(
source_value='Museumstraat 1',
target_value='Museumstraat 1',
),
),
],
# Sample source block; every key shown here has a FieldMapping above.
example_yaml="""
location:
city: Amsterdam
country: NL
region: North Holland
postal_code: 1071 XX
street_address: Museumstraat 1
""".strip(),
),
# -------------------------------------------------------------------------
# Web Enrichment
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
# `web_enrichment` block -> LinkML `WebObservation` / TypeDB `web-observation`.
# Declared as scraped, TIER_2_VERIFIED data that also yields `WebClaim`
# instances (see generated_classes).
source_block='web_enrichment',
description='Archived website data and scraped content',
detailed_description="""
Content extracted from the institution's website using web scraping tools.
Includes organization details, collections, exhibitions, contact info, etc.
All claims must have XPath provenance per Rule 6.
""".strip(),
linkml_class='WebObservation',
typedb_entity='web-observation',
provenance=Provenance(
source_type='web_scrape',
data_tier=DataTier.TIER_2_VERIFIED,
),
generated_classes=['WebClaim'],
fields=[
# source_url: required; validated as a URI.
FieldMapping(
source_path='web_enrichment.source_url',
source_description='URL of scraped page',
target_class='WebObservation',
target_slot='source_url',
transformation=TransformationType.DIRECT,
typedb_entity='web-observation',
typedb_attribute='source-url',
rdf_predicate='prov:wasDerivedFrom',
required=True,
validation=FieldValidation(type='uri'),
),
# retrieved_on: required archive timestamp, validated as a date.
FieldMapping(
source_path='web_enrichment.retrieved_on',
source_description='Timestamp when page was archived',
target_class='WebObservation',
target_slot='retrieved_on',
transformation=TransformationType.TEMPORAL,
typedb_entity='web-observation',
typedb_attribute='retrieved-on',
rdf_predicate='prov:generatedAtTime',
required=True,
validation=FieldValidation(type='date'),
),
# html_file: stored under the differently named slot `archive_path`.
# No rdf_predicate is specified for this mapping.
# NOTE(review): the key is renamed, so TransformationType.RENAME may be
# the more precise label than DIRECT - confirm against file convention.
FieldMapping(
source_path='web_enrichment.html_file',
source_description='Path to archived HTML file',
target_class='WebObservation',
target_slot='archive_path',
transformation=TransformationType.DIRECT,
typedb_entity='web-observation',
typedb_attribute='archive-path',
required=False,
),
# organization_details: nested object mapped to `extracted_content`.
# Neither typedb_attribute nor rdf_predicate is specified here.
FieldMapping(
source_path='web_enrichment.organization_details',
source_description='Extracted organization information',
target_class='WebObservation',
target_slot='extracted_content',
transformation=TransformationType.NESTED,
typedb_entity='web-observation',
required=False,
),
],
# NOTE(review): `retrieval_agent` appears in this sample but has no
# FieldMapping in the list above.
example_yaml="""
web_enrichment:
source_url: https://www.rijksmuseum.nl/en/about-us
retrieved_on: "2025-01-15T10:30:00Z"
html_file: web/NL-NH-AMS-M-RM/rijksmuseum.nl/about-us.html
retrieval_agent: firecrawl
organization_details:
mission: "To connect people with art and history"
established: "1800"
collection_size: "1 million objects"
""".strip(),
),
# -------------------------------------------------------------------------
# Web Claims
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
# `web_claims` list -> LinkML `WebClaim` / TypeDB `web-claim`.
# Source paths use `web_claims[]`, i.e. each mapping applies per list item.
# claim_type, claim_value, xpath and source_url are all required.
source_block='web_claims',
description='Verified claims extracted from websites with XPath provenance',
detailed_description="""
Individual facts extracted from web pages with full provenance chain.
Each claim MUST have XPath pointer to exact location in archived HTML.
Claims without XPath provenance are fabricated and must be removed per Rule 6.
""".strip(),
linkml_class='WebClaim',
typedb_entity='web-claim',
provenance=Provenance(
source_type='web_extraction',
data_tier=DataTier.TIER_2_VERIFIED,
),
fields=[
# claim_type: restricted to the enum_values listed in the validation.
FieldMapping(
source_path='web_claims[].claim_type',
source_description='Type of claim (full_name, email, phone, etc.)',
target_class='WebClaim',
target_slot='claim_type',
transformation=TransformationType.LOOKUP,
typedb_entity='web-claim',
typedb_attribute='claim-type',
rdf_predicate='hc:claimType',
required=True,
validation=FieldValidation(
type='enum',
enum_values=['full_name', 'description', 'email', 'phone', 'address', 'opening_hours', 'social_media'],
),
),
# claim_value: the extracted value, copied verbatim.
FieldMapping(
source_path='web_claims[].claim_value',
source_description='The extracted value',
target_class='WebClaim',
target_slot='claim_value',
transformation=TransformationType.DIRECT,
typedb_entity='web-claim',
typedb_attribute='claim-value',
rdf_predicate='rdf:value',
required=True,
),
# xpath: required locator of the element holding the value.
FieldMapping(
source_path='web_claims[].xpath',
source_description='XPath to element containing value',
target_class='WebClaim',
target_slot='xpath',
transformation=TransformationType.DIRECT,
typedb_entity='web-claim',
typedb_attribute='xpath',
rdf_predicate='hc:xpath',
required=True,
),
# source_url: required; validated as a URI.
FieldMapping(
source_path='web_claims[].source_url',
source_description='URL where claim was extracted',
target_class='WebClaim',
target_slot='source_url',
transformation=TransformationType.DIRECT,
typedb_entity='web-claim',
typedb_attribute='source-url',
rdf_predicate='prov:wasDerivedFrom',
required=True,
validation=FieldValidation(type='uri'),
),
# xpath_match_score: optional number stored as `match_score`.
# No rdf_predicate is specified for this mapping.
FieldMapping(
source_path='web_claims[].xpath_match_score',
source_description='Match confidence (1.0 = exact)',
target_class='WebClaim',
target_slot='match_score',
transformation=TransformationType.DIRECT,
typedb_entity='web-claim',
typedb_attribute='match-score',
required=False,
validation=FieldValidation(type='number'),
),
],
# NOTE(review): `retrieved_on` and `html_file` appear in this sample but
# have no FieldMapping in the list above.
example_yaml="""
web_claims:
- claim_type: full_name
claim_value: Rijksmuseum Amsterdam
source_url: https://www.rijksmuseum.nl/
retrieved_on: "2025-01-15T10:30:00Z"
xpath: /html/body/header/div[1]/a/span
html_file: web/NL-NH-AMS-M-RM/rijksmuseum.nl/index.html
xpath_match_score: 1.0
""".strip(),
),
# -------------------------------------------------------------------------
# Provenance
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
# `provenance` block -> LinkML `Provenance` / TypeDB `provenance`.
# The mapping itself is declared as TIER_1_AUTHORITATIVE metadata.
# data_source, data_tier and extraction_date are required.
source_block='provenance',
description='Data lineage and source tracking',
detailed_description="""
Metadata about where the data came from, when it was collected,
and how confident we are in its accuracy.
""".strip(),
linkml_class='Provenance',
typedb_entity='provenance',
provenance=Provenance(
source_type='metadata',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
fields=[
# data_source: must be one of the enum_values below.
FieldMapping(
source_path='provenance.data_source',
source_description='Origin of the data',
target_class='Provenance',
target_slot='data_source',
transformation=TransformationType.LOOKUP,
typedb_entity='provenance',
typedb_attribute='data-source',
rdf_predicate='prov:wasAttributedTo',
required=True,
validation=FieldValidation(
type='enum',
enum_values=['CSV_REGISTRY', 'CONVERSATION_NLP', 'API_ENRICHMENT', 'WEB_SCRAPE', 'MANUAL_ENTRY'],
),
),
# data_tier: tier names given as strings; presumably mirrors the
# members of the DataTier enum used in this list - verify.
FieldMapping(
source_path='provenance.data_tier',
source_description='Data quality tier',
target_class='Provenance',
target_slot='data_tier',
transformation=TransformationType.LOOKUP,
typedb_entity='provenance',
typedb_attribute='data-tier',
rdf_predicate='hc:dataTier',
required=True,
validation=FieldValidation(
type='enum',
enum_values=['TIER_1_AUTHORITATIVE', 'TIER_2_VERIFIED', 'TIER_3_CROWD_SOURCED', 'TIER_4_INFERRED'],
),
),
# extraction_date: required, validated as a date.
FieldMapping(
source_path='provenance.extraction_date',
source_description='When data was extracted',
target_class='Provenance',
target_slot='extraction_date',
transformation=TransformationType.TEMPORAL,
typedb_entity='provenance',
typedb_attribute='extraction-date',
rdf_predicate='prov:generatedAtTime',
required=True,
validation=FieldValidation(type='date'),
),
# confidence_score: optional number. The 0-1 range is stated only in
# the description; the validation declares no bounds.
FieldMapping(
source_path='provenance.confidence_score',
source_description='Confidence in data accuracy (0-1)',
target_class='Provenance',
target_slot='confidence_score',
transformation=TransformationType.DIRECT,
typedb_entity='provenance',
typedb_attribute='confidence-score',
rdf_predicate='hc:confidenceScore',
required=False,
validation=FieldValidation(type='number'),
),
],
# NOTE(review): `extraction_method` and `source_files` appear in this
# sample but have no FieldMapping in the list above.
example_yaml="""
provenance:
data_source: API_ENRICHMENT
data_tier: TIER_2_VERIFIED
extraction_date: "2025-01-15T10:30:00Z"
extraction_method: "Google Maps Places API + Wikidata SPARQL"
confidence_score: 0.92
source_files:
- google_maps_enrichment
- wikidata_enrichment
""".strip(),
),
# -------------------------------------------------------------------------
# Timespan
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
# `timespan` block -> LinkML `TimeSpan` / TypeDB `time-span`.
# Four optional date bounds: begin_of_the_begin / end_of_the_begin bracket
# the start, begin_of_the_end / end_of_the_end bracket the end (per the
# source descriptions below). Each is a TEMPORAL mapping validated as a date.
source_block='timespan',
description='Temporal bounds (CIDOC-CRM style)',
detailed_description="""
Temporal information following CIDOC-CRM E52 Time-Span pattern.
Captures founding date, closure date (if applicable), and temporal bounds.
""".strip(),
linkml_class='TimeSpan',
typedb_entity='time-span',
provenance=Provenance(
source_type='derived',
data_tier=DataTier.TIER_2_VERIFIED,
),
fields=[
# Earliest possible start -> crm:P82a_begin_of_the_begin.
FieldMapping(
source_path='timespan.begin_of_the_begin',
source_description='Earliest possible start date',
target_class='TimeSpan',
target_slot='begin_of_the_begin',
transformation=TransformationType.TEMPORAL,
typedb_entity='time-span',
typedb_attribute='begin-of-the-begin',
rdf_predicate='crm:P82a_begin_of_the_begin',
required=False,
validation=FieldValidation(type='date'),
),
# Latest possible start -> crm:P81a_end_of_the_begin.
FieldMapping(
source_path='timespan.end_of_the_begin',
source_description='Latest possible start date',
target_class='TimeSpan',
target_slot='end_of_the_begin',
transformation=TransformationType.TEMPORAL,
typedb_entity='time-span',
typedb_attribute='end-of-the-begin',
rdf_predicate='crm:P81a_end_of_the_begin',
required=False,
validation=FieldValidation(type='date'),
),
# Earliest possible end -> crm:P81b_begin_of_the_end.
FieldMapping(
source_path='timespan.begin_of_the_end',
source_description='Earliest possible end date',
target_class='TimeSpan',
target_slot='begin_of_the_end',
transformation=TransformationType.TEMPORAL,
typedb_entity='time-span',
typedb_attribute='begin-of-the-end',
rdf_predicate='crm:P81b_begin_of_the_end',
required=False,
validation=FieldValidation(type='date'),
),
# Latest possible end -> crm:P82b_end_of_the_end.
FieldMapping(
source_path='timespan.end_of_the_end',
source_description='Latest possible end date',
target_class='TimeSpan',
target_slot='end_of_the_end',
transformation=TransformationType.TEMPORAL,
typedb_entity='time-span',
typedb_attribute='end-of-the-end',
rdf_predicate='crm:P82b_end_of_the_end',
required=False,
validation=FieldValidation(type='date'),
),
],
# Sample with only the two start bounds populated.
example_yaml="""
timespan:
begin_of_the_begin: "1800-01-01"
end_of_the_begin: "1800-12-31"
# Museum still operating - no end dates
""".strip(),
),
# -------------------------------------------------------------------------
# Legal Status
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
# `legal_status` block -> LinkML `CustodianLegalStatus` /
# TypeDB `custodian-legal-status`. Declared as registry-derived,
# TIER_1_AUTHORITATIVE data. All three mapped fields are optional.
source_block='legal_status',
description='Legal form and organizational structure',
detailed_description="""
Legal entity information including legal form (foundation, corporation, etc.),
registration numbers, and governing body information.
""".strip(),
linkml_class='CustodianLegalStatus',
typedb_entity='custodian-legal-status',
provenance=Provenance(
source_type='registry_lookup',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
fields=[
# legal_form: looked up from the source value to a legal-form code.
# NOTE(review): source_description says "code", but the example source
# value is the form's name ('stichting'); the code is the target.
# NOTE(review): published ISO 20275 ELF codes are four-character
# alphanumerics; 'NL_STI' looks like a project-internal label - confirm.
FieldMapping(
source_path='legal_status.legal_form',
source_description='ISO 20275 legal form code',
target_class='CustodianLegalStatus',
target_slot='legal_form',
transformation=TransformationType.LOOKUP,
transformation_details='Maps to ISO 20275 Entity Legal Form codes',
typedb_entity='custodian-legal-status',
typedb_attribute='legal-form',
rdf_predicate='org:classification',
required=False,
example=FieldExample(
source_value='stichting',
target_value='NL_STI', # ISO 20275 code
),
),
# legal_name: copied verbatim into schema:legalName.
FieldMapping(
source_path='legal_status.legal_name',
source_description='Full registered name including legal form',
target_class='CustodianLegalStatus',
target_slot='legal_name',
transformation=TransformationType.DIRECT,
typedb_entity='custodian-legal-status',
typedb_attribute='legal-name',
rdf_predicate='schema:legalName',
required=False,
example=FieldExample(
source_value='Stichting Rijksmuseum',
target_value='Stichting Rijksmuseum',
),
),
# kvk_number: validated as a string of exactly eight digits.
FieldMapping(
source_path='legal_status.kvk_number',
source_description='Dutch Chamber of Commerce number',
target_class='CustodianLegalStatus',
target_slot='kvk_number',
transformation=TransformationType.DIRECT,
typedb_entity='custodian-legal-status',
typedb_attribute='kvk-number',
rdf_predicate='hc:kvkNumber',
required=False,
validation=FieldValidation(
type='string',
pattern='^[0-9]{8}$',
),
),
],
# NOTE(review): `registered_address` appears in this sample but has no
# FieldMapping in the list above.
example_yaml="""
legal_status:
legal_form: stichting
legal_name: Stichting Rijksmuseum
kvk_number: "10205502"
registered_address: Amsterdam
""".strip(),
),
# -------------------------------------------------------------------------
# Digital Platforms
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
# `digital_platforms` list -> LinkML `DigitalPlatform` /
# TypeDB `digital-platform`. Source paths use `digital_platforms[]`, i.e.
# each mapping applies per list item. Name and URL are required.
source_block='digital_platforms',
description='Website and digital platform metadata',
detailed_description="""
Information about the institution's digital presence including
primary website, collection management systems, discovery portals, and APIs.
""".strip(),
linkml_class='DigitalPlatform',
typedb_entity='digital-platform',
provenance=Provenance(
source_type='web_discovery',
data_tier=DataTier.TIER_2_VERIFIED,
),
fields=[
# platform_name: required, copied verbatim into schema:name.
FieldMapping(
source_path='digital_platforms[].platform_name',
source_description='Name of the platform',
target_class='DigitalPlatform',
target_slot='platform_name',
transformation=TransformationType.DIRECT,
typedb_entity='digital-platform',
typedb_attribute='platform-name',
rdf_predicate='schema:name',
required=True,
),
# platform_url: required; validated as a URI.
FieldMapping(
source_path='digital_platforms[].platform_url',
source_description='URL of the platform',
target_class='DigitalPlatform',
target_slot='platform_url',
transformation=TransformationType.DIRECT,
typedb_entity='digital-platform',
typedb_attribute='platform-url',
rdf_predicate='schema:url',
required=True,
validation=FieldValidation(type='uri'),
),
# platform_type: optional; must be one of the enum_values below.
FieldMapping(
source_path='digital_platforms[].platform_type',
source_description='Type of platform',
target_class='DigitalPlatform',
target_slot='platform_type',
transformation=TransformationType.LOOKUP,
typedb_entity='digital-platform',
typedb_attribute='platform-type',
rdf_predicate='hc:platformType',
required=False,
validation=FieldValidation(
type='enum',
enum_values=['WEBSITE', 'COLLECTION_PORTAL', 'DISCOVERY_PLATFORM', 'API', 'SOCIAL_MEDIA'],
),
),
],
# Sample with three platforms; every key shown has a FieldMapping above.
example_yaml="""
digital_platforms:
- platform_name: Rijksmuseum Website
platform_url: https://www.rijksmuseum.nl/
platform_type: WEBSITE
- platform_name: Rijksstudio
platform_url: https://www.rijksmuseum.nl/en/rijksstudio
platform_type: COLLECTION_PORTAL
- platform_name: Rijksmuseum API
platform_url: https://data.rijksmuseum.nl/
platform_type: API
""".strip(),
),
# =========================================================================
# ARCHIVE TYPE MAPPINGS
# =========================================================================
# -------------------------------------------------------------------------
# archive_type_academic
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
# Schema-documentation entry for the academic archive class family.
# Primary class is `AcademicArchive` (TypeDB `academic-archive`); the
# related classes are listed in generated_classes.
source_block='archive_type_academic',
description='Academic and research archive types - universities, institutes, learned societies',
detailed_description="""
Academic archives serve educational and research institutions. They preserve:
- University records and institutional history
- Research data and scholarly outputs
- Student and faculty records
- Scientific collections and specimen documentation
Classes: AcademicArchive, UniversityArchive, ScientificArchive, InstitutionalArchive,
InstitutionalRepository
""".strip(),
linkml_class='AcademicArchive',
typedb_entity='academic-archive',
provenance=Provenance(
source_type='schema_documentation',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
# Keep in sync with the "Classes:" line of detailed_description.
generated_classes=[
'AcademicArchive',
'UniversityArchive',
'ScientificArchive',
'InstitutionalArchive',
'InstitutionalRepository',
],
fields=[
# archive_type: no source path (source_path=None); the mapping is
# explicitly marked MAPPED.
FieldMapping(
source_path=None,
source_description='Archive classification type for academic institutions',
target_class='AcademicArchive',
target_slot='archive_type',
transformation=TransformationType.LOOKUP,
typedb_entity='academic-archive',
typedb_attribute='archive-classification',
rdf_predicate='hc:archiveType',
required=True,
status=MappingStatus.MAPPED,
notes='Subclass of Archive with specialization for academic/research contexts',
),
# parent_institution_type: optional enum read from `institution_type`.
# NOTE(review): example_yaml below sets `institution_type: ARCHIVE`,
# which is not one of these enum_values; the source path may need to
# be a dedicated parent-institution key - confirm.
FieldMapping(
source_path='institution_type',
source_description='Parent educational institution type',
target_class='AcademicArchive',
target_slot='parent_institution_type',
transformation=TransformationType.LOOKUP,
typedb_entity='academic-archive',
typedb_attribute='parent-institution-type',
rdf_predicate='hc:parentInstitutionType',
required=False,
validation=FieldValidation(
type='enum',
enum_values=['UNIVERSITY', 'RESEARCH_INSTITUTE', 'ACADEMY', 'LEARNED_SOCIETY'],
),
),
],
example_yaml="""
# AcademicArchive instance
institution_type: ARCHIVE
archive_classification: ACADEMIC
parent_institution: University of Amsterdam
specializations:
- scientific_records
- research_data
- institutional_history
""".strip(),
),
# -------------------------------------------------------------------------
# archive_type_audiovisual
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
# Schema-documentation entry for the audiovisual archive class family.
# Primary class is `AudiovisualArchive` (TypeDB `audiovisual-archive`);
# generated_classes lists the same ten classes as the description.
source_block='archive_type_audiovisual',
description='Audiovisual archive types - film, sound, television, radio',
detailed_description="""
Audiovisual archives preserve time-based media including:
- Film and cinema collections (Cinematheque, FilmArchive)
- Sound recordings (SoundArchive, MusicArchive, AnimalSoundArchive)
- Television and radio broadcasts (TelevisionArchive, RadioArchive)
- Media-specific preservation requirements
Classes: AudiovisualArchive, FilmArchive, Cinematheque, SoundArchive,
MusicArchive, RadioArchive, TelevisionArchive, AnimalSoundArchive,
MediaArchive, AdvertisingRadioArchive
""".strip(),
linkml_class='AudiovisualArchive',
typedb_entity='audiovisual-archive',
provenance=Provenance(
source_type='schema_documentation',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
generated_classes=[
'AudiovisualArchive',
'FilmArchive',
'Cinematheque',
'SoundArchive',
'MusicArchive',
'RadioArchive',
'TelevisionArchive',
'AnimalSoundArchive',
'MediaArchive',
'AdvertisingRadioArchive',
],
fields=[
# archive_type: no source path (source_path=None); explicitly MAPPED.
FieldMapping(
source_path=None,
source_description='Audiovisual archive classification',
target_class='AudiovisualArchive',
target_slot='archive_type',
transformation=TransformationType.LOOKUP,
typedb_entity='audiovisual-archive',
typedb_attribute='archive-classification',
rdf_predicate='hc:archiveType',
required=True,
status=MappingStatus.MAPPED,
),
# media_types: optional array copied as-is (ARRAY_DIRECT); the
# validation lists the allowed enum_values.
FieldMapping(
source_path='media_types',
source_description='Types of media held in collection',
target_class='AudiovisualArchive',
target_slot='media_types',
transformation=TransformationType.ARRAY_DIRECT,
typedb_entity='audiovisual-archive',
typedb_attribute='media-types',
rdf_predicate='hc:mediaTypes',
required=False,
validation=FieldValidation(
type='array',
enum_values=['FILM', 'VIDEO', 'AUDIO', 'RADIO', 'TELEVISION', 'DIGITAL_MEDIA'],
),
),
],
# Of the keys in this sample, only `media_types` has a FieldMapping with
# a matching source_path above.
example_yaml="""
# FilmArchive instance (e.g., EYE Filmmuseum)
institution_type: ARCHIVE
archive_classification: AUDIOVISUAL
subtype: FILM_ARCHIVE
media_types:
- FILM
- VIDEO
- DIGITAL_MEDIA
preservation_formats:
- 35mm
- 16mm
- digital_preservation
""".strip(),
),
# -------------------------------------------------------------------------
# archive_type_church
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
# Schema-documentation entry for the religious archive class family.
# Primary class is `ChurchArchive` (TypeDB `church-archive`);
# generated_classes lists the same seven classes as the description.
source_block='archive_type_church',
description='Religious archive types - church, diocesan, monastery, parish',
detailed_description="""
Religious archives document faith communities and their histories:
- Church administration and governance (ChurchArchive, DiocesanArchive)
- Religious orders and communities (MonasteryArchive, ReligiousArchive)
- Parish records and sacramental registers (ParishArchive)
- Regional variations (ChurchArchiveSweden, CathedralArchive)
Classes: ChurchArchive, DiocesanArchive, MonasteryArchive, ParishArchive,
ReligiousArchive, CathedralArchive, ChurchArchiveSweden
""".strip(),
linkml_class='ChurchArchive',
typedb_entity='church-archive',
provenance=Provenance(
source_type='schema_documentation',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
generated_classes=[
'ChurchArchive',
'DiocesanArchive',
'MonasteryArchive',
'ParishArchive',
'ReligiousArchive',
'CathedralArchive',
'ChurchArchiveSweden',
],
fields=[
# archive_type: no source path (source_path=None); explicitly MAPPED.
FieldMapping(
source_path=None,
source_description='Religious archive classification',
target_class='ChurchArchive',
target_slot='archive_type',
transformation=TransformationType.LOOKUP,
typedb_entity='church-archive',
typedb_attribute='archive-classification',
rdf_predicate='hc:archiveType',
required=True,
status=MappingStatus.MAPPED,
),
# denomination: optional; read from the `religious_denomination` key
# and stored under the shorter slot name `denomination`.
FieldMapping(
source_path='religious_denomination',
source_description='Religious denomination or tradition',
target_class='ChurchArchive',
target_slot='denomination',
transformation=TransformationType.DIRECT,
typedb_entity='church-archive',
typedb_attribute='denomination',
rdf_predicate='hc:denomination',
required=False,
),
],
# Of the keys in this sample, only `religious_denomination` has a
# FieldMapping with a matching source_path above.
example_yaml="""
# DiocesanArchive instance
institution_type: ARCHIVE
archive_classification: CHURCH
subtype: DIOCESAN_ARCHIVE
religious_denomination: Roman Catholic
diocese: Diocese of Haarlem-Amsterdam
record_types:
- sacramental_registers
- parish_records
- administrative_correspondence
""".strip(),
),
# -------------------------------------------------------------------------
# archive_type_corporate
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
# Schema-documentation entry for the corporate archive class family.
# Primary class is `CompanyArchives`, while the TypeDB entity is named
# `corporate-archive`; generated_classes lists the same seven classes as
# the description.
source_block='archive_type_corporate',
description='Corporate and business archive types - company, bank, trade union',
detailed_description="""
Corporate archives document business and economic activities:
- Company history and governance (CompanyArchives, BankArchive)
- Labor organizations (TradeUnionArchive)
- Economic documentation (EconomicArchive)
- Industry-specific records (FoundationArchive, AssociationArchive)
Classes: CompanyArchives, BankArchive, TradeUnionArchive, EconomicArchive,
FoundationArchive, AssociationArchive, RegionalEconomicArchive
""".strip(),
linkml_class='CompanyArchives',
typedb_entity='corporate-archive',
provenance=Provenance(
source_type='schema_documentation',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
generated_classes=[
'CompanyArchives',
'BankArchive',
'TradeUnionArchive',
'EconomicArchive',
'FoundationArchive',
'AssociationArchive',
'RegionalEconomicArchive',
],
fields=[
# archive_type: no source path (source_path=None); explicitly MAPPED.
FieldMapping(
source_path=None,
source_description='Corporate archive classification',
target_class='CompanyArchives',
target_slot='archive_type',
transformation=TransformationType.LOOKUP,
typedb_entity='corporate-archive',
typedb_attribute='archive-classification',
rdf_predicate='hc:archiveType',
required=True,
status=MappingStatus.MAPPED,
),
# industry_sector: optional, copied verbatim; no enum is declared.
FieldMapping(
source_path='industry_sector',
source_description='Industry sector of the organization',
target_class='CompanyArchives',
target_slot='industry_sector',
transformation=TransformationType.DIRECT,
typedb_entity='corporate-archive',
typedb_attribute='industry-sector',
rdf_predicate='hc:industrySector',
required=False,
),
],
# Of the keys in this sample, only `industry_sector` has a FieldMapping
# with a matching source_path above.
example_yaml="""
# BankArchive instance
institution_type: ARCHIVE
archive_classification: CORPORATE
subtype: BANK_ARCHIVE
company_name: ABN AMRO Historical Archive
industry_sector: FINANCIAL_SERVICES
parent_organization: ABN AMRO Bank N.V.
""".strip(),
),
# -------------------------------------------------------------------------
# archive_type_government
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
# Schema-documentation entry for the government archive class family.
# Primary class is `NationalArchives`, while the TypeDB entity is named
# `government-archive`; the related classes are listed in generated_classes.
source_block='archive_type_government',
description='Government archive types - national, state, parliamentary, court',
detailed_description="""
Government archives document state activities at all levels:
- National archives (NationalArchives)
- State/regional government (StateArchives, StateArchivesSection)
- Parliamentary records (ParliamentaryArchives)
- Judicial records (CourtRecords, NotarialArchive)
- Public administration (PublicArchive, GovernmentArchive)
- Security and intelligence (SecurityArchives)
Classes: NationalArchives, StateArchives, StateArchivesSection, GovernmentArchive,
PublicArchive, ParliamentaryArchives, CourtRecords, NotarialArchive,
SecurityArchives, CurrentArchive, PublicArchivesInFrance
""".strip(),
linkml_class='NationalArchives',
typedb_entity='government-archive',
provenance=Provenance(
source_type='schema_documentation',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
# Keep in sync with the "Classes:" line of detailed_description.
generated_classes=[
'NationalArchives',
'StateArchives',
'StateArchivesSection',
'GovernmentArchive',
'PublicArchive',
'ParliamentaryArchives',
'CourtRecords',
'NotarialArchive',
'SecurityArchives',
'CurrentArchive',
'PublicArchivesInFrance',
],
fields=[
# archive_type: no source path (source_path=None); explicitly MAPPED.
FieldMapping(
source_path=None,
source_description='Government archive classification',
target_class='NationalArchives',
target_slot='archive_type',
transformation=TransformationType.LOOKUP,
typedb_entity='government-archive',
typedb_attribute='archive-classification',
rdf_predicate='hc:archiveType',
required=True,
status=MappingStatus.MAPPED,
),
# jurisdiction_level: optional; must be one of the enum_values below.
FieldMapping(
source_path='jurisdiction_level',
source_description='Level of government jurisdiction',
target_class='NationalArchives',
target_slot='jurisdiction_level',
transformation=TransformationType.LOOKUP,
typedb_entity='government-archive',
typedb_attribute='jurisdiction-level',
rdf_predicate='hc:jurisdictionLevel',
required=False,
validation=FieldValidation(
type='enum',
enum_values=['NATIONAL', 'STATE', 'REGIONAL', 'MUNICIPAL', 'LOCAL'],
),
),
],
# Of the keys in this sample, only `jurisdiction_level` has a FieldMapping
# with a matching source_path above.
example_yaml="""
# NationalArchives instance (e.g., Nationaal Archief)
institution_type: ARCHIVE
archive_classification: GOVERNMENT
subtype: NATIONAL_ARCHIVES
jurisdiction_level: NATIONAL
country: NL
official_name: Nationaal Archief
legal_mandate: Archiefwet 1995
""".strip(),
),
# -------------------------------------------------------------------------
# archive_type_municipal
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
# Schema-documentation entry for the municipal archive class family.
# Primary class is `MunicipalArchive` (TypeDB `municipal-archive`);
# generated_classes lists the same seven classes as the description.
source_block='archive_type_municipal',
description='Municipal and local government archive types',
detailed_description="""
Municipal archives serve local government and community documentation:
- City/town archives (MunicipalArchive)
- Local government records (LocalGovernmentArchive)
- County/district level (CountyRecordOffice, DistrictArchiveGermany)
- Local history collections (LocalHistoryArchive)
Classes: MunicipalArchive, LocalGovernmentArchive, CountyRecordOffice,
DistrictArchiveGermany, LocalHistoryArchive, ComarcalArchive,
DistritalArchive
""".strip(),
linkml_class='MunicipalArchive',
typedb_entity='municipal-archive',
provenance=Provenance(
source_type='schema_documentation',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
generated_classes=[
'MunicipalArchive',
'LocalGovernmentArchive',
'CountyRecordOffice',
'DistrictArchiveGermany',
'LocalHistoryArchive',
'ComarcalArchive',
'DistritalArchive',
],
fields=[
# archive_type: no source path (source_path=None); explicitly MAPPED.
FieldMapping(
source_path=None,
source_description='Municipal archive classification',
target_class='MunicipalArchive',
target_slot='archive_type',
transformation=TransformationType.LOOKUP,
typedb_entity='municipal-archive',
typedb_attribute='archive-classification',
rdf_predicate='hc:archiveType',
required=True,
status=MappingStatus.MAPPED,
),
# municipality: optional, copied verbatim.
FieldMapping(
source_path='municipality',
source_description='Municipality served by the archive',
target_class='MunicipalArchive',
target_slot='municipality',
transformation=TransformationType.DIRECT,
typedb_entity='municipal-archive',
typedb_attribute='municipality',
rdf_predicate='hc:municipality',
required=False,
),
],
# Of the keys in this sample, only `municipality` has a FieldMapping with
# a matching source_path above.
example_yaml="""
# MunicipalArchive instance (e.g., Stadsarchief Amsterdam)
institution_type: ARCHIVE
archive_classification: MUNICIPAL
municipality: Amsterdam
province: Noord-Holland
country: NL
services:
- reading_room
- digitization
- genealogy_support
""".strip(),
),
# -------------------------------------------------------------------------
# archive_type_national
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
# Schema-documentation entry for national-level archive classes.
# Primary class is `NationalArchives` (TypeDB `national-archive`);
# generated_classes lists the same six classes as the description.
# Both mapped fields are required.
source_block='archive_type_national',
description='National-level archive institutions and systems',
detailed_description="""
National archives represent the highest level of archival authority:
- Central national repositories (NationalArchives)
- Country-specific variations (ArchivesRegionales, Landsarkiv, Fylkesarkiv)
- International organization archives (ArchiveOfInternationalOrganization)
Classes: NationalArchives, ArchivesRegionales, Landsarkiv, Fylkesarkiv,
ArchiveOfInternationalOrganization, RegionalArchivesInIceland
""".strip(),
linkml_class='NationalArchives',
typedb_entity='national-archive',
provenance=Provenance(
source_type='schema_documentation',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
generated_classes=[
'NationalArchives',
'ArchivesRegionales',
'Landsarkiv',
'Fylkesarkiv',
'ArchiveOfInternationalOrganization',
'RegionalArchivesInIceland',
],
fields=[
# archive_type: no source path (source_path=None); explicitly MAPPED.
FieldMapping(
source_path=None,
source_description='National archive classification',
target_class='NationalArchives',
target_slot='archive_type',
transformation=TransformationType.LOOKUP,
typedb_entity='national-archive',
typedb_attribute='archive-classification',
rdf_predicate='hc:archiveType',
required=True,
status=MappingStatus.MAPPED,
),
# country: required; read from `country_code` and validated as two
# uppercase letters. Here the validation type is 'string', not 'enum'.
FieldMapping(
source_path='country_code',
source_description='ISO 3166-1 alpha-2 country code',
target_class='NationalArchives',
target_slot='country',
transformation=TransformationType.LOOKUP,
typedb_entity='national-archive',
typedb_attribute='country-code',
rdf_predicate='hc:countryCode',
required=True,
validation=FieldValidation(
type='string',
pattern='^[A-Z]{2}$',
),
),
],
# Of the keys in this sample, only `country_code` has a FieldMapping with
# a matching source_path above.
example_yaml="""
# National Archives variation (e.g., Swedish Landsarkiv)
institution_type: ARCHIVE
archive_classification: NATIONAL
subtype: LANDSARKIV
country_code: SE
region: Gothenburg
""".strip(),
),
# -------------------------------------------------------------------------
# archive_type_regional
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='archive_type_regional',
description='Regional and provincial archive types',
detailed_description="""
Regional archives serve geographic areas between national and local:
- Provincial archives (ProvincialArchive, ProvincialHistoricalArchive)
- Regional administration (RegionalArchive, RegionalStateArchives)
- Cantonal systems (CantonalArchive - Switzerland)
- Country-specific regional (DepartmentalArchives - France, StateRegionalArchiveCzechia)
Classes: RegionalArchive, ProvincialArchive, ProvincialHistoricalArchive,
RegionalStateArchives, CantonalArchive, DepartmentalArchives,
StateRegionalArchiveCzechia, StateDistrictArchive
""".strip(),
linkml_class='RegionalArchive',
typedb_entity='regional-archive',
provenance=Provenance(
source_type='schema_documentation',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
generated_classes=[
'RegionalArchive',
'ProvincialArchive',
'ProvincialHistoricalArchive',
'RegionalStateArchives',
'CantonalArchive',
'DepartmentalArchives',
'StateRegionalArchiveCzechia',
'StateDistrictArchive',
],
fields=[
FieldMapping(
source_path=None,
source_description='Regional archive classification',
target_class='RegionalArchive',
target_slot='archive_type',
transformation=TransformationType.LOOKUP,
typedb_entity='regional-archive',
typedb_attribute='archive-classification',
rdf_predicate='hc:archiveType',
required=True,
status=MappingStatus.MAPPED,
),
FieldMapping(
source_path='region',
source_description='Geographic region served',
target_class='RegionalArchive',
target_slot='region',
transformation=TransformationType.DIRECT,
typedb_entity='regional-archive',
typedb_attribute='region',
rdf_predicate='hc:region',
required=False,
),
],
example_yaml="""
# ProvincialArchive instance (e.g., Brabants Historisch Informatie Centrum)
institution_type: ARCHIVE
archive_classification: REGIONAL
subtype: PROVINCIAL_ARCHIVE
province: Noord-Brabant
country: NL
""".strip(),
),
# -------------------------------------------------------------------------
# archive_type_specialized
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='archive_type_specialized',
description='Specialized archive types - thematic collections and unique formats',
detailed_description="""
Specialized archives focus on specific formats, subjects, or communities:
- Format-specific (PhotoArchive, LiteraryArchive, IconographicArchives)
- Subject-focused (PoliticalArchive, MilitaryArchive, PerformingArtsArchive)
- Community-focused (WomensArchives, LGBTArchive, CommunityArchive)
- Institutional (HospitalArchive, SchoolArchive)
Classes: PhotoArchive, LiteraryArchive, PoliticalArchive, MilitaryArchive,
PerformingArtsArchive, WomensArchives, LGBTArchive, CommunityArchive,
HospitalArchive, SchoolArchive, IconographicArchives, ArtArchive,
ArchitecturalArchive, NewspaperClippingsArchive
""".strip(),
linkml_class='SpecializedArchive',
typedb_entity='specialized-archive',
provenance=Provenance(
source_type='schema_documentation',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
generated_classes=[
'SpecializedArchive',
'PhotoArchive',
'LiteraryArchive',
'PoliticalArchive',
'MilitaryArchive',
'PerformingArtsArchive',
'WomensArchives',
'LGBTArchive',
'CommunityArchive',
'HospitalArchive',
'SchoolArchive',
'IconographicArchives',
'ArtArchive',
'ArchitecturalArchive',
'NewspaperClippingsArchive',
'PressArchive',
'NobilityArchive',
],
fields=[
FieldMapping(
source_path=None,
source_description='Specialized archive classification',
target_class='SpecializedArchive',
target_slot='archive_type',
transformation=TransformationType.LOOKUP,
typedb_entity='specialized-archive',
typedb_attribute='archive-classification',
rdf_predicate='hc:archiveType',
required=True,
status=MappingStatus.MAPPED,
),
FieldMapping(
source_path='specialization',
source_description='Area of specialization',
target_class='SpecializedArchive',
target_slot='specialization',
transformation=TransformationType.DIRECT,
typedb_entity='specialized-archive',
typedb_attribute='specialization',
rdf_predicate='hc:specialization',
required=False,
),
],
example_yaml="""
# PhotoArchive instance (e.g., Nederlands Fotomuseum archive)
institution_type: ARCHIVE
archive_classification: SPECIALIZED
subtype: PHOTO_ARCHIVE
specialization: photography
collection_strengths:
- Dutch photography 1840-present
- Documentary photography
- Press photography
""".strip(),
),
# -------------------------------------------------------------------------
# archive_type_thematic
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='archive_type_thematic',
description='Thematic archive types - digital, web, dark archives, and special collections',
detailed_description="""
Thematic archives organized around specific themes or functions:
- Digital preservation (DigitalArchive, DarkArchive, WebArchive)
- Collection types (CollectingArchives, DepositArchive)
- Personal papers (Nachlass, HouseArchive, PersonalCollectionType)
- Online archives (OnlineNewsArchive, MailingListArchive)
Classes: DigitalArchive, DarkArchive, WebArchive, CollectingArchives,
DepositArchive, Nachlass, HouseArchive, OnlineNewsArchive,
MailingListArchive, ClimateArchive, FreeArchive, PostcustodialArchive
""".strip(),
linkml_class='DigitalArchive',
typedb_entity='thematic-archive',
provenance=Provenance(
source_type='schema_documentation',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
generated_classes=[
'DigitalArchive',
'DarkArchive',
'WebArchive',
'CollectingArchives',
'DepositArchive',
'Nachlass',
'HouseArchive',
'OnlineNewsArchive',
'MailingListArchive',
'ClimateArchive',
'FreeArchive',
'PostcustodialArchive',
'MuseumArchive',
'ArchivalRepository',
'ArchivalLibrary',
],
fields=[
FieldMapping(
source_path=None,
source_description='Thematic archive classification',
target_class='DigitalArchive',
target_slot='archive_type',
transformation=TransformationType.LOOKUP,
typedb_entity='thematic-archive',
typedb_attribute='archive-classification',
rdf_predicate='hc:archiveType',
required=True,
status=MappingStatus.MAPPED,
),
FieldMapping(
source_path='digital_preservation_level',
source_description='Level of digital preservation commitment',
target_class='DigitalArchive',
target_slot='preservation_level',
transformation=TransformationType.LOOKUP,
typedb_entity='thematic-archive',
typedb_attribute='preservation-level',
rdf_predicate='hc:preservationLevel',
required=False,
validation=FieldValidation(
type='enum',
enum_values=['BIT_LEVEL', 'LOGICAL', 'SEMANTIC', 'FULL'],
),
),
],
example_yaml="""
# DarkArchive instance
institution_type: ARCHIVE
archive_classification: THEMATIC
subtype: DARK_ARCHIVE
digital_preservation_level: FULL
access_policy: RESTRICTED
storage_location: offline_vault
""".strip(),
),
# =========================================================================
# ORGANIZATIONAL STRUCTURE MAPPINGS
# =========================================================================
# -------------------------------------------------------------------------
# org_structure_hierarchy
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='org_structure_hierarchy',
description='Organizational hierarchy - parent organizations, subsidiaries, branches',
detailed_description="""
Models the hierarchical relationships between organizations:
- Parent-child relationships (ParentOrganizationUnit, SubsidiaryOrganization)
- Branch locations (OrganizationBranch, BranchOffice)
- Encompassing bodies (EncompassingBody)
- Networks and associations (ArchiveNetwork, ArchiveAssociation, ConnectionNetwork)
Classes: ParentOrganizationUnit, SubsidiaryOrganization, OrganizationBranch,
BranchOffice, EncompassingBody, ArchiveNetwork, ArchiveAssociation,
ConnectionNetwork, Organization, Institution
""".strip(),
linkml_class='Organization',
typedb_entity='organization',
provenance=Provenance(
source_type='schema_documentation',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
generated_classes=[
'Organization',
'Institution',
'ParentOrganizationUnit',
'SubsidiaryOrganization',
'OrganizationBranch',
'BranchOffice',
'EncompassingBody',
'ArchiveNetwork',
'ArchiveAssociation',
'ConnectionNetwork',
'CulturalInstitution',
'MemoryInstitution',
'GLAM',
],
fields=[
FieldMapping(
source_path='parent_organization',
source_description='Parent organization identifier or name',
target_class='Organization',
target_slot='parent_organization',
transformation=TransformationType.NESTED,
typedb_entity='organization',
typedb_attribute='parent-organization',
rdf_predicate='org:subOrganizationOf',
required=False,
),
FieldMapping(
source_path='subsidiary_organizations',
source_description='List of subsidiary organizations',
target_class='Organization',
target_slot='subsidiaries',
transformation=TransformationType.ARRAY_MAP,
typedb_entity='organization',
typedb_attribute='subsidiaries',
rdf_predicate='org:hasSubOrganization',
required=False,
),
FieldMapping(
source_path='encompassing_body',
source_description='Larger organizational body this belongs to',
target_class='Organization',
target_slot='encompassing_body',
transformation=TransformationType.NESTED,
typedb_entity='organization',
typedb_attribute='encompassing-body',
rdf_predicate='hc:encompassingBody',
required=False,
),
],
example_yaml="""
# Organization with hierarchy
organization_name: Rijksmuseum Research Library
parent_organization:
name: Rijksmuseum
ghcid: NL-NH-AMS-M-RM
encompassing_body:
name: Ministry of Education, Culture and Science
type: GOVERNMENT_MINISTRY
""".strip(),
),
# -------------------------------------------------------------------------
# org_structure_administrative
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='org_structure_administrative',
description='Administrative units - departments, divisions, offices',
detailed_description="""
Models the internal administrative structure of organizations:
- Functional divisions (Department, Division)
- Administrative units (AdministrativeOffice)
- Support functions (ConservationLab, ReadingRoom, GiftShop)
- Specialized facilities (ExhibitionSpace, Storage, Warehouse)
Classes: Department, Division, AdministrativeOffice, ConservationLab,
ReadingRoom, ReadingRoomAnnex, GiftShop, ExhibitionSpace,
Storage, Warehouse, PrintRoom, ServiceArea
""".strip(),
linkml_class='Department',
typedb_entity='department',
provenance=Provenance(
source_type='schema_documentation',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
generated_classes=[
'Department',
'Division',
'AdministrativeOffice',
'ConservationLab',
'ReadingRoom',
'ReadingRoomAnnex',
'GiftShop',
'ExhibitionSpace',
'Storage',
'Warehouse',
'PrintRoom',
'ServiceArea',
'ClosedSpace',
'PublicSpace',
'OutdoorSite',
],
fields=[
FieldMapping(
source_path='departments',
source_description='List of organizational departments',
target_class='Department',
target_slot='department_name',
transformation=TransformationType.ARRAY_MAP,
typedb_entity='department',
typedb_attribute='department-name',
rdf_predicate='org:hasUnit',
required=False,
),
FieldMapping(
source_path='facilities',
source_description='Physical facilities and spaces',
target_class='Department',
target_slot='facilities',
transformation=TransformationType.ARRAY_MAP,
typedb_entity='department',
typedb_attribute='facilities',
rdf_predicate='hc:hasFacility',
required=False,
),
],
example_yaml="""
# Department structure
departments:
- name: Collection Management
type: DEPARTMENT
staff_count: 25
- name: Conservation Laboratory
type: CONSERVATION_LAB
specializations:
- paper_conservation
- photograph_conservation
""".strip(),
),
# -------------------------------------------------------------------------
# ORGANIZATIONAL STRUCTURE - SUBDIVISION
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='org_structure_subdivision',
description='Organizational subdivisions and change events',
detailed_description="""
Models organizational subdivisions and structural changes:
- Subdivision types (OrganizationalSubdivision)
- Organizational structure (OrganizationalStructure)
- Change events (OrganizationalChangeEvent)
- Contributing and allocating agencies (ContributingAgency, AllocationAgency)
Classes: OrganizationalSubdivision, OrganizationalStructure,
OrganizationalChangeEvent, ContributingAgency, AllocationAgency,
Jurisdiction, StandardsOrganization, RegistrationAuthority
""".strip(),
linkml_class='OrganizationalSubdivision',
typedb_entity='organizational-subdivision',
provenance=Provenance(
source_type='schema_documentation',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
generated_classes=[
'OrganizationalSubdivision',
'OrganizationalStructure',
'OrganizationalChangeEvent',
'ContributingAgency',
'AllocationAgency',
'Jurisdiction',
'StandardsOrganization',
'RegistrationAuthority',
],
fields=[
FieldMapping(
source_path='subdivisions',
source_description='Organizational subdivisions',
target_class='OrganizationalSubdivision',
target_slot='subdivision_name',
transformation=TransformationType.ARRAY_MAP,
typedb_entity='organizational-subdivision',
typedb_attribute='subdivision-name',
rdf_predicate='org:hasUnit',
required=False,
),
FieldMapping(
source_path='change_history',
source_description='History of organizational changes',
target_class='OrganizationalChangeEvent',
target_slot='change_events',
transformation=TransformationType.ARRAY_MAP,
typedb_entity='organizational-change-event',
typedb_attribute='change-events',
rdf_predicate='hc:hasChangeEvent',
required=False,
),
],
example_yaml="""
# Organizational change history
change_history:
- event_type: MERGER
date: "2001-01-01"
description: "Merger of Gemeentearchief and Rijksarchief"
predecessor_organizations:
- name: Gemeentearchief Haarlem
- name: Rijksarchief Noord-Holland
resulting_organization:
name: Noord-Hollands Archief
""".strip(),
),
# =========================================================================
# PHASE 1: HERITAGE & CULTURAL SITES
# =========================================================================
# -------------------------------------------------------------------------
# HERITAGE - WORLD SITES
# -------------------------------------------------------------------------
# Mapping entry for the 'heritage_world_sites' source block.
# Anchored on the WorldHeritageSite LinkML class and the
# 'world-heritage-site' TypeDB entity; every field below targets that pair.
EnrichmentSourceMapping(
source_block='heritage_world_sites',
description='UNESCO World Heritage Sites and tentative list entries',
detailed_description="""
World Heritage Site designations and related classifications:
- Inscribed sites (WorldHeritageSite)
- Tentative list entries (TentativeWorldHeritageSite)
- Historic buildings and monuments (HistoricBuilding)
- Cultural institutions (CulturalInstitution)
Classes: WorldHeritageSite, TentativeWorldHeritageSite, HistoricBuilding,
CulturalInstitution
""".strip(),
linkml_class='WorldHeritageSite',
typedb_entity='world-heritage-site',
provenance=Provenance(
source_type='schema_documentation',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
# Same four classes that the "Classes:" line of detailed_description names.
generated_classes=[
'WorldHeritageSite',
'TentativeWorldHeritageSite',
'HistoricBuilding',
'CulturalInstitution',
],
fields=[
# unesco_id is the only required field of this block; validated as a number.
FieldMapping(
source_path='unesco_id',
source_description='UNESCO World Heritage Site ID',
target_class='WorldHeritageSite',
target_slot='unesco_id',
transformation=TransformationType.DIRECT,
typedb_entity='world-heritage-site',
typedb_attribute='unesco-id',
rdf_predicate='hc:unescoId',
required=True,
validation=FieldValidation(type='number'),
),
# Optional inscription year, copied 1:1 and validated as a number.
FieldMapping(
source_path='inscription_year',
source_description='Year of UNESCO inscription',
target_class='WorldHeritageSite',
target_slot='inscription_year',
transformation=TransformationType.DIRECT,
typedb_entity='world-heritage-site',
typedb_attribute='inscription-year',
rdf_predicate='hc:inscriptionYear',
required=False,
validation=FieldValidation(type='number'),
),
# Source key 'heritage_criteria' lands in the shorter slot name 'criteria';
# ARRAY_DIRECT copies the list without transforming its elements.
FieldMapping(
source_path='heritage_criteria',
source_description='UNESCO selection criteria (i-x)',
target_class='WorldHeritageSite',
target_slot='criteria',
transformation=TransformationType.ARRAY_DIRECT,
typedb_entity='world-heritage-site',
typedb_attribute='criteria',
rdf_predicate='hc:heritageCriteria',
required=False,
),
],
# The example describes the Rietveld Schröderhuis (inscribed 2000, criteria
# i and ii), whose UNESCO reference number is 965. The previous value, 818,
# belongs to a different Dutch site and contradicted the rest of the example.
example_yaml="""
# WorldHeritageSite instance
heritage_designation: UNESCO_WORLD_HERITAGE
unesco_id: 965
name: Rietveld Schröderhuis
inscription_year: 2000
heritage_criteria:
- i
- ii
country: NL
""".strip(),
),
# -------------------------------------------------------------------------
# HERITAGE - INTANGIBLE
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='heritage_intangible',
description='Intangible cultural heritage - traditions, performances, practices',
detailed_description="""
UNESCO Intangible Cultural Heritage and related practices:
- Intangible heritage forms (IntangibleHeritageForm)
- Performances and events (IntangibleHeritagePerformance, IntangibleHeritageEvent)
- Groups preserving traditions (IntangibleHeritageGroupType)
Classes: IntangibleHeritageForm, IntangibleHeritagePerformance,
IntangibleHeritageEvent, IntangibleHeritageGroupType
""".strip(),
linkml_class='IntangibleHeritageForm',
typedb_entity='intangible-heritage',
provenance=Provenance(
source_type='schema_documentation',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
generated_classes=[
'IntangibleHeritageForm',
'IntangibleHeritagePerformance',
'IntangibleHeritageEvent',
'IntangibleHeritageGroupType',
],
fields=[
FieldMapping(
source_path='ich_domain',
source_description='UNESCO ICH domain category',
target_class='IntangibleHeritageForm',
target_slot='domain',
transformation=TransformationType.LOOKUP,
typedb_entity='intangible-heritage',
typedb_attribute='domain',
rdf_predicate='hc:ichDomain',
required=False,
validation=FieldValidation(
type='enum',
enum_values=[
'ORAL_TRADITIONS',
'PERFORMING_ARTS',
'SOCIAL_PRACTICES',
'KNOWLEDGE_PRACTICES',
'TRADITIONAL_CRAFTSMANSHIP',
],
),
),
FieldMapping(
source_path='inscription_list',
source_description='UNESCO ICH list type',
target_class='IntangibleHeritageForm',
target_slot='list_type',
transformation=TransformationType.LOOKUP,
typedb_entity='intangible-heritage',
typedb_attribute='list-type',
rdf_predicate='hc:ichListType',
required=False,
validation=FieldValidation(
type='enum',
enum_values=['REPRESENTATIVE', 'URGENT_SAFEGUARDING', 'GOOD_PRACTICES'],
),
),
],
example_yaml="""
# IntangibleHeritageForm instance
heritage_type: INTANGIBLE
ich_domain: TRADITIONAL_CRAFTSMANSHIP
name: Craft of the miller operating windmills and watermills
inscription_list: REPRESENTATIVE
inscription_year: 2017
countries:
- NL
- BE
""".strip(),
),
# -------------------------------------------------------------------------
# HERITAGE - NATIONAL TREASURES
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='heritage_national_treasures',
description='National treasures and designated heritage items',
detailed_description="""
National-level heritage designations:
- National treasures (NationalTreasure)
- Country-specific designations (NationalTreasureOfFrance)
- Documentation centers (DocumentationCentre, RegionalHistoricCenter)
- Research facilities (ResearchCenter, ScientificTechnicAndIndustrialCultureCenter)
Classes: NationalTreasure, NationalTreasureOfFrance, DocumentationCentre,
RegionalHistoricCenter, ResearchCenter,
ScientificTechnicAndIndustrialCultureCenter
""".strip(),
linkml_class='NationalTreasure',
typedb_entity='national-treasure',
provenance=Provenance(
source_type='schema_documentation',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
generated_classes=[
'NationalTreasure',
'NationalTreasureOfFrance',
'DocumentationCentre',
'RegionalHistoricCenter',
'ResearchCenter',
'ScientificTechnicAndIndustrialCultureCenter',
],
fields=[
FieldMapping(
source_path='designation_type',
source_description='Type of national designation',
target_class='NationalTreasure',
target_slot='designation_type',
transformation=TransformationType.LOOKUP,
typedb_entity='national-treasure',
typedb_attribute='designation-type',
rdf_predicate='hc:designationType',
required=True,
),
FieldMapping(
source_path='designation_date',
source_description='Date of official designation',
target_class='NationalTreasure',
target_slot='designation_date',
transformation=TransformationType.TEMPORAL,
typedb_entity='national-treasure',
typedb_attribute='designation-date',
rdf_predicate='hc:designationDate',
required=False,
validation=FieldValidation(type='date'),
),
],
example_yaml="""
# NationalTreasure instance
designation_type: RIJKSMONUMENT
designation_number: 12345
name: Amsterdam Canal Ring
designation_date: "1999-12-01"
country: NL
""".strip(),
),
# =========================================================================
# PHASE 1: CLASSIFICATION TYPES
# =========================================================================
# -------------------------------------------------------------------------
# TYPE CLASSES - GLAM INSTITUTIONS
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='type_classes_glam',
description='Classification types for GLAM institutions - museums, libraries, archives, galleries',
detailed_description="""
Type classes for classifying heritage custodian institutions:
- Core GLAM types (MuseumType, LibraryType, ArchiveOrganizationType, GalleryType)
- Bio/nature custodians (BioCustodianType)
- Religious/sacred sites (HolySacredSiteType)
- Education providers (EducationProviderType)
Classes: MuseumType, LibraryType, ArchiveOrganizationType, GalleryType,
BioCustodianType, HolySacredSiteType, EducationProviderType,
CustodianType, PersonalCollectionType
""".strip(),
linkml_class='CustodianType',
typedb_entity='custodian-type',
provenance=Provenance(
source_type='schema_documentation',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
generated_classes=[
'CustodianType',
'MuseumType',
'LibraryType',
'ArchiveOrganizationType',
'GalleryType',
'BioCustodianType',
'HolySacredSiteType',
'EducationProviderType',
'PersonalCollectionType',
'FeatureCustodianType',
'TasteScentHeritageType',
],
fields=[
FieldMapping(
source_path='institution_type',
source_description='Primary institution type code',
target_class='CustodianType',
target_slot='type_code',
transformation=TransformationType.LOOKUP,
typedb_entity='custodian-type',
typedb_attribute='type-code',
rdf_predicate='hc:custodianTypeCode',
required=True,
validation=FieldValidation(
type='enum',
enum_values=['G', 'L', 'A', 'M', 'O', 'R', 'C', 'U', 'B', 'E', 'S', 'F', 'I', 'X', 'P', 'H', 'D', 'N', 'T'],
),
notes='GLAMORCUBESFIXPHDNT taxonomy single-letter codes',
),
FieldMapping(
source_path='institution_subtype',
source_description='Detailed institution subtype',
target_class='CustodianType',
target_slot='subtype',
transformation=TransformationType.LOOKUP,
typedb_entity='custodian-type',
typedb_attribute='subtype',
rdf_predicate='hc:custodianSubtype',
required=False,
),
],
example_yaml="""
# Custodian type classification
institution_type: M # Museum
institution_subtype: ART_MUSEUM
museum_type_details:
collection_focus: FINE_ARTS
governance: PUBLIC
size_category: LARGE
""".strip(),
),
# -------------------------------------------------------------------------
# TYPE CLASSES - DIGITAL PLATFORMS
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='type_classes_digital',
description='Classification types for digital platforms and web presence',
detailed_description="""
Type classes for digital infrastructure:
- Digital platform types (DigitalPlatformType)
- Web portal classifications (WebPortalType, WebPortalTypes)
- Social media platforms (SocialMediaPlatformType, SocialMediaPlatformTypes)
- Social media content (SocialMediaPostType, SocialMediaPostTypes)
- Video content types (VideoAnnotationTypes)
Classes: DigitalPlatformType, WebPortalType, WebPortalTypes,
SocialMediaPlatformType, SocialMediaPlatformTypes,
SocialMediaPostType, SocialMediaPostTypes, VideoAnnotationTypes
""".strip(),
linkml_class='DigitalPlatformType',
typedb_entity='digital-platform-type',
provenance=Provenance(
source_type='schema_documentation',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
generated_classes=[
'DigitalPlatformType',
'WebPortalType',
'WebPortalTypes',
'SocialMediaPlatformType',
'SocialMediaPlatformTypes',
'SocialMediaPostType',
'SocialMediaPostTypes',
'VideoAnnotationTypes',
'DataServiceEndpointType',
],
fields=[
FieldMapping(
source_path='platform_type',
source_description='Digital platform classification',
target_class='DigitalPlatformType',
target_slot='platform_category',
transformation=TransformationType.LOOKUP,
typedb_entity='digital-platform-type',
typedb_attribute='platform-category',
rdf_predicate='hc:platformCategory',
required=True,
validation=FieldValidation(
type='enum',
enum_values=[
'WEBSITE',
'COLLECTION_PORTAL',
'DISCOVERY_PLATFORM',
'API',
'SOCIAL_MEDIA',
'CMS',
'DAM',
],
),
),
],
example_yaml="""
# Digital platform type classification
digital_platforms:
- platform_type: COLLECTION_PORTAL
name: Online Collection
- platform_type: API
name: Data API
protocol: REST
- platform_type: SOCIAL_MEDIA
social_platform_type: INSTAGRAM
""".strip(),
),
# -------------------------------------------------------------------------
# TYPE CLASSES - ORGANIZATIONAL
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='type_classes_organizational',
description='Classification types for organizational structures',
detailed_description="""
Type classes for organizational classifications:
- Commercial organizations (CommercialOrganizationType)
- Non-profit organizations (NonProfitType)
- Research organizations (ResearchOrganizationType)
- Official institutions (OfficialInstitutionType)
- Heritage societies (HeritageSocietyType)
- Mixed/unspecified (MixedCustodianType, UnspecifiedType)
Classes: CommercialOrganizationType, NonProfitType, ResearchOrganizationType,
OfficialInstitutionType, HeritageSocietyType, MixedCustodianType,
UnspecifiedType, LegalEntityType
""".strip(),
linkml_class='LegalEntityType',
typedb_entity='legal-entity-type',
provenance=Provenance(
source_type='schema_documentation',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
generated_classes=[
'CommercialOrganizationType',
'NonProfitType',
'ResearchOrganizationType',
'OfficialInstitutionType',
'HeritageSocietyType',
'MixedCustodianType',
'UnspecifiedType',
'LegalEntityType',
'LegalForm',
'LegalName',
],
fields=[
FieldMapping(
source_path='legal_form',
source_description='Legal form/entity type',
target_class='LegalEntityType',
target_slot='legal_form_code',
transformation=TransformationType.LOOKUP,
typedb_entity='legal-entity-type',
typedb_attribute='legal-form-code',
rdf_predicate='hc:legalFormCode',
required=False,
notes='ISO 20275 legal form codes or national equivalents',
),
FieldMapping(
source_path='governance_type',
source_description='Governance/ownership type',
target_class='LegalEntityType',
target_slot='governance_type',
transformation=TransformationType.LOOKUP,
typedb_entity='legal-entity-type',
typedb_attribute='governance-type',
rdf_predicate='hc:governanceType',
required=False,
validation=FieldValidation(
type='enum',
enum_values=['PUBLIC', 'PRIVATE', 'NON_PROFIT', 'MIXED', 'GOVERNMENT'],
),
),
],
example_yaml="""
# Legal entity type classification
legal_status:
legal_form: STICHTING # Dutch foundation
legal_form_code: "8888" # ISO 20275
governance_type: NON_PROFIT
registration_authority: KVK
registration_number: "12345678"
""".strip(),
),
# =========================================================================
# PHASE 2: PLACE & LOCATION
# =========================================================================
# -------------------------------------------------------------------------
# PLACE - GEOGRAPHIC
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='place_geographic',
description='Geographic and spatial location entities',
detailed_description="""
Geographic location classes representing physical places where heritage custodians
are located or operate. Includes settlements (cities/towns), countries, regions,
and geospatial coordinates.
These classes support:
- Settlement identification (GeoNames integration)
- Country code normalization (ISO 3166-1)
- Region/subregion hierarchies
- Geospatial coordinates (lat/lon)
""".strip(),
linkml_class='Settlement',
typedb_entity='settlement',
provenance=Provenance(
source_type='geonames',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
generated_classes=[
'Settlement', 'Country', 'Subregion', 'GeoSpatialPlace', 'FeaturePlace',
],
fields=[
FieldMapping(
source_path='location.settlement',
source_description='Settlement/city name',
target_class='Settlement',
target_slot='name',
transformation=TransformationType.DIRECT,
typedb_entity='settlement',
typedb_attribute='name',
rdf_predicate='schema:name',
required=True,
),
FieldMapping(
source_path='location.country',
source_description='Country code (ISO 3166-1)',
target_class='Country',
target_slot='country_code',
transformation=TransformationType.LOOKUP,
typedb_entity='country',
typedb_attribute='country-code',
rdf_predicate='schema:addressCountry',
required=True,
),
FieldMapping(
source_path='location.coordinates',
source_description='Geographic coordinates',
target_class='GeoSpatialPlace',
target_slot='coordinates',
transformation=TransformationType.COMPUTED,
typedb_entity='geo-spatial-place',
typedb_attribute='coordinates',
rdf_predicate='geo:hasGeometry',
required=False,
),
],
example_yaml="""
# Geographic location
location:
settlement: Amsterdam
country: NL
region: Noord-Holland
coordinates:
latitude: 52.3676
longitude: 4.9041
""".strip(),
),
# -------------------------------------------------------------------------
# PLACE - CUSTODIAN SPECIFIC
# -------------------------------------------------------------------------
# Mapping entry for the 'place_custodian_specific' source block.
# Anchored on the CustodianPlace LinkML class; the auxiliary and temporary
# fields target their own classes (AuxiliaryPlace, TemporaryLocation).
EnrichmentSourceMapping(
source_block='place_custodian_specific',
description='Custodian-specific place classes',
detailed_description="""
Place classes specifically related to heritage custodian operations, including
auxiliary places, temporary locations, and custodian-specific place designations.
Supports modeling:
- Primary vs auxiliary locations
- Temporary/seasonal locations
- Off-site storage locations
- Branch/satellite locations
""".strip(),
linkml_class='CustodianPlace',
typedb_entity='custodian-place',
provenance=Provenance(
source_type='custodian_yaml',
data_tier=DataTier.TIER_2_VERIFIED,
),
generated_classes=[
'CustodianPlace', 'AuxiliaryPlace', 'TemporaryLocation',
],
fields=[
# The primary location is the only required field of this block.
FieldMapping(
source_path='places.primary',
source_description='Primary location',
target_class='CustodianPlace',
target_slot='primary_location',
transformation=TransformationType.DIRECT,
typedb_entity='custodian-place',
typedb_attribute='primary-location',
rdf_predicate='hc:primaryLocation',
required=True,
),
# Auxiliary locations are a list; ARRAY_DIRECT copies it without
# transforming the individual elements.
FieldMapping(
source_path='places.auxiliary',
source_description='Auxiliary/secondary locations',
target_class='AuxiliaryPlace',
target_slot='auxiliary_locations',
transformation=TransformationType.ARRAY_DIRECT,
typedb_entity='auxiliary-place',
typedb_attribute='location',
rdf_predicate='hc:auxiliaryLocation',
required=False,
),
# Single optional temporary location, copied 1:1.
FieldMapping(
source_path='places.temporary',
source_description='Temporary location',
target_class='TemporaryLocation',
target_slot='temporary_location',
transformation=TransformationType.DIRECT,
typedb_entity='temporary-location',
typedb_attribute='location',
rdf_predicate='hc:temporaryLocation',
required=False,
),
],
# valid_from / valid_to are quoted, like the dates in this file's other
# examples ("2001-01-01", "1999-12-01"). YAML 1.1 loaders resolve bare ISO
# dates to date objects, so unquoted values would not stay plain strings.
example_yaml="""
# Custodian places
places:
primary:
address: Museumstraat 1
city: Amsterdam
auxiliary:
- name: Storage Facility
address: Industrieweg 100
temporary:
name: Pop-up Exhibition Space
valid_from: "2024-06-01"
valid_to: "2024-09-30"
""".strip(),
),
# =========================================================================
# PHASE 2: COLLECTIONS
# =========================================================================
# -------------------------------------------------------------------------
# COLLECTION - CORE
# -------------------------------------------------------------------------
# Mapping entry for the `collection_core` source block: documents how items of
# the `collections[]` list map onto the Collection and CollectionType classes.
EnrichmentSourceMapping(
source_block='collection_core',
description='Core collection and holdings classes',
detailed_description="""
Core classes for modeling heritage collections and holdings. Collections represent
aggregations of objects, documents, or materials managed by a heritage custodian.
Supports:
- Collection naming and description
- Collection types (archival, library, museum, mixed)
- Special collections designation
- Subject/temporal coverage
""".strip(),
linkml_class='Collection',
typedb_entity='collection',
# Declared provenance: custodian YAML, tier TIER_2_VERIFIED.
provenance=Provenance(
source_type='custodian_yaml',
data_tier=DataTier.TIER_2_VERIFIED,
),
# Eight classes are attributed to this block, but the fields below only
# target Collection and CollectionType.
generated_classes=[
'Collection', 'CollectionType', 'SpecialCollection', 'CastCollection',
'PhotographCollection', 'CustodianCollection', 'CustodianArchive',
'CustodianAdministration',
],
# `collections[].<key>` paths address a key on each list element.
# Only the collection name is required.
fields=[
FieldMapping(
source_path='collections[].name',
source_description='Collection name',
target_class='Collection',
target_slot='collection_name',
transformation=TransformationType.DIRECT,
typedb_entity='collection',
typedb_attribute='name',
rdf_predicate='schema:name',
required=True,
),
# LOOKUP: the raw value is resolved as an enum value or reference rather than copied.
FieldMapping(
source_path='collections[].type',
source_description='Collection type',
target_class='CollectionType',
target_slot='collection_type',
transformation=TransformationType.LOOKUP,
typedb_entity='collection-type',
typedb_attribute='type',
rdf_predicate='hc:collectionType',
required=False,
),
FieldMapping(
source_path='collections[].description',
source_description='Collection description',
target_class='Collection',
target_slot='description',
transformation=TransformationType.DIRECT,
typedb_entity='collection',
typedb_attribute='description',
rdf_predicate='schema:description',
required=False,
),
],
# Illustrative input. `extent` and `subject_areas` appear here but have no
# FieldMapping in this entry.
example_yaml="""
# Collection definition
collections:
- name: Dutch Masters Collection
type: MUSEUM_ART
description: 17th century Dutch paintings
extent: 450 paintings
subject_areas:
- Dutch Golden Age
- Portraiture
""".strip(),
),
# -------------------------------------------------------------------------
# COLLECTION - MANAGEMENT
# -------------------------------------------------------------------------
# Mapping entry for the `collection_management` source block: documents how the
# `digital_platforms.cms` keys map onto the CollectionManagementSystem class.
EnrichmentSourceMapping(
source_block='collection_management',
description='Collection management system classes',
detailed_description="""
Classes for collection management systems (CMS) used by heritage custodians
to catalog, track, and manage their collections.
Supports documentation of:
- CMS software used (Adlib, TMS, ArchivesSpace, etc.)
- System configurations
- Integration endpoints
""".strip(),
linkml_class='CollectionManagementSystem',
typedb_entity='collection-management-system',
# Declared provenance: custodian YAML, tier TIER_2_VERIFIED.
provenance=Provenance(
source_type='custodian_yaml',
data_tier=DataTier.TIER_2_VERIFIED,
),
generated_classes=[
'CollectionManagementSystem',
],
# Both fields are plain copies (DIRECT) onto the same class/entity.
# The system name is required, the vendor is optional.
fields=[
FieldMapping(
source_path='digital_platforms.cms.name',
source_description='CMS name',
target_class='CollectionManagementSystem',
target_slot='system_name',
transformation=TransformationType.DIRECT,
typedb_entity='collection-management-system',
typedb_attribute='system-name',
rdf_predicate='schema:name',
required=True,
),
FieldMapping(
source_path='digital_platforms.cms.vendor',
source_description='CMS vendor',
target_class='CollectionManagementSystem',
target_slot='vendor',
transformation=TransformationType.DIRECT,
typedb_entity='collection-management-system',
typedb_attribute='vendor',
rdf_predicate='schema:manufacturer',
required=False,
),
],
# Illustrative input. `version` and `url` appear here but have no
# FieldMapping in this entry.
example_yaml="""
# Collection management system
digital_platforms:
cms:
name: Adlib Museum
vendor: Axiell
version: "7.8"
url: https://collection.museum.nl
""".strip(),
),
# =========================================================================
# PHASE 2: PERSON & STAFF
# =========================================================================
# -------------------------------------------------------------------------
# PERSON - PROFILE EXTENDED
# -------------------------------------------------------------------------
# Mapping entry for the `person_profile_extended` source block: documents how
# the `person.*` keys map onto the LinkedInProfile, PersonConnection and
# PersonName classes.
EnrichmentSourceMapping(
source_block='person_profile_extended',
description='Extended person profile classes',
detailed_description="""
Extended classes for person/staff profiles beyond basic identity. Includes
LinkedIn profiles, person connections (professional networks), and web claims.
Supports:
- LinkedIn profile data integration
- Professional network connections
- Web-sourced claims about persons
- Person name variants and aliases
""".strip(),
# NOTE(review): 'PersonObservation' is the primary class here but is not
# listed in generated_classes below, and no field targets it. Presumably it
# is generated by another mapping entry; confirm.
linkml_class='PersonObservation',
typedb_entity='person-observation',
# Declared provenance: LinkedIn, tier TIER_3_CROWD_SOURCED.
provenance=Provenance(
source_type='linkedin',
data_tier=DataTier.TIER_3_CROWD_SOURCED,
),
generated_classes=[
'LinkedInProfile', 'PersonConnection', 'PersonName',
'PersonOrOrganization', 'PersonWebClaim',
],
# All three fields are optional. PersonOrOrganization and PersonWebClaim
# have no FieldMapping in this entry.
fields=[
FieldMapping(
source_path='person.linkedin_url',
source_description='LinkedIn profile URL',
target_class='LinkedInProfile',
target_slot='linkedin_url',
transformation=TransformationType.DIRECT,
typedb_entity='linkedin-profile',
typedb_attribute='profile-url',
rdf_predicate='schema:sameAs',
required=False,
),
# ARRAY_DIRECT: the list is copied without per-element transformation.
FieldMapping(
source_path='person.connections',
source_description='Professional connections',
target_class='PersonConnection',
target_slot='connections',
transformation=TransformationType.ARRAY_DIRECT,
typedb_entity='person-connection',
typedb_attribute='connected-person',
rdf_predicate='hc:hasConnection',
required=False,
),
FieldMapping(
source_path='person.names',
source_description='Person name variants',
target_class='PersonName',
target_slot='name_variants',
transformation=TransformationType.ARRAY_DIRECT,
typedb_entity='person-name',
typedb_attribute='name-value',
rdf_predicate='schema:alternateName',
required=False,
),
],
# Illustrative input. `person.name` appears here but has no FieldMapping
# in this entry.
example_yaml="""
# Extended person profile
person:
name: Jan de Vries
linkedin_url: https://linkedin.com/in/jandevries
names:
- value: Jan de Vries
type: legal_name
- value: J. de Vries
type: abbreviated
connections:
- name: Maria Bakker
organization: Rijksmuseum
""".strip(),
),
# -------------------------------------------------------------------------
# PERSON - WORK & EDUCATION
# -------------------------------------------------------------------------
# Mapping entry for the `person_work_education` source block: documents how
# `person.experience[]` and `person.education[]` items map onto the
# WorkExperience and EducationCredential classes.
EnrichmentSourceMapping(
source_block='person_work_education',
description='Work experience and education classes',
detailed_description="""
Classes for modeling work experience history and educational credentials
of persons associated with heritage custodians.
Supports:
- Employment history with dates
- Role/position tracking
- Educational credentials
- Skills and certifications
""".strip(),
linkml_class='WorkExperience',
typedb_entity='work-experience',
# Declared provenance: LinkedIn, tier TIER_3_CROWD_SOURCED.
provenance=Provenance(
source_type='linkedin',
data_tier=DataTier.TIER_3_CROWD_SOURCED,
),
# StaffRole and StaffRoles are attributed to this block but have no
# FieldMapping below.
generated_classes=[
'WorkExperience', 'EducationCredential', 'StaffRole', 'StaffRoles',
],
# All three fields are DIRECT copies and are marked required.
# NOTE(review): presumably `required` applies per list element (each
# experience/education item), not to the presence of the lists; confirm.
fields=[
FieldMapping(
source_path='person.experience[].organization',
source_description='Employer organization',
target_class='WorkExperience',
target_slot='organization',
transformation=TransformationType.DIRECT,
typedb_entity='work-experience',
typedb_attribute='organization',
rdf_predicate='schema:worksFor',
required=True,
),
FieldMapping(
source_path='person.experience[].role',
source_description='Job title/role',
target_class='WorkExperience',
target_slot='role_title',
transformation=TransformationType.DIRECT,
typedb_entity='work-experience',
typedb_attribute='role-title',
rdf_predicate='schema:jobTitle',
required=True,
),
FieldMapping(
source_path='person.education[].institution',
source_description='Educational institution',
target_class='EducationCredential',
target_slot='institution',
transformation=TransformationType.DIRECT,
typedb_entity='education-credential',
typedb_attribute='institution',
rdf_predicate='schema:alumniOf',
required=True,
),
],
# Illustrative input. `start_date`, `current`, `degree` and
# `graduation_year` appear here but have no FieldMapping in this entry.
example_yaml="""
# Work and education
person:
experience:
- organization: Rijksmuseum
role: Senior Curator
start_date: 2018-03
current: true
education:
- institution: University of Amsterdam
degree: MA Art History
graduation_year: 2010
""".strip(),
),
# =========================================================================
# PHASE 2: DIGITAL & API SERVICES
# =========================================================================
# -------------------------------------------------------------------------
# DIGITAL PLATFORMS - EXTENDED
# -------------------------------------------------------------------------
# Mapping entry for the `digital_platforms_extended` source block: documents
# how items of `digital_platforms.portals[]` map onto the WebPortal class.
EnrichmentSourceMapping(
source_block='digital_platforms_extended',
description='Extended digital platform classes',
detailed_description="""
Extended digital platform classes for web portals, virtual libraries,
and auxiliary digital presence. Supports heritage custodian online presence
beyond primary websites.
Includes:
- Web portals and discovery interfaces
- Virtual/digital-only libraries
- Auxiliary digital platforms
- Primary digital presence assertions
""".strip(),
linkml_class='WebPortal',
typedb_entity='web-portal',
# Declared provenance: web enrichment, tier TIER_2_VERIFIED.
provenance=Provenance(
source_type='web_enrichment',
data_tier=DataTier.TIER_2_VERIFIED,
),
# NOTE(review): 'PrimaryDigitalPresenceAssertion' is also listed in the
# generated_classes of the `finding_aids_standards` entry; confirm whether a
# class may be attributed to two source blocks.
# Only WebPortal is targeted by the fields below.
generated_classes=[
'WebPortal', 'VirtualMapLibrary', 'AuxiliaryDigitalPlatform',
'PrimaryDigitalPresenceAssertion',
],
# The portal URL is required; the portal type is an optional LOOKUP
# (enum value / reference resolution).
fields=[
FieldMapping(
source_path='digital_platforms.portals[].url',
source_description='Portal URL',
target_class='WebPortal',
target_slot='portal_url',
transformation=TransformationType.DIRECT,
typedb_entity='web-portal',
typedb_attribute='url',
rdf_predicate='schema:url',
required=True,
),
FieldMapping(
source_path='digital_platforms.portals[].type',
source_description='Portal type',
target_class='WebPortal',
target_slot='portal_type',
transformation=TransformationType.LOOKUP,
typedb_entity='web-portal',
typedb_attribute='portal-type',
rdf_predicate='hc:portalType',
required=False,
),
],
# Illustrative input. `name` appears on each portal but has no FieldMapping
# in this entry.
example_yaml="""
# Digital platforms
digital_platforms:
portals:
- url: https://collectie.museum.nl
type: DISCOVERY_PORTAL
name: Online Collection
- url: https://maps.museum.nl
type: VIRTUAL_MAP
name: Interactive Map
""".strip(),
),
# -------------------------------------------------------------------------
# API ENDPOINTS
# -------------------------------------------------------------------------
# Mapping entry for the `api_endpoints` source block: documents how the
# `api_endpoints.*` keys map onto the OAIPMHEndpoint, SearchAPI and
# IIPImageServer classes.
EnrichmentSourceMapping(
source_block='api_endpoints',
description='API and data service endpoint classes',
detailed_description="""
Classes for API endpoints and data services exposed by heritage custodians.
Supports interoperability documentation for harvesting, searching, and
accessing digital content.
Includes:
- OAI-PMH harvesting endpoints
- Search APIs (SRU, OpenSearch)
- IIIF Image/Presentation APIs
- File download services (EAD, METS)
""".strip(),
linkml_class='DataServiceEndpoint',
typedb_entity='data-service-endpoint',
# Declared provenance: web enrichment, tier TIER_2_VERIFIED.
provenance=Provenance(
source_type='web_enrichment',
data_tier=DataTier.TIER_2_VERIFIED,
),
# Eight classes are attributed to this block, but the fields below only
# target OAIPMHEndpoint, SearchAPI and IIPImageServer.
generated_classes=[
'DataServiceEndpoint', 'OAIPMHEndpoint', 'SearchAPI', 'FileAPI',
'EADDownload', 'METSAPI', 'IIPImageServer', 'InternetOfThings',
],
# All endpoint fields are optional DIRECT copies of a URL-valued key.
fields=[
FieldMapping(
source_path='api_endpoints.oai_pmh',
source_description='OAI-PMH endpoint',
target_class='OAIPMHEndpoint',
target_slot='endpoint_url',
transformation=TransformationType.DIRECT,
typedb_entity='oai-pmh-endpoint',
typedb_attribute='endpoint-url',
rdf_predicate='hc:oaiPmhEndpoint',
required=False,
),
FieldMapping(
source_path='api_endpoints.search',
source_description='Search API endpoint',
target_class='SearchAPI',
target_slot='search_url',
transformation=TransformationType.DIRECT,
typedb_entity='search-api',
typedb_attribute='search-url',
rdf_predicate='hc:searchEndpoint',
required=False,
),
# NOTE(review): the source is described as an IIIF Image API, while the
# target class/entity is named IIPImageServer / iip-image-server. Confirm the
# class name is intended and not a misspelling of an IIIF class.
FieldMapping(
source_path='api_endpoints.iiif',
source_description='IIIF Image API',
target_class='IIPImageServer',
target_slot='iiif_url',
transformation=TransformationType.DIRECT,
typedb_entity='iip-image-server',
typedb_attribute='iiif-url',
rdf_predicate='hc:iiifEndpoint',
required=False,
),
],
# Illustrative input. `ead_download` appears here but has no FieldMapping in
# this entry, although 'EADDownload' is listed in generated_classes.
example_yaml="""
# API endpoints
api_endpoints:
oai_pmh: https://api.museum.nl/oai
search: https://api.museum.nl/search
iiif: https://iiif.museum.nl/image/2
ead_download: https://api.museum.nl/ead
""".strip(),
),
# =========================================================================
# PHASE 2: VIDEO & SOCIAL MEDIA
# =========================================================================
# -------------------------------------------------------------------------
# VIDEO CONTENT
# -------------------------------------------------------------------------
# Mapping entry for the `video_content` source block: documents how items of
# the `videos[]` list map onto the VideoPost, VideoChapter and VideoTranscript
# classes.
EnrichmentSourceMapping(
source_block='video_content',
description='Video content and annotation classes',
detailed_description="""
Classes for video content produced or published by heritage custodians.
Supports rich annotation of video content including chapters, transcripts,
subtitles, and time-based segments.
Includes:
- Video posts (YouTube, Vimeo)
- Video chapters and segments
- Transcripts and subtitles
- Audio/text annotations
""".strip(),
linkml_class='VideoPost',
typedb_entity='video-post',
# Declared provenance: social media, tier TIER_3_CROWD_SOURCED.
provenance=Provenance(
source_type='social_media',
data_tier=DataTier.TIER_3_CROWD_SOURCED,
),
# Eight classes are attributed to this block, but the fields below only
# target VideoPost, VideoChapter and VideoTranscript.
generated_classes=[
'VideoPost', 'VideoChapter', 'VideoTranscript', 'VideoSubtitle',
'VideoAnnotation', 'VideoAudioAnnotation', 'VideoTextContent',
'VideoTimeSegment',
],
# Only the video URL is required.
fields=[
FieldMapping(
source_path='videos[].url',
source_description='Video URL',
target_class='VideoPost',
target_slot='video_url',
transformation=TransformationType.DIRECT,
typedb_entity='video-post',
typedb_attribute='video-url',
rdf_predicate='schema:contentUrl',
required=True,
),
# ARRAY_DIRECT: the chapter list is copied without per-element transformation.
FieldMapping(
source_path='videos[].chapters',
source_description='Video chapters',
target_class='VideoChapter',
target_slot='chapters',
transformation=TransformationType.ARRAY_DIRECT,
typedb_entity='video-chapter',
typedb_attribute='chapters',
rdf_predicate='schema:hasPart',
required=False,
),
# NOTE(review): the example below shows `transcript` as a mapping with
# `language` and `text` keys, while this DIRECT mapping targets a single
# `transcript-text` attribute. Confirm how the mapping value is flattened.
FieldMapping(
source_path='videos[].transcript',
source_description='Video transcript',
target_class='VideoTranscript',
target_slot='transcript',
transformation=TransformationType.DIRECT,
typedb_entity='video-transcript',
typedb_attribute='transcript-text',
rdf_predicate='schema:transcript',
required=False,
),
],
# Illustrative input. `title` and `duration` appear here but have no
# FieldMapping in this entry.
example_yaml="""
# Video content
videos:
- url: https://youtube.com/watch?v=abc123
title: Museum Tour 2024
duration: PT45M30S
chapters:
- title: Introduction
start_time: PT0S
- title: Main Gallery
start_time: PT5M
transcript:
language: nl
text: "Welkom bij het museum..."
""".strip(),
),
# -------------------------------------------------------------------------
# SOCIAL MEDIA CONTENT
# -------------------------------------------------------------------------
# Mapping entry for the `social_media_content` source block: documents how
# items of `social_media.profiles[]` map onto the SocialMediaProfile class.
EnrichmentSourceMapping(
source_block='social_media_content',
description='Social media content and profile classes',
detailed_description="""
Classes for social media presence and content of heritage custodians.
Tracks posts, profiles, and engagement across platforms.
Includes:
- Social media profiles (per platform)
- Posts and content items
- Engagement metrics
""".strip(),
linkml_class='SocialMediaProfile',
typedb_entity='social-media-profile',
# Declared provenance: social media, tier TIER_3_CROWD_SOURCED.
provenance=Provenance(
source_type='social_media',
data_tier=DataTier.TIER_3_CROWD_SOURCED,
),
# SocialMediaPost is attributed to this block but has no FieldMapping below.
generated_classes=[
'SocialMediaProfile', 'SocialMediaPost',
],
# Both fields are required and target the same class/entity.
fields=[
# LOOKUP: the platform value is resolved as an enum value or reference.
FieldMapping(
source_path='social_media.profiles[].platform',
source_description='Social media platform',
target_class='SocialMediaProfile',
target_slot='platform',
transformation=TransformationType.LOOKUP,
typedb_entity='social-media-profile',
typedb_attribute='platform',
rdf_predicate='hc:socialPlatform',
required=True,
),
FieldMapping(
source_path='social_media.profiles[].url',
source_description='Profile URL',
target_class='SocialMediaProfile',
target_slot='profile_url',
transformation=TransformationType.DIRECT,
typedb_entity='social-media-profile',
typedb_attribute='profile-url',
rdf_predicate='schema:url',
required=True,
),
],
# Illustrative input. `followers` appears here but has no FieldMapping in
# this entry.
example_yaml="""
# Social media profiles
social_media:
profiles:
- platform: TWITTER
url: https://twitter.com/rijksmuseum
followers: 450000
- platform: INSTAGRAM
url: https://instagram.com/rijksmuseum
followers: 1200000
""".strip(),
),
# =========================================================================
# PHASE 2 SOURCE MAPPINGS: Legal & Administrative
# =========================================================================
# -------------------------------------------------------------------------
# legal_policies - Legal and policy classes
# -------------------------------------------------------------------------
# Mapping entry for the `legal_policies` source block: documents how the
# `policies.*` keys map onto the AccessPolicy and DataLicensePolicy classes.
EnrichmentSourceMapping(
source_block='legal_policies',
description='Legal and policy classes',
detailed_description="""
Classes for legal policies, access restrictions, and data licensing
applicable to heritage custodians and their collections.
Includes:
- Access policies (reading room, digital)
- Data license terms
- Legal responsibility collections
- Trade register information
""".strip(),
linkml_class='AccessPolicy',
typedb_entity='access-policy',
# Declared provenance: custodian YAML, tier TIER_2_VERIFIED.
provenance=Provenance(
source_type='custodian_yaml',
data_tier=DataTier.TIER_2_VERIFIED,
),
# LegalResponsibilityCollection, ArticlesOfAssociation and TradeRegister are
# attributed to this block but have no FieldMapping below.
generated_classes=[
'AccessPolicy', 'DataLicensePolicy', 'LegalResponsibilityCollection',
'ArticlesOfAssociation', 'TradeRegister',
],
# Both fields are optional.
fields=[
# NOTE(review): the example below shows `policies.access` as a mapping with
# a `type` key, yet this LOOKUP targets `access_type` from the parent path.
# Confirm whether the lookup is meant to read `policies.access.type`.
FieldMapping(
source_path='policies.access',
source_description='Access policy',
target_class='AccessPolicy',
target_slot='access_type',
transformation=TransformationType.LOOKUP,
typedb_entity='access-policy',
typedb_attribute='access-type',
rdf_predicate='hc:accessPolicy',
required=False,
),
FieldMapping(
source_path='policies.data_license',
source_description='Data license',
target_class='DataLicensePolicy',
target_slot='license',
transformation=TransformationType.DIRECT,
typedb_entity='data-license-policy',
typedb_attribute='license-type',
rdf_predicate='schema:license',
required=False,
),
],
# Illustrative input. The `legal_responsibility` sub-block appears here but
# has no FieldMapping in this entry.
example_yaml="""
# Policies
policies:
access:
type: PUBLIC
reading_room: true
appointment_required: false
data_license: CC-BY-4.0
legal_responsibility:
type: FOUNDATION
articles_url: https://kvk.nl/articles/12345678
""".strip(),
),
# -------------------------------------------------------------------------
# administrative_records - Administrative and financial record classes
# -------------------------------------------------------------------------
# Mapping entry for the `administrative_records` source block: documents how
# the `administration.*` keys map onto the Budget, Project and RegistrationInfo
# classes.
EnrichmentSourceMapping(
source_block='administrative_records',
description='Administrative and financial record classes',
detailed_description="""
Classes for administrative records including budgets, projects, financial
statements, and registration information.
Includes:
- Budget tracking
- Project management
- Financial statements
- Registration info
""".strip(),
linkml_class='Budget',
typedb_entity='budget',
# Declared provenance: custodian YAML, tier TIER_2_VERIFIED.
provenance=Provenance(
source_type='custodian_yaml',
data_tier=DataTier.TIER_2_VERIFIED,
),
# FinancialStatement, ConfidenceMeasure and ConflictStatus are attributed to
# this block but have no FieldMapping below.
generated_classes=[
'Budget', 'Project', 'FinancialStatement', 'RegistrationInfo',
'ConfidenceMeasure', 'ConflictStatus',
],
# All three fields are optional. Each source path addresses a whole
# sub-block (see example_yaml), while the TypeDB attribute names a single
# value (annual-amount, project-name, registration-number).
fields=[
FieldMapping(
source_path='administration.budget',
source_description='Budget information',
target_class='Budget',
target_slot='annual_budget',
transformation=TransformationType.DIRECT,
typedb_entity='budget',
typedb_attribute='annual-amount',
rdf_predicate='hc:annualBudget',
required=False,
),
# ARRAY_DIRECT: the project list is copied without per-element transformation.
FieldMapping(
source_path='administration.projects',
source_description='Active projects',
target_class='Project',
target_slot='projects',
transformation=TransformationType.ARRAY_DIRECT,
typedb_entity='project',
typedb_attribute='project-name',
rdf_predicate='hc:hasProject',
required=False,
),
FieldMapping(
source_path='administration.registration',
source_description='Registration information',
target_class='RegistrationInfo',
target_slot='registration',
transformation=TransformationType.DIRECT,
typedb_entity='registration-info',
typedb_attribute='registration-number',
rdf_predicate='hc:registrationInfo',
required=False,
),
],
# Illustrative input; the registration number is quoted so it stays a string.
example_yaml="""
# Administrative records
administration:
budget:
annual_amount: 5000000
currency: EUR
fiscal_year: 2024
projects:
- name: Digitization 2024
status: IN_PROGRESS
registration:
authority: KVK
number: "12345678"
""".strip(),
),
# =========================================================================
# PHASE 2 SOURCE MAPPINGS: Finding Aids & Standards
# =========================================================================
# -------------------------------------------------------------------------
# finding_aids_standards - Finding aids, standards, documentation classes
# -------------------------------------------------------------------------
# Mapping entry for the `finding_aids_standards` source block: documents how
# `finding_aids[]` items and the top-level `standards` list map onto the
# FindingAidType, FindingAid and Standard classes.
EnrichmentSourceMapping(
source_block='finding_aids_standards',
description='Finding aids, standards, and source documentation classes',
detailed_description="""
Classes for archival finding aids, metadata standards, and source documentation.
Essential for archival description and interoperability.
Includes:
- Finding aids (EAD, PDF, online)
- Metadata standards compliance
- Source document references
- Primary digital presence assertions
""".strip(),
linkml_class='FindingAid',
typedb_entity='finding-aid',
# Declared provenance: custodian YAML, tier TIER_2_VERIFIED.
provenance=Provenance(
source_type='custodian_yaml',
data_tier=DataTier.TIER_2_VERIFIED,
),
# NOTE(review): 'PrimaryDigitalPresenceAssertion' is also listed in the
# generated_classes of the `digital_platforms_extended` entry; confirm
# whether a class may be attributed to two source blocks.
# SourceDocument has no FieldMapping below.
generated_classes=[
'FindingAid', 'FindingAidType', 'SourceDocument', 'Standard',
'PrimaryDigitalPresenceAssertion',
],
# The finding-aid type is required; the URL and standards list are optional.
fields=[
# LOOKUP: the type value is resolved as an enum value or reference.
FieldMapping(
source_path='finding_aids[].type',
source_description='Finding aid type',
target_class='FindingAidType',
target_slot='finding_aid_type',
transformation=TransformationType.LOOKUP,
typedb_entity='finding-aid-type',
typedb_attribute='type',
rdf_predicate='hc:findingAidType',
required=True,
),
FieldMapping(
source_path='finding_aids[].url',
source_description='Finding aid URL',
target_class='FindingAid',
target_slot='url',
transformation=TransformationType.DIRECT,
typedb_entity='finding-aid',
typedb_attribute='url',
rdf_predicate='schema:url',
required=False,
),
# `standards` is a top-level key, not nested under `finding_aids`.
FieldMapping(
source_path='standards',
source_description='Standards compliance',
target_class='Standard',
target_slot='standards',
transformation=TransformationType.ARRAY_DIRECT,
typedb_entity='standard',
typedb_attribute='standard-name',
rdf_predicate='hc:conformsToStandard',
required=False,
),
],
# Illustrative input covering every mapped key.
example_yaml="""
# Finding aids and standards
finding_aids:
- type: EAD
url: https://archive.nl/ead/collection123.xml
- type: PDF
url: https://archive.nl/guides/collection123.pdf
standards:
- ISAD(G)
- EAD3
- Dublin Core
""".strip(),
),
# =========================================================================
# PHASE 2 SOURCE MAPPINGS: Reconstruction & Provenance
# =========================================================================
# -------------------------------------------------------------------------
# reconstruction_provenance - Entity reconstruction and provenance tracking
# -------------------------------------------------------------------------
# Mapping entry for the `reconstruction_provenance` source block: documents
# how the `reconstruction.*` keys map onto the ReconstructedEntity,
# ReconstructionActivity and ReconstructionAgent classes.
EnrichmentSourceMapping(
source_block='reconstruction_provenance',
description='Entity reconstruction and provenance tracking classes',
detailed_description="""
Classes for tracking entity reconstruction activities and provenance chains.
Used for modeling how information about heritage custodians is assembled
from multiple sources.
Includes:
- Reconstructed entities (from multiple sources)
- Reconstruction activities
- Reconstruction agents (human/automated)
- Timeline events from external sources
""".strip(),
linkml_class='ReconstructedEntity',
typedb_entity='reconstructed-entity',
# Declared provenance: computed data, tier TIER_4_INFERRED.
provenance=Provenance(
source_type='computed',
data_tier=DataTier.TIER_4_INFERRED,
),
# CustodianTimelineEvent is attributed to this block but has no FieldMapping below.
generated_classes=[
'ReconstructedEntity', 'ReconstructionActivity', 'ReconstructionAgent',
'CustodianTimelineEvent',
],
# All three fields use `prov:`-prefixed RDF predicates. Only the source
# list is required.
fields=[
# ARRAY_DIRECT: the source list is copied without per-element transformation.
FieldMapping(
source_path='reconstruction.sources',
source_description='Source documents',
target_class='ReconstructedEntity',
target_slot='sources',
transformation=TransformationType.ARRAY_DIRECT,
typedb_entity='reconstructed-entity',
typedb_attribute='sources',
rdf_predicate='prov:wasDerivedFrom',
required=True,
),
FieldMapping(
source_path='reconstruction.activity',
source_description='Reconstruction activity',
target_class='ReconstructionActivity',
target_slot='activity',
transformation=TransformationType.DIRECT,
typedb_entity='reconstruction-activity',
typedb_attribute='activity-type',
rdf_predicate='prov:wasGeneratedBy',
required=False,
),
FieldMapping(
source_path='reconstruction.agent',
source_description='Reconstruction agent',
target_class='ReconstructionAgent',
target_slot='agent',
transformation=TransformationType.DIRECT,
typedb_entity='reconstruction-agent',
typedb_attribute='agent-id',
rdf_predicate='prov:wasAttributedTo',
required=False,
),
],
# Illustrative input. `activity` and `agent` are mappings here (type/date,
# type/name), while their FieldMappings name a single TypeDB attribute each.
example_yaml="""
# Reconstruction provenance
reconstruction:
sources:
- type: WIKIDATA
id: Q190804
- type: ISIL_REGISTRY
id: NL-AmRM
activity:
type: AUTOMATED_MERGE
date: 2024-01-15
agent:
type: SYSTEM
name: glam-extractor
""".strip(),
),
# =========================================================================
# PHASE 2 SOURCE MAPPINGS: Storage & Facilities
# =========================================================================
# -------------------------------------------------------------------------
# storage_facilities - Storage conditions and facility classes
# -------------------------------------------------------------------------
# Mapping entry for the `storage_facilities` source block: documents how the
# `facilities.*` keys map onto the StorageCondition, StorageType and
# EducationCenter classes.
EnrichmentSourceMapping(
source_block='storage_facilities',
description='Storage conditions and facility classes',
detailed_description="""
Classes for physical storage conditions, educational facilities, and
specialized spaces within heritage custodian buildings.
Includes:
- Storage conditions (climate, security)
- Storage types (warehouse, vault)
- Educational centers
- Specialized facilities (libraries, social spaces)
""".strip(),
linkml_class='StorageCondition',
typedb_entity='storage-condition',
# Declared provenance: custodian YAML, tier TIER_2_VERIFIED.
provenance=Provenance(
source_type='custodian_yaml',
data_tier=DataTier.TIER_2_VERIFIED,
),
# Eight classes are attributed to this block, but the fields below only
# target StorageCondition, StorageType and EducationCenter.
generated_classes=[
'StorageCondition', 'StorageConditionPolicy', 'StorageType',
'EducationCenter', 'PersonalLibrary', 'LocationLibrary', 'SocialSpace',
'CateringPlace',
],
# All three fields are optional.
fields=[
# NOTE(review): the example below shows `conditions` as a mapping
# (temperature, humidity, security_level), while the target slot is
# `climate_control`. Confirm how the sub-keys are represented in that slot.
FieldMapping(
source_path='facilities.storage.conditions',
source_description='Storage conditions',
target_class='StorageCondition',
target_slot='climate_control',
transformation=TransformationType.DIRECT,
typedb_entity='storage-condition',
typedb_attribute='climate-control',
rdf_predicate='hc:storageCondition',
required=False,
),
# LOOKUP: the type value is resolved as an enum value or reference.
FieldMapping(
source_path='facilities.storage.type',
source_description='Storage type',
target_class='StorageType',
target_slot='storage_type',
transformation=TransformationType.LOOKUP,
typedb_entity='storage-type',
typedb_attribute='type',
rdf_predicate='hc:storageType',
required=False,
),
FieldMapping(
source_path='facilities.education_center',
source_description='Education center',
target_class='EducationCenter',
target_slot='education_center',
transformation=TransformationType.DIRECT,
typedb_entity='education-center',
typedb_attribute='center-name',
rdf_predicate='hc:hasEducationCenter',
required=False,
),
],
# Illustrative input. `education_center.capacity` appears here but has no
# FieldMapping in this entry.
example_yaml="""
# Facilities
facilities:
storage:
type: CLIMATE_CONTROLLED_VAULT
conditions:
temperature: 18
humidity: 50
security_level: HIGH
education_center:
name: Museum Education Wing
capacity: 50
""".strip(),
),
# =========================================================================
# PHASE 2 SOURCE MAPPINGS: Funding & Grants
# =========================================================================
# -------------------------------------------------------------------------
# funding_grants - Funding, grants, and application classes
# -------------------------------------------------------------------------
# Mapping entry for the `funding_grants` source block: documents how the
# `funding.*` lists map onto the FundingAgenda, FundingRequirement and
# CallForApplication classes.
EnrichmentSourceMapping(
source_block='funding_grants',
description='Funding, grants, and application classes',
detailed_description="""
Classes for funding sources, grant applications, and financial requirements
relevant to heritage custodians.
Includes:
- Funding agendas
- Grant requirements
- Application calls
""".strip(),
linkml_class='FundingAgenda',
typedb_entity='funding-agenda',
# Declared provenance: external API, tier TIER_2_VERIFIED.
provenance=Provenance(
source_type='external_api',
data_tier=DataTier.TIER_2_VERIFIED,
),
# Each generated class has exactly one FieldMapping below.
generated_classes=[
'FundingAgenda', 'FundingRequirement', 'CallForApplication',
],
# All three fields are optional lists copied as-is (ARRAY_DIRECT: no
# per-element transformation).
fields=[
FieldMapping(
source_path='funding.agendas',
source_description='Funding agendas',
target_class='FundingAgenda',
target_slot='agendas',
transformation=TransformationType.ARRAY_DIRECT,
typedb_entity='funding-agenda',
typedb_attribute='agenda-name',
rdf_predicate='hc:fundingAgenda',
required=False,
),
FieldMapping(
source_path='funding.requirements',
source_description='Funding requirements',
target_class='FundingRequirement',
target_slot='requirements',
transformation=TransformationType.ARRAY_DIRECT,
typedb_entity='funding-requirement',
typedb_attribute='requirement',
rdf_predicate='hc:fundingRequirement',
required=False,
),
FieldMapping(
source_path='funding.calls',
source_description='Open calls for applications',
target_class='CallForApplication',
target_slot='calls',
transformation=TransformationType.ARRAY_DIRECT,
typedb_entity='call-for-application',
typedb_attribute='call-title',
rdf_predicate='hc:openCall',
required=False,
),
],
# Illustrative input: each list element is a mapping (name/amount/currency,
# type/percentage, title/deadline).
example_yaml="""
# Funding
funding:
agendas:
- name: Heritage Digitization Fund 2024
amount: 500000
currency: EUR
requirements:
- type: MATCHING_FUNDS
percentage: 25
calls:
- title: Digital Heritage Innovation
deadline: 2024-06-30
""".strip(),
),
# =========================================================================
# PHASE 2 SOURCE MAPPINGS: Language & Naming
# =========================================================================
# -------------------------------------------------------------------------
# language_naming - Language codes, proficiency, and naming classes
# -------------------------------------------------------------------------
# Mapping entry for the `language_naming` source block: documents how the
# `languages`, `staff.language_proficiency` and `names.appellations` keys map
# onto the LanguageCode, LanguageProficiency and Appellation classes.
EnrichmentSourceMapping(
source_block='language_naming',
description='Language codes, proficiency, and naming classes',
detailed_description="""
Classes for language handling, proficiency levels, and naming/appellation
conventions used in heritage custodian data.
Includes:
- ISO language codes
- Language proficiency levels
- Appellations (formal names)
- Container (structural) classes
""".strip(),
linkml_class='LanguageCode',
typedb_entity='language-code',
# Declared provenance: reference data, tier TIER_1_AUTHORITATIVE.
provenance=Provenance(
source_type='reference_data',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
# Container is attributed to this block but has no FieldMapping below.
generated_classes=[
'LanguageCode', 'LanguageProficiency', 'Appellation', 'Container',
],
# All three fields are optional and read from three different top-level
# keys (`languages`, `staff`, `names`).
fields=[
# ARRAY_DIRECT: the language list is copied without per-element transformation.
FieldMapping(
source_path='languages',
source_description='Languages used',
target_class='LanguageCode',
target_slot='languages',
transformation=TransformationType.ARRAY_DIRECT,
typedb_entity='language-code',
typedb_attribute='iso-code',
rdf_predicate='schema:inLanguage',
required=False,
),
# LOOKUP: the proficiency value is resolved as an enum value or reference.
# This key is not illustrated in example_yaml below.
FieldMapping(
source_path='staff.language_proficiency',
source_description='Language proficiency',
target_class='LanguageProficiency',
target_slot='proficiency',
transformation=TransformationType.LOOKUP,
typedb_entity='language-proficiency',
typedb_attribute='level',
rdf_predicate='hc:languageProficiency',
required=False,
),
FieldMapping(
source_path='names.appellations',
source_description='Formal appellations',
target_class='Appellation',
target_slot='appellations',
transformation=TransformationType.ARRAY_DIRECT,
typedb_entity='appellation',
typedb_attribute='name-value',
rdf_predicate='crm:P1_is_identified_by',
required=False,
),
],
# Illustrative input; each appellation carries value/type/language keys.
example_yaml="""
# Language and naming
languages:
- nl
- en
- de
names:
appellations:
- value: Rijksmuseum Amsterdam
type: OFFICIAL
language: nl
- value: National Museum of the Netherlands
type: TRANSLATION
language: en
""".strip(),
),
# =========================================================================
# PHASE 2 SOURCE MAPPINGS: Specialized Archives (International)
# =========================================================================
# -------------------------------------------------------------------------
# archives_german - German-specific archive types
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='archives_german',
description='German-specific archive types',
detailed_description="""
Archive types specific to German archival tradition and organization.
German archives follow a distinctive organizational pattern based on
political/administrative regions and specialized functions.
""".strip(),
linkml_class='Verwaltungsarchiv',
typedb_entity='verwaltungsarchiv',
provenance=Provenance(
source_type='isil_registry',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
generated_classes=[
'Verwaltungsarchiv', 'Vereinsarchiv', 'Verlagsarchiv',
'Bildstelle', 'Medienzentrum', 'Personenstandsarchiv',
],
fields=[
FieldMapping(
source_path='archive_type',
source_description='German archive type',
target_class='Verwaltungsarchiv',
target_slot='archive_type',
transformation=TransformationType.LOOKUP,
typedb_entity='verwaltungsarchiv',
typedb_attribute='type',
rdf_predicate='hc:archiveType',
required=True,
),
],
example_yaml="""
# German archive type
archive_type: VERWALTUNGSARCHIV
name: Landesarchiv Baden-Württemberg
""".strip(),
),
# -------------------------------------------------------------------------
# archives_swedish - Swedish-specific archive types
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='archives_swedish',
description='Swedish-specific archive types',
detailed_description="""
Archive types specific to Swedish archival tradition. Swedish archives
include national (Riksarkivet), regional (Landsarkiv), and local heritage
institutions (Hembygdsförening).
""".strip(),
linkml_class='Landsarkiv',
typedb_entity='landsarkiv',
provenance=Provenance(
source_type='isil_registry',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
generated_classes=[
'Landsarkiv', 'Foremalarkiv', 'SectorOfArchivesInSweden',
'LocalHeritageInstitutionSweden',
],
fields=[
FieldMapping(
source_path='archive_type',
source_description='Swedish archive type',
target_class='Landsarkiv',
target_slot='archive_type',
transformation=TransformationType.LOOKUP,
typedb_entity='landsarkiv',
typedb_attribute='type',
rdf_predicate='hc:archiveType',
required=True,
),
],
example_yaml="""
# Swedish archive type
archive_type: LANDSARKIV
name: Landsarkivet i Uppsala
""".strip(),
),
# -------------------------------------------------------------------------
# archives_french - French-specific archive types
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='archives_french',
description='French-specific archive types',
detailed_description="""
Archive types specific to French archival organization. French archives
follow a centralized national system with departmental and communal levels.
""".strip(),
linkml_class='FrenchPrivateArchives',
typedb_entity='french-private-archives',
provenance=Provenance(
source_type='isil_registry',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
generated_classes=[
'FrenchPrivateArchives', 'Conservatoria',
],
fields=[
FieldMapping(
source_path='archive_type',
source_description='French archive type',
target_class='FrenchPrivateArchives',
target_slot='archive_type',
transformation=TransformationType.LOOKUP,
typedb_entity='french-private-archives',
typedb_attribute='type',
rdf_predicate='hc:archiveType',
required=True,
),
],
example_yaml="""
# French archive type
archive_type: ARCHIVES_PRIVEES
name: Archives privées de la famille Rothschild
""".strip(),
),
# -------------------------------------------------------------------------
# archives_other - Other international specialized archive types
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='archives_other',
description='Other international specialized archive types',
detailed_description="""
Specialized archive types from other countries including Czech regional
archives, Nordic archives, and various thematic archive types.
""".strip(),
linkml_class='SpecializedArchivesCzechia',
typedb_entity='specialized-archives-czechia',
provenance=Provenance(
source_type='isil_registry',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
generated_classes=[
'SpecializedArchivesCzechia', 'DimArchives', 'LightArchives',
'HistoricalArchive', 'JointArchives', 'PartyArchive', 'Kustodie',
'ArchivesForBuildingRecords',
],
fields=[
FieldMapping(
source_path='archive_type',
source_description='Specialized archive type',
target_class='SpecializedArchivesCzechia',
target_slot='archive_type',
transformation=TransformationType.LOOKUP,
typedb_entity='specialized-archives-czechia',
typedb_attribute='type',
rdf_predicate='hc:archiveType',
required=True,
),
],
example_yaml="""
# Czech specialized archive
archive_type: OBLASTNI_ARCHIV
name: Státní oblastní archiv v Praze
""".strip(),
),
]
# ============================================================================
# PERSON CATEGORIES (9 Categories)
# ============================================================================
# Display groupings for the person-level source blocks.
#
# Each entry carries an English label/description plus a Dutch counterpart
# (`name_nl` / `description_nl`), an emoji `icon`, and `sources`: the
# `source_block` identifiers that belong to the category. The identifiers are
# presumably resolved against PERSON_MAPPINGS - verify that every name listed
# here has a matching EnrichmentSourceMapping there.
PERSON_CATEGORIES: list[MappingCategory] = [
    MappingCategory(
        id='identity',
        name='Identity & Profile',
        name_nl='Identiteit & Profiel',
        description='Core person identity: name, headline, location, connections',
        description_nl='Kern persoonidentiteit: naam, kopregel, locatie, connecties',
        icon='🪪',
        sources=['profile_identity', 'linkedin_profile'],
    ),
    MappingCategory(
        id='career',
        name='Career & Experience',
        name_nl='Carrière & Ervaring',
        description='Work history, positions, organizations',
        description_nl='Werkgeschiedenis, posities, organisaties',
        icon='💼',
        sources=['career_history'],
    ),
    MappingCategory(
        id='education',
        name='Education & Credentials',
        name_nl='Opleiding & Kwalificaties',
        description='Educational background, degrees, institutions',
        description_nl='Opleidingsachtergrond, diploma\'s, instellingen',
        icon='🎓',
        sources=['education'],
    ),
    MappingCategory(
        id='skills',
        name='Skills & Expertise',
        name_nl='Vaardigheden & Expertise',
        description='Professional skills, languages, endorsements',
        description_nl='Professionele vaardigheden, talen, aanbevelingen',
        # The icon was an empty string here, unlike every other category, so
        # this category would render without a glyph.
        # NOTE(review): the glyph below is a stand-in; confirm the intended
        # one against frontend/src/lib/linkml/custodian-data-mappings.ts.
        icon='🛠️',
        sources=['skills_expertise'],
    ),
    MappingCategory(
        id='heritage',
        name='Heritage Sector Relevance',
        name_nl='Erfgoedsector Relevantie',
        description='Heritage domain expertise and experience',
        description_nl='Erfgoed domeinexpertise en ervaring',
        icon='🏛️',
        sources=['heritage_relevance', 'heritage_experience'],
    ),
    MappingCategory(
        id='affiliations',
        name='Affiliations & Records',
        name_nl='Affiliaties & Records',
        description='Linked custodians, person records, connections',
        description_nl='Gekoppelde bronhouders, persoonsrecords, connecties',
        icon='🔗',
        sources=['affiliations', 'linked_records'],
    ),
    MappingCategory(
        id='contact',
        name='Contact & Social',
        name_nl='Contact & Sociaal',
        description='Contact information, social media profiles',
        description_nl='Contactgegevens, sociale media profielen',
        icon='📧',
        sources=['contact_data'],
    ),
    MappingCategory(
        id='provenance',
        name='Extraction & Provenance',
        name_nl='Extractie & Herkomst',
        description='Data extraction metadata and web claims',
        description_nl='Data extractie metadata en webclaims',
        icon='📋',
        sources=['extraction_metadata', 'web_claims'],
    ),
    MappingCategory(
        id='pico_ontology',
        name='PiCo Ontology Mapping',
        name_nl='PiCo Ontologie Mapping',
        description='Person in Context (PiCo) ontology alignment',
        description_nl='Person in Context (PiCo) ontologie uitlijning',
        icon='🔬',
        sources=['pico_mapped', 'pico_unmapped'],
    ),
]
# ============================================================================
# PERSON MAPPINGS (14 Person Source Blocks)
# ============================================================================
PERSON_MAPPINGS: list[EnrichmentSourceMapping] = [
# -------------------------------------------------------------------------
# PROFILE IDENTITY - Core profile information
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='profile_identity',
description='Core profile identity - name, headline, location',
detailed_description="""
Core identity information extracted from LinkedIn profiles.
Includes the person's full name, professional headline, location,
and current company affiliation.
This data forms the foundation of the person entity and is used
for display and search purposes across the heritage network.
""".strip(),
linkml_class='Person',
typedb_entity='person',
provenance=Provenance(
source_type='external_api',
data_tier=DataTier.TIER_2_VERIFIED,
),
fields=[
FieldMapping(
source_path='profile_data.name',
source_description='Full name of the person',
target_class='Person',
target_slot='name',
typedb_entity='person',
typedb_attribute='person-name',
rdf_predicate='foaf:name',
transformation=TransformationType.DIRECT,
required=True,
notes='Primary identifier for the person',
),
FieldMapping(
source_path='profile_data.headline',
source_description='Professional headline/title',
target_class='Person',
target_slot='headline',
typedb_entity='person',
typedb_attribute='headline',
rdf_predicate='schema:jobTitle',
transformation=TransformationType.DIRECT,
required=False,
notes='Current professional headline from LinkedIn',
),
FieldMapping(
source_path='profile_data.headline_english',
source_description='English translation of headline',
target_class='Person',
target_slot='headline_english',
typedb_entity='person',
typedb_attribute='headline-english',
rdf_predicate='schema:jobTitle',
transformation=TransformationType.DIRECT,
required=False,
notes='Translated headline for non-English profiles',
),
FieldMapping(
source_path='profile_data.location',
source_description='Geographic location',
target_class='Person',
target_slot='location',
typedb_entity='person',
typedb_attribute='location-string',
rdf_predicate='schema:address',
transformation=TransformationType.DIRECT,
required=False,
notes='Free-text location from LinkedIn',
),
FieldMapping(
source_path='profile_data.country_code',
source_description='ISO country code',
target_class='Person',
target_slot='country_code',
typedb_entity='person',
typedb_attribute='country-code',
rdf_predicate='schema:addressCountry',
transformation=TransformationType.DIRECT,
required=False,
notes='Two-letter ISO 3166-1 country code',
),
FieldMapping(
source_path='profile_data.current_company',
source_description='Current employer name',
target_class='Person',
target_slot='current_company',
typedb_entity='person',
typedb_attribute='current-company',
rdf_predicate='schema:worksFor',
transformation=TransformationType.DIRECT,
required=False,
notes='Name of current employer organization',
),
FieldMapping(
source_path='profile_data.about',
source_description='About/summary section',
target_class='Person',
target_slot='about',
typedb_entity='person',
typedb_attribute='about',
rdf_predicate='schema:description',
transformation=TransformationType.DIRECT,
required=False,
notes='Professional summary from LinkedIn',
),
FieldMapping(
source_path='profile_data.total_experience',
source_description='Total years of experience',
target_class='Person',
target_slot='total_experience',
typedb_entity='person',
typedb_attribute='total-experience',
rdf_predicate='schema:experienceYears',
transformation=TransformationType.DIRECT,
required=False,
notes='Calculated total professional experience',
),
],
generated_classes=['Person', 'PersonProfile'],
example_yaml="""
profile_data:
name: Iris van Meer
headline: Staff member at the Services Department at Nationaal Archief
headline_english: Staff member at the Services Department at Nationaal Archief
location: The Randstad, Netherlands
country_code: NL
current_company: Nationaal Archief
about: Total Experience: 15 years and 8 months
total_experience: 15 years and 8 months
""".strip(),
),
# -------------------------------------------------------------------------
# LINKEDIN PROFILE - URLs and social metrics
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='linkedin_profile',
description='LinkedIn profile URLs and social metrics',
detailed_description="""
LinkedIn-specific profile data including the profile URL, photo URL,
and social metrics like connections and followers count.
These fields enable linking back to the source profile and provide
insight into the person's professional network reach.
""".strip(),
linkml_class='Person',
typedb_entity='person',
provenance=Provenance(
source_type='external_api',
data_tier=DataTier.TIER_2_VERIFIED,
),
fields=[
FieldMapping(
source_path='linkedin_profile_url',
source_description='LinkedIn profile URL',
target_class='Person',
target_slot='linkedin_url',
typedb_entity='person',
typedb_attribute='linkedin-url',
rdf_predicate='schema:sameAs',
transformation=TransformationType.DIRECT,
required=True,
notes='Canonical LinkedIn profile URL',
),
FieldMapping(
source_path='linkedin_photo_url',
source_description='LinkedIn profile photo URL',
target_class='Person',
target_slot='photo_url',
typedb_entity='person',
typedb_attribute='photo-url',
rdf_predicate='schema:image',
transformation=TransformationType.DIRECT,
required=False,
notes='CDN URL for profile photo',
),
FieldMapping(
source_path='profile_data.connections',
source_description='Number of LinkedIn connections',
target_class='Person',
target_slot='connections',
typedb_entity='person',
typedb_attribute='connections-count',
rdf_predicate='schema:knows',
transformation=TransformationType.DIRECT,
required=False,
notes='First-degree connection count',
),
FieldMapping(
source_path='profile_data.followers',
source_description='Number of followers',
target_class='Person',
target_slot='followers',
typedb_entity='person',
typedb_attribute='followers-count',
rdf_predicate='schema:followerCount',
transformation=TransformationType.DIRECT,
required=False,
notes='LinkedIn follower count',
),
],
generated_classes=['Person'],
example_yaml="""
linkedin_profile_url: https://www.linkedin.com/in/iris-van-meer-34329131
linkedin_photo_url: https://media.licdn.com/dms/image/v2/...
profile_data:
connections: 286
followers: 289
""".strip(),
),
# -------------------------------------------------------------------------
# CAREER HISTORY - Employment timeline
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='career_history',
description='Career history and employment timeline',
detailed_description="""
Complete career history extracted from LinkedIn profiles.
Each position includes organization, role, dates, duration,
location, and organizational metadata like company size and industry.
This data is crucial for understanding a person's professional
trajectory and their experience in heritage-related roles.
""".strip(),
linkml_class='CareerPosition',
typedb_entity='career-position',
provenance=Provenance(
source_type='external_api',
data_tier=DataTier.TIER_2_VERIFIED,
),
fields=[
FieldMapping(
source_path='profile_data.career_history[].organization',
source_description='Employer organization name',
target_class='CareerPosition',
target_slot='organization',
typedb_entity='career-position',
typedb_attribute='organization-name',
rdf_predicate='schema:worksFor',
transformation=TransformationType.DIRECT,
required=True,
notes='Name of the employing organization',
),
FieldMapping(
source_path='profile_data.career_history[].organization_linkedin',
source_description='LinkedIn URL for organization',
target_class='CareerPosition',
target_slot='organization_linkedin',
typedb_entity='career-position',
typedb_attribute='organization-linkedin-url',
rdf_predicate='schema:sameAs',
transformation=TransformationType.DIRECT,
required=False,
notes='LinkedIn company page URL',
),
FieldMapping(
source_path='profile_data.career_history[].role',
source_description='Job title/role',
target_class='CareerPosition',
target_slot='role',
typedb_entity='career-position',
typedb_attribute='role-title',
rdf_predicate='schema:jobTitle',
transformation=TransformationType.DIRECT,
required=True,
notes='Original language job title',
),
FieldMapping(
source_path='profile_data.career_history[].role_english',
source_description='English translation of role',
target_class='CareerPosition',
target_slot='role_english',
typedb_entity='career-position',
typedb_attribute='role-title-english',
rdf_predicate='schema:jobTitle',
transformation=TransformationType.DIRECT,
required=False,
notes='English translation for non-English titles',
),
FieldMapping(
source_path='profile_data.career_history[].dates',
source_description='Employment date range',
target_class='CareerPosition',
target_slot='dates',
typedb_entity='career-position',
typedb_attribute='date-range',
rdf_predicate='schema:temporalCoverage',
transformation=TransformationType.DIRECT,
required=False,
notes='Date range string (e.g., "Apr 2014 - Present")',
),
FieldMapping(
source_path='profile_data.career_history[].duration',
source_description='Employment duration',
target_class='CareerPosition',
target_slot='duration',
typedb_entity='career-position',
typedb_attribute='duration',
rdf_predicate='schema:duration',
transformation=TransformationType.DIRECT,
required=False,
notes='Calculated duration (e.g., "11 years and 7 months")',
),
FieldMapping(
source_path='profile_data.career_history[].location',
source_description='Work location',
target_class='CareerPosition',
target_slot='location',
typedb_entity='career-position',
typedb_attribute='work-location',
rdf_predicate='schema:workLocation',
transformation=TransformationType.DIRECT,
required=False,
notes='Geographic location of the position',
),
FieldMapping(
source_path='profile_data.career_history[].current',
source_description='Is current position',
target_class='CareerPosition',
target_slot='current',
typedb_entity='career-position',
typedb_attribute='is-current',
rdf_predicate='schema:currentPosition',
transformation=TransformationType.DIRECT,
required=False,
notes='Boolean flag for current employment',
),
FieldMapping(
source_path='profile_data.career_history[].company_size',
source_description='Company employee count range',
target_class='CareerPosition',
target_slot='company_size',
typedb_entity='career-position',
typedb_attribute='company-size',
rdf_predicate='schema:numberOfEmployees',
transformation=TransformationType.DIRECT,
required=False,
notes='Employee count range (e.g., "201-500 employees")',
),
FieldMapping(
source_path='profile_data.career_history[].company_founded',
source_description='Year company was founded',
target_class='CareerPosition',
target_slot='company_founded',
typedb_entity='career-position',
typedb_attribute='company-founded-year',
rdf_predicate='schema:foundingDate',
transformation=TransformationType.DIRECT,
required=False,
notes='Founding year of the organization',
),
FieldMapping(
source_path='profile_data.career_history[].company_type',
source_description='Type of company',
target_class='CareerPosition',
target_slot='company_type',
typedb_entity='career-position',
typedb_attribute='company-type',
rdf_predicate='schema:additionalType',
transformation=TransformationType.DIRECT,
required=False,
notes='Organization type (e.g., "Government Agency")',
),
FieldMapping(
source_path='profile_data.career_history[].industry',
source_description='Industry sector',
target_class='CareerPosition',
target_slot='industry',
typedb_entity='career-position',
typedb_attribute='industry',
rdf_predicate='schema:industry',
transformation=TransformationType.DIRECT,
required=False,
notes='Industry classification',
),
FieldMapping(
source_path='profile_data.career_history[].department',
source_description='Department within organization',
target_class='CareerPosition',
target_slot='department',
typedb_entity='career-position',
typedb_attribute='department',
rdf_predicate='schema:department',
transformation=TransformationType.DIRECT,
required=False,
notes='Department or division name',
),
FieldMapping(
source_path='profile_data.career_history[].level',
source_description='Seniority level',
target_class='CareerPosition',
target_slot='level',
typedb_entity='career-position',
typedb_attribute='seniority-level',
rdf_predicate='schema:occupationalCategory',
transformation=TransformationType.DIRECT,
required=False,
notes='Career level (e.g., "Specialist", "Manager")',
),
FieldMapping(
source_path='profile_data.career_history[].description',
source_description='Role description',
target_class='CareerPosition',
target_slot='description',
typedb_entity='career-position',
typedb_attribute='role-description',
rdf_predicate='schema:description',
transformation=TransformationType.DIRECT,
required=False,
notes='Free-text description of the role',
),
],
generated_classes=['CareerPosition', 'Person'],
example_yaml="""
profile_data:
career_history:
- organization: Nationaal Archief
organization_linkedin: https://www.linkedin.com/company/nationaal-archief
role: Staff Member At The Services Department
role_english: Staff Member At The Services Department
dates: Apr 2014 - Present
duration: 11 years and 7 months
location: Den Haag
current: true
company_size: 201-500 employees
company_founded: 1802
company_type: Government Agency
industry: Government Administration
department: Other
level: Specialist
description: null
""".strip(),
),
# -------------------------------------------------------------------------
# EDUCATION - Academic background
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='education',
description='Educational background and qualifications',
detailed_description="""
Educational history including degrees, institutions, and duration.
Links to institution LinkedIn pages when available.
This data helps understand the academic foundation and
qualifications of heritage professionals.
""".strip(),
linkml_class='Education',
typedb_entity='education',
provenance=Provenance(
source_type='external_api',
data_tier=DataTier.TIER_2_VERIFIED,
),
fields=[
FieldMapping(
source_path='profile_data.education[].institution',
source_description='Educational institution name',
target_class='Education',
target_slot='institution',
typedb_entity='education',
typedb_attribute='institution-name',
rdf_predicate='schema:educationalCredentialAwarded',
transformation=TransformationType.DIRECT,
required=True,
notes='Name of university/school',
),
FieldMapping(
source_path='profile_data.education[].institution_linkedin',
source_description='LinkedIn URL for institution',
target_class='Education',
target_slot='institution_linkedin',
typedb_entity='education',
typedb_attribute='institution-linkedin-url',
rdf_predicate='schema:sameAs',
transformation=TransformationType.DIRECT,
required=False,
notes='LinkedIn school page URL',
),
FieldMapping(
source_path='profile_data.education[].degree',
source_description='Degree and field of study',
target_class='Education',
target_slot='degree',
typedb_entity='education',
typedb_attribute='degree',
rdf_predicate='schema:educationalLevel',
transformation=TransformationType.DIRECT,
required=False,
notes='Degree type and major (e.g., "MA, History")',
),
FieldMapping(
source_path='profile_data.education[].years',
source_description='Years attended',
target_class='Education',
target_slot='years',
typedb_entity='education',
typedb_attribute='years-attended',
rdf_predicate='schema:temporalCoverage',
transformation=TransformationType.DIRECT,
required=False,
notes='Date range (e.g., "2001 - 2007")',
),
FieldMapping(
source_path='profile_data.education[].duration',
source_description='Duration of study',
target_class='Education',
target_slot='duration',
typedb_entity='education',
typedb_attribute='study-duration',
rdf_predicate='schema:duration',
transformation=TransformationType.DIRECT,
required=False,
notes='Calculated duration (e.g., "6 years")',
),
FieldMapping(
source_path='profile_data.education[].country',
source_description='Country of institution',
target_class='Education',
target_slot='country',
typedb_entity='education',
typedb_attribute='education-country',
rdf_predicate='schema:addressCountry',
transformation=TransformationType.DIRECT,
required=False,
notes='ISO country code of institution',
),
],
generated_classes=['Education', 'Person'],
example_yaml="""
profile_data:
education:
- institution: Universiteit Utrecht
institution_linkedin: https://www.linkedin.com/school/universiteit-utrecht
degree: MA, History
years: 2001 - 2007
duration: 6 years
country: NL
""".strip(),
),
# -------------------------------------------------------------------------
# SKILLS & EXPERTISE - Professional capabilities
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='skills_expertise',
description='Professional skills, languages, and expertise areas',
detailed_description="""
Professional skills endorsed on LinkedIn, language proficiencies,
and identified expertise areas based on career history analysis.
Expertise areas are derived from analyzing the person's complete
professional background in the heritage sector.
""".strip(),
linkml_class='Person',
typedb_entity='person',
provenance=Provenance(
source_type='external_api',
data_tier=DataTier.TIER_2_VERIFIED,
),
fields=[
FieldMapping(
source_path='profile_data.skills',
source_description='Professional skills list',
target_class='Person',
target_slot='skills',
typedb_entity='person',
typedb_attribute='skills',
rdf_predicate='schema:knowsAbout',
transformation=TransformationType.ARRAY_DIRECT,
required=False,
notes='LinkedIn-endorsed skills',
),
FieldMapping(
source_path='profile_data.languages',
source_description='Language proficiencies',
target_class='Person',
target_slot='languages',
typedb_entity='person',
typedb_attribute='languages',
rdf_predicate='schema:knowsLanguage',
transformation=TransformationType.ARRAY_DIRECT,
required=False,
notes='Languages spoken with proficiency levels',
),
FieldMapping(
source_path='profile_data.expertise_areas',
source_description='Identified expertise areas',
target_class='Person',
target_slot='expertise_areas',
typedb_entity='person',
typedb_attribute='expertise-areas',
rdf_predicate='schema:hasOccupation',
transformation=TransformationType.ARRAY_DIRECT,
required=False,
notes='Derived from career analysis',
),
],
generated_classes=['Person'],
example_yaml="""
profile_data:
skills:
- Digital Preservation
- Archival Description
- Collection Management
languages:
- Dutch (Native)
- English (Professional)
expertise_areas:
- Archival services
- Public services
- History research
- Library services
""".strip(),
),
# -------------------------------------------------------------------------
# HERITAGE RELEVANCE - Sector-specific assessment
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='heritage_relevance',
description='Heritage sector relevance assessment',
detailed_description="""
Assessment of the person's relevance to the heritage sector.
Includes heritage type classification (GLAMORCUBESFIXPHDNT),
current institution, sector role, and years of heritage experience.
This provides a quick overview of where the person fits
within the heritage ecosystem.
""".strip(),
linkml_class='HeritageRelevance',
typedb_entity='heritage-relevance',
provenance=Provenance(
source_type='computed',
data_tier=DataTier.TIER_3_CROWD_SOURCED,
),
fields=[
FieldMapping(
source_path='heritage_sector_relevance.heritage_type',
source_description='Heritage type code',
target_class='HeritageRelevance',
target_slot='heritage_type',
typedb_entity='heritage-relevance',
typedb_attribute='heritage-type-code',
rdf_predicate='glam:heritageType',
transformation=TransformationType.DIRECT,
required=False,
notes='Single letter GLAMORCUBESFIXPHDNT code',
),
FieldMapping(
source_path='heritage_sector_relevance.heritage_type_label',
source_description='Heritage type label',
target_class='HeritageRelevance',
target_slot='heritage_type_label',
typedb_entity='heritage-relevance',
typedb_attribute='heritage-type-label',
rdf_predicate='rdfs:label',
transformation=TransformationType.DIRECT,
required=False,
notes='Human-readable type label (e.g., "Archive")',
),
FieldMapping(
source_path='heritage_sector_relevance.current_institution',
source_description='Current heritage institution',
target_class='HeritageRelevance',
target_slot='current_institution',
typedb_entity='heritage-relevance',
typedb_attribute='current-institution',
rdf_predicate='schema:worksFor',
transformation=TransformationType.DIRECT,
required=False,
notes='Name of current heritage employer',
),
FieldMapping(
source_path='heritage_sector_relevance.institution_type',
source_description='Type of institution',
target_class='HeritageRelevance',
target_slot='institution_type',
typedb_entity='heritage-relevance',
typedb_attribute='institution-type',
rdf_predicate='schema:additionalType',
transformation=TransformationType.DIRECT,
required=False,
notes='Classification of the institution',
),
FieldMapping(
source_path='heritage_sector_relevance.sector_role',
source_description='Role within heritage sector',
target_class='HeritageRelevance',
target_slot='sector_role',
typedb_entity='heritage-relevance',
typedb_attribute='sector-role',
rdf_predicate='schema:jobTitle',
transformation=TransformationType.DIRECT,
required=False,
notes='Standardized role classification',
),
FieldMapping(
source_path='heritage_sector_relevance.years_in_heritage',
source_description='Years of heritage experience',
target_class='HeritageRelevance',
target_slot='years_in_heritage',
typedb_entity='heritage-relevance',
typedb_attribute='years-in-heritage',
rdf_predicate='schema:experienceYears',
transformation=TransformationType.DIRECT,
required=False,
notes='Total years in heritage sector',
),
],
generated_classes=['HeritageRelevance', 'Person'],
example_yaml="""
heritage_sector_relevance:
heritage_type: A
heritage_type_label: Archive
current_institution: Nationaal Archief
institution_type: National Archive
sector_role: Services Staff
years_in_heritage: 11
""".strip(),
),
# -------------------------------------------------------------------------
# HERITAGE EXPERIENCE - Relevant positions
# -------------------------------------------------------------------------
# Mapping entry for the 'heritage_experience' block: documents how each item of
# the heritage-relevant experience list maps onto the HeritageExperience class
# and the heritage-experience TypeDB entity.
# Note that source_block is 'heritage_experience' while every source_path below
# is rooted at profile_data.heritage_relevant_experience[] (see example_yaml).
EnrichmentSourceMapping(
source_block='heritage_experience',
description='Heritage-relevant experience from career history',
detailed_description="""
Filtered list of positions that are relevant to the heritage sector.
Extracted from full career history with relevance annotations.
Includes both current and past positions at heritage institutions
with notes explaining their relevance to the GLAM sector.
""".strip(),
linkml_class='HeritageExperience',
typedb_entity='heritage-experience',
provenance=Provenance(
source_type='computed',
data_tier=DataTier.TIER_3_CROWD_SOURCED,
),
# Four per-position fields; organization and role are required, the
# relevance note and the current flag are optional.
fields=[
FieldMapping(
source_path='profile_data.heritage_relevant_experience[].organization',
source_description='Heritage organization name',
target_class='HeritageExperience',
target_slot='organization',
typedb_entity='heritage-experience',
typedb_attribute='heritage-org-name',
rdf_predicate='schema:worksFor',
transformation=TransformationType.DIRECT,
required=True,
notes='Name of heritage institution',
),
FieldMapping(
source_path='profile_data.heritage_relevant_experience[].role',
source_description='Role at heritage organization',
target_class='HeritageExperience',
target_slot='role',
typedb_entity='heritage-experience',
typedb_attribute='heritage-role',
rdf_predicate='schema:jobTitle',
transformation=TransformationType.DIRECT,
required=True,
notes='Job title at heritage institution',
),
FieldMapping(
source_path='profile_data.heritage_relevant_experience[].relevance',
source_description='Relevance explanation',
target_class='HeritageExperience',
target_slot='relevance',
typedb_entity='heritage-experience',
typedb_attribute='relevance-notes',
rdf_predicate='schema:description',
transformation=TransformationType.DIRECT,
required=False,
notes='Why this position is heritage-relevant',
),
# NOTE(review): confirm that schema:currentPosition is defined in the
# schema.org vocabulary; it may need a project-namespace predicate instead.
FieldMapping(
source_path='profile_data.heritage_relevant_experience[].current',
source_description='Is current position',
target_class='HeritageExperience',
target_slot='current',
typedb_entity='heritage-experience',
typedb_attribute='is-current-heritage',
rdf_predicate='schema:currentPosition',
transformation=TransformationType.DIRECT,
required=False,
notes='Whether this is a current position',
),
],
generated_classes=['HeritageExperience', 'Person'],
# Example shows two list items, one current and one past position.
example_yaml="""
profile_data:
heritage_relevant_experience:
- organization: Nationaal Archief
role: Staff Member At The Services Department
relevance: Public services at National Archives of the Netherlands
current: true
- organization: University Library Utrecht
role: Library Employee
relevance: Academic library experience
current: false
""".strip(),
),
# -------------------------------------------------------------------------
# AFFILIATIONS - Custodian connections
# -------------------------------------------------------------------------
# Mapping entry for the 'affiliations' block: documents how each item of the
# affiliations[] list maps onto the Affiliation class and the affiliation
# TypeDB entity.
EnrichmentSourceMapping(
source_block='affiliations',
description='Affiliations with heritage custodians',
detailed_description="""
Links between the person and heritage custodian institutions.
Each affiliation includes the custodian name, slug identifier,
role title, and heritage classification.
These affiliations enable network analysis across the heritage
sector workforce.
""".strip(),
linkml_class='Affiliation',
typedb_entity='affiliation',
provenance=Provenance(
source_type='computed',
data_tier=DataTier.TIER_2_VERIFIED,
),
# Eight per-affiliation fields. Only custodian_name and custodian_slug are
# required; everything else is optional.
fields=[
FieldMapping(
source_path='affiliations[].custodian_name',
source_description='Heritage custodian name',
target_class='Affiliation',
target_slot='custodian_name',
typedb_entity='affiliation',
typedb_attribute='custodian-name',
rdf_predicate='schema:memberOf',
transformation=TransformationType.DIRECT,
required=True,
notes='Name of the heritage institution',
),
FieldMapping(
source_path='affiliations[].custodian_slug',
source_description='Custodian identifier slug',
target_class='Affiliation',
target_slot='custodian_slug',
typedb_entity='affiliation',
typedb_attribute='custodian-slug',
rdf_predicate='schema:identifier',
transformation=TransformationType.DIRECT,
required=True,
notes='URL-safe identifier for the custodian',
),
FieldMapping(
source_path='affiliations[].role_title',
source_description='Role at custodian',
target_class='Affiliation',
target_slot='role_title',
typedb_entity='affiliation',
typedb_attribute='affiliation-role',
rdf_predicate='schema:jobTitle',
transformation=TransformationType.DIRECT,
required=False,
notes='Job title at this custodian',
),
# The next two fields use predicates with the glam: prefix
# (presumably the project's own namespace - confirm prefix declaration).
FieldMapping(
source_path='affiliations[].heritage_relevant',
source_description='Is heritage relevant',
target_class='Affiliation',
target_slot='heritage_relevant',
typedb_entity='affiliation',
typedb_attribute='is-heritage-relevant',
rdf_predicate='glam:heritageRelevant',
transformation=TransformationType.DIRECT,
required=False,
notes='Whether affiliation is heritage-relevant',
),
FieldMapping(
source_path='affiliations[].heritage_type',
source_description='Heritage type code',
target_class='Affiliation',
target_slot='heritage_type',
typedb_entity='affiliation',
typedb_attribute='affiliation-heritage-type',
rdf_predicate='glam:heritageType',
transformation=TransformationType.DIRECT,
required=False,
notes='GLAMORCUBESFIXPHDNT type code',
),
# NOTE(review): confirm that schema:currentPosition is defined in the
# schema.org vocabulary; it may need a project-namespace predicate instead.
FieldMapping(
source_path='affiliations[].current',
source_description='Is current affiliation',
target_class='Affiliation',
target_slot='current',
typedb_entity='affiliation',
typedb_attribute='is-current-affiliation',
rdf_predicate='schema:currentPosition',
transformation=TransformationType.DIRECT,
required=False,
notes='Whether this is a current affiliation',
),
# The last two fields record when and where the affiliation was observed,
# using PROV predicates.
FieldMapping(
source_path='affiliations[].observed_on',
source_description='Observation timestamp',
target_class='Affiliation',
target_slot='observed_on',
typedb_entity='affiliation',
typedb_attribute='observed-on',
rdf_predicate='prov:generatedAtTime',
transformation=TransformationType.DIRECT,
required=False,
notes='When this affiliation was observed',
),
FieldMapping(
source_path='affiliations[].source_url',
source_description='Source URL for affiliation',
target_class='Affiliation',
target_slot='source_url',
typedb_entity='affiliation',
typedb_attribute='affiliation-source-url',
rdf_predicate='prov:wasDerivedFrom',
transformation=TransformationType.DIRECT,
required=False,
notes='URL where affiliation was discovered',
),
],
generated_classes=['Affiliation'],
# Example shows a single affiliation populated with all eight mapped keys.
example_yaml="""
affiliations:
- custodian_name: Nationaal Archief
custodian_slug: nationaal-archief
role_title: Staff member at the Services Department at Nationaal Archief
heritage_relevant: true
heritage_type: A
current: true
observed_on: 2025-12-14T11:21:47Z
source_url: https://www.linkedin.com/company/nationaal-archief/people/
""".strip(),
),
# -------------------------------------------------------------------------
# LINKED RECORDS - Cross-references
# -------------------------------------------------------------------------
# Mapping entry for the 'linked_records' block: documents how cross-references
# to a staff record and a custodian record map onto the LinkedRecords class and
# the linked-records TypeDB entity.
EnrichmentSourceMapping(
source_block='linked_records',
description='Links to related records in the system',
detailed_description="""
Cross-references to other records in the heritage data system.
Includes links to staff records (parsed from LinkedIn company pages)
and custodian records (heritage institution YAML files).
These links enable navigation between person profiles and
the institutions they work for.
""".strip(),
linkml_class='LinkedRecords',
typedb_entity='linked-records',
provenance=Provenance(
source_type='computed',
data_tier=DataTier.TIER_2_VERIFIED,
),
# Four fields, all optional: two read from linked_records.staff_record and
# two from linked_records.custodian_record. The nested source keys are
# flattened into prefixed target slots (e.g. staff_record.file ->
# staff_record_file).
fields=[
FieldMapping(
source_path='linked_records.staff_record.file',
source_description='Staff record file path',
target_class='LinkedRecords',
target_slot='staff_record_file',
typedb_entity='linked-records',
typedb_attribute='staff-record-path',
rdf_predicate='prov:wasDerivedFrom',
transformation=TransformationType.DIRECT,
required=False,
notes='Path to parsed staff JSON file',
),
FieldMapping(
source_path='linked_records.staff_record.staff_id',
source_description='Staff record ID',
target_class='LinkedRecords',
target_slot='staff_id',
typedb_entity='linked-records',
typedb_attribute='staff-id',
rdf_predicate='schema:identifier',
transformation=TransformationType.DIRECT,
required=False,
notes='Unique staff identifier',
),
FieldMapping(
source_path='linked_records.custodian_record.ghcid',
source_description='Custodian GHCID',
target_class='LinkedRecords',
target_slot='custodian_ghcid',
typedb_entity='linked-records',
typedb_attribute='linked-ghcid',
rdf_predicate='glam:ghcid',
transformation=TransformationType.DIRECT,
required=False,
notes='GHCID of linked custodian',
),
FieldMapping(
source_path='linked_records.custodian_record.notes',
source_description='Custodian record notes',
target_class='LinkedRecords',
target_slot='custodian_notes',
typedb_entity='linked-records',
typedb_attribute='custodian-notes',
rdf_predicate='schema:description',
transformation=TransformationType.DIRECT,
required=False,
notes='Additional notes about the custodian link',
),
],
generated_classes=['LinkedRecords'],
# Example shows both sub-records populated.
example_yaml="""
linked_records:
staff_record:
file: data/custodian/person/affiliated/parsed/nationaal-archief_staff_20251210T155415Z.json
staff_id: nationaal-archief_staff_0002_iris_van_meer
custodian_record:
ghcid: NL-ZH-DHA-A-NA
notes: Nationaal Archief, The Hague
""".strip(),
),
# -------------------------------------------------------------------------
# CONTACT DATA - Contact information
# -------------------------------------------------------------------------
# Mapping entry for the 'contact_data' block: documents how contact details map
# onto the ContactData class and the contact-data TypeDB entity.
EnrichmentSourceMapping(
source_block='contact_data',
description='Contact information and communication channels',
detailed_description="""
Contact information including email addresses and phone numbers.
Emails may be inferred from organizational naming conventions
with confidence scores indicating reliability.
Also includes profile photo URLs and external lookup service links.
""".strip(),
linkml_class='ContactData',
typedb_entity='contact-data',
provenance=Provenance(
source_type='computed',
data_tier=DataTier.TIER_3_CROWD_SOURCED,
),
# Seven fields, all optional: the provenance source, four per-email
# attributes (email, type, confidence, verified) and two URLs.
# Keys that appear in example_yaml but have no FieldMapping here:
# provenance.source_url, provenance.retrieved_date,
# provenance.extraction_method, emails[].domain, emails[].source, phones.
fields=[
FieldMapping(
source_path='contact_data.provenance.source',
source_description='Contact data source',
target_class='ContactData',
target_slot='provenance_source',
typedb_entity='contact-data',
typedb_attribute='contact-source',
rdf_predicate='prov:wasAttributedTo',
transformation=TransformationType.DIRECT,
required=False,
notes='How contact data was obtained',
),
FieldMapping(
source_path='contact_data.emails[].email',
source_description='Email address',
target_class='ContactData',
target_slot='email',
typedb_entity='contact-data',
typedb_attribute='email-address',
rdf_predicate='schema:email',
transformation=TransformationType.DIRECT,
required=False,
notes='Email address (may be inferred)',
),
FieldMapping(
source_path='contact_data.emails[].type',
source_description='Email type',
target_class='ContactData',
target_slot='email_type',
typedb_entity='contact-data',
typedb_attribute='email-type',
rdf_predicate='schema:contactType',
transformation=TransformationType.DIRECT,
required=False,
notes='Type of email (work, personal)',
),
# NOTE(review): confirm that prov:confidence (below) and schema:verified
# (next entry) are defined in PROV-O / schema.org respectively; they may
# need project-namespace predicates instead.
FieldMapping(
source_path='contact_data.emails[].confidence',
source_description='Email confidence score',
target_class='ContactData',
target_slot='email_confidence',
typedb_entity='contact-data',
typedb_attribute='email-confidence',
rdf_predicate='prov:confidence',
transformation=TransformationType.DIRECT,
required=False,
notes='Confidence in email accuracy (0-1)',
),
FieldMapping(
source_path='contact_data.emails[].verified',
source_description='Email verification status',
target_class='ContactData',
target_slot='email_verified',
typedb_entity='contact-data',
typedb_attribute='email-verified',
rdf_predicate='schema:verified',
transformation=TransformationType.DIRECT,
required=False,
notes='Whether email has been verified',
),
FieldMapping(
source_path='contact_data.profile_photo_url',
source_description='Profile photo URL',
target_class='ContactData',
target_slot='profile_photo_url',
typedb_entity='contact-data',
typedb_attribute='profile-photo',
rdf_predicate='schema:image',
transformation=TransformationType.DIRECT,
required=False,
notes='URL to profile photo',
),
# rocketreach_url is mapped here but does not appear in example_yaml.
FieldMapping(
source_path='contact_data.rocketreach_url',
source_description='RocketReach lookup URL',
target_class='ContactData',
target_slot='rocketreach_url',
typedb_entity='contact-data',
typedb_attribute='rocketreach-url',
rdf_predicate='schema:sameAs',
transformation=TransformationType.DIRECT,
required=False,
notes='Link to RocketReach profile lookup',
),
],
generated_classes=['ContactData'],
# Example shows one inferred, unverified work email and an empty phones list.
example_yaml="""
contact_data:
provenance:
source: LinkedIn profile + Dutch government naming convention
source_url: https://www.linkedin.com/in/iris-van-meer-34329131
retrieved_date: 2025-12-14
extraction_method: naming_convention_inference
emails:
- email: iris.van.meer@nationaalarchief.nl
domain: nationaalarchief.nl
type: work
source: inferred
confidence: 0.8
verified: false
phones: []
profile_photo_url: https://media.licdn.com/dms/image/v2/...
""".strip(),
),
# -------------------------------------------------------------------------
# EXTRACTION METADATA - Data provenance
# -------------------------------------------------------------------------
# Mapping entry for the 'extraction_metadata' block: documents how provenance
# details of the extraction run map onto the ExtractionMetadata class and the
# extraction-metadata TypeDB entity.
EnrichmentSourceMapping(
source_block='extraction_metadata',
description='Metadata about how the profile was extracted',
detailed_description="""
Provenance information about the data extraction process.
Includes source file references, extraction timestamps,
methods used, and cost tracking for API calls.
This ensures full traceability of data origin and enables
reproducibility of the extraction process.
""".strip(),
linkml_class='ExtractionMetadata',
typedb_entity='extraction-metadata',
provenance=Provenance(
source_type='computed',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
# Eight fields; extraction_date is the only required one.
# Two fields (staff_id and request_id) both use schema:identifier and are
# told apart only by their TypeDB attribute names.
fields=[
FieldMapping(
source_path='extraction_metadata.source_file',
source_description='Source file path',
target_class='ExtractionMetadata',
target_slot='source_file',
typedb_entity='extraction-metadata',
typedb_attribute='source-file-path',
rdf_predicate='prov:wasDerivedFrom',
transformation=TransformationType.DIRECT,
required=False,
notes='Original source file for extraction',
),
FieldMapping(
source_path='extraction_metadata.staff_id',
source_description='Staff identifier',
target_class='ExtractionMetadata',
target_slot='staff_id',
typedb_entity='extraction-metadata',
typedb_attribute='extraction-staff-id',
rdf_predicate='schema:identifier',
transformation=TransformationType.DIRECT,
required=False,
notes='Staff ID from source data',
),
FieldMapping(
source_path='extraction_metadata.extraction_date',
source_description='Extraction timestamp',
target_class='ExtractionMetadata',
target_slot='extraction_date',
typedb_entity='extraction-metadata',
typedb_attribute='extraction-date',
rdf_predicate='prov:generatedAtTime',
transformation=TransformationType.DIRECT,
required=True,
notes='ISO 8601 timestamp of extraction',
),
FieldMapping(
source_path='extraction_metadata.extraction_method',
source_description='Method used for extraction',
target_class='ExtractionMetadata',
target_slot='extraction_method',
typedb_entity='extraction-metadata',
typedb_attribute='extraction-method',
rdf_predicate='prov:wasGeneratedBy',
transformation=TransformationType.DIRECT,
required=False,
notes='Tool/API used (e.g., exa_crawling_exa)',
),
FieldMapping(
source_path='extraction_metadata.extraction_agent',
source_description='Agent performing extraction',
target_class='ExtractionMetadata',
target_slot='extraction_agent',
typedb_entity='extraction-metadata',
typedb_attribute='extraction-agent',
rdf_predicate='prov:wasAttributedTo',
transformation=TransformationType.DIRECT,
required=False,
notes='AI agent or script name',
),
FieldMapping(
source_path='extraction_metadata.linkedin_url',
source_description='Source LinkedIn URL',
target_class='ExtractionMetadata',
target_slot='linkedin_url',
typedb_entity='extraction-metadata',
typedb_attribute='extraction-linkedin-url',
rdf_predicate='prov:hadPrimarySource',
transformation=TransformationType.DIRECT,
required=False,
notes='LinkedIn profile URL that was extracted',
),
FieldMapping(
source_path='extraction_metadata.cost_usd',
source_description='Extraction cost in USD',
target_class='ExtractionMetadata',
target_slot='cost_usd',
typedb_entity='extraction-metadata',
typedb_attribute='extraction-cost',
rdf_predicate='schema:price',
transformation=TransformationType.DIRECT,
required=False,
notes='API cost for extraction',
),
FieldMapping(
source_path='extraction_metadata.request_id',
source_description='API request identifier',
target_class='ExtractionMetadata',
target_slot='request_id',
typedb_entity='extraction-metadata',
typedb_attribute='api-request-id',
rdf_predicate='schema:identifier',
transformation=TransformationType.DIRECT,
required=False,
notes='Unique request ID for tracing',
),
],
generated_classes=['ExtractionMetadata'],
# Example shows source_file, staff_id and request_id as null, i.e. the
# optional fields may be present but empty.
example_yaml="""
extraction_metadata:
source_file: null
staff_id: null
extraction_date: 2025-12-13T17:35:24.524090+00:00
extraction_method: exa_crawling_exa
extraction_agent: claude-opus-4.5
linkedin_url: https://www.linkedin.com/in/iris-van-meer-34329131
cost_usd: 0
request_id: null
""".strip(),
),
# -------------------------------------------------------------------------
# WEB CLAIMS - Verifiable claims from web sources
# -------------------------------------------------------------------------
# Mapping entry for the 'web_claims' block: documents how each item of the
# web_claims[] list maps onto the WebClaim class and the web-claim TypeDB
# entity. Provenance source_type for this entry is 'external_api'.
EnrichmentSourceMapping(
source_block='web_claims',
description='Web claims with provenance tracking',
detailed_description="""
Individual claims extracted from web sources with full provenance.
Each claim includes the claim type, value, source URL, retrieval
timestamp, and the agent/tool that performed the extraction.
This follows the WebObservation pattern for verifiable data claims.
""".strip(),
linkml_class='WebClaim',
typedb_entity='web-claim',
provenance=Provenance(
source_type='external_api',
data_tier=DataTier.TIER_2_VERIFIED,
),
# Five per-claim fields. claim_type, claim_value and source_url are
# required; the retrieval timestamp and agent are optional.
fields=[
# NOTE(review): rdf:type normally takes a class IRI as its object, while
# claim_type holds a plain string in example_yaml (e.g. full_name) -
# confirm this predicate choice is intended.
FieldMapping(
source_path='web_claims[].claim_type',
source_description='Type of claim',
target_class='WebClaim',
target_slot='claim_type',
typedb_entity='web-claim',
typedb_attribute='claim-type',
rdf_predicate='rdf:type',
transformation=TransformationType.DIRECT,
required=True,
notes='Category of claim (e.g., full_name, role_title)',
),
FieldMapping(
source_path='web_claims[].claim_value',
source_description='Value of the claim',
target_class='WebClaim',
target_slot='claim_value',
typedb_entity='web-claim',
typedb_attribute='claim-value',
rdf_predicate='rdf:value',
transformation=TransformationType.DIRECT,
required=True,
notes='The actual claimed value',
),
FieldMapping(
source_path='web_claims[].source_url',
source_description='URL source of claim',
target_class='WebClaim',
target_slot='source_url',
typedb_entity='web-claim',
typedb_attribute='claim-source-url',
rdf_predicate='prov:wasDerivedFrom',
transformation=TransformationType.DIRECT,
required=True,
notes='Web page where claim was found',
),
FieldMapping(
source_path='web_claims[].retrieved_on',
source_description='Retrieval timestamp',
target_class='WebClaim',
target_slot='retrieved_on',
typedb_entity='web-claim',
typedb_attribute='claim-retrieved-on',
rdf_predicate='prov:generatedAtTime',
transformation=TransformationType.DIRECT,
required=False,
notes='When the claim was retrieved',
),
FieldMapping(
source_path='web_claims[].retrieval_agent',
source_description='Agent that retrieved claim',
target_class='WebClaim',
target_slot='retrieval_agent',
typedb_entity='web-claim',
typedb_attribute='claim-retrieval-agent',
rdf_predicate='prov:wasAttributedTo',
transformation=TransformationType.DIRECT,
required=False,
notes='Tool/agent that extracted the claim',
),
],
generated_classes=['WebClaim'],
# Example shows two claims (full_name and role_title) from the same source
# URL, each populated with all five mapped keys.
example_yaml="""
web_claims:
- claim_type: full_name
claim_value: Iris van Meer
source_url: https://www.linkedin.com/in/iris-van-meer-34329131
retrieved_on: 2025-12-14T11:21:47Z
retrieval_agent: linkedin_html_parser
- claim_type: role_title
claim_value: Staff member at the Services Department at Nationaal Archief
source_url: https://www.linkedin.com/in/iris-van-meer-34329131
retrieved_on: 2025-12-14T11:21:47Z
retrieval_agent: linkedin_html_parser
""".strip(),
),
# -------------------------------------------------------------------------
# PICO MAPPED - PiCo ontology properties that ARE mapped
# -------------------------------------------------------------------------
# Mapping entry for the 'pico_mapped' block: an ontology-alignment record
# (source_type='ontology_mapping') listing the PiCo properties that HC person
# data is mapped to. Its source_paths point into other blocks (profile_data,
# extraction_metadata, affiliations) rather than a 'pico_mapped' key.
EnrichmentSourceMapping(
source_block='pico_mapped',
description='PiCo ontology properties mapped to HC person data',
detailed_description="""
The Heritage Custodian (HC) system implements a focused subset of the PiCo
(Persons in Context) ontology, optimized for tracking heritage sector staff.
PiCo was designed for historical vital records (birth/death certificates,
marriage records, census data), but HC uses LinkedIn as the primary data
source, which provides professional context rather than biographical/genealogical
data.
This section documents which PiCo properties ARE mapped to HC fields,
showing the semantic alignment between the ontologies.
""".strip(),
linkml_class='PersonObservation',
typedb_entity='person-observation',
provenance=Provenance(
source_type='ontology_mapping',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
# Eight fields, every one carrying status='mapped'. name, the LinkedIn
# source URL and the observation date are required; the rest are optional.
# Schema.org predicates in this entry are written with the sdo: prefix.
fields=[
FieldMapping(
source_path='profile_data.name',
source_description='Full name of the person',
target_class='PersonObservation',
target_slot='name',
typedb_entity='person-observation',
typedb_attribute='person-name',
rdf_predicate='sdo:name',
transformation=TransformationType.DIRECT,
required=True,
notes='PiCo uses sdo:name (Schema.org) for full names. Directly mapped.',
status='mapped',
),
FieldMapping(
source_path='profile_data.location',
source_description='Current geographic location',
target_class='PersonObservation',
target_slot='location',
typedb_entity='person-observation',
typedb_attribute='location-string',
rdf_predicate='sdo:address',
transformation=TransformationType.DIRECT,
required=False,
notes='PiCo uses sdo:address for location. LinkedIn provides free-text location.',
status='mapped',
),
FieldMapping(
source_path='profile_data.headline',
source_description='Current occupation/role',
target_class='PersonObservation',
target_slot='headline',
typedb_entity='person-observation',
typedb_attribute='headline',
rdf_predicate='sdo:hasOccupation',
transformation=TransformationType.DIRECT,
required=False,
notes='PiCo uses sdo:hasOccupation for job roles. HC captures this via LinkedIn headline.',
status='mapped',
),
FieldMapping(
source_path='extraction_metadata.linkedin_url',
source_description='LinkedIn profile URL as primary source',
target_class='PersonObservation',
target_slot='source_url',
typedb_entity='person-observation',
typedb_attribute='source-url',
rdf_predicate='prov:hadPrimarySource',
transformation=TransformationType.DIRECT,
required=True,
notes='PiCo uses prov:hadPrimarySource for provenance. LinkedIn URL serves as source document.',
status='mapped',
),
# NOTE(review): this is the only entry here whose target_class is
# Affiliation while typedb_entity stays 'person-observation' - confirm the
# entity is intended.
FieldMapping(
source_path='affiliations[].role_title',
source_description='Role at heritage institution',
target_class='Affiliation',
target_slot='role_title',
typedb_entity='person-observation',
typedb_attribute='role-title',
rdf_predicate='pico:hasRole',
transformation=TransformationType.DIRECT,
required=False,
notes='PiCo uses pico:hasRole with picot_roles thesaurus. HC captures current institutional roles.',
status='mapped',
),
FieldMapping(
source_path='extraction_metadata.extraction_date',
source_description='When observation was recorded',
target_class='PersonObservation',
target_slot='observation_date',
typedb_entity='person-observation',
typedb_attribute='observation-date',
rdf_predicate='prov:generatedAtTime',
transformation=TransformationType.DIRECT,
required=True,
notes='PiCo uses prov:generatedAtTime for temporal provenance. Mapped to extraction timestamp.',
status='mapped',
),
FieldMapping(
source_path='extraction_metadata.extraction_agent',
source_description='Agent that performed extraction',
target_class='PersonObservation',
target_slot='extraction_agent',
typedb_entity='person-observation',
typedb_attribute='extraction-agent',
rdf_predicate='prov:wasAttributedTo',
transformation=TransformationType.DIRECT,
required=False,
notes='PiCo uses prov:wasAttributedTo for agent provenance. Records which tool/agent extracted data.',
status='mapped',
),
FieldMapping(
source_path='profile_data.profile_image_url',
source_description='Profile photo URL',
target_class='PersonObservation',
target_slot='image_url',
typedb_entity='person-observation',
typedb_attribute='profile-image-url',
rdf_predicate='sdo:image',
transformation=TransformationType.DIRECT,
required=False,
notes='PiCo uses sdo:image for visual representation. LinkedIn CDN URL stored.',
status='mapped',
),
],
generated_classes=['PersonObservation', 'Affiliation'],
# Example annotates each source key with the predicate it is aligned to.
example_yaml="""
# PiCo ontology alignment example
# HC PersonObservation → PiCo PersonObservation
profile_data:
name: "Iris van Meer" # → sdo:name
headline: "Staff member at..." # → sdo:hasOccupation
location: "The Hague, Netherlands" # → sdo:address
profile_image_url: "https://..." # → sdo:image
extraction_metadata:
linkedin_url: "https://linkedin.com/in/..." # → prov:hadPrimarySource
extraction_date: "2025-12-14T11:21:47Z" # → prov:generatedAtTime
extraction_agent: "claude-opus-4.5" # → prov:wasAttributedTo
affiliations:
- role_title: "Archivist" # → pico:hasRole
""".strip(),
),
# -------------------------------------------------------------------------
# PICO UNMAPPED - PiCo ontology properties intentionally OUT OF SCOPE
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
source_block='pico_unmapped',
description='PiCo ontology properties intentionally not mapped',
detailed_description="""
Many PiCo properties are intentionally NOT mapped in the HC system.
This is a design decision, not a gap to be filled.
**Why these properties are out of scope:**
1. **Data source limitation**: LinkedIn profiles don't contain vital records
(birth dates, death dates, marriage records, baptism records).
2. **Use case mismatch**: HC tracks heritage sector workforce, not genealogical
reconstruction. Family relationships aren't relevant for institutional
staff directories.
3. **Privacy considerations**: Collecting personal biographical data about
living individuals raises GDPR concerns. Professional context is appropriate;
personal history is not.
4. **Ontology purpose**: PiCo was designed for historical archives processing
(civil registration, notarial records). HC serves a different purpose.
This documentation ensures transparency about the ontology alignment scope.
""".strip(),
linkml_class='PersonObservation',
typedb_entity='person-observation',
provenance=Provenance(
source_type='ontology_mapping',
data_tier=DataTier.TIER_1_AUTHORITATIVE,
),
fields=[
# Vital records - not available from LinkedIn
FieldMapping(
source_path=None,
source_description='Birth date',
target_class=None,
target_slot=None,
typedb_entity='person-observation',
typedb_attribute=None,
rdf_predicate='sdo:birthDate',
transformation=TransformationType.NOT_MAPPED,
required=False,
notes='OUT OF SCOPE: LinkedIn does not provide birth dates. Historical vital records property.',
status='out_of_scope',
),
FieldMapping(
source_path=None,
source_description='Death date',
target_class=None,
target_slot=None,
typedb_entity='person-observation',
typedb_attribute=None,
rdf_predicate='sdo:deathDate',
transformation=TransformationType.NOT_MAPPED,
required=False,
notes='OUT OF SCOPE: LinkedIn profiles are for living professionals. Historical vital records property.',
status='out_of_scope',
),
FieldMapping(
source_path=None,
source_description='Birth place',
target_class=None,
target_slot=None,
typedb_entity='person-observation',
typedb_attribute=None,
rdf_predicate='sdo:birthPlace',
transformation=TransformationType.NOT_MAPPED,
required=False,
notes='OUT OF SCOPE: LinkedIn does not provide birth location. Use sdo:address for current location.',
status='out_of_scope',
),
FieldMapping(
source_path=None,
source_description='Death place',
target_class=None,
target_slot=None,
typedb_entity='person-observation',
typedb_attribute=None,
rdf_predicate='sdo:deathPlace',
transformation=TransformationType.NOT_MAPPED,
required=False,
notes='OUT OF SCOPE: LinkedIn profiles are for living professionals.',
status='out_of_scope',
),
FieldMapping(
source_path=None,
source_description='Deceased flag',
target_class=None,
target_slot=None,
typedb_entity='person-observation',
typedb_attribute=None,
rdf_predicate='pico:deceased',
transformation=TransformationType.NOT_MAPPED,
required=False,
notes='OUT OF SCOPE: HC tracks active professionals. Memorial profiles not in scope.',
status='out_of_scope',
),
FieldMapping(
source_path=None,
source_description='Age',
target_class=None,
target_slot=None,
typedb_entity='person-observation',
typedb_attribute=None,
rdf_predicate='pico:hasAge',
transformation=TransformationType.NOT_MAPPED,
required=False,
notes='OUT OF SCOPE: Age not available from LinkedIn. Privacy consideration for living individuals.',
status='out_of_scope',
),
FieldMapping(
source_path=None,
source_description='Gender',
target_class=None,
target_slot=None,
typedb_entity='person-observation',
typedb_attribute=None,
rdf_predicate='sdo:gender',
transformation=TransformationType.NOT_MAPPED,
required=False,
notes='OUT OF SCOPE: Gender not reliably extractable from LinkedIn. Privacy consideration.',
status='out_of_scope',
),
FieldMapping(
source_path=None,
source_description='Religion',
target_class=None,
target_slot=None,
typedb_entity='person-observation',
typedb_attribute=None,
rdf_predicate='pico:hasReligion',
transformation=TransformationType.NOT_MAPPED,
required=False,
notes='OUT OF SCOPE: Religious affiliation not available from LinkedIn. Privacy consideration.',
status='out_of_scope',
),
# Structured name components - given/family name are partial; PNV components are out of scope
FieldMapping(
source_path=None,
source_description='Given name (first name)',
target_class=None,
target_slot=None,
typedb_entity='person-observation',
typedb_attribute=None,
rdf_predicate='sdo:givenName',
transformation=TransformationType.NOT_MAPPED,
required=False,
notes='PARTIAL: Could be extracted from full name, but not reliably for all cultures. Full name (sdo:name) is used instead.',
status='partial',
),
FieldMapping(
source_path=None,
source_description='Family name (surname)',
target_class=None,
target_slot=None,
typedb_entity='person-observation',
typedb_attribute=None,
rdf_predicate='sdo:familyName',
transformation=TransformationType.NOT_MAPPED,
required=False,
notes='PARTIAL: Could be extracted from full name, but not reliably for all cultures. Full name (sdo:name) is used instead.',
status='partial',
),
FieldMapping(
source_path=None,
source_description='Patronym',
target_class=None,
target_slot=None,
typedb_entity='person-observation',
typedb_attribute=None,
rdf_predicate='pnv:patronym',
transformation=TransformationType.NOT_MAPPED,
required=False,
notes='OUT OF SCOPE: Patronymic names are historical/cultural. Not extractable from LinkedIn.',
status='out_of_scope',
),
FieldMapping(
source_path=None,
source_description='Base surname',
target_class=None,
target_slot=None,
typedb_entity='person-observation',
typedb_attribute=None,
rdf_predicate='pnv:baseSurname',
transformation=TransformationType.NOT_MAPPED,
required=False,
notes='OUT OF SCOPE: Genealogical name component. Not relevant for staff tracking.',
status='out_of_scope',
),
FieldMapping(
source_path=None,
source_description='Surname prefix',
target_class=None,
target_slot=None,
typedb_entity='person-observation',
typedb_attribute=None,
rdf_predicate='pnv:surnamePrefix',
transformation=TransformationType.NOT_MAPPED,
required=False,
notes='OUT OF SCOPE: Dutch tussenvoegsel (van, de, etc.) not separately tracked. Full name preserved.',
status='out_of_scope',
),
FieldMapping(
source_path=None,
source_description='Initials',
target_class=None,
target_slot=None,
typedb_entity='person-observation',
typedb_attribute=None,
rdf_predicate='pnv:initials',
transformation=TransformationType.NOT_MAPPED,
required=False,
notes='OUT OF SCOPE: Initials not separately extracted. Full name used.',
status='out_of_scope',
),
# Family relationships - 40+ properties not mapped
FieldMapping(
source_path=None,
source_description='Parent relationship',
target_class=None,
target_slot=None,
typedb_entity='person-observation',
typedb_attribute=None,
rdf_predicate='sdo:parent',
transformation=TransformationType.NOT_MAPPED,
required=False,
notes='OUT OF SCOPE: Family relationships not relevant for institutional staff tracking.',
status='out_of_scope',
),
FieldMapping(
source_path=None,
source_description='Children relationship',
target_class=None,
target_slot=None,
typedb_entity='person-observation',
typedb_attribute=None,
rdf_predicate='sdo:children',
transformation=TransformationType.NOT_MAPPED,
required=False,
notes='OUT OF SCOPE: Family relationships not relevant for institutional staff tracking.',
status='out_of_scope',
),
FieldMapping(
source_path=None,
source_description='Spouse relationship',
target_class=None,
target_slot=None,
typedb_entity='person-observation',
typedb_attribute=None,
rdf_predicate='sdo:spouse',
transformation=TransformationType.NOT_MAPPED,
required=False,
notes='OUT OF SCOPE: Family relationships not relevant for institutional staff tracking.',
status='out_of_scope',
),
FieldMapping(
source_path=None,
source_description='Sibling relationship',
target_class=None,
target_slot=None,
typedb_entity='person-observation',
typedb_attribute=None,
rdf_predicate='sdo:sibling',
transformation=TransformationType.NOT_MAPPED,
required=False,
notes='OUT OF SCOPE: Family relationships not relevant for institutional staff tracking.',
status='out_of_scope',
),
FieldMapping(
source_path=None,
source_description='Extended family (40+ PiCo properties)',
target_class=None,
target_slot=None,
typedb_entity='person-observation',
typedb_attribute=None,
rdf_predicate='pico:has*',
transformation=TransformationType.NOT_MAPPED,
required=False,
notes='OUT OF SCOPE: PiCo defines 40+ family relationship properties (grandparent, in-law, step-relations, cousins, etc.). None are mapped - HC tracks professional, not familial relationships.',
status='out_of_scope',
),
# Archival source properties
FieldMapping(
source_path=None,
source_description='Archive component source',
target_class=None,
target_slot=None,
typedb_entity='person-observation',
typedb_attribute=None,
rdf_predicate='sdo:ArchiveComponent',
transformation=TransformationType.NOT_MAPPED,
required=False,
notes='OUT OF SCOPE: HC uses LinkedIn as source, not archival documents. Web claims serve similar provenance purpose.',
status='out_of_scope',
),
# Reconstruction properties
FieldMapping(
source_path=None,
source_description='Person reconstruction aggregation',
target_class=None,
target_slot=None,
typedb_entity='person-observation',
typedb_attribute=None,
rdf_predicate='pico:PersonReconstruction',
transformation=TransformationType.NOT_MAPPED,
required=False,
notes='FUTURE: PersonReconstruction (aggregating multiple observations) not yet implemented. Currently each person has one LinkedIn-based observation.',
status='future',
),
FieldMapping(
source_path=None,
source_description='Derived from relationship',
target_class=None,
target_slot=None,
typedb_entity='person-observation',
typedb_attribute=None,
rdf_predicate='prov:wasDerivedFrom',
transformation=TransformationType.NOT_MAPPED,
required=False,
notes='FUTURE: Cross-observation derivation not implemented. Would link reconstructed person to source observations.',
status='future',
),
],
generated_classes=['PersonObservation'],
example_yaml="""
# PiCo properties NOT mapped in HC system
# ❌ Vital records (not available from LinkedIn):
# - sdo:birthDate, sdo:deathDate
# - sdo:birthPlace, sdo:deathPlace
# - pico:deceased, pico:hasAge
# ❌ Personal characteristics (privacy):
# - sdo:gender
# - pico:hasReligion
# ❌ Genealogical name components:
# - pnv:patronym, pnv:baseSurname
# - pnv:surnamePrefix, pnv:initials
# (HC uses full sdo:name instead)
# ❌ Family relationships (40+ properties):
# - sdo:parent, sdo:children, sdo:spouse, sdo:sibling
# - pico:hasGrandparent, pico:hasGrandchild
# - pico:hasParent-in-law, pico:hasSibling-in-law
# - pico:hasStepparent, pico:hasStepchild
# - pico:hasCousin, pico:hasUncle_Aunt
# - pico:hasFosterParent, pico:hasGodparent
# ... and many more
# ⏳ Future consideration:
# - pico:PersonReconstruction (multi-source aggregation)
# - prov:wasDerivedFrom (observation linking)
""".strip(),
),
]
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
def get_mapping_for_source(source_block: str) -> Optional[EnrichmentSourceMapping]:
    """Look up the custodian enrichment mapping for a source block.

    Args:
        source_block: Value compared against each mapping's ``source_block``.

    Returns:
        The first entry of ``ENRICHMENT_MAPPINGS`` whose ``source_block``
        equals the argument, or ``None`` when no entry matches.
    """
    candidates = (
        entry for entry in ENRICHMENT_MAPPINGS
        if entry.source_block == source_block
    )
    # ``next`` with a default stops at the first hit and yields None on a miss.
    return next(candidates, None)
def get_fields_for_class(class_name: str) -> list[FieldMapping]:
    """Collect every custodian field mapping whose target class matches.

    Args:
        class_name: Value compared against each field's ``target_class``.

    Returns:
        A new list of the matching field mappings, in the order they are
        encountered while walking ``ENRICHMENT_MAPPINGS`` and each mapping's
        ``fields``. Empty when nothing matches.
    """
    return [
        candidate
        for source_mapping in ENRICHMENT_MAPPINGS
        for candidate in source_mapping.fields
        if candidate.target_class == class_name
    ]
def get_sources_for_class(class_name: str) -> list[str]:
    """List the custodian source blocks that feed a given class.

    A source block is included when its mapping's ``linkml_class`` equals
    ``class_name``, or otherwise when at least one of its fields has
    ``target_class`` equal to ``class_name``.

    Args:
        class_name: Class name to match.

    Returns:
        Source block names in ``ENRICHMENT_MAPPINGS`` order.
    """
    collected = []
    for source_mapping in ENRICHMENT_MAPPINGS:
        block_name = source_mapping.source_block
        if source_mapping.linkml_class == class_name:
            # Class-level match: recorded unconditionally (no duplicate check).
            collected.append(block_name)
            continue
        if block_name in collected:
            # Field-level matches never add a block that is already listed.
            continue
        if any(item.target_class == class_name for item in source_mapping.fields):
            collected.append(block_name)
    return collected
def get_typedb_attribute(class_name: str, slot_name: str) -> Optional[str]:
    """Resolve the TypeDB attribute recorded for a class/slot pair.

    Args:
        class_name: Value compared against each field's ``target_class``.
        slot_name: Value compared against each field's ``target_slot``.

    Returns:
        The ``typedb_attribute`` of the first field in ``ENRICHMENT_MAPPINGS``
        matching both values, or ``None`` when no field matches. The matched
        field's attribute may itself be ``None``, so ``None`` does not
        distinguish "no such field" from "field has no attribute".
    """
    attributes = (
        entry.typedb_attribute
        for source_mapping in ENRICHMENT_MAPPINGS
        for entry in source_mapping.fields
        if entry.target_class == class_name and entry.target_slot == slot_name
    )
    return next(attributes, None)
def get_rdf_predicate(class_name: str, slot_name: str) -> Optional[str]:
    """Resolve the RDF predicate recorded for a class/slot pair.

    Args:
        class_name: Value compared against each field's ``target_class``.
        slot_name: Value compared against each field's ``target_slot``.

    Returns:
        The ``rdf_predicate`` of the first field in ``ENRICHMENT_MAPPINGS``
        matching both values, or ``None`` when no field matches.
    """
    predicates = (
        entry.rdf_predicate
        for source_mapping in ENRICHMENT_MAPPINGS
        for entry in source_mapping.fields
        if entry.target_class == class_name and entry.target_slot == slot_name
    )
    return next(predicates, None)
def get_category_for_source(source_block: str) -> Optional[MappingCategory]:
    """Find the custodian category that lists a source block.

    Args:
        source_block: Name looked up in each category's ``sources``.

    Returns:
        The first entry of ``MAPPING_CATEGORIES`` whose ``sources`` contains
        ``source_block``, or ``None`` when no category lists it.
    """
    owning = (
        group for group in MAPPING_CATEGORIES
        if source_block in group.sources
    )
    return next(owning, None)
def get_transformation_types() -> list[TransformationType]:
    """Return every ``TransformationType`` member as a new list.

    Returns:
        The members yielded by iterating the enum class.
    """
    return [*TransformationType]
def get_person_mapping_for_source(source_block: str) -> Optional[EnrichmentSourceMapping]:
    """Look up the person mapping for a source block.

    Args:
        source_block: Value compared against each mapping's ``source_block``.

    Returns:
        The first entry of ``PERSON_MAPPINGS`` whose ``source_block`` equals
        the argument, or ``None`` when no entry matches.
    """
    candidates = (
        entry for entry in PERSON_MAPPINGS
        if entry.source_block == source_block
    )
    return next(candidates, None)
def get_person_fields_for_class(class_name: str) -> list[FieldMapping]:
    """Collect every person field mapping whose target class matches.

    Args:
        class_name: Value compared against each field's ``target_class``.

    Returns:
        A new list of the matching field mappings, in the order they are
        encountered while walking ``PERSON_MAPPINGS`` and each mapping's
        ``fields``. Empty when nothing matches.
    """
    return [
        candidate
        for person_mapping in PERSON_MAPPINGS
        for candidate in person_mapping.fields
        if candidate.target_class == class_name
    ]
def get_person_sources_for_class(class_name: str) -> list[str]:
    """List the person source blocks that feed a given class.

    A source block is included when its mapping's ``linkml_class`` equals
    ``class_name``, or otherwise when at least one of its fields has
    ``target_class`` equal to ``class_name``.

    Args:
        class_name: Class name to match.

    Returns:
        Source block names in ``PERSON_MAPPINGS`` order.
    """
    collected = []
    for person_mapping in PERSON_MAPPINGS:
        block_name = person_mapping.source_block
        if person_mapping.linkml_class == class_name:
            # Class-level match: recorded unconditionally (no duplicate check).
            collected.append(block_name)
            continue
        if block_name in collected:
            # Field-level matches never add a block that is already listed.
            continue
        if any(item.target_class == class_name for item in person_mapping.fields):
            collected.append(block_name)
    return collected
def get_person_category_for_source(source_block: str) -> Optional[MappingCategory]:
    """Find the person category that lists a source block.

    Args:
        source_block: Name looked up in each category's ``sources``.

    Returns:
        The first entry of ``PERSON_CATEGORIES`` whose ``sources`` contains
        ``source_block``, or ``None`` when no category lists it.
    """
    owning = (
        group for group in PERSON_CATEGORIES
        if source_block in group.sources
    )
    return next(owning, None)
def get_mapping_statistics() -> dict:
    """Summarise the custodian mapping tables.

    Returns:
        A dict with these keys:

        - ``total_source_blocks``: number of entries in ``ENRICHMENT_MAPPINGS``.
        - ``total_categories``: number of entries in ``MAPPING_CATEGORIES``.
        - ``total_fields``: number of field mappings across all entries.
        - ``required_fields``: number of field mappings with a truthy
          ``required`` flag.
        - ``unique_classes``: count of distinct class names seen.
        - ``classes``: those class names, sorted.
    """
    field_count = 0
    required_count = 0
    seen_classes = set()
    for source_mapping in ENRICHMENT_MAPPINGS:
        # The mapping's own class is always counted; a field's target class
        # is counted only when it is set (truthy).
        seen_classes.add(source_mapping.linkml_class)
        field_count += len(source_mapping.fields)
        for entry in source_mapping.fields:
            if entry.required:
                required_count += 1
            if entry.target_class:
                seen_classes.add(entry.target_class)
    return {
        'total_source_blocks': len(ENRICHMENT_MAPPINGS),
        'total_categories': len(MAPPING_CATEGORIES),
        'total_fields': field_count,
        'required_fields': required_count,
        'unique_classes': len(seen_classes),
        'classes': sorted(seen_classes),
    }
def get_person_mapping_statistics() -> dict:
    """Summarise the person mapping tables.

    Returns:
        A dict with these keys:

        - ``total_source_blocks``: number of entries in ``PERSON_MAPPINGS``.
        - ``total_categories``: number of entries in ``PERSON_CATEGORIES``.
        - ``total_fields``: number of field mappings across all entries.
        - ``required_fields``: number of field mappings with a truthy
          ``required`` flag.
        - ``unique_classes``: count of distinct class names seen.
        - ``classes``: those class names, sorted.
    """
    field_count = 0
    required_count = 0
    seen_classes = set()
    for person_mapping in PERSON_MAPPINGS:
        # The mapping's own class is always counted; a field's target class
        # is counted only when it is set (truthy).
        seen_classes.add(person_mapping.linkml_class)
        field_count += len(person_mapping.fields)
        for entry in person_mapping.fields:
            if entry.required:
                required_count += 1
            if entry.target_class:
                seen_classes.add(entry.target_class)
    return {
        'total_source_blocks': len(PERSON_MAPPINGS),
        'total_categories': len(PERSON_CATEGORIES),
        'total_fields': field_count,
        'required_fields': required_count,
        'unique_classes': len(seen_classes),
        'classes': sorted(seen_classes),
    }
def get_mappings_for_data_source(data_source: str) -> list[EnrichmentSourceMapping]:
    """Select the custodian mappings whose provenance names a data source.

    Args:
        data_source: Value compared against ``provenance.source_type`` of
            each mapping.

    Returns:
        A new list of the matching entries of ``ENRICHMENT_MAPPINGS``, in
        their original order. Empty when nothing matches.
    """
    selected = []
    for candidate in ENRICHMENT_MAPPINGS:
        if candidate.provenance.source_type == data_source:
            selected.append(candidate)
    return selected
def get_categories_for_data_source(data_source: str) -> list[MappingCategory]:
    """Select the custodian categories touched by a data source.

    A category is included when at least one name in its ``sources`` is the
    ``source_block`` of a mapping whose ``provenance.source_type`` equals
    ``data_source``.

    Args:
        data_source: Value compared against ``provenance.source_type``.

    Returns:
        A new list of the matching entries of ``MAPPING_CATEGORIES``, in
        their original order. Empty when nothing matches.
    """
    matching_blocks = set()
    for source_mapping in ENRICHMENT_MAPPINGS:
        if source_mapping.provenance.source_type == data_source:
            matching_blocks.add(source_mapping.source_block)
    # A category qualifies as soon as it shares one source block with the set.
    return [
        group for group in MAPPING_CATEGORIES
        if not matching_blocks.isdisjoint(group.sources)
    ]
# ============================================================================
# MODULE EXPORTS
# ============================================================================
# Names bound by a star-import of this module. Keep this list in step with
# the definitions above when adding, renaming, or removing a public name.
__all__ = [
    # Enums
    'TransformationType',
    'MappingStatus',
    'DataTier',
    # Dataclasses
    'FieldExample',
    'FieldValidation',
    'FieldMapping',
    'Provenance',
    'EnrichmentSourceMapping',
    'MappingCategory',
    # Data (module-level mapping tables read by the helper functions)
    'MAPPING_CATEGORIES',
    'ENRICHMENT_MAPPINGS',
    'PERSON_CATEGORIES',
    'PERSON_MAPPINGS',
    # Functions: lookups over the custodian tables
    'get_mapping_for_source',
    'get_fields_for_class',
    'get_sources_for_class',
    'get_typedb_attribute',
    'get_rdf_predicate',
    'get_category_for_source',
    'get_transformation_types',
    # Functions: lookups over the person tables
    'get_person_mapping_for_source',
    'get_person_fields_for_class',
    'get_person_sources_for_class',
    'get_person_category_for_source',
    # Functions: summary statistics and data-source filters
    'get_mapping_statistics',
    'get_person_mapping_statistics',
    'get_mappings_for_data_source',
    'get_categories_for_data_source',
]