6643 lines
264 KiB
Python
6643 lines
264 KiB
Python
"""
|
|
mappings.py - Data Transformation Mapping Documentation (Python Port)
|
|
|
|
This module documents how raw YAML/JSON data from custodian files maps to:
|
|
1. LinkML schema classes and slots
|
|
2. TypeDB entities and attributes
|
|
3. RDF triples and predicates
|
|
|
|
ARCHITECTURE OVERVIEW:
|
|
======================
|
|
|
|
The Heritage Custodian System uses a "hub architecture" where:
|
|
- CustodianHub: Abstract entity with only persistent hc_id
|
|
- CustodianObservation: Evidence/claims from a specific source
|
|
- ReconstructionActivity: Process that generates standardized aspects
|
|
- Four aspects: LegalStatus, Name, Place, Collection (independent temporal lifecycles)
|
|
|
|
Each enrichment block in YAML (google_maps_enrichment, wikidata_enrichment, etc.)
|
|
maps to a SEPARATE CustodianObservation with its own provenance.
|
|
|
|
DATA FLOW:
|
|
==========
|
|
|
|
Raw YAML (data/custodian/*.yaml)
|
|
|
|
|
[Transform Layer]
|
|
|
|
|
LinkML Instance Data
|
|
|
|
|
+------+------+
|
|
| | |
|
|
RDF TypeDB JSON-LD
|
|
|
|
Ported from: frontend/src/lib/linkml/custodian-data-mappings.ts
|
|
Version: 1.0.0
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
from enum import Enum
|
|
from typing import Any, Optional
|
|
|
|
|
|
# ============================================================================
|
|
# ENUMERATIONS
|
|
# ============================================================================
|
|
|
|
class TransformationType(str, Enum):
    """Types of data transformations that can occur during mapping.

    Members also subclass ``str``, so each member compares equal to its
    plain string value (e.g. ``TransformationType.DIRECT == 'direct'``).
    """

    DIRECT = 'direct'  # 1:1 copy, no transformation
    RENAME = 'rename'  # Field name change only
    SPLIT = 'split'  # One source field -> multiple target fields
    MERGE = 'merge'  # Multiple source fields -> one target field
    LOOKUP = 'lookup'  # Enum value lookup or reference resolution
    COMPUTED = 'computed'  # Derived/calculated value
    NESTED = 'nested'  # Nested object mapping (object -> object)
    ARRAY_MAP = 'array_map'  # Array transformation (array -> array with element mapping)
    ARRAY_DIRECT = 'array_direct'  # Direct array copy without element transformation
    FLATTEN = 'flatten'  # Nested structure -> flat structure
    AGGREGATE = 'aggregate'  # Multiple values -> single aggregate
    TEMPORAL = 'temporal'  # Date/time transformation
    URI_CONSTRUCT = 'uri_construct'  # Construct URI from components
    NORMALIZE = 'normalize'  # Normalize/standardize value format
    CONDITIONAL = 'conditional'  # Conditional transformation based on other fields
    NOT_MAPPED = 'not_mapped'  # Ontology property intentionally not mapped
|
|
|
|
|
|
class MappingStatus(str, Enum):
    """Mapping status for ontology coverage documentation.

    Members also subclass ``str``, so each member compares equal to its
    plain string value.
    """

    MAPPED = 'mapped'  # Property is fully mapped to HC system
    PARTIAL = 'partial'  # Property is partially mapped
    OUT_OF_SCOPE = 'out_of_scope'  # Property is intentionally not mapped
    FUTURE = 'future'  # Property may be mapped in future versions
|
|
|
|
|
|
class DataTier(str, Enum):
    """Data tier classification (per AGENTS.md).

    Each member's value is identical to its name, and members also
    subclass ``str`` so they compare equal to that string.
    """

    TIER_1_AUTHORITATIVE = 'TIER_1_AUTHORITATIVE'  # CSV registries (ISIL, Dutch orgs)
    TIER_2_VERIFIED = 'TIER_2_VERIFIED'  # Data from institutional websites, APIs
    TIER_3_CROWD_SOURCED = 'TIER_3_CROWD_SOURCED'  # Wikidata, OpenStreetMap
    TIER_4_INFERRED = 'TIER_4_INFERRED'  # NLP-extracted from conversations
|
|
|
|
|
|
# ============================================================================
|
|
# DATACLASSES
|
|
# ============================================================================
|
|
|
|
@dataclass
class FieldExample:
    """Example showing source -> target transformation."""

    # Value as it appears in the source data
    source_value: Any
    # Value after the mapping has been applied
    target_value: Any
    # Optional TypeDB-side representation of the value
    typedb_value: Optional[Any] = None
    # Optional RDF triple (as text) illustrating the mapped value
    rdf_triple: Optional[str] = None
|
|
|
|
|
|
@dataclass
class FieldValidation:
    """Validation rules for a field."""

    # Kind of value expected:
    # 'string' | 'number' | 'boolean' | 'date' | 'uri' | 'enum' | 'array'
    type: str
    # Optional pattern the value should match
    pattern: Optional[str] = None
    # Permitted values (used alongside type='enum' elsewhere in this module)
    enum_values: Optional[list[str]] = None
    # Optional length bounds
    min_length: Optional[int] = None
    max_length: Optional[int] = None
|
|
|
|
|
|
@dataclass
class FieldMapping:
    """Mapping for a single field from source to target.

    The first five fields are required positional/keyword arguments; all
    remaining fields are optional and default to ``None`` (or ``False`` for
    ``required``).
    """

    # JSON path in source YAML (e.g., "google_maps_enrichment.place_id"). None for unmapped.
    source_path: Optional[str]
    # Human-readable description of the source field
    source_description: str
    # LinkML class this maps to. None for unmapped ontology properties.
    target_class: Optional[str]
    # LinkML slot name. None for unmapped ontology properties.
    target_slot: Optional[str]
    # Type of transformation applied
    transformation: TransformationType
    # Human-readable explanation of the transformation
    transformation_details: Optional[str] = None
    # TypeDB entity type (snake-case with hyphens)
    typedb_entity: Optional[str] = None
    # TypeDB attribute name. None for unmapped ontology properties.
    typedb_attribute: Optional[str] = None
    # RDF predicate (CURIE format)
    rdf_predicate: Optional[str] = None
    # Whether this field is required
    required: bool = False
    # Example showing source -> target transformation
    example: Optional[FieldExample] = None
    # Related/dependent fields
    related_fields: Optional[list[str]] = None
    # Validation rules
    validation: Optional[FieldValidation] = None
    # Additional notes or comments about this mapping
    notes: Optional[str] = None
    # Mapping status for ontology coverage documentation
    status: Optional[MappingStatus] = None
|
|
|
|
|
|
@dataclass
class Provenance:
    """Provenance information for an enrichment source."""

    # Identifier of the kind of source (e.g. 'computed', 'google_maps_api')
    source_type: str
    # Trust tier assigned to data from this source
    data_tier: DataTier
    # Base URL of the API the data comes from, when there is one
    api_endpoint: Optional[str] = None
    # Free-text refresh cadence (e.g. 'On-demand')
    update_frequency: Optional[str] = None
|
|
|
|
|
|
@dataclass
class EnrichmentSourceMapping:
    """Complete mapping for an enrichment source block.

    ``fields`` uses a default factory, so every instance created without an
    explicit list gets its own empty list.
    """

    # Source block name in YAML (e.g., "google_maps_enrichment")
    source_block: str
    # Human-readable description
    description: str
    # Primary LinkML class this maps to
    linkml_class: str
    # Primary TypeDB entity
    typedb_entity: str
    # Provenance information
    provenance: Provenance
    # All field mappings for this source
    fields: list[FieldMapping] = field(default_factory=list)
    # Extended description with usage notes
    detailed_description: Optional[str] = None
    # Classes that can be generated from this source
    generated_classes: Optional[list[str]] = None
    # Example YAML snippet
    example_yaml: Optional[str] = None
|
|
|
|
|
|
@dataclass
class MappingCategory:
    """Category grouping for mappings in the UI.

    Display text is carried in two variants: the plain fields and their
    ``_nl`` counterparts.
    """

    # Category identifier (e.g. 'identity')
    id: str
    # Display name and its ``_nl`` counterpart
    name: str
    name_nl: str
    # Description and its ``_nl`` counterpart
    description: str
    description_nl: str
    # Icon string shown for the category
    icon: str
    # Per-instance list (default factory) of source block names
    sources: list[str] = field(default_factory=list)  # sourceBlock names
|
|
|
|
|
|
# ============================================================================
|
|
# MAPPING CATEGORIES (19 Custodian Categories)
|
|
# ============================================================================
|
|
|
|
# Ordered list of category groupings. Each entry's ``sources`` holds source
# block names; the ``*_nl`` fields carry the Dutch display text.
MAPPING_CATEGORIES: list[MappingCategory] = [
    MappingCategory(
        id='identity',
        name='Identity & Identification',
        name_nl='Identiteit & Identificatie',
        description='Core identity fields: GHCID, names, identifiers',
        description_nl='Kernidentiteitsvelden: GHCID, namen, identificatiecodes',
        icon='🪪',
        sources=['ghcid', 'identifiers', 'custodian_name'],
    ),
    MappingCategory(
        id='location',
        name='Location & Geography',
        name_nl='Locatie & Geografie',
        description='Physical location, addresses, coordinates',
        description_nl='Fysieke locatie, adressen, coördinaten',
        icon='📍',
        sources=['location', 'google_maps_enrichment'],
    ),
    MappingCategory(
        id='external',
        name='External Data Sources',
        name_nl='Externe Databronnen',
        description='Enrichment from external APIs and databases',
        description_nl="Verrijking van externe API's en databases",
        icon='🔗',
        sources=['wikidata_enrichment', 'museum_register_enrichment', 'genealogiewerkbalk_enrichment'],
    ),
    MappingCategory(
        id='web',
        name='Web & Digital Presence',
        name_nl='Web & Digitale Aanwezigheid',
        description='Website data, digital platforms, social media',
        description_nl='Websitegegevens, digitale platformen, sociale media',
        icon='🌐',
        sources=['web_enrichment', 'web_claims', 'digital_platforms', 'youtube_enrichment'],
    ),
    MappingCategory(
        id='legal',
        name='Legal & Organization',
        name_nl='Juridisch & Organisatie',
        description='Legal status, organizational structure',
        description_nl='Juridische status, organisatiestructuur',
        icon='⚖️',
        sources=['legal_status', 'original_entry'],
    ),
    MappingCategory(
        id='temporal',
        name='Temporal & Provenance',
        name_nl='Temporeel & Herkomst',
        description='Time spans, data provenance, versioning',
        description_nl='Tijdspannes, data-herkomst, versiebeheer',
        icon='⏱️',
        sources=['timespan', 'provenance'],
    ),
    MappingCategory(
        id='heritage',
        name='Heritage Specific',
        name_nl='Erfgoed Specifiek',
        description='UNESCO, collections, domain-specific data',
        description_nl='UNESCO, collecties, domeinspecifieke gegevens',
        icon='🏛️',
        sources=['unesco_ich_enrichment'],
    ),
    # -------------------------------------------------------------------------
    # PHASE 1 ADDITIONS: Schema Class Coverage Categories
    # -------------------------------------------------------------------------
    MappingCategory(
        id='archive_types',
        name='Archive Types',
        name_nl='Archieftypen',
        description='Specialized archive classification types (97 classes): academic, audiovisual, church, municipal, national, etc.',
        description_nl='Gespecialiseerde archiefclassificatietypen (97 klassen): academisch, audiovisueel, kerkelijk, gemeentelijk, nationaal, etc.',
        icon='📦',
        sources=[
            'archive_type_academic', 'archive_type_audiovisual', 'archive_type_church',
            'archive_type_corporate', 'archive_type_government', 'archive_type_municipal',
            'archive_type_national', 'archive_type_regional', 'archive_type_specialized',
            'archive_type_thematic',
        ],
    ),
    MappingCategory(
        id='organizational_structure',
        name='Organizational Structure',
        name_nl='Organisatiestructuur',
        description='Organizational hierarchy and structure classes (30+ classes): departments, divisions, branches, parent organizations',
        description_nl='Organisatiehiërarchie en structuurklassen (30+ klassen): afdelingen, divisies, vestigingen, moederorganisaties',
        icon='🏢',
        sources=[
            'org_structure_hierarchy', 'org_structure_administrative', 'org_structure_subdivision',
        ],
    ),
    MappingCategory(
        id='heritage_cultural',
        name='Heritage & Cultural Sites',
        name_nl='Erfgoed & Culturele Locaties',
        description='World heritage sites, intangible heritage, cultural institutions (15+ classes)',
        description_nl='Werelderfgoedlocaties, immaterieel erfgoed, culturele instellingen (15+ klassen)',
        icon='🗿',
        sources=[
            'heritage_world_sites', 'heritage_intangible', 'heritage_national_treasures',
        ],
    ),
    MappingCategory(
        id='classification_types',
        name='Classification Types',
        name_nl='Classificatietypen',
        description='Type classes for custodian classification (32 classes): MuseumType, LibraryType, ArchiveOrganizationType, etc.',
        description_nl='Typeklassen voor bronhouderclassificatie (32 klassen): MuseumType, BibliotheekType, ArchiefOrganisatieType, etc.',
        icon='🏷️',
        sources=[
            'type_classes_glam', 'type_classes_digital', 'type_classes_organizational',
        ],
    ),
    # -------------------------------------------------------------------------
    # PHASE 2 ADDITIONS: Remaining Schema Class Coverage Categories
    # -------------------------------------------------------------------------
    MappingCategory(
        id='place_location',
        name='Place & Location',
        name_nl='Plaats & Locatie',
        description='Geographic and spatial location classes (8 classes): settlements, countries, custodian places, feature places',
        description_nl='Geografische en ruimtelijke locatieklassen (8 klassen): nederzettingen, landen, bronhouderplaatsen, kenmerkplaatsen',
        icon='📍',
        sources=[
            'place_geographic', 'place_custodian_specific',
        ],
    ),
    MappingCategory(
        id='collections',
        name='Collections & Holdings',
        name_nl='Collecties & Bezittingen',
        description='Collection management and holdings classes (6 classes): collections, special collections, collection management systems',
        description_nl='Collectiebeheer en bezitklassen (6 klassen): collecties, bijzondere collecties, collectiebeheersystemen',
        icon='🗃️',
        sources=[
            'collection_core', 'collection_management',
        ],
    ),
    MappingCategory(
        id='person_staff',
        name='Person & Staff',
        name_nl='Persoon & Personeel',
        description='Person and staff-related classes (9 classes): profiles, connections, work experience, credentials',
        description_nl="Persoon- en personeelgerelateerde klassen (9 klassen): profielen, connecties, werkervaring, diploma's",
        icon='👥',
        sources=[
            'person_profile_extended', 'person_work_education',
        ],
    ),
    MappingCategory(
        id='digital_api',
        name='Digital & API Services',
        name_nl='Digitaal & API Diensten',
        description='Digital platforms and API endpoint classes (11 classes): web portals, OAI-PMH, search APIs, file APIs',
        description_nl="Digitale platformen en API-eindpuntklassen (11 klassen): webportalen, OAI-PMH, zoek-API's, bestand-API's",
        icon='🔌',
        sources=[
            'digital_platforms_extended', 'api_endpoints',
        ],
    ),
    MappingCategory(
        id='video_media',
        name='Video & Social Media',
        name_nl='Video & Sociale Media',
        description='Video content and social media classes (11 classes): video annotations, chapters, social media posts/profiles',
        description_nl='Video-inhoud en sociale mediaklassen (11 klassen): video-annotaties, hoofdstukken, sociale media posts/profielen',
        icon='🎬',
        sources=[
            'video_content', 'social_media_content',
        ],
    ),
    MappingCategory(
        id='legal_admin',
        name='Legal & Administrative',
        name_nl='Juridisch & Administratief',
        description='Legal, policy, and administrative classes (9 classes): access policies, budgets, projects, registration',
        description_nl='Juridische, beleids- en administratieve klassen (9 klassen): toegangsbeleid, budgetten, projecten, registratie',
        icon='⚖️',
        sources=[
            'legal_policies', 'administrative_records',
        ],
    ),
    MappingCategory(
        id='finding_aids',
        name='Finding Aids & Standards',
        name_nl='Toegangen & Standaarden',
        description='Finding aids, standards, and documentation classes (5 classes): finding aids, source documents, standards',
        description_nl='Toegangen, standaarden en documentatieklassen (5 klassen): toegangen, brondocumenten, standaarden',
        icon='📑',
        sources=[
            'finding_aids_standards',
        ],
    ),
    MappingCategory(
        id='reconstruction',
        name='Reconstruction & Provenance',
        name_nl='Reconstructie & Herkomst',
        description='Entity reconstruction and provenance tracking classes (4 classes): reconstructed entities, activities, agents',
        description_nl='Entiteitsreconstructie en herkomsttrackingklassen (4 klassen): gereconstrueerde entiteiten, activiteiten, agenten',
        icon='🔄',
        sources=[
            'reconstruction_provenance',
        ],
    ),
    MappingCategory(
        id='storage_facilities',
        name='Storage & Facilities',
        name_nl='Opslag & Faciliteiten',
        description='Storage conditions and facility classes (7 classes): storage types, conditions, education centers',
        description_nl='Opslagcondities en faciliteitenklassen (7 klassen): opslagtypen, condities, onderwijscentra',
        icon='🏪',
        sources=[
            'storage_facilities',
        ],
    ),
    MappingCategory(
        id='funding',
        name='Funding & Grants',
        name_nl='Financiering & Subsidies',
        description='Funding and grant-related classes (3 classes): funding agendas, requirements, applications',
        description_nl="Financiering- en subsidieklassen (3 klassen): financieringsagenda's, vereisten, aanvragen",
        icon='💰',
        sources=[
            'funding_grants',
        ],
    ),
    MappingCategory(
        id='language_naming',
        name='Language & Naming',
        name_nl='Taal & Naamgeving',
        description='Language and naming classes (4 classes): language codes, proficiency, appellations',
        description_nl='Taal- en naamgevingsklassen (4 klassen): taalcodes, taalvaardigheid, benamingen',
        icon='🗣️',
        sources=[
            'language_naming',
        ],
    ),
    MappingCategory(
        id='specialized_archives_intl',
        name='Specialized Archives (International)',
        name_nl='Gespecialiseerde Archieven (Internationaal)',
        description='Country-specific specialized archive types (19 classes): German, Swedish, French, Czech archive types',
        description_nl='Landspecifieke gespecialiseerde archieftypen (19 klassen): Duitse, Zweedse, Franse, Tsjechische archieftypen',
        icon='🌍',
        sources=[
            'archives_german', 'archives_swedish', 'archives_french', 'archives_other',
        ],
    ),
]
|
|
|
|
|
|
# ============================================================================
|
|
# ENRICHMENT SOURCE MAPPINGS
|
|
# ============================================================================
|
|
|
|
ENRICHMENT_MAPPINGS: list[EnrichmentSourceMapping] = [
|
|
# -------------------------------------------------------------------------
|
|
# GHCID - Global Heritage Custodian Identifier
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='ghcid',
|
|
description='Global Heritage Custodian Identifier - persistent unique identifier',
|
|
detailed_description="""
|
|
The GHCID is the persistent unique identifier for every heritage custodian.
|
|
It follows the format: {COUNTRY}-{REGION}-{SETTLEMENT}-{TYPE}-{ABBREV}
|
|
|
|
Example: NL-NH-AMS-M-RM (Rijksmuseum, Amsterdam, Netherlands)
|
|
|
|
GHCIDs are deterministically generated and hashed to multiple UUID formats
|
|
for different use cases (UUID v5 for primary, UUID v8 for future-proofing).
|
|
""".strip(),
|
|
linkml_class='GHCID',
|
|
typedb_entity='ghcid',
|
|
provenance=Provenance(
|
|
source_type='computed',
|
|
data_tier=DataTier.TIER_1_AUTHORITATIVE,
|
|
),
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='ghcid.ghcid_current',
|
|
source_description='Current GHCID string',
|
|
target_class='GHCID',
|
|
target_slot='ghcid_string',
|
|
transformation=TransformationType.DIRECT,
|
|
typedb_entity='ghcid',
|
|
typedb_attribute='ghcid-string',
|
|
rdf_predicate='hc:ghcidString',
|
|
required=True,
|
|
example=FieldExample(
|
|
source_value='NL-NH-AMS-M-RM',
|
|
target_value='NL-NH-AMS-M-RM',
|
|
rdf_triple='<https://w3id.org/hc/NL-NH-AMS-M-RM> hc:ghcidString "NL-NH-AMS-M-RM" .',
|
|
),
|
|
),
|
|
FieldMapping(
|
|
source_path='ghcid.ghcid_uuid',
|
|
source_description='UUID v5 derived from GHCID string',
|
|
target_class='GHCID',
|
|
target_slot='ghcid_uuid',
|
|
transformation=TransformationType.COMPUTED,
|
|
transformation_details='UUID v5 generated using SHA-1 hash of GHCID string with heritage namespace',
|
|
typedb_entity='ghcid',
|
|
typedb_attribute='ghcid-uuid',
|
|
rdf_predicate='hc:ghcidUuid',
|
|
required=True,
|
|
example=FieldExample(
|
|
source_value='550e8400-e29b-5d4f-a716-446655440000',
|
|
target_value='550e8400-e29b-5d4f-a716-446655440000',
|
|
),
|
|
),
|
|
FieldMapping(
|
|
source_path='ghcid.ghcid_numeric',
|
|
source_description='64-bit numeric ID for database optimization',
|
|
target_class='GHCID',
|
|
target_slot='ghcid_numeric',
|
|
transformation=TransformationType.COMPUTED,
|
|
transformation_details='SHA-256 hash truncated to 64-bit integer',
|
|
typedb_entity='ghcid',
|
|
typedb_attribute='ghcid-numeric',
|
|
required=False,
|
|
),
|
|
FieldMapping(
|
|
source_path='ghcid.location_resolution',
|
|
source_description='GeoNames resolution metadata',
|
|
target_class='GHCID',
|
|
target_slot='location_resolution',
|
|
transformation=TransformationType.NESTED,
|
|
transformation_details='Maps to LocationResolution class with GeoNames provenance',
|
|
typedb_entity='location-resolution',
|
|
required=False,
|
|
),
|
|
],
|
|
example_yaml="""
|
|
ghcid:
|
|
ghcid_current: NL-NH-AMS-M-RM
|
|
ghcid_uuid: 550e8400-e29b-5d4f-a716-446655440000
|
|
ghcid_numeric: 213324328442227739
|
|
location_resolution:
|
|
method: REVERSE_GEOCODE
|
|
geonames_id: 2759794
|
|
geonames_name: Amsterdam
|
|
settlement_code: AMS
|
|
admin1_code: "07"
|
|
region_code: NH
|
|
""".strip(),
|
|
),
|
|
|
|
# -------------------------------------------------------------------------
|
|
# Identifiers
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='identifiers',
|
|
description='External identifiers from various sources',
|
|
detailed_description="""
|
|
Links to external identifier systems including:
|
|
- ISIL codes (International Standard Identifier for Libraries)
|
|
- Wikidata QIDs
|
|
- VIAF (Virtual International Authority File)
|
|
- KvK numbers (Dutch Chamber of Commerce)
|
|
- Museum Register numbers
|
|
- And more...
|
|
""".strip(),
|
|
linkml_class='Identifier',
|
|
typedb_entity='identifier',
|
|
provenance=Provenance(
|
|
source_type='registry_lookup',
|
|
data_tier=DataTier.TIER_1_AUTHORITATIVE,
|
|
),
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='identifiers[].identifier_scheme',
|
|
source_description='Identifier system/scheme name',
|
|
target_class='Identifier',
|
|
target_slot='identifier_scheme',
|
|
transformation=TransformationType.LOOKUP,
|
|
transformation_details='Maps to IdentifierSchemeEnum',
|
|
typedb_entity='identifier',
|
|
typedb_attribute='scheme',
|
|
rdf_predicate='hc:identifierScheme',
|
|
required=True,
|
|
validation=FieldValidation(
|
|
type='enum',
|
|
enum_values=['GHCID', 'ISIL', 'Wikidata', 'VIAF', 'KvK', 'MuseumRegister', 'NDE', 'Website'],
|
|
),
|
|
example=FieldExample(
|
|
source_value='ISIL',
|
|
target_value='ISIL',
|
|
),
|
|
),
|
|
FieldMapping(
|
|
source_path='identifiers[].identifier_value',
|
|
source_description='The identifier value/code',
|
|
target_class='Identifier',
|
|
target_slot='identifier_value',
|
|
transformation=TransformationType.DIRECT,
|
|
typedb_entity='identifier',
|
|
typedb_attribute='value',
|
|
rdf_predicate='hc:identifierValue',
|
|
required=True,
|
|
example=FieldExample(
|
|
source_value='NL-AmRM',
|
|
target_value='NL-AmRM',
|
|
),
|
|
),
|
|
FieldMapping(
|
|
source_path='identifiers[].identifier_url',
|
|
source_description='URL to the identifier record',
|
|
target_class='Identifier',
|
|
target_slot='identifier_url',
|
|
transformation=TransformationType.DIRECT,
|
|
typedb_entity='identifier',
|
|
typedb_attribute='url',
|
|
rdf_predicate='schema:url',
|
|
required=False,
|
|
example=FieldExample(
|
|
source_value='https://www.wikidata.org/wiki/Q190804',
|
|
target_value='https://www.wikidata.org/wiki/Q190804',
|
|
),
|
|
),
|
|
],
|
|
example_yaml="""
|
|
identifiers:
|
|
- identifier_scheme: GHCID
|
|
identifier_value: NL-NH-AMS-M-RM
|
|
- identifier_scheme: ISIL
|
|
identifier_value: NL-AmRM
|
|
- identifier_scheme: Wikidata
|
|
identifier_value: Q190804
|
|
identifier_url: https://www.wikidata.org/wiki/Q190804
|
|
- identifier_scheme: VIAF
|
|
identifier_value: "148691498"
|
|
""".strip(),
|
|
),
|
|
|
|
# -------------------------------------------------------------------------
|
|
# Custodian Name
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='custodian_name',
|
|
description='Consensus name with confidence scoring',
|
|
detailed_description="""
|
|
The canonical name determined through multi-source reconciliation.
|
|
Includes emic (native language) name with legal form stripped per Rule 8.
|
|
Confidence scores indicate how many sources agree on each name variant.
|
|
""".strip(),
|
|
linkml_class='CustodianName',
|
|
typedb_entity='custodian-name',
|
|
provenance=Provenance(
|
|
source_type='reconciliation',
|
|
data_tier=DataTier.TIER_2_VERIFIED,
|
|
),
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='custodian_name.consensus_name',
|
|
source_description='Reconciled canonical name',
|
|
target_class='CustodianName',
|
|
target_slot='name_string',
|
|
transformation=TransformationType.DIRECT,
|
|
typedb_entity='custodian-name',
|
|
typedb_attribute='name-string',
|
|
rdf_predicate='skos:prefLabel',
|
|
required=True,
|
|
example=FieldExample(
|
|
source_value='Rijksmuseum',
|
|
target_value='Rijksmuseum',
|
|
rdf_triple='<https://w3id.org/hc/NL-NH-AMS-M-RM> skos:prefLabel "Rijksmuseum"@nl .',
|
|
),
|
|
),
|
|
FieldMapping(
|
|
source_path='custodian_name.emic_name',
|
|
source_description='Native language name (legal form stripped)',
|
|
target_class='CustodianName',
|
|
target_slot='emic_name',
|
|
transformation=TransformationType.NORMALIZE,
|
|
transformation_details='Legal form terms (Stichting, Foundation, etc.) removed per Rule 8',
|
|
typedb_entity='custodian-name',
|
|
typedb_attribute='emic-name',
|
|
rdf_predicate='hc:emicName',
|
|
required=False,
|
|
example=FieldExample(
|
|
source_value='Rijksmuseum Amsterdam',
|
|
target_value='Rijksmuseum Amsterdam',
|
|
),
|
|
),
|
|
FieldMapping(
|
|
source_path='custodian_name.confidence_score',
|
|
source_description='Confidence in name accuracy (0-1)',
|
|
target_class='CustodianName',
|
|
target_slot='confidence',
|
|
transformation=TransformationType.DIRECT,
|
|
typedb_entity='custodian-name',
|
|
typedb_attribute='confidence-score',
|
|
required=False,
|
|
validation=FieldValidation(type='number'),
|
|
example=FieldExample(
|
|
source_value=0.95,
|
|
target_value=0.95,
|
|
),
|
|
),
|
|
FieldMapping(
|
|
source_path='custodian_name.alternative_names',
|
|
source_description='List of alternative/historical names',
|
|
target_class='CustodianName',
|
|
target_slot='alternative_names',
|
|
transformation=TransformationType.ARRAY_DIRECT,
|
|
typedb_entity='custodian-name',
|
|
typedb_attribute='alternative-names',
|
|
rdf_predicate='skos:altLabel',
|
|
required=False,
|
|
),
|
|
],
|
|
example_yaml="""
|
|
custodian_name:
|
|
consensus_name: Rijksmuseum
|
|
emic_name: Rijksmuseum Amsterdam
|
|
confidence_score: 0.95
|
|
alternative_names:
|
|
- Rijksmuseum Amsterdam
|
|
- Netherlands State Museum
|
|
""".strip(),
|
|
),
|
|
# -------------------------------------------------------------------------
|
|
# Google Maps Enrichment
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='google_maps_enrichment',
|
|
description='Location and business data from Google Maps Places API',
|
|
detailed_description="""
|
|
Rich location data including coordinates, address, opening hours,
|
|
ratings, reviews, and photos from Google Maps Places API.
|
|
Each enrichment creates a CustodianObservation with google_maps_api provenance.
|
|
""".strip(),
|
|
linkml_class='CustodianObservation',
|
|
typedb_entity='custodian-observation',
|
|
provenance=Provenance(
|
|
source_type='google_maps_api',
|
|
data_tier=DataTier.TIER_2_VERIFIED,
|
|
api_endpoint='https://maps.googleapis.com/maps/api/place/',
|
|
update_frequency='On-demand',
|
|
),
|
|
generated_classes=['Place', 'GeoCoordinates', 'OpeningHours'],
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='google_maps_enrichment.place_id',
|
|
source_description='Google Maps Place ID',
|
|
target_class='CustodianObservation',
|
|
target_slot='external_id',
|
|
transformation=TransformationType.DIRECT,
|
|
typedb_entity='custodian-observation',
|
|
typedb_attribute='external-id',
|
|
rdf_predicate='schema:identifier',
|
|
required=True,
|
|
example=FieldExample(
|
|
source_value='ChIJ5Ra7we4JxkcRhYVAaq5zQ9U',
|
|
target_value='ChIJ5Ra7we4JxkcRhYVAaq5zQ9U',
|
|
),
|
|
),
|
|
FieldMapping(
|
|
source_path='google_maps_enrichment.coordinates.latitude',
|
|
source_description='Latitude coordinate',
|
|
target_class='GeoCoordinates',
|
|
target_slot='latitude',
|
|
transformation=TransformationType.NESTED,
|
|
typedb_entity='geo-coordinates',
|
|
typedb_attribute='latitude',
|
|
rdf_predicate='schema:latitude',
|
|
required=True,
|
|
validation=FieldValidation(type='number'),
|
|
example=FieldExample(
|
|
source_value=52.3599976,
|
|
target_value=52.3599976,
|
|
),
|
|
),
|
|
FieldMapping(
|
|
source_path='google_maps_enrichment.coordinates.longitude',
|
|
source_description='Longitude coordinate',
|
|
target_class='GeoCoordinates',
|
|
target_slot='longitude',
|
|
transformation=TransformationType.NESTED,
|
|
typedb_entity='geo-coordinates',
|
|
typedb_attribute='longitude',
|
|
rdf_predicate='schema:longitude',
|
|
required=True,
|
|
validation=FieldValidation(type='number'),
|
|
example=FieldExample(
|
|
source_value=4.8852188,
|
|
target_value=4.8852188,
|
|
),
|
|
),
|
|
FieldMapping(
|
|
source_path='google_maps_enrichment.formatted_address',
|
|
source_description='Human-readable formatted address',
|
|
target_class='Place',
|
|
target_slot='formatted_address',
|
|
transformation=TransformationType.DIRECT,
|
|
typedb_entity='place',
|
|
typedb_attribute='formatted-address',
|
|
rdf_predicate='schema:address',
|
|
required=False,
|
|
example=FieldExample(
|
|
source_value='Museumstraat 1, 1071 XX Amsterdam, Netherlands',
|
|
target_value='Museumstraat 1, 1071 XX Amsterdam, Netherlands',
|
|
),
|
|
),
|
|
FieldMapping(
|
|
source_path='google_maps_enrichment.rating',
|
|
source_description='Average rating (1-5)',
|
|
target_class='CustodianObservation',
|
|
target_slot='rating',
|
|
transformation=TransformationType.DIRECT,
|
|
typedb_entity='custodian-observation',
|
|
typedb_attribute='rating',
|
|
rdf_predicate='schema:aggregateRating',
|
|
required=False,
|
|
validation=FieldValidation(type='number'),
|
|
example=FieldExample(
|
|
source_value=4.6,
|
|
target_value=4.6,
|
|
),
|
|
),
|
|
FieldMapping(
|
|
source_path='google_maps_enrichment.total_ratings',
|
|
source_description='Total number of ratings',
|
|
target_class='CustodianObservation',
|
|
target_slot='review_count',
|
|
transformation=TransformationType.DIRECT,
|
|
typedb_entity='custodian-observation',
|
|
typedb_attribute='review-count',
|
|
rdf_predicate='schema:reviewCount',
|
|
required=False,
|
|
validation=FieldValidation(type='number'),
|
|
example=FieldExample(
|
|
source_value=47832,
|
|
target_value=47832,
|
|
),
|
|
),
|
|
FieldMapping(
|
|
source_path='google_maps_enrichment.opening_hours',
|
|
source_description='Weekly opening hours schedule',
|
|
target_class='OpeningHours',
|
|
target_slot='opening_hours_specification',
|
|
transformation=TransformationType.NESTED,
|
|
transformation_details='Maps to OpeningHoursSpecification array per day',
|
|
typedb_entity='opening-hours',
|
|
rdf_predicate='schema:openingHoursSpecification',
|
|
required=False,
|
|
),
|
|
FieldMapping(
|
|
source_path='google_maps_enrichment.website',
|
|
source_description='Official website URL',
|
|
target_class='CustodianObservation',
|
|
target_slot='website',
|
|
transformation=TransformationType.DIRECT,
|
|
typedb_entity='custodian-observation',
|
|
typedb_attribute='website',
|
|
rdf_predicate='schema:url',
|
|
required=False,
|
|
validation=FieldValidation(type='uri'),
|
|
example=FieldExample(
|
|
source_value='https://www.rijksmuseum.nl/',
|
|
target_value='https://www.rijksmuseum.nl/',
|
|
),
|
|
),
|
|
FieldMapping(
|
|
source_path='google_maps_enrichment.phone',
|
|
source_description='Phone number',
|
|
target_class='CustodianObservation',
|
|
target_slot='telephone',
|
|
transformation=TransformationType.DIRECT,
|
|
typedb_entity='custodian-observation',
|
|
typedb_attribute='telephone',
|
|
rdf_predicate='schema:telephone',
|
|
required=False,
|
|
example=FieldExample(
|
|
source_value='+31 20 674 7000',
|
|
target_value='+31 20 674 7000',
|
|
),
|
|
),
|
|
],
|
|
example_yaml="""
|
|
google_maps_enrichment:
|
|
place_id: ChIJ5Ra7we4JxkcRhYVAaq5zQ9U
|
|
name: Rijksmuseum
|
|
coordinates:
|
|
latitude: 52.3599976
|
|
longitude: 4.8852188
|
|
formatted_address: Museumstraat 1, 1071 XX Amsterdam, Netherlands
|
|
rating: 4.6
|
|
total_ratings: 47832
|
|
website: https://www.rijksmuseum.nl/
|
|
phone: +31 20 674 7000
|
|
opening_hours:
|
|
Monday: 9:00 AM - 5:00 PM
|
|
Tuesday: 9:00 AM - 5:00 PM
|
|
# ...
|
|
""".strip(),
|
|
),
|
|
# -------------------------------------------------------------------------
# Wikidata Enrichment
# -------------------------------------------------------------------------
# Maps the `wikidata_enrichment` YAML block onto a CustodianObservation and
# the generated WikidataEntity / Sitelink classes listed below.
EnrichmentSourceMapping(
    source_block='wikidata_enrichment',
    description='Linked data from Wikidata knowledge graph',
    detailed_description="""
Rich semantic data from Wikidata including:
- Multilingual labels and descriptions
- Sitelinks to Wikipedia articles
- Structured properties (coordinates, founding date, etc.)
- Instance-of relationships for type classification

Creates a CustodianObservation with wikidata_api provenance.
""".strip(),
    linkml_class='CustodianObservation',
    typedb_entity='custodian-observation',
    provenance=Provenance(
        source_type='wikidata_api',
        data_tier=DataTier.TIER_3_CROWD_SOURCED,
        api_endpoint='https://www.wikidata.org/wiki/Special:EntityData/',
    ),
    generated_classes=['WikidataEntity', 'Sitelink'],
    fields=[
        # The Q-ID is the only required field of this block.
        FieldMapping(
            source_path='wikidata_enrichment.entity_id',
            source_description='Wikidata Q-ID',
            target_class='CustodianObservation',
            target_slot='external_id',
            transformation=TransformationType.DIRECT,
            typedb_entity='custodian-observation',
            typedb_attribute='external-id',
            rdf_predicate='schema:identifier',
            required=True,
            example=FieldExample(
                source_value='Q190804',
                target_value='Q190804',
                rdf_triple='<https://w3id.org/hc/obs/Q190804> schema:identifier "Q190804" .',
            ),
        ),
        # Nested mappings below declare no typedb_attribute.
        FieldMapping(
            source_path='wikidata_enrichment.labels',
            source_description='Multilingual labels',
            target_class='CustodianObservation',
            target_slot='labels',
            transformation=TransformationType.NESTED,
            transformation_details='Language-tagged strings (e.g., {"en": "Rijksmuseum", "nl": "Rijksmuseum"})',
            typedb_entity='custodian-observation',
            rdf_predicate='rdfs:label',
            required=False,
        ),
        FieldMapping(
            source_path='wikidata_enrichment.descriptions',
            source_description='Multilingual descriptions',
            target_class='CustodianObservation',
            target_slot='descriptions',
            transformation=TransformationType.NESTED,
            typedb_entity='custodian-observation',
            rdf_predicate='schema:description',
            required=False,
        ),
        FieldMapping(
            source_path='wikidata_enrichment.sitelinks',
            source_description='Links to Wikipedia articles',
            target_class='Sitelink',
            target_slot='sitelinks',
            transformation=TransformationType.ARRAY_MAP,
            transformation_details='Each sitelink maps to Wikipedia article URL',
            typedb_entity='sitelink',
            rdf_predicate='schema:sameAs',
            required=False,
        ),
        FieldMapping(
            source_path='wikidata_enrichment.instance_of',
            source_description='Wikidata type classification (P31)',
            target_class='CustodianObservation',
            target_slot='instance_of',
            transformation=TransformationType.LOOKUP,
            transformation_details='Maps Q-ID to CustodianTypeEnum',
            typedb_entity='custodian-observation',
            typedb_attribute='wikidata-type',
            rdf_predicate='wdt:P31',
            required=False,
            example=FieldExample(source_value='Q33506', target_value='MUSEUM'),
        ),
        FieldMapping(
            source_path='wikidata_enrichment.coordinates',
            source_description='Geographic coordinates from Wikidata (P625)',
            target_class='GeoCoordinates',
            target_slot='coordinates',
            transformation=TransformationType.NESTED,
            typedb_entity='geo-coordinates',
            rdf_predicate='wdt:P625',
            required=False,
        ),
        FieldMapping(
            source_path='wikidata_enrichment.founding_date',
            source_description='Date of establishment (P571)',
            target_class='CustodianObservation',
            target_slot='founding_date',
            transformation=TransformationType.TEMPORAL,
            transformation_details='Wikidata time format to ISO 8601',
            typedb_entity='custodian-observation',
            typedb_attribute='founding-date',
            rdf_predicate='wdt:P571',
            required=False,
            validation=FieldValidation(type='date'),
            example=FieldExample(
                source_value='+1800-01-01T00:00:00Z',
                target_value='1800-01-01',
            ),
        ),
    ],
    example_yaml="""
wikidata_enrichment:
  entity_id: Q190804
  labels:
    en: Rijksmuseum
    nl: Rijksmuseum
    de: Rijksmuseum
    fr: Rijksmuseum
  descriptions:
    en: Dutch national museum in Amsterdam
    nl: Nederlands nationaal museum in Amsterdam
  sitelinks:
    enwiki: Rijksmuseum
    nlwiki: Rijksmuseum
    dewiki: Rijksmuseum
  instance_of:
    id: Q33506
    label: museum
  coordinates:
    latitude: 52.36
    longitude: 4.885
  founding_date: "+1800-01-01T00:00:00Z"
""".strip(),
),
# -------------------------------------------------------------------------
# Location (Canonical)
# -------------------------------------------------------------------------
# Maps the reconciled `location` YAML block onto the Place class; city and
# country are the two required fields.
EnrichmentSourceMapping(
    source_block='location',
    description='Normalized canonical location',
    detailed_description="""
The authoritative location record after reconciliation from multiple sources.
This is the single source of truth for the custodian's physical location.
""".strip(),
    linkml_class='Place',
    typedb_entity='place',
    provenance=Provenance(
        source_type='reconciliation',
        data_tier=DataTier.TIER_2_VERIFIED,
    ),
    fields=[
        FieldMapping(
            source_path='location.city',
            source_description='City name',
            target_class='Place',
            target_slot='city',
            transformation=TransformationType.DIRECT,
            typedb_entity='place',
            typedb_attribute='city',
            rdf_predicate='schema:addressLocality',
            required=True,
            example=FieldExample(source_value='Amsterdam', target_value='Amsterdam'),
        ),
        FieldMapping(
            source_path='location.country',
            source_description='ISO 3166-1 alpha-2 country code',
            target_class='Place',
            target_slot='country',
            transformation=TransformationType.LOOKUP,
            transformation_details='Maps to CountryCodeEnum',
            typedb_entity='place',
            typedb_attribute='country-code',
            rdf_predicate='schema:addressCountry',
            required=True,
            # Two upper-case letters, matching the alpha-2 code format.
            validation=FieldValidation(
                type='enum',
                pattern='^[A-Z]{2}$',
            ),
            example=FieldExample(source_value='NL', target_value='NL'),
        ),
        FieldMapping(
            source_path='location.region',
            source_description='Region/province name',
            target_class='Place',
            target_slot='region',
            transformation=TransformationType.DIRECT,
            typedb_entity='place',
            typedb_attribute='region',
            rdf_predicate='schema:addressRegion',
            required=False,
            example=FieldExample(
                source_value='North Holland',
                target_value='North Holland',
            ),
        ),
        FieldMapping(
            source_path='location.postal_code',
            source_description='Postal/ZIP code',
            target_class='Place',
            target_slot='postal_code',
            transformation=TransformationType.DIRECT,
            typedb_entity='place',
            typedb_attribute='postal-code',
            rdf_predicate='schema:postalCode',
            required=False,
            example=FieldExample(source_value='1071 XX', target_value='1071 XX'),
        ),
        FieldMapping(
            source_path='location.street_address',
            source_description='Street address',
            target_class='Place',
            target_slot='street_address',
            transformation=TransformationType.DIRECT,
            typedb_entity='place',
            typedb_attribute='street-address',
            rdf_predicate='schema:streetAddress',
            required=False,
            example=FieldExample(
                source_value='Museumstraat 1',
                target_value='Museumstraat 1',
            ),
        ),
    ],
    example_yaml="""
location:
  city: Amsterdam
  country: NL
  region: North Holland
  postal_code: 1071 XX
  street_address: Museumstraat 1
""".strip(),
),
# -------------------------------------------------------------------------
# Web Enrichment
# -------------------------------------------------------------------------
# Maps the `web_enrichment` YAML block (one archived page) onto a
# WebObservation; individual extracted facts are modelled as WebClaim.
EnrichmentSourceMapping(
    source_block='web_enrichment',
    description='Archived website data and scraped content',
    detailed_description="""
Content extracted from the institution's website using web scraping tools.
Includes organization details, collections, exhibitions, contact info, etc.
All claims must have XPath provenance per Rule 6.
""".strip(),
    linkml_class='WebObservation',
    typedb_entity='web-observation',
    provenance=Provenance(
        source_type='web_scrape',
        data_tier=DataTier.TIER_2_VERIFIED,
    ),
    generated_classes=['WebClaim'],
    fields=[
        FieldMapping(
            source_path='web_enrichment.source_url',
            source_description='URL of scraped page',
            target_class='WebObservation',
            target_slot='source_url',
            transformation=TransformationType.DIRECT,
            typedb_entity='web-observation',
            typedb_attribute='source-url',
            rdf_predicate='prov:wasDerivedFrom',
            required=True,
            validation=FieldValidation(type='uri'),
        ),
        FieldMapping(
            source_path='web_enrichment.retrieved_on',
            source_description='Timestamp when page was archived',
            target_class='WebObservation',
            target_slot='retrieved_on',
            transformation=TransformationType.TEMPORAL,
            typedb_entity='web-observation',
            typedb_attribute='retrieved-on',
            rdf_predicate='prov:generatedAtTime',
            required=True,
            validation=FieldValidation(type='date'),
        ),
        # The source key `html_file` is stored under the slot `archive_path`;
        # this mapping declares no RDF predicate.
        FieldMapping(
            source_path='web_enrichment.html_file',
            source_description='Path to archived HTML file',
            target_class='WebObservation',
            target_slot='archive_path',
            transformation=TransformationType.DIRECT,
            typedb_entity='web-observation',
            typedb_attribute='archive-path',
            required=False,
        ),
        FieldMapping(
            source_path='web_enrichment.organization_details',
            source_description='Extracted organization information',
            target_class='WebObservation',
            target_slot='extracted_content',
            transformation=TransformationType.NESTED,
            typedb_entity='web-observation',
            required=False,
        ),
    ],
    example_yaml="""
web_enrichment:
  source_url: https://www.rijksmuseum.nl/en/about-us
  retrieved_on: "2025-01-15T10:30:00Z"
  html_file: web/NL-NH-AMS-M-RM/rijksmuseum.nl/about-us.html
  retrieval_agent: firecrawl
  organization_details:
    mission: "To connect people with art and history"
    established: "1800"
    collection_size: "1 million objects"
""".strip(),
),
# -------------------------------------------------------------------------
# Web Claims
# -------------------------------------------------------------------------
# Maps each item of the `web_claims` YAML list onto a WebClaim. Type, value,
# XPath and source URL are required; the match score is optional.
EnrichmentSourceMapping(
    source_block='web_claims',
    description='Verified claims extracted from websites with XPath provenance',
    detailed_description="""
Individual facts extracted from web pages with full provenance chain.
Each claim MUST have XPath pointer to exact location in archived HTML.
Claims without XPath provenance are fabricated and must be removed per Rule 6.
""".strip(),
    linkml_class='WebClaim',
    typedb_entity='web-claim',
    provenance=Provenance(
        source_type='web_extraction',
        data_tier=DataTier.TIER_2_VERIFIED,
    ),
    fields=[
        FieldMapping(
            source_path='web_claims[].claim_type',
            source_description='Type of claim (full_name, email, phone, etc.)',
            target_class='WebClaim',
            target_slot='claim_type',
            transformation=TransformationType.LOOKUP,
            typedb_entity='web-claim',
            typedb_attribute='claim-type',
            rdf_predicate='hc:claimType',
            required=True,
            validation=FieldValidation(
                type='enum',
                enum_values=['full_name', 'description', 'email', 'phone', 'address', 'opening_hours', 'social_media'],
            ),
        ),
        FieldMapping(
            source_path='web_claims[].claim_value',
            source_description='The extracted value',
            target_class='WebClaim',
            target_slot='claim_value',
            transformation=TransformationType.DIRECT,
            typedb_entity='web-claim',
            typedb_attribute='claim-value',
            rdf_predicate='rdf:value',
            required=True,
        ),
        FieldMapping(
            source_path='web_claims[].xpath',
            source_description='XPath to element containing value',
            target_class='WebClaim',
            target_slot='xpath',
            transformation=TransformationType.DIRECT,
            typedb_entity='web-claim',
            typedb_attribute='xpath',
            rdf_predicate='hc:xpath',
            required=True,
        ),
        FieldMapping(
            source_path='web_claims[].source_url',
            source_description='URL where claim was extracted',
            target_class='WebClaim',
            target_slot='source_url',
            transformation=TransformationType.DIRECT,
            typedb_entity='web-claim',
            typedb_attribute='source-url',
            rdf_predicate='prov:wasDerivedFrom',
            required=True,
            validation=FieldValidation(type='uri'),
        ),
        # The source key `xpath_match_score` is stored under the shorter slot
        # `match_score`; this mapping declares no RDF predicate.
        FieldMapping(
            source_path='web_claims[].xpath_match_score',
            source_description='Match confidence (1.0 = exact)',
            target_class='WebClaim',
            target_slot='match_score',
            transformation=TransformationType.DIRECT,
            typedb_entity='web-claim',
            typedb_attribute='match-score',
            required=False,
            validation=FieldValidation(type='number'),
        ),
    ],
    example_yaml="""
web_claims:
  - claim_type: full_name
    claim_value: Rijksmuseum Amsterdam
    source_url: https://www.rijksmuseum.nl/
    retrieved_on: "2025-01-15T10:30:00Z"
    xpath: /html/body/header/div[1]/a/span
    html_file: web/NL-NH-AMS-M-RM/rijksmuseum.nl/index.html
    xpath_match_score: 1.0
""".strip(),
),
# -------------------------------------------------------------------------
# Provenance
# -------------------------------------------------------------------------
# Maps the `provenance` YAML block onto the Provenance class. Source, tier
# and extraction date are required; the confidence score is optional.
EnrichmentSourceMapping(
    source_block='provenance',
    description='Data lineage and source tracking',
    detailed_description="""
Metadata about where the data came from, when it was collected,
and how confident we are in its accuracy.
""".strip(),
    linkml_class='Provenance',
    typedb_entity='provenance',
    provenance=Provenance(
        source_type='metadata',
        data_tier=DataTier.TIER_1_AUTHORITATIVE,
    ),
    fields=[
        FieldMapping(
            source_path='provenance.data_source',
            source_description='Origin of the data',
            target_class='Provenance',
            target_slot='data_source',
            transformation=TransformationType.LOOKUP,
            typedb_entity='provenance',
            typedb_attribute='data-source',
            rdf_predicate='prov:wasAttributedTo',
            required=True,
            validation=FieldValidation(
                type='enum',
                enum_values=['CSV_REGISTRY', 'CONVERSATION_NLP', 'API_ENRICHMENT', 'WEB_SCRAPE', 'MANUAL_ENTRY'],
            ),
        ),
        FieldMapping(
            source_path='provenance.data_tier',
            source_description='Data quality tier',
            target_class='Provenance',
            target_slot='data_tier',
            transformation=TransformationType.LOOKUP,
            typedb_entity='provenance',
            typedb_attribute='data-tier',
            rdf_predicate='hc:dataTier',
            required=True,
            validation=FieldValidation(
                type='enum',
                enum_values=['TIER_1_AUTHORITATIVE', 'TIER_2_VERIFIED', 'TIER_3_CROWD_SOURCED', 'TIER_4_INFERRED'],
            ),
        ),
        FieldMapping(
            source_path='provenance.extraction_date',
            source_description='When data was extracted',
            target_class='Provenance',
            target_slot='extraction_date',
            transformation=TransformationType.TEMPORAL,
            typedb_entity='provenance',
            typedb_attribute='extraction-date',
            rdf_predicate='prov:generatedAtTime',
            required=True,
            validation=FieldValidation(type='date'),
        ),
        FieldMapping(
            source_path='provenance.confidence_score',
            source_description='Confidence in data accuracy (0-1)',
            target_class='Provenance',
            target_slot='confidence_score',
            transformation=TransformationType.DIRECT,
            typedb_entity='provenance',
            typedb_attribute='confidence-score',
            rdf_predicate='hc:confidenceScore',
            required=False,
            validation=FieldValidation(type='number'),
        ),
    ],
    example_yaml="""
provenance:
  data_source: API_ENRICHMENT
  data_tier: TIER_2_VERIFIED
  extraction_date: "2025-01-15T10:30:00Z"
  extraction_method: "Google Maps Places API + Wikidata SPARQL"
  confidence_score: 0.92
  source_files:
    - google_maps_enrichment
    - wikidata_enrichment
""".strip(),
),
# -------------------------------------------------------------------------
# Timespan
# -------------------------------------------------------------------------
# Maps the `timespan` YAML block onto the TimeSpan class. The four optional
# bounds give the earliest/latest possible start and end dates.
EnrichmentSourceMapping(
    source_block='timespan',
    description='Temporal bounds (CIDOC-CRM style)',
    detailed_description="""
Temporal information following CIDOC-CRM E52 Time-Span pattern.
Captures founding date, closure date (if applicable), and temporal bounds.
""".strip(),
    linkml_class='TimeSpan',
    typedb_entity='time-span',
    provenance=Provenance(
        source_type='derived',
        data_tier=DataTier.TIER_2_VERIFIED,
    ),
    fields=[
        FieldMapping(
            source_path='timespan.begin_of_the_begin',
            source_description='Earliest possible start date',
            target_class='TimeSpan',
            target_slot='begin_of_the_begin',
            transformation=TransformationType.TEMPORAL,
            typedb_entity='time-span',
            typedb_attribute='begin-of-the-begin',
            rdf_predicate='crm:P82a_begin_of_the_begin',
            required=False,
            validation=FieldValidation(type='date'),
        ),
        FieldMapping(
            source_path='timespan.end_of_the_begin',
            source_description='Latest possible start date',
            target_class='TimeSpan',
            target_slot='end_of_the_begin',
            transformation=TransformationType.TEMPORAL,
            typedb_entity='time-span',
            typedb_attribute='end-of-the-begin',
            rdf_predicate='crm:P81a_end_of_the_begin',
            required=False,
            validation=FieldValidation(type='date'),
        ),
        FieldMapping(
            source_path='timespan.begin_of_the_end',
            source_description='Earliest possible end date',
            target_class='TimeSpan',
            target_slot='begin_of_the_end',
            transformation=TransformationType.TEMPORAL,
            typedb_entity='time-span',
            typedb_attribute='begin-of-the-end',
            rdf_predicate='crm:P81b_begin_of_the_end',
            required=False,
            validation=FieldValidation(type='date'),
        ),
        FieldMapping(
            source_path='timespan.end_of_the_end',
            source_description='Latest possible end date',
            target_class='TimeSpan',
            target_slot='end_of_the_end',
            transformation=TransformationType.TEMPORAL,
            typedb_entity='time-span',
            typedb_attribute='end-of-the-end',
            rdf_predicate='crm:P82b_end_of_the_end',
            required=False,
            validation=FieldValidation(type='date'),
        ),
    ],
    example_yaml="""
timespan:
  begin_of_the_begin: "1800-01-01"
  end_of_the_begin: "1800-12-31"
  # Museum still operating - no end dates
""".strip(),
),
# -------------------------------------------------------------------------
# Legal Status
# -------------------------------------------------------------------------
# Maps the `legal_status` YAML block onto CustodianLegalStatus; every field
# in this block is optional.
EnrichmentSourceMapping(
    source_block='legal_status',
    description='Legal form and organizational structure',
    detailed_description="""
Legal entity information including legal form (foundation, corporation, etc.),
registration numbers, and governing body information.
""".strip(),
    linkml_class='CustodianLegalStatus',
    typedb_entity='custodian-legal-status',
    provenance=Provenance(
        source_type='registry_lookup',
        data_tier=DataTier.TIER_1_AUTHORITATIVE,
    ),
    fields=[
        FieldMapping(
            source_path='legal_status.legal_form',
            source_description='ISO 20275 legal form code',
            target_class='CustodianLegalStatus',
            target_slot='legal_form',
            transformation=TransformationType.LOOKUP,
            transformation_details='Maps to ISO 20275 Entity Legal Form codes',
            typedb_entity='custodian-legal-status',
            typedb_attribute='legal-form',
            rdf_predicate='org:classification',
            required=False,
            example=FieldExample(
                source_value='stichting',
                # NOTE(review): published ISO 20275 ELF codes are normally
                # 4-character alphanumerics; 'NL_STI' may be a project-internal
                # key rather than the ELF code itself - confirm.
                target_value='NL_STI',  # ISO 20275 code
            ),
        ),
        FieldMapping(
            source_path='legal_status.legal_name',
            source_description='Full registered name including legal form',
            target_class='CustodianLegalStatus',
            target_slot='legal_name',
            transformation=TransformationType.DIRECT,
            typedb_entity='custodian-legal-status',
            typedb_attribute='legal-name',
            rdf_predicate='schema:legalName',
            required=False,
            example=FieldExample(
                source_value='Stichting Rijksmuseum',
                target_value='Stichting Rijksmuseum',
            ),
        ),
        FieldMapping(
            source_path='legal_status.kvk_number',
            source_description='Dutch Chamber of Commerce number',
            target_class='CustodianLegalStatus',
            target_slot='kvk_number',
            transformation=TransformationType.DIRECT,
            typedb_entity='custodian-legal-status',
            typedb_attribute='kvk-number',
            rdf_predicate='hc:kvkNumber',
            required=False,
            # Exactly eight digits, validated as a string.
            validation=FieldValidation(
                type='string',
                pattern='^[0-9]{8}$',
            ),
        ),
    ],
    example_yaml="""
legal_status:
  legal_form: stichting
  legal_name: Stichting Rijksmuseum
  kvk_number: "10205502"
  registered_address: Amsterdam
""".strip(),
),
# -------------------------------------------------------------------------
# Digital Platforms
# -------------------------------------------------------------------------
# Maps each item of the `digital_platforms` YAML list onto a DigitalPlatform;
# name and URL are required, the platform type is optional.
EnrichmentSourceMapping(
    source_block='digital_platforms',
    description='Website and digital platform metadata',
    detailed_description="""
Information about the institution's digital presence including
primary website, collection management systems, discovery portals, and APIs.
""".strip(),
    linkml_class='DigitalPlatform',
    typedb_entity='digital-platform',
    provenance=Provenance(
        source_type='web_discovery',
        data_tier=DataTier.TIER_2_VERIFIED,
    ),
    fields=[
        FieldMapping(
            source_path='digital_platforms[].platform_name',
            source_description='Name of the platform',
            target_class='DigitalPlatform',
            target_slot='platform_name',
            transformation=TransformationType.DIRECT,
            typedb_entity='digital-platform',
            typedb_attribute='platform-name',
            rdf_predicate='schema:name',
            required=True,
        ),
        FieldMapping(
            source_path='digital_platforms[].platform_url',
            source_description='URL of the platform',
            target_class='DigitalPlatform',
            target_slot='platform_url',
            transformation=TransformationType.DIRECT,
            typedb_entity='digital-platform',
            typedb_attribute='platform-url',
            rdf_predicate='schema:url',
            required=True,
            validation=FieldValidation(type='uri'),
        ),
        FieldMapping(
            source_path='digital_platforms[].platform_type',
            source_description='Type of platform',
            target_class='DigitalPlatform',
            target_slot='platform_type',
            transformation=TransformationType.LOOKUP,
            typedb_entity='digital-platform',
            typedb_attribute='platform-type',
            rdf_predicate='hc:platformType',
            required=False,
            validation=FieldValidation(
                type='enum',
                enum_values=['WEBSITE', 'COLLECTION_PORTAL', 'DISCOVERY_PLATFORM', 'API', 'SOCIAL_MEDIA'],
            ),
        ),
    ],
    example_yaml="""
digital_platforms:
  - platform_name: Rijksmuseum Website
    platform_url: https://www.rijksmuseum.nl/
    platform_type: WEBSITE
  - platform_name: Rijksstudio
    platform_url: https://www.rijksmuseum.nl/en/rijksstudio
    platform_type: COLLECTION_PORTAL
  - platform_name: Rijksmuseum API
    platform_url: https://data.rijksmuseum.nl/
    platform_type: API
""".strip(),
),

# =========================================================================
# ARCHIVE TYPE MAPPINGS
# =========================================================================

# -------------------------------------------------------------------------
# archive_type_academic
# -------------------------------------------------------------------------
# Schema-documentation entry for the academic archive class family. The
# first field has no source path (source_path=None).
EnrichmentSourceMapping(
    source_block='archive_type_academic',
    description='Academic and research archive types - universities, institutes, learned societies',
    detailed_description="""
Academic archives serve educational and research institutions. They preserve:
- University records and institutional history
- Research data and scholarly outputs
- Student and faculty records
- Scientific collections and specimen documentation

Classes: AcademicArchive, UniversityArchive, ScientificArchive, InstitutionalArchive
""".strip(),
    linkml_class='AcademicArchive',
    typedb_entity='academic-archive',
    provenance=Provenance(
        source_type='schema_documentation',
        data_tier=DataTier.TIER_1_AUTHORITATIVE,
    ),
    generated_classes=[
        'AcademicArchive',
        'UniversityArchive',
        'ScientificArchive',
        'InstitutionalArchive',
        'InstitutionalRepository',
    ],
    fields=[
        FieldMapping(
            source_path=None,
            source_description='Archive classification type for academic institutions',
            target_class='AcademicArchive',
            target_slot='archive_type',
            transformation=TransformationType.LOOKUP,
            typedb_entity='academic-archive',
            typedb_attribute='archive-classification',
            rdf_predicate='hc:archiveType',
            required=True,
            status=MappingStatus.MAPPED,
            notes='Subclass of Archive with specialization for academic/research contexts',
        ),
        FieldMapping(
            source_path='institution_type',
            source_description='Parent educational institution type',
            target_class='AcademicArchive',
            target_slot='parent_institution_type',
            transformation=TransformationType.LOOKUP,
            typedb_entity='academic-archive',
            typedb_attribute='parent-institution-type',
            rdf_predicate='hc:parentInstitutionType',
            required=False,
            validation=FieldValidation(
                type='enum',
                enum_values=['UNIVERSITY', 'RESEARCH_INSTITUTE', 'ACADEMY', 'LEARNED_SOCIETY'],
            ),
        ),
    ],
    example_yaml="""
# AcademicArchive instance
institution_type: ARCHIVE
archive_classification: ACADEMIC
parent_institution: University of Amsterdam
specializations:
  - scientific_records
  - research_data
  - institutional_history
""".strip(),
),

# -------------------------------------------------------------------------
# archive_type_audiovisual
# -------------------------------------------------------------------------
# Schema-documentation entry for the audiovisual archive class family. The
# first field has no source path (source_path=None).
EnrichmentSourceMapping(
    source_block='archive_type_audiovisual',
    description='Audiovisual archive types - film, sound, television, radio',
    detailed_description="""
Audiovisual archives preserve time-based media including:
- Film and cinema collections (Cinematheque, FilmArchive)
- Sound recordings (SoundArchive, MusicArchive, AnimalSoundArchive)
- Television and radio broadcasts (TelevisionArchive, RadioArchive)
- Media-specific preservation requirements

Classes: AudiovisualArchive, FilmArchive, Cinematheque, SoundArchive,
MusicArchive, RadioArchive, TelevisionArchive, AnimalSoundArchive,
MediaArchive, AdvertisingRadioArchive
""".strip(),
    linkml_class='AudiovisualArchive',
    typedb_entity='audiovisual-archive',
    provenance=Provenance(
        source_type='schema_documentation',
        data_tier=DataTier.TIER_1_AUTHORITATIVE,
    ),
    generated_classes=[
        'AudiovisualArchive',
        'FilmArchive',
        'Cinematheque',
        'SoundArchive',
        'MusicArchive',
        'RadioArchive',
        'TelevisionArchive',
        'AnimalSoundArchive',
        'MediaArchive',
        'AdvertisingRadioArchive',
    ],
    fields=[
        FieldMapping(
            source_path=None,
            source_description='Audiovisual archive classification',
            target_class='AudiovisualArchive',
            target_slot='archive_type',
            transformation=TransformationType.LOOKUP,
            typedb_entity='audiovisual-archive',
            typedb_attribute='archive-classification',
            rdf_predicate='hc:archiveType',
            required=True,
            status=MappingStatus.MAPPED,
        ),
        FieldMapping(
            source_path='media_types',
            source_description='Types of media held in collection',
            target_class='AudiovisualArchive',
            target_slot='media_types',
            transformation=TransformationType.ARRAY_DIRECT,
            typedb_entity='audiovisual-archive',
            typedb_attribute='media-types',
            rdf_predicate='hc:mediaTypes',
            required=False,
            validation=FieldValidation(
                type='array',
                enum_values=['FILM', 'VIDEO', 'AUDIO', 'RADIO', 'TELEVISION', 'DIGITAL_MEDIA'],
            ),
        ),
    ],
    example_yaml="""
# FilmArchive instance (e.g., EYE Filmmuseum)
institution_type: ARCHIVE
archive_classification: AUDIOVISUAL
subtype: FILM_ARCHIVE
media_types:
  - FILM
  - VIDEO
  - DIGITAL_MEDIA
preservation_formats:
  - 35mm
  - 16mm
  - digital_preservation
""".strip(),
),

# -------------------------------------------------------------------------
|
|
# archive_type_church
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
    # Mapping entry for religious (church, diocesan, monastery, parish) archives.
    source_block='archive_type_church',
    description='Religious archive types - church, diocesan, monastery, parish',
    detailed_description="""
Religious archives document faith communities and their histories:
- Church administration and governance (ChurchArchive, DiocesanArchive)
- Religious orders and communities (MonasteryArchive, ReligiousArchive)
- Parish records and sacramental registers (ParishArchive)
- Regional variations (ChurchArchiveSweden, CathedralArchive)

Classes: ChurchArchive, DiocesanArchive, MonasteryArchive, ParishArchive,
ReligiousArchive, CathedralArchive, ChurchArchiveSweden
""".strip(),
    linkml_class='ChurchArchive',
    typedb_entity='church-archive',
    provenance=Provenance(
        source_type='schema_documentation',
        data_tier=DataTier.TIER_1_AUTHORITATIVE,
    ),
    # All classes covered by this entry; linkml_class above is the first of them.
    generated_classes=[
        'ChurchArchive',
        'DiocesanArchive',
        'MonasteryArchive',
        'ParishArchive',
        'ReligiousArchive',
        'CathedralArchive',
        'ChurchArchiveSweden',
    ],
    fields=[
        # Archive classification: declared with no source_path and a LOOKUP
        # transformation.
        FieldMapping(
            source_path=None,
            source_description='Religious archive classification',
            target_class='ChurchArchive',
            target_slot='archive_type',
            transformation=TransformationType.LOOKUP,
            typedb_entity='church-archive',
            typedb_attribute='archive-classification',
            rdf_predicate='hc:archiveType',
            required=True,
            status=MappingStatus.MAPPED,
        ),
        # Optional denomination, copied as-is from `religious_denomination`.
        FieldMapping(
            source_path='religious_denomination',
            source_description='Religious denomination or tradition',
            target_class='ChurchArchive',
            target_slot='denomination',
            transformation=TransformationType.DIRECT,
            typedb_entity='church-archive',
            typedb_attribute='denomination',
            rdf_predicate='hc:denomination',
            required=False,
        ),
    ],
    example_yaml="""
# DiocesanArchive instance
institution_type: ARCHIVE
archive_classification: CHURCH
subtype: DIOCESAN_ARCHIVE
religious_denomination: Roman Catholic
diocese: Diocese of Haarlem-Amsterdam
record_types:
  - sacramental_registers
  - parish_records
  - administrative_correspondence
""".strip(),
),

# -------------------------------------------------------------------------
# archive_type_corporate
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
    # Mapping entry for corporate / business archives.
    source_block='archive_type_corporate',
    description='Corporate and business archive types - company, bank, trade union',
    detailed_description="""
Corporate archives document business and economic activities:
- Company history and governance (CompanyArchives, BankArchive)
- Labor organizations (TradeUnionArchive)
- Economic documentation (EconomicArchive)
- Industry-specific records (FoundationArchive, AssociationArchive)

Classes: CompanyArchives, BankArchive, TradeUnionArchive, EconomicArchive,
FoundationArchive, AssociationArchive, RegionalEconomicArchive
""".strip(),
    linkml_class='CompanyArchives',
    typedb_entity='corporate-archive',
    provenance=Provenance(
        source_type='schema_documentation',
        data_tier=DataTier.TIER_1_AUTHORITATIVE,
    ),
    # All classes covered by this entry; linkml_class above is the first of them.
    generated_classes=[
        'CompanyArchives',
        'BankArchive',
        'TradeUnionArchive',
        'EconomicArchive',
        'FoundationArchive',
        'AssociationArchive',
        'RegionalEconomicArchive',
    ],
    fields=[
        # Archive classification: declared with no source_path and a LOOKUP
        # transformation.
        FieldMapping(
            source_path=None,
            source_description='Corporate archive classification',
            target_class='CompanyArchives',
            target_slot='archive_type',
            transformation=TransformationType.LOOKUP,
            typedb_entity='corporate-archive',
            typedb_attribute='archive-classification',
            rdf_predicate='hc:archiveType',
            required=True,
            status=MappingStatus.MAPPED,
        ),
        # Optional industry sector, copied as-is from `industry_sector`.
        FieldMapping(
            source_path='industry_sector',
            source_description='Industry sector of the organization',
            target_class='CompanyArchives',
            target_slot='industry_sector',
            transformation=TransformationType.DIRECT,
            typedb_entity='corporate-archive',
            typedb_attribute='industry-sector',
            rdf_predicate='hc:industrySector',
            required=False,
        ),
    ],
    example_yaml="""
# BankArchive instance
institution_type: ARCHIVE
archive_classification: CORPORATE
subtype: BANK_ARCHIVE
company_name: ABN AMRO Historical Archive
industry_sector: FINANCIAL_SERVICES
parent_organization: ABN AMRO Bank N.V.
""".strip(),
),

# -------------------------------------------------------------------------
# archive_type_government
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
    # Mapping entry for government archives at every jurisdiction level.
    source_block='archive_type_government',
    description='Government archive types - national, state, parliamentary, court',
    detailed_description="""
Government archives document state activities at all levels:
- National archives (NationalArchives)
- State/regional government (StateArchives, StateArchivesSection)
- Parliamentary records (ParliamentaryArchives)
- Judicial records (CourtRecords, NotarialArchive)
- Public administration (PublicArchive, GovernmentArchive)
- Security and intelligence (SecurityArchives)

Classes: NationalArchives, StateArchives, StateArchivesSection,
GovernmentArchive, PublicArchive, ParliamentaryArchives, CourtRecords,
NotarialArchive, SecurityArchives, CurrentArchive, PublicArchivesInFrance
""".strip(),
    linkml_class='NationalArchives',
    typedb_entity='government-archive',
    provenance=Provenance(
        source_type='schema_documentation',
        data_tier=DataTier.TIER_1_AUTHORITATIVE,
    ),
    # All classes covered by this entry; linkml_class above is the first of them.
    generated_classes=[
        'NationalArchives',
        'StateArchives',
        'StateArchivesSection',
        'GovernmentArchive',
        'PublicArchive',
        'ParliamentaryArchives',
        'CourtRecords',
        'NotarialArchive',
        'SecurityArchives',
        'CurrentArchive',
        'PublicArchivesInFrance',
    ],
    fields=[
        # Archive classification: declared with no source_path and a LOOKUP
        # transformation.
        FieldMapping(
            source_path=None,
            source_description='Government archive classification',
            target_class='NationalArchives',
            target_slot='archive_type',
            transformation=TransformationType.LOOKUP,
            typedb_entity='government-archive',
            typedb_attribute='archive-classification',
            rdf_predicate='hc:archiveType',
            required=True,
            status=MappingStatus.MAPPED,
        ),
        # Optional jurisdiction level, restricted to the enum values below.
        FieldMapping(
            source_path='jurisdiction_level',
            source_description='Level of government jurisdiction',
            target_class='NationalArchives',
            target_slot='jurisdiction_level',
            transformation=TransformationType.LOOKUP,
            typedb_entity='government-archive',
            typedb_attribute='jurisdiction-level',
            rdf_predicate='hc:jurisdictionLevel',
            required=False,
            validation=FieldValidation(
                type='enum',
                enum_values=['NATIONAL', 'STATE', 'REGIONAL', 'MUNICIPAL', 'LOCAL'],
            ),
        ),
    ],
    example_yaml="""
# NationalArchives instance (e.g., Nationaal Archief)
institution_type: ARCHIVE
archive_classification: GOVERNMENT
subtype: NATIONAL_ARCHIVES
jurisdiction_level: NATIONAL
country: NL
official_name: Nationaal Archief
legal_mandate: Archiefwet 1995
""".strip(),
),

# -------------------------------------------------------------------------
# archive_type_municipal
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
    # Mapping entry for municipal and other local-government archives.
    source_block='archive_type_municipal',
    description='Municipal and local government archive types',
    detailed_description="""
Municipal archives serve local government and community documentation:
- City/town archives (MunicipalArchive)
- Local government records (LocalGovernmentArchive)
- County/district level (CountyRecordOffice, DistrictArchiveGermany)
- Local history collections (LocalHistoryArchive)

Classes: MunicipalArchive, LocalGovernmentArchive, CountyRecordOffice,
DistrictArchiveGermany, LocalHistoryArchive, ComarcalArchive,
DistritalArchive
""".strip(),
    linkml_class='MunicipalArchive',
    typedb_entity='municipal-archive',
    provenance=Provenance(
        source_type='schema_documentation',
        data_tier=DataTier.TIER_1_AUTHORITATIVE,
    ),
    # All classes covered by this entry; linkml_class above is the first of them.
    generated_classes=[
        'MunicipalArchive',
        'LocalGovernmentArchive',
        'CountyRecordOffice',
        'DistrictArchiveGermany',
        'LocalHistoryArchive',
        'ComarcalArchive',
        'DistritalArchive',
    ],
    fields=[
        # Archive classification: declared with no source_path and a LOOKUP
        # transformation.
        FieldMapping(
            source_path=None,
            source_description='Municipal archive classification',
            target_class='MunicipalArchive',
            target_slot='archive_type',
            transformation=TransformationType.LOOKUP,
            typedb_entity='municipal-archive',
            typedb_attribute='archive-classification',
            rdf_predicate='hc:archiveType',
            required=True,
            status=MappingStatus.MAPPED,
        ),
        # Optional municipality, copied as-is from `municipality`.
        FieldMapping(
            source_path='municipality',
            source_description='Municipality served by the archive',
            target_class='MunicipalArchive',
            target_slot='municipality',
            transformation=TransformationType.DIRECT,
            typedb_entity='municipal-archive',
            typedb_attribute='municipality',
            rdf_predicate='hc:municipality',
            required=False,
        ),
    ],
    example_yaml="""
# MunicipalArchive instance (e.g., Stadsarchief Amsterdam)
institution_type: ARCHIVE
archive_classification: MUNICIPAL
municipality: Amsterdam
province: Noord-Holland
country: NL
services:
  - reading_room
  - digitization
  - genealogy_support
""".strip(),
),

# -------------------------------------------------------------------------
# archive_type_national
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
    # Mapping entry for national-level archive institutions and systems.
    source_block='archive_type_national',
    description='National-level archive institutions and systems',
    detailed_description="""
National archives represent the highest level of archival authority:
- Central national repositories (NationalArchives)
- Country-specific variations (ArchivesRegionales, Landsarkiv, Fylkesarkiv)
- International organization archives (ArchiveOfInternationalOrganization)

Classes: NationalArchives, ArchivesRegionales, Landsarkiv, Fylkesarkiv,
ArchiveOfInternationalOrganization, RegionalArchivesInIceland
""".strip(),
    # NOTE(review): NationalArchives is also the linkml_class of the
    # archive_type_government entry - confirm both entries should target it.
    linkml_class='NationalArchives',
    typedb_entity='national-archive',
    provenance=Provenance(
        source_type='schema_documentation',
        data_tier=DataTier.TIER_1_AUTHORITATIVE,
    ),
    # All classes covered by this entry; linkml_class above is the first of them.
    generated_classes=[
        'NationalArchives',
        'ArchivesRegionales',
        'Landsarkiv',
        'Fylkesarkiv',
        'ArchiveOfInternationalOrganization',
        'RegionalArchivesInIceland',
    ],
    fields=[
        # Archive classification: declared with no source_path and a LOOKUP
        # transformation.
        FieldMapping(
            source_path=None,
            source_description='National archive classification',
            target_class='NationalArchives',
            target_slot='archive_type',
            transformation=TransformationType.LOOKUP,
            typedb_entity='national-archive',
            typedb_attribute='archive-classification',
            rdf_predicate='hc:archiveType',
            required=True,
            status=MappingStatus.MAPPED,
        ),
        # Required country code; the pattern accepts exactly two uppercase
        # ASCII letters.
        FieldMapping(
            source_path='country_code',
            source_description='ISO 3166-1 alpha-2 country code',
            target_class='NationalArchives',
            target_slot='country',
            transformation=TransformationType.LOOKUP,
            typedb_entity='national-archive',
            typedb_attribute='country-code',
            rdf_predicate='hc:countryCode',
            required=True,
            validation=FieldValidation(
                type='string',
                pattern='^[A-Z]{2}$',
            ),
        ),
    ],
    example_yaml="""
# National Archives variation (e.g., Swedish Landsarkiv)
institution_type: ARCHIVE
archive_classification: NATIONAL
subtype: LANDSARKIV
country_code: SE
region: Gothenburg
""".strip(),
),

# -------------------------------------------------------------------------
# archive_type_regional
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
    # Mapping entry for regional and provincial archives.
    source_block='archive_type_regional',
    description='Regional and provincial archive types',
    detailed_description="""
Regional archives serve geographic areas between national and local:
- Provincial archives (ProvincialArchive, ProvincialHistoricalArchive)
- Regional administration (RegionalArchive, RegionalStateArchives)
- Cantonal systems (CantonalArchive - Switzerland)
- Country-specific regional (DepartmentalArchives - France, StateRegionalArchiveCzechia)

Classes: RegionalArchive, ProvincialArchive, ProvincialHistoricalArchive,
RegionalStateArchives, CantonalArchive, DepartmentalArchives,
StateRegionalArchiveCzechia, StateDistrictArchive
""".strip(),
    linkml_class='RegionalArchive',
    typedb_entity='regional-archive',
    provenance=Provenance(
        source_type='schema_documentation',
        data_tier=DataTier.TIER_1_AUTHORITATIVE,
    ),
    # All classes covered by this entry; linkml_class above is the first of them.
    generated_classes=[
        'RegionalArchive',
        'ProvincialArchive',
        'ProvincialHistoricalArchive',
        'RegionalStateArchives',
        'CantonalArchive',
        'DepartmentalArchives',
        'StateRegionalArchiveCzechia',
        'StateDistrictArchive',
    ],
    fields=[
        # Archive classification: declared with no source_path and a LOOKUP
        # transformation.
        FieldMapping(
            source_path=None,
            source_description='Regional archive classification',
            target_class='RegionalArchive',
            target_slot='archive_type',
            transformation=TransformationType.LOOKUP,
            typedb_entity='regional-archive',
            typedb_attribute='archive-classification',
            rdf_predicate='hc:archiveType',
            required=True,
            status=MappingStatus.MAPPED,
        ),
        # Optional region, copied as-is from `region`.
        FieldMapping(
            source_path='region',
            source_description='Geographic region served',
            target_class='RegionalArchive',
            target_slot='region',
            transformation=TransformationType.DIRECT,
            typedb_entity='regional-archive',
            typedb_attribute='region',
            rdf_predicate='hc:region',
            required=False,
        ),
    ],
    example_yaml="""
# ProvincialArchive instance (e.g., Brabants Historisch Informatie Centrum)
institution_type: ARCHIVE
archive_classification: REGIONAL
subtype: PROVINCIAL_ARCHIVE
province: Noord-Brabant
country: NL
""".strip(),
),

# -------------------------------------------------------------------------
# archive_type_specialized
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
    # Mapping entry for archives specialized by format, subject or community.
    source_block='archive_type_specialized',
    description='Specialized archive types - thematic collections and unique formats',
    detailed_description="""
Specialized archives focus on specific formats, subjects, or communities:
- Format-specific (PhotoArchive, LiteraryArchive, IconographicArchives)
- Subject-focused (PoliticalArchive, MilitaryArchive, PerformingArtsArchive)
- Community-focused (WomensArchives, LGBTArchive, CommunityArchive)
- Institutional (HospitalArchive, SchoolArchive)

Classes: SpecializedArchive, PhotoArchive, LiteraryArchive, PoliticalArchive,
MilitaryArchive, PerformingArtsArchive, WomensArchives, LGBTArchive,
CommunityArchive, HospitalArchive, SchoolArchive, IconographicArchives,
ArtArchive, ArchitecturalArchive, NewspaperClippingsArchive, PressArchive,
NobilityArchive
""".strip(),
    linkml_class='SpecializedArchive',
    typedb_entity='specialized-archive',
    provenance=Provenance(
        source_type='schema_documentation',
        data_tier=DataTier.TIER_1_AUTHORITATIVE,
    ),
    # All classes covered by this entry; linkml_class above is the first of them.
    generated_classes=[
        'SpecializedArchive',
        'PhotoArchive',
        'LiteraryArchive',
        'PoliticalArchive',
        'MilitaryArchive',
        'PerformingArtsArchive',
        'WomensArchives',
        'LGBTArchive',
        'CommunityArchive',
        'HospitalArchive',
        'SchoolArchive',
        'IconographicArchives',
        'ArtArchive',
        'ArchitecturalArchive',
        'NewspaperClippingsArchive',
        'PressArchive',
        'NobilityArchive',
    ],
    fields=[
        # Archive classification: declared with no source_path and a LOOKUP
        # transformation.
        FieldMapping(
            source_path=None,
            source_description='Specialized archive classification',
            target_class='SpecializedArchive',
            target_slot='archive_type',
            transformation=TransformationType.LOOKUP,
            typedb_entity='specialized-archive',
            typedb_attribute='archive-classification',
            rdf_predicate='hc:archiveType',
            required=True,
            status=MappingStatus.MAPPED,
        ),
        # Optional specialization, copied as-is from `specialization`.
        FieldMapping(
            source_path='specialization',
            source_description='Area of specialization',
            target_class='SpecializedArchive',
            target_slot='specialization',
            transformation=TransformationType.DIRECT,
            typedb_entity='specialized-archive',
            typedb_attribute='specialization',
            rdf_predicate='hc:specialization',
            required=False,
        ),
    ],
    example_yaml="""
# PhotoArchive instance (e.g., Nederlands Fotomuseum archive)
institution_type: ARCHIVE
archive_classification: SPECIALIZED
subtype: PHOTO_ARCHIVE
specialization: photography
collection_strengths:
  - Dutch photography 1840-present
  - Documentary photography
  - Press photography
""".strip(),
),

# -------------------------------------------------------------------------
# archive_type_thematic
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
    # Mapping entry for thematic archives (digital, web, dark, deposit, ...).
    source_block='archive_type_thematic',
    description='Thematic archive types - digital, web, dark archives, and special collections',
    detailed_description="""
Thematic archives organized around specific themes or functions:
- Digital preservation (DigitalArchive, DarkArchive, WebArchive)
- Collection types (CollectingArchives, DepositArchive)
- Personal papers (Nachlass, HouseArchive, PersonalCollectionType)
- Online archives (OnlineNewsArchive, MailingListArchive)

Classes: DigitalArchive, DarkArchive, WebArchive, CollectingArchives,
DepositArchive, Nachlass, HouseArchive, OnlineNewsArchive,
MailingListArchive, ClimateArchive, FreeArchive, PostcustodialArchive,
MuseumArchive, ArchivalRepository, ArchivalLibrary
""".strip(),
    linkml_class='DigitalArchive',
    typedb_entity='thematic-archive',
    provenance=Provenance(
        source_type='schema_documentation',
        data_tier=DataTier.TIER_1_AUTHORITATIVE,
    ),
    # All classes covered by this entry; linkml_class above is the first of them.
    # NOTE(review): PersonalCollectionType is named in the description bullets
    # but is not listed here - confirm which of the two is intended.
    generated_classes=[
        'DigitalArchive',
        'DarkArchive',
        'WebArchive',
        'CollectingArchives',
        'DepositArchive',
        'Nachlass',
        'HouseArchive',
        'OnlineNewsArchive',
        'MailingListArchive',
        'ClimateArchive',
        'FreeArchive',
        'PostcustodialArchive',
        'MuseumArchive',
        'ArchivalRepository',
        'ArchivalLibrary',
    ],
    fields=[
        # Archive classification: declared with no source_path and a LOOKUP
        # transformation.
        FieldMapping(
            source_path=None,
            source_description='Thematic archive classification',
            target_class='DigitalArchive',
            target_slot='archive_type',
            transformation=TransformationType.LOOKUP,
            typedb_entity='thematic-archive',
            typedb_attribute='archive-classification',
            rdf_predicate='hc:archiveType',
            required=True,
            status=MappingStatus.MAPPED,
        ),
        # Optional preservation level, restricted to the enum values below.
        FieldMapping(
            source_path='digital_preservation_level',
            source_description='Level of digital preservation commitment',
            target_class='DigitalArchive',
            target_slot='preservation_level',
            transformation=TransformationType.LOOKUP,
            typedb_entity='thematic-archive',
            typedb_attribute='preservation-level',
            rdf_predicate='hc:preservationLevel',
            required=False,
            validation=FieldValidation(
                type='enum',
                enum_values=['BIT_LEVEL', 'LOGICAL', 'SEMANTIC', 'FULL'],
            ),
        ),
    ],
    example_yaml="""
# DarkArchive instance
institution_type: ARCHIVE
archive_classification: THEMATIC
subtype: DARK_ARCHIVE
digital_preservation_level: FULL
access_policy: RESTRICTED
storage_location: offline_vault
""".strip(),
),

# =========================================================================
# ORGANIZATIONAL STRUCTURE MAPPINGS
# =========================================================================

# -------------------------------------------------------------------------
# org_structure_hierarchy
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
    # Mapping entry for parent/subsidiary/branch relationships between
    # organizations.
    source_block='org_structure_hierarchy',
    description='Organizational hierarchy - parent organizations, subsidiaries, branches',
    detailed_description="""
Models the hierarchical relationships between organizations:
- Parent-child relationships (ParentOrganizationUnit, SubsidiaryOrganization)
- Branch locations (OrganizationBranch, BranchOffice)
- Encompassing bodies (EncompassingBody)
- Networks and associations (ArchiveNetwork, ArchiveAssociation, ConnectionNetwork)

Classes: ParentOrganizationUnit, SubsidiaryOrganization, OrganizationBranch,
BranchOffice, EncompassingBody, ArchiveNetwork, ArchiveAssociation,
ConnectionNetwork, Organization, Institution, CulturalInstitution,
MemoryInstitution, GLAM
""".strip(),
    linkml_class='Organization',
    typedb_entity='organization',
    provenance=Provenance(
        source_type='schema_documentation',
        data_tier=DataTier.TIER_1_AUTHORITATIVE,
    ),
    # All classes covered by this entry; linkml_class above is the first of them.
    # NOTE(review): CulturalInstitution is also listed by the
    # heritage_world_sites entry - confirm the duplicate is intended.
    generated_classes=[
        'Organization',
        'Institution',
        'ParentOrganizationUnit',
        'SubsidiaryOrganization',
        'OrganizationBranch',
        'BranchOffice',
        'EncompassingBody',
        'ArchiveNetwork',
        'ArchiveAssociation',
        'ConnectionNetwork',
        'CulturalInstitution',
        'MemoryInstitution',
        'GLAM',
    ],
    fields=[
        # Parent organization: declared as a nested structure.
        FieldMapping(
            source_path='parent_organization',
            source_description='Parent organization identifier or name',
            target_class='Organization',
            target_slot='parent_organization',
            transformation=TransformationType.NESTED,
            typedb_entity='organization',
            typedb_attribute='parent-organization',
            rdf_predicate='org:subOrganizationOf',
            required=False,
        ),
        # Subsidiaries: declared as a list mapped element by element.
        FieldMapping(
            source_path='subsidiary_organizations',
            source_description='List of subsidiary organizations',
            target_class='Organization',
            target_slot='subsidiaries',
            transformation=TransformationType.ARRAY_MAP,
            typedb_entity='organization',
            typedb_attribute='subsidiaries',
            rdf_predicate='org:hasSubOrganization',
            required=False,
        ),
        # Encompassing body: declared as a nested structure.
        FieldMapping(
            source_path='encompassing_body',
            source_description='Larger organizational body this belongs to',
            target_class='Organization',
            target_slot='encompassing_body',
            transformation=TransformationType.NESTED,
            typedb_entity='organization',
            typedb_attribute='encompassing-body',
            rdf_predicate='hc:encompassingBody',
            required=False,
        ),
    ],
    example_yaml="""
# Organization with hierarchy
organization_name: Rijksmuseum Research Library
parent_organization:
  name: Rijksmuseum
  ghcid: NL-NH-AMS-M-RM
encompassing_body:
  name: Ministry of Education, Culture and Science
  type: GOVERNMENT_MINISTRY
""".strip(),
),

# -------------------------------------------------------------------------
# org_structure_administrative
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
    # Mapping entry for internal administrative units and physical facilities.
    source_block='org_structure_administrative',
    description='Administrative units - departments, divisions, offices',
    detailed_description="""
Models the internal administrative structure of organizations:
- Functional divisions (Department, Division)
- Administrative units (AdministrativeOffice)
- Support functions (ConservationLab, ReadingRoom, GiftShop)
- Specialized facilities (ExhibitionSpace, Storage, Warehouse)

Classes: Department, Division, AdministrativeOffice, ConservationLab,
ReadingRoom, ReadingRoomAnnex, GiftShop, ExhibitionSpace,
Storage, Warehouse, PrintRoom, ServiceArea, ClosedSpace,
PublicSpace, OutdoorSite
""".strip(),
    linkml_class='Department',
    typedb_entity='department',
    provenance=Provenance(
        source_type='schema_documentation',
        data_tier=DataTier.TIER_1_AUTHORITATIVE,
    ),
    # All classes covered by this entry; linkml_class above is the first of them.
    generated_classes=[
        'Department',
        'Division',
        'AdministrativeOffice',
        'ConservationLab',
        'ReadingRoom',
        'ReadingRoomAnnex',
        'GiftShop',
        'ExhibitionSpace',
        'Storage',
        'Warehouse',
        'PrintRoom',
        'ServiceArea',
        'ClosedSpace',
        'PublicSpace',
        'OutdoorSite',
    ],
    fields=[
        # Departments: declared as a list mapped element by element.
        FieldMapping(
            source_path='departments',
            source_description='List of organizational departments',
            target_class='Department',
            target_slot='department_name',
            transformation=TransformationType.ARRAY_MAP,
            typedb_entity='department',
            typedb_attribute='department-name',
            rdf_predicate='org:hasUnit',
            required=False,
        ),
        # Facilities: declared as a list mapped element by element.
        FieldMapping(
            source_path='facilities',
            source_description='Physical facilities and spaces',
            target_class='Department',
            target_slot='facilities',
            transformation=TransformationType.ARRAY_MAP,
            typedb_entity='department',
            typedb_attribute='facilities',
            rdf_predicate='hc:hasFacility',
            required=False,
        ),
    ],
    example_yaml="""
# Department structure
departments:
  - name: Collection Management
    type: DEPARTMENT
    staff_count: 25
  - name: Conservation Laboratory
    type: CONSERVATION_LAB
    specializations:
      - paper_conservation
      - photograph_conservation
""".strip(),
),
# -------------------------------------------------------------------------
# ORGANIZATIONAL STRUCTURE - SUBDIVISION
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
    # Mapping entry for organizational subdivisions and structural change
    # events.
    source_block='org_structure_subdivision',
    description='Organizational subdivisions and change events',
    detailed_description="""
Models organizational subdivisions and structural changes:
- Subdivision types (OrganizationalSubdivision)
- Organizational structure (OrganizationalStructure)
- Change events (OrganizationalChangeEvent)
- Contributing and allocating agencies (ContributingAgency, AllocationAgency)

Classes: OrganizationalSubdivision, OrganizationalStructure,
OrganizationalChangeEvent, ContributingAgency, AllocationAgency,
Jurisdiction, StandardsOrganization, RegistrationAuthority
""".strip(),
    linkml_class='OrganizationalSubdivision',
    typedb_entity='organizational-subdivision',
    provenance=Provenance(
        source_type='schema_documentation',
        data_tier=DataTier.TIER_1_AUTHORITATIVE,
    ),
    # All classes covered by this entry; linkml_class above is the first of them.
    generated_classes=[
        'OrganizationalSubdivision',
        'OrganizationalStructure',
        'OrganizationalChangeEvent',
        'ContributingAgency',
        'AllocationAgency',
        'Jurisdiction',
        'StandardsOrganization',
        'RegistrationAuthority',
    ],
    fields=[
        # Subdivisions: declared as a list mapped element by element.
        FieldMapping(
            source_path='subdivisions',
            source_description='Organizational subdivisions',
            target_class='OrganizationalSubdivision',
            target_slot='subdivision_name',
            transformation=TransformationType.ARRAY_MAP,
            typedb_entity='organizational-subdivision',
            typedb_attribute='subdivision-name',
            rdf_predicate='org:hasUnit',
            required=False,
        ),
        # Change history targets OrganizationalChangeEvent and its own TypeDB
        # entity rather than the entry-level class.
        FieldMapping(
            source_path='change_history',
            source_description='History of organizational changes',
            target_class='OrganizationalChangeEvent',
            target_slot='change_events',
            transformation=TransformationType.ARRAY_MAP,
            typedb_entity='organizational-change-event',
            typedb_attribute='change-events',
            rdf_predicate='hc:hasChangeEvent',
            required=False,
        ),
    ],
    example_yaml="""
# Organizational change history
change_history:
  - event_type: MERGER
    date: "2001-01-01"
    description: "Merger of Gemeentearchief and Rijksarchief"
    predecessor_organizations:
      - name: Gemeentearchief Haarlem
      - name: Rijksarchief Noord-Holland
    resulting_organization:
      name: Noord-Hollands Archief
""".strip(),
),
# =========================================================================
# PHASE 1: HERITAGE & CULTURAL SITES
# =========================================================================
# -------------------------------------------------------------------------
# HERITAGE - WORLD SITES
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
    # Mapping entry for UNESCO World Heritage Sites and tentative-list entries.
    source_block='heritage_world_sites',
    description='UNESCO World Heritage Sites and tentative list entries',
    detailed_description="""
World Heritage Site designations and related classifications:
- Inscribed sites (WorldHeritageSite)
- Tentative list entries (TentativeWorldHeritageSite)
- Historic buildings and monuments (HistoricBuilding)
- Cultural institutions (CulturalInstitution)

Classes: WorldHeritageSite, TentativeWorldHeritageSite, HistoricBuilding,
CulturalInstitution
""".strip(),
    linkml_class='WorldHeritageSite',
    typedb_entity='world-heritage-site',
    provenance=Provenance(
        source_type='schema_documentation',
        data_tier=DataTier.TIER_1_AUTHORITATIVE,
    ),
    # All classes covered by this entry; linkml_class above is the first of them.
    generated_classes=[
        'WorldHeritageSite',
        'TentativeWorldHeritageSite',
        'HistoricBuilding',
        'CulturalInstitution',
    ],
    fields=[
        # Required numeric UNESCO identifier, copied as-is.
        FieldMapping(
            source_path='unesco_id',
            source_description='UNESCO World Heritage Site ID',
            target_class='WorldHeritageSite',
            target_slot='unesco_id',
            transformation=TransformationType.DIRECT,
            typedb_entity='world-heritage-site',
            typedb_attribute='unesco-id',
            rdf_predicate='hc:unescoId',
            required=True,
            validation=FieldValidation(type='number'),
        ),
        # Optional numeric inscription year, copied as-is.
        FieldMapping(
            source_path='inscription_year',
            source_description='Year of UNESCO inscription',
            target_class='WorldHeritageSite',
            target_slot='inscription_year',
            transformation=TransformationType.DIRECT,
            typedb_entity='world-heritage-site',
            typedb_attribute='inscription-year',
            rdf_predicate='hc:inscriptionYear',
            required=False,
            validation=FieldValidation(type='number'),
        ),
        # Optional list of selection criteria, copied element by element.
        FieldMapping(
            source_path='heritage_criteria',
            source_description='UNESCO selection criteria (i-x)',
            target_class='WorldHeritageSite',
            target_slot='criteria',
            transformation=TransformationType.ARRAY_DIRECT,
            typedb_entity='world-heritage-site',
            typedb_attribute='criteria',
            rdf_predicate='hc:heritageCriteria',
            required=False,
        ),
    ],
    example_yaml="""
# WorldHeritageSite instance
heritage_designation: UNESCO_WORLD_HERITAGE
unesco_id: 965
name: Rietveld Schröderhuis
inscription_year: 2000
heritage_criteria:
  - i
  - ii
country: NL
""".strip(),
),
# -------------------------------------------------------------------------
# HERITAGE - INTANGIBLE
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
    # Mapping entry for intangible cultural heritage forms, performances and
    # events.
    source_block='heritage_intangible',
    description='Intangible cultural heritage - traditions, performances, practices',
    detailed_description="""
UNESCO Intangible Cultural Heritage and related practices:
- Intangible heritage forms (IntangibleHeritageForm)
- Performances and events (IntangibleHeritagePerformance, IntangibleHeritageEvent)
- Groups preserving traditions (IntangibleHeritageGroupType)

Classes: IntangibleHeritageForm, IntangibleHeritagePerformance,
IntangibleHeritageEvent, IntangibleHeritageGroupType
""".strip(),
    linkml_class='IntangibleHeritageForm',
    typedb_entity='intangible-heritage',
    provenance=Provenance(
        source_type='schema_documentation',
        data_tier=DataTier.TIER_1_AUTHORITATIVE,
    ),
    # All classes covered by this entry; linkml_class above is the first of them.
    generated_classes=[
        'IntangibleHeritageForm',
        'IntangibleHeritagePerformance',
        'IntangibleHeritageEvent',
        'IntangibleHeritageGroupType',
    ],
    fields=[
        # Optional ICH domain, restricted to the enum values below.
        FieldMapping(
            source_path='ich_domain',
            source_description='UNESCO ICH domain category',
            target_class='IntangibleHeritageForm',
            target_slot='domain',
            transformation=TransformationType.LOOKUP,
            typedb_entity='intangible-heritage',
            typedb_attribute='domain',
            rdf_predicate='hc:ichDomain',
            required=False,
            validation=FieldValidation(
                type='enum',
                enum_values=[
                    'ORAL_TRADITIONS',
                    'PERFORMING_ARTS',
                    'SOCIAL_PRACTICES',
                    'KNOWLEDGE_PRACTICES',
                    'TRADITIONAL_CRAFTSMANSHIP',
                ],
            ),
        ),
        # Optional ICH list type, restricted to the enum values below.
        FieldMapping(
            source_path='inscription_list',
            source_description='UNESCO ICH list type',
            target_class='IntangibleHeritageForm',
            target_slot='list_type',
            transformation=TransformationType.LOOKUP,
            typedb_entity='intangible-heritage',
            typedb_attribute='list-type',
            rdf_predicate='hc:ichListType',
            required=False,
            validation=FieldValidation(
                type='enum',
                enum_values=['REPRESENTATIVE', 'URGENT_SAFEGUARDING', 'GOOD_PRACTICES'],
            ),
        ),
    ],
    example_yaml="""
# IntangibleHeritageForm instance
heritage_type: INTANGIBLE
ich_domain: TRADITIONAL_CRAFTSMANSHIP
name: Craft of the miller operating windmills and watermills
inscription_list: REPRESENTATIVE
inscription_year: 2017
countries:
  - NL
  - BE
""".strip(),
),
# -------------------------------------------------------------------------
# HERITAGE - NATIONAL TREASURES
# -------------------------------------------------------------------------
EnrichmentSourceMapping(
    # Mapping entry for national-level heritage designations.
    source_block='heritage_national_treasures',
    description='National treasures and designated heritage items',
    detailed_description="""
National-level heritage designations:
- National treasures (NationalTreasure)
- Country-specific designations (NationalTreasureOfFrance)
- Documentation centers (DocumentationCentre, RegionalHistoricCenter)
- Research facilities (ResearchCenter, ScientificTechnicAndIndustrialCultureCenter)

Classes: NationalTreasure, NationalTreasureOfFrance, DocumentationCentre,
RegionalHistoricCenter, ResearchCenter,
ScientificTechnicAndIndustrialCultureCenter
""".strip(),
    linkml_class='NationalTreasure',
    typedb_entity='national-treasure',
    provenance=Provenance(
        source_type='schema_documentation',
        data_tier=DataTier.TIER_1_AUTHORITATIVE,
    ),
    # All classes covered by this entry; linkml_class above is the first of them.
    generated_classes=[
        'NationalTreasure',
        'NationalTreasureOfFrance',
        'DocumentationCentre',
        'RegionalHistoricCenter',
        'ResearchCenter',
        'ScientificTechnicAndIndustrialCultureCenter',
    ],
    fields=[
        # Required designation type, declared with a LOOKUP transformation.
        FieldMapping(
            source_path='designation_type',
            source_description='Type of national designation',
            target_class='NationalTreasure',
            target_slot='designation_type',
            transformation=TransformationType.LOOKUP,
            typedb_entity='national-treasure',
            typedb_attribute='designation-type',
            rdf_predicate='hc:designationType',
            required=True,
        ),
        # Optional designation date, validated as a date.
        FieldMapping(
            source_path='designation_date',
            source_description='Date of official designation',
            target_class='NationalTreasure',
            target_slot='designation_date',
            transformation=TransformationType.TEMPORAL,
            typedb_entity='national-treasure',
            typedb_attribute='designation-date',
            rdf_predicate='hc:designationDate',
            required=False,
            validation=FieldValidation(type='date'),
        ),
    ],
    example_yaml="""
# NationalTreasure instance
designation_type: RIJKSMONUMENT
designation_number: 12345
name: Amsterdam Canal Ring
designation_date: "1999-12-01"
country: NL
""".strip(),
),
|
|
# =========================================================================
|
|
# PHASE 1: CLASSIFICATION TYPES
|
|
# =========================================================================
|
|
# -------------------------------------------------------------------------
|
|
# TYPE CLASSES - GLAM INSTITUTIONS
|
|
# -------------------------------------------------------------------------
|
|
# Mapping entry for the 'type_classes_glam' source block -> CustodianType.
# NOTE(review): rebuilt from a flattened extraction (separator residue removed,
# indentation restored); whitespace inside the triple-quoted strings, including
# the nesting under museum_type_details, is reconstructed -- confirm upstream.
# NOTE(review): generated_classes lists FeatureCustodianType and
# TasteScentHeritageType, which the description text does not mention.
EnrichmentSourceMapping(
    source_block='type_classes_glam',
    description='Classification types for GLAM institutions - museums, libraries, archives, galleries',
    detailed_description="""
Type classes for classifying heritage custodian institutions:
- Core GLAM types (MuseumType, LibraryType, ArchiveOrganizationType, GalleryType)
- Bio/nature custodians (BioCustodianType)
- Religious/sacred sites (HolySacredSiteType)
- Education providers (EducationProviderType)

Classes: MuseumType, LibraryType, ArchiveOrganizationType, GalleryType,
BioCustodianType, HolySacredSiteType, EducationProviderType,
CustodianType, PersonalCollectionType
""".strip(),
    linkml_class='CustodianType',
    typedb_entity='custodian-type',
    provenance=Provenance(
        source_type='schema_documentation',
        data_tier=DataTier.TIER_1_AUTHORITATIVE,
    ),
    generated_classes=[
        'CustodianType',
        'MuseumType',
        'LibraryType',
        'ArchiveOrganizationType',
        'GalleryType',
        'BioCustodianType',
        'HolySacredSiteType',
        'EducationProviderType',
        'PersonalCollectionType',
        'FeatureCustodianType',
        'TasteScentHeritageType',
    ],
    fields=[
        FieldMapping(
            source_path='institution_type',
            source_description='Primary institution type code',
            target_class='CustodianType',
            target_slot='type_code',
            transformation=TransformationType.LOOKUP,
            typedb_entity='custodian-type',
            typedb_attribute='type-code',
            rdf_predicate='hc:custodianTypeCode',
            required=True,
            validation=FieldValidation(
                type='enum',
                # One single-letter code per letter of the taxonomy name in `notes`.
                enum_values=['G', 'L', 'A', 'M', 'O', 'R', 'C', 'U', 'B', 'E', 'S', 'F', 'I', 'X', 'P', 'H', 'D', 'N', 'T'],
            ),
            notes='GLAMORCUBESFIXPHDNT taxonomy single-letter codes',
        ),
        FieldMapping(
            source_path='institution_subtype',
            source_description='Detailed institution subtype',
            target_class='CustodianType',
            target_slot='subtype',
            transformation=TransformationType.LOOKUP,
            typedb_entity='custodian-type',
            typedb_attribute='subtype',
            rdf_predicate='hc:custodianSubtype',
            required=False,
        ),
    ],
    example_yaml="""
# Custodian type classification
institution_type: M # Museum
institution_subtype: ART_MUSEUM
museum_type_details:
  collection_focus: FINE_ARTS
  governance: PUBLIC
  size_category: LARGE
""".strip(),
),
|
|
# -------------------------------------------------------------------------
|
|
# TYPE CLASSES - DIGITAL PLATFORMS
|
|
# -------------------------------------------------------------------------
|
|
# Mapping entry for the 'type_classes_digital' source block -> DigitalPlatformType.
# NOTE(review): rebuilt from a flattened extraction (separator residue removed,
# indentation restored); whitespace inside the triple-quoted strings, including
# the YAML list nesting, is reconstructed -- confirm against the upstream file.
# NOTE(review): generated_classes lists DataServiceEndpointType, which the
# description text does not mention.
EnrichmentSourceMapping(
    source_block='type_classes_digital',
    description='Classification types for digital platforms and web presence',
    detailed_description="""
Type classes for digital infrastructure:
- Digital platform types (DigitalPlatformType)
- Web portal classifications (WebPortalType, WebPortalTypes)
- Social media platforms (SocialMediaPlatformType, SocialMediaPlatformTypes)
- Social media content (SocialMediaPostType, SocialMediaPostTypes)
- Video content types (VideoAnnotationTypes)

Classes: DigitalPlatformType, WebPortalType, WebPortalTypes,
SocialMediaPlatformType, SocialMediaPlatformTypes,
SocialMediaPostType, SocialMediaPostTypes, VideoAnnotationTypes
""".strip(),
    linkml_class='DigitalPlatformType',
    typedb_entity='digital-platform-type',
    provenance=Provenance(
        source_type='schema_documentation',
        data_tier=DataTier.TIER_1_AUTHORITATIVE,
    ),
    generated_classes=[
        'DigitalPlatformType',
        'WebPortalType',
        'WebPortalTypes',
        'SocialMediaPlatformType',
        'SocialMediaPlatformTypes',
        'SocialMediaPostType',
        'SocialMediaPostTypes',
        'VideoAnnotationTypes',
        'DataServiceEndpointType',
    ],
    fields=[
        FieldMapping(
            source_path='platform_type',
            source_description='Digital platform classification',
            target_class='DigitalPlatformType',
            target_slot='platform_category',
            transformation=TransformationType.LOOKUP,
            typedb_entity='digital-platform-type',
            typedb_attribute='platform-category',
            rdf_predicate='hc:platformCategory',
            required=True,
            validation=FieldValidation(
                type='enum',
                enum_values=[
                    'WEBSITE',
                    'COLLECTION_PORTAL',
                    'DISCOVERY_PLATFORM',
                    'API',
                    'SOCIAL_MEDIA',
                    'CMS',
                    'DAM',
                ],
            ),
        ),
    ],
    example_yaml="""
# Digital platform type classification
digital_platforms:
  - platform_type: COLLECTION_PORTAL
    name: Online Collection
  - platform_type: API
    name: Data API
    protocol: REST
  - platform_type: SOCIAL_MEDIA
    social_platform_type: INSTAGRAM
""".strip(),
),
|
|
# -------------------------------------------------------------------------
|
|
# TYPE CLASSES - ORGANIZATIONAL
|
|
# -------------------------------------------------------------------------
|
|
# Mapping entry for the 'type_classes_organizational' source block -> LegalEntityType.
# NOTE(review): rebuilt from a flattened extraction (separator residue removed,
# indentation restored); whitespace inside the triple-quoted strings, including
# the nesting under legal_status, is reconstructed -- confirm upstream.
# NOTE(review): generated_classes lists LegalForm and LegalName, which the
# description text does not mention.
EnrichmentSourceMapping(
    source_block='type_classes_organizational',
    description='Classification types for organizational structures',
    detailed_description="""
Type classes for organizational classifications:
- Commercial organizations (CommercialOrganizationType)
- Non-profit organizations (NonProfitType)
- Research organizations (ResearchOrganizationType)
- Official institutions (OfficialInstitutionType)
- Heritage societies (HeritageSocietyType)
- Mixed/unspecified (MixedCustodianType, UnspecifiedType)

Classes: CommercialOrganizationType, NonProfitType, ResearchOrganizationType,
OfficialInstitutionType, HeritageSocietyType, MixedCustodianType,
UnspecifiedType, LegalEntityType
""".strip(),
    linkml_class='LegalEntityType',
    typedb_entity='legal-entity-type',
    provenance=Provenance(
        source_type='schema_documentation',
        data_tier=DataTier.TIER_1_AUTHORITATIVE,
    ),
    generated_classes=[
        'CommercialOrganizationType',
        'NonProfitType',
        'ResearchOrganizationType',
        'OfficialInstitutionType',
        'HeritageSocietyType',
        'MixedCustodianType',
        'UnspecifiedType',
        'LegalEntityType',
        'LegalForm',
        'LegalName',
    ],
    fields=[
        FieldMapping(
            source_path='legal_form',
            source_description='Legal form/entity type',
            target_class='LegalEntityType',
            target_slot='legal_form_code',
            transformation=TransformationType.LOOKUP,
            typedb_entity='legal-entity-type',
            typedb_attribute='legal-form-code',
            rdf_predicate='hc:legalFormCode',
            required=False,
            notes='ISO 20275 legal form codes or national equivalents',
        ),
        FieldMapping(
            source_path='governance_type',
            source_description='Governance/ownership type',
            target_class='LegalEntityType',
            target_slot='governance_type',
            transformation=TransformationType.LOOKUP,
            typedb_entity='legal-entity-type',
            typedb_attribute='governance-type',
            rdf_predicate='hc:governanceType',
            required=False,
            validation=FieldValidation(
                type='enum',
                enum_values=['PUBLIC', 'PRIVATE', 'NON_PROFIT', 'MIXED', 'GOVERNMENT'],
            ),
        ),
    ],
    example_yaml="""
# Legal entity type classification
legal_status:
  legal_form: STICHTING # Dutch foundation
  legal_form_code: "8888" # ISO 20275
  governance_type: NON_PROFIT
  registration_authority: KVK
  registration_number: "12345678"
""".strip(),
),
|
|
# =========================================================================
|
|
# PHASE 2: PLACE & LOCATION
|
|
# =========================================================================
|
|
# -------------------------------------------------------------------------
|
|
# PLACE - GEOGRAPHIC
|
|
# -------------------------------------------------------------------------
|
|
# Mapping entry for the 'place_geographic' source block -> Settlement.
# NOTE(review): rebuilt from a flattened extraction (separator residue removed,
# indentation restored); whitespace inside the triple-quoted strings, including
# the nesting under location/coordinates, is reconstructed -- confirm upstream.
EnrichmentSourceMapping(
    source_block='place_geographic',
    description='Geographic and spatial location entities',
    detailed_description="""
Geographic location classes representing physical places where heritage custodians
are located or operate. Includes settlements (cities/towns), countries, regions,
and geospatial coordinates.

These classes support:
- Settlement identification (GeoNames integration)
- Country code normalization (ISO 3166-1)
- Region/subregion hierarchies
- Geospatial coordinates (lat/lon)
""".strip(),
    linkml_class='Settlement',
    typedb_entity='settlement',
    provenance=Provenance(
        source_type='geonames',
        data_tier=DataTier.TIER_1_AUTHORITATIVE,
    ),
    generated_classes=[
        'Settlement', 'Country', 'Subregion', 'GeoSpatialPlace', 'FeaturePlace',
    ],
    fields=[
        FieldMapping(
            source_path='location.settlement',
            source_description='Settlement/city name',
            target_class='Settlement',
            target_slot='name',
            transformation=TransformationType.DIRECT,
            typedb_entity='settlement',
            typedb_attribute='name',
            rdf_predicate='schema:name',
            required=True,
        ),
        FieldMapping(
            source_path='location.country',
            source_description='Country code (ISO 3166-1)',
            target_class='Country',
            target_slot='country_code',
            transformation=TransformationType.LOOKUP,
            typedb_entity='country',
            typedb_attribute='country-code',
            rdf_predicate='schema:addressCountry',
            required=True,
        ),
        FieldMapping(
            source_path='location.coordinates',
            source_description='Geographic coordinates',
            target_class='GeoSpatialPlace',
            target_slot='coordinates',
            transformation=TransformationType.COMPUTED,
            typedb_entity='geo-spatial-place',
            typedb_attribute='coordinates',
            rdf_predicate='geo:hasGeometry',
            required=False,
        ),
    ],
    example_yaml="""
# Geographic location
location:
  settlement: Amsterdam
  country: NL
  region: Noord-Holland
  coordinates:
    latitude: 52.3676
    longitude: 4.9041
""".strip(),
),
|
|
# -------------------------------------------------------------------------
|
|
# PLACE - CUSTODIAN SPECIFIC
|
|
# -------------------------------------------------------------------------
|
|
# Mapping entry for the 'place_custodian_specific' source block -> CustodianPlace.
# NOTE(review): rebuilt from a flattened extraction (separator residue removed,
# indentation restored); whitespace inside the triple-quoted strings, including
# the nesting under places, is reconstructed -- confirm against upstream.
EnrichmentSourceMapping(
    source_block='place_custodian_specific',
    description='Custodian-specific place classes',
    detailed_description="""
Place classes specifically related to heritage custodian operations, including
auxiliary places, temporary locations, and custodian-specific place designations.

Supports modeling:
- Primary vs auxiliary locations
- Temporary/seasonal locations
- Off-site storage locations
- Branch/satellite locations
""".strip(),
    linkml_class='CustodianPlace',
    typedb_entity='custodian-place',
    provenance=Provenance(
        source_type='custodian_yaml',
        data_tier=DataTier.TIER_2_VERIFIED,
    ),
    generated_classes=[
        'CustodianPlace', 'AuxiliaryPlace', 'TemporaryLocation',
    ],
    fields=[
        FieldMapping(
            source_path='places.primary',
            source_description='Primary location',
            target_class='CustodianPlace',
            target_slot='primary_location',
            transformation=TransformationType.DIRECT,
            typedb_entity='custodian-place',
            typedb_attribute='primary-location',
            rdf_predicate='hc:primaryLocation',
            required=True,
        ),
        FieldMapping(
            source_path='places.auxiliary',
            source_description='Auxiliary/secondary locations',
            target_class='AuxiliaryPlace',
            target_slot='auxiliary_locations',
            transformation=TransformationType.ARRAY_DIRECT,
            typedb_entity='auxiliary-place',
            typedb_attribute='location',
            rdf_predicate='hc:auxiliaryLocation',
            required=False,
        ),
        FieldMapping(
            source_path='places.temporary',
            source_description='Temporary location',
            target_class='TemporaryLocation',
            target_slot='temporary_location',
            transformation=TransformationType.DIRECT,
            typedb_entity='temporary-location',
            typedb_attribute='location',
            rdf_predicate='hc:temporaryLocation',
            required=False,
        ),
    ],
    example_yaml="""
# Custodian places
places:
  primary:
    address: Museumstraat 1
    city: Amsterdam
  auxiliary:
    - name: Storage Facility
      address: Industrieweg 100
  temporary:
    name: Pop-up Exhibition Space
    valid_from: 2024-06-01
    valid_to: 2024-09-30
""".strip(),
),
|
|
# =========================================================================
|
|
# PHASE 2: COLLECTIONS
|
|
# =========================================================================
|
|
# -------------------------------------------------------------------------
|
|
# COLLECTION - CORE
|
|
# -------------------------------------------------------------------------
|
|
# Mapping entry for the 'collection_core' source block -> Collection.
# NOTE(review): rebuilt from a flattened extraction (separator residue removed,
# indentation restored); whitespace inside the triple-quoted strings, including
# the YAML list nesting, is reconstructed -- confirm against the upstream file.
EnrichmentSourceMapping(
    source_block='collection_core',
    description='Core collection and holdings classes',
    detailed_description="""
Core classes for modeling heritage collections and holdings. Collections represent
aggregations of objects, documents, or materials managed by a heritage custodian.

Supports:
- Collection naming and description
- Collection types (archival, library, museum, mixed)
- Special collections designation
- Subject/temporal coverage
""".strip(),
    linkml_class='Collection',
    typedb_entity='collection',
    provenance=Provenance(
        source_type='custodian_yaml',
        data_tier=DataTier.TIER_2_VERIFIED,
    ),
    generated_classes=[
        'Collection', 'CollectionType', 'SpecialCollection', 'CastCollection',
        'PhotographCollection', 'CustodianCollection', 'CustodianArchive',
        'CustodianAdministration',
    ],
    fields=[
        FieldMapping(
            source_path='collections[].name',
            source_description='Collection name',
            target_class='Collection',
            target_slot='collection_name',
            transformation=TransformationType.DIRECT,
            typedb_entity='collection',
            typedb_attribute='name',
            rdf_predicate='schema:name',
            required=True,
        ),
        FieldMapping(
            source_path='collections[].type',
            source_description='Collection type',
            target_class='CollectionType',
            target_slot='collection_type',
            transformation=TransformationType.LOOKUP,
            typedb_entity='collection-type',
            typedb_attribute='type',
            rdf_predicate='hc:collectionType',
            required=False,
        ),
        FieldMapping(
            source_path='collections[].description',
            source_description='Collection description',
            target_class='Collection',
            target_slot='description',
            transformation=TransformationType.DIRECT,
            typedb_entity='collection',
            typedb_attribute='description',
            rdf_predicate='schema:description',
            required=False,
        ),
    ],
    example_yaml="""
# Collection definition
collections:
  - name: Dutch Masters Collection
    type: MUSEUM_ART
    description: 17th century Dutch paintings
    extent: 450 paintings
    subject_areas:
      - Dutch Golden Age
      - Portraiture
""".strip(),
),
|
|
# -------------------------------------------------------------------------
|
|
# COLLECTION - MANAGEMENT
|
|
# -------------------------------------------------------------------------
|
|
# Mapping entry for the 'collection_management' source block -> CollectionManagementSystem.
# NOTE(review): rebuilt from a flattened extraction (separator residue removed,
# indentation restored); whitespace inside the triple-quoted strings, including
# the nesting under digital_platforms.cms, is reconstructed -- confirm upstream.
EnrichmentSourceMapping(
    source_block='collection_management',
    description='Collection management system classes',
    detailed_description="""
Classes for collection management systems (CMS) used by heritage custodians
to catalog, track, and manage their collections.

Supports documentation of:
- CMS software used (Adlib, TMS, ArchivesSpace, etc.)
- System configurations
- Integration endpoints
""".strip(),
    linkml_class='CollectionManagementSystem',
    typedb_entity='collection-management-system',
    provenance=Provenance(
        source_type='custodian_yaml',
        data_tier=DataTier.TIER_2_VERIFIED,
    ),
    generated_classes=[
        'CollectionManagementSystem',
    ],
    fields=[
        FieldMapping(
            source_path='digital_platforms.cms.name',
            source_description='CMS name',
            target_class='CollectionManagementSystem',
            target_slot='system_name',
            transformation=TransformationType.DIRECT,
            typedb_entity='collection-management-system',
            typedb_attribute='system-name',
            rdf_predicate='schema:name',
            required=True,
        ),
        FieldMapping(
            source_path='digital_platforms.cms.vendor',
            source_description='CMS vendor',
            target_class='CollectionManagementSystem',
            target_slot='vendor',
            transformation=TransformationType.DIRECT,
            typedb_entity='collection-management-system',
            typedb_attribute='vendor',
            rdf_predicate='schema:manufacturer',
            required=False,
        ),
    ],
    example_yaml="""
# Collection management system
digital_platforms:
  cms:
    name: Adlib Museum
    vendor: Axiell
    version: "7.8"
    url: https://collection.museum.nl
""".strip(),
),
|
|
# =========================================================================
|
|
# PHASE 2: PERSON & STAFF
|
|
# =========================================================================
|
|
# -------------------------------------------------------------------------
|
|
# PERSON - PROFILE EXTENDED
|
|
# -------------------------------------------------------------------------
|
|
# Mapping entry for the 'person_profile_extended' source block -> PersonObservation.
# NOTE(review): rebuilt from a flattened extraction (separator residue removed,
# indentation restored); whitespace inside the triple-quoted strings, including
# the nesting under person, is reconstructed -- confirm against upstream.
# NOTE(review): linkml_class 'PersonObservation' does not appear in
# generated_classes -- confirm that is intentional.
EnrichmentSourceMapping(
    source_block='person_profile_extended',
    description='Extended person profile classes',
    detailed_description="""
Extended classes for person/staff profiles beyond basic identity. Includes
LinkedIn profiles, person connections (professional networks), and web claims.

Supports:
- LinkedIn profile data integration
- Professional network connections
- Web-sourced claims about persons
- Person name variants and aliases
""".strip(),
    linkml_class='PersonObservation',
    typedb_entity='person-observation',
    provenance=Provenance(
        source_type='linkedin',
        data_tier=DataTier.TIER_3_CROWD_SOURCED,
    ),
    generated_classes=[
        'LinkedInProfile', 'PersonConnection', 'PersonName',
        'PersonOrOrganization', 'PersonWebClaim',
    ],
    fields=[
        FieldMapping(
            source_path='person.linkedin_url',
            source_description='LinkedIn profile URL',
            target_class='LinkedInProfile',
            target_slot='linkedin_url',
            transformation=TransformationType.DIRECT,
            typedb_entity='linkedin-profile',
            typedb_attribute='profile-url',
            rdf_predicate='schema:sameAs',
            required=False,
        ),
        FieldMapping(
            source_path='person.connections',
            source_description='Professional connections',
            target_class='PersonConnection',
            target_slot='connections',
            transformation=TransformationType.ARRAY_DIRECT,
            typedb_entity='person-connection',
            typedb_attribute='connected-person',
            rdf_predicate='hc:hasConnection',
            required=False,
        ),
        FieldMapping(
            source_path='person.names',
            source_description='Person name variants',
            target_class='PersonName',
            target_slot='name_variants',
            transformation=TransformationType.ARRAY_DIRECT,
            typedb_entity='person-name',
            typedb_attribute='name-value',
            rdf_predicate='schema:alternateName',
            required=False,
        ),
    ],
    example_yaml="""
# Extended person profile
person:
  name: Jan de Vries
  linkedin_url: https://linkedin.com/in/jandevries
  names:
    - value: Jan de Vries
      type: legal_name
    - value: J. de Vries
      type: abbreviated
  connections:
    - name: Maria Bakker
      organization: Rijksmuseum
""".strip(),
),
|
|
# -------------------------------------------------------------------------
|
|
# PERSON - WORK & EDUCATION
|
|
# -------------------------------------------------------------------------
|
|
# Mapping entry for the 'person_work_education' source block -> WorkExperience.
# NOTE(review): rebuilt from a flattened extraction (separator residue removed,
# indentation restored); whitespace inside the triple-quoted strings, including
# the nesting under person, is reconstructed -- confirm against upstream.
EnrichmentSourceMapping(
    source_block='person_work_education',
    description='Work experience and education classes',
    detailed_description="""
Classes for modeling work experience history and educational credentials
of persons associated with heritage custodians.

Supports:
- Employment history with dates
- Role/position tracking
- Educational credentials
- Skills and certifications
""".strip(),
    linkml_class='WorkExperience',
    typedb_entity='work-experience',
    provenance=Provenance(
        source_type='linkedin',
        data_tier=DataTier.TIER_3_CROWD_SOURCED,
    ),
    generated_classes=[
        'WorkExperience', 'EducationCredential', 'StaffRole', 'StaffRoles',
    ],
    fields=[
        FieldMapping(
            source_path='person.experience[].organization',
            source_description='Employer organization',
            target_class='WorkExperience',
            target_slot='organization',
            transformation=TransformationType.DIRECT,
            typedb_entity='work-experience',
            typedb_attribute='organization',
            rdf_predicate='schema:worksFor',
            required=True,
        ),
        FieldMapping(
            source_path='person.experience[].role',
            source_description='Job title/role',
            target_class='WorkExperience',
            target_slot='role_title',
            transformation=TransformationType.DIRECT,
            typedb_entity='work-experience',
            typedb_attribute='role-title',
            rdf_predicate='schema:jobTitle',
            required=True,
        ),
        FieldMapping(
            source_path='person.education[].institution',
            source_description='Educational institution',
            target_class='EducationCredential',
            target_slot='institution',
            transformation=TransformationType.DIRECT,
            typedb_entity='education-credential',
            typedb_attribute='institution',
            rdf_predicate='schema:alumniOf',
            required=True,
        ),
    ],
    example_yaml="""
# Work and education
person:
  experience:
    - organization: Rijksmuseum
      role: Senior Curator
      start_date: 2018-03
      current: true
  education:
    - institution: University of Amsterdam
      degree: MA Art History
      graduation_year: 2010
""".strip(),
),
|
|
# =========================================================================
|
|
# PHASE 2: DIGITAL & API SERVICES
|
|
# =========================================================================
|
|
# -------------------------------------------------------------------------
|
|
# DIGITAL PLATFORMS - EXTENDED
|
|
# -------------------------------------------------------------------------
|
|
# Mapping entry for the 'digital_platforms_extended' source block -> WebPortal.
# NOTE(review): rebuilt from a flattened extraction (separator residue removed,
# indentation restored); whitespace inside the triple-quoted strings, including
# the nesting under digital_platforms.portals, is reconstructed -- confirm upstream.
EnrichmentSourceMapping(
    source_block='digital_platforms_extended',
    description='Extended digital platform classes',
    detailed_description="""
Extended digital platform classes for web portals, virtual libraries,
and auxiliary digital presence. Supports heritage custodian online presence
beyond primary websites.

Includes:
- Web portals and discovery interfaces
- Virtual/digital-only libraries
- Auxiliary digital platforms
- Primary digital presence assertions
""".strip(),
    linkml_class='WebPortal',
    typedb_entity='web-portal',
    provenance=Provenance(
        source_type='web_enrichment',
        data_tier=DataTier.TIER_2_VERIFIED,
    ),
    generated_classes=[
        'WebPortal', 'VirtualMapLibrary', 'AuxiliaryDigitalPlatform',
        'PrimaryDigitalPresenceAssertion',
    ],
    fields=[
        FieldMapping(
            source_path='digital_platforms.portals[].url',
            source_description='Portal URL',
            target_class='WebPortal',
            target_slot='portal_url',
            transformation=TransformationType.DIRECT,
            typedb_entity='web-portal',
            typedb_attribute='url',
            rdf_predicate='schema:url',
            required=True,
        ),
        FieldMapping(
            source_path='digital_platforms.portals[].type',
            source_description='Portal type',
            target_class='WebPortal',
            target_slot='portal_type',
            transformation=TransformationType.LOOKUP,
            typedb_entity='web-portal',
            typedb_attribute='portal-type',
            rdf_predicate='hc:portalType',
            required=False,
        ),
    ],
    example_yaml="""
# Digital platforms
digital_platforms:
  portals:
    - url: https://collectie.museum.nl
      type: DISCOVERY_PORTAL
      name: Online Collection
    - url: https://maps.museum.nl
      type: VIRTUAL_MAP
      name: Interactive Map
""".strip(),
),
|
|
# -------------------------------------------------------------------------
|
|
# API ENDPOINTS
|
|
# -------------------------------------------------------------------------
|
|
# Mapping entry for the 'api_endpoints' source block -> DataServiceEndpoint.
# NOTE(review): rebuilt from a flattened extraction (separator residue removed,
# indentation restored); whitespace inside the triple-quoted strings, including
# the nesting under api_endpoints, is reconstructed -- confirm upstream.
EnrichmentSourceMapping(
    source_block='api_endpoints',
    description='API and data service endpoint classes',
    detailed_description="""
Classes for API endpoints and data services exposed by heritage custodians.
Supports interoperability documentation for harvesting, searching, and
accessing digital content.

Includes:
- OAI-PMH harvesting endpoints
- Search APIs (SRU, OpenSearch)
- IIIF Image/Presentation APIs
- File download services (EAD, METS)
""".strip(),
    linkml_class='DataServiceEndpoint',
    typedb_entity='data-service-endpoint',
    provenance=Provenance(
        source_type='web_enrichment',
        data_tier=DataTier.TIER_2_VERIFIED,
    ),
    generated_classes=[
        'DataServiceEndpoint', 'OAIPMHEndpoint', 'SearchAPI', 'FileAPI',
        'EADDownload', 'METSAPI', 'IIPImageServer', 'InternetOfThings',
    ],
    fields=[
        FieldMapping(
            source_path='api_endpoints.oai_pmh',
            source_description='OAI-PMH endpoint',
            target_class='OAIPMHEndpoint',
            target_slot='endpoint_url',
            transformation=TransformationType.DIRECT,
            typedb_entity='oai-pmh-endpoint',
            typedb_attribute='endpoint-url',
            rdf_predicate='hc:oaiPmhEndpoint',
            required=False,
        ),
        FieldMapping(
            source_path='api_endpoints.search',
            source_description='Search API endpoint',
            target_class='SearchAPI',
            target_slot='search_url',
            transformation=TransformationType.DIRECT,
            typedb_entity='search-api',
            typedb_attribute='search-url',
            rdf_predicate='hc:searchEndpoint',
            required=False,
        ),
        # NOTE(review): the source is described as IIIF but the target class is
        # named IIPImageServer -- confirm this pairing is intended.
        FieldMapping(
            source_path='api_endpoints.iiif',
            source_description='IIIF Image API',
            target_class='IIPImageServer',
            target_slot='iiif_url',
            transformation=TransformationType.DIRECT,
            typedb_entity='iip-image-server',
            typedb_attribute='iiif-url',
            rdf_predicate='hc:iiifEndpoint',
            required=False,
        ),
    ],
    example_yaml="""
# API endpoints
api_endpoints:
  oai_pmh: https://api.museum.nl/oai
  search: https://api.museum.nl/search
  iiif: https://iiif.museum.nl/image/2
  ead_download: https://api.museum.nl/ead
""".strip(),
),
|
|
# =========================================================================
|
|
# PHASE 2: VIDEO & SOCIAL MEDIA
|
|
# =========================================================================
|
|
# -------------------------------------------------------------------------
|
|
# VIDEO CONTENT
|
|
# -------------------------------------------------------------------------
|
|
# Mapping entry for the 'video_content' source block -> VideoPost.
# NOTE(review): rebuilt from a flattened extraction (separator residue removed,
# indentation restored); whitespace inside the triple-quoted strings, including
# the nesting of chapters/transcript under each video, is reconstructed --
# confirm against the upstream file.
EnrichmentSourceMapping(
    source_block='video_content',
    description='Video content and annotation classes',
    detailed_description="""
Classes for video content produced or published by heritage custodians.
Supports rich annotation of video content including chapters, transcripts,
subtitles, and time-based segments.

Includes:
- Video posts (YouTube, Vimeo)
- Video chapters and segments
- Transcripts and subtitles
- Audio/text annotations
""".strip(),
    linkml_class='VideoPost',
    typedb_entity='video-post',
    provenance=Provenance(
        source_type='social_media',
        data_tier=DataTier.TIER_3_CROWD_SOURCED,
    ),
    generated_classes=[
        'VideoPost', 'VideoChapter', 'VideoTranscript', 'VideoSubtitle',
        'VideoAnnotation', 'VideoAudioAnnotation', 'VideoTextContent',
        'VideoTimeSegment',
    ],
    fields=[
        FieldMapping(
            source_path='videos[].url',
            source_description='Video URL',
            target_class='VideoPost',
            target_slot='video_url',
            transformation=TransformationType.DIRECT,
            typedb_entity='video-post',
            typedb_attribute='video-url',
            rdf_predicate='schema:contentUrl',
            required=True,
        ),
        FieldMapping(
            source_path='videos[].chapters',
            source_description='Video chapters',
            target_class='VideoChapter',
            target_slot='chapters',
            transformation=TransformationType.ARRAY_DIRECT,
            typedb_entity='video-chapter',
            typedb_attribute='chapters',
            rdf_predicate='schema:hasPart',
            required=False,
        ),
        FieldMapping(
            source_path='videos[].transcript',
            source_description='Video transcript',
            target_class='VideoTranscript',
            target_slot='transcript',
            transformation=TransformationType.DIRECT,
            typedb_entity='video-transcript',
            typedb_attribute='transcript-text',
            rdf_predicate='schema:transcript',
            required=False,
        ),
    ],
    example_yaml="""
# Video content
videos:
  - url: https://youtube.com/watch?v=abc123
    title: Museum Tour 2024
    duration: PT45M30S
    chapters:
      - title: Introduction
        start_time: PT0S
      - title: Main Gallery
        start_time: PT5M
    transcript:
      language: nl
      text: "Welkom bij het museum..."
""".strip(),
),
|
|
# -------------------------------------------------------------------------
|
|
# SOCIAL MEDIA CONTENT
|
|
# -------------------------------------------------------------------------
|
|
# Mapping entry for the 'social_media_content' source block -> SocialMediaProfile.
# NOTE(review): rebuilt from a flattened extraction (separator residue removed,
# indentation restored); whitespace inside the triple-quoted strings, including
# the nesting under social_media.profiles, is reconstructed -- confirm upstream.
EnrichmentSourceMapping(
    source_block='social_media_content',
    description='Social media content and profile classes',
    detailed_description="""
Classes for social media presence and content of heritage custodians.
Tracks posts, profiles, and engagement across platforms.

Includes:
- Social media profiles (per platform)
- Posts and content items
- Engagement metrics
""".strip(),
    linkml_class='SocialMediaProfile',
    typedb_entity='social-media-profile',
    provenance=Provenance(
        source_type='social_media',
        data_tier=DataTier.TIER_3_CROWD_SOURCED,
    ),
    generated_classes=[
        'SocialMediaProfile', 'SocialMediaPost',
    ],
    fields=[
        FieldMapping(
            source_path='social_media.profiles[].platform',
            source_description='Social media platform',
            target_class='SocialMediaProfile',
            target_slot='platform',
            transformation=TransformationType.LOOKUP,
            typedb_entity='social-media-profile',
            typedb_attribute='platform',
            rdf_predicate='hc:socialPlatform',
            required=True,
        ),
        FieldMapping(
            source_path='social_media.profiles[].url',
            source_description='Profile URL',
            target_class='SocialMediaProfile',
            target_slot='profile_url',
            transformation=TransformationType.DIRECT,
            typedb_entity='social-media-profile',
            typedb_attribute='profile-url',
            rdf_predicate='schema:url',
            required=True,
        ),
    ],
    example_yaml="""
# Social media profiles
social_media:
  profiles:
    - platform: TWITTER
      url: https://twitter.com/rijksmuseum
      followers: 450000
    - platform: INSTAGRAM
      url: https://instagram.com/rijksmuseum
      followers: 1200000
""".strip(),
),
|
|
|
|
# =========================================================================
|
|
# PHASE 2 SOURCE MAPPINGS: Legal & Administrative
|
|
# =========================================================================
|
|
|
|
# -------------------------------------------------------------------------
|
|
# legal_policies - Legal and policy classes
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='legal_policies',
|
|
description='Legal and policy classes',
|
|
detailed_description="""
|
|
Classes for legal policies, access restrictions, and data licensing
|
|
applicable to heritage custodians and their collections.
|
|
|
|
Includes:
|
|
- Access policies (reading room, digital)
|
|
- Data license terms
|
|
- Legal responsibility collections
|
|
- Trade register information
|
|
""".strip(),
|
|
linkml_class='AccessPolicy',
|
|
typedb_entity='access-policy',
|
|
provenance=Provenance(
|
|
source_type='custodian_yaml',
|
|
data_tier=DataTier.TIER_2_VERIFIED,
|
|
),
|
|
generated_classes=[
|
|
'AccessPolicy', 'DataLicensePolicy', 'LegalResponsibilityCollection',
|
|
'ArticlesOfAssociation', 'TradeRegister',
|
|
],
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='policies.access',
|
|
source_description='Access policy',
|
|
target_class='AccessPolicy',
|
|
target_slot='access_type',
|
|
transformation=TransformationType.LOOKUP,
|
|
typedb_entity='access-policy',
|
|
typedb_attribute='access-type',
|
|
rdf_predicate='hc:accessPolicy',
|
|
required=False,
|
|
),
|
|
FieldMapping(
|
|
source_path='policies.data_license',
|
|
source_description='Data license',
|
|
target_class='DataLicensePolicy',
|
|
target_slot='license',
|
|
transformation=TransformationType.DIRECT,
|
|
typedb_entity='data-license-policy',
|
|
typedb_attribute='license-type',
|
|
rdf_predicate='schema:license',
|
|
required=False,
|
|
),
|
|
],
|
|
example_yaml="""
|
|
# Policies
|
|
policies:
|
|
access:
|
|
type: PUBLIC
|
|
reading_room: true
|
|
appointment_required: false
|
|
data_license: CC-BY-4.0
|
|
legal_responsibility:
|
|
type: FOUNDATION
|
|
articles_url: https://kvk.nl/articles/12345678
|
|
""".strip(),
|
|
),
|
|
|
|
# -------------------------------------------------------------------------
|
|
# administrative_records - Administrative and financial record classes
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='administrative_records',
|
|
description='Administrative and financial record classes',
|
|
detailed_description="""
|
|
Classes for administrative records including budgets, projects, financial
|
|
statements, and registration information.
|
|
|
|
Includes:
|
|
- Budget tracking
|
|
- Project management
|
|
- Financial statements
|
|
- Registration info
|
|
""".strip(),
|
|
linkml_class='Budget',
|
|
typedb_entity='budget',
|
|
provenance=Provenance(
|
|
source_type='custodian_yaml',
|
|
data_tier=DataTier.TIER_2_VERIFIED,
|
|
),
|
|
generated_classes=[
|
|
'Budget', 'Project', 'FinancialStatement', 'RegistrationInfo',
|
|
'ConfidenceMeasure', 'ConflictStatus',
|
|
],
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='administration.budget',
|
|
source_description='Budget information',
|
|
target_class='Budget',
|
|
target_slot='annual_budget',
|
|
transformation=TransformationType.DIRECT,
|
|
typedb_entity='budget',
|
|
typedb_attribute='annual-amount',
|
|
rdf_predicate='hc:annualBudget',
|
|
required=False,
|
|
),
|
|
FieldMapping(
|
|
source_path='administration.projects',
|
|
source_description='Active projects',
|
|
target_class='Project',
|
|
target_slot='projects',
|
|
transformation=TransformationType.ARRAY_DIRECT,
|
|
typedb_entity='project',
|
|
typedb_attribute='project-name',
|
|
rdf_predicate='hc:hasProject',
|
|
required=False,
|
|
),
|
|
FieldMapping(
|
|
source_path='administration.registration',
|
|
source_description='Registration information',
|
|
target_class='RegistrationInfo',
|
|
target_slot='registration',
|
|
transformation=TransformationType.DIRECT,
|
|
typedb_entity='registration-info',
|
|
typedb_attribute='registration-number',
|
|
rdf_predicate='hc:registrationInfo',
|
|
required=False,
|
|
),
|
|
],
|
|
example_yaml="""
|
|
# Administrative records
|
|
administration:
|
|
budget:
|
|
annual_amount: 5000000
|
|
currency: EUR
|
|
fiscal_year: 2024
|
|
projects:
|
|
- name: Digitization 2024
|
|
status: IN_PROGRESS
|
|
registration:
|
|
authority: KVK
|
|
number: "12345678"
|
|
""".strip(),
|
|
),
|
|
|
|
# =========================================================================
|
|
# PHASE 2 SOURCE MAPPINGS: Finding Aids & Standards
|
|
# =========================================================================
|
|
|
|
# -------------------------------------------------------------------------
|
|
# finding_aids_standards - Finding aids, standards, documentation classes
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='finding_aids_standards',
|
|
description='Finding aids, standards, and source documentation classes',
|
|
detailed_description="""
|
|
Classes for archival finding aids, metadata standards, and source documentation.
|
|
Essential for archival description and interoperability.
|
|
|
|
Includes:
|
|
- Finding aids (EAD, PDF, online)
|
|
- Metadata standards compliance
|
|
- Source document references
|
|
- Primary digital presence assertions
|
|
""".strip(),
|
|
linkml_class='FindingAid',
|
|
typedb_entity='finding-aid',
|
|
provenance=Provenance(
|
|
source_type='custodian_yaml',
|
|
data_tier=DataTier.TIER_2_VERIFIED,
|
|
),
|
|
generated_classes=[
|
|
'FindingAid', 'FindingAidType', 'SourceDocument', 'Standard',
|
|
'PrimaryDigitalPresenceAssertion',
|
|
],
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='finding_aids[].type',
|
|
source_description='Finding aid type',
|
|
target_class='FindingAidType',
|
|
target_slot='finding_aid_type',
|
|
transformation=TransformationType.LOOKUP,
|
|
typedb_entity='finding-aid-type',
|
|
typedb_attribute='type',
|
|
rdf_predicate='hc:findingAidType',
|
|
required=True,
|
|
),
|
|
FieldMapping(
|
|
source_path='finding_aids[].url',
|
|
source_description='Finding aid URL',
|
|
target_class='FindingAid',
|
|
target_slot='url',
|
|
transformation=TransformationType.DIRECT,
|
|
typedb_entity='finding-aid',
|
|
typedb_attribute='url',
|
|
rdf_predicate='schema:url',
|
|
required=False,
|
|
),
|
|
FieldMapping(
|
|
source_path='standards',
|
|
source_description='Standards compliance',
|
|
target_class='Standard',
|
|
target_slot='standards',
|
|
transformation=TransformationType.ARRAY_DIRECT,
|
|
typedb_entity='standard',
|
|
typedb_attribute='standard-name',
|
|
rdf_predicate='hc:conformsToStandard',
|
|
required=False,
|
|
),
|
|
],
|
|
example_yaml="""
|
|
# Finding aids and standards
|
|
finding_aids:
|
|
- type: EAD
|
|
url: https://archive.nl/ead/collection123.xml
|
|
- type: PDF
|
|
url: https://archive.nl/guides/collection123.pdf
|
|
standards:
|
|
- ISAD(G)
|
|
- EAD3
|
|
- Dublin Core
|
|
""".strip(),
|
|
),
|
|
|
|
# =========================================================================
|
|
# PHASE 2 SOURCE MAPPINGS: Reconstruction & Provenance
|
|
# =========================================================================
|
|
|
|
# -------------------------------------------------------------------------
|
|
# reconstruction_provenance - Entity reconstruction and provenance tracking
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='reconstruction_provenance',
|
|
description='Entity reconstruction and provenance tracking classes',
|
|
detailed_description="""
|
|
Classes for tracking entity reconstruction activities and provenance chains.
|
|
Used for modeling how information about heritage custodians is assembled
|
|
from multiple sources.
|
|
|
|
Includes:
|
|
- Reconstructed entities (from multiple sources)
|
|
- Reconstruction activities
|
|
- Reconstruction agents (human/automated)
|
|
- Timeline events from external sources
|
|
""".strip(),
|
|
linkml_class='ReconstructedEntity',
|
|
typedb_entity='reconstructed-entity',
|
|
provenance=Provenance(
|
|
source_type='computed',
|
|
data_tier=DataTier.TIER_4_INFERRED,
|
|
),
|
|
generated_classes=[
|
|
'ReconstructedEntity', 'ReconstructionActivity', 'ReconstructionAgent',
|
|
'CustodianTimelineEvent',
|
|
],
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='reconstruction.sources',
|
|
source_description='Source documents',
|
|
target_class='ReconstructedEntity',
|
|
target_slot='sources',
|
|
transformation=TransformationType.ARRAY_DIRECT,
|
|
typedb_entity='reconstructed-entity',
|
|
typedb_attribute='sources',
|
|
rdf_predicate='prov:wasDerivedFrom',
|
|
required=True,
|
|
),
|
|
FieldMapping(
|
|
source_path='reconstruction.activity',
|
|
source_description='Reconstruction activity',
|
|
target_class='ReconstructionActivity',
|
|
target_slot='activity',
|
|
transformation=TransformationType.DIRECT,
|
|
typedb_entity='reconstruction-activity',
|
|
typedb_attribute='activity-type',
|
|
rdf_predicate='prov:wasGeneratedBy',
|
|
required=False,
|
|
),
|
|
FieldMapping(
|
|
source_path='reconstruction.agent',
|
|
source_description='Reconstruction agent',
|
|
target_class='ReconstructionAgent',
|
|
target_slot='agent',
|
|
transformation=TransformationType.DIRECT,
|
|
typedb_entity='reconstruction-agent',
|
|
typedb_attribute='agent-id',
|
|
rdf_predicate='prov:wasAttributedTo',
|
|
required=False,
|
|
),
|
|
],
|
|
example_yaml="""
|
|
# Reconstruction provenance
|
|
reconstruction:
|
|
sources:
|
|
- type: WIKIDATA
|
|
id: Q190804
|
|
- type: ISIL_REGISTRY
|
|
id: NL-AmRM
|
|
activity:
|
|
type: AUTOMATED_MERGE
|
|
date: 2024-01-15
|
|
agent:
|
|
type: SYSTEM
|
|
name: glam-extractor
|
|
""".strip(),
|
|
),
|
|
|
|
# =========================================================================
|
|
# PHASE 2 SOURCE MAPPINGS: Storage & Facilities
|
|
# =========================================================================
|
|
|
|
# -------------------------------------------------------------------------
|
|
# storage_facilities - Storage conditions and facility classes
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='storage_facilities',
|
|
description='Storage conditions and facility classes',
|
|
detailed_description="""
|
|
Classes for physical storage conditions, educational facilities, and
|
|
specialized spaces within heritage custodian buildings.
|
|
|
|
Includes:
|
|
- Storage conditions (climate, security)
|
|
- Storage types (warehouse, vault)
|
|
- Educational centers
|
|
- Specialized facilities (libraries, social spaces)
|
|
""".strip(),
|
|
linkml_class='StorageCondition',
|
|
typedb_entity='storage-condition',
|
|
provenance=Provenance(
|
|
source_type='custodian_yaml',
|
|
data_tier=DataTier.TIER_2_VERIFIED,
|
|
),
|
|
generated_classes=[
|
|
'StorageCondition', 'StorageConditionPolicy', 'StorageType',
|
|
'EducationCenter', 'PersonalLibrary', 'LocationLibrary', 'SocialSpace',
|
|
'CateringPlace',
|
|
],
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='facilities.storage.conditions',
|
|
source_description='Storage conditions',
|
|
target_class='StorageCondition',
|
|
target_slot='climate_control',
|
|
transformation=TransformationType.DIRECT,
|
|
typedb_entity='storage-condition',
|
|
typedb_attribute='climate-control',
|
|
rdf_predicate='hc:storageCondition',
|
|
required=False,
|
|
),
|
|
FieldMapping(
|
|
source_path='facilities.storage.type',
|
|
source_description='Storage type',
|
|
target_class='StorageType',
|
|
target_slot='storage_type',
|
|
transformation=TransformationType.LOOKUP,
|
|
typedb_entity='storage-type',
|
|
typedb_attribute='type',
|
|
rdf_predicate='hc:storageType',
|
|
required=False,
|
|
),
|
|
FieldMapping(
|
|
source_path='facilities.education_center',
|
|
source_description='Education center',
|
|
target_class='EducationCenter',
|
|
target_slot='education_center',
|
|
transformation=TransformationType.DIRECT,
|
|
typedb_entity='education-center',
|
|
typedb_attribute='center-name',
|
|
rdf_predicate='hc:hasEducationCenter',
|
|
required=False,
|
|
),
|
|
],
|
|
example_yaml="""
|
|
# Facilities
|
|
facilities:
|
|
storage:
|
|
type: CLIMATE_CONTROLLED_VAULT
|
|
conditions:
|
|
temperature: 18
|
|
humidity: 50
|
|
security_level: HIGH
|
|
education_center:
|
|
name: Museum Education Wing
|
|
capacity: 50
|
|
""".strip(),
|
|
),
|
|
|
|
# =========================================================================
|
|
# PHASE 2 SOURCE MAPPINGS: Funding & Grants
|
|
# =========================================================================
|
|
|
|
# -------------------------------------------------------------------------
|
|
# funding_grants - Funding, grants, and application classes
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='funding_grants',
|
|
description='Funding, grants, and application classes',
|
|
detailed_description="""
|
|
Classes for funding sources, grant applications, and financial requirements
|
|
relevant to heritage custodians.
|
|
|
|
Includes:
|
|
- Funding agendas
|
|
- Grant requirements
|
|
- Application calls
|
|
""".strip(),
|
|
linkml_class='FundingAgenda',
|
|
typedb_entity='funding-agenda',
|
|
provenance=Provenance(
|
|
source_type='external_api',
|
|
data_tier=DataTier.TIER_2_VERIFIED,
|
|
),
|
|
generated_classes=[
|
|
'FundingAgenda', 'FundingRequirement', 'CallForApplication',
|
|
],
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='funding.agendas',
|
|
source_description='Funding agendas',
|
|
target_class='FundingAgenda',
|
|
target_slot='agendas',
|
|
transformation=TransformationType.ARRAY_DIRECT,
|
|
typedb_entity='funding-agenda',
|
|
typedb_attribute='agenda-name',
|
|
rdf_predicate='hc:fundingAgenda',
|
|
required=False,
|
|
),
|
|
FieldMapping(
|
|
source_path='funding.requirements',
|
|
source_description='Funding requirements',
|
|
target_class='FundingRequirement',
|
|
target_slot='requirements',
|
|
transformation=TransformationType.ARRAY_DIRECT,
|
|
typedb_entity='funding-requirement',
|
|
typedb_attribute='requirement',
|
|
rdf_predicate='hc:fundingRequirement',
|
|
required=False,
|
|
),
|
|
FieldMapping(
|
|
source_path='funding.calls',
|
|
source_description='Open calls for applications',
|
|
target_class='CallForApplication',
|
|
target_slot='calls',
|
|
transformation=TransformationType.ARRAY_DIRECT,
|
|
typedb_entity='call-for-application',
|
|
typedb_attribute='call-title',
|
|
rdf_predicate='hc:openCall',
|
|
required=False,
|
|
),
|
|
],
|
|
example_yaml="""
|
|
# Funding
|
|
funding:
|
|
agendas:
|
|
- name: Heritage Digitization Fund 2024
|
|
amount: 500000
|
|
currency: EUR
|
|
requirements:
|
|
- type: MATCHING_FUNDS
|
|
percentage: 25
|
|
calls:
|
|
- title: Digital Heritage Innovation
|
|
deadline: 2024-06-30
|
|
""".strip(),
|
|
),
|
|
|
|
# =========================================================================
|
|
# PHASE 2 SOURCE MAPPINGS: Language & Naming
|
|
# =========================================================================
|
|
|
|
# -------------------------------------------------------------------------
|
|
# language_naming - Language codes, proficiency, and naming classes
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='language_naming',
|
|
description='Language codes, proficiency, and naming classes',
|
|
detailed_description="""
|
|
Classes for language handling, proficiency levels, and naming/appellation
|
|
conventions used in heritage custodian data.
|
|
|
|
Includes:
|
|
- ISO language codes
|
|
- Language proficiency levels
|
|
- Appellations (formal names)
|
|
- Container (structural) classes
|
|
""".strip(),
|
|
linkml_class='LanguageCode',
|
|
typedb_entity='language-code',
|
|
provenance=Provenance(
|
|
source_type='reference_data',
|
|
data_tier=DataTier.TIER_1_AUTHORITATIVE,
|
|
),
|
|
generated_classes=[
|
|
'LanguageCode', 'LanguageProficiency', 'Appellation', 'Container',
|
|
],
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='languages',
|
|
source_description='Languages used',
|
|
target_class='LanguageCode',
|
|
target_slot='languages',
|
|
transformation=TransformationType.ARRAY_DIRECT,
|
|
typedb_entity='language-code',
|
|
typedb_attribute='iso-code',
|
|
rdf_predicate='schema:inLanguage',
|
|
required=False,
|
|
),
|
|
FieldMapping(
|
|
source_path='staff.language_proficiency',
|
|
source_description='Language proficiency',
|
|
target_class='LanguageProficiency',
|
|
target_slot='proficiency',
|
|
transformation=TransformationType.LOOKUP,
|
|
typedb_entity='language-proficiency',
|
|
typedb_attribute='level',
|
|
rdf_predicate='hc:languageProficiency',
|
|
required=False,
|
|
),
|
|
FieldMapping(
|
|
source_path='names.appellations',
|
|
source_description='Formal appellations',
|
|
target_class='Appellation',
|
|
target_slot='appellations',
|
|
transformation=TransformationType.ARRAY_DIRECT,
|
|
typedb_entity='appellation',
|
|
typedb_attribute='name-value',
|
|
rdf_predicate='crm:P1_is_identified_by',
|
|
required=False,
|
|
),
|
|
],
|
|
example_yaml="""
|
|
# Language and naming
|
|
languages:
|
|
- nl
|
|
- en
|
|
- de
|
|
names:
|
|
appellations:
|
|
- value: Rijksmuseum Amsterdam
|
|
type: OFFICIAL
|
|
language: nl
|
|
- value: National Museum of the Netherlands
|
|
type: TRANSLATION
|
|
language: en
|
|
""".strip(),
|
|
),
|
|
|
|
# =========================================================================
|
|
# PHASE 2 SOURCE MAPPINGS: Specialized Archives (International)
|
|
# =========================================================================
|
|
|
|
# -------------------------------------------------------------------------
|
|
# archives_german - German-specific archive types
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='archives_german',
|
|
description='German-specific archive types',
|
|
detailed_description="""
|
|
Archive types specific to German archival tradition and organization.
|
|
German archives follow a distinctive organizational pattern based on
|
|
political/administrative regions and specialized functions.
|
|
""".strip(),
|
|
linkml_class='Verwaltungsarchiv',
|
|
typedb_entity='verwaltungsarchiv',
|
|
provenance=Provenance(
|
|
source_type='isil_registry',
|
|
data_tier=DataTier.TIER_1_AUTHORITATIVE,
|
|
),
|
|
generated_classes=[
|
|
'Verwaltungsarchiv', 'Vereinsarchiv', 'Verlagsarchiv',
|
|
'Bildstelle', 'Medienzentrum', 'Personenstandsarchiv',
|
|
],
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='archive_type',
|
|
source_description='German archive type',
|
|
target_class='Verwaltungsarchiv',
|
|
target_slot='archive_type',
|
|
transformation=TransformationType.LOOKUP,
|
|
typedb_entity='verwaltungsarchiv',
|
|
typedb_attribute='type',
|
|
rdf_predicate='hc:archiveType',
|
|
required=True,
|
|
),
|
|
],
|
|
example_yaml="""
|
|
# German archive type
|
|
archive_type: VERWALTUNGSARCHIV
|
|
name: Landesarchiv Baden-Württemberg
|
|
""".strip(),
|
|
),
|
|
|
|
# -------------------------------------------------------------------------
|
|
# archives_swedish - Swedish-specific archive types
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='archives_swedish',
|
|
description='Swedish-specific archive types',
|
|
detailed_description="""
|
|
Archive types specific to Swedish archival tradition. Swedish archives
|
|
include national (Riksarkivet), regional (Landsarkiv), and local heritage
|
|
institutions (Hembygdsförening).
|
|
""".strip(),
|
|
linkml_class='Landsarkiv',
|
|
typedb_entity='landsarkiv',
|
|
provenance=Provenance(
|
|
source_type='isil_registry',
|
|
data_tier=DataTier.TIER_1_AUTHORITATIVE,
|
|
),
|
|
generated_classes=[
|
|
'Landsarkiv', 'Foremalarkiv', 'SectorOfArchivesInSweden',
|
|
'LocalHeritageInstitutionSweden',
|
|
],
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='archive_type',
|
|
source_description='Swedish archive type',
|
|
target_class='Landsarkiv',
|
|
target_slot='archive_type',
|
|
transformation=TransformationType.LOOKUP,
|
|
typedb_entity='landsarkiv',
|
|
typedb_attribute='type',
|
|
rdf_predicate='hc:archiveType',
|
|
required=True,
|
|
),
|
|
],
|
|
example_yaml="""
|
|
# Swedish archive type
|
|
archive_type: LANDSARKIV
|
|
name: Landsarkivet i Uppsala
|
|
""".strip(),
|
|
),
|
|
|
|
# -------------------------------------------------------------------------
|
|
# archives_french - French-specific archive types
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='archives_french',
|
|
description='French-specific archive types',
|
|
detailed_description="""
|
|
Archive types specific to French archival organization. French archives
|
|
follow a centralized national system with departmental and communal levels.
|
|
""".strip(),
|
|
linkml_class='FrenchPrivateArchives',
|
|
typedb_entity='french-private-archives',
|
|
provenance=Provenance(
|
|
source_type='isil_registry',
|
|
data_tier=DataTier.TIER_1_AUTHORITATIVE,
|
|
),
|
|
generated_classes=[
|
|
'FrenchPrivateArchives', 'Conservatoria',
|
|
],
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='archive_type',
|
|
source_description='French archive type',
|
|
target_class='FrenchPrivateArchives',
|
|
target_slot='archive_type',
|
|
transformation=TransformationType.LOOKUP,
|
|
typedb_entity='french-private-archives',
|
|
typedb_attribute='type',
|
|
rdf_predicate='hc:archiveType',
|
|
required=True,
|
|
),
|
|
],
|
|
example_yaml="""
|
|
# French archive type
|
|
archive_type: ARCHIVES_PRIVEES
|
|
name: Archives privées de la famille Rothschild
|
|
""".strip(),
|
|
),
|
|
|
|
# -------------------------------------------------------------------------
|
|
# archives_other - Other international specialized archive types
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='archives_other',
|
|
description='Other international specialized archive types',
|
|
detailed_description="""
|
|
Specialized archive types from other countries including Czech regional
|
|
archives, Nordic archives, and various thematic archive types.
|
|
""".strip(),
|
|
linkml_class='SpecializedArchivesCzechia',
|
|
typedb_entity='specialized-archives-czechia',
|
|
provenance=Provenance(
|
|
source_type='isil_registry',
|
|
data_tier=DataTier.TIER_1_AUTHORITATIVE,
|
|
),
|
|
generated_classes=[
|
|
'SpecializedArchivesCzechia', 'DimArchives', 'LightArchives',
|
|
'HistoricalArchive', 'JointArchives', 'PartyArchive', 'Kustodie',
|
|
'ArchivesForBuildingRecords',
|
|
],
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='archive_type',
|
|
source_description='Specialized archive type',
|
|
target_class='SpecializedArchivesCzechia',
|
|
target_slot='archive_type',
|
|
transformation=TransformationType.LOOKUP,
|
|
typedb_entity='specialized-archives-czechia',
|
|
typedb_attribute='type',
|
|
rdf_predicate='hc:archiveType',
|
|
required=True,
|
|
),
|
|
],
|
|
example_yaml="""
|
|
# Czech specialized archive
|
|
archive_type: OBLASTNI_ARCHIV
|
|
name: Státní oblastní archiv v Praze
|
|
""".strip(),
|
|
),
|
|
]
|
|
|
|
# ============================================================================
|
|
# PERSON CATEGORIES (9 Categories)
|
|
# ============================================================================
|
|
|
|
# Categories used to group the person-level source blocks.
#
# Each entry carries:
#   - id:              short machine identifier for the category
#   - name / name_nl:  English and Dutch display names
#   - description / description_nl: English and Dutch one-line summaries
#   - icon:            emoji shown next to the category
#   - sources:         names of the source blocks grouped under this category
#                      (e.g. 'profile_identity' and 'linkedin_profile' are
#                      `source_block` values in PERSON_MAPPINGS; presumably
#                      every name listed here has a matching entry there —
#                      verify when adding or renaming a source block)
PERSON_CATEGORIES: list[MappingCategory] = [
    MappingCategory(
        id='identity',
        name='Identity & Profile',
        name_nl='Identiteit & Profiel',
        description='Core person identity: name, headline, location, connections',
        description_nl='Kern persoonidentiteit: naam, kopregel, locatie, connecties',
        icon='🪪',
        sources=['profile_identity', 'linkedin_profile'],
    ),
    MappingCategory(
        id='career',
        name='Career & Experience',
        name_nl='Carrière & Ervaring',
        description='Work history, positions, organizations',
        description_nl='Werkgeschiedenis, posities, organisaties',
        icon='💼',
        sources=['career_history'],
    ),
    MappingCategory(
        id='education',
        name='Education & Credentials',
        name_nl='Opleiding & Kwalificaties',
        description='Educational background, degrees, institutions',
        description_nl='Opleidingsachtergrond, diploma\'s, instellingen',
        icon='🎓',
        sources=['education'],
    ),
    MappingCategory(
        id='skills',
        name='Skills & Expertise',
        name_nl='Vaardigheden & Expertise',
        description='Professional skills, languages, endorsements',
        description_nl='Professionele vaardigheden, talen, aanbevelingen',
        icon='⚡',
        sources=['skills_expertise'],
    ),
    MappingCategory(
        id='heritage',
        name='Heritage Sector Relevance',
        name_nl='Erfgoedsector Relevantie',
        description='Heritage domain expertise and experience',
        description_nl='Erfgoed domeinexpertise en ervaring',
        icon='🏛️',
        sources=['heritage_relevance', 'heritage_experience'],
    ),
    MappingCategory(
        id='affiliations',
        name='Affiliations & Records',
        name_nl='Affiliaties & Records',
        description='Linked custodians, person records, connections',
        description_nl='Gekoppelde bronhouders, persoonsrecords, connecties',
        icon='🔗',
        sources=['affiliations', 'linked_records'],
    ),
    MappingCategory(
        id='contact',
        name='Contact & Social',
        name_nl='Contact & Sociaal',
        description='Contact information, social media profiles',
        description_nl='Contactgegevens, sociale media profielen',
        icon='📧',
        sources=['contact_data'],
    ),
    MappingCategory(
        id='provenance',
        name='Extraction & Provenance',
        name_nl='Extractie & Herkomst',
        description='Data extraction metadata and web claims',
        description_nl='Data extractie metadata en webclaims',
        icon='📋',
        sources=['extraction_metadata', 'web_claims'],
    ),
    MappingCategory(
        id='pico_ontology',
        name='PiCo Ontology Mapping',
        name_nl='PiCo Ontologie Mapping',
        description='Person in Context (PiCo) ontology alignment',
        description_nl='Person in Context (PiCo) ontologie uitlijning',
        icon='🔬',
        sources=['pico_mapped', 'pico_unmapped'],
    ),
]
|
|
|
|
# ============================================================================
|
|
# PERSON MAPPINGS (14 Person Source Blocks)
|
|
# ============================================================================
|
|
|
|
PERSON_MAPPINGS: list[EnrichmentSourceMapping] = [
|
|
# -------------------------------------------------------------------------
|
|
# PROFILE IDENTITY - Core profile information
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='profile_identity',
|
|
description='Core profile identity - name, headline, location',
|
|
detailed_description="""
|
|
Core identity information extracted from LinkedIn profiles.
|
|
Includes the person's full name, professional headline, location,
|
|
and current company affiliation.
|
|
|
|
This data forms the foundation of the person entity and is used
|
|
for display and search purposes across the heritage network.
|
|
""".strip(),
|
|
linkml_class='Person',
|
|
typedb_entity='person',
|
|
provenance=Provenance(
|
|
source_type='external_api',
|
|
data_tier=DataTier.TIER_2_VERIFIED,
|
|
),
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='profile_data.name',
|
|
source_description='Full name of the person',
|
|
target_class='Person',
|
|
target_slot='name',
|
|
typedb_entity='person',
|
|
typedb_attribute='person-name',
|
|
rdf_predicate='foaf:name',
|
|
transformation=TransformationType.DIRECT,
|
|
required=True,
|
|
notes='Primary identifier for the person',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.headline',
|
|
source_description='Professional headline/title',
|
|
target_class='Person',
|
|
target_slot='headline',
|
|
typedb_entity='person',
|
|
typedb_attribute='headline',
|
|
rdf_predicate='schema:jobTitle',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Current professional headline from LinkedIn',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.headline_english',
|
|
source_description='English translation of headline',
|
|
target_class='Person',
|
|
target_slot='headline_english',
|
|
typedb_entity='person',
|
|
typedb_attribute='headline-english',
|
|
rdf_predicate='schema:jobTitle',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Translated headline for non-English profiles',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.location',
|
|
source_description='Geographic location',
|
|
target_class='Person',
|
|
target_slot='location',
|
|
typedb_entity='person',
|
|
typedb_attribute='location-string',
|
|
rdf_predicate='schema:address',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Free-text location from LinkedIn',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.country_code',
|
|
source_description='ISO country code',
|
|
target_class='Person',
|
|
target_slot='country_code',
|
|
typedb_entity='person',
|
|
typedb_attribute='country-code',
|
|
rdf_predicate='schema:addressCountry',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Two-letter ISO 3166-1 country code',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.current_company',
|
|
source_description='Current employer name',
|
|
target_class='Person',
|
|
target_slot='current_company',
|
|
typedb_entity='person',
|
|
typedb_attribute='current-company',
|
|
rdf_predicate='schema:worksFor',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Name of current employer organization',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.about',
|
|
source_description='About/summary section',
|
|
target_class='Person',
|
|
target_slot='about',
|
|
typedb_entity='person',
|
|
typedb_attribute='about',
|
|
rdf_predicate='schema:description',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Professional summary from LinkedIn',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.total_experience',
|
|
source_description='Total years of experience',
|
|
target_class='Person',
|
|
target_slot='total_experience',
|
|
typedb_entity='person',
|
|
typedb_attribute='total-experience',
|
|
rdf_predicate='schema:experienceYears',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Calculated total professional experience',
|
|
),
|
|
],
|
|
generated_classes=['Person', 'PersonProfile'],
|
|
example_yaml="""
|
|
profile_data:
|
|
name: Iris van Meer
|
|
headline: Staff member at the Services Department at Nationaal Archief
|
|
headline_english: Staff member at the Services Department at Nationaal Archief
|
|
location: The Randstad, Netherlands
|
|
country_code: NL
|
|
current_company: Nationaal Archief
|
|
about: "Total Experience: 15 years and 8 months"
|
|
total_experience: 15 years and 8 months
|
|
""".strip(),
|
|
),
|
|
|
|
# -------------------------------------------------------------------------
|
|
# LINKEDIN PROFILE - URLs and social metrics
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='linkedin_profile',
|
|
description='LinkedIn profile URLs and social metrics',
|
|
detailed_description="""
|
|
LinkedIn-specific profile data including the profile URL, photo URL,
|
|
and social metrics like connections and followers count.
|
|
|
|
These fields enable linking back to the source profile and provide
|
|
insight into the person's professional network reach.
|
|
""".strip(),
|
|
linkml_class='Person',
|
|
typedb_entity='person',
|
|
provenance=Provenance(
|
|
source_type='external_api',
|
|
data_tier=DataTier.TIER_2_VERIFIED,
|
|
),
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='linkedin_profile_url',
|
|
source_description='LinkedIn profile URL',
|
|
target_class='Person',
|
|
target_slot='linkedin_url',
|
|
typedb_entity='person',
|
|
typedb_attribute='linkedin-url',
|
|
rdf_predicate='schema:sameAs',
|
|
transformation=TransformationType.DIRECT,
|
|
required=True,
|
|
notes='Canonical LinkedIn profile URL',
|
|
),
|
|
FieldMapping(
|
|
source_path='linkedin_photo_url',
|
|
source_description='LinkedIn profile photo URL',
|
|
target_class='Person',
|
|
target_slot='photo_url',
|
|
typedb_entity='person',
|
|
typedb_attribute='photo-url',
|
|
rdf_predicate='schema:image',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='CDN URL for profile photo',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.connections',
|
|
source_description='Number of LinkedIn connections',
|
|
target_class='Person',
|
|
target_slot='connections',
|
|
typedb_entity='person',
|
|
typedb_attribute='connections-count',
|
|
rdf_predicate='schema:knows',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='First-degree connection count',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.followers',
|
|
source_description='Number of followers',
|
|
target_class='Person',
|
|
target_slot='followers',
|
|
typedb_entity='person',
|
|
typedb_attribute='followers-count',
|
|
rdf_predicate='schema:followerCount',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='LinkedIn follower count',
|
|
),
|
|
],
|
|
generated_classes=['Person'],
|
|
example_yaml="""
|
|
linkedin_profile_url: https://www.linkedin.com/in/iris-van-meer-34329131
|
|
linkedin_photo_url: https://media.licdn.com/dms/image/v2/...
|
|
profile_data:
|
|
connections: 286
|
|
followers: 289
|
|
""".strip(),
|
|
),
|
|
|
|
# -------------------------------------------------------------------------
|
|
# CAREER HISTORY - Employment timeline
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='career_history',
|
|
description='Career history and employment timeline',
|
|
detailed_description="""
|
|
Complete career history extracted from LinkedIn profiles.
|
|
Each position includes organization, role, dates, duration,
|
|
location, and organizational metadata like company size and industry.
|
|
|
|
This data is crucial for understanding a person's professional
|
|
trajectory and their experience in heritage-related roles.
|
|
""".strip(),
|
|
linkml_class='CareerPosition',
|
|
typedb_entity='career-position',
|
|
provenance=Provenance(
|
|
source_type='external_api',
|
|
data_tier=DataTier.TIER_2_VERIFIED,
|
|
),
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='profile_data.career_history[].organization',
|
|
source_description='Employer organization name',
|
|
target_class='CareerPosition',
|
|
target_slot='organization',
|
|
typedb_entity='career-position',
|
|
typedb_attribute='organization-name',
|
|
rdf_predicate='schema:worksFor',
|
|
transformation=TransformationType.DIRECT,
|
|
required=True,
|
|
notes='Name of the employing organization',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.career_history[].organization_linkedin',
|
|
source_description='LinkedIn URL for organization',
|
|
target_class='CareerPosition',
|
|
target_slot='organization_linkedin',
|
|
typedb_entity='career-position',
|
|
typedb_attribute='organization-linkedin-url',
|
|
rdf_predicate='schema:sameAs',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='LinkedIn company page URL',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.career_history[].role',
|
|
source_description='Job title/role',
|
|
target_class='CareerPosition',
|
|
target_slot='role',
|
|
typedb_entity='career-position',
|
|
typedb_attribute='role-title',
|
|
rdf_predicate='schema:jobTitle',
|
|
transformation=TransformationType.DIRECT,
|
|
required=True,
|
|
notes='Original language job title',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.career_history[].role_english',
|
|
source_description='English translation of role',
|
|
target_class='CareerPosition',
|
|
target_slot='role_english',
|
|
typedb_entity='career-position',
|
|
typedb_attribute='role-title-english',
|
|
rdf_predicate='schema:jobTitle',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='English translation for non-English titles',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.career_history[].dates',
|
|
source_description='Employment date range',
|
|
target_class='CareerPosition',
|
|
target_slot='dates',
|
|
typedb_entity='career-position',
|
|
typedb_attribute='date-range',
|
|
rdf_predicate='schema:temporalCoverage',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Date range string (e.g., "Apr 2014 - Present")',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.career_history[].duration',
|
|
source_description='Employment duration',
|
|
target_class='CareerPosition',
|
|
target_slot='duration',
|
|
typedb_entity='career-position',
|
|
typedb_attribute='duration',
|
|
rdf_predicate='schema:duration',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Calculated duration (e.g., "11 years and 7 months")',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.career_history[].location',
|
|
source_description='Work location',
|
|
target_class='CareerPosition',
|
|
target_slot='location',
|
|
typedb_entity='career-position',
|
|
typedb_attribute='work-location',
|
|
rdf_predicate='schema:workLocation',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Geographic location of the position',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.career_history[].current',
|
|
source_description='Is current position',
|
|
target_class='CareerPosition',
|
|
target_slot='current',
|
|
typedb_entity='career-position',
|
|
typedb_attribute='is-current',
|
|
rdf_predicate='schema:currentPosition',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Boolean flag for current employment',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.career_history[].company_size',
|
|
source_description='Company employee count range',
|
|
target_class='CareerPosition',
|
|
target_slot='company_size',
|
|
typedb_entity='career-position',
|
|
typedb_attribute='company-size',
|
|
rdf_predicate='schema:numberOfEmployees',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Employee count range (e.g., "201-500 employees")',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.career_history[].company_founded',
|
|
source_description='Year company was founded',
|
|
target_class='CareerPosition',
|
|
target_slot='company_founded',
|
|
typedb_entity='career-position',
|
|
typedb_attribute='company-founded-year',
|
|
rdf_predicate='schema:foundingDate',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Founding year of the organization',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.career_history[].company_type',
|
|
source_description='Type of company',
|
|
target_class='CareerPosition',
|
|
target_slot='company_type',
|
|
typedb_entity='career-position',
|
|
typedb_attribute='company-type',
|
|
rdf_predicate='schema:additionalType',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Organization type (e.g., "Government Agency")',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.career_history[].industry',
|
|
source_description='Industry sector',
|
|
target_class='CareerPosition',
|
|
target_slot='industry',
|
|
typedb_entity='career-position',
|
|
typedb_attribute='industry',
|
|
rdf_predicate='schema:industry',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Industry classification',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.career_history[].department',
|
|
source_description='Department within organization',
|
|
target_class='CareerPosition',
|
|
target_slot='department',
|
|
typedb_entity='career-position',
|
|
typedb_attribute='department',
|
|
rdf_predicate='schema:department',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Department or division name',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.career_history[].level',
|
|
source_description='Seniority level',
|
|
target_class='CareerPosition',
|
|
target_slot='level',
|
|
typedb_entity='career-position',
|
|
typedb_attribute='seniority-level',
|
|
rdf_predicate='schema:occupationalCategory',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Career level (e.g., "Specialist", "Manager")',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.career_history[].description',
|
|
source_description='Role description',
|
|
target_class='CareerPosition',
|
|
target_slot='description',
|
|
typedb_entity='career-position',
|
|
typedb_attribute='role-description',
|
|
rdf_predicate='schema:description',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Free-text description of the role',
|
|
),
|
|
],
|
|
generated_classes=['CareerPosition', 'Person'],
|
|
example_yaml="""
|
|
profile_data:
|
|
career_history:
|
|
- organization: Nationaal Archief
|
|
organization_linkedin: https://www.linkedin.com/company/nationaal-archief
|
|
role: Staff Member At The Services Department
|
|
role_english: Staff Member At The Services Department
|
|
dates: Apr 2014 - Present
|
|
duration: 11 years and 7 months
|
|
location: Den Haag
|
|
current: true
|
|
company_size: 201-500 employees
|
|
company_founded: 1802
|
|
company_type: Government Agency
|
|
industry: Government Administration
|
|
department: Other
|
|
level: Specialist
|
|
description: null
|
|
""".strip(),
|
|
),
|
|
|
|
# -------------------------------------------------------------------------
|
|
# EDUCATION - Academic background
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='education',
|
|
description='Educational background and qualifications',
|
|
detailed_description="""
|
|
Educational history including degrees, institutions, and duration.
|
|
Links to institution LinkedIn pages when available.
|
|
|
|
This data helps understand the academic foundation and
|
|
qualifications of heritage professionals.
|
|
""".strip(),
|
|
linkml_class='Education',
|
|
typedb_entity='education',
|
|
provenance=Provenance(
|
|
source_type='external_api',
|
|
data_tier=DataTier.TIER_2_VERIFIED,
|
|
),
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='profile_data.education[].institution',
|
|
source_description='Educational institution name',
|
|
target_class='Education',
|
|
target_slot='institution',
|
|
typedb_entity='education',
|
|
typedb_attribute='institution-name',
|
|
rdf_predicate='schema:educationalCredentialAwarded',
|
|
transformation=TransformationType.DIRECT,
|
|
required=True,
|
|
notes='Name of university/school',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.education[].institution_linkedin',
|
|
source_description='LinkedIn URL for institution',
|
|
target_class='Education',
|
|
target_slot='institution_linkedin',
|
|
typedb_entity='education',
|
|
typedb_attribute='institution-linkedin-url',
|
|
rdf_predicate='schema:sameAs',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='LinkedIn school page URL',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.education[].degree',
|
|
source_description='Degree and field of study',
|
|
target_class='Education',
|
|
target_slot='degree',
|
|
typedb_entity='education',
|
|
typedb_attribute='degree',
|
|
rdf_predicate='schema:educationalLevel',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Degree type and major (e.g., "MA, History")',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.education[].years',
|
|
source_description='Years attended',
|
|
target_class='Education',
|
|
target_slot='years',
|
|
typedb_entity='education',
|
|
typedb_attribute='years-attended',
|
|
rdf_predicate='schema:temporalCoverage',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Date range (e.g., "2001 - 2007")',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.education[].duration',
|
|
source_description='Duration of study',
|
|
target_class='Education',
|
|
target_slot='duration',
|
|
typedb_entity='education',
|
|
typedb_attribute='study-duration',
|
|
rdf_predicate='schema:duration',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Calculated duration (e.g., "6 years")',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.education[].country',
|
|
source_description='Country of institution',
|
|
target_class='Education',
|
|
target_slot='country',
|
|
typedb_entity='education',
|
|
typedb_attribute='education-country',
|
|
rdf_predicate='schema:addressCountry',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='ISO country code of institution',
|
|
),
|
|
],
|
|
generated_classes=['Education', 'Person'],
|
|
example_yaml="""
|
|
profile_data:
|
|
education:
|
|
- institution: Universiteit Utrecht
|
|
institution_linkedin: https://www.linkedin.com/school/universiteit-utrecht
|
|
degree: MA, History
|
|
years: 2001 - 2007
|
|
duration: 6 years
|
|
country: NL
|
|
""".strip(),
|
|
),
|
|
|
|
# -------------------------------------------------------------------------
|
|
# SKILLS & EXPERTISE - Professional capabilities
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='skills_expertise',
|
|
description='Professional skills, languages, and expertise areas',
|
|
detailed_description="""
|
|
Professional skills endorsed on LinkedIn, language proficiencies,
|
|
and identified expertise areas based on career history analysis.
|
|
|
|
Expertise areas are derived from analyzing the person's complete
|
|
professional background in the heritage sector.
|
|
""".strip(),
|
|
linkml_class='Person',
|
|
typedb_entity='person',
|
|
provenance=Provenance(
|
|
source_type='external_api',
|
|
data_tier=DataTier.TIER_2_VERIFIED,
|
|
),
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='profile_data.skills',
|
|
source_description='Professional skills list',
|
|
target_class='Person',
|
|
target_slot='skills',
|
|
typedb_entity='person',
|
|
typedb_attribute='skills',
|
|
rdf_predicate='schema:knowsAbout',
|
|
transformation=TransformationType.ARRAY_DIRECT,
|
|
required=False,
|
|
notes='LinkedIn-endorsed skills',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.languages',
|
|
source_description='Language proficiencies',
|
|
target_class='Person',
|
|
target_slot='languages',
|
|
typedb_entity='person',
|
|
typedb_attribute='languages',
|
|
rdf_predicate='schema:knowsLanguage',
|
|
transformation=TransformationType.ARRAY_DIRECT,
|
|
required=False,
|
|
notes='Languages spoken with proficiency levels',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.expertise_areas',
|
|
source_description='Identified expertise areas',
|
|
target_class='Person',
|
|
target_slot='expertise_areas',
|
|
typedb_entity='person',
|
|
typedb_attribute='expertise-areas',
|
|
rdf_predicate='schema:hasOccupation',
|
|
transformation=TransformationType.ARRAY_DIRECT,
|
|
required=False,
|
|
notes='Derived from career analysis',
|
|
),
|
|
],
|
|
generated_classes=['Person'],
|
|
example_yaml="""
|
|
profile_data:
|
|
skills:
|
|
- Digital Preservation
|
|
- Archival Description
|
|
- Collection Management
|
|
languages:
|
|
- Dutch (Native)
|
|
- English (Professional)
|
|
expertise_areas:
|
|
- Archival services
|
|
- Public services
|
|
- History research
|
|
- Library services
|
|
""".strip(),
|
|
),
|
|
|
|
# -------------------------------------------------------------------------
|
|
# HERITAGE RELEVANCE - Sector-specific assessment
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='heritage_relevance',
|
|
description='Heritage sector relevance assessment',
|
|
detailed_description="""
|
|
Assessment of the person's relevance to the heritage sector.
|
|
Includes heritage type classification (GLAMORCUBESFIXPHDNT),
|
|
current institution, sector role, and years of heritage experience.
|
|
|
|
This provides a quick overview of where the person fits
|
|
within the heritage ecosystem.
|
|
""".strip(),
|
|
linkml_class='HeritageRelevance',
|
|
typedb_entity='heritage-relevance',
|
|
provenance=Provenance(
|
|
source_type='computed',
|
|
data_tier=DataTier.TIER_3_CROWD_SOURCED,
|
|
),
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='heritage_sector_relevance.heritage_type',
|
|
source_description='Heritage type code',
|
|
target_class='HeritageRelevance',
|
|
target_slot='heritage_type',
|
|
typedb_entity='heritage-relevance',
|
|
typedb_attribute='heritage-type-code',
|
|
rdf_predicate='glam:heritageType',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Single letter GLAMORCUBESFIXPHDNT code',
|
|
),
|
|
FieldMapping(
|
|
source_path='heritage_sector_relevance.heritage_type_label',
|
|
source_description='Heritage type label',
|
|
target_class='HeritageRelevance',
|
|
target_slot='heritage_type_label',
|
|
typedb_entity='heritage-relevance',
|
|
typedb_attribute='heritage-type-label',
|
|
rdf_predicate='rdfs:label',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Human-readable type label (e.g., "Archive")',
|
|
),
|
|
FieldMapping(
|
|
source_path='heritage_sector_relevance.current_institution',
|
|
source_description='Current heritage institution',
|
|
target_class='HeritageRelevance',
|
|
target_slot='current_institution',
|
|
typedb_entity='heritage-relevance',
|
|
typedb_attribute='current-institution',
|
|
rdf_predicate='schema:worksFor',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Name of current heritage employer',
|
|
),
|
|
FieldMapping(
|
|
source_path='heritage_sector_relevance.institution_type',
|
|
source_description='Type of institution',
|
|
target_class='HeritageRelevance',
|
|
target_slot='institution_type',
|
|
typedb_entity='heritage-relevance',
|
|
typedb_attribute='institution-type',
|
|
rdf_predicate='schema:additionalType',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Classification of the institution',
|
|
),
|
|
FieldMapping(
|
|
source_path='heritage_sector_relevance.sector_role',
|
|
source_description='Role within heritage sector',
|
|
target_class='HeritageRelevance',
|
|
target_slot='sector_role',
|
|
typedb_entity='heritage-relevance',
|
|
typedb_attribute='sector-role',
|
|
rdf_predicate='schema:jobTitle',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Standardized role classification',
|
|
),
|
|
FieldMapping(
|
|
source_path='heritage_sector_relevance.years_in_heritage',
|
|
source_description='Years of heritage experience',
|
|
target_class='HeritageRelevance',
|
|
target_slot='years_in_heritage',
|
|
typedb_entity='heritage-relevance',
|
|
typedb_attribute='years-in-heritage',
|
|
rdf_predicate='schema:experienceYears',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Total years in heritage sector',
|
|
),
|
|
],
|
|
generated_classes=['HeritageRelevance', 'Person'],
|
|
example_yaml="""
|
|
heritage_sector_relevance:
|
|
heritage_type: A
|
|
heritage_type_label: Archive
|
|
current_institution: Nationaal Archief
|
|
institution_type: National Archive
|
|
sector_role: Services Staff
|
|
years_in_heritage: 11
|
|
""".strip(),
|
|
),
|
|
|
|
# -------------------------------------------------------------------------
|
|
# HERITAGE EXPERIENCE - Relevant positions
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='heritage_experience',
|
|
description='Heritage-relevant experience from career history',
|
|
detailed_description="""
|
|
Filtered list of positions that are relevant to the heritage sector.
|
|
Extracted from full career history with relevance annotations.
|
|
|
|
Includes both current and past positions at heritage institutions
|
|
with notes explaining their relevance to the GLAM sector.
|
|
""".strip(),
|
|
linkml_class='HeritageExperience',
|
|
typedb_entity='heritage-experience',
|
|
provenance=Provenance(
|
|
source_type='computed',
|
|
data_tier=DataTier.TIER_3_CROWD_SOURCED,
|
|
),
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='profile_data.heritage_relevant_experience[].organization',
|
|
source_description='Heritage organization name',
|
|
target_class='HeritageExperience',
|
|
target_slot='organization',
|
|
typedb_entity='heritage-experience',
|
|
typedb_attribute='heritage-org-name',
|
|
rdf_predicate='schema:worksFor',
|
|
transformation=TransformationType.DIRECT,
|
|
required=True,
|
|
notes='Name of heritage institution',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.heritage_relevant_experience[].role',
|
|
source_description='Role at heritage organization',
|
|
target_class='HeritageExperience',
|
|
target_slot='role',
|
|
typedb_entity='heritage-experience',
|
|
typedb_attribute='heritage-role',
|
|
rdf_predicate='schema:jobTitle',
|
|
transformation=TransformationType.DIRECT,
|
|
required=True,
|
|
notes='Job title at heritage institution',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.heritage_relevant_experience[].relevance',
|
|
source_description='Relevance explanation',
|
|
target_class='HeritageExperience',
|
|
target_slot='relevance',
|
|
typedb_entity='heritage-experience',
|
|
typedb_attribute='relevance-notes',
|
|
rdf_predicate='schema:description',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Why this position is heritage-relevant',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.heritage_relevant_experience[].current',
|
|
source_description='Is current position',
|
|
target_class='HeritageExperience',
|
|
target_slot='current',
|
|
typedb_entity='heritage-experience',
|
|
typedb_attribute='is-current-heritage',
|
|
rdf_predicate='schema:currentPosition',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Whether this is a current position',
|
|
),
|
|
],
|
|
generated_classes=['HeritageExperience', 'Person'],
|
|
example_yaml="""
|
|
profile_data:
|
|
heritage_relevant_experience:
|
|
- organization: Nationaal Archief
|
|
role: Staff Member At The Services Department
|
|
relevance: Public services at National Archives of the Netherlands
|
|
current: true
|
|
- organization: University Library Utrecht
|
|
role: Library Employee
|
|
relevance: Academic library experience
|
|
current: false
|
|
""".strip(),
|
|
),
|
|
|
|
# -------------------------------------------------------------------------
|
|
# AFFILIATIONS - Custodian connections
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='affiliations',
|
|
description='Affiliations with heritage custodians',
|
|
detailed_description="""
|
|
Links between the person and heritage custodian institutions.
|
|
Each affiliation includes the custodian name, slug identifier,
|
|
role title, and heritage classification.
|
|
|
|
These affiliations enable network analysis across the heritage
|
|
sector workforce.
|
|
""".strip(),
|
|
linkml_class='Affiliation',
|
|
typedb_entity='affiliation',
|
|
provenance=Provenance(
|
|
source_type='computed',
|
|
data_tier=DataTier.TIER_2_VERIFIED,
|
|
),
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='affiliations[].custodian_name',
|
|
source_description='Heritage custodian name',
|
|
target_class='Affiliation',
|
|
target_slot='custodian_name',
|
|
typedb_entity='affiliation',
|
|
typedb_attribute='custodian-name',
|
|
rdf_predicate='schema:memberOf',
|
|
transformation=TransformationType.DIRECT,
|
|
required=True,
|
|
notes='Name of the heritage institution',
|
|
),
|
|
FieldMapping(
|
|
source_path='affiliations[].custodian_slug',
|
|
source_description='Custodian identifier slug',
|
|
target_class='Affiliation',
|
|
target_slot='custodian_slug',
|
|
typedb_entity='affiliation',
|
|
typedb_attribute='custodian-slug',
|
|
rdf_predicate='schema:identifier',
|
|
transformation=TransformationType.DIRECT,
|
|
required=True,
|
|
notes='URL-safe identifier for the custodian',
|
|
),
|
|
FieldMapping(
|
|
source_path='affiliations[].role_title',
|
|
source_description='Role at custodian',
|
|
target_class='Affiliation',
|
|
target_slot='role_title',
|
|
typedb_entity='affiliation',
|
|
typedb_attribute='affiliation-role',
|
|
rdf_predicate='schema:jobTitle',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Job title at this custodian',
|
|
),
|
|
FieldMapping(
|
|
source_path='affiliations[].heritage_relevant',
|
|
source_description='Is heritage relevant',
|
|
target_class='Affiliation',
|
|
target_slot='heritage_relevant',
|
|
typedb_entity='affiliation',
|
|
typedb_attribute='is-heritage-relevant',
|
|
rdf_predicate='glam:heritageRelevant',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Whether affiliation is heritage-relevant',
|
|
),
|
|
FieldMapping(
|
|
source_path='affiliations[].heritage_type',
|
|
source_description='Heritage type code',
|
|
target_class='Affiliation',
|
|
target_slot='heritage_type',
|
|
typedb_entity='affiliation',
|
|
typedb_attribute='affiliation-heritage-type',
|
|
rdf_predicate='glam:heritageType',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='GLAMORCUBESFIXPHDNT type code',
|
|
),
|
|
FieldMapping(
|
|
source_path='affiliations[].current',
|
|
source_description='Is current affiliation',
|
|
target_class='Affiliation',
|
|
target_slot='current',
|
|
typedb_entity='affiliation',
|
|
typedb_attribute='is-current-affiliation',
|
|
rdf_predicate='schema:currentPosition',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Whether this is a current affiliation',
|
|
),
|
|
FieldMapping(
|
|
source_path='affiliations[].observed_on',
|
|
source_description='Observation timestamp',
|
|
target_class='Affiliation',
|
|
target_slot='observed_on',
|
|
typedb_entity='affiliation',
|
|
typedb_attribute='observed-on',
|
|
rdf_predicate='prov:generatedAtTime',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='When this affiliation was observed',
|
|
),
|
|
FieldMapping(
|
|
source_path='affiliations[].source_url',
|
|
source_description='Source URL for affiliation',
|
|
target_class='Affiliation',
|
|
target_slot='source_url',
|
|
typedb_entity='affiliation',
|
|
typedb_attribute='affiliation-source-url',
|
|
rdf_predicate='prov:wasDerivedFrom',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='URL where affiliation was discovered',
|
|
),
|
|
],
|
|
generated_classes=['Affiliation'],
|
|
example_yaml="""
|
|
affiliations:
|
|
- custodian_name: Nationaal Archief
|
|
custodian_slug: nationaal-archief
|
|
role_title: Staff member at the Services Department at Nationaal Archief
|
|
heritage_relevant: true
|
|
heritage_type: A
|
|
current: true
|
|
observed_on: 2025-12-14T11:21:47Z
|
|
source_url: https://www.linkedin.com/company/nationaal-archief/people/
|
|
""".strip(),
|
|
),
|
|
|
|
# -------------------------------------------------------------------------
|
|
# LINKED RECORDS - Cross-references
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='linked_records',
|
|
description='Links to related records in the system',
|
|
detailed_description="""
|
|
Cross-references to other records in the heritage data system.
|
|
Includes links to staff records (parsed from LinkedIn company pages)
|
|
and custodian records (heritage institution YAML files).
|
|
|
|
These links enable navigation between person profiles and
|
|
the institutions they work for.
|
|
""".strip(),
|
|
linkml_class='LinkedRecords',
|
|
typedb_entity='linked-records',
|
|
provenance=Provenance(
|
|
source_type='computed',
|
|
data_tier=DataTier.TIER_2_VERIFIED,
|
|
),
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='linked_records.staff_record.file',
|
|
source_description='Staff record file path',
|
|
target_class='LinkedRecords',
|
|
target_slot='staff_record_file',
|
|
typedb_entity='linked-records',
|
|
typedb_attribute='staff-record-path',
|
|
rdf_predicate='prov:wasDerivedFrom',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Path to parsed staff JSON file',
|
|
),
|
|
FieldMapping(
|
|
source_path='linked_records.staff_record.staff_id',
|
|
source_description='Staff record ID',
|
|
target_class='LinkedRecords',
|
|
target_slot='staff_id',
|
|
typedb_entity='linked-records',
|
|
typedb_attribute='staff-id',
|
|
rdf_predicate='schema:identifier',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Unique staff identifier',
|
|
),
|
|
FieldMapping(
|
|
source_path='linked_records.custodian_record.ghcid',
|
|
source_description='Custodian GHCID',
|
|
target_class='LinkedRecords',
|
|
target_slot='custodian_ghcid',
|
|
typedb_entity='linked-records',
|
|
typedb_attribute='linked-ghcid',
|
|
rdf_predicate='glam:ghcid',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='GHCID of linked custodian',
|
|
),
|
|
FieldMapping(
|
|
source_path='linked_records.custodian_record.notes',
|
|
source_description='Custodian record notes',
|
|
target_class='LinkedRecords',
|
|
target_slot='custodian_notes',
|
|
typedb_entity='linked-records',
|
|
typedb_attribute='custodian-notes',
|
|
rdf_predicate='schema:description',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Additional notes about the custodian link',
|
|
),
|
|
],
|
|
generated_classes=['LinkedRecords'],
|
|
example_yaml="""
|
|
linked_records:
|
|
staff_record:
|
|
file: data/custodian/person/affiliated/parsed/nationaal-archief_staff_20251210T155415Z.json
|
|
staff_id: nationaal-archief_staff_0002_iris_van_meer
|
|
custodian_record:
|
|
ghcid: NL-ZH-DHA-A-NA
|
|
notes: Nationaal Archief, The Hague
|
|
""".strip(),
|
|
),
|
|
|
|
# -------------------------------------------------------------------------
|
|
# CONTACT DATA - Contact information
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='contact_data',
|
|
description='Contact information and communication channels',
|
|
detailed_description="""
|
|
Contact information including email addresses and phone numbers.
|
|
Emails may be inferred from organizational naming conventions
|
|
with confidence scores indicating reliability.
|
|
|
|
Also includes profile photo URLs and external lookup service links.
|
|
""".strip(),
|
|
linkml_class='ContactData',
|
|
typedb_entity='contact-data',
|
|
provenance=Provenance(
|
|
source_type='computed',
|
|
data_tier=DataTier.TIER_3_CROWD_SOURCED,
|
|
),
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='contact_data.provenance.source',
|
|
source_description='Contact data source',
|
|
target_class='ContactData',
|
|
target_slot='provenance_source',
|
|
typedb_entity='contact-data',
|
|
typedb_attribute='contact-source',
|
|
rdf_predicate='prov:wasAttributedTo',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='How contact data was obtained',
|
|
),
|
|
FieldMapping(
|
|
source_path='contact_data.emails[].email',
|
|
source_description='Email address',
|
|
target_class='ContactData',
|
|
target_slot='email',
|
|
typedb_entity='contact-data',
|
|
typedb_attribute='email-address',
|
|
rdf_predicate='schema:email',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Email address (may be inferred)',
|
|
),
|
|
FieldMapping(
|
|
source_path='contact_data.emails[].type',
|
|
source_description='Email type',
|
|
target_class='ContactData',
|
|
target_slot='email_type',
|
|
typedb_entity='contact-data',
|
|
typedb_attribute='email-type',
|
|
rdf_predicate='schema:contactType',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Type of email (work, personal)',
|
|
),
|
|
FieldMapping(
|
|
source_path='contact_data.emails[].confidence',
|
|
source_description='Email confidence score',
|
|
target_class='ContactData',
|
|
target_slot='email_confidence',
|
|
typedb_entity='contact-data',
|
|
typedb_attribute='email-confidence',
|
|
rdf_predicate='prov:confidence',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Confidence in email accuracy (0-1)',
|
|
),
|
|
FieldMapping(
|
|
source_path='contact_data.emails[].verified',
|
|
source_description='Email verification status',
|
|
target_class='ContactData',
|
|
target_slot='email_verified',
|
|
typedb_entity='contact-data',
|
|
typedb_attribute='email-verified',
|
|
rdf_predicate='schema:verified',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Whether email has been verified',
|
|
),
|
|
FieldMapping(
|
|
source_path='contact_data.profile_photo_url',
|
|
source_description='Profile photo URL',
|
|
target_class='ContactData',
|
|
target_slot='profile_photo_url',
|
|
typedb_entity='contact-data',
|
|
typedb_attribute='profile-photo',
|
|
rdf_predicate='schema:image',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='URL to profile photo',
|
|
),
|
|
FieldMapping(
|
|
source_path='contact_data.rocketreach_url',
|
|
source_description='RocketReach lookup URL',
|
|
target_class='ContactData',
|
|
target_slot='rocketreach_url',
|
|
typedb_entity='contact-data',
|
|
typedb_attribute='rocketreach-url',
|
|
rdf_predicate='schema:sameAs',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Link to RocketReach profile lookup',
|
|
),
|
|
],
|
|
generated_classes=['ContactData'],
|
|
example_yaml="""
|
|
contact_data:
|
|
provenance:
|
|
source: LinkedIn profile + Dutch government naming convention
|
|
source_url: https://www.linkedin.com/in/iris-van-meer-34329131
|
|
retrieved_date: 2025-12-14
|
|
extraction_method: naming_convention_inference
|
|
emails:
|
|
- email: iris.van.meer@nationaalarchief.nl
|
|
domain: nationaalarchief.nl
|
|
type: work
|
|
source: inferred
|
|
confidence: 0.8
|
|
verified: false
|
|
phones: []
|
|
profile_photo_url: https://media.licdn.com/dms/image/v2/...
|
|
""".strip(),
|
|
),
|
|
|
|
# -------------------------------------------------------------------------
|
|
# EXTRACTION METADATA - Data provenance
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='extraction_metadata',
|
|
description='Metadata about how the profile was extracted',
|
|
detailed_description="""
|
|
Provenance information about the data extraction process.
|
|
Includes source file references, extraction timestamps,
|
|
methods used, and cost tracking for API calls.
|
|
|
|
This ensures full traceability of data origin and enables
|
|
reproducibility of the extraction process.
|
|
""".strip(),
|
|
linkml_class='ExtractionMetadata',
|
|
typedb_entity='extraction-metadata',
|
|
provenance=Provenance(
|
|
source_type='computed',
|
|
data_tier=DataTier.TIER_1_AUTHORITATIVE,
|
|
),
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='extraction_metadata.source_file',
|
|
source_description='Source file path',
|
|
target_class='ExtractionMetadata',
|
|
target_slot='source_file',
|
|
typedb_entity='extraction-metadata',
|
|
typedb_attribute='source-file-path',
|
|
rdf_predicate='prov:wasDerivedFrom',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Original source file for extraction',
|
|
),
|
|
FieldMapping(
|
|
source_path='extraction_metadata.staff_id',
|
|
source_description='Staff identifier',
|
|
target_class='ExtractionMetadata',
|
|
target_slot='staff_id',
|
|
typedb_entity='extraction-metadata',
|
|
typedb_attribute='extraction-staff-id',
|
|
rdf_predicate='schema:identifier',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Staff ID from source data',
|
|
),
|
|
FieldMapping(
|
|
source_path='extraction_metadata.extraction_date',
|
|
source_description='Extraction timestamp',
|
|
target_class='ExtractionMetadata',
|
|
target_slot='extraction_date',
|
|
typedb_entity='extraction-metadata',
|
|
typedb_attribute='extraction-date',
|
|
rdf_predicate='prov:generatedAtTime',
|
|
transformation=TransformationType.DIRECT,
|
|
required=True,
|
|
notes='ISO 8601 timestamp of extraction',
|
|
),
|
|
FieldMapping(
|
|
source_path='extraction_metadata.extraction_method',
|
|
source_description='Method used for extraction',
|
|
target_class='ExtractionMetadata',
|
|
target_slot='extraction_method',
|
|
typedb_entity='extraction-metadata',
|
|
typedb_attribute='extraction-method',
|
|
rdf_predicate='prov:wasGeneratedBy',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Tool/API used (e.g., exa_crawling_exa)',
|
|
),
|
|
FieldMapping(
|
|
source_path='extraction_metadata.extraction_agent',
|
|
source_description='Agent performing extraction',
|
|
target_class='ExtractionMetadata',
|
|
target_slot='extraction_agent',
|
|
typedb_entity='extraction-metadata',
|
|
typedb_attribute='extraction-agent',
|
|
rdf_predicate='prov:wasAttributedTo',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='AI agent or script name',
|
|
),
|
|
FieldMapping(
|
|
source_path='extraction_metadata.linkedin_url',
|
|
source_description='Source LinkedIn URL',
|
|
target_class='ExtractionMetadata',
|
|
target_slot='linkedin_url',
|
|
typedb_entity='extraction-metadata',
|
|
typedb_attribute='extraction-linkedin-url',
|
|
rdf_predicate='prov:hadPrimarySource',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='LinkedIn profile URL that was extracted',
|
|
),
|
|
FieldMapping(
|
|
source_path='extraction_metadata.cost_usd',
|
|
source_description='Extraction cost in USD',
|
|
target_class='ExtractionMetadata',
|
|
target_slot='cost_usd',
|
|
typedb_entity='extraction-metadata',
|
|
typedb_attribute='extraction-cost',
|
|
rdf_predicate='schema:price',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='API cost for extraction',
|
|
),
|
|
FieldMapping(
|
|
source_path='extraction_metadata.request_id',
|
|
source_description='API request identifier',
|
|
target_class='ExtractionMetadata',
|
|
target_slot='request_id',
|
|
typedb_entity='extraction-metadata',
|
|
typedb_attribute='api-request-id',
|
|
rdf_predicate='schema:identifier',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Unique request ID for tracing',
|
|
),
|
|
],
|
|
generated_classes=['ExtractionMetadata'],
|
|
example_yaml="""
|
|
extraction_metadata:
|
|
source_file: null
|
|
staff_id: null
|
|
extraction_date: 2025-12-13T17:35:24.524090+00:00
|
|
extraction_method: exa_crawling_exa
|
|
extraction_agent: claude-opus-4.5
|
|
linkedin_url: https://www.linkedin.com/in/iris-van-meer-34329131
|
|
cost_usd: 0
|
|
request_id: null
|
|
""".strip(),
|
|
),
|
|
|
|
# -------------------------------------------------------------------------
|
|
# WEB CLAIMS - Verifiable claims from web sources
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='web_claims',
|
|
description='Web claims with provenance tracking',
|
|
detailed_description="""
|
|
Individual claims extracted from web sources with full provenance.
|
|
Each claim includes the claim type, value, source URL, retrieval
|
|
timestamp, and the agent/tool that performed the extraction.
|
|
|
|
This follows the WebObservation pattern for verifiable data claims.
|
|
""".strip(),
|
|
linkml_class='WebClaim',
|
|
typedb_entity='web-claim',
|
|
provenance=Provenance(
|
|
source_type='external_api',
|
|
data_tier=DataTier.TIER_2_VERIFIED,
|
|
),
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='web_claims[].claim_type',
|
|
source_description='Type of claim',
|
|
target_class='WebClaim',
|
|
target_slot='claim_type',
|
|
typedb_entity='web-claim',
|
|
typedb_attribute='claim-type',
|
|
rdf_predicate='rdf:type',
|
|
transformation=TransformationType.DIRECT,
|
|
required=True,
|
|
notes='Category of claim (e.g., full_name, role_title)',
|
|
),
|
|
FieldMapping(
|
|
source_path='web_claims[].claim_value',
|
|
source_description='Value of the claim',
|
|
target_class='WebClaim',
|
|
target_slot='claim_value',
|
|
typedb_entity='web-claim',
|
|
typedb_attribute='claim-value',
|
|
rdf_predicate='rdf:value',
|
|
transformation=TransformationType.DIRECT,
|
|
required=True,
|
|
notes='The actual claimed value',
|
|
),
|
|
FieldMapping(
|
|
source_path='web_claims[].source_url',
|
|
source_description='URL source of claim',
|
|
target_class='WebClaim',
|
|
target_slot='source_url',
|
|
typedb_entity='web-claim',
|
|
typedb_attribute='claim-source-url',
|
|
rdf_predicate='prov:wasDerivedFrom',
|
|
transformation=TransformationType.DIRECT,
|
|
required=True,
|
|
notes='Web page where claim was found',
|
|
),
|
|
FieldMapping(
|
|
source_path='web_claims[].retrieved_on',
|
|
source_description='Retrieval timestamp',
|
|
target_class='WebClaim',
|
|
target_slot='retrieved_on',
|
|
typedb_entity='web-claim',
|
|
typedb_attribute='claim-retrieved-on',
|
|
rdf_predicate='prov:generatedAtTime',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='When the claim was retrieved',
|
|
),
|
|
FieldMapping(
|
|
source_path='web_claims[].retrieval_agent',
|
|
source_description='Agent that retrieved claim',
|
|
target_class='WebClaim',
|
|
target_slot='retrieval_agent',
|
|
typedb_entity='web-claim',
|
|
typedb_attribute='claim-retrieval-agent',
|
|
rdf_predicate='prov:wasAttributedTo',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='Tool/agent that extracted the claim',
|
|
),
|
|
],
|
|
generated_classes=['WebClaim'],
|
|
example_yaml="""
|
|
web_claims:
|
|
- claim_type: full_name
|
|
claim_value: Iris van Meer
|
|
source_url: https://www.linkedin.com/in/iris-van-meer-34329131
|
|
retrieved_on: 2025-12-14T11:21:47Z
|
|
retrieval_agent: linkedin_html_parser
|
|
- claim_type: role_title
|
|
claim_value: Staff member at the Services Department at Nationaal Archief
|
|
source_url: https://www.linkedin.com/in/iris-van-meer-34329131
|
|
retrieved_on: 2025-12-14T11:21:47Z
|
|
retrieval_agent: linkedin_html_parser
|
|
""".strip(),
|
|
),
|
|
|
|
# -------------------------------------------------------------------------
|
|
# PICO MAPPED - PiCo ontology properties that ARE mapped
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='pico_mapped',
|
|
description='PiCo ontology properties mapped to HC person data',
|
|
detailed_description="""
|
|
The Heritage Custodian (HC) system implements a focused subset of the PiCo
|
|
(Persons in Context) ontology, optimized for tracking heritage sector staff.
|
|
|
|
PiCo was designed for historical vital records (birth/death certificates,
|
|
marriage records, census data), but HC uses LinkedIn as the primary data
|
|
source, which provides professional context rather than biographical/genealogical
|
|
data.
|
|
|
|
This section documents which PiCo properties ARE mapped to HC fields,
|
|
showing the semantic alignment between the ontologies.
|
|
""".strip(),
|
|
linkml_class='PersonObservation',
|
|
typedb_entity='person-observation',
|
|
provenance=Provenance(
|
|
source_type='ontology_mapping',
|
|
data_tier=DataTier.TIER_1_AUTHORITATIVE,
|
|
),
|
|
fields=[
|
|
FieldMapping(
|
|
source_path='profile_data.name',
|
|
source_description='Full name of the person',
|
|
target_class='PersonObservation',
|
|
target_slot='name',
|
|
typedb_entity='person-observation',
|
|
typedb_attribute='person-name',
|
|
rdf_predicate='sdo:name',
|
|
transformation=TransformationType.DIRECT,
|
|
required=True,
|
|
notes='PiCo uses sdo:name (Schema.org) for full names. Directly mapped.',
|
|
status='mapped',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.location',
|
|
source_description='Current geographic location',
|
|
target_class='PersonObservation',
|
|
target_slot='location',
|
|
typedb_entity='person-observation',
|
|
typedb_attribute='location-string',
|
|
rdf_predicate='sdo:address',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='PiCo uses sdo:address for location. LinkedIn provides free-text location.',
|
|
status='mapped',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.headline',
|
|
source_description='Current occupation/role',
|
|
target_class='PersonObservation',
|
|
target_slot='headline',
|
|
typedb_entity='person-observation',
|
|
typedb_attribute='headline',
|
|
rdf_predicate='sdo:hasOccupation',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='PiCo uses sdo:hasOccupation for job roles. HC captures this via LinkedIn headline.',
|
|
status='mapped',
|
|
),
|
|
FieldMapping(
|
|
source_path='extraction_metadata.linkedin_url',
|
|
source_description='LinkedIn profile URL as primary source',
|
|
target_class='PersonObservation',
|
|
target_slot='source_url',
|
|
typedb_entity='person-observation',
|
|
typedb_attribute='source-url',
|
|
rdf_predicate='prov:hadPrimarySource',
|
|
transformation=TransformationType.DIRECT,
|
|
required=True,
|
|
notes='PiCo uses prov:hadPrimarySource for provenance. LinkedIn URL serves as source document.',
|
|
status='mapped',
|
|
),
|
|
FieldMapping(
|
|
source_path='affiliations[].role_title',
|
|
source_description='Role at heritage institution',
|
|
target_class='Affiliation',
|
|
target_slot='role_title',
|
|
typedb_entity='person-observation',
|
|
typedb_attribute='role-title',
|
|
rdf_predicate='pico:hasRole',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='PiCo uses pico:hasRole with picot_roles thesaurus. HC captures current institutional roles.',
|
|
status='mapped',
|
|
),
|
|
FieldMapping(
|
|
source_path='extraction_metadata.extraction_date',
|
|
source_description='When observation was recorded',
|
|
target_class='PersonObservation',
|
|
target_slot='observation_date',
|
|
typedb_entity='person-observation',
|
|
typedb_attribute='observation-date',
|
|
rdf_predicate='prov:generatedAtTime',
|
|
transformation=TransformationType.DIRECT,
|
|
required=True,
|
|
notes='PiCo uses prov:generatedAtTime for temporal provenance. Mapped to extraction timestamp.',
|
|
status='mapped',
|
|
),
|
|
FieldMapping(
|
|
source_path='extraction_metadata.extraction_agent',
|
|
source_description='Agent that performed extraction',
|
|
target_class='PersonObservation',
|
|
target_slot='extraction_agent',
|
|
typedb_entity='person-observation',
|
|
typedb_attribute='extraction-agent',
|
|
rdf_predicate='prov:wasAttributedTo',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='PiCo uses prov:wasAttributedTo for agent provenance. Records which tool/agent extracted data.',
|
|
status='mapped',
|
|
),
|
|
FieldMapping(
|
|
source_path='profile_data.profile_image_url',
|
|
source_description='Profile photo URL',
|
|
target_class='PersonObservation',
|
|
target_slot='image_url',
|
|
typedb_entity='person-observation',
|
|
typedb_attribute='profile-image-url',
|
|
rdf_predicate='sdo:image',
|
|
transformation=TransformationType.DIRECT,
|
|
required=False,
|
|
notes='PiCo uses sdo:image for visual representation. LinkedIn CDN URL stored.',
|
|
status='mapped',
|
|
),
|
|
],
|
|
generated_classes=['PersonObservation', 'Affiliation'],
|
|
example_yaml="""
|
|
# PiCo ontology alignment example
|
|
# HC PersonObservation → PiCo PersonObservation
|
|
|
|
profile_data:
|
|
name: "Iris van Meer" # → sdo:name
|
|
headline: "Staff member at..." # → sdo:hasOccupation
|
|
location: "The Hague, Netherlands" # → sdo:address
|
|
profile_image_url: "https://..." # → sdo:image
|
|
|
|
extraction_metadata:
|
|
linkedin_url: "https://linkedin.com/in/..." # → prov:hadPrimarySource
|
|
extraction_date: "2025-12-14T11:21:47Z" # → prov:generatedAtTime
|
|
extraction_agent: "claude-opus-4.5" # → prov:wasAttributedTo
|
|
|
|
affiliations:
|
|
- role_title: "Archivist" # → pico:hasRole
|
|
""".strip(),
|
|
),
|
|
|
|
# -------------------------------------------------------------------------
|
|
# PICO UNMAPPED - PiCo ontology properties intentionally OUT OF SCOPE
|
|
# -------------------------------------------------------------------------
|
|
EnrichmentSourceMapping(
|
|
source_block='pico_unmapped',
|
|
description='PiCo ontology properties intentionally not mapped',
|
|
detailed_description="""
|
|
Many PiCo properties are intentionally NOT mapped in the HC system.
|
|
This is a design decision, not a gap to be filled.
|
|
|
|
**Why these properties are out of scope:**
|
|
|
|
1. **Data source limitation**: LinkedIn profiles don't contain vital records
|
|
(birth dates, death dates, marriage records, baptism records).
|
|
|
|
2. **Use case mismatch**: HC tracks heritage sector workforce, not genealogical
|
|
reconstruction. Family relationships aren't relevant for institutional
|
|
staff directories.
|
|
|
|
3. **Privacy considerations**: Collecting personal biographical data about
|
|
living individuals raises GDPR concerns. Professional context is appropriate;
|
|
personal history is not.
|
|
|
|
4. **Ontology purpose**: PiCo was designed for historical archives processing
|
|
(civil registration, notarial records). HC serves a different purpose.
|
|
|
|
This documentation ensures transparency about the ontology alignment scope.
|
|
""".strip(),
|
|
linkml_class='PersonObservation',
|
|
typedb_entity='person-observation',
|
|
provenance=Provenance(
|
|
source_type='ontology_mapping',
|
|
data_tier=DataTier.TIER_1_AUTHORITATIVE,
|
|
),
|
|
fields=[
|
|
# Vital records - not available from LinkedIn
|
|
FieldMapping(
|
|
source_path=None,
|
|
source_description='Birth date',
|
|
target_class=None,
|
|
target_slot=None,
|
|
typedb_entity='person-observation',
|
|
typedb_attribute=None,
|
|
rdf_predicate='sdo:birthDate',
|
|
transformation=TransformationType.NOT_MAPPED,
|
|
required=False,
|
|
notes='OUT OF SCOPE: LinkedIn does not provide birth dates. Historical vital records property.',
|
|
status='out_of_scope',
|
|
),
|
|
FieldMapping(
|
|
source_path=None,
|
|
source_description='Death date',
|
|
target_class=None,
|
|
target_slot=None,
|
|
typedb_entity='person-observation',
|
|
typedb_attribute=None,
|
|
rdf_predicate='sdo:deathDate',
|
|
transformation=TransformationType.NOT_MAPPED,
|
|
required=False,
|
|
notes='OUT OF SCOPE: LinkedIn profiles are for living professionals. Historical vital records property.',
|
|
status='out_of_scope',
|
|
),
|
|
FieldMapping(
|
|
source_path=None,
|
|
source_description='Birth place',
|
|
target_class=None,
|
|
target_slot=None,
|
|
typedb_entity='person-observation',
|
|
typedb_attribute=None,
|
|
rdf_predicate='sdo:birthPlace',
|
|
transformation=TransformationType.NOT_MAPPED,
|
|
required=False,
|
|
notes='OUT OF SCOPE: LinkedIn does not provide birth location. Use sdo:address for current location.',
|
|
status='out_of_scope',
|
|
),
|
|
FieldMapping(
|
|
source_path=None,
|
|
source_description='Death place',
|
|
target_class=None,
|
|
target_slot=None,
|
|
typedb_entity='person-observation',
|
|
typedb_attribute=None,
|
|
rdf_predicate='sdo:deathPlace',
|
|
transformation=TransformationType.NOT_MAPPED,
|
|
required=False,
|
|
notes='OUT OF SCOPE: LinkedIn profiles are for living professionals.',
|
|
status='out_of_scope',
|
|
),
|
|
FieldMapping(
|
|
source_path=None,
|
|
source_description='Deceased flag',
|
|
target_class=None,
|
|
target_slot=None,
|
|
typedb_entity='person-observation',
|
|
typedb_attribute=None,
|
|
rdf_predicate='pico:deceased',
|
|
transformation=TransformationType.NOT_MAPPED,
|
|
required=False,
|
|
notes='OUT OF SCOPE: HC tracks active professionals. Memorial profiles not in scope.',
|
|
status='out_of_scope',
|
|
),
|
|
FieldMapping(
|
|
source_path=None,
|
|
source_description='Age',
|
|
target_class=None,
|
|
target_slot=None,
|
|
typedb_entity='person-observation',
|
|
typedb_attribute=None,
|
|
rdf_predicate='pico:hasAge',
|
|
transformation=TransformationType.NOT_MAPPED,
|
|
required=False,
|
|
notes='OUT OF SCOPE: Age not available from LinkedIn. Privacy consideration for living individuals.',
|
|
status='out_of_scope',
|
|
),
|
|
FieldMapping(
|
|
source_path=None,
|
|
source_description='Gender',
|
|
target_class=None,
|
|
target_slot=None,
|
|
typedb_entity='person-observation',
|
|
typedb_attribute=None,
|
|
rdf_predicate='sdo:gender',
|
|
transformation=TransformationType.NOT_MAPPED,
|
|
required=False,
|
|
notes='OUT OF SCOPE: Gender not reliably extractable from LinkedIn. Privacy consideration.',
|
|
status='out_of_scope',
|
|
),
|
|
FieldMapping(
|
|
source_path=None,
|
|
source_description='Religion',
|
|
target_class=None,
|
|
target_slot=None,
|
|
typedb_entity='person-observation',
|
|
typedb_attribute=None,
|
|
rdf_predicate='pico:hasReligion',
|
|
transformation=TransformationType.NOT_MAPPED,
|
|
required=False,
|
|
notes='OUT OF SCOPE: Religious affiliation not available from LinkedIn. Privacy consideration.',
|
|
status='out_of_scope',
|
|
),
|
|
# Structured name components - partial mapping
|
|
FieldMapping(
|
|
source_path=None,
|
|
source_description='Given name (first name)',
|
|
target_class=None,
|
|
target_slot=None,
|
|
typedb_entity='person-observation',
|
|
typedb_attribute=None,
|
|
rdf_predicate='sdo:givenName',
|
|
transformation=TransformationType.NOT_MAPPED,
|
|
required=False,
|
|
notes='PARTIAL: Could be extracted from full name, but not reliably for all cultures. Full name (sdo:name) is used instead.',
|
|
status='partial',
|
|
),
|
|
FieldMapping(
|
|
source_path=None,
|
|
source_description='Family name (surname)',
|
|
target_class=None,
|
|
target_slot=None,
|
|
typedb_entity='person-observation',
|
|
typedb_attribute=None,
|
|
rdf_predicate='sdo:familyName',
|
|
transformation=TransformationType.NOT_MAPPED,
|
|
required=False,
|
|
notes='PARTIAL: Could be extracted from full name, but not reliably for all cultures. Full name (sdo:name) is used instead.',
|
|
status='partial',
|
|
),
|
|
FieldMapping(
|
|
source_path=None,
|
|
source_description='Patronym',
|
|
target_class=None,
|
|
target_slot=None,
|
|
typedb_entity='person-observation',
|
|
typedb_attribute=None,
|
|
rdf_predicate='pnv:patronym',
|
|
transformation=TransformationType.NOT_MAPPED,
|
|
required=False,
|
|
notes='OUT OF SCOPE: Patronymic names are historical/cultural. Not extractable from LinkedIn.',
|
|
status='out_of_scope',
|
|
),
|
|
FieldMapping(
|
|
source_path=None,
|
|
source_description='Base surname',
|
|
target_class=None,
|
|
target_slot=None,
|
|
typedb_entity='person-observation',
|
|
typedb_attribute=None,
|
|
rdf_predicate='pnv:baseSurname',
|
|
transformation=TransformationType.NOT_MAPPED,
|
|
required=False,
|
|
notes='OUT OF SCOPE: Genealogical name component. Not relevant for staff tracking.',
|
|
status='out_of_scope',
|
|
),
|
|
FieldMapping(
|
|
source_path=None,
|
|
source_description='Surname prefix',
|
|
target_class=None,
|
|
target_slot=None,
|
|
typedb_entity='person-observation',
|
|
typedb_attribute=None,
|
|
rdf_predicate='pnv:surnamePrefix',
|
|
transformation=TransformationType.NOT_MAPPED,
|
|
required=False,
|
|
notes='OUT OF SCOPE: Dutch tussenvoegsel (van, de, etc.) not separately tracked. Full name preserved.',
|
|
status='out_of_scope',
|
|
),
|
|
FieldMapping(
|
|
source_path=None,
|
|
source_description='Initials',
|
|
target_class=None,
|
|
target_slot=None,
|
|
typedb_entity='person-observation',
|
|
typedb_attribute=None,
|
|
rdf_predicate='pnv:initials',
|
|
transformation=TransformationType.NOT_MAPPED,
|
|
required=False,
|
|
notes='OUT OF SCOPE: Initials not separately extracted. Full name used.',
|
|
status='out_of_scope',
|
|
),
|
|
# Family relationships - 40+ properties not mapped
|
|
FieldMapping(
|
|
source_path=None,
|
|
source_description='Parent relationship',
|
|
target_class=None,
|
|
target_slot=None,
|
|
typedb_entity='person-observation',
|
|
typedb_attribute=None,
|
|
rdf_predicate='sdo:parent',
|
|
transformation=TransformationType.NOT_MAPPED,
|
|
required=False,
|
|
notes='OUT OF SCOPE: Family relationships not relevant for institutional staff tracking.',
|
|
status='out_of_scope',
|
|
),
|
|
FieldMapping(
|
|
source_path=None,
|
|
source_description='Children relationship',
|
|
target_class=None,
|
|
target_slot=None,
|
|
typedb_entity='person-observation',
|
|
typedb_attribute=None,
|
|
rdf_predicate='sdo:children',
|
|
transformation=TransformationType.NOT_MAPPED,
|
|
required=False,
|
|
notes='OUT OF SCOPE: Family relationships not relevant for institutional staff tracking.',
|
|
status='out_of_scope',
|
|
),
|
|
FieldMapping(
|
|
source_path=None,
|
|
source_description='Spouse relationship',
|
|
target_class=None,
|
|
target_slot=None,
|
|
typedb_entity='person-observation',
|
|
typedb_attribute=None,
|
|
rdf_predicate='sdo:spouse',
|
|
transformation=TransformationType.NOT_MAPPED,
|
|
required=False,
|
|
notes='OUT OF SCOPE: Family relationships not relevant for institutional staff tracking.',
|
|
status='out_of_scope',
|
|
),
|
|
FieldMapping(
|
|
source_path=None,
|
|
source_description='Sibling relationship',
|
|
target_class=None,
|
|
target_slot=None,
|
|
typedb_entity='person-observation',
|
|
typedb_attribute=None,
|
|
rdf_predicate='sdo:sibling',
|
|
transformation=TransformationType.NOT_MAPPED,
|
|
required=False,
|
|
notes='OUT OF SCOPE: Family relationships not relevant for institutional staff tracking.',
|
|
status='out_of_scope',
|
|
),
|
|
FieldMapping(
|
|
source_path=None,
|
|
source_description='Extended family (40+ PiCo properties)',
|
|
target_class=None,
|
|
target_slot=None,
|
|
typedb_entity='person-observation',
|
|
typedb_attribute=None,
|
|
rdf_predicate='pico:has*',
|
|
transformation=TransformationType.NOT_MAPPED,
|
|
required=False,
|
|
notes='OUT OF SCOPE: PiCo defines 40+ family relationship properties (grandparent, in-law, step-relations, cousins, etc.). None are mapped - HC tracks professional, not familial relationships.',
|
|
status='out_of_scope',
|
|
),
|
|
# Archival source properties
|
|
FieldMapping(
|
|
source_path=None,
|
|
source_description='Archive component source',
|
|
target_class=None,
|
|
target_slot=None,
|
|
typedb_entity='person-observation',
|
|
typedb_attribute=None,
|
|
rdf_predicate='sdo:ArchiveComponent',
|
|
transformation=TransformationType.NOT_MAPPED,
|
|
required=False,
|
|
notes='OUT OF SCOPE: HC uses LinkedIn as source, not archival documents. Web claims serve similar provenance purpose.',
|
|
status='out_of_scope',
|
|
),
|
|
# Reconstruction properties
|
|
FieldMapping(
|
|
source_path=None,
|
|
source_description='Person reconstruction aggregation',
|
|
target_class=None,
|
|
target_slot=None,
|
|
typedb_entity='person-observation',
|
|
typedb_attribute=None,
|
|
rdf_predicate='pico:PersonReconstruction',
|
|
transformation=TransformationType.NOT_MAPPED,
|
|
required=False,
|
|
notes='FUTURE: PersonReconstruction (aggregating multiple observations) not yet implemented. Currently each person has one LinkedIn-based observation.',
|
|
status='future',
|
|
),
|
|
FieldMapping(
|
|
source_path=None,
|
|
source_description='Derived from relationship',
|
|
target_class=None,
|
|
target_slot=None,
|
|
typedb_entity='person-observation',
|
|
typedb_attribute=None,
|
|
rdf_predicate='prov:wasDerivedFrom',
|
|
transformation=TransformationType.NOT_MAPPED,
|
|
required=False,
|
|
notes='FUTURE: Cross-observation derivation not implemented. Would link reconstructed person to source observations.',
|
|
status='future',
|
|
),
|
|
],
|
|
generated_classes=['PersonObservation'],
|
|
example_yaml="""
|
|
# PiCo properties NOT mapped in HC system
|
|
|
|
# ❌ Vital records (not available from LinkedIn):
|
|
# - sdo:birthDate, sdo:deathDate
|
|
# - sdo:birthPlace, sdo:deathPlace
|
|
# - pico:deceased, pico:hasAge
|
|
|
|
# ❌ Personal characteristics (privacy):
|
|
# - sdo:gender
|
|
# - pico:hasReligion
|
|
|
|
# ❌ Genealogical name components:
|
|
# - pnv:patronym, pnv:baseSurname
|
|
# - pnv:surnamePrefix, pnv:initials
|
|
# (HC uses full sdo:name instead)
|
|
|
|
# ❌ Family relationships (40+ properties):
|
|
# - sdo:parent, sdo:children, sdo:spouse, sdo:sibling
|
|
# - pico:hasGrandparent, pico:hasGrandchild
|
|
# - pico:hasParent-in-law, pico:hasSibling-in-law
|
|
# - pico:hasStepparent, pico:hasStepchild
|
|
# - pico:hasCousin, pico:hasUncle_Aunt
|
|
# - pico:hasFosterParent, pico:hasGodparent
|
|
# ... and many more
|
|
|
|
# ⏳ Future consideration:
|
|
# - pico:PersonReconstruction (multi-source aggregation)
|
|
# - prov:wasDerivedFrom (observation linking)
|
|
""".strip(),
|
|
),
|
|
]
|
|
|
|
|
|
# ============================================================================
|
|
# HELPER FUNCTIONS
|
|
# ============================================================================
|
|
|
|
def get_mapping_for_source(source_block: str) -> Optional[EnrichmentSourceMapping]:
    """Look up an enrichment mapping by its source block name.

    Scans ENRICHMENT_MAPPINGS in order and returns the first entry whose
    ``source_block`` equals *source_block*; returns None when nothing matches.
    """
    candidates = (
        entry for entry in ENRICHMENT_MAPPINGS if entry.source_block == source_block
    )
    return next(candidates, None)
|
|
|
|
|
|
def get_fields_for_class(class_name: str) -> list[FieldMapping]:
    """Collect every field mapping whose ``target_class`` is *class_name*.

    Walks each entry of ENRICHMENT_MAPPINGS and its ``fields`` in order, so the
    result preserves declaration order. Returns an empty list if none match.
    """
    return [
        candidate
        for entry in ENRICHMENT_MAPPINGS
        for candidate in entry.fields
        if candidate.target_class == class_name
    ]
|
|
|
|
|
|
def get_sources_for_class(class_name: str) -> list[str]:
    """Get all source blocks that contribute to a specific LinkML class.

    A mapping in ENRICHMENT_MAPPINGS contributes when either its own
    ``linkml_class`` equals *class_name* or at least one of its ``fields``
    has ``target_class`` equal to *class_name*.

    Args:
        class_name: LinkML class name to look for.

    Returns:
        The ``source_block`` names of contributing mappings, in first-seen
        order and without duplicates. Empty when nothing contributes.
    """
    sources: list[str] = []
    for mapping in ENRICHMENT_MAPPINGS:
        # any() stops at the first matching field instead of scanning them all.
        contributes = mapping.linkml_class == class_name or any(
            field_mapping.target_class == class_name
            for field_mapping in mapping.fields
        )
        # Apply the de-duplication guard to both kinds of match; previously a
        # linkml_class match was appended unconditionally and could repeat a
        # source block that was already listed.
        if contributes and mapping.source_block not in sources:
            sources.append(mapping.source_block)
    return sources
|
|
|
|
|
|
def get_typedb_attribute(class_name: str, slot_name: str) -> Optional[str]:
    """Resolve the TypeDB attribute name for a class/slot pair.

    Returns the ``typedb_attribute`` of the first field mapping in
    ENRICHMENT_MAPPINGS whose ``target_class`` and ``target_slot`` equal the
    given names, or None when no field mapping matches.
    """
    attributes = (
        candidate.typedb_attribute
        for entry in ENRICHMENT_MAPPINGS
        for candidate in entry.fields
        if candidate.target_class == class_name and candidate.target_slot == slot_name
    )
    return next(attributes, None)
|
|
|
|
|
|
def get_rdf_predicate(class_name: str, slot_name: str) -> Optional[str]:
    """Resolve the RDF predicate for a class/slot pair.

    Walks every field of every entry in ``ENRICHMENT_MAPPINGS`` and returns
    the ``rdf_predicate`` of the first field matching both ``class_name`` and
    ``slot_name``. ``None`` is returned when no field matches.
    """
    for source_mapping in ENRICHMENT_MAPPINGS:
        for candidate in source_mapping.fields:
            # Guard clauses: skip anything that is not the requested pair.
            if candidate.target_class != class_name:
                continue
            if candidate.target_slot != slot_name:
                continue
            return candidate.rdf_predicate
    return None
|
|
|
|
|
|
def get_category_for_source(source_block: str) -> Optional[MappingCategory]:
    """Find the custodian category that lists ``source_block``.

    Returns the first entry of ``MAPPING_CATEGORIES`` whose ``sources``
    contains the given block, or ``None`` if no category lists it.
    """
    return next(
        (group for group in MAPPING_CATEGORIES if source_block in group.sources),
        None,
    )
|
|
|
|
|
|
def get_transformation_types() -> list[TransformationType]:
    """Return every ``TransformationType`` member as a new list.

    Members appear in the enum's iteration order.
    """
    return [*TransformationType]
|
|
|
|
|
|
def get_person_mapping_for_source(source_block: str) -> Optional[EnrichmentSourceMapping]:
    """Look up the person mapping registered for ``source_block``.

    ``PERSON_MAPPINGS`` is scanned in order; the first entry whose
    ``source_block`` equals the argument is returned. ``None`` is returned
    when no entry matches.
    """
    found = None
    for entry in PERSON_MAPPINGS:
        if entry.source_block != source_block:
            continue
        found = entry
        break
    return found
|
|
|
|
|
|
def get_person_fields_for_class(class_name: str) -> list[FieldMapping]:
    """Collect every person field mapping whose target class is ``class_name``.

    Fields are gathered mapping by mapping from ``PERSON_MAPPINGS``, keeping
    their original order. The result is an empty list when nothing targets
    the class.
    """
    collected = []
    for source_mapping in PERSON_MAPPINGS:
        collected.extend(
            candidate
            for candidate in source_mapping.fields
            if candidate.target_class == class_name
        )
    return collected
|
|
|
|
|
|
def get_person_sources_for_class(class_name: str) -> list[str]:
    """Return the person source blocks that contribute to a LinkML class.

    A mapping in ``PERSON_MAPPINGS`` contributes when its own
    ``linkml_class`` equals ``class_name`` or when at least one of its fields
    has ``target_class`` equal to ``class_name``.

    Args:
        class_name: Name of the LinkML class to look up.

    Returns:
        The contributing ``source_block`` values in first-seen order, each
        listed at most once. Empty when no mapping contributes.
    """
    sources: list[str] = []
    for mapping in PERSON_MAPPINGS:
        # any() stops at the first matching field, so a mapping is examined
        # only as far as needed to decide whether it contributes.
        contributes = mapping.linkml_class == class_name or any(
            field_mapping.target_class == class_name
            for field_mapping in mapping.fields
        )
        # One duplicate check for both routes: previously only the field
        # route was de-duplicated, so a source block matched via
        # ``linkml_class`` could be listed twice.
        if contributes and mapping.source_block not in sources:
            sources.append(mapping.source_block)
    return sources
|
|
|
|
|
|
def get_person_category_for_source(source_block: str) -> Optional[MappingCategory]:
    """Find the person category that lists ``source_block``.

    Returns the first entry of ``PERSON_CATEGORIES`` whose ``sources``
    contains the given block, or ``None`` if no category lists it.
    """
    listing_categories = (
        group for group in PERSON_CATEGORIES if source_block in group.sources
    )
    return next(listing_categories, None)
|
|
|
|
|
|
def get_mapping_statistics() -> dict:
    """Summarise the custodian mappings in ``ENRICHMENT_MAPPINGS``.

    Returns a dict with these keys:
        total_source_blocks: number of entries in ``ENRICHMENT_MAPPINGS``.
        total_categories: number of entries in ``MAPPING_CATEGORIES``.
        total_fields: number of fields summed over all mappings.
        required_fields: number of fields with a truthy ``required``.
        unique_classes: number of distinct class names seen.
        classes: those class names, sorted.

    Class names come from each mapping's ``linkml_class`` and from every
    field's truthy ``target_class``.
    """
    field_total = 0
    required_total = 0
    class_names = set()

    # Single pass gathers the counters and the class-name set together.
    for source_mapping in ENRICHMENT_MAPPINGS:
        class_names.add(source_mapping.linkml_class)
        field_total += len(source_mapping.fields)
        for field_mapping in source_mapping.fields:
            if field_mapping.required:
                required_total += 1
            if field_mapping.target_class:
                class_names.add(field_mapping.target_class)

    return {
        'total_source_blocks': len(ENRICHMENT_MAPPINGS),
        'total_categories': len(MAPPING_CATEGORIES),
        'total_fields': field_total,
        'required_fields': required_total,
        'unique_classes': len(class_names),
        'classes': sorted(class_names),
    }
|
|
|
|
|
|
def get_person_mapping_statistics() -> dict:
    """Summarise the person mappings in ``PERSON_MAPPINGS``.

    Returns a dict with these keys:
        total_source_blocks: number of entries in ``PERSON_MAPPINGS``.
        total_categories: number of entries in ``PERSON_CATEGORIES``.
        total_fields: number of fields summed over all mappings.
        required_fields: number of fields with a truthy ``required``.
        unique_classes: number of distinct class names seen.
        classes: those class names, sorted.

    Class names come from each mapping's ``linkml_class`` and from every
    field's truthy ``target_class``.
    """
    # Flatten once so the counters and the class set share one field list.
    every_field = [
        field_mapping
        for source_mapping in PERSON_MAPPINGS
        for field_mapping in source_mapping.fields
    ]
    class_names = {source_mapping.linkml_class for source_mapping in PERSON_MAPPINGS}
    class_names |= {
        field_mapping.target_class
        for field_mapping in every_field
        if field_mapping.target_class
    }
    required_total = len(
        [field_mapping for field_mapping in every_field if field_mapping.required]
    )

    return {
        'total_source_blocks': len(PERSON_MAPPINGS),
        'total_categories': len(PERSON_CATEGORIES),
        'total_fields': len(every_field),
        'required_fields': required_total,
        'unique_classes': len(class_names),
        'classes': sorted(class_names),
    }
|
|
|
|
|
|
def get_mappings_for_data_source(data_source: str) -> list[EnrichmentSourceMapping]:
    """Select the custodian mappings whose provenance uses ``data_source``.

    An entry of ``ENRICHMENT_MAPPINGS`` is kept when its
    ``provenance.source_type`` equals ``data_source``. Original order is
    preserved.
    """
    selected = []
    for source_mapping in ENRICHMENT_MAPPINGS:
        if source_mapping.provenance.source_type == data_source:
            selected.append(source_mapping)
    return selected
|
|
|
|
|
|
def get_categories_for_data_source(data_source: str) -> list[MappingCategory]:
    """Select the categories holding at least one mapping from ``data_source``.

    First gathers the ``source_block`` of every entry in
    ``ENRICHMENT_MAPPINGS`` whose ``provenance.source_type`` equals
    ``data_source``. Then keeps each entry of ``MAPPING_CATEGORIES`` whose
    ``sources`` mentions any of those blocks, in original order.
    """
    matching_blocks = set()
    for source_mapping in ENRICHMENT_MAPPINGS:
        if source_mapping.provenance.source_type == data_source:
            matching_blocks.add(source_mapping.source_block)

    selected = []
    for group in MAPPING_CATEGORIES:
        for block_name in group.sources:
            if block_name in matching_blocks:
                selected.append(group)
                break  # one hit is enough to include the category
    return selected
|
|
|
|
|
|
# ============================================================================
# MODULE EXPORTS
# ============================================================================
|
|
|
|
# Public API of this module, grouped by kind of name.
__all__ = [
    # Enumerations
    'TransformationType', 'MappingStatus', 'DataTier',
    # Dataclasses
    'FieldExample', 'FieldValidation', 'FieldMapping',
    'Provenance', 'EnrichmentSourceMapping', 'MappingCategory',
    # Mapping data
    'MAPPING_CATEGORIES', 'ENRICHMENT_MAPPINGS',
    'PERSON_CATEGORIES', 'PERSON_MAPPINGS',
    # Custodian lookup helpers
    'get_mapping_for_source', 'get_fields_for_class', 'get_sources_for_class',
    'get_typedb_attribute', 'get_rdf_predicate', 'get_category_for_source',
    'get_transformation_types',
    # Person lookup helpers
    'get_person_mapping_for_source', 'get_person_fields_for_class',
    'get_person_sources_for_class', 'get_person_category_for_source',
    # Statistics and data-source helpers
    'get_mapping_statistics', 'get_person_mapping_statistics',
    'get_mappings_for_data_source', 'get_categories_for_data_source',
]
|