/**
 * custodian-data-mappings.ts - Data Transformation Mapping Documentation
 *
 * This module documents how raw YAML/JSON data from custodian files maps to:
 * 1. LinkML schema classes and slots
 * 2. TypeDB entities and attributes
 * 3. RDF triples and predicates
 *
 * ARCHITECTURE OVERVIEW:
 * ======================
 *
 * The Heritage Custodian System uses a "hub architecture" where:
 * - CustodianHub: Abstract entity with only persistent hc_id
 * - CustodianObservation: Evidence/claims from a specific source
 * - ReconstructionActivity: Process that generates standardized aspects
 * - Four aspects: LegalStatus, Name, Place, Collection (independent temporal lifecycles)
 *
 * Each enrichment block in YAML (google_maps_enrichment, wikidata_enrichment, etc.)
 * maps to a SEPARATE CustodianObservation with its own provenance.
 *
 * DATA FLOW:
 * ==========
 *
 *   Raw YAML (data/custodian/*.yaml)
 *            ↓
 *     [Transform Layer]
 *            ↓
 *    LinkML Instance Data
 *            ↓
 *     ┌──────┼──────┐
 *     ↓      ↓      ↓
 *    RDF   TypeDB  JSON-LD
 *
 * @module custodian-data-mappings
 * @version 1.0.0
 */

// ============================================================================
// TYPE DEFINITIONS
// ============================================================================

/**
 * Types of data transformations that can occur during mapping.
 *
 * Every `FieldMapping` declares exactly one of these in its `transformation`
 * property; the trailing comment on each member describes what that kind of
 * transformation does to the source value.
 */
export type TransformationType =
  | 'direct'        // 1:1 copy, no transformation
  | 'rename'        // Field name change only
  | 'split'         // One source field → multiple target fields
  | 'merge'         // Multiple source fields → one target field
  | 'lookup'        // Enum value lookup or reference resolution
  | 'computed'      // Derived/calculated value
  | 'nested'        // Nested object mapping (object → object)
  | 'array_map'     // Array transformation (array → array with element mapping)
  | 'array_direct'  // Direct array copy without element transformation
  | 'flatten'       // Nested structure → flat structure
  | 'aggregate'     // Multiple values → single aggregate
  | 'temporal'      // Date/time transformation
  | 'uri_construct' // Construct URI from components
  | 'normalize'     // Normalize/standardize value format
  | 'conditional'   // Conditional transformation based on other fields
  | 'not_mapped';   // Ontology property intentionally not mapped (for coverage documentation)

/**
 * Mapping status for ontology coverage documentation.
 *
 * Carried by the optional `status` property of a `FieldMapping`.
 */
export type MappingStatus =
  | 'mapped'       // Property is fully mapped to HC system
  | 'partial'      // Property is partially mapped (e.g., full name vs components)
  | 'out_of_scope' // Property is intentionally not mapped (design decision)
  | 'future';      // Property may be mapped in future versions

/**
 * Data tier classification (per AGENTS.md).
 *
 * Used as `provenance.dataTier` on an `EnrichmentSourceMapping`. Members are
 * listed from tier 1 (authoritative) to tier 4 (inferred).
 */
export type DataTier =
  | 'TIER_1_AUTHORITATIVE' // CSV registries (ISIL, Dutch orgs)
  | 'TIER_2_VERIFIED'      // Data from institutional websites, APIs
  | 'TIER_3_CROWD_SOURCED' // Wikidata, OpenStreetMap
  | 'TIER_4_INFERRED';     // NLP-extracted from conversations

/**
 * Mapping for a single field from source to target.
 *
 * Describes where a value comes from in the source YAML, which LinkML
 * class/slot it lands in, and (optionally) how it is represented in TypeDB
 * and RDF. The nullable properties exist so that an ontology property that is
 * deliberately left unmapped can still be listed for coverage documentation.
 */
export interface FieldMapping {
  /**
   * JSON path in source YAML (e.g., "google_maps_enrichment.place_id").
   * Array elements are written with `[]` (e.g., "identifiers[].identifier_value").
   * Null for unmapped ontology properties.
   */
  sourcePath: string | null;

  /** Human-readable description of the source field */
  sourceDescription: string;

  /** LinkML class this maps to. Null for unmapped ontology properties. */
  targetClass: string | null;

  /** LinkML slot name. Null for unmapped ontology properties. */
  targetSlot: string | null;

  /** Type of transformation applied */
  transformation: TransformationType;

  /** Human-readable explanation of the transformation */
  transformationDetails?: string;

  /** TypeDB entity type (kebab-case, e.g., "custodian-observation") */
  typedbEntity?: string;

  /** TypeDB attribute name. Null for unmapped ontology properties. */
  typedbAttribute?: string | null;

  /** RDF predicate (CURIE format, e.g., "schema:url") */
  rdfPredicate?: string;

  /** Whether this field is required */
  required: boolean;

  /** Example showing source → target transformation */
  example?: {
    /** Value as it appears in the source YAML */
    sourceValue: unknown;
    /** Value after the transformation has been applied */
    targetValue: unknown;
    /** Value as stored in TypeDB, when an example is given for it */
    typedbValue?: unknown;
    /** Complete example triple as text (subject, predicate, object, terminating dot) */
    rdfTriple?: string;
  };

  /** Related/dependent fields */
  relatedFields?: string[];

  /** Validation rules */
  validation?: {
    /** Expected kind of value */
    type: 'string' | 'number' | 'boolean' | 'date' | 'uri' | 'enum' | 'array';
    /** Pattern the value must match (presumably a regular expression source — confirm with consumers) */
    pattern?: string;
    /** Allowed values when `type` is 'enum' */
    enumValues?: string[];
    minLength?: number;
    maxLength?: number;
  };

  /** Additional notes or comments about this mapping */
  notes?: string;

  /** Mapping status for ontology coverage documentation */
  status?: MappingStatus;
}

/**
 * Complete mapping for an enrichment source block.
 *
 * One entry describes a single top-level block of a custodian YAML file:
 * what it is, which LinkML class / TypeDB entity it primarily produces, where
 * the data comes from, and the per-field mappings.
 */
export interface EnrichmentSourceMapping {
  /** Source block name in YAML (e.g., "google_maps_enrichment") */
  sourceBlock: string;

  /** Human-readable description */
  description: string;

  /** Extended description with usage notes */
  detailedDescription?: string;

  /** Primary LinkML class this maps to */
  linkmlClass: string;

  /** Primary TypeDB entity */
  typedbEntity: string;

  /** Provenance information */
  provenance: {
    /** Kind of source the data originates from (e.g., "google_maps_api", "computed") */
    sourceType: string;
    /** Trust tier of the source */
    dataTier: DataTier;
    /** Base URL of the API the data is fetched from, when there is one */
    apiEndpoint?: string;
    /** Free-text refresh cadence (e.g., "On-demand") */
    updateFrequency?: string;
  };

  /** All field mappings for this source */
  fields: FieldMapping[];

  /** Classes that can be generated from this source */
  generatedClasses?: string[];

  /** Example YAML snippet */
  exampleYaml?: string;
}

/**
 * Category group for organizing categories in UI dropdowns.
 *
 * Referenced by `MappingCategory.group` and used as the key set of
 * `CATEGORY_GROUP_LABELS`.
 */
export type CategoryGroup =
  | 'core'         // Core identity, location, temporal
  | 'enrichment'   // External data sources, web presence
  | 'heritage'     // Heritage-specific, cultural sites, collections
  | 'organization' // Legal, organizational structure, funding
  | 'technical'    // Digital, API, standards
  | 'specialized'; // Archive types, specialized collections

/**
 * Display labels for each category group, in English (`en`) and Dutch (`nl`).
 *
 * Typed as a `Record` over `CategoryGroup`, so a group without a label (or a
 * label for an unknown group) is a compile-time error.
 */
export const CATEGORY_GROUP_LABELS: Record<CategoryGroup, { en: string; nl: string }> = {
  core: { en: 'Core Data', nl: 'Kerngegevens' },
  enrichment: { en: 'Enrichment Sources', nl: 'Verrijkingsbronnen' },
  heritage: { en: 'Heritage & Collections', nl: 'Erfgoed & Collecties' },
  organization: { en: 'Organization & Legal', nl: 'Organisatie & Juridisch' },
  technical: { en: 'Technical & Digital', nl: 'Technisch & Digitaal' },
  specialized: { en: 'Specialized Archives', nl: 'Gespecialiseerde Archieven' },
};

/**
 * Category grouping for mappings in the UI.
 *
 * A category bundles several source blocks under one bilingual (English /
 * Dutch) heading with an icon.
 */
export interface MappingCategory {
  /** Identifier of the category (e.g., "identity") */
  id: string;
  /** English display name */
  name: string;
  /** Dutch display name */
  nameNl: string;
  /** English description */
  description: string;
  /** Dutch description */
  descriptionNl: string;
  /** Icon shown next to the category (an emoji in the categories defined in this file) */
  icon: string;
  /** sourceBlock names belonging to this category */
  sources: string[];
  /** Optional group for UI organization */
  group?: CategoryGroup;
}

// ============================================================================
// MAPPING CATEGORIES
// ============================================================================

/**
 * Categories used to organize source-block mappings in the UI.
 *
 * Each entry lists the `sourceBlock` names it contains in `sources` and the
 * `CategoryGroup` it is shown under. The array is ordered: the original seven
 * categories first, then the Phase 1 and Phase 2 schema-coverage additions.
 *
 * NOTE(review): nothing in this declaration checks that every name in
 * `sources` has a matching entry in `ENRICHMENT_MAPPINGS` — confirm against
 * the full mappings list when adding or renaming a source block.
 */
export const MAPPING_CATEGORIES: MappingCategory[] = [
  {
    id: 'identity',
    name: 'Identity & Identification',
    nameNl: 'Identiteit & Identificatie',
    description: 'Core identity fields: GHCID, names, identifiers',
    descriptionNl: 'Kernidentiteitsvelden: GHCID, namen, identificatiecodes',
    icon: '🪪',
    sources: ['ghcid', 'identifiers', 'custodian_name'],
    group: 'core',
  },
  {
    id: 'location',
    name: 'Location & Geography',
    nameNl: 'Locatie & Geografie',
    description: 'Physical location, addresses, coordinates',
    descriptionNl: 'Fysieke locatie, adressen, coördinaten',
    icon: '📍',
    sources: ['location', 'google_maps_enrichment'],
    group: 'core',
  },
  {
    id: 'external',
    name: 'External Data Sources',
    nameNl: 'Externe Databronnen',
    description: 'Enrichment from external APIs and databases',
    descriptionNl: 'Verrijking van externe API\'s en databases',
    icon: '🔗',
    sources: ['wikidata_enrichment', 'museum_register_enrichment', 'genealogiewerkbalk_enrichment'],
    group: 'enrichment',
  },
  {
    id: 'web',
    name: 'Web & Digital Presence',
    nameNl: 'Web & Digitale Aanwezigheid',
    description: 'Website data, digital platforms, social media',
    descriptionNl: 'Websitegegevens, digitale platformen, sociale media',
    icon: '🌐',
    sources: ['web_enrichment', 'web_claims', 'digital_platforms', 'youtube_enrichment'],
    group: 'enrichment',
  },
  {
    id: 'legal',
    name: 'Legal & Organization',
    nameNl: 'Juridisch & Organisatie',
    description: 'Legal status, organizational structure',
    descriptionNl: 'Juridische status, organisatiestructuur',
    icon: '⚖️',
    sources: ['legal_status', 'original_entry'],
    group: 'organization',
  },
  {
    id: 'temporal',
    name: 'Temporal & Provenance',
    nameNl: 'Temporeel & Herkomst',
    description: 'Time spans, data provenance, versioning',
    descriptionNl: 'Tijdspannes, data-herkomst, versiebeheer',
    icon: '⏱️',
    sources: ['timespan', 'provenance'],
    group: 'core',
  },
  {
    id: 'heritage',
    name: 'Heritage Specific',
    nameNl: 'Erfgoed Specifiek',
    description: 'UNESCO, collections, domain-specific data',
    descriptionNl: 'UNESCO, collecties, domeinspecifieke gegevens',
    icon: '🏛️',
    sources: ['unesco_ich_enrichment'],
    group: 'heritage',
  },
  // -------------------------------------------------------------------------
  // PHASE 1 ADDITIONS: Schema Class Coverage Categories
  // -------------------------------------------------------------------------
  {
    id: 'archive_types',
    name: 'Archive Types',
    nameNl: 'Archieftypen',
    description: 'Specialized archive classification types (97 classes): academic, audiovisual, church, municipal, national, etc.',
    descriptionNl: 'Gespecialiseerde archiefclassificatietypen (97 klassen): academisch, audiovisueel, kerkelijk, gemeentelijk, nationaal, etc.',
    icon: '📦',
    sources: [
      'archive_type_academic', 'archive_type_audiovisual', 'archive_type_church',
      'archive_type_corporate', 'archive_type_government', 'archive_type_municipal',
      'archive_type_national', 'archive_type_regional', 'archive_type_specialized',
      'archive_type_thematic',
    ],
    group: 'specialized',
  },
  {
    id: 'organizational_structure',
    name: 'Organizational Structure',
    nameNl: 'Organisatiestructuur',
    description: 'Organizational hierarchy and structure classes (30+ classes): departments, divisions, branches, parent organizations',
    descriptionNl: 'Organisatiehiërarchie en structuurklassen (30+ klassen): afdelingen, divisies, vestigingen, moederorganisaties',
    icon: '🏢',
    sources: [
      'org_structure_hierarchy', 'org_structure_administrative', 'org_structure_subdivision',
    ],
    group: 'organization',
  },
  {
    id: 'heritage_cultural',
    name: 'Heritage & Cultural Sites',
    nameNl: 'Erfgoed & Culturele Locaties',
    description: 'World heritage sites, intangible heritage, cultural institutions (15+ classes)',
    descriptionNl: 'Werelderfgoedlocaties, immaterieel erfgoed, culturele instellingen (15+ klassen)',
    icon: '🗿',
    sources: [
      'heritage_world_sites', 'heritage_intangible', 'heritage_national_treasures',
    ],
    group: 'heritage',
  },
  {
    id: 'classification_types',
    name: 'Classification Types',
    nameNl: 'Classificatietypen',
    description: 'Type classes for custodian classification (32 classes): MuseumType, LibraryType, ArchiveOrganizationType, etc.',
    descriptionNl: 'Typeklassen voor bronhouderclassificatie (32 klassen): MuseumType, BibliotheekType, ArchiefOrganisatieType, etc.',
    icon: '🏷️',
    sources: [
      'type_classes_glam', 'type_classes_digital', 'type_classes_organizational',
    ],
    group: 'specialized',
  },
  // -------------------------------------------------------------------------
  // PHASE 2 ADDITIONS: Remaining Schema Class Coverage Categories
  // -------------------------------------------------------------------------
  {
    id: 'place_location',
    name: 'Place & Location',
    nameNl: 'Plaats & Locatie',
    description: 'Geographic and spatial location classes (8 classes): settlements, countries, custodian places, feature places',
    descriptionNl: 'Geografische en ruimtelijke locatieklassen (8 klassen): nederzettingen, landen, bronhouderplaatsen, kenmerkplaatsen',
    icon: '📍',
    sources: [
      'place_geographic', 'place_custodian_specific',
    ],
    group: 'specialized',
  },
  {
    id: 'collections',
    name: 'Collections & Holdings',
    nameNl: 'Collecties & Bezittingen',
    description: 'Collection management and holdings classes (6 classes): collections, special collections, collection management systems',
    descriptionNl: 'Collectiebeheer en bezitklassen (6 klassen): collecties, bijzondere collecties, collectiebeheersystemen',
    icon: '🗃️',
    sources: [
      'collection_core', 'collection_management',
    ],
    group: 'heritage',
  },
  {
    id: 'person_staff',
    name: 'Person & Staff',
    nameNl: 'Persoon & Personeel',
    description: 'Person and staff-related classes (9 classes): profiles, connections, work experience, credentials',
    descriptionNl: 'Persoon- en personeelgerelateerde klassen (9 klassen): profielen, connecties, werkervaring, diploma\'s',
    icon: '👥',
    sources: [
      'person_profile_extended', 'person_work_education',
    ],
    group: 'organization',
  },
  {
    id: 'digital_api',
    name: 'Digital & API Services',
    nameNl: 'Digitaal & API Diensten',
    description: 'Digital platforms and API endpoint classes (11 classes): web portals, OAI-PMH, search APIs, file APIs',
    descriptionNl: 'Digitale platformen en API-eindpuntklassen (11 klassen): webportalen, OAI-PMH, zoek-API\'s, bestand-API\'s',
    icon: '🔌',
    sources: [
      'digital_platforms_extended', 'api_endpoints',
    ],
    group: 'technical',
  },
  {
    id: 'video_media',
    name: 'Video & Social Media',
    nameNl: 'Video & Sociale Media',
    description: 'Video content and social media classes (11 classes): video annotations, chapters, social media posts/profiles',
    descriptionNl: 'Video-inhoud en sociale mediaklassen (11 klassen): video-annotaties, hoofdstukken, sociale media posts/profielen',
    icon: '🎬',
    sources: [
      'video_content', 'social_media_content',
    ],
    group: 'technical',
  },
  {
    id: 'legal_admin',
    name: 'Legal & Administrative',
    nameNl: 'Juridisch & Administratief',
    description: 'Legal, policy, and administrative classes (9 classes): access policies, budgets, projects, registration',
    descriptionNl: 'Juridische, beleids- en administratieve klassen (9 klassen): toegangsbeleid, budgetten, projecten, registratie',
    icon: '⚖️',
    sources: [
      'legal_policies', 'administrative_records',
    ],
    group: 'organization',
  },
  {
    id: 'finding_aids',
    name: 'Finding Aids & Standards',
    nameNl: 'Toegangen & Standaarden',
    description: 'Finding aids, standards, and documentation classes (5 classes): finding aids, source documents, standards',
    descriptionNl: 'Toegangen, standaarden en documentatieklassen (5 klassen): toegangen, brondocumenten, standaarden',
    icon: '📑',
    sources: [
      'finding_aids_standards',
    ],
    group: 'technical',
  },
  {
    id: 'reconstruction',
    name: 'Reconstruction & Provenance',
    nameNl: 'Reconstructie & Herkomst',
    description: 'Entity reconstruction and provenance tracking classes (4 classes): reconstructed entities, activities, agents',
    descriptionNl: 'Entiteitsreconstructie en herkomsttrackingklassen (4 klassen): gereconstrueerde entiteiten, activiteiten, agenten',
    icon: '🔄',
    sources: [
      'reconstruction_provenance',
    ],
    group: 'heritage',
  },
  {
    id: 'storage_facilities',
    name: 'Storage & Facilities',
    nameNl: 'Opslag & Faciliteiten',
    description: 'Storage conditions and facility classes (7 classes): storage types, conditions, education centers',
    descriptionNl: 'Opslagcondities en faciliteitenklassen (7 klassen): opslagtypen, condities, onderwijscentra',
    icon: '🏪',
    sources: [
      'storage_facilities',
    ],
    group: 'specialized',
  },
  {
    id: 'funding',
    name: 'Funding & Grants',
    nameNl: 'Financiering & Subsidies',
    description: 'Funding and grant-related classes (3 classes): funding agendas, requirements, applications',
    descriptionNl: 'Financiering- en subsidieklassen (3 klassen): financieringsagenda\'s, vereisten, aanvragen',
    icon: '💰',
    sources: [
      'funding_grants',
    ],
    group: 'organization',
  },
  {
    id: 'language_naming',
    name: 'Language & Naming',
    nameNl: 'Taal & Naamgeving',
    description: 'Language and naming classes (4 classes): language codes, proficiency, appellations',
    descriptionNl: 'Taal- en naamgevingsklassen (4 klassen): taalcodes, taalvaardigheid, benamingen',
    icon: '🗣️',
    sources: [
      'language_naming',
    ],
    group: 'technical',
  },
  {
    id: 'specialized_archives_intl',
    name: 'Specialized Archives (International)',
    nameNl: 'Gespecialiseerde Archieven (Internationaal)',
    description: 'Country-specific specialized archive types (19 classes): German, Swedish, French, Czech archive types',
    descriptionNl: 'Landspecifieke gespecialiseerde archieftypen (19 klassen): Duitse, Zweedse, Franse, Tsjechische archieftypen',
    icon: '🌍',
    sources: [
      'archives_german', 'archives_swedish', 'archives_french', 'archives_other',
    ],
    group: 'specialized',
  },
];

// ============================================================================
// ENRICHMENT SOURCE MAPPINGS
// ============================================================================

export const ENRICHMENT_MAPPINGS: EnrichmentSourceMapping[] = [
|
|
// -------------------------------------------------------------------------
|
|
// GHCID - Global Heritage Custodian Identifier
|
|
// -------------------------------------------------------------------------
|
|
{
|
|
sourceBlock: 'ghcid',
|
|
description: 'Global Heritage Custodian Identifier - persistent unique identifier',
|
|
detailedDescription: `
|
|
The GHCID is the persistent unique identifier for every heritage custodian.
|
|
It follows the format: {COUNTRY}-{REGION}-{SETTLEMENT}-{TYPE}-{ABBREV}
|
|
|
|
Example: NL-NH-AMS-M-RM (Rijksmuseum, Amsterdam, Netherlands)
|
|
|
|
GHCIDs are deterministically generated and hashed to multiple UUID formats
|
|
for different use cases (UUID v5 for primary, UUID v8 for future-proofing).
|
|
`.trim(),
|
|
linkmlClass: 'GHCID',
|
|
typedbEntity: 'ghcid',
|
|
provenance: {
|
|
sourceType: 'computed',
|
|
dataTier: 'TIER_1_AUTHORITATIVE',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'ghcid.ghcid_current',
|
|
sourceDescription: 'Current GHCID string',
|
|
targetClass: 'GHCID',
|
|
targetSlot: 'identified_by',
|
|
transformation: 'direct',
|
|
typedbEntity: 'ghcid',
|
|
typedbAttribute: 'ghcid-string',
|
|
rdfPredicate: 'hc:ghcidString',
|
|
required: true,
|
|
example: {
|
|
sourceValue: 'NL-NH-AMS-M-RM',
|
|
targetValue: 'NL-NH-AMS-M-RM',
|
|
rdfTriple: '<https://w3id.org/hc/NL-NH-AMS-M-RM> hc:ghcidString "NL-NH-AMS-M-RM" .',
|
|
},
|
|
},
|
|
{
|
|
sourcePath: 'ghcid.ghcid_uuid',
|
|
sourceDescription: 'UUID v5 derived from GHCID string',
|
|
targetClass: 'GHCID',
|
|
targetSlot: 'identified_by',
|
|
transformation: 'computed',
|
|
transformationDetails: 'UUID v5 generated using SHA-1 hash of GHCID string with heritage namespace',
|
|
typedbEntity: 'ghcid',
|
|
typedbAttribute: 'ghcid-uuid',
|
|
rdfPredicate: 'hc:ghcidUuid',
|
|
required: true,
|
|
example: {
|
|
sourceValue: '550e8400-e29b-5d4f-a716-446655440000',
|
|
targetValue: '550e8400-e29b-5d4f-a716-446655440000',
|
|
},
|
|
},
|
|
{
|
|
sourcePath: 'ghcid.ghcid_numeric',
|
|
sourceDescription: '64-bit numeric ID for database optimization',
|
|
targetClass: 'GHCID',
|
|
targetSlot: 'identified_by',
|
|
transformation: 'computed',
|
|
transformationDetails: 'SHA-256 hash truncated to 64-bit integer',
|
|
typedbEntity: 'ghcid',
|
|
typedbAttribute: 'ghcid-numeric',
|
|
required: false,
|
|
},
|
|
{
|
|
sourcePath: 'ghcid.location_resolution',
|
|
sourceDescription: 'GeoNames resolution metadata',
|
|
targetClass: 'GHCID',
|
|
targetSlot: 'has_location',
|
|
transformation: 'nested',
|
|
transformationDetails: 'Maps to LocationResolution class with GeoNames provenance',
|
|
typedbEntity: 'location-resolution',
|
|
required: false,
|
|
},
|
|
],
|
|
exampleYaml: `
|
|
ghcid:
|
|
ghcid_current: NL-NH-AMS-M-RM
|
|
ghcid_uuid: 550e8400-e29b-5d4f-a716-446655440000
|
|
ghcid_numeric: 213324328442227739
|
|
location_resolution:
|
|
method: REVERSE_GEOCODE
|
|
geonames_id: 2759794
|
|
geonames_name: Amsterdam
|
|
settlement_code: AMS
|
|
admin1_code: "07"
|
|
region_code: NH
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Identifiers
|
|
// -------------------------------------------------------------------------
|
|
{
|
|
sourceBlock: 'identifiers',
|
|
description: 'External identifiers from various sources',
|
|
detailedDescription: `
|
|
Links to external identifier systems including:
|
|
- ISIL codes (International Standard Identifier for Libraries)
|
|
- Wikidata QIDs
|
|
- VIAF (Virtual International Authority File)
|
|
- KvK numbers (Dutch Chamber of Commerce)
|
|
- Museum Register numbers
|
|
- And more...
|
|
`.trim(),
|
|
linkmlClass: 'Identifier',
|
|
typedbEntity: 'identifier',
|
|
provenance: {
|
|
sourceType: 'registry_lookup',
|
|
dataTier: 'TIER_1_AUTHORITATIVE',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'identifiers[].identifier_scheme',
|
|
sourceDescription: 'Identifier system/scheme name',
|
|
targetClass: 'Identifier',
|
|
targetSlot: 'identified_by',
|
|
transformation: 'lookup',
|
|
transformationDetails: 'Maps to IdentifierSchemeEnum',
|
|
typedbEntity: 'identifier',
|
|
typedbAttribute: 'scheme',
|
|
rdfPredicate: 'hc:identifierScheme',
|
|
required: true,
|
|
validation: {
|
|
type: 'enum',
|
|
enumValues: ['GHCID', 'ISIL', 'Wikidata', 'VIAF', 'KvK', 'MuseumRegister', 'NDE', 'Website'],
|
|
},
|
|
example: {
|
|
sourceValue: 'ISIL',
|
|
targetValue: 'ISIL',
|
|
},
|
|
},
|
|
{
|
|
sourcePath: 'identifiers[].identifier_value',
|
|
sourceDescription: 'The identifier value/code',
|
|
targetClass: 'Identifier',
|
|
targetSlot: 'identified_by',
|
|
transformation: 'direct',
|
|
typedbEntity: 'identifier',
|
|
typedbAttribute: 'value',
|
|
rdfPredicate: 'hc:identifierValue',
|
|
required: true,
|
|
example: {
|
|
sourceValue: 'NL-AmRM',
|
|
targetValue: 'NL-AmRM',
|
|
},
|
|
},
|
|
{
|
|
sourcePath: 'identifiers[].identifier_url',
|
|
sourceDescription: 'URL to the identifier record',
|
|
targetClass: 'Identifier',
|
|
targetSlot: 'identified_by',
|
|
transformation: 'direct',
|
|
typedbEntity: 'identifier',
|
|
typedbAttribute: 'url',
|
|
rdfPredicate: 'schema:url',
|
|
required: false,
|
|
example: {
|
|
sourceValue: 'https://www.wikidata.org/wiki/Q190804',
|
|
targetValue: 'https://www.wikidata.org/wiki/Q190804',
|
|
},
|
|
},
|
|
],
|
|
exampleYaml: `
|
|
identifiers:
|
|
- identifier_scheme: GHCID
|
|
identifier_value: NL-NH-AMS-M-RM
|
|
- identifier_scheme: ISIL
|
|
identifier_value: NL-AmRM
|
|
- identifier_scheme: Wikidata
|
|
identifier_value: Q190804
|
|
identifier_url: https://www.wikidata.org/wiki/Q190804
|
|
- identifier_scheme: VIAF
|
|
identifier_value: "148691498"
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Custodian Name
|
|
// -------------------------------------------------------------------------
|
|
{
|
|
sourceBlock: 'custodian_name',
|
|
description: 'Consensus name with confidence scoring',
|
|
detailedDescription: `
|
|
The canonical name determined through multi-source reconciliation.
|
|
Includes emic (native language) name with legal form stripped per Rule 8.
|
|
Confidence scores indicate how many sources agree on each name variant.
|
|
`.trim(),
|
|
linkmlClass: 'CustodianName',
|
|
typedbEntity: 'custodian-name',
|
|
provenance: {
|
|
sourceType: 'reconciliation',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'custodian_name.consensus_name',
|
|
sourceDescription: 'Reconciled canonical name',
|
|
targetClass: 'CustodianName',
|
|
targetSlot: 'has_name',
|
|
transformation: 'direct',
|
|
typedbEntity: 'custodian-name',
|
|
typedbAttribute: 'name-string',
|
|
rdfPredicate: 'skos:prefLabel',
|
|
required: true,
|
|
example: {
|
|
sourceValue: 'Rijksmuseum',
|
|
targetValue: 'Rijksmuseum',
|
|
rdfTriple: '<https://w3id.org/hc/NL-NH-AMS-M-RM> skos:prefLabel "Rijksmuseum"@nl .',
|
|
},
|
|
},
|
|
{
|
|
sourcePath: 'custodian_name.emic_name',
|
|
sourceDescription: 'Native language name (legal form stripped)',
|
|
targetClass: 'CustodianName',
|
|
targetSlot: 'has_name',
|
|
transformation: 'normalize',
|
|
transformationDetails: 'Legal form terms (Stichting, Foundation, etc.) removed per Rule 8',
|
|
typedbEntity: 'custodian-name',
|
|
typedbAttribute: 'emic-name',
|
|
rdfPredicate: 'hc:emicName',
|
|
required: false,
|
|
example: {
|
|
sourceValue: 'Rijksmuseum Amsterdam',
|
|
targetValue: 'Rijksmuseum Amsterdam',
|
|
},
|
|
},
|
|
{
|
|
sourcePath: 'custodian_name.confidence_score',
|
|
sourceDescription: 'Confidence in name accuracy (0-1)',
|
|
targetClass: 'CustodianName',
|
|
targetSlot: 'has_confidence_measure',
|
|
transformation: 'direct',
|
|
typedbEntity: 'custodian-name',
|
|
typedbAttribute: 'confidence-score',
|
|
required: false,
|
|
validation: {
|
|
type: 'number',
|
|
},
|
|
example: {
|
|
sourceValue: 0.95,
|
|
targetValue: 0.95,
|
|
},
|
|
},
|
|
{
|
|
sourcePath: 'custodian_name.alternative_names',
|
|
sourceDescription: 'List of alternative/historical names',
|
|
targetClass: 'CustodianName',
|
|
targetSlot: 'has_alias',
|
|
transformation: 'array_map',
|
|
typedbEntity: 'custodian-name',
|
|
typedbAttribute: 'alternative-names',
|
|
rdfPredicate: 'skos:altLabel',
|
|
required: false,
|
|
},
|
|
],
|
|
exampleYaml: `
|
|
custodian_name:
|
|
consensus_name: Rijksmuseum
|
|
emic_name: Rijksmuseum Amsterdam
|
|
confidence_score: 0.95
|
|
source_agreement:
|
|
Wikidata: Rijksmuseum
|
|
Google Maps: Rijksmuseum
|
|
Museum Register: Rijksmuseum Amsterdam
|
|
alternative_names:
|
|
- Rijks Museum
|
|
- National Museum of the Netherlands
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Google Maps Enrichment
|
|
// -------------------------------------------------------------------------
|
|
{
|
|
sourceBlock: 'google_maps_enrichment',
|
|
description: 'Location and business data from Google Maps Places API',
|
|
detailedDescription: `
|
|
Rich location data including coordinates, address, opening hours,
|
|
ratings, reviews, and photos from Google Maps Places API.
|
|
Each enrichment creates a CustodianObservation with google_maps_api provenance.
|
|
`.trim(),
|
|
linkmlClass: 'CustodianObservation',
|
|
typedbEntity: 'custodian-observation',
|
|
provenance: {
|
|
sourceType: 'google_maps_api',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
apiEndpoint: 'https://maps.googleapis.com/maps/api/place/',
|
|
updateFrequency: 'On-demand',
|
|
},
|
|
generatedClasses: ['Place', 'GeoCoordinates', 'OpeningHours'],
|
|
fields: [
|
|
{
|
|
sourcePath: 'google_maps_enrichment.place_id',
|
|
sourceDescription: 'Google Maps Place ID',
|
|
targetClass: 'CustodianObservation',
|
|
targetSlot: 'identified_by',
|
|
transformation: 'direct',
|
|
typedbEntity: 'custodian-observation',
|
|
typedbAttribute: 'external-id',
|
|
rdfPredicate: 'schema:identifier',
|
|
required: true,
|
|
example: {
|
|
sourceValue: 'ChIJ5Ra7we4JxkcRhYVAaq5zQ9U',
|
|
targetValue: 'ChIJ5Ra7we4JxkcRhYVAaq5zQ9U',
|
|
},
|
|
},
|
|
{
|
|
sourcePath: 'google_maps_enrichment.coordinates.latitude',
|
|
sourceDescription: 'Latitude coordinate',
|
|
targetClass: 'GeoCoordinates',
|
|
targetSlot: 'has_latitude',
|
|
transformation: 'nested',
|
|
typedbEntity: 'geo-coordinates',
|
|
typedbAttribute: 'latitude',
|
|
rdfPredicate: 'schema:latitude',
|
|
required: true,
|
|
validation: { type: 'number' },
|
|
example: {
|
|
sourceValue: 52.3599976,
|
|
targetValue: 52.3599976,
|
|
},
|
|
},
|
|
{
|
|
sourcePath: 'google_maps_enrichment.coordinates.longitude',
|
|
sourceDescription: 'Longitude coordinate',
|
|
targetClass: 'GeoCoordinates',
|
|
targetSlot: 'has_longitude',
|
|
transformation: 'nested',
|
|
typedbEntity: 'geo-coordinates',
|
|
typedbAttribute: 'longitude',
|
|
rdfPredicate: 'schema:longitude',
|
|
required: true,
|
|
validation: { type: 'number' },
|
|
example: {
|
|
sourceValue: 4.8852188,
|
|
targetValue: 4.8852188,
|
|
},
|
|
},
|
|
{
|
|
sourcePath: 'google_maps_enrichment.formatted_address',
|
|
sourceDescription: 'Human-readable formatted address',
|
|
targetClass: 'Place',
|
|
targetSlot: 'has_address',
|
|
transformation: 'direct',
|
|
typedbEntity: 'place',
|
|
typedbAttribute: 'formatted-address',
|
|
rdfPredicate: 'schema:address',
|
|
required: false,
|
|
example: {
|
|
sourceValue: 'Museumstraat 1, 1071 XX Amsterdam, Netherlands',
|
|
targetValue: 'Museumstraat 1, 1071 XX Amsterdam, Netherlands',
|
|
},
|
|
},
|
|
{
|
|
sourcePath: 'google_maps_enrichment.rating',
|
|
sourceDescription: 'Average rating (1-5)',
|
|
targetClass: 'CustodianObservation',
|
|
targetSlot: 'has_rating',
|
|
transformation: 'direct',
|
|
typedbEntity: 'custodian-observation',
|
|
typedbAttribute: 'rating',
|
|
rdfPredicate: 'schema:aggregateRating',
|
|
required: false,
|
|
validation: { type: 'number' },
|
|
example: {
|
|
sourceValue: 4.6,
|
|
targetValue: 4.6,
|
|
},
|
|
},
|
|
{
|
|
sourcePath: 'google_maps_enrichment.total_ratings',
|
|
sourceDescription: 'Total number of ratings',
|
|
targetClass: 'CustodianObservation',
|
|
targetSlot: 'has_detail',
|
|
transformation: 'direct',
|
|
typedbEntity: 'custodian-observation',
|
|
typedbAttribute: 'review-count',
|
|
rdfPredicate: 'schema:reviewCount',
|
|
required: false,
|
|
validation: { type: 'number' },
|
|
example: {
|
|
sourceValue: 47832,
|
|
targetValue: 47832,
|
|
},
|
|
},
|
|
{
|
|
sourcePath: 'google_maps_enrichment.opening_hours',
|
|
sourceDescription: 'Weekly opening hours schedule',
|
|
targetClass: 'OpeningHours',
|
|
targetSlot: 'has_operating_hours',
|
|
transformation: 'nested',
|
|
transformationDetails: 'Maps to OpeningHoursSpecification array per day',
|
|
typedbEntity: 'opening-hours',
|
|
rdfPredicate: 'schema:openingHoursSpecification',
|
|
required: false,
|
|
},
|
|
{
|
|
sourcePath: 'google_maps_enrichment.website',
|
|
sourceDescription: 'Official website URL',
|
|
targetClass: 'CustodianObservation',
|
|
targetSlot: 'has_url',
|
|
transformation: 'direct',
|
|
typedbEntity: 'custodian-observation',
|
|
typedbAttribute: 'website',
|
|
rdfPredicate: 'schema:url',
|
|
required: false,
|
|
validation: { type: 'uri' },
|
|
example: {
|
|
sourceValue: 'https://www.rijksmuseum.nl/',
|
|
targetValue: 'https://www.rijksmuseum.nl/',
|
|
},
|
|
},
|
|
{
|
|
sourcePath: 'google_maps_enrichment.phone',
|
|
sourceDescription: 'Phone number',
|
|
targetClass: 'CustodianObservation',
|
|
targetSlot: 'has_contact_point',
|
|
transformation: 'direct',
|
|
typedbEntity: 'custodian-observation',
|
|
typedbAttribute: 'telephone',
|
|
rdfPredicate: 'schema:telephone',
|
|
required: false,
|
|
example: {
|
|
sourceValue: '+31 20 674 7000',
|
|
targetValue: '+31 20 674 7000',
|
|
},
|
|
},
|
|
],
|
|
exampleYaml: `
|
|
google_maps_enrichment:
|
|
place_id: ChIJ5Ra7we4JxkcRhYVAaq5zQ9U
|
|
name: Rijksmuseum
|
|
coordinates:
|
|
latitude: 52.3599976
|
|
longitude: 4.8852188
|
|
formatted_address: Museumstraat 1, 1071 XX Amsterdam, Netherlands
|
|
rating: 4.6
|
|
total_ratings: 47832
|
|
website: https://www.rijksmuseum.nl/
|
|
phone: +31 20 674 7000
|
|
opening_hours:
|
|
Monday: 9:00 AM - 5:00 PM
|
|
Tuesday: 9:00 AM - 5:00 PM
|
|
# ...
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Wikidata Enrichment
|
|
// -------------------------------------------------------------------------
|
|
{
// Mapping entry for the `wikidata_enrichment` YAML block. Each element of
// `fields` pairs one source path with its LinkML class/slot, TypeDB
// entity/attribute and RDF predicate; `exampleYaml` shows a sample input.
|
|
sourceBlock: 'wikidata_enrichment',
|
|
description: 'Linked data from Wikidata knowledge graph',
|
|
detailedDescription: `
|
|
Rich semantic data from Wikidata including:
|
|
- Multilingual labels and descriptions
|
|
- Sitelinks to Wikipedia articles
|
|
- Structured properties (coordinates, founding date, etc.)
|
|
- Instance-of relationships for type classification
|
|
|
|
Creates a CustodianObservation with wikidata_api provenance.
|
|
`.trim(),
|
|
linkmlClass: 'CustodianObservation',
|
|
typedbEntity: 'custodian-observation',
|
|
provenance: {
|
|
sourceType: 'wikidata_api',
|
|
dataTier: 'TIER_3_CROWD_SOURCED',
|
|
apiEndpoint: 'https://www.wikidata.org/wiki/Special:EntityData/',
|
|
},
|
|
generatedClasses: ['WikidataEntity', 'Sitelink'],
|
|
fields: [
|
|
{
|
|
// Q-ID: the only field in this entry marked required.
|
|
sourcePath: 'wikidata_enrichment.entity_id',
|
|
sourceDescription: 'Wikidata Q-ID',
|
|
targetClass: 'CustodianObservation',
|
|
targetSlot: 'identified_by',
|
|
transformation: 'direct',
|
|
typedbEntity: 'custodian-observation',
|
|
typedbAttribute: 'external-id',
|
|
rdfPredicate: 'schema:identifier',
|
|
required: true,
|
|
example: {
|
|
sourceValue: 'Q190804',
|
|
targetValue: 'Q190804',
|
|
rdfTriple: '<https://w3id.org/hc/obs/Q190804> schema:identifier "Q190804" .',
|
|
},
|
|
},
|
|
{
|
|
// Nested mapping: no typedbAttribute is given for this field.
|
|
sourcePath: 'wikidata_enrichment.labels',
|
|
sourceDescription: 'Multilingual labels',
|
|
targetClass: 'CustodianObservation',
|
|
targetSlot: 'has_label',
|
|
transformation: 'nested',
|
|
transformationDetails: 'Language-tagged strings (e.g., {"en": "Rijksmuseum", "nl": "Rijksmuseum"})',
|
|
typedbEntity: 'custodian-observation',
|
|
rdfPredicate: 'rdfs:label',
|
|
required: false,
|
|
},
|
|
{
|
|
// Same shape as `labels` above, but without transformationDetails.
|
|
sourcePath: 'wikidata_enrichment.descriptions',
|
|
sourceDescription: 'Multilingual descriptions',
|
|
targetClass: 'CustodianObservation',
|
|
targetSlot: 'has_description',
|
|
transformation: 'nested',
|
|
typedbEntity: 'custodian-observation',
|
|
rdfPredicate: 'schema:description',
|
|
required: false,
|
|
},
|
|
{
|
|
// NOTE(review): exampleYaml below stores sitelinks as article titles keyed
|
// by wiki (e.g. `enwiki: Rijksmuseum`), not URLs — presumably the URL is
|
// built during the array_map step; confirm.
sourcePath: 'wikidata_enrichment.sitelinks',
|
|
sourceDescription: 'Links to Wikipedia articles',
|
|
targetClass: 'Sitelink',
|
|
targetSlot: 'has_url',
|
|
transformation: 'array_map',
|
|
transformationDetails: 'Each sitelink maps to Wikipedia article URL',
|
|
typedbEntity: 'sitelink',
|
|
rdfPredicate: 'schema:sameAs',
|
|
required: false,
|
|
},
|
|
{
|
|
// NOTE(review): exampleYaml below shows instance_of as an object with
|
// `id` and `label`, while the example here uses the bare Q-ID string —
|
// confirm which shape the lookup receives.
sourcePath: 'wikidata_enrichment.instance_of',
|
|
sourceDescription: 'Wikidata type classification (P31)',
|
|
targetClass: 'CustodianObservation',
|
|
targetSlot: 'instance_of',
|
|
transformation: 'lookup',
|
|
transformationDetails: 'Maps Q-ID to CustodianTypeEnum',
|
|
typedbEntity: 'custodian-observation',
|
|
typedbAttribute: 'wikidata-type',
|
|
rdfPredicate: 'wdt:P31',
|
|
required: false,
|
|
example: {
|
|
sourceValue: 'Q33506',
|
|
targetValue: 'MUSEUM',
|
|
},
|
|
},
|
|
{
|
|
// Targets the separate GeoCoordinates class rather than CustodianObservation.
|
|
sourcePath: 'wikidata_enrichment.coordinates',
|
|
sourceDescription: 'Geographic coordinates from Wikidata (P625)',
|
|
targetClass: 'GeoCoordinates',
|
|
targetSlot: 'has_coordinates',
|
|
transformation: 'nested',
|
|
typedbEntity: 'geo-coordinates',
|
|
rdfPredicate: 'wdt:P625',
|
|
required: false,
|
|
},
|
|
{
|
|
// The example drops the leading '+' and the time part of the Wikidata
|
// timestamp, keeping only the calendar date.
|
sourcePath: 'wikidata_enrichment.founding_date',
|
|
sourceDescription: 'Date of establishment (P571)',
|
|
targetClass: 'CustodianObservation',
|
|
targetSlot: 'founded_through',
|
|
transformation: 'temporal',
|
|
transformationDetails: 'Wikidata time format to ISO 8601',
|
|
typedbEntity: 'custodian-observation',
|
|
typedbAttribute: 'founding-date',
|
|
rdfPredicate: 'wdt:P571',
|
|
required: false,
|
|
validation: { type: 'date' },
|
|
example: {
|
|
sourceValue: '+1800-01-01T00:00:00Z',
|
|
targetValue: '1800-01-01',
|
|
},
|
|
},
|
|
],
|
|
exampleYaml: `
|
|
wikidata_enrichment:
|
|
entity_id: Q190804
|
|
labels:
|
|
en: Rijksmuseum
|
|
nl: Rijksmuseum
|
|
de: Rijksmuseum
|
|
fr: Rijksmuseum
|
|
descriptions:
|
|
en: Dutch national museum in Amsterdam
|
|
nl: Nederlands nationaal museum in Amsterdam
|
|
sitelinks:
|
|
enwiki: Rijksmuseum
|
|
nlwiki: Rijksmuseum
|
|
dewiki: Rijksmuseum
|
|
instance_of:
|
|
id: Q33506
|
|
label: museum
|
|
coordinates:
|
|
latitude: 52.36
|
|
longitude: 4.885
|
|
founding_date: "+1800-01-01T00:00:00Z"
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Location (Canonical)
|
|
// -------------------------------------------------------------------------
|
|
{
// Mapping entry for the canonical `location` YAML block. All five fields
// target the Place class / `place` TypeDB entity and use schema.org address
// predicates; only city and country are marked required.
|
|
sourceBlock: 'location',
|
|
description: 'Normalized canonical location',
|
|
detailedDescription: `
|
|
The authoritative location record after reconciliation from multiple sources.
|
|
This is the single source of truth for the custodian's physical location.
|
|
`.trim(),
|
|
linkmlClass: 'Place',
|
|
typedbEntity: 'place',
|
|
provenance: {
|
|
sourceType: 'reconciliation',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'location.city',
|
|
sourceDescription: 'City name',
|
|
targetClass: 'Place',
|
|
targetSlot: 'cover_place',
|
|
transformation: 'direct',
|
|
typedbEntity: 'place',
|
|
typedbAttribute: 'city',
|
|
rdfPredicate: 'schema:addressLocality',
|
|
required: true,
|
|
example: {
|
|
sourceValue: 'Amsterdam',
|
|
targetValue: 'Amsterdam',
|
|
},
|
|
},
|
|
{
|
|
// Declared as a lookup even though the example value passes through
|
// unchanged ('NL' -> 'NL'); validation pairs the enum type with a
|
// two-uppercase-letter pattern.
sourcePath: 'location.country',
|
|
sourceDescription: 'ISO 3166-1 alpha-2 country code',
|
|
targetClass: 'Place',
|
|
targetSlot: 'cover_country',
|
|
transformation: 'lookup',
|
|
transformationDetails: 'Maps to CountryCodeEnum',
|
|
typedbEntity: 'place',
|
|
typedbAttribute: 'country-code',
|
|
rdfPredicate: 'schema:addressCountry',
|
|
required: true,
|
|
validation: {
|
|
type: 'enum',
|
|
pattern: '^[A-Z]{2}$',
|
|
},
|
|
example: {
|
|
sourceValue: 'NL',
|
|
targetValue: 'NL',
|
|
},
|
|
},
|
|
{
|
|
// NOTE(review): 'region' is the only target slot in this entry without a
|
// cover_/has_ prefix — confirm the slot name against the schema.
|
sourcePath: 'location.region',
|
|
sourceDescription: 'Region/province name',
|
|
targetClass: 'Place',
|
|
targetSlot: 'region',
|
|
transformation: 'direct',
|
|
typedbEntity: 'place',
|
|
typedbAttribute: 'region',
|
|
rdfPredicate: 'schema:addressRegion',
|
|
required: false,
|
|
example: {
|
|
sourceValue: 'North Holland',
|
|
targetValue: 'North Holland',
|
|
},
|
|
},
|
|
{
|
|
sourcePath: 'location.postal_code',
|
|
sourceDescription: 'Postal/ZIP code',
|
|
targetClass: 'Place',
|
|
targetSlot: 'has_postal_code',
|
|
transformation: 'direct',
|
|
typedbEntity: 'place',
|
|
typedbAttribute: 'postal-code',
|
|
rdfPredicate: 'schema:postalCode',
|
|
required: false,
|
|
example: {
|
|
sourceValue: '1071 XX',
|
|
targetValue: '1071 XX',
|
|
},
|
|
},
|
|
{
|
|
sourcePath: 'location.street_address',
|
|
sourceDescription: 'Street address',
|
|
targetClass: 'Place',
|
|
targetSlot: 'has_address',
|
|
transformation: 'direct',
|
|
typedbEntity: 'place',
|
|
typedbAttribute: 'street-address',
|
|
rdfPredicate: 'schema:streetAddress',
|
|
required: false,
|
|
example: {
|
|
sourceValue: 'Museumstraat 1',
|
|
targetValue: 'Museumstraat 1',
|
|
},
|
|
},
|
|
],
|
|
exampleYaml: `
|
|
location:
|
|
city: Amsterdam
|
|
country: NL
|
|
region: North Holland
|
|
postal_code: 1071 XX
|
|
street_address: Museumstraat 1
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Web Enrichment
|
|
// -------------------------------------------------------------------------
|
|
{
// Mapping entry for the `web_enrichment` YAML block (scraped website data).
// Fields target WebObservation; WebClaim is listed as a generated class.
|
|
sourceBlock: 'web_enrichment',
|
|
description: 'Archived website data and scraped content',
|
|
detailedDescription: `
|
|
Content extracted from the institution's website using web scraping tools.
|
|
Includes organization details, collections, exhibitions, contact info, etc.
|
|
All claims must have XPath provenance per Rule 6.
|
|
`.trim(),
|
|
linkmlClass: 'WebObservation',
|
|
typedbEntity: 'web-observation',
|
|
provenance: {
|
|
sourceType: 'web_scrape',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
generatedClasses: ['WebClaim'],
|
|
fields: [
|
|
{
|
|
sourcePath: 'web_enrichment.source_url',
|
|
sourceDescription: 'URL of scraped page',
|
|
targetClass: 'WebObservation',
|
|
targetSlot: 'source_url',
|
|
transformation: 'direct',
|
|
typedbEntity: 'web-observation',
|
|
typedbAttribute: 'source-url',
|
|
rdfPredicate: 'prov:wasDerivedFrom',
|
|
required: true,
|
|
validation: { type: 'uri' },
|
|
},
|
|
{
|
|
// Validated as type 'date' although the exampleYaml value below is a
|
// full timestamp ("2025-01-15T10:30:00Z").
|
sourcePath: 'web_enrichment.retrieved_on',
|
|
sourceDescription: 'Timestamp when page was archived',
|
|
targetClass: 'WebObservation',
|
|
targetSlot: 'retrieved_on',
|
|
transformation: 'temporal',
|
|
typedbEntity: 'web-observation',
|
|
typedbAttribute: 'retrieved-on',
|
|
rdfPredicate: 'prov:generatedAtTime',
|
|
required: true,
|
|
validation: { type: 'date' },
|
|
},
|
|
{
|
|
// No rdfPredicate is declared for this field.
|
|
sourcePath: 'web_enrichment.html_file',
|
|
sourceDescription: 'Path to archived HTML file',
|
|
targetClass: 'WebObservation',
|
|
targetSlot: 'has_file_location',
|
|
transformation: 'direct',
|
|
typedbEntity: 'web-observation',
|
|
typedbAttribute: 'archive-path',
|
|
required: false,
|
|
},
|
|
{
|
|
// Neither typedbAttribute nor rdfPredicate is declared for this field.
|
|
sourcePath: 'web_enrichment.organization_details',
|
|
sourceDescription: 'Extracted organization information',
|
|
targetClass: 'WebObservation',
|
|
targetSlot: 'has_detail',
|
|
transformation: 'nested',
|
|
typedbEntity: 'web-observation',
|
|
required: false,
|
|
},
|
|
],
|
|
// NOTE(review): the sample below contains `retrieval_agent`, which has no
|
// entry in `fields` above — confirm whether it is intentionally unmapped.
|
exampleYaml: `
|
|
web_enrichment:
|
|
source_url: https://www.rijksmuseum.nl/en/about-us
|
|
retrieved_on: "2025-01-15T10:30:00Z"
|
|
html_file: web/NL-NH-AMS-M-RM/rijksmuseum.nl/about-us.html
|
|
retrieval_agent: firecrawl
|
|
organization_details:
|
|
mission: "To connect people with art and history"
|
|
established: "1800"
|
|
collection_size: "1 million objects"
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Web Claims
|
|
// -------------------------------------------------------------------------
|
|
{
// Mapping entry for the `web_claims` YAML block. Source paths carry a `[]`
// suffix; exampleYaml shows web_claims as a list, so the paths presumably
// apply to each list item.
|
|
sourceBlock: 'web_claims',
|
|
description: 'Verified claims extracted from websites with XPath provenance',
|
|
detailedDescription: `
|
|
Individual facts extracted from web pages with full provenance chain.
|
|
Each claim MUST have XPath pointer to exact location in archived HTML.
|
|
Claims without XPath provenance are fabricated and must be removed per Rule 6.
|
|
`.trim(),
|
|
linkmlClass: 'WebClaim',
|
|
typedbEntity: 'web-claim',
|
|
provenance: {
|
|
sourceType: 'web_extraction',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'web_claims[].claim_type',
|
|
sourceDescription: 'Type of claim (full_name, email, phone, etc.)',
|
|
targetClass: 'WebClaim',
|
|
targetSlot: 'has_type',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'web-claim',
|
|
typedbAttribute: 'claim-type',
|
|
rdfPredicate: 'hc:claimType',
|
|
required: true,
|
|
validation: {
|
|
type: 'enum',
|
|
enumValues: ['full_name', 'description', 'email', 'phone', 'address', 'opening_hours', 'social_media'],
|
|
},
|
|
},
|
|
{
|
|
// NOTE(review): claim_value and xpath (next field) both target the
|
// 'has_detail' slot while using different TypeDB attributes and RDF
|
// predicates — confirm the shared slot is intended.
sourcePath: 'web_claims[].claim_value',
|
|
sourceDescription: 'The extracted value',
|
|
targetClass: 'WebClaim',
|
|
targetSlot: 'has_detail',
|
|
transformation: 'direct',
|
|
typedbEntity: 'web-claim',
|
|
typedbAttribute: 'claim-value',
|
|
rdfPredicate: 'rdf:value',
|
|
required: true,
|
|
},
|
|
{
|
|
// Marked required, matching the description's rule that every claim
|
// needs an XPath pointer.
|
sourcePath: 'web_claims[].xpath',
|
|
sourceDescription: 'XPath to element containing value',
|
|
targetClass: 'WebClaim',
|
|
targetSlot: 'has_detail',
|
|
transformation: 'direct',
|
|
typedbEntity: 'web-claim',
|
|
typedbAttribute: 'xpath',
|
|
rdfPredicate: 'hc:xpath',
|
|
required: true,
|
|
},
|
|
{
|
|
sourcePath: 'web_claims[].source_url',
|
|
sourceDescription: 'URL where claim was extracted',
|
|
targetClass: 'WebClaim',
|
|
targetSlot: 'source_url',
|
|
transformation: 'direct',
|
|
typedbEntity: 'web-claim',
|
|
typedbAttribute: 'source-url',
|
|
rdfPredicate: 'prov:wasDerivedFrom',
|
|
required: true,
|
|
validation: { type: 'uri' },
|
|
},
|
|
{
|
|
// No rdfPredicate is declared for this field.
|
|
sourcePath: 'web_claims[].xpath_match_score',
|
|
sourceDescription: 'Match confidence (1.0 = exact)',
|
|
targetClass: 'WebClaim',
|
|
targetSlot: 'has_score',
|
|
transformation: 'direct',
|
|
typedbEntity: 'web-claim',
|
|
typedbAttribute: 'match-score',
|
|
required: false,
|
|
validation: { type: 'number' },
|
|
},
|
|
],
|
|
// NOTE(review): the sample below contains `retrieved_on` and `html_file`,
|
// which have no entries in `fields` above — confirm whether they are
|
// intentionally unmapped.
exampleYaml: `
|
|
web_claims:
|
|
- claim_type: full_name
|
|
claim_value: Rijksmuseum Amsterdam
|
|
source_url: https://www.rijksmuseum.nl/
|
|
retrieved_on: "2025-01-15T10:30:00Z"
|
|
xpath: /html/body/header/div[1]/a/span
|
|
html_file: web/NL-NH-AMS-M-RM/rijksmuseum.nl/index.html
|
|
xpath_match_score: 1.0
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Provenance
|
|
// -------------------------------------------------------------------------
|
|
{
// Mapping entry for the `provenance` YAML block. Note that this entry also
// has its own `provenance` property (sourceType/dataTier), which describes
// the mapping entry itself rather than the YAML block's contents.
|
|
sourceBlock: 'provenance',
|
|
description: 'Data lineage and source tracking',
|
|
detailedDescription: `
|
|
Metadata about where the data came from, when it was collected,
|
|
and how confident we are in its accuracy.
|
|
`.trim(),
|
|
linkmlClass: 'Provenance',
|
|
typedbEntity: 'provenance',
|
|
provenance: {
|
|
sourceType: 'metadata',
|
|
dataTier: 'TIER_1_AUTHORITATIVE',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'provenance.data_source',
|
|
sourceDescription: 'Origin of the data',
|
|
targetClass: 'Provenance',
|
|
targetSlot: 'has_source',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'provenance',
|
|
typedbAttribute: 'data-source',
|
|
rdfPredicate: 'prov:wasAttributedTo',
|
|
required: true,
|
|
validation: {
|
|
type: 'enum',
|
|
enumValues: ['CSV_REGISTRY', 'CONVERSATION_NLP', 'API_ENRICHMENT', 'WEB_SCRAPE', 'MANUAL_ENTRY'],
|
|
},
|
|
},
|
|
{
|
|
// The four tier names listed here are the same literals used by the
|
// `dataTier` properties of the mapping entries in this file.
|
sourcePath: 'provenance.data_tier',
|
|
sourceDescription: 'Data quality tier',
|
|
targetClass: 'Provenance',
|
|
targetSlot: 'has_type',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'provenance',
|
|
typedbAttribute: 'data-tier',
|
|
rdfPredicate: 'hc:dataTier',
|
|
required: true,
|
|
validation: {
|
|
type: 'enum',
|
|
enumValues: ['TIER_1_AUTHORITATIVE', 'TIER_2_VERIFIED', 'TIER_3_CROWD_SOURCED', 'TIER_4_INFERRED'],
|
|
},
|
|
},
|
|
{
|
|
sourcePath: 'provenance.extraction_date',
|
|
sourceDescription: 'When data was extracted',
|
|
targetClass: 'Provenance',
|
|
targetSlot: 'observed_in',
|
|
transformation: 'temporal',
|
|
typedbEntity: 'provenance',
|
|
typedbAttribute: 'extraction-date',
|
|
rdfPredicate: 'prov:generatedAtTime',
|
|
required: true,
|
|
validation: { type: 'date' },
|
|
},
|
|
{
|
|
// NOTE(review): the description states a 0-1 range, but validation only
|
// checks the type — confirm whether a range constraint should be added.
|
sourcePath: 'provenance.confidence_score',
|
|
sourceDescription: 'Confidence in data accuracy (0-1)',
|
|
targetClass: 'Provenance',
|
|
targetSlot: 'has_confidence_measure',
|
|
transformation: 'direct',
|
|
typedbEntity: 'provenance',
|
|
typedbAttribute: 'confidence-score',
|
|
rdfPredicate: 'hc:confidenceScore',
|
|
required: false,
|
|
validation: { type: 'number' },
|
|
},
|
|
],
|
|
// NOTE(review): the sample below contains `extraction_method` and
|
// `source_files`, which have no entries in `fields` above — confirm whether
|
// they are intentionally unmapped.
exampleYaml: `
|
|
provenance:
|
|
data_source: API_ENRICHMENT
|
|
data_tier: TIER_2_VERIFIED
|
|
extraction_date: "2025-01-15T10:30:00Z"
|
|
extraction_method: "Google Maps Places API + Wikidata SPARQL"
|
|
confidence_score: 0.92
|
|
source_files:
|
|
- google_maps_enrichment
|
|
- wikidata_enrichment
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Timespan
|
|
// -------------------------------------------------------------------------
|
|
{
// Mapping entry for the `timespan` YAML block. The four fields are all
// optional 'temporal' mappings onto TimeSpan slots with the same names as
// the source keys.
//
// RDF predicate numbering: the outer bounds (begin_of_the_begin,
// end_of_the_end) use the P82a/P82b pair and the inner bounds
// (end_of_the_begin, begin_of_the_end) use the P81a/P81b pair, so the
// alternating 82/81/81/82 sequence below is deliberate, not a typo.
|
|
sourceBlock: 'timespan',
|
|
description: 'Temporal bounds (CIDOC-CRM style)',
|
|
detailedDescription: `
|
|
Temporal information following CIDOC-CRM E52 Time-Span pattern.
|
|
Captures founding date, closure date (if applicable), and temporal bounds.
|
|
`.trim(),
|
|
linkmlClass: 'TimeSpan',
|
|
typedbEntity: 'time-span',
|
|
provenance: {
|
|
sourceType: 'derived',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'timespan.begin_of_the_begin',
|
|
sourceDescription: 'Earliest possible start date',
|
|
targetClass: 'TimeSpan',
|
|
targetSlot: 'begin_of_the_begin',
|
|
transformation: 'temporal',
|
|
typedbEntity: 'time-span',
|
|
typedbAttribute: 'begin-of-the-begin',
|
|
rdfPredicate: 'crm:P82a_begin_of_the_begin',
|
|
required: false,
|
|
validation: { type: 'date' },
|
|
},
|
|
{
|
|
sourcePath: 'timespan.end_of_the_begin',
|
|
sourceDescription: 'Latest possible start date',
|
|
targetClass: 'TimeSpan',
|
|
targetSlot: 'end_of_the_begin',
|
|
transformation: 'temporal',
|
|
typedbEntity: 'time-span',
|
|
typedbAttribute: 'end-of-the-begin',
|
|
rdfPredicate: 'crm:P81a_end_of_the_begin',
|
|
required: false,
|
|
validation: { type: 'date' },
|
|
},
|
|
{
|
|
sourcePath: 'timespan.begin_of_the_end',
|
|
sourceDescription: 'Earliest possible end date',
|
|
targetClass: 'TimeSpan',
|
|
targetSlot: 'begin_of_the_end',
|
|
transformation: 'temporal',
|
|
typedbEntity: 'time-span',
|
|
typedbAttribute: 'begin-of-the-end',
|
|
rdfPredicate: 'crm:P81b_begin_of_the_end',
|
|
required: false,
|
|
validation: { type: 'date' },
|
|
},
|
|
{
|
|
sourcePath: 'timespan.end_of_the_end',
|
|
sourceDescription: 'Latest possible end date',
|
|
targetClass: 'TimeSpan',
|
|
targetSlot: 'end_of_the_end',
|
|
transformation: 'temporal',
|
|
typedbEntity: 'time-span',
|
|
typedbAttribute: 'end-of-the-end',
|
|
rdfPredicate: 'crm:P82b_end_of_the_end',
|
|
required: false,
|
|
validation: { type: 'date' },
|
|
},
|
|
],
|
|
// The sample only sets the two start bounds; the end bounds are omitted,
|
// which is why none of the four fields is required.
|
exampleYaml: `
|
|
timespan:
|
|
begin_of_the_begin: "1800-01-01"
|
|
end_of_the_begin: "1800-12-31"
|
|
# Museum still operating - no end dates
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Legal Status
|
|
// -------------------------------------------------------------------------
|
|
{
// Mapping entry for the `legal_status` YAML block. All three fields are
// optional and target CustodianLegalStatus.
|
|
sourceBlock: 'legal_status',
|
|
description: 'Legal form and organizational structure',
|
|
detailedDescription: `
|
|
Legal entity information including legal form (foundation, corporation, etc.),
|
|
registration numbers, and governing body information.
|
|
`.trim(),
|
|
linkmlClass: 'CustodianLegalStatus',
|
|
typedbEntity: 'custodian-legal-status',
|
|
provenance: {
|
|
sourceType: 'registry_lookup',
|
|
dataTier: 'TIER_1_AUTHORITATIVE',
|
|
},
|
|
fields: [
|
|
{
|
|
// NOTE(review): sourceDescription calls the source an ISO 20275 code,
|
// but the example source value is the plain word 'stichting' and the
|
// code only appears on the target side — confirm the description.
sourcePath: 'legal_status.legal_form',
|
|
sourceDescription: 'ISO 20275 legal form code',
|
|
targetClass: 'CustodianLegalStatus',
|
|
targetSlot: 'has_legal_form',
|
|
transformation: 'lookup',
|
|
transformationDetails: 'Maps to ISO 20275 Entity Legal Form codes',
|
|
typedbEntity: 'custodian-legal-status',
|
|
typedbAttribute: 'legal-form',
|
|
rdfPredicate: 'org:classification',
|
|
required: false,
|
|
example: {
|
|
sourceValue: 'stichting',
|
|
targetValue: 'NL_STI', // ISO 20275 code
|
|
},
|
|
},
|
|
{
|
|
sourcePath: 'legal_status.legal_name',
|
|
sourceDescription: 'Full registered name including legal form',
|
|
targetClass: 'CustodianLegalStatus',
|
|
targetSlot: 'has_name',
|
|
transformation: 'direct',
|
|
typedbEntity: 'custodian-legal-status',
|
|
typedbAttribute: 'legal-name',
|
|
rdfPredicate: 'schema:legalName',
|
|
required: false,
|
|
example: {
|
|
sourceValue: 'Stichting Rijksmuseum',
|
|
targetValue: 'Stichting Rijksmuseum',
|
|
},
|
|
},
|
|
{
|
|
// Validated as a string of exactly eight digits; exampleYaml quotes the
|
// value so it stays a string.
|
sourcePath: 'legal_status.kvk_number',
|
|
sourceDescription: 'Dutch Chamber of Commerce number',
|
|
targetClass: 'CustodianLegalStatus',
|
|
targetSlot: 'identified_by',
|
|
transformation: 'direct',
|
|
typedbEntity: 'custodian-legal-status',
|
|
typedbAttribute: 'kvk-number',
|
|
rdfPredicate: 'hc:kvkNumber',
|
|
required: false,
|
|
validation: {
|
|
type: 'string',
|
|
pattern: '^[0-9]{8}$',
|
|
},
|
|
},
|
|
],
|
|
// NOTE(review): the sample below contains `registered_address`, which has
|
// no entry in `fields` above — confirm whether it is intentionally unmapped.
|
exampleYaml: `
|
|
legal_status:
|
|
legal_form: stichting
|
|
legal_name: Stichting Rijksmuseum
|
|
kvk_number: "10205502"
|
|
registered_address: Amsterdam
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Digital Platforms
|
|
// -------------------------------------------------------------------------
|
|
{
// Mapping entry for the `digital_platforms` YAML block. Source paths carry
// a `[]` suffix; exampleYaml shows digital_platforms as a list, so the
// paths presumably apply to each list item. Name and URL are required,
// platform type is optional.
|
|
sourceBlock: 'digital_platforms',
|
|
description: 'Website and digital platform metadata',
|
|
detailedDescription: `
|
|
Information about the institution's digital presence including
|
|
primary website, collection management systems, discovery portals, and APIs.
|
|
`.trim(),
|
|
linkmlClass: 'DigitalPlatform',
|
|
typedbEntity: 'digital-platform',
|
|
provenance: {
|
|
sourceType: 'web_discovery',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'digital_platforms[].platform_name',
|
|
sourceDescription: 'Name of the platform',
|
|
targetClass: 'DigitalPlatform',
|
|
targetSlot: 'has_name',
|
|
transformation: 'direct',
|
|
typedbEntity: 'digital-platform',
|
|
typedbAttribute: 'platform-name',
|
|
rdfPredicate: 'schema:name',
|
|
required: true,
|
|
},
|
|
{
|
|
sourcePath: 'digital_platforms[].platform_url',
|
|
sourceDescription: 'URL of the platform',
|
|
targetClass: 'DigitalPlatform',
|
|
targetSlot: 'has_url',
|
|
transformation: 'direct',
|
|
typedbEntity: 'digital-platform',
|
|
typedbAttribute: 'platform-url',
|
|
rdfPredicate: 'schema:url',
|
|
required: true,
|
|
validation: { type: 'uri' },
|
|
},
|
|
{
|
|
// The sample below uses three of the five enum values
|
// (WEBSITE, COLLECTION_PORTAL, API).
|
sourcePath: 'digital_platforms[].platform_type',
|
|
sourceDescription: 'Type of platform',
|
|
targetClass: 'DigitalPlatform',
|
|
targetSlot: 'has_type',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'digital-platform',
|
|
typedbAttribute: 'platform-type',
|
|
rdfPredicate: 'hc:platformType',
|
|
required: false,
|
|
validation: {
|
|
type: 'enum',
|
|
enumValues: ['WEBSITE', 'COLLECTION_PORTAL', 'DISCOVERY_PLATFORM', 'API', 'SOCIAL_MEDIA'],
|
|
},
|
|
},
|
|
],
|
|
exampleYaml: `
|
|
digital_platforms:
|
|
- platform_name: Rijksmuseum Website
|
|
platform_url: https://www.rijksmuseum.nl/
|
|
platform_type: WEBSITE
|
|
- platform_name: Rijksstudio
|
|
platform_url: https://www.rijksmuseum.nl/en/rijksstudio
|
|
platform_type: COLLECTION_PORTAL
|
|
- platform_name: Rijksmuseum API
|
|
platform_url: https://data.rijksmuseum.nl/
|
|
platform_type: API
|
|
`.trim(),
|
|
},
|
|
|
|
// ===========================================================================
|
|
// PHASE 1: ARCHIVE TYPES (97 classes)
|
|
// ===========================================================================
|
|
// These classes represent the specialized classification of archive institutions
|
|
// across different domains, governance structures, and collection focuses.
|
|
// ===========================================================================
|
|
|
|
// -------------------------------------------------------------------------
|
|
// ARCHIVE TYPE - ACADEMIC
|
|
// -------------------------------------------------------------------------
|
|
{
// Mapping entry documenting the academic archive class family. Unlike the
// enrichment entries, its provenance is 'schema_documentation' and its
// first field has no YAML source path (sourcePath: null).
//
// The "Classes:" line in detailedDescription lists every name in
// generatedClasses, as the other archive_type_* entries do.
|
|
sourceBlock: 'archive_type_academic',
|
|
description: 'Academic and research archive types - universities, institutes, learned societies',
|
|
detailedDescription: `
|
|
Academic archives serve educational and research institutions. They preserve:
|
|
- University records and institutional history
|
|
- Research data and scholarly outputs
|
|
- Student and faculty records
|
|
- Scientific collections and specimen documentation
|
|
|
|
Classes: AcademicArchive, UniversityArchive, ScientificArchive, InstitutionalArchive, InstitutionalRepository
|
|
`.trim(),
|
|
linkmlClass: 'AcademicArchive',
|
|
typedbEntity: 'academic-archive',
|
|
provenance: {
|
|
sourceType: 'schema_documentation',
|
|
dataTier: 'TIER_1_AUTHORITATIVE',
|
|
},
|
|
generatedClasses: [
|
|
'AcademicArchive',
|
|
'UniversityArchive',
|
|
'ScientificArchive',
|
|
'InstitutionalArchive',
|
|
'InstitutionalRepository',
|
|
],
|
|
fields: [
|
|
{
|
|
// Classification field: required, with no source path of its own.
|
|
sourcePath: null,
|
|
sourceDescription: 'Archive classification type for academic institutions',
|
|
targetClass: 'AcademicArchive',
|
|
targetSlot: 'has_type',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'academic-archive',
|
|
typedbAttribute: 'archive-classification',
|
|
rdfPredicate: 'hc:archiveType',
|
|
required: true,
|
|
status: 'mapped',
|
|
notes: 'Subclass of Archive with specialization for academic/research contexts',
|
|
},
|
|
{
|
|
// NOTE(review): this field reads `institution_type` and restricts it to
|
// UNIVERSITY/RESEARCH_INSTITUTE/ACADEMY/LEARNED_SOCIETY, but exampleYaml
|
// below sets `institution_type: ARCHIVE` — the source path may be meant
// to be a parent-institution key instead; confirm. It also shares the
// 'has_type' slot with the classification field above.
sourcePath: 'institution_type',
|
|
sourceDescription: 'Parent educational institution type',
|
|
targetClass: 'AcademicArchive',
|
|
targetSlot: 'has_type',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'academic-archive',
|
|
typedbAttribute: 'parent-institution-type',
|
|
rdfPredicate: 'hc:parentInstitutionType',
|
|
required: false,
|
|
validation: {
|
|
type: 'enum',
|
|
enumValues: ['UNIVERSITY', 'RESEARCH_INSTITUTE', 'ACADEMY', 'LEARNED_SOCIETY'],
|
|
},
|
|
},
|
|
],
|
|
exampleYaml: `
|
|
# AcademicArchive instance
|
|
institution_type: ARCHIVE
|
|
archive_classification: ACADEMIC
|
|
parent_institution: University of Amsterdam
|
|
specializations:
|
|
- scientific_records
|
|
- research_data
|
|
- institutional_history
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// ARCHIVE TYPE - AUDIOVISUAL
|
|
// -------------------------------------------------------------------------
|
|
{
// Mapping entry documenting the audiovisual archive class family. The
// "Classes:" list in detailedDescription and generatedClasses name the
// same ten classes.
|
|
sourceBlock: 'archive_type_audiovisual',
|
|
description: 'Audiovisual archive types - film, sound, television, radio',
|
|
detailedDescription: `
|
|
Audiovisual archives preserve time-based media including:
|
|
- Film and cinema collections (Cinematheque, FilmArchive)
|
|
- Sound recordings (SoundArchive, MusicArchive, AnimalSoundArchive)
|
|
- Television and radio broadcasts (TelevisionArchive, RadioArchive)
|
|
- Media-specific preservation requirements
|
|
|
|
Classes: AudiovisualArchive, FilmArchive, Cinematheque, SoundArchive,
|
|
MusicArchive, RadioArchive, TelevisionArchive, AnimalSoundArchive,
|
|
MediaArchive, AdvertisingRadioArchive
|
|
`.trim(),
|
|
linkmlClass: 'AudiovisualArchive',
|
|
typedbEntity: 'audiovisual-archive',
|
|
provenance: {
|
|
sourceType: 'schema_documentation',
|
|
dataTier: 'TIER_1_AUTHORITATIVE',
|
|
},
|
|
generatedClasses: [
|
|
'AudiovisualArchive',
|
|
'FilmArchive',
|
|
'Cinematheque',
|
|
'SoundArchive',
|
|
'MusicArchive',
|
|
'RadioArchive',
|
|
'TelevisionArchive',
|
|
'AnimalSoundArchive',
|
|
'MediaArchive',
|
|
'AdvertisingRadioArchive',
|
|
],
|
|
fields: [
|
|
{
|
|
// Classification field: required, with no source path of its own.
|
|
sourcePath: null,
|
|
sourceDescription: 'Audiovisual archive classification',
|
|
targetClass: 'AudiovisualArchive',
|
|
targetSlot: 'has_type',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'audiovisual-archive',
|
|
typedbAttribute: 'archive-classification',
|
|
rdfPredicate: 'hc:archiveType',
|
|
required: true,
|
|
status: 'mapped',
|
|
},
|
|
{
|
|
// NOTE(review): 'array_direct' is not among the TransformationType
|
// members visible at the top of the file (which include 'array_map') —
|
// confirm it is declared in the union. Validation pairs type 'array'
|
// with enumValues, presumably constraining each element.
sourcePath: 'media_types',
|
|
sourceDescription: 'Types of media held in collection',
|
|
targetClass: 'AudiovisualArchive',
|
|
targetSlot: 'has_media_type',
|
|
transformation: 'array_direct',
|
|
typedbEntity: 'audiovisual-archive',
|
|
typedbAttribute: 'media-types',
|
|
rdfPredicate: 'hc:mediaTypes',
|
|
required: false,
|
|
validation: {
|
|
type: 'array',
|
|
enumValues: ['FILM', 'VIDEO', 'AUDIO', 'RADIO', 'TELEVISION', 'DIGITAL_MEDIA'],
|
|
},
|
|
},
|
|
],
|
|
// NOTE(review): the sample below contains `subtype` and
|
// `preservation_formats`, which have no entries in `fields` above —
|
// confirm whether they are intentionally unmapped.
exampleYaml: `
|
|
# FilmArchive instance (e.g., EYE Filmmuseum)
|
|
institution_type: ARCHIVE
|
|
archive_classification: AUDIOVISUAL
|
|
subtype: FILM_ARCHIVE
|
|
media_types:
|
|
- FILM
|
|
- VIDEO
|
|
- DIGITAL_MEDIA
|
|
preservation_formats:
|
|
- 35mm
|
|
- 16mm
|
|
- digital_preservation
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// ARCHIVE TYPE - CHURCH/RELIGIOUS
|
|
// -------------------------------------------------------------------------
|
|
{
// Mapping entry documenting the religious archive class family. The
// "Classes:" list in detailedDescription and generatedClasses name the
// same seven classes.
|
|
sourceBlock: 'archive_type_church',
|
|
description: 'Religious archive types - church, diocesan, monastery, parish',
|
|
detailedDescription: `
|
|
Religious archives document faith communities and their histories:
|
|
- Church administration and governance (ChurchArchive, DiocesanArchive)
|
|
- Religious orders and communities (MonasteryArchive, ReligiousArchive)
|
|
- Parish records and sacramental registers (ParishArchive)
|
|
- Regional variations (ChurchArchiveSweden, CathedralArchive)
|
|
|
|
Classes: ChurchArchive, DiocesanArchive, MonasteryArchive, ParishArchive,
|
|
ReligiousArchive, CathedralArchive, ChurchArchiveSweden
|
|
`.trim(),
|
|
linkmlClass: 'ChurchArchive',
|
|
typedbEntity: 'church-archive',
|
|
provenance: {
|
|
sourceType: 'schema_documentation',
|
|
dataTier: 'TIER_1_AUTHORITATIVE',
|
|
},
|
|
generatedClasses: [
|
|
'ChurchArchive',
|
|
'DiocesanArchive',
|
|
'MonasteryArchive',
|
|
'ParishArchive',
|
|
'ReligiousArchive',
|
|
'CathedralArchive',
|
|
'ChurchArchiveSweden',
|
|
],
|
|
fields: [
|
|
{
|
|
// Classification field: required, with no source path of its own.
|
|
sourcePath: null,
|
|
sourceDescription: 'Religious archive classification',
|
|
targetClass: 'ChurchArchive',
|
|
targetSlot: 'has_type',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'church-archive',
|
|
typedbAttribute: 'archive-classification',
|
|
rdfPredicate: 'hc:archiveType',
|
|
required: true,
|
|
status: 'mapped',
|
|
},
|
|
{
|
|
// NOTE(review): denomination targets the same 'has_type' slot as the
|
// classification field above, while using a different TypeDB attribute
|
// and RDF predicate — confirm the shared slot is intended.
sourcePath: 'religious_denomination',
|
|
sourceDescription: 'Religious denomination or tradition',
|
|
targetClass: 'ChurchArchive',
|
|
targetSlot: 'has_type',
|
|
transformation: 'direct',
|
|
typedbEntity: 'church-archive',
|
|
typedbAttribute: 'denomination',
|
|
rdfPredicate: 'hc:denomination',
|
|
required: false,
|
|
},
|
|
],
|
|
// NOTE(review): the sample below contains `subtype`, `diocese` and
|
// `record_types`, which have no entries in `fields` above — confirm
|
// whether they are intentionally unmapped.
exampleYaml: `
|
|
# DiocesanArchive instance
|
|
institution_type: ARCHIVE
|
|
archive_classification: CHURCH
|
|
subtype: DIOCESAN_ARCHIVE
|
|
religious_denomination: Roman Catholic
|
|
diocese: Diocese of Haarlem-Amsterdam
|
|
record_types:
|
|
- sacramental_registers
|
|
- parish_records
|
|
- administrative_correspondence
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// ARCHIVE TYPE - CORPORATE/BUSINESS
|
|
// -------------------------------------------------------------------------
|
|
{
// Mapping entry documenting the corporate/business archive class family.
// The "Classes:" list in detailedDescription and generatedClasses name the
// same seven classes.
//
// NOTE(review): linkmlClass is 'CompanyArchives' but typedbEntity is
// 'corporate-archive'; the academic, audiovisual and church entries use a
// kebab-cased form of the class name — confirm the entity name.
|
|
sourceBlock: 'archive_type_corporate',
|
|
description: 'Corporate and business archive types - company, bank, trade union',
|
|
detailedDescription: `
|
|
Corporate archives document business and economic activities:
|
|
- Company history and governance (CompanyArchives, BankArchive)
|
|
- Labor organizations (TradeUnionArchive)
|
|
- Economic documentation (EconomicArchive)
|
|
- Industry-specific records (FoundationArchive, AssociationArchive)
|
|
|
|
Classes: CompanyArchives, BankArchive, TradeUnionArchive, EconomicArchive,
|
|
FoundationArchive, AssociationArchive, RegionalEconomicArchive
|
|
`.trim(),
|
|
linkmlClass: 'CompanyArchives',
|
|
typedbEntity: 'corporate-archive',
|
|
provenance: {
|
|
sourceType: 'schema_documentation',
|
|
dataTier: 'TIER_1_AUTHORITATIVE',
|
|
},
|
|
generatedClasses: [
|
|
'CompanyArchives',
|
|
'BankArchive',
|
|
'TradeUnionArchive',
|
|
'EconomicArchive',
|
|
'FoundationArchive',
|
|
'AssociationArchive',
|
|
'RegionalEconomicArchive',
|
|
],
|
|
fields: [
|
|
{
|
|
// Classification field: required, with no source path of its own.
|
|
sourcePath: null,
|
|
sourceDescription: 'Corporate archive classification',
|
|
targetClass: 'CompanyArchives',
|
|
targetSlot: 'has_type',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'corporate-archive',
|
|
typedbAttribute: 'archive-classification',
|
|
rdfPredicate: 'hc:archiveType',
|
|
required: true,
|
|
status: 'mapped',
|
|
},
|
|
{
|
|
// Copied as-is ('direct'); no enum validation is declared even though
|
// the sample value below (FINANCIAL_SERVICES) looks like an enum member.
|
sourcePath: 'industry_sector',
|
|
sourceDescription: 'Industry sector of the organization',
|
|
targetClass: 'CompanyArchives',
|
|
targetSlot: 'has_domain',
|
|
transformation: 'direct',
|
|
typedbEntity: 'corporate-archive',
|
|
typedbAttribute: 'industry-sector',
|
|
rdfPredicate: 'hc:industrySector',
|
|
required: false,
|
|
},
|
|
],
|
|
// NOTE(review): the sample below contains `subtype`, `company_name` and
|
// `parent_organization`, which have no entries in `fields` above —
|
// confirm whether they are intentionally unmapped.
exampleYaml: `
|
|
# BankArchive instance
|
|
institution_type: ARCHIVE
|
|
archive_classification: CORPORATE
|
|
subtype: BANK_ARCHIVE
|
|
company_name: ABN AMRO Historical Archive
|
|
industry_sector: FINANCIAL_SERVICES
|
|
parent_organization: ABN AMRO Bank N.V.
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// ARCHIVE TYPE - GOVERNMENT
|
|
// -------------------------------------------------------------------------
|
|
{
  // Mapping entry for archives run by government bodies (national, state,
  // parliamentary, judicial, public administration, security).
  sourceBlock: 'archive_type_government',
  description: 'Government archive types - national, state, parliamentary, court',
  // The trailing "Classes:" summary lists the same names as `generatedClasses`
  // below; update both together when a class is added or removed.
  detailedDescription: `
Government archives document state activities at all levels:
- National archives (NationalArchives)
- State/regional government (StateArchives, StateArchivesSection)
- Parliamentary records (ParliamentaryArchives)
- Judicial records (CourtRecords, NotarialArchive)
- Public administration (PublicArchive, GovernmentArchive)
- Security and intelligence (SecurityArchives)

Classes: NationalArchives, StateArchives, StateArchivesSection, GovernmentArchive,
PublicArchive, ParliamentaryArchives, CourtRecords, NotarialArchive,
SecurityArchives, CurrentArchive, PublicArchivesInFrance
  `.trim(),
  // Class/entity pair for the entry as a whole; each field below repeats it.
  linkmlClass: 'NationalArchives',
  typedbEntity: 'government-archive',
  provenance: {
    sourceType: 'schema_documentation',
    dataTier: 'TIER_1_AUTHORITATIVE',
  },
  generatedClasses: [
    'NationalArchives',
    'StateArchives',
    'StateArchivesSection',
    'GovernmentArchive',
    'PublicArchive',
    'ParliamentaryArchives',
    'CourtRecords',
    'NotarialArchive',
    'SecurityArchives',
    'CurrentArchive',
    'PublicArchivesInFrance',
  ],
  fields: [
    {
      // sourcePath is null: no YAML key is named for this slot
      // (presumably derived from the schema — TODO confirm).
      sourcePath: null,
      sourceDescription: 'Government archive classification',
      targetClass: 'NationalArchives',
      targetSlot: 'has_type',
      transformation: 'lookup',
      typedbEntity: 'government-archive',
      typedbAttribute: 'archive-classification',
      rdfPredicate: 'hc:archiveType',
      required: true,
      status: 'mapped',
    },
    {
      // Optional jurisdiction level, restricted to the enum values below.
      sourcePath: 'jurisdiction_level',
      sourceDescription: 'Level of government jurisdiction',
      targetClass: 'NationalArchives',
      targetSlot: 'has_level',
      transformation: 'lookup',
      typedbEntity: 'government-archive',
      typedbAttribute: 'jurisdiction-level',
      rdfPredicate: 'hc:jurisdictionLevel',
      required: false,
      validation: {
        type: 'enum',
        enumValues: ['NATIONAL', 'STATE', 'REGIONAL', 'MUNICIPAL', 'LOCAL'],
      },
    },
  ],
  exampleYaml: `
# NationalArchives instance (e.g., Nationaal Archief)
institution_type: ARCHIVE
archive_classification: GOVERNMENT
subtype: NATIONAL_ARCHIVES
jurisdiction_level: NATIONAL
country: NL
official_name: Nationaal Archief
legal_mandate: Archiefwet 1995
  `.trim(),
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// ARCHIVE TYPE - MUNICIPAL/LOCAL
|
|
// -------------------------------------------------------------------------
|
|
{
  // Mapping entry for municipal and local-government archive classes.
  sourceBlock: 'archive_type_municipal',
  description: 'Municipal and local government archive types',
  // The "Classes:" summary at the end lists the same seven names as
  // `generatedClasses` below.
  detailedDescription: `
Municipal archives serve local government and community documentation:
- City/town archives (MunicipalArchive)
- Local government records (LocalGovernmentArchive)
- County/district level (CountyRecordOffice, DistrictArchiveGermany)
- Local history collections (LocalHistoryArchive)

Classes: MunicipalArchive, LocalGovernmentArchive, CountyRecordOffice,
DistrictArchiveGermany, LocalHistoryArchive, ComarcalArchive,
DistritalArchive
  `.trim(),
  // Class/entity pair for the entry as a whole; each field below repeats it.
  linkmlClass: 'MunicipalArchive',
  typedbEntity: 'municipal-archive',
  provenance: {
    sourceType: 'schema_documentation',
    dataTier: 'TIER_1_AUTHORITATIVE',
  },
  generatedClasses: [
    'MunicipalArchive',
    'LocalGovernmentArchive',
    'CountyRecordOffice',
    'DistrictArchiveGermany',
    'LocalHistoryArchive',
    'ComarcalArchive',
    'DistritalArchive',
  ],
  fields: [
    {
      // sourcePath is null: no YAML key is named for this slot
      // (presumably derived from the schema — TODO confirm).
      sourcePath: null,
      sourceDescription: 'Municipal archive classification',
      targetClass: 'MunicipalArchive',
      targetSlot: 'has_type',
      transformation: 'lookup',
      typedbEntity: 'municipal-archive',
      typedbAttribute: 'archive-classification',
      rdfPredicate: 'hc:archiveType',
      required: true,
      status: 'mapped',
    },
    {
      // Optional: the `municipality` key is copied as-is ('direct') into cover_place.
      sourcePath: 'municipality',
      sourceDescription: 'Municipality served by the archive',
      targetClass: 'MunicipalArchive',
      targetSlot: 'cover_place',
      transformation: 'direct',
      typedbEntity: 'municipal-archive',
      typedbAttribute: 'municipality',
      rdfPredicate: 'hc:municipality',
      required: false,
    },
  ],
  // Illustrative input record; only `municipality` corresponds to a sourcePath above.
  exampleYaml: `
# MunicipalArchive instance (e.g., Stadsarchief Amsterdam)
institution_type: ARCHIVE
archive_classification: MUNICIPAL
municipality: Amsterdam
province: Noord-Holland
country: NL
services:
- reading_room
- digitization
- genealogy_support
  `.trim(),
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// ARCHIVE TYPE - NATIONAL
|
|
// -------------------------------------------------------------------------
|
|
{
  // Mapping entry for national-level archive institutions and their
  // country-specific variants.
  sourceBlock: 'archive_type_national',
  description: 'National-level archive institutions and systems',
  // The "Classes:" summary at the end lists the same six names as
  // `generatedClasses` below.
  detailedDescription: `
National archives represent the highest level of archival authority:
- Central national repositories (NationalArchives)
- Country-specific variations (ArchivesRegionales, Landsarkiv, Fylkesarkiv)
- International organization archives (ArchiveOfInternationalOrganization)

Classes: NationalArchives, ArchivesRegionales, Landsarkiv, Fylkesarkiv,
ArchiveOfInternationalOrganization, RegionalArchivesInIceland
  `.trim(),
  // Class/entity pair for the entry as a whole; each field below repeats it.
  linkmlClass: 'NationalArchives',
  typedbEntity: 'national-archive',
  provenance: {
    sourceType: 'schema_documentation',
    dataTier: 'TIER_1_AUTHORITATIVE',
  },
  generatedClasses: [
    'NationalArchives',
    'ArchivesRegionales',
    'Landsarkiv',
    'Fylkesarkiv',
    'ArchiveOfInternationalOrganization',
    'RegionalArchivesInIceland',
  ],
  fields: [
    {
      // sourcePath is null: no YAML key is named for this slot
      // (presumably derived from the schema — TODO confirm).
      sourcePath: null,
      sourceDescription: 'National archive classification',
      targetClass: 'NationalArchives',
      targetSlot: 'has_type',
      transformation: 'lookup',
      typedbEntity: 'national-archive',
      typedbAttribute: 'archive-classification',
      rdfPredicate: 'hc:archiveType',
      required: true,
      status: 'mapped',
    },
    {
      // Required country code; the pattern accepts exactly two uppercase ASCII letters.
      sourcePath: 'country_code',
      sourceDescription: 'ISO 3166-1 alpha-2 country code',
      targetClass: 'NationalArchives',
      targetSlot: 'cover_country',
      transformation: 'lookup',
      typedbEntity: 'national-archive',
      typedbAttribute: 'country-code',
      rdfPredicate: 'hc:countryCode',
      required: true,
      validation: {
        type: 'string',
        pattern: '^[A-Z]{2}$',
      },
    },
  ],
  // Illustrative input record; `country_code` corresponds to the required field above.
  exampleYaml: `
# National Archives variation (e.g., Swedish Landsarkiv)
institution_type: ARCHIVE
archive_classification: NATIONAL
subtype: LANDSARKIV
country_code: SE
region: Gothenburg
  `.trim(),
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// ARCHIVE TYPE - REGIONAL
|
|
// -------------------------------------------------------------------------
|
|
{
  // Mapping entry for regional and provincial archive classes.
  sourceBlock: 'archive_type_regional',
  description: 'Regional and provincial archive types',
  // The "Classes:" summary at the end lists the same eight names as
  // `generatedClasses` below.
  detailedDescription: `
Regional archives serve geographic areas between national and local:
- Provincial archives (ProvincialArchive, ProvincialHistoricalArchive)
- Regional administration (RegionalArchive, RegionalStateArchives)
- Cantonal systems (CantonalArchive - Switzerland)
- Country-specific regional (DepartmentalArchives - France, StateRegionalArchiveCzechia)

Classes: RegionalArchive, ProvincialArchive, ProvincialHistoricalArchive,
RegionalStateArchives, CantonalArchive, DepartmentalArchives,
StateRegionalArchiveCzechia, StateDistrictArchive
  `.trim(),
  // Class/entity pair for the entry as a whole; each field below repeats it.
  linkmlClass: 'RegionalArchive',
  typedbEntity: 'regional-archive',
  provenance: {
    sourceType: 'schema_documentation',
    dataTier: 'TIER_1_AUTHORITATIVE',
  },
  generatedClasses: [
    'RegionalArchive',
    'ProvincialArchive',
    'ProvincialHistoricalArchive',
    'RegionalStateArchives',
    'CantonalArchive',
    'DepartmentalArchives',
    'StateRegionalArchiveCzechia',
    'StateDistrictArchive',
  ],
  fields: [
    {
      // sourcePath is null: no YAML key is named for this slot
      // (presumably derived from the schema — TODO confirm).
      sourcePath: null,
      sourceDescription: 'Regional archive classification',
      targetClass: 'RegionalArchive',
      targetSlot: 'has_type',
      transformation: 'lookup',
      typedbEntity: 'regional-archive',
      typedbAttribute: 'archive-classification',
      rdfPredicate: 'hc:archiveType',
      required: true,
      status: 'mapped',
    },
    {
      // Optional: the `region` key is copied as-is ('direct') into the `region` slot.
      sourcePath: 'region',
      sourceDescription: 'Geographic region served',
      targetClass: 'RegionalArchive',
      targetSlot: 'region',
      transformation: 'direct',
      typedbEntity: 'regional-archive',
      typedbAttribute: 'region',
      rdfPredicate: 'hc:region',
      required: false,
    },
  ],
  // NOTE(review): the example carries `province` but no `region` key, so it does
  // not exercise the optional field mapped above — confirm whether that is intended.
  exampleYaml: `
# ProvincialArchive instance (e.g., Brabants Historisch Informatie Centrum)
institution_type: ARCHIVE
archive_classification: REGIONAL
subtype: PROVINCIAL_ARCHIVE
province: Noord-Brabant
country: NL
  `.trim(),
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// ARCHIVE TYPE - SPECIALIZED
|
|
// -------------------------------------------------------------------------
|
|
{
  // Mapping entry for archives defined by format, subject, community or
  // host institution rather than by level of government.
  sourceBlock: 'archive_type_specialized',
  description: 'Specialized archive types - thematic collections and unique formats',
  // The trailing "Classes:" summary lists the same names as `generatedClasses`
  // below; update both together when a class is added or removed.
  detailedDescription: `
Specialized archives focus on specific formats, subjects, or communities:
- Format-specific (PhotoArchive, LiteraryArchive, IconographicArchives)
- Subject-focused (PoliticalArchive, MilitaryArchive, PerformingArtsArchive)
- Community-focused (WomensArchives, LGBTArchive, CommunityArchive)
- Institutional (HospitalArchive, SchoolArchive)

Classes: SpecializedArchive, PhotoArchive, LiteraryArchive, PoliticalArchive,
MilitaryArchive, PerformingArtsArchive, WomensArchives, LGBTArchive,
CommunityArchive, HospitalArchive, SchoolArchive, IconographicArchives,
ArtArchive, ArchitecturalArchive, NewspaperClippingsArchive, PressArchive,
NobilityArchive
  `.trim(),
  // Class/entity pair for the entry as a whole; each field below repeats it.
  linkmlClass: 'SpecializedArchive',
  typedbEntity: 'specialized-archive',
  provenance: {
    sourceType: 'schema_documentation',
    dataTier: 'TIER_1_AUTHORITATIVE',
  },
  generatedClasses: [
    'SpecializedArchive',
    'PhotoArchive',
    'LiteraryArchive',
    'PoliticalArchive',
    'MilitaryArchive',
    'PerformingArtsArchive',
    'WomensArchives',
    'LGBTArchive',
    'CommunityArchive',
    'HospitalArchive',
    'SchoolArchive',
    'IconographicArchives',
    'ArtArchive',
    'ArchitecturalArchive',
    'NewspaperClippingsArchive',
    'PressArchive',
    'NobilityArchive',
  ],
  fields: [
    {
      // sourcePath is null: no YAML key is named for this slot
      // (presumably derived from the schema — TODO confirm).
      sourcePath: null,
      sourceDescription: 'Specialized archive classification',
      targetClass: 'SpecializedArchive',
      targetSlot: 'has_type',
      transformation: 'lookup',
      typedbEntity: 'specialized-archive',
      typedbAttribute: 'archive-classification',
      rdfPredicate: 'hc:archiveType',
      required: true,
      status: 'mapped',
    },
    {
      // Optional: the `specialization` key is copied as-is ('direct') into has_domain.
      sourcePath: 'specialization',
      sourceDescription: 'Area of specialization',
      targetClass: 'SpecializedArchive',
      targetSlot: 'has_domain',
      transformation: 'direct',
      typedbEntity: 'specialized-archive',
      typedbAttribute: 'specialization',
      rdfPredicate: 'hc:specialization',
      required: false,
    },
  ],
  exampleYaml: `
# PhotoArchive instance (e.g., Nederlands Fotomuseum archive)
institution_type: ARCHIVE
archive_classification: SPECIALIZED
subtype: PHOTO_ARCHIVE
specialization: photography
collection_strengths:
- Dutch photography 1840-present
- Documentary photography
- Press photography
  `.trim(),
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// ARCHIVE TYPE - THEMATIC
|
|
// -------------------------------------------------------------------------
|
|
{
  // Mapping entry for archives organised around a theme or function
  // (digital/web/dark archives, deposit and collecting archives, personal papers).
  sourceBlock: 'archive_type_thematic',
  description: 'Thematic archive types - digital, web, dark archives, and special collections',
  // The trailing "Classes:" summary lists the same names as `generatedClasses`
  // below; update both together when a class is added or removed.
  // PersonalCollectionType is mentioned in the bullets but is not one of the
  // classes generated by this entry.
  detailedDescription: `
Thematic archives organized around specific themes or functions:
- Digital preservation (DigitalArchive, DarkArchive, WebArchive)
- Collection types (CollectingArchives, DepositArchive)
- Personal papers (Nachlass, HouseArchive, PersonalCollectionType)
- Online archives (OnlineNewsArchive, MailingListArchive)

Classes: DigitalArchive, DarkArchive, WebArchive, CollectingArchives,
DepositArchive, Nachlass, HouseArchive, OnlineNewsArchive,
MailingListArchive, ClimateArchive, FreeArchive, PostcustodialArchive,
MuseumArchive, ArchivalRepository, ArchivalLibrary
  `.trim(),
  // Class/entity pair for the entry as a whole; each field below repeats it.
  linkmlClass: 'DigitalArchive',
  typedbEntity: 'thematic-archive',
  provenance: {
    sourceType: 'schema_documentation',
    dataTier: 'TIER_1_AUTHORITATIVE',
  },
  generatedClasses: [
    'DigitalArchive',
    'DarkArchive',
    'WebArchive',
    'CollectingArchives',
    'DepositArchive',
    'Nachlass',
    'HouseArchive',
    'OnlineNewsArchive',
    'MailingListArchive',
    'ClimateArchive',
    'FreeArchive',
    'PostcustodialArchive',
    'MuseumArchive',
    'ArchivalRepository',
    'ArchivalLibrary',
  ],
  fields: [
    {
      // sourcePath is null: no YAML key is named for this slot
      // (presumably derived from the schema — TODO confirm).
      sourcePath: null,
      sourceDescription: 'Thematic archive classification',
      targetClass: 'DigitalArchive',
      targetSlot: 'has_type',
      transformation: 'lookup',
      typedbEntity: 'thematic-archive',
      typedbAttribute: 'archive-classification',
      rdfPredicate: 'hc:archiveType',
      required: true,
      status: 'mapped',
    },
    {
      // Optional preservation level, restricted to the enum values below.
      sourcePath: 'digital_preservation_level',
      sourceDescription: 'Level of digital preservation commitment',
      targetClass: 'DigitalArchive',
      targetSlot: 'has_level',
      transformation: 'lookup',
      typedbEntity: 'thematic-archive',
      typedbAttribute: 'preservation-level',
      rdfPredicate: 'hc:preservationLevel',
      required: false,
      validation: {
        type: 'enum',
        enumValues: ['BIT_LEVEL', 'LOGICAL', 'SEMANTIC', 'FULL'],
      },
    },
  ],
  exampleYaml: `
# DarkArchive instance
institution_type: ARCHIVE
archive_classification: THEMATIC
subtype: DARK_ARCHIVE
digital_preservation_level: FULL
access_policy: RESTRICTED
storage_location: offline_vault
  `.trim(),
},
|
|
|
|
// ===========================================================================
|
|
// PHASE 1: ORGANIZATIONAL STRUCTURE (30+ classes)
|
|
// ===========================================================================
|
|
// These classes model the internal structure and relationships of heritage
|
|
// organizations, including departments, divisions, and hierarchies.
|
|
// ===========================================================================
|
|
|
|
// -------------------------------------------------------------------------
|
|
// ORGANIZATIONAL STRUCTURE - HIERARCHY
|
|
// -------------------------------------------------------------------------
|
|
{
  // Mapping entry for relationships between organizations: parents,
  // subsidiaries, branches, encompassing bodies and networks.
  sourceBlock: 'org_structure_hierarchy',
  description: 'Organizational hierarchy - parent organizations, subsidiaries, branches',
  // The trailing "Classes:" summary lists the same names as `generatedClasses`
  // below; update both together when a class is added or removed.
  detailedDescription: `
Models the hierarchical relationships between organizations:
- Parent-child relationships (ParentOrganizationUnit, SubsidiaryOrganization)
- Branch locations (OrganizationBranch, BranchOffice)
- Encompassing bodies (EncompassingBody)
- Networks and associations (ArchiveNetwork, ArchiveAssociation, ConnectionNetwork)

Classes: ParentOrganizationUnit, SubsidiaryOrganization, OrganizationBranch,
BranchOffice, EncompassingBody, ArchiveNetwork, ArchiveAssociation,
ConnectionNetwork, Organization, Institution, CulturalInstitution,
MemoryInstitution, GLAM
  `.trim(),
  // Class/entity pair for the entry as a whole; each field below repeats it.
  linkmlClass: 'Organization',
  typedbEntity: 'organization',
  provenance: {
    sourceType: 'schema_documentation',
    dataTier: 'TIER_1_AUTHORITATIVE',
  },
  generatedClasses: [
    'Organization',
    'Institution',
    'ParentOrganizationUnit',
    'SubsidiaryOrganization',
    'OrganizationBranch',
    'BranchOffice',
    'EncompassingBody',
    'ArchiveNetwork',
    'ArchiveAssociation',
    'ConnectionNetwork',
    'CulturalInstitution',
    'MemoryInstitution',
    'GLAM',
  ],
  fields: [
    {
      // Nested object (see `parent_organization` in the example) mapped to part_of.
      sourcePath: 'parent_organization',
      sourceDescription: 'Parent organization identifier or name',
      targetClass: 'Organization',
      targetSlot: 'part_of',
      transformation: 'nested',
      typedbEntity: 'organization',
      typedbAttribute: 'parent-organization',
      rdfPredicate: 'org:subOrganizationOf',
      required: false,
    },
    {
      // List of subsidiaries, mapped element by element ('array_map').
      sourcePath: 'subsidiary_organizations',
      sourceDescription: 'List of subsidiary organizations',
      targetClass: 'Organization',
      targetSlot: 'has_component',
      transformation: 'array_map',
      typedbEntity: 'organization',
      typedbAttribute: 'subsidiaries',
      rdfPredicate: 'org:hasSubOrganization',
      required: false,
    },
    {
      // Nested object (see `encompassing_body` in the example) mapped to encompassed_by.
      sourcePath: 'encompassing_body',
      sourceDescription: 'Larger organizational body this belongs to',
      targetClass: 'Organization',
      targetSlot: 'encompassed_by',
      transformation: 'nested',
      typedbEntity: 'organization',
      typedbAttribute: 'encompassing-body',
      rdfPredicate: 'hc:encompassingBody',
      required: false,
    },
  ],
  // Child keys are indented under their parent so `name`/`ghcid`/`type` belong to
  // the nested objects instead of colliding as duplicate top-level keys.
  exampleYaml: `
# Organization with hierarchy
organization_name: Rijksmuseum Research Library
parent_organization:
  name: Rijksmuseum
  ghcid: NL-NH-AMS-M-RM
encompassing_body:
  name: Ministry of Education, Culture and Science
  type: GOVERNMENT_MINISTRY
  `.trim(),
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// ORGANIZATIONAL STRUCTURE - ADMINISTRATIVE
|
|
// -------------------------------------------------------------------------
|
|
{
  // Mapping entry for the internal units and physical spaces of an
  // organization: departments, divisions, labs, reading rooms, storage.
  sourceBlock: 'org_structure_administrative',
  description: 'Administrative units - departments, divisions, offices',
  // The trailing "Classes:" summary lists the same names as `generatedClasses`
  // below; update both together when a class is added or removed.
  detailedDescription: `
Models the internal administrative structure of organizations:
- Functional divisions (Department, Division)
- Administrative units (AdministrativeOffice)
- Support functions (ConservationLab, ReadingRoom, GiftShop)
- Specialized facilities (ExhibitionSpace, Storage, Warehouse)

Classes: Department, Division, AdministrativeOffice, ConservationLab,
ReadingRoom, ReadingRoomAnnex, GiftShop, ExhibitionSpace,
Storage, Warehouse, PrintRoom, ServiceArea, ClosedSpace, PublicSpace,
OutdoorSite
  `.trim(),
  // Class/entity pair for the entry as a whole; each field below repeats it.
  linkmlClass: 'Department',
  typedbEntity: 'department',
  provenance: {
    sourceType: 'schema_documentation',
    dataTier: 'TIER_1_AUTHORITATIVE',
  },
  generatedClasses: [
    'Department',
    'Division',
    'AdministrativeOffice',
    'ConservationLab',
    'ReadingRoom',
    'ReadingRoomAnnex',
    'GiftShop',
    'ExhibitionSpace',
    'Storage',
    'Warehouse',
    'PrintRoom',
    'ServiceArea',
    'ClosedSpace',
    'PublicSpace',
    'OutdoorSite',
  ],
  fields: [
    {
      // List of departments, mapped element by element ('array_map').
      sourcePath: 'departments',
      sourceDescription: 'List of organizational departments',
      targetClass: 'Department',
      targetSlot: 'has_name',
      transformation: 'array_map',
      typedbEntity: 'department',
      typedbAttribute: 'department-name',
      rdfPredicate: 'org:hasUnit',
      required: false,
    },
    {
      // List of facilities, mapped element by element ('array_map').
      sourcePath: 'facilities',
      sourceDescription: 'Physical facilities and spaces',
      targetClass: 'Department',
      targetSlot: 'has_facility',
      transformation: 'array_map',
      typedbEntity: 'department',
      typedbAttribute: 'facilities',
      rdfPredicate: 'hc:hasFacility',
      required: false,
    },
  ],
  // Item keys are indented under their list entry so each department/facility
  // is one mapping rather than a run of top-level keys.
  exampleYaml: `
# Department structure
departments:
  - name: Collection Management
    type: DEPARTMENT
    staff_count: 25
  - name: Conservation Laboratory
    type: CONSERVATION_LAB
    specializations:
      - paper_conservation
      - painting_restoration
facilities:
  - type: READING_ROOM
    capacity: 50
  - type: EXHIBITION_SPACE
    area_sqm: 2500
  `.trim(),
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// ORGANIZATIONAL STRUCTURE - SUBDIVISION
|
|
// -------------------------------------------------------------------------
|
|
{
  // Mapping entry for organizational subdivisions and the events that
  // change an organization's structure.
  sourceBlock: 'org_structure_subdivision',
  description: 'Organizational subdivisions and change events',
  // The "Classes:" summary at the end lists the same eight names as
  // `generatedClasses` below.
  detailedDescription: `
Models organizational subdivisions and structural changes:
- Subdivision types (OrganizationalSubdivision)
- Organizational structure (OrganizationalStructure)
- Change events (OrganizationalChangeEvent)
- Contributing and allocating agencies (ContributingAgency, AllocationAgency)

Classes: OrganizationalSubdivision, OrganizationalStructure,
OrganizationalChangeEvent, ContributingAgency, AllocationAgency,
Jurisdiction, StandardsOrganization, RegistrationAuthority
  `.trim(),
  // Entry-level class/entity pair; note the second field below targets
  // OrganizationalChangeEvent instead.
  linkmlClass: 'OrganizationalSubdivision',
  typedbEntity: 'organizational-subdivision',
  provenance: {
    sourceType: 'schema_documentation',
    dataTier: 'TIER_1_AUTHORITATIVE',
  },
  generatedClasses: [
    'OrganizationalSubdivision',
    'OrganizationalStructure',
    'OrganizationalChangeEvent',
    'ContributingAgency',
    'AllocationAgency',
    'Jurisdiction',
    'StandardsOrganization',
    'RegistrationAuthority',
  ],
  fields: [
    {
      // List of subdivisions, mapped element by element ('array_map').
      sourcePath: 'subdivisions',
      sourceDescription: 'Organizational subdivisions',
      targetClass: 'OrganizationalSubdivision',
      targetSlot: 'has_name',
      transformation: 'array_map',
      typedbEntity: 'organizational-subdivision',
      typedbAttribute: 'subdivision-name',
      rdfPredicate: 'org:hasUnit',
      required: false,
    },
    {
      // List of change events; this field has its own target class and TypeDB entity.
      sourcePath: 'change_history',
      sourceDescription: 'History of organizational changes',
      targetClass: 'OrganizationalChangeEvent',
      targetSlot: 'has_activity',
      transformation: 'array_map',
      typedbEntity: 'organizational-change-event',
      typedbAttribute: 'change-events',
      rdfPredicate: 'hc:hasChangeEvent',
      required: false,
    },
  ],
  // Event keys are indented under the single `change_history` item so the date,
  // description and organizations stay attached to that event.
  exampleYaml: `
# Organizational change history
change_history:
  - event_type: MERGER
    date: "2001-01-01"
    description: "Merger of Gemeentearchief and Rijksarchief"
    predecessor_organizations:
      - name: Gemeentearchief Haarlem
      - name: Rijksarchief Noord-Holland
    resulting_organization:
      name: Noord-Hollands Archief
  `.trim(),
},
|
|
|
|
// ===========================================================================
|
|
// PHASE 1: HERITAGE & CULTURAL SITES (14 classes)
|
|
// ===========================================================================
|
|
// Classes representing world heritage, intangible heritage, and special
|
|
// cultural designations.
|
|
// ===========================================================================
|
|
|
|
// -------------------------------------------------------------------------
|
|
// HERITAGE - WORLD SITES
|
|
// -------------------------------------------------------------------------
|
|
{
  // Mapping entry for UNESCO World Heritage designations and related
  // historic-building / cultural-institution classes.
  sourceBlock: 'heritage_world_sites',
  description: 'UNESCO World Heritage Sites and tentative list entries',
  // The "Classes:" summary at the end lists the same four names as
  // `generatedClasses` below.
  detailedDescription: `
World Heritage Site designations and related classifications:
- Inscribed sites (WorldHeritageSite)
- Tentative list entries (TentativeWorldHeritageSite)
- Historic buildings and monuments (HistoricBuilding)
- Cultural institutions (CulturalInstitution)

Classes: WorldHeritageSite, TentativeWorldHeritageSite, HistoricBuilding,
CulturalInstitution
  `.trim(),
  // Class/entity pair for the entry as a whole; each field below repeats it.
  linkmlClass: 'WorldHeritageSite',
  typedbEntity: 'world-heritage-site',
  provenance: {
    sourceType: 'schema_documentation',
    dataTier: 'TIER_1_AUTHORITATIVE',
  },
  generatedClasses: [
    'WorldHeritageSite',
    'TentativeWorldHeritageSite',
    'HistoricBuilding',
    'CulturalInstitution',
  ],
  fields: [
    {
      // Required numeric identifier, copied as-is into identified_by.
      sourcePath: 'unesco_id',
      sourceDescription: 'UNESCO World Heritage Site ID',
      targetClass: 'WorldHeritageSite',
      targetSlot: 'identified_by',
      transformation: 'direct',
      typedbEntity: 'world-heritage-site',
      typedbAttribute: 'unesco-id',
      rdfPredicate: 'hc:unescoId',
      required: true,
      validation: {
        type: 'number',
      },
    },
    {
      // Optional numeric year, copied as-is into temporal_extent.
      sourcePath: 'inscription_year',
      sourceDescription: 'Year of UNESCO inscription',
      targetClass: 'WorldHeritageSite',
      targetSlot: 'temporal_extent',
      transformation: 'direct',
      typedbEntity: 'world-heritage-site',
      typedbAttribute: 'inscription-year',
      rdfPredicate: 'hc:inscriptionYear',
      required: false,
      validation: {
        type: 'number',
      },
    },
    {
      // Optional list of selection criteria (roman numerals i-x per the description).
      sourcePath: 'heritage_criteria',
      sourceDescription: 'UNESCO selection criteria (i-x)',
      targetClass: 'WorldHeritageSite',
      targetSlot: 'has_membership_criteria',
      transformation: 'array_direct',
      typedbEntity: 'world-heritage-site',
      typedbAttribute: 'criteria',
      rdfPredicate: 'hc:heritageCriteria',
      required: false,
    },
  ],
  // 965 is the World Heritage List number of the Rietveld Schröderhuis
  // (inscribed 2000, criteria i and ii); 818 is a different Dutch site
  // (the Kinderdijk-Elshout mill network).
  exampleYaml: `
# WorldHeritageSite instance
heritage_designation: UNESCO_WORLD_HERITAGE
unesco_id: 965
name: Rietveld Schröderhuis
inscription_year: 2000
heritage_criteria:
- i
- ii
country: NL
  `.trim(),
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// HERITAGE - INTANGIBLE
|
|
// -------------------------------------------------------------------------
|
|
{
  // Mapping entry for intangible cultural heritage: forms, performances,
  // events and the groups that carry them.
  sourceBlock: 'heritage_intangible',
  description: 'Intangible cultural heritage - traditions, performances, practices',
  // The "Classes:" summary at the end lists the same four names as
  // `generatedClasses` below.
  detailedDescription: `
UNESCO Intangible Cultural Heritage and related practices:
- Intangible heritage forms (IntangibleHeritageForm)
- Performances and events (IntangibleHeritagePerformance, IntangibleHeritageEvent)
- Groups preserving traditions (IntangibleHeritageGroupType)

Classes: IntangibleHeritageForm, IntangibleHeritagePerformance,
IntangibleHeritageEvent, IntangibleHeritageGroupType
  `.trim(),
  // Class/entity pair for the entry as a whole; each field below repeats it.
  linkmlClass: 'IntangibleHeritageForm',
  typedbEntity: 'intangible-heritage',
  provenance: {
    sourceType: 'schema_documentation',
    dataTier: 'TIER_1_AUTHORITATIVE',
  },
  generatedClasses: [
    'IntangibleHeritageForm',
    'IntangibleHeritagePerformance',
    'IntangibleHeritageEvent',
    'IntangibleHeritageGroupType',
  ],
  fields: [
    {
      // Optional domain category, restricted to the five enum values below.
      sourcePath: 'ich_domain',
      sourceDescription: 'UNESCO ICH domain category',
      targetClass: 'IntangibleHeritageForm',
      targetSlot: 'has_domain',
      transformation: 'lookup',
      typedbEntity: 'intangible-heritage',
      typedbAttribute: 'domain',
      rdfPredicate: 'hc:ichDomain',
      required: false,
      validation: {
        type: 'enum',
        enumValues: [
          'ORAL_TRADITIONS',
          'PERFORMING_ARTS',
          'SOCIAL_PRACTICES',
          'KNOWLEDGE_PRACTICES',
          'TRADITIONAL_CRAFTSMANSHIP',
        ],
      },
    },
    {
      // Optional list type, restricted to the three enum values below.
      sourcePath: 'inscription_list',
      sourceDescription: 'UNESCO ICH list type',
      targetClass: 'IntangibleHeritageForm',
      targetSlot: 'has_type',
      transformation: 'lookup',
      typedbEntity: 'intangible-heritage',
      typedbAttribute: 'list-type',
      rdfPredicate: 'hc:ichListType',
      required: false,
      validation: {
        type: 'enum',
        enumValues: ['REPRESENTATIVE', 'URGENT_SAFEGUARDING', 'GOOD_PRACTICES'],
      },
    },
  ],
  // Illustrative input record; `ich_domain` and `inscription_list` use values
  // from the enums declared above.
  // NOTE(review): the `countries` list is illustrative — verify it against the
  // UNESCO record for this element before relying on it.
  exampleYaml: `
# IntangibleHeritageForm instance
heritage_type: INTANGIBLE
ich_domain: TRADITIONAL_CRAFTSMANSHIP
name: Craft of the miller operating windmills and watermills
inscription_list: REPRESENTATIVE
inscription_year: 2017
countries:
- NL
- BE
  `.trim(),
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// HERITAGE - NATIONAL TREASURES
|
|
// -------------------------------------------------------------------------
|
|
{
  // Mapping entry for national-level heritage designations plus the
  // documentation and research centres grouped with them.
  sourceBlock: 'heritage_national_treasures',
  description: 'National treasures and designated heritage items',
  // The "Classes:" summary at the end lists the same six names as
  // `generatedClasses` below.
  detailedDescription: `
National-level heritage designations:
- National treasures (NationalTreasure)
- Country-specific designations (NationalTreasureOfFrance)
- Documentation centers (DocumentationCentre, RegionalHistoricCenter)
- Research facilities (ResearchCenter, ScientificTechnicAndIndustrialCultureCenter)

Classes: NationalTreasure, NationalTreasureOfFrance, DocumentationCentre,
RegionalHistoricCenter, ResearchCenter,
ScientificTechnicAndIndustrialCultureCenter
  `.trim(),
  // Class/entity pair for the entry as a whole; each field below repeats it.
  linkmlClass: 'NationalTreasure',
  typedbEntity: 'national-treasure',
  provenance: {
    sourceType: 'schema_documentation',
    dataTier: 'TIER_1_AUTHORITATIVE',
  },
  generatedClasses: [
    'NationalTreasure',
    'NationalTreasureOfFrance',
    'DocumentationCentre',
    'RegionalHistoricCenter',
    'ResearchCenter',
    'ScientificTechnicAndIndustrialCultureCenter',
  ],
  fields: [
    {
      // Required designation type, resolved via 'lookup'; no enum is declared here.
      sourcePath: 'designation_type',
      sourceDescription: 'Type of national designation',
      targetClass: 'NationalTreasure',
      targetSlot: 'has_type',
      transformation: 'lookup',
      typedbEntity: 'national-treasure',
      typedbAttribute: 'designation-type',
      rdfPredicate: 'hc:designationType',
      required: true,
    },
    {
      // Optional designation date, validated as a date and mapped with the
      // 'temporal' transformation into temporal_extent.
      sourcePath: 'designation_date',
      sourceDescription: 'Date of official designation',
      targetClass: 'NationalTreasure',
      targetSlot: 'temporal_extent',
      transformation: 'temporal',
      typedbEntity: 'national-treasure',
      typedbAttribute: 'designation-date',
      rdfPredicate: 'hc:designationDate',
      required: false,
      validation: {
        type: 'date',
      },
    },
  ],
  // Illustrative input record; `designation_type` and `designation_date`
  // correspond to the two fields mapped above.
  exampleYaml: `
# NationalTreasure instance
designation_type: RIJKSMONUMENT
designation_number: 12345
name: Amsterdam Canal Ring
designation_date: "1999-12-01"
country: NL
  `.trim(),
},
|
|
|
|
// ===========================================================================
|
|
// PHASE 1: CLASSIFICATION TYPES (32 classes)
|
|
// ===========================================================================
|
|
// Type classes that provide classification vocabularies for custodians,
|
|
// collections, and related entities.
|
|
// ===========================================================================
|
|
|
|
// -------------------------------------------------------------------------
|
|
// TYPE CLASSES - GLAM INSTITUTIONS
|
|
// -------------------------------------------------------------------------
|
|
{
|
|
sourceBlock: 'type_classes_glam',
|
|
description: 'Classification types for GLAM institutions - museums, libraries, archives, galleries',
|
|
detailedDescription: `
|
|
Type classes for classifying heritage custodian institutions:
|
|
- Core GLAM types (MuseumType, LibraryType, ArchiveOrganizationType, GalleryType)
|
|
- Bio/nature custodians (BioCustodianType)
|
|
- Religious/sacred sites (HolySacredSiteType)
|
|
- Education providers (EducationProviderType)
|
|
|
|
Classes: MuseumType, LibraryType, ArchiveOrganizationType, GalleryType,
|
|
BioCustodianType, HolySacredSiteType, EducationProviderType,
|
|
CustodianType, PersonalCollectionType, FeatureCustodianType,
|
|
TasteScentHeritageType
|
|
`.trim(), // The "Classes:" list names every entry of this block's generatedClasses array.
|
|
linkmlClass: 'CustodianType',
|
|
typedbEntity: 'custodian-type',
|
|
provenance: {
|
|
sourceType: 'schema_documentation',
|
|
dataTier: 'TIER_1_AUTHORITATIVE',
|
|
},
|
|
generatedClasses: [
|
|
'CustodianType',
|
|
'MuseumType',
|
|
'LibraryType',
|
|
'ArchiveOrganizationType',
|
|
'GalleryType',
|
|
'BioCustodianType',
|
|
'HolySacredSiteType',
|
|
'EducationProviderType',
|
|
'PersonalCollectionType',
|
|
'FeatureCustodianType',
|
|
'TasteScentHeritageType',
|
|
],
|
|
fields: [
|
|
{
|
|
sourcePath: 'institution_type',
|
|
sourceDescription: 'Primary institution type code',
|
|
targetClass: 'CustodianType',
|
|
targetSlot: 'has_type',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'custodian-type',
|
|
typedbAttribute: 'type-code',
|
|
rdfPredicate: 'hc:custodianTypeCode',
|
|
required: true,
|
|
validation: {
|
|
type: 'enum',
|
|
enumValues: ['G', 'L', 'A', 'M', 'O', 'R', 'C', 'U', 'B', 'E', 'S', 'F', 'I', 'X', 'P', 'H', 'D', 'N', 'T'],
|
|
},
|
|
notes: 'GLAMORCUBESFIXPHDNT taxonomy single-letter codes',
|
|
},
|
|
{
|
|
sourcePath: 'institution_subtype',
|
|
sourceDescription: 'Detailed institution subtype',
|
|
targetClass: 'CustodianType',
|
|
targetSlot: 'has_type',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'custodian-type',
|
|
typedbAttribute: 'subtype',
|
|
rdfPredicate: 'hc:custodianSubtype',
|
|
required: false,
|
|
},
|
|
],
|
|
exampleYaml: `
|
|
# Custodian type classification
|
|
institution_type: M # Museum
|
|
institution_subtype: ART_MUSEUM
|
|
museum_type_details:
|
|
collection_focus: FINE_ARTS
|
|
governance: PUBLIC
|
|
size_category: LARGE
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// TYPE CLASSES - DIGITAL PLATFORMS
|
|
// -------------------------------------------------------------------------
|
|
{
|
|
sourceBlock: 'type_classes_digital',
|
|
description: 'Classification types for digital platforms and web presence',
|
|
detailedDescription: `
|
|
Type classes for digital infrastructure:
|
|
- Digital platform types (DigitalPlatformType)
|
|
- Web portal classifications (WebPortalType, WebPortalTypes)
|
|
- Social media platforms (SocialMediaPlatformType, SocialMediaPlatformTypes)
|
|
- Social media content (SocialMediaPostType, SocialMediaPostTypes)
|
|
- Video content types (VideoAnnotationTypes)
|
|
|
|
Classes: DigitalPlatformType, WebPortalType, WebPortalTypes,
|
|
SocialMediaPlatformType, SocialMediaPlatformTypes,
|
|
SocialMediaPostType, SocialMediaPostTypes, VideoAnnotationTypes,
|
|
DataServiceEndpointType
|
|
`.trim(), // The "Classes:" list names every entry of this block's generatedClasses array.
|
|
linkmlClass: 'DigitalPlatformType',
|
|
typedbEntity: 'digital-platform-type',
|
|
provenance: {
|
|
sourceType: 'schema_documentation',
|
|
dataTier: 'TIER_1_AUTHORITATIVE',
|
|
},
|
|
generatedClasses: [
|
|
'DigitalPlatformType',
|
|
'WebPortalType',
|
|
'WebPortalTypes',
|
|
'SocialMediaPlatformType',
|
|
'SocialMediaPlatformTypes',
|
|
'SocialMediaPostType',
|
|
'SocialMediaPostTypes',
|
|
'VideoAnnotationTypes',
|
|
'DataServiceEndpointType',
|
|
],
|
|
fields: [
|
|
{
|
|
sourcePath: 'platform_type',
|
|
sourceDescription: 'Digital platform classification',
|
|
targetClass: 'DigitalPlatformType',
|
|
targetSlot: 'has_type',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'digital-platform-type',
|
|
typedbAttribute: 'platform-category',
|
|
rdfPredicate: 'hc:platformCategory',
|
|
required: true,
|
|
validation: {
|
|
type: 'enum',
|
|
enumValues: [
|
|
'WEBSITE',
|
|
'COLLECTION_PORTAL',
|
|
'DISCOVERY_PLATFORM',
|
|
'API',
|
|
'SOCIAL_MEDIA',
|
|
'CMS',
|
|
'DAM',
|
|
],
|
|
},
|
|
},
|
|
],
|
|
exampleYaml: `
|
|
# Digital platform type classification
|
|
digital_platforms:
|
|
- platform_type: COLLECTION_PORTAL
|
|
name: Online Collection
|
|
- platform_type: API
|
|
name: Data API
|
|
protocol: REST
|
|
- platform_type: SOCIAL_MEDIA
|
|
social_platform_type: INSTAGRAM
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// TYPE CLASSES - ORGANIZATIONAL
|
|
// -------------------------------------------------------------------------
|
|
{
|
|
sourceBlock: 'type_classes_organizational',
|
|
description: 'Classification types for organizational structures',
|
|
detailedDescription: `
|
|
Type classes for organizational classifications:
|
|
- Commercial organizations (CommercialOrganizationType)
|
|
- Non-profit organizations (NonProfitType)
|
|
- Research organizations (ResearchOrganizationType)
|
|
- Official institutions (OfficialInstitutionType)
|
|
- Heritage societies (HeritageSocietyType)
|
|
- Mixed/unspecified (MixedCustodianType, UnspecifiedType)
|
|
|
|
Classes: CommercialOrganizationType, NonProfitType, ResearchOrganizationType,
|
|
OfficialInstitutionType, HeritageSocietyType, MixedCustodianType,
|
|
UnspecifiedType, LegalEntityType, LegalForm, LegalName
|
|
`.trim(), // The "Classes:" list names every entry of this block's generatedClasses array.
|
|
linkmlClass: 'LegalEntityType',
|
|
typedbEntity: 'legal-entity-type',
|
|
provenance: {
|
|
sourceType: 'schema_documentation',
|
|
dataTier: 'TIER_1_AUTHORITATIVE',
|
|
},
|
|
generatedClasses: [
|
|
'CommercialOrganizationType',
|
|
'NonProfitType',
|
|
'ResearchOrganizationType',
|
|
'OfficialInstitutionType',
|
|
'HeritageSocietyType',
|
|
'MixedCustodianType',
|
|
'UnspecifiedType',
|
|
'LegalEntityType',
|
|
'LegalForm',
|
|
'LegalName',
|
|
],
|
|
fields: [
|
|
{
|
|
sourcePath: 'legal_form',
|
|
sourceDescription: 'Legal form/entity type',
|
|
targetClass: 'LegalEntityType',
|
|
targetSlot: 'has_legal_form',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'legal-entity-type',
|
|
typedbAttribute: 'legal-form-code',
|
|
rdfPredicate: 'hc:legalFormCode',
|
|
required: false,
|
|
notes: 'ISO 20275 legal form codes or national equivalents',
|
|
},
|
|
{
|
|
sourcePath: 'governance_type',
|
|
sourceDescription: 'Governance/ownership type',
|
|
targetClass: 'LegalEntityType',
|
|
targetSlot: 'has_type',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'legal-entity-type',
|
|
typedbAttribute: 'governance-type',
|
|
rdfPredicate: 'hc:governanceType',
|
|
required: false,
|
|
validation: {
|
|
type: 'enum',
|
|
enumValues: ['PUBLIC', 'PRIVATE', 'NON_PROFIT', 'MIXED', 'GOVERNMENT'],
|
|
},
|
|
},
|
|
],
|
|
exampleYaml: `
|
|
# Legal entity type classification
|
|
legal_status:
|
|
legal_form: STICHTING # Dutch foundation
|
|
legal_form_code: "8888" # ISO 20275
|
|
governance_type: NON_PROFIT
|
|
registration_authority: KVK
|
|
registration_number: "12345678"
|
|
`.trim(), // NOTE(review): the sample pairs STICHTING with legal_form_code "8888" labelled ISO 20275; this looks like a placeholder rather than the form-specific code — confirm against the published ELF code list.
|
|
},
|
|
|
|
// ===========================================================================
|
|
// PHASE 2 SOURCE MAPPINGS: Place & Location
|
|
// ===========================================================================
|
|
{
|
|
sourceBlock: 'place_geographic',
|
|
description: 'Geographic and spatial location entities',
|
|
detailedDescription: `
|
|
Geographic location classes representing physical places where heritage custodians
|
|
are located or operate. Includes settlements (cities/towns), countries, regions,
|
|
and geospatial coordinates.
|
|
|
|
These classes support:
|
|
- Settlement identification (GeoNames integration)
|
|
- Country code normalization (ISO 3166-1)
|
|
- Region/subregion hierarchies
|
|
- Geospatial coordinates (lat/lon)
|
|
`.trim(),
|
|
linkmlClass: 'Settlement',
|
|
typedbEntity: 'settlement',
|
|
provenance: {
|
|
sourceType: 'geonames',
|
|
dataTier: 'TIER_1_AUTHORITATIVE',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'location.settlement',
|
|
sourceDescription: 'Settlement/city name',
|
|
targetClass: 'Settlement',
|
|
targetSlot: 'has_name',
|
|
transformation: 'direct',
|
|
typedbEntity: 'settlement',
|
|
typedbAttribute: 'name',
|
|
rdfPredicate: 'schema:name',
|
|
required: true,
|
|
},
|
|
{
|
|
sourcePath: 'location.country',
|
|
sourceDescription: 'Country code (ISO 3166-1)',
|
|
targetClass: 'Country',
|
|
targetSlot: 'cover_country',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'country',
|
|
typedbAttribute: 'country-code',
|
|
rdfPredicate: 'schema:addressCountry',
|
|
required: true,
|
|
},
|
|
{
|
|
sourcePath: 'location.coordinates',
|
|
sourceDescription: 'Geographic coordinates',
|
|
targetClass: 'GeoSpatialPlace',
|
|
targetSlot: 'has_coordinates',
|
|
transformation: 'computed',
|
|
typedbEntity: 'geo-spatial-place',
|
|
typedbAttribute: 'coordinates',
|
|
rdfPredicate: 'geo:hasGeometry',
|
|
required: false,
|
|
},
|
|
],
|
|
generatedClasses: [
|
|
'Settlement', 'Country', 'Subregion', 'GeoSpatialPlace', 'FeaturePlace',
|
|
],
|
|
exampleYaml: `
|
|
# Geographic location
|
|
location:
|
|
settlement: Amsterdam
|
|
country: NL
|
|
region: Noord-Holland
|
|
coordinates:
|
|
latitude: 52.3676
|
|
longitude: 4.9041
|
|
`.trim(),
|
|
},
|
|
{
|
|
sourceBlock: 'place_custodian_specific',
|
|
description: 'Custodian-specific place classes',
|
|
detailedDescription: `
|
|
Place classes specifically related to heritage custodian operations, including
|
|
auxiliary places, temporary locations, and custodian-specific place designations.
|
|
|
|
Supports modeling:
|
|
- Primary vs auxiliary locations
|
|
- Temporary/seasonal locations
|
|
- Off-site storage locations
|
|
- Branch/satellite locations
|
|
`.trim(),
|
|
linkmlClass: 'CustodianPlace',
|
|
typedbEntity: 'custodian-place',
|
|
provenance: {
|
|
sourceType: 'custodian_yaml',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'places.primary',
|
|
sourceDescription: 'Primary location',
|
|
targetClass: 'CustodianPlace',
|
|
targetSlot: 'has_location',
|
|
transformation: 'direct',
|
|
typedbEntity: 'custodian-place',
|
|
typedbAttribute: 'primary-location',
|
|
rdfPredicate: 'hc:primaryLocation',
|
|
required: true,
|
|
},
|
|
{
|
|
sourcePath: 'places.auxiliary',
|
|
sourceDescription: 'Auxiliary/secondary locations',
|
|
targetClass: 'AuxiliaryPlace',
|
|
targetSlot: 'has_location',
|
|
transformation: 'array_direct',
|
|
typedbEntity: 'auxiliary-place',
|
|
typedbAttribute: 'location',
|
|
rdfPredicate: 'hc:auxiliaryLocation',
|
|
required: false,
|
|
},
|
|
{
|
|
sourcePath: 'places.temporary',
|
|
sourceDescription: 'Temporary location',
|
|
targetClass: 'TemporaryLocation',
|
|
targetSlot: 'has_location',
|
|
transformation: 'direct',
|
|
typedbEntity: 'temporary-location',
|
|
typedbAttribute: 'location',
|
|
rdfPredicate: 'hc:temporaryLocation',
|
|
required: false,
|
|
},
|
|
],
|
|
generatedClasses: [
|
|
'CustodianPlace', 'AuxiliaryPlace', 'TemporaryLocation',
|
|
],
|
|
exampleYaml: `
|
|
# Custodian places
|
|
places:
|
|
primary:
|
|
address: Museumstraat 1
|
|
city: Amsterdam
|
|
auxiliary:
|
|
- name: Storage Facility
|
|
address: Industrieweg 100
|
|
temporary:
|
|
name: Pop-up Exhibition Space
|
|
valid_from: 2024-06-01
|
|
valid_to: 2024-09-30
|
|
`.trim(),
|
|
},
|
|
|
|
// ===========================================================================
|
|
// PHASE 2 SOURCE MAPPINGS: Collections
|
|
// ===========================================================================
|
|
{
|
|
sourceBlock: 'collection_core',
|
|
description: 'Core collection and holdings classes',
|
|
detailedDescription: `
|
|
Core classes for modeling heritage collections and holdings. Collections represent
|
|
aggregations of objects, documents, or materials managed by a heritage custodian.
|
|
|
|
Supports:
|
|
- Collection naming and description
|
|
- Collection types (archival, library, museum, mixed)
|
|
- Special collections designation
|
|
- Subject/temporal coverage
|
|
`.trim(),
|
|
linkmlClass: 'Collection',
|
|
typedbEntity: 'collection',
|
|
provenance: {
|
|
sourceType: 'custodian_yaml',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'collections[].name',
|
|
sourceDescription: 'Collection name',
|
|
targetClass: 'Collection',
|
|
targetSlot: 'has_name',
|
|
transformation: 'direct',
|
|
typedbEntity: 'collection',
|
|
typedbAttribute: 'name',
|
|
rdfPredicate: 'schema:name',
|
|
required: true,
|
|
},
|
|
{
|
|
sourcePath: 'collections[].type',
|
|
sourceDescription: 'Collection type',
|
|
targetClass: 'CollectionType',
|
|
targetSlot: 'has_type',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'collection-type',
|
|
typedbAttribute: 'type',
|
|
rdfPredicate: 'hc:collectionType',
|
|
required: false,
|
|
},
|
|
{
|
|
sourcePath: 'collections[].description',
|
|
sourceDescription: 'Collection description',
|
|
targetClass: 'Collection',
|
|
targetSlot: 'has_description',
|
|
transformation: 'direct',
|
|
typedbEntity: 'collection',
|
|
typedbAttribute: 'description',
|
|
rdfPredicate: 'schema:description',
|
|
required: false,
|
|
},
|
|
],
|
|
generatedClasses: [
|
|
'Collection', 'CollectionType', 'SpecialCollection', 'CastCollection', 'PhotographCollection',
|
|
// Custodian aspect classes for collection/archive/administration dimensions
|
|
'CustodianCollection', 'CustodianArchive', 'CustodianAdministration',
|
|
],
|
|
exampleYaml: `
|
|
# Collection definition
|
|
collections:
|
|
- name: Dutch Masters Collection
|
|
type: MUSEUM_ART
|
|
description: 17th century Dutch paintings
|
|
extent: 450 paintings
|
|
subject_areas:
|
|
- Dutch Golden Age
|
|
- Portraiture
|
|
`.trim(),
|
|
},
|
|
{
|
|
sourceBlock: 'collection_management',
|
|
description: 'Collection management system classes',
|
|
detailedDescription: `
|
|
Classes for collection management systems (CMS) used by heritage custodians
|
|
to catalog, track, and manage their collections.
|
|
|
|
Supports documentation of:
|
|
- CMS software used (Adlib, TMS, ArchivesSpace, etc.)
|
|
- System configurations
|
|
- Integration endpoints
|
|
`.trim(),
|
|
linkmlClass: 'CollectionManagementSystem',
|
|
typedbEntity: 'collection-management-system',
|
|
provenance: {
|
|
sourceType: 'custodian_yaml',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'digital_platforms.cms.name',
|
|
sourceDescription: 'CMS name',
|
|
targetClass: 'CollectionManagementSystem',
|
|
targetSlot: 'has_name',
|
|
transformation: 'direct',
|
|
typedbEntity: 'collection-management-system',
|
|
typedbAttribute: 'system-name',
|
|
rdfPredicate: 'schema:name',
|
|
required: true,
|
|
},
|
|
{
|
|
sourcePath: 'digital_platforms.cms.vendor',
|
|
sourceDescription: 'CMS vendor',
|
|
targetClass: 'CollectionManagementSystem',
|
|
targetSlot: 'has_agent',
|
|
transformation: 'direct',
|
|
typedbEntity: 'collection-management-system',
|
|
typedbAttribute: 'vendor',
|
|
rdfPredicate: 'schema:manufacturer',
|
|
required: false,
|
|
},
|
|
],
|
|
generatedClasses: [
|
|
'CollectionManagementSystem',
|
|
],
|
|
exampleYaml: `
|
|
# Collection management system
|
|
digital_platforms:
|
|
cms:
|
|
name: Adlib Museum
|
|
vendor: Axiell
|
|
version: "7.8"
|
|
url: https://collection.museum.nl
|
|
`.trim(),
|
|
},
|
|
|
|
// ===========================================================================
|
|
// PHASE 2 SOURCE MAPPINGS: Person & Staff
|
|
// ===========================================================================
|
|
{
|
|
sourceBlock: 'person_profile_extended',
|
|
description: 'Extended person profile classes',
|
|
detailedDescription: `
|
|
Extended classes for person/staff profiles beyond basic identity. Includes
|
|
LinkedIn profiles, person connections (professional networks), and web claims.
|
|
|
|
Supports:
|
|
- LinkedIn profile data integration
|
|
- Professional network connections
|
|
- Web-sourced claims about persons
|
|
- Person name variants and aliases
|
|
`.trim(),
|
|
linkmlClass: 'PersonObservation',
|
|
typedbEntity: 'person-observation',
|
|
provenance: {
|
|
sourceType: 'linkedin',
|
|
dataTier: 'TIER_3_CROWD_SOURCED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'person.linkedin_url',
|
|
sourceDescription: 'LinkedIn profile URL',
|
|
targetClass: 'LinkedInProfile',
|
|
targetSlot: 'has_url',
|
|
transformation: 'direct',
|
|
typedbEntity: 'linkedin-profile',
|
|
typedbAttribute: 'profile-url',
|
|
rdfPredicate: 'schema:sameAs',
|
|
required: false,
|
|
},
|
|
{
|
|
sourcePath: 'person.connections',
|
|
sourceDescription: 'Professional connections',
|
|
targetClass: 'PersonConnection',
|
|
targetSlot: 'has_contact_point',
|
|
transformation: 'array_direct',
|
|
typedbEntity: 'person-connection',
|
|
typedbAttribute: 'connected-person',
|
|
rdfPredicate: 'hc:hasConnection',
|
|
required: false,
|
|
},
|
|
{
|
|
sourcePath: 'person.names',
|
|
sourceDescription: 'Person name variants',
|
|
targetClass: 'PersonName',
|
|
targetSlot: 'has_alias',
|
|
transformation: 'array_direct',
|
|
typedbEntity: 'person-name',
|
|
typedbAttribute: 'name-value',
|
|
rdfPredicate: 'schema:alternateName',
|
|
required: false,
|
|
},
|
|
],
|
|
generatedClasses: [
|
|
'LinkedInProfile', 'PersonConnection', 'PersonName', 'PersonOrOrganization', 'PersonWebClaim',
|
|
],
|
|
exampleYaml: `
|
|
# Extended person profile
|
|
person:
|
|
name: Jan de Vries
|
|
linkedin_url: https://linkedin.com/in/jandevries
|
|
names:
|
|
- value: Jan de Vries
|
|
type: legal_name
|
|
- value: J. de Vries
|
|
type: abbreviated
|
|
connections:
|
|
- name: Maria Bakker
|
|
organization: Rijksmuseum
|
|
`.trim(),
|
|
},
|
|
{
|
|
sourceBlock: 'person_work_education',
|
|
description: 'Work experience and education classes',
|
|
detailedDescription: `
|
|
Classes for modeling work experience history and educational credentials
|
|
of persons associated with heritage custodians.
|
|
|
|
Supports:
|
|
- Employment history with dates
|
|
- Role/position tracking
|
|
- Educational credentials
|
|
- Skills and certifications
|
|
`.trim(),
|
|
linkmlClass: 'WorkExperience',
|
|
typedbEntity: 'work-experience',
|
|
provenance: {
|
|
sourceType: 'linkedin',
|
|
dataTier: 'TIER_3_CROWD_SOURCED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'person.experience[].organization',
|
|
sourceDescription: 'Employer organization',
|
|
targetClass: 'WorkExperience',
|
|
targetSlot: 'affiliated_with',
|
|
transformation: 'direct',
|
|
typedbEntity: 'work-experience',
|
|
typedbAttribute: 'organization',
|
|
rdfPredicate: 'schema:worksFor',
|
|
required: true,
|
|
},
|
|
{
|
|
sourcePath: 'person.experience[].role',
|
|
sourceDescription: 'Job title/role',
|
|
targetClass: 'WorkExperience',
|
|
targetSlot: 'role_title',
|
|
transformation: 'direct',
|
|
typedbEntity: 'work-experience',
|
|
typedbAttribute: 'role-title',
|
|
rdfPredicate: 'schema:jobTitle',
|
|
required: true,
|
|
},
|
|
{
|
|
sourcePath: 'person.education[].institution',
|
|
sourceDescription: 'Educational institution',
|
|
targetClass: 'EducationCredential',
|
|
targetSlot: 'affiliated_with',
|
|
transformation: 'direct',
|
|
typedbEntity: 'education-credential',
|
|
typedbAttribute: 'institution',
|
|
rdfPredicate: 'schema:alumniOf',
|
|
required: true,
|
|
},
|
|
],
|
|
generatedClasses: [
|
|
'WorkExperience', 'EducationCredential', 'StaffRole', 'StaffRoles',
|
|
],
|
|
exampleYaml: `
|
|
# Work and education
|
|
person:
|
|
experience:
|
|
- organization: Rijksmuseum
|
|
role: Senior Curator
|
|
start_date: 2018-03
|
|
current: true
|
|
education:
|
|
- institution: University of Amsterdam
|
|
degree: MA Art History
|
|
graduation_year: 2010
|
|
`.trim(),
|
|
},
|
|
|
|
// ===========================================================================
|
|
// PHASE 2 SOURCE MAPPINGS: Digital & API Services
|
|
// ===========================================================================
|
|
{
|
|
sourceBlock: 'digital_platforms_extended',
|
|
description: 'Extended digital platform classes',
|
|
detailedDescription: `
|
|
Extended digital platform classes for web portals, virtual libraries,
|
|
and auxiliary digital presence. Supports heritage custodian online presence
|
|
beyond primary websites.
|
|
|
|
Includes:
|
|
- Web portals and discovery interfaces
|
|
- Virtual/digital-only libraries
|
|
- Auxiliary digital platforms
|
|
- Primary digital presence assertions
|
|
`.trim(),
|
|
linkmlClass: 'WebPortal',
|
|
typedbEntity: 'web-portal',
|
|
provenance: {
|
|
sourceType: 'web_enrichment',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'digital_platforms.portals[].url',
|
|
sourceDescription: 'Portal URL',
|
|
targetClass: 'WebPortal',
|
|
targetSlot: 'has_url',
|
|
transformation: 'direct',
|
|
typedbEntity: 'web-portal',
|
|
typedbAttribute: 'url',
|
|
rdfPredicate: 'schema:url',
|
|
required: true,
|
|
},
|
|
{
|
|
sourcePath: 'digital_platforms.portals[].type',
|
|
sourceDescription: 'Portal type',
|
|
targetClass: 'WebPortal',
|
|
targetSlot: 'has_type',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'web-portal',
|
|
typedbAttribute: 'portal-type',
|
|
rdfPredicate: 'hc:portalType',
|
|
required: false,
|
|
},
|
|
],
|
|
generatedClasses: [ // NOTE(review): 'PrimaryDigitalPresenceAssertion' is also listed in the generatedClasses of the finding_aids_standards block — confirm that listing it in both blocks is intended.
|
|
'WebPortal', 'VirtualMapLibrary', 'AuxiliaryDigitalPlatform', 'PrimaryDigitalPresenceAssertion',
|
|
],
|
|
exampleYaml: `
|
|
# Digital platforms
|
|
digital_platforms:
|
|
portals:
|
|
- url: https://collectie.museum.nl
|
|
type: DISCOVERY_PORTAL
|
|
name: Online Collection
|
|
- url: https://maps.museum.nl
|
|
type: VIRTUAL_MAP
|
|
name: Interactive Map
|
|
`.trim(),
|
|
},
|
|
{
|
|
sourceBlock: 'api_endpoints',
|
|
description: 'API and data service endpoint classes',
|
|
detailedDescription: `
|
|
Classes for API endpoints and data services exposed by heritage custodians.
|
|
Supports interoperability documentation for harvesting, searching, and
|
|
accessing digital content.
|
|
|
|
Includes:
|
|
- OAI-PMH harvesting endpoints
|
|
- Search APIs (SRU, OpenSearch)
|
|
- IIIF Image/Presentation APIs
|
|
- File download services (EAD, METS)
|
|
`.trim(),
|
|
linkmlClass: 'DataServiceEndpoint',
|
|
typedbEntity: 'data-service-endpoint',
|
|
provenance: {
|
|
sourceType: 'web_enrichment',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'api_endpoints.oai_pmh',
|
|
sourceDescription: 'OAI-PMH endpoint',
|
|
targetClass: 'OAIPMHEndpoint',
|
|
targetSlot: 'has_url',
|
|
transformation: 'direct',
|
|
typedbEntity: 'oai-pmh-endpoint',
|
|
typedbAttribute: 'endpoint-url',
|
|
rdfPredicate: 'hc:oaiPmhEndpoint',
|
|
required: false,
|
|
},
|
|
{
|
|
sourcePath: 'api_endpoints.search',
|
|
sourceDescription: 'Search API endpoint',
|
|
targetClass: 'SearchAPI',
|
|
targetSlot: 'has_url',
|
|
transformation: 'direct',
|
|
typedbEntity: 'search-api',
|
|
typedbAttribute: 'search-url',
|
|
rdfPredicate: 'hc:searchEndpoint',
|
|
required: false,
|
|
},
|
|
{ // Field mapping for the value found at api_endpoints.iiif (copied with the 'direct' transformation).
|
|
sourcePath: 'api_endpoints.iiif',
|
|
sourceDescription: 'IIIF Image API',
|
|
targetClass: 'IIPImageServer', // NOTE(review): source is described as IIIF while the target class/entity is named for IIP — confirm this pairing is intended.
|
|
targetSlot: 'has_url',
|
|
transformation: 'direct',
|
|
typedbEntity: 'iip-image-server',
|
|
typedbAttribute: 'iiif-url',
|
|
rdfPredicate: 'hc:iiifEndpoint',
|
|
required: false, // optional field
|
|
},
|
|
],
|
|
generatedClasses: [
|
|
'DataServiceEndpoint', 'OAIPMHEndpoint', 'SearchAPI', 'FileAPI', 'EADDownload',
|
|
'METSAPI', 'IIPImageServer', 'InternetOfThings',
|
|
],
|
|
exampleYaml: `
|
|
# API endpoints
|
|
api_endpoints:
|
|
oai_pmh: https://api.museum.nl/oai
|
|
search: https://api.museum.nl/search
|
|
iiif: https://iiif.museum.nl/image/2
|
|
ead_download: https://api.museum.nl/ead
|
|
`.trim(),
|
|
},
|
|
|
|
// ===========================================================================
|
|
// PHASE 2 SOURCE MAPPINGS: Video & Social Media
|
|
// ===========================================================================
|
|
{
|
|
sourceBlock: 'video_content',
|
|
description: 'Video content and annotation classes',
|
|
detailedDescription: `
|
|
Classes for video content produced or published by heritage custodians.
|
|
Supports rich annotation of video content including chapters, transcripts,
|
|
subtitles, and time-based segments.
|
|
|
|
Includes:
|
|
- Video posts (YouTube, Vimeo)
|
|
- Video chapters and segments
|
|
- Transcripts and subtitles
|
|
- Audio/text annotations
|
|
`.trim(),
|
|
linkmlClass: 'VideoPost',
|
|
typedbEntity: 'video-post',
|
|
provenance: {
|
|
sourceType: 'social_media',
|
|
dataTier: 'TIER_3_CROWD_SOURCED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'videos[].url',
|
|
sourceDescription: 'Video URL',
|
|
targetClass: 'VideoPost',
|
|
targetSlot: 'has_url',
|
|
transformation: 'direct',
|
|
typedbEntity: 'video-post',
|
|
typedbAttribute: 'video-url',
|
|
rdfPredicate: 'schema:contentUrl',
|
|
required: true,
|
|
},
|
|
{
|
|
sourcePath: 'videos[].chapters',
|
|
sourceDescription: 'Video chapters',
|
|
targetClass: 'VideoChapter',
|
|
targetSlot: 'has_component',
|
|
transformation: 'array_direct',
|
|
typedbEntity: 'video-chapter',
|
|
typedbAttribute: 'chapters',
|
|
rdfPredicate: 'schema:hasPart',
|
|
required: false,
|
|
},
|
|
{
|
|
sourcePath: 'videos[].transcript',
|
|
sourceDescription: 'Video transcript',
|
|
targetClass: 'VideoTranscript',
|
|
targetSlot: 'has_transcription',
|
|
transformation: 'direct',
|
|
typedbEntity: 'video-transcript',
|
|
typedbAttribute: 'transcript-text',
|
|
rdfPredicate: 'schema:transcript',
|
|
required: false,
|
|
},
|
|
],
|
|
generatedClasses: [
|
|
'VideoPost', 'VideoChapter', 'VideoTranscript', 'VideoSubtitle',
|
|
'VideoAnnotation', 'VideoAudioAnnotation', 'VideoTextContent', 'VideoTimeSegment',
|
|
],
|
|
exampleYaml: `
|
|
# Video content
|
|
videos:
|
|
- url: https://youtube.com/watch?v=abc123
|
|
title: Museum Tour 2024
|
|
duration: PT45M30S
|
|
chapters:
|
|
- title: Introduction
|
|
start_time: PT0S
|
|
- title: Main Gallery
|
|
start_time: PT5M
|
|
transcript:
|
|
language: nl
|
|
text: "Welkom bij het museum..."
|
|
`.trim(),
|
|
},
|
|
{
|
|
sourceBlock: 'social_media_content',
|
|
description: 'Social media content and profile classes',
|
|
detailedDescription: `
|
|
Classes for social media presence and content of heritage custodians.
|
|
Tracks posts, profiles, and engagement across platforms.
|
|
|
|
Includes:
|
|
- Social media profiles (per platform)
|
|
- Posts and content items
|
|
- Engagement metrics
|
|
`.trim(),
|
|
linkmlClass: 'SocialMediaProfile',
|
|
typedbEntity: 'social-media-profile',
|
|
provenance: {
|
|
sourceType: 'social_media',
|
|
dataTier: 'TIER_3_CROWD_SOURCED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'social_media.profiles[].platform',
|
|
sourceDescription: 'Social media platform',
|
|
targetClass: 'SocialMediaProfile',
|
|
targetSlot: 'has_digital_platform',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'social-media-profile',
|
|
typedbAttribute: 'platform',
|
|
rdfPredicate: 'hc:socialPlatform',
|
|
required: true,
|
|
},
|
|
{
|
|
sourcePath: 'social_media.profiles[].url',
|
|
sourceDescription: 'Profile URL',
|
|
targetClass: 'SocialMediaProfile',
|
|
targetSlot: 'has_url',
|
|
transformation: 'direct',
|
|
typedbEntity: 'social-media-profile',
|
|
typedbAttribute: 'profile-url',
|
|
rdfPredicate: 'schema:url',
|
|
required: true,
|
|
},
|
|
{
|
|
sourcePath: 'social_media.posts',
|
|
sourceDescription: 'Social media posts',
|
|
targetClass: 'SocialMediaPost',
|
|
targetSlot: 'has_detail',
|
|
transformation: 'array_direct',
|
|
typedbEntity: 'social-media-post',
|
|
typedbAttribute: 'post-content',
|
|
rdfPredicate: 'hc:hasSocialPost',
|
|
required: false,
|
|
},
|
|
],
|
|
generatedClasses: [
|
|
'SocialMediaProfile', 'SocialMediaPost', 'SocialMediaContent',
|
|
],
|
|
exampleYaml: `
|
|
# Social media presence
|
|
social_media:
|
|
profiles:
|
|
- platform: INSTAGRAM
|
|
url: https://instagram.com/rijksmuseum
|
|
followers: 1200000
|
|
- platform: TWITTER
|
|
url: https://twitter.com/rijksmuseum
|
|
posts:
|
|
- platform: INSTAGRAM
|
|
post_url: https://instagram.com/p/abc123
|
|
date: 2024-01-15
|
|
`.trim(),
|
|
},
|
|
|
|
// ===========================================================================
|
|
// PHASE 2 SOURCE MAPPINGS: Legal & Administrative
|
|
// ===========================================================================
|
|
{
|
|
sourceBlock: 'legal_policies',
|
|
description: 'Legal and policy classes',
|
|
detailedDescription: `
|
|
Classes for legal policies, access restrictions, and data licensing
|
|
applicable to heritage custodians and their collections.
|
|
|
|
Includes:
|
|
- Access policies (reading room, digital)
|
|
- Data license terms
|
|
- Legal responsibility collections
|
|
- Trade register information
|
|
`.trim(),
|
|
linkmlClass: 'AccessPolicy',
|
|
typedbEntity: 'access-policy',
|
|
provenance: {
|
|
sourceType: 'custodian_yaml',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'policies.access',
|
|
sourceDescription: 'Access policy',
|
|
targetClass: 'AccessPolicy',
|
|
targetSlot: 'has_type',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'access-policy',
|
|
typedbAttribute: 'access-type',
|
|
rdfPredicate: 'hc:accessPolicy',
|
|
required: false,
|
|
},
|
|
{
|
|
sourcePath: 'policies.data_license',
|
|
sourceDescription: 'Data license',
|
|
targetClass: 'DataLicensePolicy',
|
|
targetSlot: 'licensed_as',
|
|
transformation: 'direct',
|
|
typedbEntity: 'data-license-policy',
|
|
typedbAttribute: 'license-type',
|
|
rdfPredicate: 'schema:license',
|
|
required: false,
|
|
},
|
|
],
|
|
generatedClasses: [
|
|
'AccessPolicy', 'DataLicensePolicy', 'LegalResponsibilityCollection',
|
|
'ArticlesOfAssociation', 'TradeRegister',
|
|
],
|
|
exampleYaml: `
|
|
# Policies
|
|
policies:
|
|
access:
|
|
type: PUBLIC
|
|
reading_room: true
|
|
appointment_required: false
|
|
data_license: CC-BY-4.0
|
|
legal_responsibility:
|
|
type: FOUNDATION
|
|
articles_url: https://kvk.nl/articles/12345678
|
|
`.trim(),
|
|
},
|
|
{
|
|
sourceBlock: 'administrative_records',
|
|
description: 'Administrative and financial record classes',
|
|
detailedDescription: `
|
|
Classes for administrative records including budgets, projects, financial
|
|
statements, and registration information.
|
|
|
|
Includes:
|
|
- Budget tracking
|
|
- Project management
|
|
- Financial statements
|
|
- Registration info
|
|
`.trim(),
|
|
linkmlClass: 'Budget',
|
|
typedbEntity: 'budget',
|
|
provenance: {
|
|
sourceType: 'custodian_yaml',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'administration.budget',
|
|
sourceDescription: 'Budget information',
|
|
targetClass: 'Budget',
|
|
targetSlot: 'allocated_budget',
|
|
transformation: 'direct',
|
|
typedbEntity: 'budget',
|
|
typedbAttribute: 'annual-amount',
|
|
rdfPredicate: 'hc:annualBudget',
|
|
required: false,
|
|
},
|
|
{
|
|
sourcePath: 'administration.projects',
|
|
sourceDescription: 'Active projects',
|
|
targetClass: 'Project',
|
|
targetSlot: 'participate_in',
|
|
transformation: 'array_direct',
|
|
typedbEntity: 'project',
|
|
typedbAttribute: 'project-name',
|
|
rdfPredicate: 'hc:hasProject',
|
|
required: false,
|
|
},
|
|
{
|
|
sourcePath: 'administration.registration',
|
|
sourceDescription: 'Registration information',
|
|
targetClass: 'RegistrationInfo',
|
|
targetSlot: 'identified_by',
|
|
transformation: 'direct',
|
|
typedbEntity: 'registration-info',
|
|
typedbAttribute: 'registration-number',
|
|
rdfPredicate: 'hc:registrationInfo',
|
|
required: false,
|
|
},
|
|
],
|
|
generatedClasses: [
|
|
'Budget', 'Project', 'FinancialStatement', 'RegistrationInfo', 'ConfidenceMeasure', 'ConflictStatus',
|
|
],
|
|
exampleYaml: `
|
|
# Administrative records
|
|
administration:
|
|
budget:
|
|
annual_amount: 5000000
|
|
currency: EUR
|
|
fiscal_year: 2024
|
|
projects:
|
|
- name: Digitization 2024
|
|
status: IN_PROGRESS
|
|
registration:
|
|
authority: KVK
|
|
number: "12345678"
|
|
`.trim(),
|
|
},
|
|
|
|
// ===========================================================================
|
|
// PHASE 2 SOURCE MAPPINGS: Finding Aids & Standards
|
|
// ===========================================================================
|
|
{
|
|
sourceBlock: 'finding_aids_standards',
|
|
description: 'Finding aids, standards, and source documentation classes',
|
|
detailedDescription: `
|
|
Classes for archival finding aids, metadata standards, and source documentation.
|
|
Essential for archival description and interoperability.
|
|
|
|
Includes:
|
|
- Finding aids (EAD, PDF, online)
|
|
- Metadata standards compliance
|
|
- Source document references
|
|
- Primary digital presence assertions
|
|
`.trim(),
|
|
linkmlClass: 'FindingAid',
|
|
typedbEntity: 'finding-aid',
|
|
provenance: {
|
|
sourceType: 'custodian_yaml',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'finding_aids[].type',
|
|
sourceDescription: 'Finding aid type',
|
|
targetClass: 'FindingAidType',
|
|
targetSlot: 'has_type',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'finding-aid-type',
|
|
typedbAttribute: 'type',
|
|
rdfPredicate: 'hc:findingAidType',
|
|
required: true,
|
|
},
|
|
{
|
|
sourcePath: 'finding_aids[].url',
|
|
sourceDescription: 'Finding aid URL',
|
|
targetClass: 'FindingAid',
|
|
targetSlot: 'has_url',
|
|
transformation: 'direct',
|
|
typedbEntity: 'finding-aid',
|
|
typedbAttribute: 'url',
|
|
rdfPredicate: 'schema:url',
|
|
required: false,
|
|
},
|
|
{
|
|
sourcePath: 'standards',
|
|
sourceDescription: 'Standards compliance',
|
|
targetClass: 'Standard',
|
|
targetSlot: 'has_standard',
|
|
transformation: 'array_direct',
|
|
typedbEntity: 'standard',
|
|
typedbAttribute: 'standard-name',
|
|
rdfPredicate: 'hc:conformsToStandard',
|
|
required: false,
|
|
},
|
|
],
|
|
generatedClasses: [
|
|
'FindingAid', 'FindingAidType', 'SourceDocument', 'Standard', 'PrimaryDigitalPresenceAssertion',
|
|
],
|
|
exampleYaml: `
|
|
# Finding aids and standards
|
|
finding_aids:
|
|
- type: EAD
|
|
url: https://archive.nl/ead/collection123.xml
|
|
- type: PDF
|
|
url: https://archive.nl/guides/collection123.pdf
|
|
standards:
|
|
- ISAD(G)
|
|
- EAD3
|
|
- Dublin Core
|
|
`.trim(),
|
|
},
|
|
|
|
// ===========================================================================
|
|
// PHASE 2 SOURCE MAPPINGS: Reconstruction & Provenance
|
|
// ===========================================================================
|
|
{
|
|
sourceBlock: 'reconstruction_provenance',
|
|
description: 'Entity reconstruction and provenance tracking classes',
|
|
detailedDescription: `
|
|
Classes for tracking entity reconstruction activities and provenance chains.
|
|
Used for modeling how information about heritage custodians is assembled
|
|
from multiple sources.
|
|
|
|
Includes:
|
|
- Reconstructed entities (from multiple sources)
|
|
- Reconstruction activities
|
|
- Reconstruction agents (human/automated)
|
|
- Timeline events from external sources
|
|
`.trim(),
|
|
linkmlClass: 'ReconstructedEntity',
|
|
typedbEntity: 'reconstructed-entity',
|
|
provenance: {
|
|
sourceType: 'computed',
|
|
dataTier: 'TIER_4_INFERRED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'reconstruction.sources',
|
|
sourceDescription: 'Source documents',
|
|
targetClass: 'ReconstructedEntity',
|
|
targetSlot: 'has_source',
|
|
transformation: 'array_direct',
|
|
typedbEntity: 'reconstructed-entity',
|
|
typedbAttribute: 'sources',
|
|
rdfPredicate: 'prov:wasDerivedFrom',
|
|
required: true,
|
|
},
|
|
{
|
|
sourcePath: 'reconstruction.activity',
|
|
sourceDescription: 'Reconstruction activity',
|
|
targetClass: 'ReconstructionActivity',
|
|
targetSlot: 'has_activity',
|
|
transformation: 'direct',
|
|
typedbEntity: 'reconstruction-activity',
|
|
typedbAttribute: 'activity-type',
|
|
rdfPredicate: 'prov:wasGeneratedBy',
|
|
required: false,
|
|
},
|
|
{
|
|
sourcePath: 'reconstruction.agent',
|
|
sourceDescription: 'Reconstruction agent',
|
|
targetClass: 'ReconstructionAgent',
|
|
targetSlot: 'has_agent',
|
|
transformation: 'direct',
|
|
typedbEntity: 'reconstruction-agent',
|
|
typedbAttribute: 'agent-id',
|
|
rdfPredicate: 'prov:wasAttributedTo',
|
|
required: false,
|
|
},
|
|
],
|
|
generatedClasses: [
|
|
'ReconstructedEntity', 'ReconstructionActivity', 'ReconstructionAgent', 'CustodianTimelineEvent',
|
|
],
|
|
exampleYaml: `
|
|
# Reconstruction provenance
|
|
reconstruction:
|
|
sources:
|
|
- type: WIKIDATA
|
|
id: Q190804
|
|
- type: ISIL_REGISTRY
|
|
id: NL-AmRM
|
|
activity:
|
|
type: AUTOMATED_MERGE
|
|
date: 2024-01-15
|
|
agent:
|
|
type: SYSTEM
|
|
name: glam-extractor
|
|
`.trim(),
|
|
},
|
|
|
|
// ===========================================================================
|
|
// PHASE 2 SOURCE MAPPINGS: Storage & Facilities
|
|
// ===========================================================================
|
|
{
|
|
sourceBlock: 'storage_facilities',
|
|
description: 'Storage conditions and facility classes',
|
|
detailedDescription: `
|
|
Classes for physical storage conditions, educational facilities, and
|
|
specialized spaces within heritage custodian buildings.
|
|
|
|
Includes:
|
|
- Storage conditions (climate, security)
|
|
- Storage types (warehouse, vault)
|
|
- Educational centers
|
|
- Specialized facilities (libraries, social spaces)
|
|
`.trim(),
|
|
linkmlClass: 'StorageCondition',
|
|
typedbEntity: 'storage-condition',
|
|
provenance: {
|
|
sourceType: 'custodian_yaml',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'facilities.storage.conditions',
|
|
sourceDescription: 'Storage conditions',
|
|
targetClass: 'StorageCondition',
|
|
targetSlot: 'has_type',
|
|
transformation: 'direct',
|
|
typedbEntity: 'storage-condition',
|
|
typedbAttribute: 'climate-control',
|
|
rdfPredicate: 'hc:storageCondition',
|
|
required: false,
|
|
},
|
|
{
|
|
sourcePath: 'facilities.storage.type',
|
|
sourceDescription: 'Storage type',
|
|
targetClass: 'StorageType',
|
|
targetSlot: 'has_type',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'storage-type',
|
|
typedbAttribute: 'type',
|
|
rdfPredicate: 'hc:storageType',
|
|
required: false,
|
|
},
|
|
{
|
|
sourcePath: 'facilities.education_center',
|
|
sourceDescription: 'Education center',
|
|
targetClass: 'EducationCenter',
|
|
targetSlot: 'affiliated_with',
|
|
transformation: 'direct',
|
|
typedbEntity: 'education-center',
|
|
typedbAttribute: 'center-name',
|
|
rdfPredicate: 'hc:hasEducationCenter',
|
|
required: false,
|
|
},
|
|
],
|
|
generatedClasses: [
|
|
'StorageCondition', 'StorageConditionPolicy', 'StorageType',
|
|
'EducationCenter', 'PersonalLibrary', 'LocationLibrary', 'SocialSpace', 'CateringPlace',
|
|
],
|
|
exampleYaml: `
|
|
# Facilities
|
|
facilities:
|
|
storage:
|
|
type: CLIMATE_CONTROLLED_VAULT
|
|
conditions:
|
|
temperature: 18
|
|
humidity: 50
|
|
security_level: HIGH
|
|
education_center:
|
|
name: Museum Education Wing
|
|
capacity: 50
|
|
`.trim(),
|
|
},
|
|
|
|
// ===========================================================================
|
|
// PHASE 2 SOURCE MAPPINGS: Funding & Grants
|
|
// ===========================================================================
|
|
{
|
|
sourceBlock: 'funding_grants',
|
|
description: 'Funding, grants, and application classes',
|
|
detailedDescription: `
|
|
Classes for funding sources, grant applications, and financial requirements
|
|
relevant to heritage custodians.
|
|
|
|
Includes:
|
|
- Funding agendas
|
|
- Grant requirements
|
|
- Application calls
|
|
`.trim(),
|
|
linkmlClass: 'FundingAgenda',
|
|
typedbEntity: 'funding-agenda',
|
|
provenance: {
|
|
sourceType: 'external_api',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'funding.agendas',
|
|
sourceDescription: 'Funding agendas',
|
|
targetClass: 'FundingAgenda',
|
|
targetSlot: 'related_agenda',
|
|
transformation: 'array_direct',
|
|
typedbEntity: 'funding-agenda',
|
|
typedbAttribute: 'agenda-name',
|
|
rdfPredicate: 'hc:fundingAgenda',
|
|
required: false,
|
|
},
|
|
{
|
|
sourcePath: 'funding.requirements',
|
|
sourceDescription: 'Funding requirements',
|
|
targetClass: 'FundingRequirement',
|
|
targetSlot: 'has_detail',
|
|
transformation: 'array_direct',
|
|
typedbEntity: 'funding-requirement',
|
|
typedbAttribute: 'requirement',
|
|
rdfPredicate: 'hc:fundingRequirement',
|
|
required: false,
|
|
},
|
|
{
|
|
sourcePath: 'funding.calls',
|
|
sourceDescription: 'Open calls for applications',
|
|
targetClass: 'CallForApplication',
|
|
targetSlot: 'has_detail',
|
|
transformation: 'array_direct',
|
|
typedbEntity: 'call-for-application',
|
|
typedbAttribute: 'call-title',
|
|
rdfPredicate: 'hc:openCall',
|
|
required: false,
|
|
},
|
|
],
|
|
generatedClasses: [
|
|
'FundingAgenda', 'FundingRequirement', 'CallForApplication',
|
|
],
|
|
exampleYaml: `
|
|
# Funding
|
|
funding:
|
|
agendas:
|
|
- name: Heritage Digitization Fund 2024
|
|
amount: 500000
|
|
currency: EUR
|
|
requirements:
|
|
- type: MATCHING_FUNDS
|
|
percentage: 25
|
|
calls:
|
|
- title: Digital Heritage Innovation
|
|
deadline: 2024-06-30
|
|
`.trim(),
|
|
},
|
|
|
|
// ===========================================================================
|
|
// PHASE 2 SOURCE MAPPINGS: Language & Naming
|
|
// ===========================================================================
|
|
{
|
|
sourceBlock: 'language_naming',
|
|
description: 'Language codes, proficiency, and naming classes',
|
|
detailedDescription: `
|
|
Classes for language handling, proficiency levels, and naming/appellation
|
|
conventions used in heritage custodian data.
|
|
|
|
Includes:
|
|
- ISO language codes
|
|
- Language proficiency levels
|
|
- Appellations (formal names)
|
|
- Container (structural) classes
|
|
`.trim(),
|
|
linkmlClass: 'LanguageCode',
|
|
typedbEntity: 'language-code',
|
|
provenance: {
|
|
sourceType: 'reference_data',
|
|
dataTier: 'TIER_1_AUTHORITATIVE',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'languages',
|
|
sourceDescription: 'Languages used',
|
|
targetClass: 'LanguageCode',
|
|
targetSlot: 'has_language',
|
|
transformation: 'array_direct',
|
|
typedbEntity: 'language-code',
|
|
typedbAttribute: 'iso-code',
|
|
rdfPredicate: 'schema:inLanguage',
|
|
required: false,
|
|
},
|
|
{
|
|
sourcePath: 'staff.language_proficiency',
|
|
sourceDescription: 'Language proficiency',
|
|
targetClass: 'LanguageProficiency',
|
|
targetSlot: 'has_level',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'language-proficiency',
|
|
typedbAttribute: 'level',
|
|
rdfPredicate: 'hc:languageProficiency',
|
|
required: false,
|
|
},
|
|
{
|
|
sourcePath: 'names.appellations',
|
|
sourceDescription: 'Formal appellations',
|
|
targetClass: 'Appellation',
|
|
targetSlot: 'has_name',
|
|
transformation: 'array_direct',
|
|
typedbEntity: 'appellation',
|
|
typedbAttribute: 'name-value',
|
|
rdfPredicate: 'crm:P1_is_identified_by',
|
|
required: false,
|
|
},
|
|
],
|
|
generatedClasses: [
|
|
'LanguageCode', 'LanguageProficiency', 'Appellation', 'Container',
|
|
],
|
|
exampleYaml: `
|
|
# Language and naming
|
|
languages:
|
|
- nl
|
|
- en
|
|
- de
|
|
names:
|
|
appellations:
|
|
- value: Rijksmuseum Amsterdam
|
|
type: OFFICIAL
|
|
language: nl
|
|
- value: National Museum of the Netherlands
|
|
type: TRANSLATION
|
|
language: en
|
|
`.trim(),
|
|
},
|
|
|
|
// ===========================================================================
|
|
// PHASE 2 SOURCE MAPPINGS: Specialized Archives (International)
|
|
// ===========================================================================
|
|
{
|
|
sourceBlock: 'archives_german',
|
|
description: 'German-specific archive types',
|
|
detailedDescription: `
|
|
Archive types specific to German archival tradition and organization.
|
|
German archives follow a distinctive organizational pattern based on
|
|
political/administrative regions and specialized functions.
|
|
`.trim(),
|
|
linkmlClass: 'Verwaltungsarchiv',
|
|
typedbEntity: 'verwaltungsarchiv',
|
|
provenance: {
|
|
sourceType: 'isil_registry',
|
|
dataTier: 'TIER_1_AUTHORITATIVE',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'archive_type',
|
|
sourceDescription: 'German archive type',
|
|
targetClass: 'Verwaltungsarchiv',
|
|
targetSlot: 'has_type',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'verwaltungsarchiv',
|
|
typedbAttribute: 'type',
|
|
rdfPredicate: 'hc:archiveType',
|
|
required: true,
|
|
},
|
|
],
|
|
generatedClasses: [
|
|
'Verwaltungsarchiv', 'Vereinsarchiv', 'Verlagsarchiv',
|
|
'Bildstelle', 'Medienzentrum', 'Personenstandsarchiv',
|
|
],
|
|
exampleYaml: `
|
|
# German archive type
|
|
archive_type: VERWALTUNGSARCHIV
|
|
name: Landesarchiv Baden-Württemberg
|
|
`.trim(),
|
|
},
|
|
{
|
|
sourceBlock: 'archives_swedish',
|
|
description: 'Swedish-specific archive types',
|
|
detailedDescription: `
|
|
Archive types specific to Swedish archival tradition. Swedish archives
|
|
include national (Riksarkivet), regional (Landsarkiv), and local heritage
|
|
institutions (Hembygdsförening).
|
|
`.trim(),
|
|
linkmlClass: 'Landsarkiv',
|
|
typedbEntity: 'landsarkiv',
|
|
provenance: {
|
|
sourceType: 'isil_registry',
|
|
dataTier: 'TIER_1_AUTHORITATIVE',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'archive_type',
|
|
sourceDescription: 'Swedish archive type',
|
|
targetClass: 'Landsarkiv',
|
|
targetSlot: 'has_type',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'landsarkiv',
|
|
typedbAttribute: 'type',
|
|
rdfPredicate: 'hc:archiveType',
|
|
required: true,
|
|
},
|
|
],
|
|
generatedClasses: [
|
|
'Landsarkiv', 'Foremalarkiv', 'SectorOfArchivesInSweden', 'LocalHeritageInstitutionSweden',
|
|
],
|
|
exampleYaml: `
|
|
# Swedish archive type
|
|
archive_type: LANDSARKIV
|
|
name: Landsarkivet i Uppsala
|
|
`.trim(),
|
|
},
|
|
{
|
|
sourceBlock: 'archives_french',
|
|
description: 'French-specific archive types',
|
|
detailedDescription: `
|
|
Archive types specific to French archival organization. French archives
|
|
follow a centralized national system with departmental and communal levels.
|
|
`.trim(),
|
|
linkmlClass: 'FrenchPrivateArchives',
|
|
typedbEntity: 'french-private-archives',
|
|
provenance: {
|
|
sourceType: 'isil_registry',
|
|
dataTier: 'TIER_1_AUTHORITATIVE',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'archive_type',
|
|
sourceDescription: 'French archive type',
|
|
targetClass: 'FrenchPrivateArchives',
|
|
targetSlot: 'has_type',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'french-private-archives',
|
|
typedbAttribute: 'type',
|
|
rdfPredicate: 'hc:archiveType',
|
|
required: true,
|
|
},
|
|
],
|
|
generatedClasses: [
|
|
'FrenchPrivateArchives', 'Conservatoria',
|
|
],
|
|
exampleYaml: `
|
|
# French archive type
|
|
archive_type: ARCHIVES_PRIVEES
|
|
name: Archives privées de la famille Rothschild
|
|
`.trim(),
|
|
},
|
|
{
|
|
sourceBlock: 'archives_other',
|
|
description: 'Other international specialized archive types',
|
|
detailedDescription: `
|
|
Specialized archive types from other countries including Czech regional
|
|
archives, Nordic archives, and various thematic archive types.
|
|
`.trim(),
|
|
linkmlClass: 'SpecializedArchivesCzechia',
|
|
typedbEntity: 'specialized-archives-czechia',
|
|
provenance: {
|
|
sourceType: 'isil_registry',
|
|
dataTier: 'TIER_1_AUTHORITATIVE',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'archive_type',
|
|
sourceDescription: 'Specialized archive type',
|
|
targetClass: 'SpecializedArchivesCzechia',
|
|
targetSlot: 'has_type',
|
|
transformation: 'lookup',
|
|
typedbEntity: 'specialized-archives-czechia',
|
|
typedbAttribute: 'type',
|
|
rdfPredicate: 'hc:archiveType',
|
|
required: true,
|
|
},
|
|
],
|
|
generatedClasses: [
|
|
'SpecializedArchivesCzechia', 'DimArchives', 'LightArchives',
|
|
'HistoricalArchive', 'JointArchives', 'PartyArchive', 'Kustodie',
|
|
'ArchivesForBuildingRecords',
|
|
],
|
|
exampleYaml: `
|
|
# Czech specialized archive
|
|
archive_type: OBLASTNI_ARCHIV
|
|
name: Státní oblastní archiv v Praze
|
|
`.trim(),
|
|
},
|
|
];
|
|
|
|
// ============================================================================
|
|
// PERSON DATA CATEGORIES
|
|
// ============================================================================
|
|
|
|
/**
 * Display categories for person-level data.
 *
 * Each entry carries an English and a Dutch (`nameNl` / `descriptionNl`)
 * label, an emoji `icon`, a `group` key, and a `sources` list. The `sources`
 * values line up with `sourceBlock` identifiers of the person mappings
 * declared in this file (e.g. `profile_identity`, `linkedin_profile`,
 * `career_history`, `education`).
 *
 * NOTE(review): not every id listed under `sources` is visible as a
 * `sourceBlock` in the portion of the file reviewed here — confirm that each
 * one resolves to a mapping before relying on it for lookups.
 */
export const PERSON_CATEGORIES: MappingCategory[] = [
  {
    id: 'identity',
    name: 'Identity & Profile',
    nameNl: 'Identiteit & Profiel',
    description: 'Core identity fields: name, headline, photo, LinkedIn URL',
    descriptionNl: 'Kernidentiteitsvelden: naam, headline, foto, LinkedIn URL',
    icon: '👤',
    sources: ['profile_identity', 'linkedin_profile'],
    group: 'core',
  },
  {
    id: 'career',
    name: 'Career History',
    nameNl: 'Loopbaangeschiedenis',
    description: 'Employment history, roles, organizations',
    descriptionNl: 'Werkgeschiedenis, functies, organisaties',
    icon: '💼',
    sources: ['career_history'],
    group: 'organization',
  },
  {
    id: 'education',
    name: 'Education',
    nameNl: 'Opleiding',
    description: 'Educational background, degrees, institutions',
    descriptionNl: 'Opleidingsachtergrond, diploma\'s, instellingen',
    icon: '🎓',
    sources: ['education'],
    group: 'organization',
  },
  {
    id: 'skills',
    name: 'Skills & Expertise',
    nameNl: 'Vaardigheden & Expertise',
    description: 'Professional skills, languages, expertise areas',
    descriptionNl: 'Professionele vaardigheden, talen, expertisegebieden',
    icon: '🛠️',
    sources: ['skills_expertise'],
    group: 'technical',
  },
  {
    id: 'heritage',
    name: 'Heritage Relevance',
    nameNl: 'Erfgoed Relevantie',
    description: 'Heritage sector relevance, domain expertise, years in sector',
    descriptionNl: 'Relevantie erfgoedsector, domeinexpertise, jaren in sector',
    icon: '🏛️',
    sources: ['heritage_relevance', 'heritage_experience'],
    group: 'heritage',
  },
  {
    id: 'affiliations',
    name: 'Affiliations & Links',
    nameNl: 'Affiliaties & Koppelingen',
    description: 'Custodian affiliations, linked records',
    descriptionNl: 'Bronhouder-affiliaties, gekoppelde records',
    icon: '🔗',
    sources: ['affiliations', 'linked_records'],
    group: 'enrichment',
  },
  {
    id: 'contact',
    name: 'Contact & Social',
    nameNl: 'Contact & Sociaal',
    description: 'Contact information, social connections',
    descriptionNl: 'Contactgegevens, sociale connecties',
    icon: '📧',
    sources: ['contact_data'],
    group: 'core',
  },
  {
    id: 'provenance',
    name: 'Extraction & Provenance',
    nameNl: 'Extractie & Herkomst',
    description: 'Data extraction metadata, sources, timestamps',
    descriptionNl: 'Data-extractiemetadata, bronnen, tijdstempels',
    icon: '📋',
    sources: ['extraction_metadata', 'web_claims'],
    group: 'core',
  },
  {
    // Documents ontology coverage rather than a slice of profile data:
    // one source for what is mapped, one for what is deliberately left out.
    id: 'pico_ontology',
    name: 'PiCo Ontology Coverage',
    nameNl: 'PiCo Ontologie Dekking',
    description: 'Coverage of the PiCo (Persons in Context) ontology - what is mapped and what is intentionally out of scope',
    descriptionNl: 'Dekking van de PiCo (Personen in Context) ontologie - wat is gekoppeld en wat bewust buiten scope valt',
    icon: '🧬',
    sources: ['pico_mapped', 'pico_unmapped'],
    group: 'technical',
  },
];
|
|
|
|
// ============================================================================
|
|
// PERSON DATA MAPPINGS
|
|
// ============================================================================
|
|
|
|
export const PERSON_MAPPINGS: EnrichmentSourceMapping[] = [
|
|
// -------------------------------------------------------------------------
|
|
// PROFILE IDENTITY - Core profile information
|
|
// -------------------------------------------------------------------------
|
|
{
|
|
sourceBlock: 'profile_identity',
|
|
description: 'Core profile identity - name, headline, location',
|
|
detailedDescription: `
|
|
Core identity information extracted from LinkedIn profiles.
|
|
Includes the person's full name, professional headline, location,
|
|
and current company affiliation.
|
|
|
|
This data forms the foundation of the person entity and is used
|
|
for display and search purposes across the heritage network.
|
|
`.trim(),
|
|
linkmlClass: 'Person',
|
|
typedbEntity: 'person',
|
|
provenance: {
|
|
sourceType: 'external_api',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'profile_data.name',
|
|
sourceDescription: 'Full name of the person',
|
|
targetClass: 'Person',
|
|
targetSlot: 'has_name',
|
|
typedbAttribute: 'person-name',
|
|
rdfPredicate: 'foaf:name',
|
|
transformation: 'direct',
|
|
required: true,
|
|
notes: 'Primary identifier for the person',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.headline',
|
|
sourceDescription: 'Professional headline/title',
|
|
targetClass: 'Person',
|
|
targetSlot: 'has_label',
|
|
typedbAttribute: 'headline',
|
|
rdfPredicate: 'schema:jobTitle',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Current professional headline from LinkedIn',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.headline_english',
|
|
sourceDescription: 'English translation of headline',
|
|
targetClass: 'Person',
|
|
targetSlot: 'has_label',
|
|
typedbAttribute: 'headline-english',
|
|
rdfPredicate: 'schema:jobTitle',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Translated headline for non-English profiles',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.location',
|
|
sourceDescription: 'Geographic location',
|
|
targetClass: 'Person',
|
|
targetSlot: 'has_location',
|
|
typedbAttribute: 'location-string',
|
|
rdfPredicate: 'schema:address',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Free-text location from LinkedIn',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.country_code',
|
|
sourceDescription: 'ISO country code',
|
|
targetClass: 'Person',
|
|
targetSlot: 'cover_country',
|
|
typedbAttribute: 'country-code',
|
|
rdfPredicate: 'schema:addressCountry',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Two-letter ISO 3166-1 country code',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.current_company',
|
|
sourceDescription: 'Current employer name',
|
|
targetClass: 'Person',
|
|
targetSlot: 'affiliated_with',
|
|
typedbAttribute: 'current-company',
|
|
rdfPredicate: 'schema:worksFor',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Name of current employer organization',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.about',
|
|
sourceDescription: 'About/summary section',
|
|
targetClass: 'Person',
|
|
targetSlot: 'has_subject',
|
|
typedbAttribute: 'about',
|
|
rdfPredicate: 'schema:description',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Professional summary from LinkedIn',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.total_experience',
|
|
sourceDescription: 'Total years of experience',
|
|
targetClass: 'Person',
|
|
targetSlot: 'has_detail',
|
|
typedbAttribute: 'total-experience',
|
|
rdfPredicate: 'schema:experienceYears',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Calculated total professional experience',
|
|
},
|
|
],
|
|
exampleYaml: `
|
|
profile_data:
|
|
name: Iris van Meer
|
|
headline: Staff member at the Services Department at Nationaal Archief
|
|
headline_english: Staff member at the Services Department at Nationaal Archief
|
|
location: The Randstad, Netherlands
|
|
country_code: NL
|
|
current_company: Nationaal Archief
|
|
about: Total Experience: 15 years and 8 months
|
|
total_experience: 15 years and 8 months
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// LINKEDIN PROFILE - URLs and social metrics
|
|
// -------------------------------------------------------------------------
|
|
{
|
|
sourceBlock: 'linkedin_profile',
|
|
description: 'LinkedIn profile URLs and social metrics',
|
|
detailedDescription: `
|
|
LinkedIn-specific profile data including the profile URL, photo URL,
|
|
and social metrics like connections and followers count.
|
|
|
|
These fields enable linking back to the source profile and provide
|
|
insight into the person's professional network reach.
|
|
`.trim(),
|
|
linkmlClass: 'Person',
|
|
typedbEntity: 'person',
|
|
provenance: {
|
|
sourceType: 'external_api',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'linkedin_profile_url',
|
|
sourceDescription: 'LinkedIn profile URL',
|
|
targetClass: 'Person',
|
|
targetSlot: 'has_url',
|
|
typedbAttribute: 'linkedin-url',
|
|
rdfPredicate: 'schema:sameAs',
|
|
transformation: 'direct',
|
|
required: true,
|
|
notes: 'Canonical LinkedIn profile URL',
|
|
},
|
|
{
|
|
sourcePath: 'linkedin_photo_url',
|
|
sourceDescription: 'LinkedIn profile photo URL',
|
|
targetClass: 'Person',
|
|
targetSlot: 'has_image',
|
|
typedbAttribute: 'photo-url',
|
|
rdfPredicate: 'schema:image',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'CDN URL for profile photo',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.connections',
|
|
sourceDescription: 'Number of LinkedIn connections',
|
|
targetClass: 'Person',
|
|
targetSlot: 'has_contact_point',
|
|
typedbAttribute: 'connections-count',
|
|
rdfPredicate: 'schema:knows',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'First-degree connection count',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.followers',
|
|
sourceDescription: 'Number of followers',
|
|
targetClass: 'Person',
|
|
targetSlot: 'has_detail',
|
|
typedbAttribute: 'followers-count',
|
|
rdfPredicate: 'schema:followerCount',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'LinkedIn follower count',
|
|
},
|
|
],
|
|
exampleYaml: `
|
|
linkedin_profile_url: https://www.linkedin.com/in/iris-van-meer-34329131
|
|
linkedin_photo_url: https://media.licdn.com/dms/image/v2/...
|
|
profile_data:
|
|
connections: 286
|
|
followers: 289
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// CAREER HISTORY - Employment timeline
|
|
// -------------------------------------------------------------------------
|
|
{
|
|
sourceBlock: 'career_history',
|
|
description: 'Career history and employment timeline',
|
|
detailedDescription: `
|
|
Complete career history extracted from LinkedIn profiles.
|
|
Each position includes organization, role, dates, duration,
|
|
location, and organizational metadata like company size and industry.
|
|
|
|
This data is crucial for understanding a person's professional
|
|
trajectory and their experience in heritage-related roles.
|
|
`.trim(),
|
|
linkmlClass: 'CareerPosition',
|
|
typedbEntity: 'career-position',
|
|
provenance: {
|
|
sourceType: 'external_api',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'profile_data.career_history[].organization',
|
|
sourceDescription: 'Employer organization name',
|
|
targetClass: 'CareerPosition',
|
|
targetSlot: 'affiliated_with',
|
|
typedbAttribute: 'organization-name',
|
|
rdfPredicate: 'schema:worksFor',
|
|
transformation: 'direct',
|
|
required: true,
|
|
notes: 'Name of the employing organization',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.career_history[].organization_linkedin',
|
|
sourceDescription: 'LinkedIn URL for organization',
|
|
targetClass: 'CareerPosition',
|
|
targetSlot: 'has_url',
|
|
typedbAttribute: 'organization-linkedin-url',
|
|
rdfPredicate: 'schema:sameAs',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'LinkedIn company page URL',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.career_history[].role',
|
|
sourceDescription: 'Job title/role',
|
|
targetClass: 'CareerPosition',
|
|
targetSlot: 'has_role',
|
|
typedbAttribute: 'role-title',
|
|
rdfPredicate: 'schema:jobTitle',
|
|
transformation: 'direct',
|
|
required: true,
|
|
notes: 'Original language job title',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.career_history[].role_english',
|
|
sourceDescription: 'English translation of role',
|
|
targetClass: 'CareerPosition',
|
|
targetSlot: 'has_role',
|
|
typedbAttribute: 'role-title-english',
|
|
rdfPredicate: 'schema:jobTitle',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'English translation for non-English titles',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.career_history[].dates',
|
|
sourceDescription: 'Employment date range',
|
|
targetClass: 'CareerPosition',
|
|
targetSlot: 'temporal_extent',
|
|
typedbAttribute: 'date-range',
|
|
rdfPredicate: 'schema:temporalCoverage',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Date range string (e.g., "Apr 2014 - Present")',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.career_history[].duration',
|
|
sourceDescription: 'Employment duration',
|
|
targetClass: 'CareerPosition',
|
|
targetSlot: 'temporal_extent',
|
|
typedbAttribute: 'duration',
|
|
rdfPredicate: 'schema:duration',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Calculated duration (e.g., "11 years and 7 months")',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.career_history[].location',
|
|
sourceDescription: 'Work location',
|
|
targetClass: 'CareerPosition',
|
|
targetSlot: 'has_location',
|
|
typedbAttribute: 'work-location',
|
|
rdfPredicate: 'schema:workLocation',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Geographic location of the position',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.career_history[].current',
|
|
sourceDescription: 'Is current position',
|
|
targetClass: 'CareerPosition',
|
|
targetSlot: 'current',
|
|
typedbAttribute: 'is-current',
|
|
rdfPredicate: 'schema:currentPosition',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Boolean flag for current employment',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.career_history[].company_size',
|
|
sourceDescription: 'Company employee count range',
|
|
targetClass: 'CareerPosition',
|
|
targetSlot: 'has_detail',
|
|
typedbAttribute: 'company-size',
|
|
rdfPredicate: 'schema:numberOfEmployees',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Employee count range (e.g., "201-500 employees")',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.career_history[].company_founded',
|
|
sourceDescription: 'Year company was founded',
|
|
targetClass: 'CareerPosition',
|
|
targetSlot: 'founded_through',
|
|
typedbAttribute: 'company-founded-year',
|
|
rdfPredicate: 'schema:foundingDate',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Founding year of the organization',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.career_history[].company_type',
|
|
sourceDescription: 'Type of company',
|
|
targetClass: 'CareerPosition',
|
|
targetSlot: 'has_type',
|
|
typedbAttribute: 'company-type',
|
|
rdfPredicate: 'schema:additionalType',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Organization type (e.g., "Government Agency")',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.career_history[].industry',
|
|
sourceDescription: 'Industry sector',
|
|
targetClass: 'CareerPosition',
|
|
targetSlot: 'has_domain',
|
|
typedbAttribute: 'industry',
|
|
rdfPredicate: 'schema:industry',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Industry classification',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.career_history[].department',
|
|
sourceDescription: 'Department within organization',
|
|
targetClass: 'CareerPosition',
|
|
targetSlot: 'department_of',
|
|
typedbAttribute: 'department',
|
|
rdfPredicate: 'schema:department',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Department or division name',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.career_history[].level',
|
|
sourceDescription: 'Seniority level',
|
|
targetClass: 'CareerPosition',
|
|
targetSlot: 'has_level',
|
|
typedbAttribute: 'seniority-level',
|
|
rdfPredicate: 'schema:occupationalCategory',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Career level (e.g., "Specialist", "Manager")',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.career_history[].description',
|
|
sourceDescription: 'Role description',
|
|
targetClass: 'CareerPosition',
|
|
targetSlot: 'has_description',
|
|
typedbAttribute: 'role-description',
|
|
rdfPredicate: 'schema:description',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Free-text description of the role',
|
|
},
|
|
],
|
|
exampleYaml: `
|
|
profile_data:
|
|
career_history:
|
|
- organization: Nationaal Archief
|
|
organization_linkedin: https://www.linkedin.com/company/nationaal-archief
|
|
role: Staff Member At The Services Department
|
|
role_english: Staff Member At The Services Department
|
|
dates: Apr 2014 - Present
|
|
duration: 11 years and 7 months
|
|
location: Den Haag
|
|
current: true
|
|
company_size: 201-500 employees
|
|
company_founded: 1802
|
|
company_type: Government Agency
|
|
industry: Government Administration
|
|
department: Other
|
|
level: Specialist
|
|
description: null
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// EDUCATION - Academic background
|
|
// -------------------------------------------------------------------------
|
|
{
|
|
sourceBlock: 'education',
|
|
description: 'Educational background and qualifications',
|
|
detailedDescription: `
|
|
Educational history including degrees, institutions, and duration.
|
|
Links to institution LinkedIn pages when available.
|
|
|
|
This data helps understand the academic foundation and
|
|
qualifications of heritage professionals.
|
|
`.trim(),
|
|
linkmlClass: 'Education',
|
|
typedbEntity: 'education',
|
|
provenance: {
|
|
sourceType: 'external_api',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
// Field mapping: education[].institution → Education.affiliated_with (TypeDB `institution-name`).
{
|
|
sourcePath: 'profile_data.education[].institution',
|
|
sourceDescription: 'Educational institution name',
|
|
targetClass: 'Education',
|
|
targetSlot: 'affiliated_with',
|
|
typedbAttribute: 'institution-name',
|
|
// The value is the institution attended, not a credential, so use the
// organisation-relationship predicate (parallel to schema:worksFor on the
// other `affiliated_with` mappings) rather than educationalCredentialAwarded.
rdfPredicate: 'schema:alumniOf',
|
|
transformation: 'direct',
|
|
required: true,
|
|
notes: 'Name of university/school',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.education[].institution_linkedin',
|
|
sourceDescription: 'LinkedIn URL for institution',
|
|
targetClass: 'Education',
|
|
targetSlot: 'has_url',
|
|
typedbAttribute: 'institution-linkedin-url',
|
|
rdfPredicate: 'schema:sameAs',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'LinkedIn school page URL',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.education[].degree',
|
|
sourceDescription: 'Degree and field of study',
|
|
targetClass: 'Education',
|
|
targetSlot: 'has_degree',
|
|
typedbAttribute: 'degree',
|
|
rdfPredicate: 'schema:educationalLevel',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Degree type and major (e.g., "MA, History")',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.education[].years',
|
|
sourceDescription: 'Years attended',
|
|
targetClass: 'Education',
|
|
targetSlot: 'temporal_extent',
|
|
typedbAttribute: 'years-attended',
|
|
rdfPredicate: 'schema:temporalCoverage',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Date range (e.g., "2001 - 2007")',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.education[].duration',
|
|
sourceDescription: 'Duration of study',
|
|
targetClass: 'Education',
|
|
targetSlot: 'temporal_extent',
|
|
typedbAttribute: 'study-duration',
|
|
rdfPredicate: 'schema:duration',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Calculated duration (e.g., "6 years")',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.education[].country',
|
|
sourceDescription: 'Country of institution',
|
|
targetClass: 'Education',
|
|
targetSlot: 'cover_country',
|
|
typedbAttribute: 'education-country',
|
|
rdfPredicate: 'schema:addressCountry',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'ISO country code of institution',
|
|
},
|
|
],
|
|
exampleYaml: `
|
|
profile_data:
|
|
education:
|
|
- institution: Universiteit Utrecht
|
|
institution_linkedin: https://www.linkedin.com/school/universiteit-utrecht
|
|
degree: MA, History
|
|
years: 2001 - 2007
|
|
duration: 6 years
|
|
country: NL
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// SKILLS & EXPERTISE - Professional capabilities
|
|
// -------------------------------------------------------------------------
|
|
{
|
|
sourceBlock: 'skills_expertise',
|
|
description: 'Professional skills, languages, and expertise areas',
|
|
detailedDescription: `
|
|
Professional skills endorsed on LinkedIn, language proficiencies,
|
|
and identified expertise areas based on career history analysis.
|
|
|
|
Expertise areas are derived from analyzing the person's complete
|
|
professional background in the heritage sector.
|
|
`.trim(),
|
|
linkmlClass: 'Person',
|
|
typedbEntity: 'person',
|
|
provenance: {
|
|
sourceType: 'external_api',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'profile_data.skills',
|
|
sourceDescription: 'Professional skills list',
|
|
targetClass: 'Person',
|
|
targetSlot: 'has_domain',
|
|
typedbAttribute: 'skills',
|
|
rdfPredicate: 'schema:knowsAbout',
|
|
transformation: 'array_direct',
|
|
required: false,
|
|
notes: 'LinkedIn-endorsed skills',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.languages',
|
|
sourceDescription: 'Language proficiencies',
|
|
targetClass: 'Person',
|
|
targetSlot: 'has_language',
|
|
typedbAttribute: 'languages',
|
|
rdfPredicate: 'schema:knowsLanguage',
|
|
transformation: 'array_direct',
|
|
required: false,
|
|
notes: 'Languages spoken with proficiency levels',
|
|
},
|
|
// Field mapping: profile_data.expertise_areas → Person.has_domain (TypeDB `expertise-areas`).
// NOTE(review): expertise areas are topics rather than occupations, and the sibling
// `profile_data.skills` field (same target slot) uses schema:knowsAbout — confirm that
// schema:hasOccupation is the intended predicate here.
{
|
|
sourcePath: 'profile_data.expertise_areas',
|
|
sourceDescription: 'Identified expertise areas',
|
|
targetClass: 'Person',
|
|
targetSlot: 'has_domain',
|
|
typedbAttribute: 'expertise-areas',
|
|
rdfPredicate: 'schema:hasOccupation',
|
|
transformation: 'array_direct',
|
|
required: false,
|
|
notes: 'Derived from career analysis',
|
|
},
|
|
],
|
|
exampleYaml: `
|
|
profile_data:
|
|
skills:
|
|
- Digital Preservation
|
|
- Archival Description
|
|
- Collection Management
|
|
languages:
|
|
- Dutch (Native)
|
|
- English (Professional)
|
|
expertise_areas:
|
|
- Archival services
|
|
- Public services
|
|
- History research
|
|
- Library services
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// HERITAGE RELEVANCE - Sector-specific assessment
|
|
// -------------------------------------------------------------------------
|
|
{
|
|
sourceBlock: 'heritage_relevance',
|
|
description: 'Heritage sector relevance assessment',
|
|
detailedDescription: `
|
|
Assessment of the person's relevance to the heritage sector.
|
|
Includes heritage type classification (GLAMORCUBESFIXPHDNT),
|
|
current institution, sector role, and years of heritage experience.
|
|
|
|
This provides a quick overview of where the person fits
|
|
within the heritage ecosystem.
|
|
`.trim(),
|
|
linkmlClass: 'HeritageRelevance',
|
|
typedbEntity: 'heritage-relevance',
|
|
provenance: {
|
|
sourceType: 'computed',
|
|
dataTier: 'TIER_3_CROWD_SOURCED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'heritage_sector_relevance.heritage_type',
|
|
sourceDescription: 'Heritage type code',
|
|
targetClass: 'HeritageRelevance',
|
|
targetSlot: 'has_type',
|
|
typedbAttribute: 'heritage-type-code',
|
|
rdfPredicate: 'glam:heritageType',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Single letter GLAMORCUBESFIXPHDNT code',
|
|
},
|
|
{
|
|
sourcePath: 'heritage_sector_relevance.heritage_type_label',
|
|
sourceDescription: 'Heritage type label',
|
|
targetClass: 'HeritageRelevance',
|
|
targetSlot: 'has_label',
|
|
typedbAttribute: 'heritage-type-label',
|
|
rdfPredicate: 'rdfs:label',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Human-readable type label (e.g., "Archive")',
|
|
},
|
|
{
|
|
sourcePath: 'heritage_sector_relevance.current_institution',
|
|
sourceDescription: 'Current heritage institution',
|
|
targetClass: 'HeritageRelevance',
|
|
targetSlot: 'affiliated_with',
|
|
typedbAttribute: 'current-institution',
|
|
rdfPredicate: 'schema:worksFor',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Name of current heritage employer',
|
|
},
|
|
{
|
|
sourcePath: 'heritage_sector_relevance.institution_type',
|
|
sourceDescription: 'Type of institution',
|
|
targetClass: 'HeritageRelevance',
|
|
targetSlot: 'has_type',
|
|
typedbAttribute: 'institution-type',
|
|
rdfPredicate: 'schema:additionalType',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Classification of the institution',
|
|
},
|
|
{
|
|
sourcePath: 'heritage_sector_relevance.sector_role',
|
|
sourceDescription: 'Role within heritage sector',
|
|
targetClass: 'HeritageRelevance',
|
|
targetSlot: 'has_role',
|
|
typedbAttribute: 'sector-role',
|
|
rdfPredicate: 'schema:jobTitle',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Standardized role classification',
|
|
},
|
|
// Field mapping: heritage_sector_relevance.years_in_heritage → HeritageRelevance.has_detail
// (TypeDB `years-in-heritage`).
// NOTE(review): `experienceYears` does not appear to be a term in the schema.org
// vocabulary — confirm the intended predicate (or move it to the project `glam:` namespace).
{
|
|
sourcePath: 'heritage_sector_relevance.years_in_heritage',
|
|
sourceDescription: 'Years of heritage experience',
|
|
targetClass: 'HeritageRelevance',
|
|
targetSlot: 'has_detail',
|
|
typedbAttribute: 'years-in-heritage',
|
|
rdfPredicate: 'schema:experienceYears',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Total years in heritage sector',
|
|
},
|
|
],
|
|
exampleYaml: `
|
|
heritage_sector_relevance:
|
|
heritage_type: A
|
|
heritage_type_label: Archive
|
|
current_institution: Nationaal Archief
|
|
institution_type: National Archive
|
|
sector_role: Services Staff
|
|
years_in_heritage: 11
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// HERITAGE EXPERIENCE - Relevant positions
|
|
// -------------------------------------------------------------------------
|
|
{
|
|
sourceBlock: 'heritage_experience',
|
|
description: 'Heritage-relevant experience from career history',
|
|
detailedDescription: `
|
|
Filtered list of positions that are relevant to the heritage sector.
|
|
Extracted from full career history with relevance annotations.
|
|
|
|
Includes both current and past positions at heritage institutions
|
|
with notes explaining their relevance to the GLAM sector.
|
|
`.trim(),
|
|
linkmlClass: 'HeritageExperience',
|
|
typedbEntity: 'heritage-experience',
|
|
provenance: {
|
|
sourceType: 'computed',
|
|
dataTier: 'TIER_3_CROWD_SOURCED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'profile_data.heritage_relevant_experience[].organization',
|
|
sourceDescription: 'Heritage organization name',
|
|
targetClass: 'HeritageExperience',
|
|
targetSlot: 'affiliated_with',
|
|
typedbAttribute: 'heritage-org-name',
|
|
rdfPredicate: 'schema:worksFor',
|
|
transformation: 'direct',
|
|
required: true,
|
|
notes: 'Name of heritage institution',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.heritage_relevant_experience[].role',
|
|
sourceDescription: 'Role at heritage organization',
|
|
targetClass: 'HeritageExperience',
|
|
targetSlot: 'has_role',
|
|
typedbAttribute: 'heritage-role',
|
|
rdfPredicate: 'schema:jobTitle',
|
|
transformation: 'direct',
|
|
required: true,
|
|
notes: 'Job title at heritage institution',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.heritage_relevant_experience[].relevance',
|
|
sourceDescription: 'Relevance explanation',
|
|
targetClass: 'HeritageExperience',
|
|
targetSlot: 'has_significance',
|
|
typedbAttribute: 'relevance-notes',
|
|
rdfPredicate: 'schema:description',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Why this position is heritage-relevant',
|
|
},
|
|
// Field mapping: heritage_relevant_experience[].current → HeritageExperience.current
// (TypeDB `is-current-heritage`).
// NOTE(review): `currentPosition` does not appear to be a term in the schema.org
// vocabulary — confirm the intended predicate for this boolean flag.
{
|
|
sourcePath: 'profile_data.heritage_relevant_experience[].current',
|
|
sourceDescription: 'Is current position',
|
|
targetClass: 'HeritageExperience',
|
|
targetSlot: 'current',
|
|
typedbAttribute: 'is-current-heritage',
|
|
rdfPredicate: 'schema:currentPosition',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Whether this is a current position',
|
|
},
|
|
],
|
|
generatedClasses: ['HeritageExperience'],
|
|
exampleYaml: `
|
|
profile_data:
|
|
heritage_relevant_experience:
|
|
- organization: Nationaal Archief
|
|
role: Staff Member At The Services Department
|
|
relevance: Public services at National Archives of the Netherlands
|
|
current: true
|
|
- organization: University Library Utrecht
|
|
role: Library Employee
|
|
relevance: Academic library experience
|
|
current: false
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// AFFILIATIONS - Custodian connections
|
|
// -------------------------------------------------------------------------
|
|
{
|
|
sourceBlock: 'affiliations',
|
|
description: 'Affiliations with heritage custodians',
|
|
detailedDescription: `
|
|
Links between the person and heritage custodian institutions.
|
|
Each affiliation includes the custodian name, slug identifier,
|
|
role title, and heritage classification.
|
|
|
|
These affiliations enable network analysis across the heritage
|
|
sector workforce.
|
|
`.trim(),
|
|
linkmlClass: 'Affiliation',
|
|
typedbEntity: 'affiliation',
|
|
provenance: {
|
|
sourceType: 'computed',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'affiliations[].custodian_name',
|
|
sourceDescription: 'Heritage custodian name',
|
|
targetClass: 'Affiliation',
|
|
targetSlot: 'has_name',
|
|
typedbAttribute: 'custodian-name',
|
|
rdfPredicate: 'schema:memberOf',
|
|
transformation: 'direct',
|
|
required: true,
|
|
notes: 'Name of the heritage institution',
|
|
},
|
|
{
|
|
sourcePath: 'affiliations[].custodian_slug',
|
|
sourceDescription: 'Custodian identifier slug',
|
|
targetClass: 'Affiliation',
|
|
targetSlot: 'identified_by',
|
|
typedbAttribute: 'custodian-slug',
|
|
rdfPredicate: 'schema:identifier',
|
|
transformation: 'direct',
|
|
required: true,
|
|
notes: 'URL-safe identifier for the custodian',
|
|
},
|
|
{
|
|
sourcePath: 'affiliations[].role_title',
|
|
sourceDescription: 'Role at custodian',
|
|
targetClass: 'Affiliation',
|
|
targetSlot: 'role_title',
|
|
typedbAttribute: 'affiliation-role',
|
|
rdfPredicate: 'schema:jobTitle',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Job title at this custodian',
|
|
},
|
|
{
|
|
sourcePath: 'affiliations[].heritage_relevant',
|
|
sourceDescription: 'Is heritage relevant',
|
|
targetClass: 'Affiliation',
|
|
targetSlot: 'has_significance',
|
|
typedbAttribute: 'is-heritage-relevant',
|
|
rdfPredicate: 'glam:heritageRelevant',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Whether affiliation is heritage-relevant',
|
|
},
|
|
{
|
|
sourcePath: 'affiliations[].heritage_type',
|
|
sourceDescription: 'Heritage type code',
|
|
targetClass: 'Affiliation',
|
|
targetSlot: 'has_type',
|
|
typedbAttribute: 'affiliation-heritage-type',
|
|
rdfPredicate: 'glam:heritageType',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'GLAMORCUBESFIXPHDNT type code',
|
|
},
|
|
// Field mapping: affiliations[].current → Affiliation.current (TypeDB `is-current-affiliation`).
// NOTE(review): `currentPosition` does not appear to be a term in the schema.org
// vocabulary — confirm the intended predicate for this boolean flag.
{
|
|
sourcePath: 'affiliations[].current',
|
|
sourceDescription: 'Is current affiliation',
|
|
targetClass: 'Affiliation',
|
|
targetSlot: 'current',
|
|
typedbAttribute: 'is-current-affiliation',
|
|
rdfPredicate: 'schema:currentPosition',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Whether this is a current affiliation',
|
|
},
|
|
{
|
|
sourcePath: 'affiliations[].observed_on',
|
|
sourceDescription: 'Observation timestamp',
|
|
targetClass: 'Affiliation',
|
|
targetSlot: 'observed_in',
|
|
typedbAttribute: 'observed-on',
|
|
rdfPredicate: 'prov:generatedAtTime',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'When this affiliation was observed',
|
|
},
|
|
{
|
|
sourcePath: 'affiliations[].source_url',
|
|
sourceDescription: 'Source URL for affiliation',
|
|
targetClass: 'Affiliation',
|
|
targetSlot: 'source_url',
|
|
typedbAttribute: 'affiliation-source-url',
|
|
rdfPredicate: 'prov:wasDerivedFrom',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'URL where affiliation was discovered',
|
|
},
|
|
],
|
|
generatedClasses: ['Affiliation'],
|
|
exampleYaml: `
|
|
affiliations:
|
|
- custodian_name: Nationaal Archief
|
|
custodian_slug: nationaal-archief
|
|
role_title: Staff member at the Services Department at Nationaal Archief
|
|
heritage_relevant: true
|
|
heritage_type: A
|
|
current: true
|
|
observed_on: 2025-12-14T11:21:47Z
|
|
source_url: https://www.linkedin.com/company/nationaal-archief/people/
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// LINKED RECORDS - Cross-references
|
|
// -------------------------------------------------------------------------
|
|
{
|
|
sourceBlock: 'linked_records',
|
|
description: 'Links to related records in the system',
|
|
detailedDescription: `
|
|
Cross-references to other records in the heritage data system.
|
|
Includes links to staff records (parsed from LinkedIn company pages)
|
|
and custodian records (heritage institution YAML files).
|
|
|
|
These links enable navigation between person profiles and
|
|
the institutions they work for.
|
|
`.trim(),
|
|
linkmlClass: 'LinkedRecords',
|
|
typedbEntity: 'linked-records',
|
|
provenance: {
|
|
sourceType: 'computed',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'linked_records.staff_record.file',
|
|
sourceDescription: 'Staff record file path',
|
|
targetClass: 'LinkedRecords',
|
|
targetSlot: 'has_file_location',
|
|
typedbAttribute: 'staff-record-path',
|
|
rdfPredicate: 'prov:wasDerivedFrom',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Path to parsed staff JSON file',
|
|
},
|
|
{
|
|
sourcePath: 'linked_records.staff_record.staff_id',
|
|
sourceDescription: 'Staff record ID',
|
|
targetClass: 'LinkedRecords',
|
|
targetSlot: 'staff_id',
|
|
typedbAttribute: 'staff-id',
|
|
rdfPredicate: 'schema:identifier',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Unique staff identifier',
|
|
},
|
|
{
|
|
sourcePath: 'linked_records.custodian_record.ghcid',
|
|
sourceDescription: 'Custodian GHCID',
|
|
targetClass: 'LinkedRecords',
|
|
targetSlot: 'identified_by',
|
|
typedbAttribute: 'linked-ghcid',
|
|
rdfPredicate: 'glam:ghcid',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'GHCID of linked custodian',
|
|
},
|
|
{
|
|
sourcePath: 'linked_records.custodian_record.notes',
|
|
sourceDescription: 'Custodian record notes',
|
|
targetClass: 'LinkedRecords',
|
|
targetSlot: 'has_note',
|
|
typedbAttribute: 'custodian-notes',
|
|
rdfPredicate: 'schema:description',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Additional notes about the custodian link',
|
|
},
|
|
],
|
|
generatedClasses: ['LinkedRecords'],
|
|
exampleYaml: `
|
|
linked_records:
|
|
staff_record:
|
|
file: data/custodian/person/affiliated/parsed/nationaal-archief_staff_20251210T155415Z.json
|
|
staff_id: nationaal-archief_staff_0002_iris_van_meer
|
|
custodian_record:
|
|
ghcid: NL-ZH-DHA-A-NA
|
|
notes: Nationaal Archief, The Hague
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// CONTACT DATA - Contact information
|
|
// -------------------------------------------------------------------------
|
|
{
|
|
sourceBlock: 'contact_data',
|
|
description: 'Contact information and communication channels',
|
|
detailedDescription: `
|
|
Contact information including email addresses and phone numbers.
|
|
Emails may be inferred from organizational naming conventions
|
|
with confidence scores indicating reliability.
|
|
|
|
Also includes profile photo URLs and external lookup service links.
|
|
`.trim(),
|
|
linkmlClass: 'ContactData',
|
|
typedbEntity: 'contact-data',
|
|
provenance: {
|
|
sourceType: 'computed',
|
|
dataTier: 'TIER_3_CROWD_SOURCED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'contact_data.provenance.source',
|
|
sourceDescription: 'Contact data source',
|
|
targetClass: 'ContactData',
|
|
targetSlot: 'has_source',
|
|
typedbAttribute: 'contact-source',
|
|
rdfPredicate: 'prov:wasAttributedTo',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'How contact data was obtained',
|
|
},
|
|
{
|
|
sourcePath: 'contact_data.emails[].email',
|
|
sourceDescription: 'Email address',
|
|
targetClass: 'ContactData',
|
|
targetSlot: 'has_email_address',
|
|
typedbAttribute: 'email-address',
|
|
rdfPredicate: 'schema:email',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Email address (may be inferred)',
|
|
},
|
|
{
|
|
sourcePath: 'contact_data.emails[].type',
|
|
sourceDescription: 'Email type',
|
|
targetClass: 'ContactData',
|
|
targetSlot: 'has_type',
|
|
typedbAttribute: 'email-type',
|
|
rdfPredicate: 'schema:contactType',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Type of email (work, personal)',
|
|
},
|
|
// Field mapping: contact_data.emails[].confidence → ContactData.has_confidence_measure
// (TypeDB `email-confidence`).
// NOTE(review): `confidence` does not appear to be a term defined by PROV-O —
// confirm which vocabulary this predicate should come from.
{
|
|
sourcePath: 'contact_data.emails[].confidence',
|
|
sourceDescription: 'Email confidence score',
|
|
targetClass: 'ContactData',
|
|
targetSlot: 'has_confidence_measure',
|
|
typedbAttribute: 'email-confidence',
|
|
rdfPredicate: 'prov:confidence',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Confidence in email accuracy (0-1)',
|
|
},
|
|
{
|
|
sourcePath: 'contact_data.emails[].verified',
|
|
sourceDescription: 'Email verification status',
|
|
targetClass: 'ContactData',
|
|
targetSlot: 'has_detail',
|
|
typedbAttribute: 'email-verified',
|
|
rdfPredicate: 'schema:verified',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Whether email has been verified',
|
|
},
|
|
{
|
|
sourcePath: 'contact_data.profile_photo_url',
|
|
sourceDescription: 'Profile photo URL',
|
|
targetClass: 'ContactData',
|
|
targetSlot: 'has_image',
|
|
typedbAttribute: 'profile-photo',
|
|
rdfPredicate: 'schema:image',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'URL to profile photo',
|
|
},
|
|
{
|
|
sourcePath: 'contact_data.rocketreach_url',
|
|
sourceDescription: 'RocketReach lookup URL',
|
|
targetClass: 'ContactData',
|
|
targetSlot: 'has_url',
|
|
typedbAttribute: 'rocketreach-url',
|
|
rdfPredicate: 'schema:sameAs',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Link to RocketReach profile lookup',
|
|
},
|
|
],
|
|
generatedClasses: ['ContactData'],
|
|
exampleYaml: `
|
|
contact_data:
|
|
provenance:
|
|
source: LinkedIn profile + Dutch government naming convention
|
|
source_url: https://www.linkedin.com/in/iris-van-meer-34329131
|
|
retrieved_date: 2025-12-14
|
|
extraction_method: naming_convention_inference
|
|
emails:
|
|
- email: iris.van.meer@nationaalarchief.nl
|
|
domain: nationaalarchief.nl
|
|
type: work
|
|
source: inferred
|
|
confidence: 0.8
|
|
verified: false
|
|
phones: []
|
|
profile_photo_url: https://media.licdn.com/dms/image/v2/...
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// EXTRACTION METADATA - Data provenance
|
|
// -------------------------------------------------------------------------
|
|
{
|
|
sourceBlock: 'extraction_metadata',
|
|
description: 'Metadata about how the profile was extracted',
|
|
detailedDescription: `
|
|
Provenance information about the data extraction process.
|
|
Includes source file references, extraction timestamps,
|
|
methods used, and cost tracking for API calls.
|
|
|
|
This ensures full traceability of data origin and enables
|
|
reproducibility of the extraction process.
|
|
`.trim(),
|
|
linkmlClass: 'ExtractionMetadata',
|
|
typedbEntity: 'extraction-metadata',
|
|
provenance: {
|
|
sourceType: 'computed',
|
|
dataTier: 'TIER_1_AUTHORITATIVE',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'extraction_metadata.source_file',
|
|
sourceDescription: 'Source file path',
|
|
targetClass: 'ExtractionMetadata',
|
|
targetSlot: 'source_file',
|
|
typedbAttribute: 'source-file-path',
|
|
rdfPredicate: 'prov:wasDerivedFrom',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Original source file for extraction',
|
|
},
|
|
{
|
|
sourcePath: 'extraction_metadata.staff_id',
|
|
sourceDescription: 'Staff identifier',
|
|
targetClass: 'ExtractionMetadata',
|
|
targetSlot: 'staff_id',
|
|
typedbAttribute: 'extraction-staff-id',
|
|
rdfPredicate: 'schema:identifier',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Staff ID from source data',
|
|
},
|
|
{
|
|
sourcePath: 'extraction_metadata.extraction_date',
|
|
sourceDescription: 'Extraction timestamp',
|
|
targetClass: 'ExtractionMetadata',
|
|
targetSlot: 'observed_in',
|
|
typedbAttribute: 'extraction-date',
|
|
rdfPredicate: 'prov:generatedAtTime',
|
|
transformation: 'direct',
|
|
required: true,
|
|
notes: 'ISO 8601 timestamp of extraction',
|
|
},
|
|
{
|
|
sourcePath: 'extraction_metadata.extraction_method',
|
|
sourceDescription: 'Method used for extraction',
|
|
targetClass: 'ExtractionMetadata',
|
|
targetSlot: 'has_type',
|
|
typedbAttribute: 'extraction-method',
|
|
rdfPredicate: 'prov:wasGeneratedBy',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Tool/API used (e.g., exa_crawling_exa)',
|
|
},
|
|
{
|
|
sourcePath: 'extraction_metadata.extraction_agent',
|
|
sourceDescription: 'Agent performing extraction',
|
|
targetClass: 'ExtractionMetadata',
|
|
targetSlot: 'has_agent',
|
|
typedbAttribute: 'extraction-agent',
|
|
rdfPredicate: 'prov:wasAttributedTo',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'AI agent or script name',
|
|
},
|
|
{
|
|
sourcePath: 'extraction_metadata.linkedin_url',
|
|
sourceDescription: 'Source LinkedIn URL',
|
|
targetClass: 'ExtractionMetadata',
|
|
targetSlot: 'has_url',
|
|
typedbAttribute: 'extraction-linkedin-url',
|
|
rdfPredicate: 'prov:hadPrimarySource',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'LinkedIn profile URL that was extracted',
|
|
},
|
|
{
|
|
sourcePath: 'extraction_metadata.cost_usd',
|
|
sourceDescription: 'Extraction cost in USD',
|
|
targetClass: 'ExtractionMetadata',
|
|
targetSlot: 'has_expense',
|
|
typedbAttribute: 'extraction-cost',
|
|
rdfPredicate: 'schema:price',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'API cost for extraction',
|
|
},
|
|
{
|
|
sourcePath: 'extraction_metadata.request_id',
|
|
sourceDescription: 'API request identifier',
|
|
targetClass: 'ExtractionMetadata',
|
|
targetSlot: 'request_id',
|
|
typedbAttribute: 'api-request-id',
|
|
rdfPredicate: 'schema:identifier',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Unique request ID for tracing',
|
|
},
|
|
],
|
|
generatedClasses: ['ExtractionMetadata'],
|
|
exampleYaml: `
|
|
extraction_metadata:
|
|
source_file: null
|
|
staff_id: null
|
|
extraction_date: 2025-12-13T17:35:24.524090+00:00
|
|
extraction_method: exa_crawling_exa
|
|
extraction_agent: claude-opus-4.5
|
|
linkedin_url: https://www.linkedin.com/in/iris-van-meer-34329131
|
|
cost_usd: 0
|
|
request_id: null
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// WEB CLAIMS - Verifiable claims from web sources
|
|
// -------------------------------------------------------------------------
|
|
{
|
|
sourceBlock: 'web_claims',
|
|
description: 'Web claims with provenance tracking',
|
|
detailedDescription: `
|
|
Individual claims extracted from web sources with full provenance.
|
|
Each claim includes the claim type, value, source URL, retrieval
|
|
timestamp, and the agent/tool that performed the extraction.
|
|
|
|
This follows the WebObservation pattern for verifiable data claims.
|
|
`.trim(),
|
|
linkmlClass: 'WebClaim',
|
|
typedbEntity: 'web-claim',
|
|
provenance: {
|
|
sourceType: 'external_api',
|
|
dataTier: 'TIER_2_VERIFIED',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'web_claims[].claim_type',
|
|
sourceDescription: 'Type of claim',
|
|
targetClass: 'WebClaim',
|
|
targetSlot: 'has_type',
|
|
typedbAttribute: 'claim-type',
|
|
rdfPredicate: 'rdf:type',
|
|
transformation: 'direct',
|
|
required: true,
|
|
notes: 'Category of claim (e.g., full_name, role_title)',
|
|
},
|
|
{
|
|
sourcePath: 'web_claims[].claim_value',
|
|
sourceDescription: 'Value of the claim',
|
|
targetClass: 'WebClaim',
|
|
targetSlot: 'has_detail',
|
|
typedbAttribute: 'claim-value',
|
|
rdfPredicate: 'rdf:value',
|
|
transformation: 'direct',
|
|
required: true,
|
|
notes: 'The actual claimed value',
|
|
},
|
|
{
|
|
sourcePath: 'web_claims[].source_url',
|
|
sourceDescription: 'URL source of claim',
|
|
targetClass: 'WebClaim',
|
|
targetSlot: 'source_url',
|
|
typedbAttribute: 'claim-source-url',
|
|
rdfPredicate: 'prov:wasDerivedFrom',
|
|
transformation: 'direct',
|
|
required: true,
|
|
notes: 'Web page where claim was found',
|
|
},
|
|
{
|
|
sourcePath: 'web_claims[].retrieved_on',
|
|
sourceDescription: 'Retrieval timestamp',
|
|
targetClass: 'WebClaim',
|
|
targetSlot: 'retrieved_on',
|
|
typedbAttribute: 'claim-retrieved-on',
|
|
rdfPredicate: 'prov:generatedAtTime',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'When the claim was retrieved',
|
|
},
|
|
{
|
|
sourcePath: 'web_claims[].retrieval_agent',
|
|
sourceDescription: 'Agent that retrieved claim',
|
|
targetClass: 'WebClaim',
|
|
targetSlot: 'retrieval_agent',
|
|
typedbAttribute: 'claim-retrieval-agent',
|
|
rdfPredicate: 'prov:wasAttributedTo',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'Tool/agent that extracted the claim',
|
|
},
|
|
],
|
|
generatedClasses: ['WebClaim'],
|
|
exampleYaml: `
|
|
web_claims:
|
|
- claim_type: full_name
|
|
claim_value: Iris van Meer
|
|
source_url: https://www.linkedin.com/in/iris-van-meer-34329131
|
|
retrieved_on: 2025-12-14T11:21:47Z
|
|
retrieval_agent: linkedin_html_parser
|
|
- claim_type: role_title
|
|
claim_value: Staff member at the Services Department at Nationaal Archief
|
|
source_url: https://www.linkedin.com/in/iris-van-meer-34329131
|
|
retrieved_on: 2025-12-14T11:21:47Z
|
|
retrieval_agent: linkedin_html_parser
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// PICO MAPPED - PiCo ontology properties that ARE mapped
|
|
// -------------------------------------------------------------------------
|
|
{
|
|
sourceBlock: 'pico_mapped',
|
|
description: 'PiCo ontology properties mapped to HC person data',
|
|
detailedDescription: `
|
|
The Heritage Custodian (HC) system implements a focused subset of the PiCo
|
|
(Persons in Context) ontology, optimized for tracking heritage sector staff.
|
|
|
|
PiCo was designed for historical vital records (birth/death certificates,
|
|
marriage records, census data), but HC uses LinkedIn as the primary data
|
|
source, which provides professional context rather than biographical/genealogical
|
|
data.
|
|
|
|
This section documents which PiCo properties ARE mapped to HC fields,
|
|
showing the semantic alignment between the ontologies.
|
|
`.trim(),
|
|
linkmlClass: 'PersonObservation',
|
|
typedbEntity: 'person-observation',
|
|
provenance: {
|
|
sourceType: 'ontology_mapping',
|
|
dataTier: 'TIER_1_AUTHORITATIVE',
|
|
},
|
|
fields: [
|
|
{
|
|
sourcePath: 'profile_data.name',
|
|
sourceDescription: 'Full name of the person',
|
|
targetClass: 'PersonObservation',
|
|
targetSlot: 'has_name',
|
|
typedbAttribute: 'person-name',
|
|
rdfPredicate: 'sdo:name',
|
|
transformation: 'direct',
|
|
required: true,
|
|
notes: 'PiCo uses sdo:name (Schema.org) for full names. Directly mapped.',
|
|
status: 'mapped',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.location',
|
|
sourceDescription: 'Current geographic location',
|
|
targetClass: 'PersonObservation',
|
|
targetSlot: 'has_location',
|
|
typedbAttribute: 'location-string',
|
|
rdfPredicate: 'sdo:address',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'PiCo uses sdo:address for location. LinkedIn provides free-text location.',
|
|
status: 'mapped',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.headline',
|
|
sourceDescription: 'Current occupation/role',
|
|
targetClass: 'PersonObservation',
|
|
targetSlot: 'has_label',
|
|
typedbAttribute: 'headline',
|
|
rdfPredicate: 'sdo:hasOccupation',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'PiCo uses sdo:hasOccupation for job roles. HC captures this via LinkedIn headline.',
|
|
status: 'mapped',
|
|
},
|
|
{
|
|
sourcePath: 'extraction_metadata.linkedin_url',
|
|
sourceDescription: 'LinkedIn profile URL as primary source',
|
|
targetClass: 'PersonObservation',
|
|
targetSlot: 'source_url',
|
|
typedbAttribute: 'source-url',
|
|
rdfPredicate: 'prov:hadPrimarySource',
|
|
transformation: 'direct',
|
|
required: true,
|
|
notes: 'PiCo uses prov:hadPrimarySource for provenance. LinkedIn URL serves as source document.',
|
|
status: 'mapped',
|
|
},
|
|
// PiCo alignment for affiliations[].role_title → Affiliation.role_title (pico:hasRole).
{
|
|
sourcePath: 'affiliations[].role_title',
|
|
sourceDescription: 'Role at heritage institution',
|
|
targetClass: 'Affiliation',
|
|
targetSlot: 'role_title',
|
|
// Same TypeDB attribute as the `affiliations` block's mapping of this exact
// source path / class / slot, so both entries describe one attribute.
typedbAttribute: 'affiliation-role',
|
|
rdfPredicate: 'pico:hasRole',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'PiCo uses pico:hasRole with picot_roles thesaurus. HC captures current institutional roles.',
|
|
status: 'mapped',
|
|
},
|
|
{
|
|
sourcePath: 'extraction_metadata.extraction_date',
|
|
sourceDescription: 'When observation was recorded',
|
|
targetClass: 'PersonObservation',
|
|
targetSlot: 'observed_in',
|
|
typedbAttribute: 'observation-date',
|
|
rdfPredicate: 'prov:generatedAtTime',
|
|
transformation: 'direct',
|
|
required: true,
|
|
notes: 'PiCo uses prov:generatedAtTime for temporal provenance. Mapped to extraction timestamp.',
|
|
status: 'mapped',
|
|
},
|
|
{
|
|
sourcePath: 'extraction_metadata.extraction_agent',
|
|
sourceDescription: 'Agent that performed extraction',
|
|
targetClass: 'PersonObservation',
|
|
targetSlot: 'has_agent',
|
|
typedbAttribute: 'extraction-agent',
|
|
rdfPredicate: 'prov:wasAttributedTo',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'PiCo uses prov:wasAttributedTo for agent provenance. Records which tool/agent extracted data.',
|
|
status: 'mapped',
|
|
},
|
|
{
|
|
sourcePath: 'profile_data.profile_image_url',
|
|
sourceDescription: 'Profile photo URL',
|
|
targetClass: 'PersonObservation',
|
|
targetSlot: 'has_image',
|
|
typedbAttribute: 'profile-image-url',
|
|
rdfPredicate: 'sdo:image',
|
|
transformation: 'direct',
|
|
required: false,
|
|
notes: 'PiCo uses sdo:image for visual representation. LinkedIn CDN URL stored.',
|
|
status: 'mapped',
|
|
},
|
|
],
|
|
exampleYaml: `
|
|
# PiCo ontology alignment example
|
|
# HC PersonObservation → PiCo PersonObservation
|
|
|
|
profile_data:
|
|
name: "Iris van Meer" # → sdo:name
|
|
headline: "Staff member at..." # → sdo:hasOccupation
|
|
location: "The Hague, Netherlands" # → sdo:address
|
|
profile_image_url: "https://..." # → sdo:image
|
|
|
|
extraction_metadata:
|
|
linkedin_url: "https://linkedin.com/in/..." # → prov:hadPrimarySource
|
|
extraction_date: "2025-12-14T11:21:47Z" # → prov:generatedAtTime
|
|
extraction_agent: "claude-opus-4.5" # → prov:wasAttributedTo
|
|
|
|
affiliations:
|
|
- role_title: "Archivist" # → pico:hasRole
|
|
`.trim(),
|
|
},
|
|
|
|
// -------------------------------------------------------------------------
|
|
// PICO UNMAPPED - PiCo ontology properties intentionally OUT OF SCOPE
|
|
// -------------------------------------------------------------------------
|
|
{
|
|
sourceBlock: 'pico_unmapped',
|
|
description: 'PiCo ontology properties intentionally not mapped',
|
|
detailedDescription: `
|
|
Many PiCo properties are intentionally NOT mapped in the HC system.
|
|
This is a design decision, not a gap to be filled.
|
|
|
|
**Why these properties are out of scope:**
|
|
|
|
1. **Data source limitation**: LinkedIn profiles don't contain vital records
|
|
(birth dates, death dates, marriage records, baptism records).
|
|
|
|
2. **Use case mismatch**: HC tracks heritage sector workforce, not genealogical
|
|
reconstruction. Family relationships aren't relevant for institutional
|
|
staff directories.
|
|
|
|
3. **Privacy considerations**: Collecting personal biographical data about
|
|
living individuals raises GDPR concerns. Professional context is appropriate;
|
|
personal history is not.
|
|
|
|
4. **Ontology purpose**: PiCo was designed for historical archives processing
|
|
(civil registration, notarial records). HC serves a different purpose.
|
|
|
|
This documentation ensures transparency about the ontology alignment scope.
|
|
`.trim(),
|
|
linkmlClass: 'PersonObservation',
|
|
typedbEntity: 'person-observation',
|
|
provenance: {
|
|
sourceType: 'ontology_mapping',
|
|
dataTier: 'TIER_1_AUTHORITATIVE',
|
|
},
|
|
fields: [
|
|
// Vital records - not available from LinkedIn
|
|
{
|
|
sourcePath: null,
|
|
sourceDescription: 'Birth date',
|
|
targetClass: null,
|
|
targetSlot: null,
|
|
typedbAttribute: null,
|
|
rdfPredicate: 'sdo:birthDate',
|
|
transformation: 'not_mapped',
|
|
required: false,
|
|
notes: 'OUT OF SCOPE: LinkedIn does not provide birth dates. Historical vital records property.',
|
|
status: 'out_of_scope',
|
|
},
|
|
{
|
|
sourcePath: null,
|
|
sourceDescription: 'Death date',
|
|
targetClass: null,
|
|
targetSlot: null,
|
|
typedbAttribute: null,
|
|
rdfPredicate: 'sdo:deathDate',
|
|
transformation: 'not_mapped',
|
|
required: false,
|
|
notes: 'OUT OF SCOPE: LinkedIn profiles are for living professionals. Historical vital records property.',
|
|
status: 'out_of_scope',
|
|
},
|
|
{
|
|
sourcePath: null,
|
|
sourceDescription: 'Birth place',
|
|
targetClass: null,
|
|
targetSlot: null,
|
|
typedbAttribute: null,
|
|
rdfPredicate: 'sdo:birthPlace',
|
|
transformation: 'not_mapped',
|
|
required: false,
|
|
notes: 'OUT OF SCOPE: LinkedIn does not provide birth location. Use sdo:address for current location.',
|
|
status: 'out_of_scope',
|
|
},
|
|
{
|
|
sourcePath: null,
|
|
sourceDescription: 'Death place',
|
|
targetClass: null,
|
|
targetSlot: null,
|
|
typedbAttribute: null,
|
|
rdfPredicate: 'sdo:deathPlace',
|
|
transformation: 'not_mapped',
|
|
required: false,
|
|
notes: 'OUT OF SCOPE: LinkedIn profiles are for living professionals.',
|
|
status: 'out_of_scope',
|
|
},
|
|
{
|
|
sourcePath: null,
|
|
sourceDescription: 'Deceased flag',
|
|
targetClass: null,
|
|
targetSlot: null,
|
|
typedbAttribute: null,
|
|
rdfPredicate: 'pico:deceased',
|
|
transformation: 'not_mapped',
|
|
required: false,
|
|
notes: 'OUT OF SCOPE: HC tracks active professionals. Memorial profiles not in scope.',
|
|
status: 'out_of_scope',
|
|
},
|
|
{
|
|
sourcePath: null,
|
|
sourceDescription: 'Age',
|
|
targetClass: null,
|
|
targetSlot: null,
|
|
typedbAttribute: null,
|
|
rdfPredicate: 'pico:hasAge',
|
|
transformation: 'not_mapped',
|
|
required: false,
|
|
notes: 'OUT OF SCOPE: Age not available from LinkedIn. Privacy consideration for living individuals.',
|
|
status: 'out_of_scope',
|
|
},
|
|
{
|
|
sourcePath: null,
|
|
sourceDescription: 'Gender',
|
|
targetClass: null,
|
|
targetSlot: null,
|
|
typedbAttribute: null,
|
|
rdfPredicate: 'sdo:gender',
|
|
transformation: 'not_mapped',
|
|
required: false,
|
|
notes: 'OUT OF SCOPE: Gender not reliably extractable from LinkedIn. Privacy consideration.',
|
|
status: 'out_of_scope',
|
|
},
|
|
{
|
|
sourcePath: null,
|
|
sourceDescription: 'Religion',
|
|
targetClass: null,
|
|
targetSlot: null,
|
|
typedbAttribute: null,
|
|
rdfPredicate: 'pico:hasReligion',
|
|
transformation: 'not_mapped',
|
|
required: false,
|
|
notes: 'OUT OF SCOPE: Religious affiliation not available from LinkedIn. Privacy consideration.',
|
|
status: 'out_of_scope',
|
|
},
|
|
// Structured name components - partial mapping
|
|
{
|
|
sourcePath: null,
|
|
sourceDescription: 'Given name (first name)',
|
|
targetClass: null,
|
|
targetSlot: null,
|
|
typedbAttribute: null,
|
|
rdfPredicate: 'sdo:givenName',
|
|
transformation: 'not_mapped',
|
|
required: false,
|
|
notes: 'PARTIAL: Could be extracted from full name, but not reliably for all cultures. Full name (sdo:name) is used instead.',
|
|
status: 'partial',
|
|
},
|
|
{
|
|
sourcePath: null,
|
|
sourceDescription: 'Family name (surname)',
|
|
targetClass: null,
|
|
targetSlot: null,
|
|
typedbAttribute: null,
|
|
rdfPredicate: 'sdo:familyName',
|
|
transformation: 'not_mapped',
|
|
required: false,
|
|
notes: 'PARTIAL: Could be extracted from full name, but not reliably for all cultures. Full name (sdo:name) is used instead.',
|
|
status: 'partial',
|
|
},
|
|
{
|
|
sourcePath: null,
|
|
sourceDescription: 'Patronym',
|
|
targetClass: null,
|
|
targetSlot: null,
|
|
typedbAttribute: null,
|
|
rdfPredicate: 'pnv:patronym',
|
|
transformation: 'not_mapped',
|
|
required: false,
|
|
notes: 'OUT OF SCOPE: Patronymic names are historical/cultural. Not extractable from LinkedIn.',
|
|
status: 'out_of_scope',
|
|
},
|
|
{
|
|
sourcePath: null,
|
|
sourceDescription: 'Base surname',
|
|
targetClass: null,
|
|
targetSlot: null,
|
|
typedbAttribute: null,
|
|
rdfPredicate: 'pnv:baseSurname',
|
|
transformation: 'not_mapped',
|
|
required: false,
|
|
notes: 'OUT OF SCOPE: Genealogical name component. Not relevant for staff tracking.',
|
|
status: 'out_of_scope',
|
|
},
|
|
{
|
|
sourcePath: null,
|
|
sourceDescription: 'Surname prefix',
|
|
targetClass: null,
|
|
targetSlot: null,
|
|
typedbAttribute: null,
|
|
rdfPredicate: 'pnv:surnamePrefix',
|
|
transformation: 'not_mapped',
|
|
required: false,
|
|
notes: 'OUT OF SCOPE: Dutch tussenvoegsel (van, de, etc.) not separately tracked. Full name preserved.',
|
|
status: 'out_of_scope',
|
|
},
|
|
{
|
|
sourcePath: null,
|
|
sourceDescription: 'Initials',
|
|
targetClass: null,
|
|
targetSlot: null,
|
|
typedbAttribute: null,
|
|
rdfPredicate: 'pnv:initials',
|
|
transformation: 'not_mapped',
|
|
required: false,
|
|
notes: 'OUT OF SCOPE: Initials not separately extracted. Full name used.',
|
|
status: 'out_of_scope',
|
|
},
|
|
// Family relationships - 40+ properties not mapped
|
|
{
|
|
sourcePath: null,
|
|
sourceDescription: 'Parent relationship',
|
|
targetClass: null,
|
|
targetSlot: null,
|
|
typedbAttribute: null,
|
|
rdfPredicate: 'sdo:parent',
|
|
transformation: 'not_mapped',
|
|
required: false,
|
|
notes: 'OUT OF SCOPE: Family relationships not relevant for institutional staff tracking.',
|
|
status: 'out_of_scope',
|
|
},
|
|
{
|
|
sourcePath: null,
|
|
sourceDescription: 'Children relationship',
|
|
targetClass: null,
|
|
targetSlot: null,
|
|
typedbAttribute: null,
|
|
rdfPredicate: 'sdo:children',
|
|
transformation: 'not_mapped',
|
|
required: false,
|
|
notes: 'OUT OF SCOPE: Family relationships not relevant for institutional staff tracking.',
|
|
status: 'out_of_scope',
|
|
},
|
|
{
|
|
sourcePath: null,
|
|
sourceDescription: 'Spouse relationship',
|
|
targetClass: null,
|
|
targetSlot: null,
|
|
typedbAttribute: null,
|
|
rdfPredicate: 'sdo:spouse',
|
|
transformation: 'not_mapped',
|
|
required: false,
|
|
notes: 'OUT OF SCOPE: Family relationships not relevant for institutional staff tracking.',
|
|
status: 'out_of_scope',
|
|
},
|
|
{
|
|
sourcePath: null,
|
|
sourceDescription: 'Sibling relationship',
|
|
targetClass: null,
|
|
targetSlot: null,
|
|
typedbAttribute: null,
|
|
rdfPredicate: 'sdo:sibling',
|
|
transformation: 'not_mapped',
|
|
required: false,
|
|
notes: 'OUT OF SCOPE: Family relationships not relevant for institutional staff tracking.',
|
|
status: 'out_of_scope',
|
|
},
|
|
{
|
|
sourcePath: null,
|
|
sourceDescription: 'Extended family (40+ PiCo properties)',
|
|
targetClass: null,
|
|
targetSlot: null,
|
|
typedbAttribute: null,
|
|
rdfPredicate: 'pico:has*',
|
|
transformation: 'not_mapped',
|
|
required: false,
|
|
notes: 'OUT OF SCOPE: PiCo defines 40+ family relationship properties (grandparent, in-law, step-relations, cousins, etc.). None are mapped - HC tracks professional, not familial relationships.',
|
|
status: 'out_of_scope',
|
|
},
|
|
// Archival source properties
|
|
{
|
|
sourcePath: null,
|
|
sourceDescription: 'Archive component source',
|
|
targetClass: null,
|
|
targetSlot: null,
|
|
typedbAttribute: null,
|
|
rdfPredicate: 'sdo:ArchiveComponent',
|
|
transformation: 'not_mapped',
|
|
required: false,
|
|
notes: 'OUT OF SCOPE: HC uses LinkedIn as source, not archival documents. Web claims serve similar provenance purpose.',
|
|
status: 'out_of_scope',
|
|
},
|
|
// Reconstruction properties
|
|
{
|
|
sourcePath: null,
|
|
sourceDescription: 'Person reconstruction aggregation',
|
|
targetClass: null,
|
|
targetSlot: null,
|
|
typedbAttribute: null,
|
|
rdfPredicate: 'pico:PersonReconstruction',
|
|
transformation: 'not_mapped',
|
|
required: false,
|
|
notes: 'FUTURE: PersonReconstruction (aggregating multiple observations) not yet implemented. Currently each person has one LinkedIn-based observation.',
|
|
status: 'future',
|
|
},
|
|
{
|
|
sourcePath: null,
|
|
sourceDescription: 'Derived from relationship',
|
|
targetClass: null,
|
|
targetSlot: null,
|
|
typedbAttribute: null,
|
|
rdfPredicate: 'prov:wasDerivedFrom',
|
|
transformation: 'not_mapped',
|
|
required: false,
|
|
notes: 'FUTURE: Cross-observation derivation not implemented. Would link reconstructed person to source observations.',
|
|
status: 'future',
|
|
},
|
|
],
|
|
exampleYaml: `
|
|
# PiCo properties NOT mapped in HC system
|
|
|
|
# ❌ Vital records (not available from LinkedIn):
|
|
# - sdo:birthDate, sdo:deathDate
|
|
# - sdo:birthPlace, sdo:deathPlace
|
|
# - pico:deceased, pico:hasAge
|
|
|
|
# ❌ Personal characteristics (privacy):
|
|
# - sdo:gender
|
|
# - pico:hasReligion
|
|
|
|
# ❌ Genealogical name components:
|
|
# - pnv:patronym, pnv:baseSurname
|
|
# - pnv:surnamePrefix, pnv:initials
|
|
# (HC uses full sdo:name instead)
|
|
|
|
# ❌ Family relationships (40+ properties):
|
|
# - sdo:parent, sdo:children, sdo:spouse, sdo:sibling
|
|
# - pico:hasGrandparent, pico:hasGrandchild
|
|
# - pico:hasParent-in-law, pico:hasSibling-in-law
|
|
# - pico:hasStepparent, pico:hasStepchild
|
|
# - pico:hasCousin, pico:hasUncle_Aunt
|
|
# - pico:hasFosterParent, pico:hasGodparent
|
|
# ... and many more
|
|
|
|
# ⏳ Future consideration:
|
|
# - pico:PersonReconstruction (multi-source aggregation)
|
|
# - prov:wasDerivedFrom (observation linking)
|
|
`.trim(),
|
|
},
|
|
];
|
|
|
|
// ============================================================================
|
|
// HELPER FUNCTIONS
|
|
// ============================================================================
|
|
|
|
/**
 * Look up the custodian enrichment mapping registered for a source block.
 *
 * @param sourceBlock - Source block key to match against each mapping's `sourceBlock`
 * @returns The first mapping in ENRICHMENT_MAPPINGS with that key, or `undefined` when none matches
 */
export function getMappingForSource(sourceBlock: string): EnrichmentSourceMapping | undefined {
  for (const candidate of ENRICHMENT_MAPPINGS) {
    if (candidate.sourceBlock === sourceBlock) {
      return candidate;
    }
  }
  return undefined;
}
|
|
|
|
/**
 * Collect every custodian field mapping whose `targetClass` equals the given class name.
 *
 * @param className - LinkML class name to match exactly
 * @returns A new array of matching fields, in mapping order then field order (empty when nothing matches)
 */
export function getFieldsForClass(className: string): FieldMapping[] {
  return ENRICHMENT_MAPPINGS.reduce<FieldMapping[]>(
    (collected, mapping) =>
      collected.concat(mapping.fields.filter(candidate => candidate.targetClass === className)),
    []
  );
}
|
|
|
|
/**
 * List the custodian source blocks that produce a given LinkML class.
 *
 * A mapping counts when the class is either its `linkmlClass` or listed in its
 * optional `generatedClasses`.
 *
 * @param className - LinkML class name to look for
 * @returns The `sourceBlock` keys of all matching mappings, in ENRICHMENT_MAPPINGS order
 */
export function getSourcesForClass(className: string): string[] {
  return ENRICHMENT_MAPPINGS
    .filter(
      mapping =>
        mapping.linkmlClass === className || mapping.generatedClasses?.includes(className)
    )
    .map(mapping => mapping.sourceBlock);
}
|
|
|
|
/**
 * Resolve the TypeDB attribute name recorded for a LinkML class/slot pair.
 *
 * The first field (in mapping order, then field order) whose `targetClass` and
 * `targetSlot` both match decides the result; later matches are not consulted,
 * even if the first match has no TypeDB attribute.
 *
 * @param className - LinkML class name
 * @param slotName - LinkML slot name
 * @returns The field's `typedbAttribute`, or `undefined` when it is null/absent or no field matches
 */
export function getTypeDBAttribute(className: string, slotName: string): string | undefined {
  for (const { fields } of ENRICHMENT_MAPPINGS) {
    const hit = fields.find(
      candidate => candidate.targetClass === className && candidate.targetSlot === slotName
    );
    if (hit !== undefined) {
      // Normalise a null attribute to undefined for callers.
      return hit.typedbAttribute ?? undefined;
    }
  }
  return undefined;
}
|
|
|
|
/**
 * Resolve the RDF predicate recorded for a LinkML class/slot pair.
 *
 * The first field (in mapping order, then field order) whose `targetClass` and
 * `targetSlot` both match decides the result.
 *
 * @param className - LinkML class name
 * @param slotName - LinkML slot name
 * @returns The matching field's `rdfPredicate`, or `undefined` when no field matches
 */
export function getRDFPredicate(className: string, slotName: string): string | undefined {
  for (const { fields } of ENRICHMENT_MAPPINGS) {
    const hit = fields.find(
      candidate => candidate.targetClass === className && candidate.targetSlot === slotName
    );
    if (hit !== undefined) {
      return hit.rdfPredicate;
    }
  }
  return undefined;
}
|
|
|
|
/**
 * Find the custodian mapping category that lists a source block.
 *
 * @param sourceBlock - Source block key to search for in each category's `sources`
 * @returns The first category in MAPPING_CATEGORIES containing the key, or `undefined`
 */
export function getCategoryForSource(sourceBlock: string): MappingCategory | undefined {
  for (const category of MAPPING_CATEGORIES) {
    if (category.sources.includes(sourceBlock)) {
      return category;
    }
  }
  return undefined;
}
|
|
|
|
/**
 * Enumerate the distinct transformation types used by custodian field mappings.
 *
 * @returns Each `transformation` value once, ordered by first occurrence in ENRICHMENT_MAPPINGS
 */
export function getTransformationTypes(): TransformationType[] {
  const distinct = new Set<TransformationType>();
  ENRICHMENT_MAPPINGS.forEach(mapping => {
    mapping.fields.forEach(({ transformation }) => {
      distinct.add(transformation);
    });
  });
  return Array.from(distinct);
}
|
|
|
|
/**
 * Summarise the custodian mappings.
 *
 * @returns An object with:
 *   - `totalSources`: number of entries in ENRICHMENT_MAPPINGS
 *   - `totalFields`: sum of `fields.length` over all mappings
 *   - `classesCovered` / `classes`: count and list of distinct `linkmlClass` and `generatedClasses` names
 *   - `transformationCounts`: number of fields per transformation type (only types that occur have a key)
 *   - `categories`: number of entries in MAPPING_CATEGORIES
 */
export function getMappingStatistics() {
  const classNames = new Set<string>();
  // Populated lazily: a key exists only once a field with that transformation is seen.
  const transformationCounts = {} as Record<TransformationType, number>;
  let fieldTotal = 0;

  ENRICHMENT_MAPPINGS.forEach(mapping => {
    fieldTotal += mapping.fields.length;

    classNames.add(mapping.linkmlClass);
    for (const generated of mapping.generatedClasses ?? []) {
      classNames.add(generated);
    }

    mapping.fields.forEach(({ transformation }) => {
      const seenSoFar = transformationCounts[transformation] || 0;
      transformationCounts[transformation] = seenSoFar + 1;
    });
  });

  return {
    totalSources: ENRICHMENT_MAPPINGS.length,
    totalFields: fieldTotal,
    classesCovered: classNames.size,
    classes: Array.from(classNames),
    transformationCounts,
    categories: MAPPING_CATEGORIES.length,
  };
}
|
|
|
|
// ============================================================================
|
|
// PERSON DATA HELPER FUNCTIONS
|
|
// ============================================================================
|
|
|
|
/**
 * Look up the person mapping registered for a source block.
 *
 * @param sourceBlock - Source block key to match against each mapping's `sourceBlock`
 * @returns The first mapping in PERSON_MAPPINGS with that key, or `undefined` when none matches
 */
export function getPersonMappingForSource(sourceBlock: string): EnrichmentSourceMapping | undefined {
  for (const candidate of PERSON_MAPPINGS) {
    if (candidate.sourceBlock === sourceBlock) {
      return candidate;
    }
  }
  return undefined;
}
|
|
|
|
/**
 * Collect every person field mapping whose `targetClass` equals the given class name.
 *
 * Fields with a null `targetClass` (e.g. unmapped properties) never match.
 *
 * @param className - LinkML class name to match exactly
 * @returns A new array of matching fields, in mapping order then field order (empty when nothing matches)
 */
export function getPersonFieldsForClass(className: string): FieldMapping[] {
  const matches: FieldMapping[] = [];
  PERSON_MAPPINGS.forEach(mapping => {
    mapping.fields
      .filter(candidate => candidate.targetClass === className)
      .forEach(candidate => {
        matches.push(candidate);
      });
  });
  return matches;
}
|
|
|
|
/**
 * List the person source blocks that produce a given LinkML class.
 *
 * A mapping counts when the class is either its `linkmlClass` or listed in its
 * optional `generatedClasses`.
 *
 * @param className - LinkML class name to look for
 * @returns The `sourceBlock` keys of all matching mappings, in PERSON_MAPPINGS order
 */
export function getPersonSourcesForClass(className: string): string[] {
  return PERSON_MAPPINGS
    .filter(
      mapping =>
        mapping.linkmlClass === className || mapping.generatedClasses?.includes(className)
    )
    .map(mapping => mapping.sourceBlock);
}
|
|
|
|
/**
 * Find the person mapping category that lists a source block.
 *
 * @param sourceBlock - Source block key to search for in each category's `sources`
 * @returns The first category in PERSON_CATEGORIES containing the key, or `undefined`
 */
export function getPersonCategoryForSource(sourceBlock: string): MappingCategory | undefined {
  for (const category of PERSON_CATEGORIES) {
    if (category.sources.includes(sourceBlock)) {
      return category;
    }
  }
  return undefined;
}
|
|
|
|
/**
 * Summarise the person mappings.
 *
 * @returns An object with:
 *   - `totalSources`: number of entries in PERSON_MAPPINGS
 *   - `totalFields`: sum of `fields.length` over all mappings (fields with null targets included)
 *   - `classesCovered` / `classes`: count and list of distinct `linkmlClass` and `generatedClasses` names
 *   - `transformationCounts`: number of fields per transformation type (only types that occur have a key)
 *   - `categories`: number of entries in PERSON_CATEGORIES
 */
export function getPersonMappingStatistics() {
  const classNames = new Set<string>();
  // Populated lazily: a key exists only once a field with that transformation is seen.
  const transformationCounts = {} as Record<TransformationType, number>;
  let fieldTotal = 0;

  for (const { fields, linkmlClass, generatedClasses } of PERSON_MAPPINGS) {
    fieldTotal += fields.length;

    classNames.add(linkmlClass);
    if (generatedClasses) {
      generatedClasses.forEach(generated => classNames.add(generated));
    }

    for (const { transformation } of fields) {
      const seenSoFar = transformationCounts[transformation] || 0;
      transformationCounts[transformation] = seenSoFar + 1;
    }
  }

  return {
    totalSources: PERSON_MAPPINGS.length,
    totalFields: fieldTotal,
    classesCovered: classNames.size,
    classes: Array.from(classNames),
    transformationCounts,
    categories: PERSON_CATEGORIES.length,
  };
}
|
|
|
|
// ============================================================================
|
|
// UNIFIED HELPER FUNCTIONS (for both data sources)
|
|
// ============================================================================
|
|
|
|
/**
 * Selects which mapping set the unified helpers operate on:
 * `'person'` uses PERSON_MAPPINGS / PERSON_CATEGORIES, `'custodian'` uses
 * ENRICHMENT_MAPPINGS / MAPPING_CATEGORIES.
 */
export type DataSourceType = 'custodian' | 'person';
|
|
|
|
/**
 * Select the mappings array for a data source.
 *
 * @param dataSource - `'person'` selects PERSON_MAPPINGS; any other value selects ENRICHMENT_MAPPINGS
 * @returns The selected module-level array itself (not a copy)
 */
export function getMappingsForDataSource(dataSource: DataSourceType): EnrichmentSourceMapping[] {
  if (dataSource === 'person') {
    return PERSON_MAPPINGS;
  }
  return ENRICHMENT_MAPPINGS;
}
|
|
|
|
/**
 * Select the categories array for a data source.
 *
 * @param dataSource - `'person'` selects PERSON_CATEGORIES; any other value selects MAPPING_CATEGORIES
 * @returns The selected module-level array itself (not a copy)
 */
export function getCategoriesForDataSource(dataSource: DataSourceType): MappingCategory[] {
  if (dataSource === 'person') {
    return PERSON_CATEGORIES;
  }
  return MAPPING_CATEGORIES;
}
|
|
|
|
/**
 * Look up the mapping for a source block within the chosen data source.
 *
 * @param sourceBlock - Source block key to match against each mapping's `sourceBlock`
 * @param dataSource - Which mapping set to search (see getMappingsForDataSource)
 * @returns The first mapping with that key, or `undefined` when none matches
 */
export function getMappingForSourceByType(sourceBlock: string, dataSource: DataSourceType): EnrichmentSourceMapping | undefined {
  return getMappingsForDataSource(dataSource).find(
    candidate => candidate.sourceBlock === sourceBlock
  );
}
|
|
|
|
/**
 * Find the category that lists a source block within the chosen data source.
 *
 * @param sourceBlock - Source block key to search for in each category's `sources`
 * @param dataSource - Which category set to search (see getCategoriesForDataSource)
 * @returns The first category containing the key, or `undefined`
 */
export function getCategoryForSourceByType(sourceBlock: string, dataSource: DataSourceType): MappingCategory | undefined {
  return getCategoriesForDataSource(dataSource).find(category =>
    category.sources.includes(sourceBlock)
  );
}
|
|
|
|
/**
 * Compute mapping statistics for the chosen data source.
 *
 * @param dataSource - `'person'` delegates to getPersonMappingStatistics; any other value to getMappingStatistics
 * @returns The statistics object produced by the delegated function
 */
export function getStatisticsForDataSource(dataSource: DataSourceType) {
  if (dataSource === 'person') {
    return getPersonMappingStatistics();
  }
  return getMappingStatistics();
}
|
|
|
|
/**
 * Export the transformation specification as YAML suitable for linkml-map.
 *
 * The specification is built with exportToLinkMLMap, serialised with js-yaml
 * (imported dynamically inside this function), and prefixed with a comment
 * header carrying the generation time and data source.
 *
 * @param dataSource - 'custodian' or 'person' data source type
 * @param customMappings - Optional mappings forwarded to exportToLinkMLMap in place of the defaults
 * @param isFiltered - When truthy, the header gains a note that the export reflects applied filters
 * @returns Promise resolving to the header comment followed by the YAML document
 */
export async function exportToLinkMLMapYaml(
  dataSource: DataSourceType,
  customMappings?: EnrichmentSourceMapping[],
  isFiltered?: boolean
): Promise<string> {
  const specification = exportToLinkMLMap(dataSource, customMappings);

  // Dynamic ESM import of the YAML serialiser.
  const jsYaml = await import('js-yaml');

  // Optional extra header line; it is appended directly after the data-source line.
  let filterNote = '';
  if (isFiltered) {
    filterNote = '\n# NOTE: This export reflects currently applied filters.';
  }

  const header = `# LinkML Map Transformation Specification
# Generated: ${new Date().toISOString()}
# Source: Heritage Custodian System - bronhouder.nl
# Data Source: ${dataSource}${filterNote}
# Specification: https://linkml.io/linkml-map/
#
# This file defines how source YAML data is transformed to LinkML schema classes.
# Use with linkml-map tool: https://github.com/linkml/linkml-map

`;

  const body = jsYaml.dump(specification, {
    indent: 2,
    lineWidth: 120,
    noRefs: true,
    sortKeys: false,
  });

  return header + body;
}
|
|
|
|
// ============================================================================
|
|
// LINKML MAP EXPORT FUNCTIONS
|
|
// ============================================================================
|
|
|
|
/**
 * Represents a LinkML Map SlotDerivation
 * @see https://linkml.io/linkml-map/
 */
interface LinkMLMapSlotDerivation {
  /** Source path the slot is copied from; set by exportToLinkMLMap when no expression applies. */
  populated_from?: string;
  /** Expression string; set by exportToLinkMLMap from transformationToExpr when it yields one. */
  expr?: string;
  /** Human-readable description; exportToLinkMLMap fills it from the field's `sourceDescription`. */
  description?: string;
  /** Unit conversion pair. NOTE(review): not populated by the export code in this module — confirm intended use. */
  unit_conversion?: {
    source_unit: string;
    target_unit: string;
  };
}
|
|
|
|
/**
 * Represents a LinkML Map ClassDerivation
 */
interface LinkMLMapClassDerivation {
  /** Optional derivation name. NOTE(review): not set by exportToLinkMLMap — confirm intended use. */
  name?: string;
  /** Source block(s) feeding the class; exportToLinkMLMap joins multiple blocks with ' | '. */
  populated_from?: string;
  /** Slot derivations keyed by target slot name. */
  slot_derivations?: Record<string, LinkMLMapSlotDerivation>;
  /** Optional free-text description. NOTE(review): not set by exportToLinkMLMap. */
  description?: string;
}
|
|
|
|
/**
 * Represents a LinkML Map TransformationSpecification
 */
interface LinkMLMapTransformationSpecification {
  /** Specification identifier. */
  id: string;
  /** Human-readable title. */
  title: string;
  /** Optional longer description of the specification. */
  description?: string;
  /** Name of the schema describing the source data. */
  source_schema: string;
  /** Name of the schema describing the target data. */
  target_schema: string;
  /** Prefix-to-namespace-URI table. */
  prefixes?: Record<string, string>;
  /** Class derivations keyed by target class name. */
  class_derivations: Record<string, LinkMLMapClassDerivation>;
}
|
|
|
|
/**
 * Convert a field's transformation type to a LinkML Map expression string.
 *
 * Explicit `transformationDetails` (when non-empty) take precedence. Otherwise a
 * default expression is built around the last dot-separated segment of
 * `sourcePath`, written as a `{segment}` placeholder.
 *
 * @param field - Field mapping whose `transformation`, `transformationDetails` and `sourcePath` are read
 * @returns The expression, or `undefined` when the transformation needs none
 *   (direct, rename, nested, array_map, array_direct, ...) or when no expression
 *   can be built; callers then fall back to `populated_from`
 */
function transformationToExpr(field: FieldMapping): string | undefined {
  // Treat an empty details string the same as missing details.
  const details = field.transformationDetails || undefined;

  // Last path segment as a `{leaf}` placeholder. A missing path, or one ending
  // in '.', has no usable segment, so no placeholder (and no `{}` expression).
  const leaf = field.sourcePath ? field.sourcePath.split('.').pop() : undefined;
  const placeholder = leaf ? `{${leaf}}` : undefined;

  // Default for single-argument function-style transformations: fn({leaf}).
  const callOnLeaf = (fn: string): string | undefined =>
    details ?? (placeholder ? `${fn}(${placeholder})` : undefined);

  switch (field.transformation) {
    case 'computed':
    case 'split':
    case 'conditional':
      // These have no sensible default; only explicit details are used.
      return details;
    case 'merge':
      // "a + b" becomes `{a} + " " + {b}`: each operand is wrapped and joined with a space.
      return details
        ? `{${details.replace(/\s*\+\s*/g, '} + " " + {')}}`
        : undefined;
    case 'uri_construct':
      return details ?? (placeholder ? `"https://w3id.org/heritage/custodian/" + ${placeholder}` : undefined);
    case 'temporal':
      return callOnLeaf('date_parse');
    case 'normalize':
      return callOnLeaf('normalize');
    case 'flatten':
      return callOnLeaf('flatten');
    case 'aggregate':
      return callOnLeaf('aggregate');
    case 'lookup':
      // Needs both the value placeholder and the lookup target from the details.
      return details && placeholder ? `lookup(${placeholder}, ${details})` : undefined;
    default:
      // direct, rename, nested, array_map, array_direct - use populated_from
      return undefined;
  }
}
|
|
|
|
/**
 * Convert internal mappings to LinkML Map TransformationSpecification format
 * @see https://linkml.io/linkml-map/
 *
 * Fields are grouped by `targetClass`; fields lacking a target class or slot are
 * skipped. Within a class the first field seen for a slot wins. A slot uses
 * `expr` when transformationToExpr yields one, otherwise `populated_from` when
 * the field has a source path.
 *
 * @param dataSource - 'custodian' or 'person' data source type
 * @param customMappings - Optional mappings to export instead of the defaults for `dataSource`
 * @returns LinkML Map TransformationSpecification object
 */
export function exportToLinkMLMap(
  dataSource: DataSourceType,
  customMappings?: EnrichmentSourceMapping[]
): LinkMLMapTransformationSpecification {
  const activeMappings = customMappings ?? getMappingsForDataSource(dataSource);
  const isoDay = new Date().toISOString().split('T')[0]; // YYYY-MM-DD

  // Per-class accumulator: contributing source blocks plus slot derivations.
  const perClass = new Map<string, {
    blocks: Set<string>;
    slots: Map<string, LinkMLMapSlotDerivation>;
  }>();

  for (const mapping of activeMappings) {
    for (const field of mapping.fields) {
      const { targetClass, targetSlot } = field;

      // Skip fields without a target (e.g., unmapped PiCo properties)
      if (!targetClass || !targetSlot) continue;

      let bucket = perClass.get(targetClass);
      if (bucket === undefined) {
        bucket = { blocks: new Set(), slots: new Map() };
        perClass.set(targetClass, bucket);
      }
      bucket.blocks.add(mapping.sourceBlock);

      // Keep the first definition of a slot; later duplicates are ignored.
      if (bucket.slots.has(targetSlot)) continue;

      const derivation: LinkMLMapSlotDerivation = {
        description: field.sourceDescription,
      };
      const expression = transformationToExpr(field);
      if (expression) {
        derivation.expr = expression;
      } else if (field.sourcePath) {
        // No expression needed: copy straight from the source path.
        derivation.populated_from = field.sourcePath;
      }
      bucket.slots.set(targetSlot, derivation);
    }
  }

  // Flatten the accumulators into plain records, preserving insertion order.
  const classDerivations: Record<string, LinkMLMapClassDerivation> = {};
  perClass.forEach((bucket, className) => {
    const slotDerivations: Record<string, LinkMLMapSlotDerivation> = {};
    bucket.slots.forEach((derivation, slotName) => {
      slotDerivations[slotName] = derivation;
    });

    classDerivations[className] = {
      // One block yields its name as-is; several are joined with ' | '.
      populated_from: Array.from(bucket.blocks).join(' | '),
      slot_derivations: slotDerivations,
    };
  });

  const sourceLabel = dataSource === 'person' ? 'Person' : 'Custodian';

  // Counts every field of every mapping, including those skipped above.
  let totalFields = 0;
  for (const mapping of activeMappings) {
    totalFields += mapping.fields.length;
  }

  const description =
    `LinkML Map transformation specification for ${sourceLabel.toLowerCase()} data. ` +
    `Generated from ${activeMappings.length} source blocks with ${totalFields} field mappings.`;

  return {
    id: `heritage-${dataSource}-transform-${isoDay}`,
    title: `Heritage ${sourceLabel} Data Transformation`,
    description,
    source_schema: `${dataSource}_yaml_source_schema`,
    target_schema: 'heritage_custodian_linkml_schema',
    prefixes: {
      linkml: 'https://w3id.org/linkml/',
      heritage: 'https://w3id.org/heritage/custodian/',
      schema: 'http://schema.org/',
      crm: 'http://www.cidoc-crm.org/cidoc-crm/',
      prov: 'http://www.w3.org/ns/prov#',
    },
    class_derivations: classDerivations,
  };
}
|