glam/schemas/initial/core.yaml
kempersc fa5680f0dd Add initial versions of custodian hub UML diagrams in Mermaid and PlantUML formats
- Introduced custodian_hub_v3.mmd, custodian_hub_v4_final.mmd, and custodian_hub_v5_FINAL.mmd for Mermaid representation.
- Created custodian_hub_FINAL.puml and custodian_hub_v3.puml for PlantUML representation.
- Defined entities such as CustodianReconstruction, Identifier, TimeSpan, Agent, CustodianName, CustodianObservation, ReconstructionActivity, Appellation, ConfidenceMeasure, Custodian, LanguageCode, and SourceDocument.
- Established relationships and associations between entities, including temporal extents, observations, and reconstruction activities.
- Incorporated enumerations for various types, statuses, and classifications relevant to custodians and their activities.
2025-11-22 14:33:51 +01:00

625 lines
20 KiB
YAML

# LinkML schema header: identity, versioning and licensing of the core module.
id: https://w3id.org/heritage/custodian/core
name: heritage-custodian-core
title: Heritage Custodian Core Schema
version: "0.2.1"
license: https://creativecommons.org/publicdomain/zero/1.0/
description: >-
  Core classes and slots for heritage custodian organizations (GLAM
  institutions). Provides the base HeritageCustodian class with
  organizational metadata, identification, location, and contact
  information. Integrates W3C Org Ontology, Schema.org, and EU CPOV
  standards.

# CURIE prefixes available to the class_uri / slot_uri values in this schema.
prefixes:
  linkml: https://w3id.org/linkml/
  heritage: https://w3id.org/heritage/custodian/
  schema: http://schema.org/
  org: http://www.w3.org/ns/org#
  cpov: http://data.europa.eu/m8g/
  prov: http://www.w3.org/ns/prov#
  tooi: https://identifier.overheid.nl/tooi/def/ont/
  dcterms: http://purl.org/dc/terms/
  foaf: http://xmlns.com/foaf/0.1/
  rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#

# Elements without an explicit URI fall under `heritage`; slots without an
# explicit range default to string.
default_prefix: heritage
default_range: string

# `enums` and `provenance` are presumably sibling schema files (not visible
# in this file).
imports:
  - linkml:types
  - enums
  - provenance
# =============================================================================
# CORE CLASSES
# =============================================================================
classes:
# Schema.org Proxy Classes
# Local proxies for Schema.org classes to avoid external dependencies
# Proxy for schema:ContactPoint; ContactInfo lists this class under `mixins`.
SchemaContactPoint:
  description: >-
    Local proxy for schema:ContactPoint. Represents contact information
    (phone, email, etc.) for an organization. Used as mixin for ContactInfo.
  class_uri: schema:ContactPoint
  # Declared as a mixin because ContactInfo references it through `mixins:`;
  # `abstract` additionally prevents direct instantiation.
  mixin: true
  abstract: true
  notes: >-
    This is a mixin class providing Schema.org alignment.
    In RDF serialization, ContactInfo will also be typed as schema:ContactPoint.
# Central class of the schema: one record per heritage institution.
# ProvenanceEntity is not defined in this file — presumably it comes from the
# imported `provenance` module.
# NOTE(review): id, record_id, the ghcid_* identifier slots and
# chamber_of_commerce_number all declare slot_uri dcterms:identifier in the
# slots section, so they likely cannot be told apart by predicate in RDF
# output — confirm whether that is intended.
HeritageCustodian:
  class_uri: org:Organization
  description: >-
    A heritage custodian organization (GLAM institution) responsible for
    preserving, managing, and providing access to cultural heritage
    collections. Integrates W3C Org Ontology, PROV-O provenance tracking,
    and TOOI temporal patterns.
  mixins:
    - ProvenanceEntity
  slots:
    # Record keys and GHCID identifiers
    - id
    - record_id
    - ghcid_uuid
    - ghcid_uuid_sha256
    - ghcid_numeric
    - ghcid_current
    - ghcid_original
    - ghcid_history
    # Naming
    - name
    - alternative_names
    - official_name
    - sorting_name
    - abbreviation
    # Classification and description
    - institution_type
    - organization_status
    - description
    # Hierarchy
    - parent_organization
    - parent_organization_name
    - sub_organizations
    # Lifecycle and change tracking
    - founded_date
    - closed_date
    - prov_generated_at
    - prov_invalidated_at
    - change_history
    # Web presence, contact and location
    - homepage
    - contact_info
    - locations
    # External and administrative identifiers
    - identifiers
    - chamber_of_commerce_number
    - municipality_identifier
    # Holdings, systems and relationships
    - collections
    - digital_platforms
    - metadata_standards
    - partnerships
    - publications
    # Data provenance
    - provenance
  # Class-local constraints layered on top of the global slot definitions.
  slot_usage:
    id:
      required: true
      identifier: true
    name:
      required: true
    institution_type:
      required: true
# Address and coordinate record; this is the range of the `locations` slot.
Location:
  class_uri: schema:Place
  description: Physical or virtual location associated with an organization
  slots:
    # Kind of location
    - location_type
    # Postal address parts
    - street_address
    - city
    - postal_code
    - region
    - country
    # Geographic coordinates and gazetteer reference
    - latitude
    - longitude
    - geonames_id
    # Marks the main location
    - is_primary
# Contact record; this is the range of the `contact_info` slot. The class URI
# is the CPOV term, and SchemaContactPoint is listed as a mixin.
ContactInfo:
  class_uri: cpov:ContactPoint
  description: >-
    Contact information for the organization.
    Maps to both Schema.org ContactPoint and EU CPOV ContactPoint.
  mixins:
    - SchemaContactPoint
  slots:
    - email
    - phone
    - fax
    - contact_type
# Scheme/value pair for an external identifier; this is the range of the
# `identifiers` slot.
Identifier:
  description: External identifier for the organization
  # dcterms:identifier is a property, not a class, so it cannot serve as the
  # rdf:type of instances. schema:PropertyValue is the Schema.org class for
  # structured identifier values and uses a prefix this schema already declares.
  class_uri: schema:PropertyValue
  slots:
    - identifier_scheme
    - identifier_value
    - identifier_url
    - assigned_date
  # Both scheme and value are mandatory; URL and assignment date are optional.
  slot_usage:
    identifier_scheme:
      required: true
      examples:
        - value: ISIL
        - value: VIAF
        - value: Wikidata
        - value: KvK
        - value: GLAM
    identifier_value:
      required: true
# Department-level entity. Units can nest through `parent_unit`, whose range
# is this same class.
OrganizationalUnit:
  class_uri: org:OrganizationalUnit
  description: >-
    A sub-unit, department, or division within a heritage institution.
    Examples: Conservation department, Special Collections, Reading Room.
    Uses W3C Org Ontology patterns for organizational structure.
  slots:
    # Unit-specific slots
    - unit_id
    - unit_name
    - unit_type
    - parent_unit
    # Slots shared with HeritageCustodian
    - description
    - contact_info
    - homepage
  slot_usage:
    unit_id:
      required: true
      identifier: true
    unit_name:
      required: true
# =============================================================================
# SLOTS
# =============================================================================
slots:
# Core identification
# Record key: a URI or CURIE, flagged as the identifier slot.
id:
  slot_uri: dcterms:identifier
  range: uriorcurie
  identifier: true
  description: Unique identifier for this record
# Numeric form of the GHCID, documented as a 64-bit value taken from a
# SHA-256 digest.
ghcid_numeric:
  description: >-
    Global Heritage Custodian Identifier (numeric hash). This is the persistent,
    immutable identifier derived from SHA256 hash of the GHCID string. Never changes
    even if institution name or location changes. 64-bit unsigned integer.
  range: integer
  # The value is documented as unsigned, so negative numbers are rejected.
  minimum_value: 0
  required: false
  slot_uri: dcterms:identifier
  # NOTE(review): the description calls this value immutable, while the last
  # comment below derives it from ghcid_current, which is documented as able
  # to change. Confirm which GHCID string is the actual hash input.
  comments:
    - "This is the true persistent identifier for cross-system integration"
    - "Use this for database primary keys and foreign keys"
    - "Generated from SHA256(ghcid_current)[:8] converted to 64-bit int"
# Human-readable GHCID as it stands today. The pattern is five dash-separated
# components with an optional -Q<digits> suffix.
ghcid_current:
  description: >-
    Current human-readable GHCID string. Format:
    {ISO-3166-1}-{ISO-3166-2}-{UN/LOCODE}-{Type}-{Abbreviation}.
    Example: NL-NH-AMS-M-RM (Rijksmuseum Amsterdam). For collision resolution
    when two institutions share the same location, type, and abbreviation,
    append Wikidata Q-number: NL-NH-AMS-M-SM-Q924335.
    May change over time if institution relocates or changes name.
    Track changes in ghcid_history.
  slot_uri: dcterms:identifier
  range: string
  required: false
  pattern: '^[A-Z]{2}-[A-Z0-9]{1,3}-[A-Z]{3}-[A-Z]-[A-Z0-9]{1,10}(-Q[0-9]+)?$'
  comments:
    - 'Format: {Country}-{Region}-{City}-{Type}-{Abbreviation}'
    - 'Optional collision resolver: {Country}-{Region}-{City}-{Type}-{Abbreviation}-Q{WikidataID}'
    - 'Wikidata Q-number suffix ONLY added when collision detected'
    - 'Components must match GHCID specification'
    - 'Use for human-readable display and reports'

# GHCID as first assigned; validated by the same pattern as ghcid_current.
ghcid_original:
  description: >-
    Original GHCID string assigned when institution was first registered.
    Frozen at creation. Useful for provenance tracking. Should match
    ghcid_current unless institution has changed. Includes Wikidata Q-number
    suffix if collision was detected at registration time.
  slot_uri: dcterms:identifier
  range: string
  required: false
  pattern: '^[A-Z]{2}-[A-Z0-9]{1,3}-[A-Z]{3}-[A-Z]-[A-Z0-9]{1,10}(-Q[0-9]+)?$'

# List of past GHCID assignments. GHCIDHistoryEntry is not defined in this
# file — presumably it comes from an imported module.
ghcid_history:
  description: >-
    Historical GHCID records tracking all changes to identifier over time.
    Each entry represents a period when a specific GHCID was valid.
  slot_uri: dcterms:provenance
  range: GHCIDHistoryEntry
  multivalued: true
# Database key in UUID v7 form. The pattern fixes the version nibble to 7 and
# the variant nibble to one of 8, 9, a or b, in lowercase hex.
record_id:
  description: >-
    UUID v7 database primary key (RFC 9562, time-ordered, random).
    Time-ordered UUID for database performance optimization and natural
    chronological ordering. NOT deterministic - each record gets a unique
    UUID v7 assigned at creation time. Use for database PKs and internal
    record tracking. For persistent interoperable identifiers, use
    ghcid_uuid (v5) or ghcid_uuid_sha256 (v8).
  slot_uri: dcterms:identifier
  range: string
  required: false
  pattern: '^[0-9a-f]{8}-[0-9a-f]{4}-7[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$'
  comments:
    - 'UUID v7 format: time-ordered with 48-bit millisecond timestamp'
    - 'Random component: 74 random bits for uniqueness'
    - 'Best for: Database primary keys, time-series ordering'
    - 'NOT for: Persistent identifiers (use ghcid_uuid or ghcid_uuid_sha256)'
    - 'Generated once at record creation, never changes'
# Deterministic UUID v5 derived from the GHCID string. The pattern fixes the
# version nibble to 5.
ghcid_uuid:
  description: >-
    UUID v5 persistent identifier (RFC 4122, SHA-1 based). PRIMARY identifier for
    interoperability with Europeana, DPLA, IIIF, Wikidata, and library systems.
    Deterministically generated from GHCID string using UUID v5 algorithm.
    Stable across systems and regeneratable from GHCID.
  range: string
  required: false
  pattern: '^[0-9a-f]{8}-[0-9a-f]{4}-5[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$'
  slot_uri: dcterms:identifier
  # The namespace value below is the predefined DNS namespace from RFC 4122
  # Appendix C, not a project-specific UUID; the label now says so, so that
  # implementers do not assume a GHCID-owned namespace exists.
  comments:
    - "UUID v5 format: deterministic SHA-1 hash of GHCID string"
    - "Namespace: 6ba7b810-9dad-11d1-80b4-00c04fd430c8 (RFC 4122 DNS namespace, used as the GHCID namespace)"
    - "Best for: RDF/Linked Data, IIIF manifests, cross-system integration"
    - "Regeneratable: Same GHCID always produces same UUID v5"
    - "Use as primary PID for semantic web applications"
# Deterministic UUID v8 derived from the GHCID string with SHA-256. The
# pattern fixes the version nibble to 8.
ghcid_uuid_sha256:
  description: >-
    UUID v8 with SHA-256 (custom format, SOTA cryptographic strength). State-of-the-art
    persistent identifier for future-proofing and security compliance. Deterministically
    generated from GHCID string using SHA-256 algorithm (stronger than SHA-1 in UUID v5).
    Use for applications requiring maximum collision resistance and cryptographic strength.
  range: string
  required: false
  pattern: '^[0-9a-f]{8}-[0-9a-f]{4}-8[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$'
  slot_uri: dcterms:identifier
  # The namespace value below is the predefined URL namespace from RFC 4122
  # Appendix C, not a project-specific UUID; the label now says so.
  comments:
    - "UUID v8 format: deterministic SHA-256 hash of GHCID string"
    - "Namespace: 6ba7b811-9dad-11d1-80b4-00c04fd430c8 (RFC 4122 URL namespace, used as the GHCID SHA-256 namespace)"
    - "Best for: High-security applications, future-proof PIDs"
    - "Regeneratable: Same GHCID always produces same UUID v8"
    - "Provides superior collision resistance vs UUID v5"
# Primary display name; required wherever the slot is used.
name:
  description: Official name of the organization
  slot_uri: schema:name
  range: string
  required: true

# Any number of additional names.
alternative_names:
  description: Alternative names, abbreviations, former names
  slot_uri: schema:alternateName
  range: string
  multivalued: true

# Legal name including the organizational form (TOOI term).
official_name:
  description: >-
    Official legal name of the organization, including organizational form.
    Based on TOOI officieleNaamInclSoort pattern.
  slot_uri: tooi:officieleNaamInclSoort
  range: string
  comments:
    - "Example: 'Stichting Rijksmuseum Amsterdam'"
    - 'Use this for legal/official contexts'

# Name variant prepared for alphabetical ordering (TOOI term).
sorting_name:
  description: >-
    Name formatted for alphabetical sorting (typically without articles,
    prefixes). Based on TOOI officieleNaamSorteer pattern.
  slot_uri: tooi:officieleNaamSorteer
  range: string
  comments:
    - "Example: 'Rijksmuseum Amsterdam' (without 'Het')"
    - "Remove leading articles: 'The', 'Het', 'De', 'La', 'Le', etc."

# Acronym or short form (TOOI term).
abbreviation:
  description: >-
    Official abbreviation or acronym for the organization.
    Based on TOOI afkorting pattern.
  slot_uri: tooi:afkorting
  range: string
  comments:
    - "Example: 'RMA' for Rijksmuseum Amsterdam"
    - 'Use for GHCID generation and display'

# Enumerated institution category. InstitutionTypeEnum is not defined in this
# file — presumably it comes from the imported `enums` module.
institution_type:
  description: Primary type of heritage institution
  slot_uri: dcterms:type
  range: InstitutionTypeEnum
# Enumerated operational status. OrganizationStatusEnum is not defined in this
# file — presumably it comes from the imported `enums` module.
organization_status:
  description: Current operational status
  range: OrganizationStatusEnum
  # The W3C Organization Ontology defines no `organizationStatus` property, so
  # the slot is mapped to a term in this schema's own namespace rather than to
  # a non-existent term in the org: namespace.
  slot_uri: heritage:organization_status
# Free-text description. Both HeritageCustodian and OrganizationalUnit list
# this slot.
description:
  slot_uri: dcterms:description
  range: string
  description: Textual description of the organization
# Organizational hierarchy
# Both slots point at other HeritageCustodian records, mapped to the paired
# ORG properties subOrganizationOf and hasSubOrganization.
parent_organization:
  description: Parent organization (if part of larger entity)
  slot_uri: org:subOrganizationOf
  range: HeritageCustodian

sub_organizations:
  description: Sub-organizations or departments
  slot_uri: org:hasSubOrganization
  range: HeritageCustodian
  multivalued: true
# Temporal
# Day-precision founding and dissolution dates.
founded_date:
  description: Date the organization was founded
  slot_uri: schema:foundingDate
  range: date

closed_date:
  description: Date the organization closed (if applicable)
  slot_uri: schema:dissolutionDate
  range: date

# Datetime-precision counterparts of the two date slots above, mapped to
# PROV-O.
prov_generated_at:
  description: >-
    Timestamp when this organization entity was generated/created/founded.
    W3C PROV-O provenance tracking. More precise than founded_date.
  slot_uri: prov:generatedAtTime
  range: datetime
  comments:
    - 'Use ISO 8601 format with timezone'
    - 'Complements founded_date with precise timestamp'

prov_invalidated_at:
  description: >-
    Timestamp when this organization entity was invalidated/dissolved/ceased.
    W3C PROV-O provenance tracking. More precise than closed_date.
  slot_uri: prov:invalidatedAtTime
  range: datetime
  required: false
  comments:
    - 'Null if organization still active'
    - 'Complements closed_date with precise timestamp'

# Embedded list of change events. ChangeEvent is not defined in this file —
# presumably it comes from an imported module.
change_history:
  description: >-
    Chronological list of significant organizational change events.
    Tracks mergers, name changes, relocations, restructuring, etc.
  slot_uri: prov:wasInfluencedBy
  range: ChangeEvent
  multivalued: true
  inlined: true
  inlined_as_list: true
  comments:
    - 'Ordered by event_date (oldest first)'
    - 'Use for institutional history tracking'
# Web presence
# Single URI. Both HeritageCustodian and OrganizationalUnit list this slot.
homepage:
  slot_uri: foaf:homepage
  range: uri
  description: Official website URL
# Complex objects
# A single embedded ContactInfo object.
contact_info:
  description: Contact information
  range: ContactInfo
  inlined: true
  # `inlined_as_list` was dropped: it only applies to multivalued slots and
  # this slot is single-valued, so the flag was misleading.
  # NOTE(review): if several contact points per organization are intended,
  # make the slot `multivalued: true` and restore `inlined_as_list: true`.
# Embedded list of Location objects.
locations:
  description: Physical locations
  range: Location
  multivalued: true
  inlined: true
  inlined_as_list: true

# Embedded list of Identifier objects.
identifiers:
  description: External identifiers (ISIL, VIAF, Wikidata, etc.)
  range: Identifier
  multivalued: true
  inlined: true
  inlined_as_list: true

# Collection, DigitalPlatform, Partnership and Publication (used below) are
# not defined in this file — presumably they come from other modules.
collections:
  description: Collections held by the organization
  range: Collection
  multivalued: true
  inlined: true
  inlined_as_list: true

digital_platforms:
  description: Digital platforms and systems used
  range: DigitalPlatform
  multivalued: true
  inlined: true
  inlined_as_list: true

# Enum-valued list, so no inlining flags are needed.
metadata_standards:
  description: Metadata standards used by the institution
  range: MetadataStandardEnum
  multivalued: true

partnerships:
  description: Partnerships and network memberships
  range: Partnership
  multivalued: true
  inlined: true
  inlined_as_list: true

# Unlike its siblings, this slot sets `inlined_as_list` without `inlined`.
publications:
  description: >-
    Scholarly publications about, by, or related to this heritage
    institution. Includes journal articles, conference papers, books, and
    other research outputs that reference or analyze the institution's
    collections, practices, or history.
  slot_uri: dcterms:bibliographicCitation
  range: Publication
  multivalued: true
  inlined_as_list: true
  comments:
    - 'Use for tracking research outputs about the institution'
    - 'Link to Publication instances in bibliographic module'
    - 'Can include institutional reports, catalog publications, etc.'
    - 'Supports both URI references and embedded Publication objects'

# Mandatory embedded provenance block. Provenance is not defined in this file
# — presumably it comes from the imported `provenance` module.
provenance:
  description: Data provenance and quality metadata
  range: Provenance
  required: true
  inlined: true
# Location fields
# Free-text location kind; no slot_uri is declared for it.
location_type:
  range: string
  description: Type of location (main office, branch, storage, etc.)

# Postal address parts, mapped to the matching schema.org address properties.
street_address:
  slot_uri: schema:streetAddress
  range: string
  description: Street address

city:
  slot_uri: schema:addressLocality
  range: string
  description: City or town

postal_code:
  slot_uri: schema:postalCode
  range: string
  description: Postal code

region:
  slot_uri: schema:addressRegion
  range: string
  description: State, province, or region

# The pattern accepts exactly two uppercase ASCII letters.
country:
  slot_uri: schema:addressCountry
  range: string
  pattern: '^[A-Z]{2}$'
  description: Country (ISO 3166-1 alpha-2 code)
# Geographic coordinates. The bounds reject values that cannot be valid
# latitudes or longitudes in decimal degrees.
latitude:
  description: Latitude coordinate
  range: float
  minimum_value: -90
  maximum_value: 90
  slot_uri: schema:latitude

longitude:
  description: Longitude coordinate
  range: float
  minimum_value: -180
  maximum_value: 180
  slot_uri: schema:longitude
# GeoNames reference, stored as a string; no slot_uri is declared.
geonames_id:
  range: string
  description: GeoNames identifier

# Boolean flag marking the primary location.
is_primary:
  range: boolean
  description: Whether this is the primary location
# Contact fields
# The pattern requires local-part@domain with a final label of at least two
# ASCII letters.
email:
  slot_uri: schema:email
  range: string
  pattern: '^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
  description: Email address

# Phone and fax are unconstrained strings.
phone:
  slot_uri: schema:telephone
  range: string
  description: Phone number

fax:
  slot_uri: schema:faxNumber
  range: string
  description: Fax number

# Free-text contact category; no slot_uri is declared.
contact_type:
  range: string
  description: Type of contact (general, info, director, etc.)
# Identifier fields
# These four slots make up the Identifier class; none declares a slot_uri.
identifier_scheme:
  range: string
  description: Identifier scheme name (ISIL, VIAF, Wikidata, KvK, etc.)

identifier_value:
  range: string
  description: The actual identifier value

identifier_url:
  range: uri
  description: URL to the identifier in its registry

assigned_date:
  range: date
  description: Date the identifier was assigned
# Universal business and administrative identifiers
# Country-agnostic business registry number. It is an unconstrained string
# here; per the last comment entry, format checks are left to country-specific
# schemas.
chamber_of_commerce_number:
  description: >-
    Business registration number from national chamber of commerce or
    equivalent. Universal field applicable to any country's business
    registry system: Netherlands (KvK 8-digit), Brazil (CNPJ 14-digit),
    USA (EIN 9-digit), UK (Companies House number), France (SIRET 14-digit),
    etc.
  slot_uri: dcterms:identifier
  range: string
  comments:
    - 'Netherlands: KvK (8 digits) - Example: 41224096'
    - 'Brazil: CNPJ (14 digits) - Example: 12345678000190'
    - 'USA: EIN (9 digits) - Example: 123456789'
    - 'UK: Companies House number - Example: 12345678'
    - 'France: SIRET (14 digits) - Example: 12345678901234'
    - 'Pattern validation should be handled at country-specific schema level'
# Country-agnostic municipal code, kept as a string so that leading zeros
# (see the "0363" example below) are preserved.
municipality_identifier:
  description: >-
    Local government identifier for the municipality/city where institution is located.
    Universal field for any country's municipal coding system:
    Netherlands (CBS gemeentecode 4-digit), Brazil (IBGE código 7-digit),
    USA (FIPS code 5-digit), etc.
  range: string
  # schema:addressLocality denotes the locality name and is already the
  # slot_uri of `city`; an administrative code is a different thing, so the
  # slot is mapped to a term in this schema's own namespace.
  slot_uri: heritage:municipality_identifier
  comments:
    - "Netherlands: CBS gemeentecode (4 digits) - Example: 0363 (Amsterdam)"
    - "Brazil: IBGE código municipal (7 digits) - Example: 3550308 (São Paulo)"
    - "USA: FIPS code (5 digits) - Example: 36061 (New York County)"
    - "Pattern validation should be handled at country-specific schema level"
# Plain-text fallback for a parent organization that has no record yet.
parent_organization_name:
  description: >-
    Name of parent or umbrella organization (unresolved reference).
    Use when parent organization is not yet in the database but name is known.
    For resolved references, use parent_organization (which expects an ID/URI).
  range: string
  # org:subOrganizationOf is an object property whose values are
  # organizations, and it is already the slot_uri of `parent_organization`.
  # Emitting a string literal under the same predicate would mix literals
  # with resource links, so this slot gets a term in the schema's own
  # namespace.
  slot_uri: heritage:parent_organization_name
  comments:
    - "Example: 'Stichting Museumvereniging' (for member museums)"
    - "Use this for CSV imports where parent org may not exist yet"
    - "Prefer parent_organization (resolved ID) when possible"
# OrganizationalUnit fields
# Key of an organizational unit: a URI or CURIE, flagged as the identifier.
unit_id:
  slot_uri: dcterms:identifier
  range: uriorcurie
  identifier: true
  description: Unique identifier for this organizational unit
# Display name of an organizational unit; always required.
unit_name:
  description: Name of the organizational unit
  range: string
  required: true
  # The W3C Organization Ontology defines no `unitName` property. schema:name
  # is a real term for a display name and its prefix is already declared in
  # this schema.
  slot_uri: schema:name
# Free-text unit category.
unit_type:
  slot_uri: org:classification
  range: string
  description: Type of organizational unit (department, division, section, etc.)

# Self-reference that lets units nest inside other units.
parent_unit:
  slot_uri: org:unitOf
  range: OrganizationalUnit
  description: Parent organizational unit (if nested structure)