# Listing metadata: 274 lines, 9.9 KiB, YAML
# CustodianSourceFile - Root class representing the complete structure of a custodian YAML file
# Extracted from custodian_source.yaml per Rule 38 (modular schema files)
# Extraction date: 2026-01-08
|
|
|
|
# Schema identity. `name` and `title` both carry the class name that this
# module defines.
id: https://nde.nl/ontology/hc/classes/CustodianSourceFile
name: CustodianSourceFile
title: CustodianSourceFile

# CURIE prefixes. Within this file, `linkml:` is used by the imports and
# `prov:` / `schema:` by the class mappings; `hc:` and `xsd:` are declared but
# not referenced anywhere else in this file.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  prov: http://www.w3.org/ns/prov#
  xsd: http://www.w3.org/2001/XMLSchema#
|
|
|
|
# Schema modules this file depends on. Every class or enum used as a `range`
# by CustodianSourceFile (other than built-in types and `Any`) is listed here.
imports:
  # LinkML built-in types (string, integer, datetime, uri, ...)
  - linkml:types

  # Enumerations
  - ../enums/EnrichmentStatusEnum
  - ../enums/GoogleMapsStatusEnum

  # Sibling class modules, in alphabetical order
  - ./ChAnnotatorBlock
  - ./CustodianLegalNameClaim
  - ./CustodianNameConsensus
  - ./DigitalPlatform
  - ./DigitalPlatformV2
  - ./GenealogiewerkbalkEnrichment
  - ./GhcidBlock
  - ./GoogleMapsEnrichment
  - ./GoogleMapsPlaywrightEnrichment
  - ./Identifier
  - ./LogoEnrichment
  - ./MuseumRegisterEnrichment
  - ./NanIsilEnrichment
  - ./NormalizedLocation
  - ./OriginalEntry
  - ./ProvenanceBlock
  - ./TimespanBlock
  - ./UnescoIchEnrichment
  - ./WebClaimsBlock
  - ./WebEnrichment
  - ./WikidataEnrichment
  - ./YoutubeEnrichment

# Range assumed for any slot that does not declare its own `range`.
default_range: string
|
|
|
|
classes:
  # Root class (tree_root) for one custodian YAML file. No attribute below
  # declares `required`, so every key is optional in a data file.
  #
  # NOTE(review): several attributes use `range: Any`. `Any` is not defined in
  # this file; presumably it is supplied by one of the imported modules --
  # confirm before validating against this schema in isolation.
  CustodianSourceFile:
    description: >-
      Root class representing the complete structure of a custodian YAML
      file, serving as the tree_root for LinkML validation. Aggregates
      all enrichment data including original entry, Google Maps, Wikidata,
      web claims, museum register, and other source data with full
      provenance tracking.

      Ontology mapping rationale:
      - class_uri is prov:Entity because this represents a complete data
        record/file that aggregates multiple enrichment sources
      - close_mappings includes prov:Bundle as this bundles multiple
        provenance-tracked data elements
      - related_mappings includes schema:Dataset as this is essentially
        a structured dataset file
    class_uri: prov:Entity
    close_mappings:
      - prov:Bundle
    related_mappings:
      - schema:Dataset
    tree_root: true
    attributes:
      original_entry:
        range: OriginalEntry
        description: Source registry data from NDE CSV
      # Accepts either an integer or a string (see any_of).
      entry_index:
        range: Any
        any_of:
          - range: integer
          - range: string
        description: >-
          Position in source CSV file (integer) or heritage reference code
          (string), can be null
      processing_timestamp:
        range: datetime
        description: When the entry was processed
      enrichment_status:
        range: EnrichmentStatusEnum
        description: Current enrichment processing status
      skip_reason:
        range: string
        description: Reason if enrichment was skipped
      provenance:
        range: ProvenanceBlock
        description: Full provenance tracking
      google_maps_enrichment:
        range: GoogleMapsEnrichment
        description: Google Maps Places API data
      # The next two slots carry no description in the schema.
      google_maps_status:
        range: GoogleMapsStatusEnum
      google_maps_search_query:
        range: string
      web_enrichment:
        range: WebEnrichment
        description: Web archive metadata
      nan_isil_enrichment:
        range: NanIsilEnrichment
        description: Nationaal Archief ISIL registry match
      # Singular form; `identifiers` further down has the same range and
      # cardinality.
      identifier:
        range: Identifier
        multivalued: true
        inlined_as_list: true
        description: All identifiers (ISIL, GHCID variants, etc.)
      wikidata_enrichment:
        range: WikidataEnrichment
        description: Full Wikidata enrichment data
      ghcid:
        range: GhcidBlock
        description: GHCID generation metadata with history
      has_or_had_web_claim:
        range: WebClaimsBlock
        description: Claims extracted from archived websites
      custodian_name:
        range: CustodianNameConsensus
        description: Consensus name determination
      genealogiewerkbalk_enrichment:
        range: GenealogiewerkbalkEnrichment
        description: Dutch municipal/provincial archive links
      digital_platforms:
        range: DigitalPlatform
        multivalued: true
        inlined_as_list: true
        description: Websites and digital collection platforms
      unesco_ich_enrichment:
        range: UnescoIchEnrichment
        description: UNESCO Intangible Cultural Heritage elements
      timespan:
        range: TimespanBlock
        description: CRMsci temporal bounds
      location:
        range: NormalizedLocation
        description: Normalized geographic data
      custodian_legal_name:
        range: CustodianLegalNameClaim
        description: Legal name claim with provenance
      google_maps_playwright_enrichment:
        range: GoogleMapsPlaywrightEnrichment
        description: Google Maps data extracted via Playwright browser automation
      museum_register_enrichment:
        range: MuseumRegisterEnrichment
        description: Dutch Museum Register (Museumregister) data
      qp_resolution_timestamp:
        range: datetime
        description: Timestamp when query parameter resolution was performed
      wikidata_enrichment_status:
        range: string
        description: Status of Wikidata enrichment process
      wikidata_search_timestamp:
        range: datetime
        description: Timestamp when Wikidata search was performed
      youtube_enrichment:
        range: YoutubeEnrichment
        description: YouTube channel/video data for the institution
      youtube_status:
        range: string
        description: YouTube search status (SUCCESS, NOT_FOUND, etc.)
      youtube_search_query:
        range: string
        description: Query used to search for YouTube channel
      youtube_search_timestamp:
        range: datetime
        description: When YouTube search was performed
      ch_annotator:
        range: ChAnnotatorBlock
        description: CH-Annotator convention metadata and claims
      # Plural alias of `identifier` (same range, multivalued, inlined as list).
      identifiers:
        range: Identifier
        multivalued: true
        inlined_as_list: true
        description: All identifiers (ISIL, GHCID variants, etc.) - plural form for backward compatibility
      digital_platform_v2:
        range: DigitalPlatformV2
        description: Enhanced digital platform data with organization profile and contacts
      logo_enrichment:
        range: LogoEnrichment
        description: Logo and favicon enrichment data from web scraping
      crawl4ai_enrichment:
        range: Any
        description: Crawl4AI web scraping enrichment data with retrieval metadata
        inlined: true
      unesco_mow_enrichment:
        range: Any
        description: UNESCO Memory of the World inscription data for custodian holdings
        inlined: true
      web_enrichments:
        range: Any
        description: Web enrichment claims extracted via hybrid LLM/pattern extraction
        inlined: true
      alternative_names:
        range: Any
        multivalued: true
        inlined_as_list: true
        description: Alternative names for the institution from various sources
      legal_status:
        range: Any
        description: Legal status and registration information for the institution
        inlined: true
      person_observations:
        range: Any
        multivalued: true
        inlined_as_list: true
        description: Observations about people associated with the institution
      staff:
        range: Any
        description: Staff members extracted from LinkedIn or other sources
        inlined: true
      ghcid_current:
        range: string
        description: Current GHCID identifier string (convenience field alongside ghcid block)
      ghcid_resolution:
        range: Any
        description: GHCID resolution metadata
        inlined: true
      institution_type:
        range: Any
        description: Institution type classification (GLAMORCUBESFIXPHDNT single letter code or full type)
      linkedin_enrichment:
        range: Any
        description: LinkedIn enrichment data for the institution
        inlined: true
      # Accepts either a uri or a string (see any_of).
      website:
        range: Any
        description: Institution website URL or website metadata
        any_of:
          - range: uri
          - range: string
      mission_statement:
        range: Any
        description: Institution mission statement extracted from website
        inlined: true
      website_discovery:
        range: Any
        description: Website discovery and validation data
        inlined: true
      # Multivalued counterpart of `location` (same range).
      locations:
        range: NormalizedLocation
        multivalued: true
        inlined_as_list: true
        description: Multiple locations for institutions with multiple sites
      contact:
        range: Any
        description: Contact information for the institution (email, phone, address, social media)
        inlined: true
      kien_enrichment:
        range: Any
        description: KIEN (Dutch Intangible Heritage) registry enrichment data
        inlined: true
      location_resolution:
        range: Any
        description: Location resolution metadata (how geographic coordinates were determined)
        inlined: true
      # NOTE(review): the two `range: string` branches only differ if
      # `multivalued: true` belongs to the second one (a single string vs. a
      # list of strings). It is nested there on that assumption; the nesting
      # could not be read from the flattened source -- confirm.
      notes:
        range: Any
        any_of:
          - range: string
          - range: string
            multivalued: true
        description: Free text notes about the institution or data quality
      timeline_enrichment:
        range: Any
        description: Timeline enrichment data (founding dates, events from web research)
        inlined: true
      geocoding:
        range: Any
        description: Geocoding metadata (Nominatim, GeoNames resolution details)
        inlined: true
      youtube_candidates_rejected:
        range: integer
        description: Number of YouTube search candidates that were rejected during matching