glam/schemas/20251121/linkml/modules/classes/CustodianSourceFile.yaml
kempersc f30b1777f4 Enhance schema definitions and introduce new classes for DigitalPlatformV2
- Added detailed descriptions for slots: collecting_scope, collection_access, custody_history, education_level, membership_size, and publication_activity to improve clarity and usability.
- Removed the publication_date slot due to migration to a new structure.
- Updated slot fixes with migration notes and adjustments for various slots, ensuring alignment with new ontology standards.
- Introduced new classes for DigitalPlatformV2, including DigitalPlatformV2DataQualityNotes, DigitalPlatformV2DataSource, DigitalPlatformV2KeyContact, DigitalPlatformV2OrganizationProfile, DigitalPlatformV2OrganizationStatus, DigitalPlatformV2PrimaryPlatform, DigitalPlatformV2Provenance, DigitalPlatformV2ServiceDetails, and DigitalPlatformV2TransformationMetadata, each with comprehensive attributes and descriptions.
- Added classes for EnrichmentProvenance and EnrichmentProvenanceEntry to track provenance for enrichment sources, including detailed attributes for verification and source tracking.
- Created LogoClaim, LogoEnrichment, and LogoEnrichmentSummary classes to manage logo and favicon data extracted from web scraping, with attributes for claims and summary statistics.
- Archived the publication_date slot to maintain historical records.
2026-01-18 00:59:51 +01:00

210 lines
7.6 KiB
YAML

# CustodianSourceFile - Root class representing the complete structure of a custodian YAML file
# Extracted from custodian_source.yaml per Rule 38 (modular schema files)
# Extraction date: 2026-01-08
# Schema identity: the id URI plus the machine-readable name and display title.
id: https://nde.nl/ontology/hc/classes/CustodianSourceFile
name: CustodianSourceFile
title: CustodianSourceFile
# CURIE prefixes available to this module (e.g. prov:Entity, schema:Dataset).
# The entries must be nested under `prefixes`; at column 0 they would be
# parsed as unrelated top-level keys and `prefixes` itself would be null.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  # Quoted so the trailing '#' can never be mistaken for a comment marker.
  prov: 'http://www.w3.org/ns/prov#'
  xsd: 'http://www.w3.org/2001/XMLSchema#'
imports:
- linkml:types
- ../enums/EnrichmentStatusEnum
- ../enums/GoogleMapsStatusEnum
- ./ChAnnotatorBlock
- ./CustodianLegalNameClaim
- ./CustodianNameConsensus
- ./DigitalPlatform
- ./DigitalPlatformV2
- ./GenealogiewerkbalkEnrichment
- ./GhcidBlock
- ./GoogleMapsEnrichment
- ./GoogleMapsPlaywrightEnrichment
- ./Identifier
- ./LogoEnrichment
- ./MuseumRegisterEnrichment
- ./NanIsilEnrichment
- ./NormalizedLocation
- ./OriginalEntry
- ./ProvenanceBlock
- ./TimespanBlock
- ./UnescoIchEnrichment
- ./WebClaimsBlock
- ./WebEnrichment
- ./WikidataEnrichment
- ./YoutubeEnrichment
# Range applied to any slot/attribute that does not declare its own `range`.
default_range: string
classes:
  # Root (tree_root) class: one instance corresponds to one custodian YAML
  # file. Every attribute below is a top-level key of that file.
  CustodianSourceFile:
    # Literal block scalar (|-) so the rationale bullet list keeps its line
    # breaks instead of being folded into a single paragraph.
    description: |-
      Root class representing the complete structure of a custodian YAML
      file, serving as the tree_root for LinkML validation. Aggregates
      all enrichment data including original entry, Google Maps, Wikidata,
      web claims, museum register, and other source data with full
      provenance tracking.

      Ontology mapping rationale:
      - class_uri is prov:Entity because this represents a complete data
        record/file that aggregates multiple enrichment sources
      - close_mappings includes prov:Bundle as this bundles multiple
        provenance-tracked data elements
      - related_mappings includes schema:Dataset as this is essentially
        a structured dataset file
    class_uri: prov:Entity
    close_mappings:
    - prov:Bundle
    related_mappings:
    - schema:Dataset
    tree_root: true
    attributes:
      original_entry:
        range: OriginalEntry
        description: Source registry data from NDE CSV
      # Accepts either type; any_of is used because a single `range` cannot
      # express "integer or string".
      entry_index:
        any_of:
        - range: integer
        - range: string
        description: >-
          Position in source CSV file (integer) or heritage reference code
          (string), can be null
      processing_timestamp:
        range: datetime
        description: When the entry was processed
      enrichment_status:
        range: EnrichmentStatusEnum
        description: Current enrichment processing status
      skip_reason:
        range: string
        description: Reason if enrichment was skipped
      provenance:
        range: ProvenanceBlock
        description: Full provenance tracking
      google_maps_enrichment:
        range: GoogleMapsEnrichment
        description: Google Maps Places API data
      # NOTE(review): the next two attributes have no description, unlike
      # their siblings; presumably the Google Maps counterparts of
      # youtube_status / youtube_search_query - confirm and document.
      google_maps_status:
        range: GoogleMapsStatusEnum
      google_maps_search_query:
        range: string
      web_enrichment:
        range: WebEnrichment
        description: Web archive metadata
      nan_isil_enrichment:
        range: NanIsilEnrichment
        description: Nationaal Archief ISIL registry match
      # Singular form; see also `identifiers` further down, which has the
      # same range and cardinality.
      identifier:
        range: Identifier
        multivalued: true
        inlined_as_list: true
        description: All identifiers (ISIL, GHCID variants, etc.)
      wikidata_enrichment:
        range: WikidataEnrichment
        description: Full Wikidata enrichment data
      ghcid:
        range: GhcidBlock
        description: GHCID generation metadata with history
      has_or_had_web_claim:
        range: WebClaimsBlock
        description: Claims extracted from archived websites
      custodian_name:
        range: CustodianNameConsensus
        description: Consensus name determination
      genealogiewerkbalk_enrichment:
        range: GenealogiewerkbalkEnrichment
        description: Dutch municipal/provincial archive links
      digital_platforms:
        range: DigitalPlatform
        multivalued: true
        inlined_as_list: true
        description: Websites and digital collection platforms
      unesco_ich_enrichment:
        range: UnescoIchEnrichment
        description: UNESCO Intangible Cultural Heritage elements
      timespan:
        range: TimespanBlock
        description: CRMsci temporal bounds
      location:
        range: NormalizedLocation
        description: Normalized geographic data
      custodian_legal_name:
        range: CustodianLegalNameClaim
        description: Legal name claim with provenance
      google_maps_playwright_enrichment:
        range: GoogleMapsPlaywrightEnrichment
        description: Google Maps data extracted via Playwright browser automation
      museum_register_enrichment:
        range: MuseumRegisterEnrichment
        description: Dutch Museum Register (Museumregister) data
      qp_resolution_timestamp:
        range: datetime
        description: Timestamp when query parameter resolution was performed
      wikidata_enrichment_status:
        range: string
        description: Status of Wikidata enrichment process
      wikidata_search_timestamp:
        range: datetime
        description: Timestamp when Wikidata search was performed
      youtube_enrichment:
        range: YoutubeEnrichment
        description: YouTube channel/video data for the institution
      youtube_status:
        range: string
        description: YouTube search status (SUCCESS, NOT_FOUND, etc.)
      youtube_search_query:
        range: string
        description: Query used to search for YouTube channel
      youtube_search_timestamp:
        range: datetime
        description: When YouTube search was performed
      ch_annotator:
        range: ChAnnotatorBlock
        description: CH-Annotator convention metadata and claims
      # Plural twin of `identifier` above, kept for backward compatibility
      # (per its description).
      identifiers:
        range: Identifier
        multivalued: true
        inlined_as_list: true
        description: >-
          All identifiers (ISIL, GHCID variants, etc.) - plural form for
          backward compatibility
      digital_platform_v2:
        range: DigitalPlatformV2
        description: >-
          Enhanced digital platform data with organization profile and
          contacts
      logo_enrichment:
        range: LogoEnrichment
        description: Logo and favicon enrichment data from web scraping
      # The remaining attributes are untyped (`range: Any`).
      # NOTE(review): `Any` is neither defined nor directly imported in this
      # file; presumably it arrives through one of the imported modules as a
      # class with class_uri linkml:Any - confirm it resolves.
      crawl4ai_enrichment:
        range: Any
        description: Crawl4AI web scraping enrichment data with retrieval metadata
        inlined: true
      unesco_mow_enrichment:
        range: Any
        description: >-
          UNESCO Memory of the World inscription data for custodian holdings
        inlined: true
      # NOTE(review): kebab-case key among snake_case siblings; presumably it
      # mirrors the key spelling used in existing data files - confirm before
      # renaming, since a rename changes which data keys validate.
      web-enrichments:
        range: Any
        description: >-
          Web enrichment claims extracted via hybrid LLM/pattern extraction
        inlined: true
      alternative_names:
        range: Any
        multivalued: true
        inlined_as_list: true
        description: Alternative names for the institution from various sources
      legal_status:
        range: Any
        description: Legal status and registration information for the institution
        inlined: true
      person_observations:
        range: Any
        multivalued: true
        inlined_as_list: true
        description: Observations about people associated with the institution
      staff:
        range: Any
        description: Staff members extracted from LinkedIn or other sources
        inlined: true