glam/frontend/public/schemas/20251121/linkml/modules/classes/CustodianSourceFile.yaml
kempersc 2d09776856 Refactor StorageCondition schema: Migrate compliance_status to has_or_had_status with ComplianceStatus class
- Removed compliance_status slot and replaced it with has_or_had_status.
- Updated has_or_had_status to use ComplianceStatus for structured representation.
- Adjusted examples to reflect new structure for compliance status.
- Updated documentation to indicate migration and provide details on the ComplianceStatus class.
2026-01-22 16:22:16 +01:00

368 lines
14 KiB
YAML

# CustodianSourceFile - Root class representing the complete structure of a custodian YAML file
# Extracted from custodian_source.yaml per Rule 38 (modular schema files)
# Extraction date: 2026-01-08
# Schema identity. `id` is the URI of this module; `name` and `title` both
# carry the name of the single class this module defines.
id: https://nde.nl/ontology/hc/classes/CustodianSourceFile
name: CustodianSourceFile
title: CustodianSourceFile

# CURIE prefix -> namespace expansions. These must be nested under `prefixes`;
# written flat they would become unrelated top-level keys and `prefixes`
# itself would be null.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  prov: http://www.w3.org/ns/prov#
  xsd: http://www.w3.org/2001/XMLSchema#

# Modules this schema depends on: the LinkML built-in types, two enums from
# the sibling `enums` directory, and class modules from this directory.
imports:
- linkml:types
- ../enums/EnrichmentStatusEnum
- ../enums/GoogleMapsStatusEnum
- ./ChAnnotatorBlock
- ./CustodianLegalNameClaim
- ./CustodianNameConsensus
- ./DigitalPlatform
- ./DigitalPlatformV2
- ./GenealogiewerkbalkEnrichment
- ./GhcidBlock
- ./GoogleMapsEnrichment
- ./GoogleMapsPlaywrightEnrichment
- ./Identifier
- ./LogoEnrichment
- ./MuseumRegisterEnrichment
- ./NanIsilEnrichment
- ./NormalizedLocation
- ./OriginalEntry
- ./ProvenanceBlock
- ./TimespanBlock
- ./UnescoIchEnrichment
- ./WebClaimsBlock
- ./WebEnrichment
- ./WikidataEnrichment
- ./YoutubeEnrichment

# Range applied to any slot/attribute that does not declare its own `range`.
default_range: string
classes:
  # Root class of a custodian YAML file. It is flagged `tree_root: true`, so it
  # is the entry point when a data file is validated against this schema.
  # Each key under `attributes` is one top-level key permitted in such a file.
  #
  # NOTE(review): many attributes use `range: Any`. `Any` is neither defined in
  # this module nor named in its imports list; presumably it is supplied
  # transitively by one of the imported modules -- confirm.
  CustodianSourceFile:
    # NOTE(review): this is a folded scalar (`>-`), so the "- ..." rationale
    # lines below are joined into the surrounding paragraph rather than kept
    # as a bullet list. Confirm that is the intended rendering.
    description: >-
      Root class representing the complete structure of a custodian YAML
      file, serving as the tree_root for LinkML validation. Aggregates
      all enrichment data including original entry, Google Maps, Wikidata,
      web claims, museum register, and other source data with full
      provenance tracking.
      Ontology mapping rationale:
      - class_uri is prov:Entity because this represents a complete data
      record/file that aggregates multiple enrichment sources
      - close_mappings includes prov:Bundle as this bundles multiple
      provenance-tracked data elements
      - related_mappings includes schema:Dataset as this is essentially
      a structured dataset file
    class_uri: prov:Entity
    close_mappings:
    - prov:Bundle
    related_mappings:
    - schema:Dataset
    tree_root: true
    attributes:
      original_entry:
        range: OriginalEntry
        description: Source registry data from NDE CSV
      # Union-typed: `range: Any` plus `any_of` lets the value be either an
      # integer or a string.
      entry_index:
        range: Any
        any_of:
        - range: integer
        - range: string
        description: Position in source CSV file (integer) or heritage reference code
          (string), can be null
      processing_timestamp:
        range: datetime
        description: When the entry was processed
      enrichment_status:
        range: EnrichmentStatusEnum
        description: Current enrichment processing status
      skip_reason:
        range: string
        description: Reason if enrichment was skipped
      provenance:
        range: ProvenanceBlock
        description: Full provenance tracking
      google_maps_enrichment:
        range: GoogleMapsEnrichment
        description: Google Maps Places API data
      google_maps_status:
        range: GoogleMapsStatusEnum
      google_maps_search_query:
        range: string
      google_maps_search_timestamp:
        range: datetime
        description: When Google Maps search was performed
      web_enrichment:
        range: WebEnrichment
        description: Web archive metadata
      nan_isil_enrichment:
        range: NanIsilEnrichment
        description: Nationaal Archief ISIL registry match
      # Singular key; the plural `identifiers` attribute further down has the
      # same range and is kept for backward compatibility.
      identifier:
        range: Identifier
        multivalued: true
        inlined_as_list: true
        description: All identifiers (ISIL, GHCID variants, etc.)
      wikidata_enrichment:
        range: WikidataEnrichment
        description: Full Wikidata enrichment data
      ghcid:
        range: GhcidBlock
        description: GHCID generation metadata with history
      has_or_had_web_claim:
        range: WebClaimsBlock
        description: Claims extracted from archived websites
      custodian_name:
        range: CustodianNameConsensus
        description: Consensus name determination
      genealogiewerkbalk_enrichment:
        range: GenealogiewerkbalkEnrichment
        description: Dutch municipal/provincial archive links
      digital_platforms:
        range: DigitalPlatform
        multivalued: true
        inlined_as_list: true
        description: Websites and digital collection platforms
      unesco_ich_enrichment:
        range: UnescoIchEnrichment
        description: UNESCO Intangible Cultural Heritage elements
      timespan:
        range: TimespanBlock
        description: CRMsci temporal bounds
      location:
        range: NormalizedLocation
        description: Normalized geographic data
      custodian_legal_name:
        range: CustodianLegalNameClaim
        description: Legal name claim with provenance
      google_maps_playwright_enrichment:
        range: GoogleMapsPlaywrightEnrichment
        description: Google Maps data extracted via Playwright browser automation
      museum_register_enrichment:
        range: MuseumRegisterEnrichment
        description: Dutch Museum Register (Museumregister) data
      qp_resolution_timestamp:
        range: datetime
        description: Timestamp when query parameter resolution was performed
      wikidata_enrichment_status:
        range: string
        description: Status of Wikidata enrichment process
      wikidata_search_timestamp:
        range: datetime
        description: Timestamp when Wikidata search was performed
      youtube_enrichment:
        range: YoutubeEnrichment
        description: YouTube channel/video data for the institution
      youtube_status:
        range: string
        description: YouTube search status (SUCCESS, NOT_FOUND, etc.)
      youtube_search_query:
        range: string
        description: Query used to search for YouTube channel
      youtube_search_timestamp:
        range: datetime
        description: When YouTube search was performed
      youtube_search_note:
        range: string
        description: Notes about YouTube search (e.g., channel provided manually)
      ch_annotator:
        range: ChAnnotatorBlock
        description: CH-Annotator convention metadata and claims
      # Plural twin of `identifier` above (same range and cardinality).
      identifiers:
        range: Identifier
        multivalued: true
        inlined_as_list: true
        description: All identifiers (ISIL, GHCID variants, etc.) - plural form for backward compatibility
      digital_platform_v2:
        range: DigitalPlatformV2
        description: Enhanced digital platform data with organization profile and contacts
      # NOTE(review): `./LogoEnrichment` is imported by this module but the
      # range here is `Any`; per the description the value may be an object
      # or a bare array, which presumably is why -- confirm.
      logo_enrichment:
        range: Any
        description: Logo and favicon enrichment data from web scraping (can be object with claims or direct array of LogoClaim)
        inlined: true
      crawl4ai_enrichment:
        range: Any
        description: Crawl4AI web scraping enrichment data with retrieval metadata
        inlined: true
      unesco_mow_enrichment:
        range: Any
        description: UNESCO Memory of the World inscription data for custodian holdings
        inlined: true
      web_enrichments:
        range: Any
        description: Web enrichment claims extracted via hybrid LLM/pattern extraction
        inlined: true
      alternative_names:
        range: Any
        multivalued: true
        inlined_as_list: true
        description: Alternative names for the institution from various sources
      legal_status:
        range: Any
        description: Legal status and registration information for the institution
        inlined: true
      person_observations:
        range: Any
        description: Observations about people associated with the institution (object or array)
        inlined: true
      staff:
        range: Any
        description: Staff members extracted from LinkedIn or other sources
        inlined: true
      ghcid_current:
        range: string
        description: Current GHCID identifier string (convenience field alongside ghcid block)
      ghcid_resolution:
        range: Any
        description: GHCID resolution metadata
        inlined: true
      institution_type:
        range: Any
        description: Institution type classification (GLAMORCUBESFIXPHDNT single letter code or full type)
      linkedin_enrichment:
        range: Any
        description: LinkedIn enrichment data for the institution
        inlined: true
      website:
        range: Any
        description: Institution website URL or website metadata
        any_of:
        - range: uri
        - range: string
      mission_statement:
        range: Any
        description: Institution mission statement extracted from website
        inlined: true
      website_discovery:
        range: Any
        description: Website discovery and validation data
        inlined: true
      locations:
        range: NormalizedLocation
        multivalued: true
        inlined_as_list: true
        description: Multiple locations for institutions with multiple sites
      contact:
        range: Any
        description: Contact information for the institution (email, phone, address, social media)
        inlined: true
      kien_enrichment:
        range: Any
        description: KIEN (Dutch Intangible Heritage) registry enrichment data
        inlined: true
      location_resolution:
        range: Any
        description: Location resolution metadata (how geographic coordinates were determined)
        inlined: true
      # Two alternatives: a single string, or a multivalued string.
      # NOTE(review): `multivalued: true` is attached to the second `any_of`
      # branch because it follows that branch's `range` in the source and the
      # two branches would otherwise be identical -- confirm.
      notes:
        range: Any
        any_of:
        - range: string
        - range: string
          multivalued: true
        description: Free text notes about the institution or data quality
      timeline_enrichment:
        range: Any
        description: Timeline enrichment data (founding dates, events from web research)
        inlined: true
      geocoding:
        range: Any
        description: Geocoding metadata (Nominatim, GeoNames resolution details)
        inlined: true
      youtube_candidates_rejected:
        range: integer
        description: Number of YouTube search candidates that were rejected during matching
      google_maps_rejected:
        range: Any
        description: Google Maps candidate that was rejected with reason
        inlined: true
      firecrawl_enrichment:
        range: Any
        description: Firecrawl API web scraping enrichment data with page metadata, CMS detection, and standards
        inlined: true
      unesco_enrichment:
        range: Any
        description: UNESCO World Heritage Site proximity enrichment data with nearby sites
        inlined: true
      zcbs_enrichment:
        range: Any
        description: ZCBS (Zeeuwse Culturele Bibliotheek Systeem) enrichment data for Dutch institutions
        inlined: true
      service_area:
        range: Any
        description: Geographic service area of the institution (e.g., regional, national)
        inlined: true
      safeguards:
        range: uri
        multivalued: true
        inlined_as_list: true
        description: Heritage form safeguard URIs from KIEN registry (e.g., https://nde.nl/ontology/hc/heritage-form/ring-en-sjeesrijden)
      linkup_enrichment:
        range: Any
        description: Linkup web research enrichment data (timeline_events, etc.)
        inlined: true
      enrichment_source:
        range: string
        description: Source identifier for the enrichment data (e.g., kb_poi_registry)
      enrichment_timestamp:
        range: datetime
        description: When the enrichment was performed
      kb_enrichment:
        range: Any
        description: Koninklijke Bibliotheek (KB) enrichment data for library institutions
        inlined: true
      web_person_claims:
        range: Any
        description: Person claims extracted from web sources using CH-Annotator entity extraction
        inlined: true
      enriched_data:
        range: Any
        description: Enriched institution data including alternative names, descriptions, and classification
        inlined: true
      legal_entity:
        range: Any
        description: Legal entity information including KvK number, RSIN, bank account, legal form, and ANBI status
        inlined: true
      collections:
        range: Any
        multivalued: true
        inlined_as_list: true
        description: Collections held by the institution
      conflict_status:
        range: Any
        description: Conflict/destruction status for heritage at risk
        inlined: true
      # An attribute literally named `description` (a data key of the
      # custodian file), distinct from the `description` metadata key that
      # documents each attribute.
      description:
        range: Any
        description: Institution description (can be object with language keys like en, ar)
        inlined: true
      parent_organization:
        range: Any
        description: Parent organization information
        inlined: true
      time_of_destruction:
        range: Any
        description: Date and details of destruction for damaged heritage sites
        inlined: true
      wikipedia_enrichment:
        range: Any
        description: Wikipedia enrichment data with claims extracted from articles
        inlined: true
      financial_enrichment:
        range: Any
        description: Financial data enrichment (annual reports, funding sources)
        inlined: true
      publications_enrichment:
        range: Any
        description: Publications data enrichment (journals, reports, catalogs)
        inlined: true
      web_contact_data:
        range: Any
        description: Contact data extracted from institution website (persons, phone numbers, emails)
        inlined: true