# glam/schemas/20251121/linkml/modules/classes/CustodianSourceFile.yaml

# CustodianSourceFile - Root class representing the complete structure of a custodian YAML file
# Extracted from custodian_source.yaml per Rule 38 (modular schema files)
# Extraction date: 2026-01-08

# Schema identity for this module.
id: 'https://nde.nl/ontology/hc/classes/CustodianSourceFile'
name: CustodianSourceFile
title: CustodianSourceFile

# CURIE prefix expansions available to this module. Values are quoted so
# that a trailing '#' can never be mistaken for the start of a comment.
prefixes:
  linkml: 'https://w3id.org/linkml/'
  hc: 'https://nde.nl/ontology/hc/'
  schema: 'http://schema.org/'
  prov: 'http://www.w3.org/ns/prov#'
  xsd: 'http://www.w3.org/2001/XMLSchema#'

imports:
  - linkml:types

  - ../enums/EnrichmentStatusEnum
  - ../enums/GoogleMapsStatusEnum

  - ./ChAnnotatorBlock
  - ./CustodianLegalNameClaim
  - ./CustodianNameConsensus
  - ./DigitalPlatform
  - ./DigitalPlatformV2
  - ./GenealogiewerkbalkEnrichment
  - ./GhcidBlock
  - ./GoogleMapsEnrichment
  - ./GoogleMapsPlaywrightEnrichment
  - ./Identifier
  - ./LogoEnrichment
  - ./MuseumRegisterEnrichment
  - ./NanIsilEnrichment
  - ./NormalizedLocation
  - ./OriginalEntry
  - ./ProvenanceBlock
  - ./TimespanBlock
  - ./UnescoIchEnrichment
  - ./WebClaimsBlock
  - ./WebEnrichment
  - ./WikidataEnrichment
  - ./YoutubeEnrichment
# Range applied to any slot/attribute in this module that declares no
# explicit `range`.
default_range: string

classes:
  # Root class of a custodian YAML file. Each attribute below corresponds to
  # one top-level section of such a file; most ranges are classes or enums
  # pulled in through this module's `imports`.
  CustodianSourceFile:
    # Literal block scalar (|-) so the bullet list under "Ontology mapping
    # rationale" keeps its line breaks; a folded scalar (>-) would merge the
    # first bullet into the heading line.
    description: |-
      Root class representing the complete structure of a custodian YAML
      file, serving as the tree_root for LinkML validation. Aggregates
      all enrichment data including original entry, Google Maps, Wikidata,
      web claims, museum register, and other source data with full
      provenance tracking.

      Ontology mapping rationale:
      - class_uri is prov:Entity because this represents a complete data
        record/file that aggregates multiple enrichment sources
      - close_mappings includes prov:Bundle as this bundles multiple
        provenance-tracked data elements
      - related_mappings includes schema:Dataset as this is essentially
        a structured dataset file
    class_uri: prov:Entity
    close_mappings:
      - prov:Bundle
    related_mappings:
      - schema:Dataset
    # Marks this class as the entry point when validating a data file.
    tree_root: true
    attributes:
      # --- Source entry and processing state ---
      original_entry:
        range: OriginalEntry
        description: Source registry data from NDE CSV
      entry_index:
        # Accepts either type; see the description for when each applies.
        any_of:
          - range: integer
          - range: string
        description: >-
          Position in source CSV file (integer) or heritage reference code
          (string), can be null
      processing_timestamp:
        range: datetime
        description: When the entry was processed
      enrichment_status:
        range: EnrichmentStatusEnum
        description: Current enrichment processing status
      skip_reason:
        range: string
        description: Reason if enrichment was skipped
      provenance:
        range: ProvenanceBlock
        description: Full provenance tracking

      # --- Google Maps ---
      google_maps_enrichment:
        range: GoogleMapsEnrichment
        description: Google Maps Places API data
      google_maps_status:
        range: GoogleMapsStatusEnum
        description: Status of the Google Maps enrichment process
      google_maps_search_query:
        range: string
        description: Query used to search Google Maps

      # --- Registry, web and identifier enrichments ---
      web_enrichment:
        range: WebEnrichment
        description: Web archive metadata
      nan_isil_enrichment:
        range: NanIsilEnrichment
        description: Nationaal Archief ISIL registry match
      # Singular form; the plural `identifiers` attribute further down covers
      # the same kind of data for backward compatibility.
      identifier:
        range: Identifier
        multivalued: true
        inlined_as_list: true
        description: All identifiers (ISIL, GHCID variants, etc.)
      wikidata_enrichment:
        range: WikidataEnrichment
        description: Full Wikidata enrichment data
      ghcid:
        range: GhcidBlock
        description: GHCID generation metadata with history
      has_or_had_web_claim:
        range: WebClaimsBlock
        description: Claims extracted from archived websites
      custodian_name:
        range: CustodianNameConsensus
        description: Consensus name determination
      genealogiewerkbalk_enrichment:
        range: GenealogiewerkbalkEnrichment
        description: Dutch municipal/provincial archive links
      digital_platforms:
        range: DigitalPlatform
        multivalued: true
        inlined_as_list: true
        description: Websites and digital collection platforms
      unesco_ich_enrichment:
        range: UnescoIchEnrichment
        description: UNESCO Intangible Cultural Heritage elements
      timespan:
        range: TimespanBlock
        description: CRMsci temporal bounds
      location:
        range: NormalizedLocation
        description: Normalized geographic data
      custodian_legal_name:
        range: CustodianLegalNameClaim
        description: Legal name claim with provenance
      google_maps_playwright_enrichment:
        range: GoogleMapsPlaywrightEnrichment
        description: Google Maps data extracted via Playwright browser automation
      museum_register_enrichment:
        range: MuseumRegisterEnrichment
        description: Dutch Museum Register (Museumregister) data
      qp_resolution_timestamp:
        range: datetime
        description: Timestamp when query parameter resolution was performed

      # --- Wikidata search bookkeeping ---
      wikidata_enrichment_status:
        range: string
        description: Status of Wikidata enrichment process
      wikidata_search_timestamp:
        range: datetime
        description: Timestamp when Wikidata search was performed

      # --- YouTube ---
      youtube_enrichment:
        range: YoutubeEnrichment
        description: YouTube channel/video data for the institution
      youtube_status:
        range: string
        description: YouTube search status (SUCCESS, NOT_FOUND, etc.)
      youtube_search_query:
        range: string
        description: Query used to search for YouTube channel
      youtube_search_timestamp:
        range: datetime
        description: When YouTube search was performed

      # --- Annotation, platform and logo data ---
      ch_annotator:
        range: ChAnnotatorBlock
        description: CH-Annotator convention metadata and claims
      identifiers:
        range: Identifier
        multivalued: true
        inlined_as_list: true
        description: All identifiers (ISIL, GHCID variants, etc.) - plural form for backward compatibility
      digital_platform_v2:
        range: DigitalPlatformV2
        description: Enhanced digital platform data with organization profile and contacts
      logo_enrichment:
        range: LogoEnrichment
        description: Logo and favicon enrichment data from web scraping

      # --- Loosely typed sections ---
      # NOTE(review): `Any` is not defined in this module; presumably it is
      # supplied by one of the imported modules (a class mapped to
      # linkml:Any) - confirm that it resolves.
      crawl4ai_enrichment:
        range: Any
        description: Crawl4AI web scraping enrichment data with retrieval metadata
        inlined: true
      unesco_mow_enrichment:
        range: Any
        description: UNESCO Memory of the World inscription data for custodian holdings
        inlined: true
      # NOTE(review): kebab-case name, unlike every other attribute here
      # (snake_case); presumably it mirrors the key used in the data files -
      # confirm before renaming.
      web-enrichments:
        range: Any
        description: Web enrichment claims extracted via hybrid LLM/pattern extraction
        inlined: true
      alternative_names:
        range: Any
        multivalued: true
        inlined_as_list: true
        description: Alternative names for the institution from various sources
      legal_status:
        range: Any
        description: Legal status and registration information for the institution
        inlined: true
      person_observations:
        range: Any
        multivalued: true
        inlined_as_list: true
        description: Observations about people associated with the institution
      staff:
        range: Any
        description: Staff members extracted from LinkedIn or other sources
        inlined: true