# Source path: glam/frontend/public/schemas/20251121/linkml/modules/classes/ProvenanceBlock.yaml
# File-viewer metadata: 122 lines, 4.3 KiB, YAML

# ProvenanceBlock - Complete provenance tracking for the entry (supports both nested and flat formats)
# Extracted from custodian_source.yaml per Rule 38 (modular schema files)
# Extraction date: 2026-01-08
# Schema identity: `id` is the canonical URI of this module; `name` and
# `title` are its machine and human labels.
id: https://nde.nl/ontology/hc/classes/ProvenanceBlock
name: ProvenanceBlock
title: ProvenanceBlock
# CURIE prefix -> namespace map. The entries must be nested under `prefixes`;
# at column 0 they would be parsed as unrelated top-level keys.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  prov: http://www.w3.org/ns/prov#
  xsd: http://www.w3.org/2001/XMLSchema#
  pav: http://purl.org/pav/
# `linkml:types` provides the built-in scalar types; the relative imports
# provide the enum and classes that this module's slots use as ranges.
imports:
- linkml:types
- ../enums/DataTierEnum
- ./DataTierSummary
- ./ProvenanceSources
# Range applied to any slot that does not declare one explicitly.
default_range: string
classes:
  # ProvenanceBlock: one record describing where an entry's data came from,
  # how and when it was extracted or enriched, and how it was verified.
  ProvenanceBlock:
    # Literal block scalar (|-) so the rationale bullets stay on separate
    # lines; a folded scalar (>-) would merge them into a single paragraph.
    description: |-
      Complete provenance tracking for the entry, supporting both nested and flat formats.
      Contains information about data sources, extraction methods, timestamps, confidence
      scores, and verification status. This is the primary class for documenting the
      lineage and quality of data in custodian records.

      Ontology mapping rationale:
      - class_uri is prov:Bundle because this represents a named set of provenance
        descriptions bundled together - it's provenance ABOUT provenance/data
      - close_mappings includes pav:Provenance conceptually as PAV's approach to
        tracking authoring and versioning provenance
      - related_mappings includes prov:Entity (the bundle is itself an entity) and
        prov:Activity (extraction/enrichment are activities with timestamps)
    class_uri: prov:Bundle
    # NOTE(review): confirm that pav:Provenance resolves to a term actually
    # defined by the PAV vocabulary; the description calls it a conceptual match.
    close_mappings:
    - pav:Provenance
    related_mappings:
    - prov:Entity
    - prov:Activity
    attributes:
      schema_version:
        range: string
        description: Version of the provenance schema
      generated_at:
        range: datetime
        description: When provenance was generated
      sources:
        range: ProvenanceSources
        description: Nested sources by type
      data_tier_summary:
        range: DataTierSummary
        description: Summary of data tiers present
      data_source:
        range: string
        description: Data source type (CSV_REGISTRY, API_SCRAPING, etc.)
      data_sources:
        range: string
        multivalued: true
        inlined_as_list: true
        description: List of data sources (e.g., NDE registry, Google Maps, website)
      data_tier:
        range: DataTierEnum
        description: Quality tier of the data
      # Typed as string rather than datetime: per the description, recorded
      # values may lack a timezone.
      extraction_date:
        range: string
        description: When data was extracted (ISO datetime string, may lack timezone)
      extraction_method:
        range: string
        description: Method used to extract the data
      # Typed as string: the description specifies a date-only ISO string.
      enrichment_date:
        range: string
        description: When enrichment was performed (ISO date string)
      enrichment_method:
        range: string
        description: Method used to enrich the data (e.g., website_research)
      # Bounds enforce the 0-1 range that the description documents.
      confidence_score:
        range: float
        minimum_value: 0
        maximum_value: 1
        description: Confidence score (0-1)
      # Two alternatives: a single string, or a list of strings. The
      # multivalued/inlined_as_list flags belong to the second branch only.
      # NOTE(review): verify that the generators in use honour `multivalued`
      # inside an any_of branch.
      note:
        any_of:
        - range: string
        - range: string
          multivalued: true
          inlined_as_list: true
        description: Provenance notes (can be single string or list)
      source_url:
        range: uri
        description: URL of source data
      fix_script:
        range: string
        description: Script used to fix this entry
      last_manual_fix:
        range: datetime
        description: When last manual fix was applied
      source_type:
        range: string
        description: Type of source (e.g., wikidata_p856)
      discovery_timestamp:
        range: datetime
        description: When source was discovered
      wikidata_id:
        range: string
        description: Wikidata entity ID
      wikidata_property:
        range: string
        description: Wikidata property ID (e.g., P856)
      archive_location:
        range: string
        description: Location of archived copy (e.g., web/1186/hartebrug.nl)
      claim_extracted_from:
        range: string
        description: Source path from which claim was extracted (e.g., original_entry.reference)
      verified_via_web_archive:
        range: boolean
        description: Whether claim was verified via web archive