glam/frontend/public/schemas/20251121/linkml/modules/classes/ProvenanceBlock.yaml
kempersc 2d09776856 Refactor StorageCondition schema: Migrate compliance_status to has_or_had_status with ComplianceStatus class
- Removed compliance_status slot and replaced it with has_or_had_status.
- Updated has_or_had_status to use ComplianceStatus for structured representation.
- Adjusted examples to reflect new structure for compliance status.
- Updated documentation to indicate migration and provide details on the ComplianceStatus class.
2026-01-22 16:22:16 +01:00

170 lines
6.4 KiB
YAML

# ProvenanceBlock - Complete provenance tracking for the entry (supports both nested and flat formats)
# Extracted from custodian_source.yaml per Rule 38 (modular schema files)
# Extraction date: 2026-01-08
# Schema identity for this module.
id: https://nde.nl/ontology/hc/classes/ProvenanceBlock
name: ProvenanceBlock
title: ProvenanceBlock

# CURIE prefixes available to the URIs and mappings declared in this module.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  prov: http://www.w3.org/ns/prov#
  xsd: http://www.w3.org/2001/XMLSchema#
  pav: http://purl.org/pav/

# Modules this schema depends on (LinkML built-in types, one enum, sibling
# classes, and the shared is_or_was_generated_by slot).
imports:
  - linkml:types
  - ../enums/DataTierEnum
  - ./DataTierSummary
  - ./EnrichmentProvenance
  - ./ProvenanceSources
  # REMOVED 2026-01-19: ../slots/confidence_score - migrated to is_or_was_generated_by + GenerationEvent + ConfidenceScore (Rule 53)
  - ../slots/is_or_was_generated_by
  - ./GenerationEvent
  - ./ConfidenceScore
  - ./ConfidenceMethod

# Range used for any slot or attribute that does not declare its own range.
default_range: string
classes:
  # ProvenanceBlock: the provenance record attached to an entry. It combines
  # one shared slot (is_or_was_generated_by) with class-local attributes.
  ProvenanceBlock:
    description: >-
      Complete provenance tracking for the entry, supporting both nested and flat formats.
      Contains information about data sources, extraction methods, timestamps, confidence
      scores, and verification status. This is the primary class for documenting the
      lineage and quality of data in custodian records.
      Ontology mapping rationale:
      - class_uri is prov:Bundle because this represents a named set of provenance
      descriptions bundled together - it's provenance ABOUT provenance/data
      - close_mappings includes pav:Provenance conceptually as PAV's approach to
      tracking authoring and versioning provenance
      - related_mappings includes prov:Entity (the bundle is itself an entity) and
      prov:Activity (extraction/enrichment are activities with timestamps)
    class_uri: prov:Bundle
    close_mappings:
      - pav:Provenance
    related_mappings:
      - prov:Entity
      - prov:Activity

    # Shared slots reused by this class.
    slots:
      # REMOVED 2026-01-19: confidence_score - migrated to is_or_was_generated_by (Rule 53)
      - is_or_was_generated_by

    # Class-specific refinement of the shared slot: the value is an inlined,
    # optional GenerationEvent.
    slot_usage:
      is_or_was_generated_by:
        range: GenerationEvent
        required: false
        inlined: true
        description: >-
          Generation event containing confidence score for the provenance block.
          MIGRATED 2026-01-19: Replaces confidence_score slot with structured pattern.
        # NOTE(review): these examples place a nested mapping under `value`;
        # the LinkML metamodel also provides `object` for structured examples.
        # Confirm which form the project tooling expects.
        examples:
          - value:
              has_or_had_score:
                has_or_had_score: 0.95
                has_or_had_method: "automated_extraction"
            description: High confidence data extraction
          - value:
              has_or_had_score:
                has_or_had_score: 0.65
                has_or_had_method: "manual_review"
            description: Medium confidence manual review

    # NOTE(review): the nesting level of this `comments` key was reconstructed
    # from a flattened listing; confirm it belongs to the class rather than to
    # slot_usage.is_or_was_generated_by.
    comments:
      - "MIGRATED 2026-01-19: confidence_score → is_or_was_generated_by + GenerationEvent + ConfidenceScore (Rule 53)"

    # Class-local attributes, kept in their original order.
    attributes:
      schema_version:
        range: string
        description: Version of the provenance schema
      generated_at:
        range: datetime
        description: When provenance was generated
      sources:
        range: ProvenanceSources
        description: Nested sources by type
      data_tier_summary:
        range: DataTierSummary
        description: Summary of data tiers present
      data_source:
        range: string
        description: Data source type (CSV_REGISTRY, API_SCRAPING, etc.)
      data_sources:
        range: string
        multivalued: true
        inlined_as_list: true
        description: List of data sources (e.g., NDE registry, Google Maps, website)
      data_tier:
        range: DataTierEnum
        description: Quality tier of the data
      # Kept as a plain string (not datetime): per the description, values may
      # lack a timezone.
      extraction_date:
        range: string
        description: When data was extracted (ISO datetime string, may lack timezone)
      extraction_method:
        range: string
        description: Method used to extract the data
      enrichment_date:
        range: string
        description: When enrichment was performed (ISO date string)
      enrichment_method:
        range: string
        description: Method used to enrich the data (e.g., website_research)
      # Declared multivalued, although the description also mentions a single
      # string; see `notes` below for the attribute that models both shapes.
      note:
        range: string
        multivalued: true
        inlined_as_list: true
        description: Provenance notes (can be single string or list)
      source_url:
        range: uri
        description: URL of source data
      fix_script:
        range: string
        description: Script used to fix this entry
      last_manual_fix:
        range: datetime
        description: When last manual fix was applied
      source_type:
        range: string
        description: Type of source (e.g., wikidata_p856)
      discovery_timestamp:
        range: datetime
        description: When source was discovered
      wikidata_id:
        range: string
        description: Wikidata entity ID
      wikidata_property:
        range: string
        description: Wikidata property ID (e.g., P856)
      archive_location:
        range: string
        description: Location of archived copy (e.g., web/1186/hartebrug.nl)
      claim_extracted_from:
        range: string
        description: Source path from which claim was extracted (e.g., original_entry.reference)
      verified_via_web_archive:
        range: boolean
        description: Whether claim was verified via web archive
      # Accepts either one string or a list of strings (any_of below).
      # NOTE(review): `Any` is not defined in this module and is not among its
      # listed imports; confirm an imported module provides it.
      notes:
        range: Any
        description: Provenance notes (can be single string or list of strings)
        any_of:
          - range: string
          - range: string
            multivalued: true
      enrichment_provenance:
        range: EnrichmentProvenance
        description: Nested provenance for individual enrichment sources
      provenance_schema_version:
        range: string
        description: Version of the provenance schema format (e.g., "2.0")
      standards_compliance:
        range: string
        multivalued: true
        inlined_as_list: true
        description: Standards this provenance record complies with (e.g., W3C PROV-O, W3C SRI)
      # Free-form correction records; the expected fields are listed only in
      # the description, not enforced by a dedicated class.
      corrections:
        range: Any
        multivalued: true
        inlined_as_list: true
        description: List of corrections made to the entry with correction_date, correction_type, description, and corrected_by fields