# Commit note (from the repository page this file was copied from):
#   - Removed compliance_status slot and replaced it with has_or_had_status.
#   - Updated has_or_had_status to use ComplianceStatus for structured representation.
#   - Adjusted examples to reflect new structure for compliance status.
#   - Updated documentation to indicate migration and provide details on the ComplianceStatus class.
# File info: 170 lines, 6.4 KiB, YAML
# ProvenanceBlock - Complete provenance tracking for the entry (supports both
# nested and flat formats)
# Extracted from custodian_source.yaml per Rule 38 (modular schema files)
# Extraction date: 2026-01-08

# Schema identity.
id: https://nde.nl/ontology/hc/classes/ProvenanceBlock
name: ProvenanceBlock
title: ProvenanceBlock

# CURIE prefixes used by the mappings and imports below.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  prov: http://www.w3.org/ns/prov#
  xsd: http://www.w3.org/2001/XMLSchema#
  pav: http://purl.org/pav/

imports:
  # LinkML built-in types.
  - linkml:types

  # Enumerations.
  - ../enums/DataTierEnum

  # Sibling class modules and shared slots.
  - ./DataTierSummary
  - ./EnrichmentProvenance
  - ./ProvenanceSources
  # REMOVED 2026-01-19: ../slots/confidence_score - migrated to is_or_was_generated_by + GenerationEvent + ConfidenceScore (Rule 53)
  - ../slots/is_or_was_generated_by
  - ./GenerationEvent
  - ./ConfidenceScore
  - ./ConfidenceMethod

# Slots/attributes that omit `range` fall back to string.
default_range: string

classes:
  # ProvenanceBlock: provenance record attached to an entry. It combines one
  # shared slot (is_or_was_generated_by) with class-local attributes covering
  # sources, extraction/enrichment metadata, Wikidata references and fixes.
  ProvenanceBlock:
    # NOTE(review): the hanging indent of the bullet continuations below is
    # reconstructed; in a folded scalar it decides where line breaks survive.
    description: >-
      Complete provenance tracking for the entry, supporting both nested and flat formats.
      Contains information about data sources, extraction methods, timestamps, confidence
      scores, and verification status. This is the primary class for documenting the
      lineage and quality of data in custodian records.

      Ontology mapping rationale:
      - class_uri is prov:Bundle because this represents a named set of provenance
        descriptions bundled together - it's provenance ABOUT provenance/data
      - close_mappings includes pav:Provenance conceptually as PAV's approach to
        tracking authoring and versioning provenance
      - related_mappings includes prov:Entity (the bundle is itself an entity) and
        prov:Activity (extraction/enrichment are activities with timestamps)
    class_uri: prov:Bundle
    # NOTE(review): confirm pav:Provenance resolves to an actual PAV term; the
    # description above only claims a conceptual correspondence.
    close_mappings:
      - pav:Provenance
    related_mappings:
      - prov:Entity
      - prov:Activity
    slots:
      # REMOVED 2026-01-19: confidence_score - migrated to is_or_was_generated_by (Rule 53)
      - is_or_was_generated_by
    slot_usage:
      is_or_was_generated_by:
        range: GenerationEvent
        required: false
        inlined: true
        description: >-
          Generation event containing confidence score for the provenance block.
          MIGRATED 2026-01-19: Replaces confidence_score slot with structured pattern.
        # NOTE(review): nesting inside each `value` is reconstructed (outer
        # has_or_had_score holding the numeric score and the method) - confirm
        # against GenerationEvent/ConfidenceScore. Also check whether the
        # tooling accepts a mapping under `value`; LinkML's Example element
        # offers `object` for structured examples.
        examples:
          - value:
              has_or_had_score:
                has_or_had_score: 0.95
                has_or_had_method: "automated_extraction"
            description: High confidence data extraction
          - value:
              has_or_had_score:
                has_or_had_score: 0.65
                has_or_had_method: "manual_review"
            description: Medium confidence manual review
    # NOTE(review): placed at class level; it may originally have belonged to
    # the is_or_was_generated_by slot usage - confirm.
    comments:
      - "MIGRATED 2026-01-19: confidence_score → is_or_was_generated_by + GenerationEvent + ConfidenceScore (Rule 53)"
    attributes:
      # --- Schema / generation metadata ---
      schema_version:
        range: string
        description: Version of the provenance schema
      generated_at:
        range: datetime
        description: When provenance was generated

      # --- Sources and data tiers ---
      sources:
        range: ProvenanceSources
        description: Nested sources by type
      data_tier_summary:
        range: DataTierSummary
        description: Summary of data tiers present
      data_source:
        range: string
        description: Data source type (CSV_REGISTRY, API_SCRAPING, etc.)
      data_sources:
        range: string
        multivalued: true
        inlined_as_list: true
        description: List of data sources (e.g., NDE registry, Google Maps, website)
      data_tier:
        range: DataTierEnum
        description: Quality tier of the data

      # --- Extraction and enrichment ---
      # The date fields here are plain strings rather than datetime; the
      # extraction_date description notes values may lack a timezone.
      extraction_date:
        range: string
        description: When data was extracted (ISO datetime string, may lack timezone)
      extraction_method:
        range: string
        description: Method used to extract the data
      enrichment_date:
        range: string
        description: When enrichment was performed (ISO date string)
      enrichment_method:
        range: string
        description: Method used to enrich the data (e.g., website_research)

      # NOTE(review): `note` overlaps with `notes` further down; both are
      # described as provenance notes - confirm both spellings are needed.
      note:
        range: string
        multivalued: true
        inlined_as_list: true
        description: Provenance notes (can be single string or list)
      source_url:
        range: uri
        description: URL of source data

      # --- Manual fixes ---
      fix_script:
        range: string
        description: Script used to fix this entry
      last_manual_fix:
        range: datetime
        description: When last manual fix was applied

      # --- Source discovery / Wikidata ---
      source_type:
        range: string
        description: Type of source (e.g., wikidata_p856)
      discovery_timestamp:
        range: datetime
        description: When source was discovered
      wikidata_id:
        range: string
        description: Wikidata entity ID
      wikidata_property:
        range: string
        description: Wikidata property ID (e.g., P856)

      # --- Archiving and claim verification ---
      archive_location:
        range: string
        description: Location of archived copy (e.g., web/1186/hartebrug.nl)
      claim_extracted_from:
        range: string
        description: Source path from which claim was extracted (e.g., original_entry.reference)
      verified_via_web_archive:
        range: boolean
        description: Whether claim was verified via web archive

      # Accepts either one string or a list of strings via any_of.
      # NOTE(review): `Any` is not defined in this file; presumably it comes
      # from one of the imported modules - verify.
      notes:
        range: Any
        description: Provenance notes (can be single string or list of strings)
        any_of:
          - range: string
          - range: string
            multivalued: true
      enrichment_provenance:
        range: EnrichmentProvenance
        description: Nested provenance for individual enrichment sources
      provenance_schema_version:
        range: string
        description: Version of the provenance schema format (e.g., "2.0")
      standards_compliance:
        range: string
        multivalued: true
        inlined_as_list: true
        description: Standards this provenance record complies with (e.g., W3C PROV-O, W3C SRI)
      # Untyped (`Any`) entries; the expected fields are listed only in the
      # description and are not enforced by this schema.
      corrections:
        range: Any
        multivalued: true
        inlined_as_list: true
        description: List of corrections made to the entry with correction_date, correction_type, description, and corrected_by fields