# LinkML schema module (YAML): Provenance class — https://nde.nl/ontology/hc/classes/Provenance
# Schema identity: the id IRI plus the short name and title of this module.
id: https://nde.nl/ontology/hc/classes/Provenance
name: Provenance
title: Provenance

# CURIE prefix -> namespace IRI expansions.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  prov: http://www.w3.org/ns/prov#
  dct: http://purl.org/dc/terms/
  schema: http://schema.org/
  xsd: http://www.w3.org/2001/XMLSchema#
  # Same namespace IRI as `dct` above; both spellings are declared.
  dcterms: http://purl.org/dc/terms/
  crm: http://www.cidoc-crm.org/cidoc-crm/
  skos: http://www.w3.org/2004/02/skos/core#
  rdfs: http://www.w3.org/2000/01/rdf-schema#
  org: http://www.w3.org/ns/org#

# Modules this schema imports, kept in their original order: the LinkML
# built-in types, slot modules (../slots/...), and modules referenced with a
# relative ./ path.
# NOTE(review): ./RetrievalEvent and ./Source are not used as a range in the
# Provenance slot_usage/attributes visible in this file — confirm they are
# still required (they may be needed by the imported slot definitions).
imports:
  - linkml:types
  - ../slots/has_or_had_agent
  - ../slots/temporal_extent
  - ../slots/is_or_was_generated_by
  - ./GenerationEvent
  - ./ConfidenceScore
  - ../slots/is_or_was_retrieved_by
  - ../slots/is_or_was_retrieved_through
  - ./RetrievalAgent
  - ./RetrievalMethod
  - ./RetrievalEvent
  - ../slots/has_or_had_identifier
  - ../slots/has_or_had_quantity
  - ./Quantity
  - ../slots/is_or_was_based_on
  - ../slots/has_or_had_output
  - ./LLMResponse
  - ./Source

# Range used for slots/attributes that do not declare their own `range`.
default_range: string

classes:
  # A single provenance statement, mapped to prov:Activity.
  Provenance:
    # Literal block scalar so the Markdown paragraphs and bullet lists keep
    # their line structure when rendered.
    description: |-
      A single provenance statement describing the origin, lineage, or derivation of a piece of data.

      **DISTINCTION FROM ProvenanceBlock**:

      - `Provenance` represents a SINGLE provenance statement (one source, one activity)
      - `ProvenanceBlock` bundles MULTIPLE provenance sources together

      Use `Provenance` when:

      - Tracking provenance of individual data elements
      - Recording specific extraction or transformation activities
      - Linking data to its source agent/entity

      **ONTOLOGY MAPPING**:

      - class_uri: prov:Activity (the extraction/derivation activity)
      - Links to prov:Agent via has_or_had_agent
      - Links to prov:Entity via used (source) and generated (result)
    class_uri: prov:Activity
    close_mappings:
      - dct:ProvenanceStatement
      - schema:Action
    related_mappings:
      - prov:Entity
      - prov:Derivation

    # Slots reused from the imported ../slots/ modules; most are refined for
    # this class in `slot_usage` below.
    slots:
      - has_or_had_agent
      - temporal_extent
      - is_or_was_generated_by
      - is_or_was_retrieved_by
      - is_or_was_retrieved_through
      - has_or_had_identifier
      - has_or_had_quantity
      - is_or_was_based_on
      - has_or_had_output

    # Attributes declared inline on this class.
    attributes:
      source_entity:
        range: uriorcurie
        description: URI of the source entity from which data was derived (prov:used).
        slot_uri: prov:used
      generated_entity:
        range: uriorcurie
        description: URI of the entity generated by this provenance activity (prov:generated).
        slot_uri: prov:generated
      # NOTE(review): slot_usage for is_or_was_retrieved_through says it was
      # MIGRATED from extraction_method, yet this attribute is still declared
      # and is used by the first example below — confirm whether it should be
      # retired or kept alongside the new slot.
      extraction_method:
        range: string
        description: 'Method or process used to extract/derive the data. Examples: "xpath_extraction", "api_query", "manual_entry"'
      # NOTE(review): prov:value is defined for the value of a prov:Entity,
      # while this class maps to prov:Activity — confirm the mapping is intended.
      note:
        range: string
        description: Human-readable note about this provenance statement.
        slot_uri: prov:value

    # Class-local refinements (range, inlining, description) of the slots
    # listed above.
    slot_usage:
      is_or_was_retrieved_by:
        range: RetrievalAgent
        inlined: true
        description: Agent that performed the extraction (e.g. "claude-opus-4.5"). MIGRATED from extraction_agent (2026-01-26).
      is_or_was_retrieved_through:
        range: RetrievalMethod
        inlined: true
        description: Method used for extraction (e.g. "exa_crawling_exa"). MIGRATED from extraction_method (2026-01-26).
      temporal_extent:
        description: Time period of the provenance activity (extraction time). MIGRATED from extraction_timestamp per Rule 53 (2026-01-26).
      # NOTE(review): range is uriorcurie, but the second example below uses
      # the plain token "exa_12345678" — confirm whether `string` is the
      # intended range for request/process IDs.
      has_or_had_identifier:
        range: uriorcurie
        description: Request ID or process identifier. MIGRATED from request_id (2026-01-26).
      has_or_had_quantity:
        range: Quantity
        inlined: true
        description: Cost of the extraction. MIGRATED from cost_usd (2026-01-26).
      is_or_was_based_on:
        range: uriorcurie
        multivalued: true
        description: Source file or input data. MIGRATED from source_file/extraction_source (2026-01-26).
      has_or_had_output:
        range: LLMResponse
        inlined: true
        description: Raw output from LLM/Tool. MIGRATED from llm_response (2026-01-26).

    annotations:
      # Kept as quoted strings: a bare * would be read as a YAML alias, and
      # the bracketed list is stored as a string rather than a sequence.
      custodian_types: '["*"]'
      custodian_types_rationale: Provenance tracking is universal across all custodian types.
      custodian_types_primary: '*'
      specificity_score: 0.15
      specificity_rationale: Very low specificity - provenance metadata applies universally.

    # Example instances, embedded as YAML text. Literal block scalars keep the
    # nesting of the embedded documents intact.
    examples:
      - value: |
          Provenance:
            extraction_method: "xpath_extraction"
            source_entity: "https://example.org/webpage/12345"
            is_or_was_generated_by:
              has_or_had_score:
                has_or_had_score: 0.95
                has_or_had_method: "xpath_extraction"
                has_or_had_description: "Exact match at expected XPath"
            note: "Extracted from archived HTML using XPath"
        description: Provenance for an XPath-extracted value from an archived webpage. Uses new structured ConfidenceScore via GenerationEvent.
      - value: |
          Provenance:
            is_or_was_retrieved_by:
              has_or_had_label: "claude-opus-4.5"
            is_or_was_retrieved_through:
              has_or_had_label: "exa_crawling_exa"
            is_or_was_based_on:
              - "https://www.linkedin.com/in/..."
            has_or_had_identifier: "exa_12345678"
            has_or_had_quantity:
              quantity_value: 0.001
              has_or_had_unit:
                has_or_had_label: "USD"
            temporal_extent:
              begin_of_the_begin: "2025-12-12T22:00:00Z"
        description: Full extraction provenance example.