# Source file: glam/schemas/20251121/linkml/modules/classes/Provenance.yaml
# Listing metadata: 114 lines, 5.2 KiB, YAML

# LinkML schema module that declares the `Provenance` class.
id: https://nde.nl/ontology/hc/classes/Provenance
name: Provenance
title: Provenance

# Namespace prefixes usable in CURIEs throughout this module.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  prov: http://www.w3.org/ns/prov#
  dct: http://purl.org/dc/terms/
  schema: http://schema.org/
  xsd: http://www.w3.org/2001/XMLSchema#
  # NOTE(review): `dcterms` expands to the same namespace as `dct` above.
  # Both are kept so existing CURIEs keep resolving; confirm whether one can go.
  dcterms: http://purl.org/dc/terms/
  crm: http://www.cidoc-crm.org/cidoc-crm/
  skos: http://www.w3.org/2004/02/skos/core#
  rdfs: http://www.w3.org/2000/01/rdf-schema#
  org: http://www.w3.org/ns/org#

# Slot and class modules this schema depends on (paths are relative to this file).
imports:
- linkml:types
- ../slots/has_or_had_agent
- ../slots/temporal_extent
- ../slots/is_or_was_generated_by
- ./GenerationEvent
- ./ConfidenceScore
- ../slots/is_or_was_retrieved_by
- ../slots/is_or_was_retrieved_through
- ./RetrievalAgent
- ./RetrievalMethod
- ./RetrievalEvent
- ../slots/has_or_had_identifier
- ../slots/has_or_had_quantity
- ./Quantity
- ../slots/is_or_was_based_on
- ../slots/has_or_had_output
- ./LLMResponse
- ./Source

# Range applied to any slot or attribute that does not declare its own.
default_range: string
classes:
  # One provenance statement, mapped to prov:Activity (see class_uri).
  Provenance:
    description: |-
      A single provenance statement describing the origin, lineage, or derivation of a piece of data.

      **DISTINCTION FROM ProvenanceBlock**:
      - `Provenance` represents a SINGLE provenance statement (one source, one activity)
      - `ProvenanceBlock` bundles MULTIPLE provenance sources together

      Use `Provenance` when:
      - Tracking provenance of individual data elements
      - Recording specific extraction or transformation activities
      - Linking data to its source agent/entity

      **ONTOLOGY MAPPING**:
      - class_uri: prov:Activity (the extraction/derivation activity)
      - Links to prov:Agent via has_or_had_agent
      - Links to prov:Entity via source_entity (prov:used) and generated_entity (prov:generated)
    class_uri: prov:Activity
    close_mappings:
    - dct:ProvenanceStatement
    - schema:Action
    related_mappings:
    - prov:Entity
    - prov:Derivation

    # Shared slots pulled in through `imports`; several are refined in slot_usage below.
    slots:
    - has_or_had_agent
    - temporal_extent
    - is_or_was_generated_by
    - is_or_was_retrieved_by
    - is_or_was_retrieved_through
    - has_or_had_identifier
    - has_or_had_quantity
    - is_or_was_based_on
    - has_or_had_output

    # Attributes declared inline on this class only.
    attributes:
      source_entity:
        range: uriorcurie
        description: URI of the source entity from which data was derived (prov:used).
        slot_uri: prov:used
      generated_entity:
        range: uriorcurie
        description: URI of the entity generated by this provenance activity (prov:generated).
        slot_uri: prov:generated
      # NOTE(review): slot_usage for is_or_was_retrieved_through states it was
      # MIGRATED from extraction_method, yet this attribute is still declared and
      # still used by the first example. Confirm whether it should be deprecated.
      extraction_method:
        range: string
        description: >-
          Method or process used to extract/derive the data. Examples:
          "xpath_extraction", "api_query", "manual_entry"
      note:
        range: string
        description: Human-readable note about this provenance statement.
        slot_uri: prov:value

    # Class-specific refinements (range, inlining, description) of the shared slots.
    slot_usage:
      is_or_was_retrieved_by:
        range: RetrievalAgent
        inlined: true
        description: >-
          Agent that performed the extraction (e.g. "claude-opus-4.5").
          MIGRATED from extraction_agent (2026-01-26).
      is_or_was_retrieved_through:
        range: RetrievalMethod
        inlined: true
        description: >-
          Method used for extraction (e.g. "exa_crawling_exa").
          MIGRATED from extraction_method (2026-01-26).
      temporal_extent:
        description: >-
          Time period of the provenance activity (extraction time).
          MIGRATED from extraction_timestamp per Rule 53 (2026-01-26).
      has_or_had_identifier:
        range: uriorcurie
        description: Request ID or process identifier. MIGRATED from request_id (2026-01-26).
      has_or_had_quantity:
        range: Quantity
        inlined: true
        description: Cost of the extraction. MIGRATED from cost_usd (2026-01-26).
      is_or_was_based_on:
        range: uriorcurie
        multivalued: true
        description: >-
          Source file or input data.
          MIGRATED from source_file/extraction_source (2026-01-26).
      has_or_had_output:
        range: LLMResponse
        inlined: true
        description: Raw output from LLM/Tool. MIGRATED from llm_response (2026-01-26).

    annotations:
      # Kept as a quoted string: the value is the literal text ["*"], not a YAML list.
      custodian_types: '["*"]'
      custodian_types_rationale: Provenance tracking is universal across all custodian types.
      custodian_types_primary: '*'
      specificity_score: 0.15
      specificity_rationale: Very low specificity - provenance metadata applies universally.

    # Example instances, stored as embedded YAML text in literal block scalars.
    examples:
    # NOTE(review): nesting of has_or_had_method / has_or_had_description under the
    # inner score mapping is reconstructed; verify against ConfidenceScore.
    - value: |
        Provenance:
          extraction_method: "xpath_extraction"
          source_entity: "https://example.org/webpage/12345"
          is_or_was_generated_by:
            has_or_had_score:
              has_or_had_score: 0.95
              has_or_had_method: "xpath_extraction"
              has_or_had_description: "Exact match at expected XPath"
          note: "Extracted from archived HTML using XPath"
      description: >-
        Provenance for an XPath-extracted value from an archived webpage.
        Uses new structured ConfidenceScore via GenerationEvent.
    - value: |
        Provenance:
          is_or_was_retrieved_by:
            has_or_had_label: "claude-opus-4.5"
          is_or_was_retrieved_through:
            has_or_had_label: "exa_crawling_exa"
          is_or_was_based_on:
            - "https://www.linkedin.com/in/..."
          has_or_had_identifier: "exa_12345678"
          has_or_had_quantity:
            quantity_value: 0.001
            has_or_had_unit:
              has_or_had_label: "USD"
          temporal_extent:
            begin_of_the_begin: "2025-12-12T22:00:00Z"
      description: Full extraction provenance example.