# glam/schemas/20251121/linkml/modules/classes/LinkupTimelineEvent.yaml
# 2025-12-21 00:01:54 +01:00

# 499 lines
# 18 KiB
# YAML

# LinkupTimelineEvent Class
# Models timeline events extracted from Linkup API responses with API-appropriate provenance
#
# Created December 2025 for Dutch GLAM Timeline Event Enrichment Phase 2
#
# Key principle:
# Linkup API returns LLM-generated answers with source URLs, not XPath locations.
# Therefore, provenance is different from WebClaim:
# - Store the query that was sent to Linkup
# - Store the LLM answer (which may contain hallucinations)
# - Store source URLs (for manual verification)
# - Archive the complete API response JSON
#
# This acknowledges that Linkup data is TIER_4_INFERRED (LLM-generated)
# and requires manual verification before promotion to higher tiers.
#
# Event types are constrained to OrganizationalChangeEventTypeEnum values:
# FOUNDING, MERGER, DISSOLUTION, RENAMING, TRANSFER, EXPANSION,
# SPLIT, SPIN_OFF, REDUCTION, REORGANIZATION
# Schema identity: URI, short name and human-readable title of this module.
id: https://nde.nl/ontology/hc/class/LinkupTimelineEvent
name: LinkupTimelineEvent
title: Linkup Timeline Event Class - API-Extracted Historical Events

# CURIE prefixes referenced by the slot_uri, class_uri and mapping values
# in this module. Entries must be nested under `prefixes`; at column 0 they
# would be read as unrelated top-level schema keys.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/
  prov: http://www.w3.org/ns/prov#
  xsd: http://www.w3.org/2001/XMLSchema#
  crm: http://www.cidoc-crm.org/cidoc-crm/
  tooi: https://identifier.overheid.nl/tooi/def/ont/

# linkml:types provides the built-in ranges used in this module (string,
# boolean, datetime). The two enum modules provide the ranges of the
# event_type and data_tier slots.
imports:
  - linkml:types
  - ../enums/OrganizationalChangeEventTypeEnum
  - ../enums/DataTierEnum

# Prefix used for elements of this schema that do not declare their own URI.
default_prefix: hc
# Enumerations defined locally in this module.
enums:
  # Granularity of the value stored in the event_date slot.
  DatePrecisionEnum:
    description: |
      Precision level of an extracted date.
      Used to indicate how specific the date information is.
    permissible_values:
      day:
        description: Full date with day precision (YYYY-MM-DD)
        comments:
          - "Example: 2005-04-30 (30 april 2005)"
      month:
        description: Month precision only (YYYY-MM)
        comments:
          - "Example: 2005-04 (april 2005)"
      year:
        description: Year precision only (YYYY)
        comments:
          - "Example: 2005 (in 2005)"
      decade:
        # The event_date pattern only admits digits and hyphens, so a decade
        # is stored as its first year, never with a trailing "s".
        description: >-
          Decade precision. event_date holds the first year of the decade
          as YYYY (e.g. "2000" for the 2000s).
        comments:
          - "Example: 2000s (begin 21e eeuw)"
      century:
        description: >-
          Century precision. event_date holds the first year of the century
          as YYYY (e.g. "1900" for the 20th century).
        comments:
          - "Example: 1900s (begin 20e eeuw)"
      unknown:
        description: Date mentioned but precision cannot be determined

  # NOTE: Renamed from ExtractionMethodEnum to LinkupExtractionMethodEnum
  # to avoid naming conflict with ProfileExtractionMethodEnum in ExtractionMetadata.yaml
  # These are semantically different enums for different extraction contexts.
  LinkupExtractionMethodEnum:
    description: |
      Method used to extract the event from Linkup API response.
      Used specifically for Linkup API timeline event extraction.
    permissible_values:
      linkup_answer_regex:
        description: Date extracted via regex patterns from Linkup answer text
        comments:
          - "Patterns: YYYY-MM-DD, DD-MM-YYYY, 'in YYYY', 'op DD maand YYYY'"
      linkup_answer_llm:
        description: Date extracted using LLM analysis of Linkup answer
        comments:
          - "Used for complex temporal expressions"
      linkup_source_regex:
        description: Date extracted via regex from source snippets
        comments:
          - "Fallback when answer lacks date but sources have it"
      manual_verification:
        description: Event manually verified and corrected
        comments:
          - "Promoted from TIER_4 to higher tier after verification"
# Slot definitions used by the LinkupTimelineEvent class.
slots:
  # ===== EVENT CONTENT FIELDS =====
  event_type:
    slot_uri: hc:eventType
    range: OrganizationalChangeEventTypeEnum
    required: true
    description: |
      Type of organizational change event.
      Constrained to values from OrganizationalChangeEventTypeEnum.
      Mapping from extraction patterns:
      - founding, opgericht, gesticht → FOUNDING
      - merger, fusie, samengevoegd → MERGER
      - dissolution, opgeheven, gesloten → DISSOLUTION
      - name_change, hernoemd, naamswijziging → RENAMING
      - relocation, verhuisd, verplaatst → TRANSFER
      - expansion, uitgebreid, geabsorbeerd → EXPANSION
      - split, opgesplitst → SPLIT
      - spin_off, afgesplitst → SPIN_OFF
      - reduction, ingekrompen → REDUCTION
      - reorganization, herstructurering → REORGANIZATION

  event_date:
    slot_uri: schema:startDate
    range: string
    description: |
      The date of the event in ISO 8601 format.
      Precision indicated by date_precision slot.
      Format depends on precision:
      - day: YYYY-MM-DD (e.g., "2005-04-30")
      - month: YYYY-MM (e.g., "2005-04")
      - year: YYYY (e.g., "2005")
      - decade: YYYY (e.g., "2000" for 2000s)
      - century: YYYY (e.g., "1900" for 20th century)
    # Four-digit year, optionally followed by a month (01-12) and then a day
    # (01-31). Range checks reject impossible values such as "2005-13-45";
    # calendar validity (e.g. 30 February) is beyond what a regex can check.
    pattern: "^\\d{4}(-(0[1-9]|1[0-2])(-(0[1-9]|[12]\\d|3[01]))?)?$"

  date_precision:
    slot_uri: hc:datePrecision
    range: DatePrecisionEnum
    required: true
    description: |
      Precision level of the event_date.
      Essential for proper interpretation and display.

  approximate:
    slot_uri: hc:approximate
    range: boolean
    required: true
    description: |
      Whether the date is approximate (circa, ongeveer, rond).
      True if source uses hedging language:
      - "circa 1900", "rond 2000", "ongeveer in 2005"
      - "begin 20e eeuw", "eind jaren '90"

  description:
    slot_uri: schema:description
    range: string
    required: true
    description: |
      Human-readable description of the event.
      Should summarize what happened, derived from Linkup answer.

  # ===== PROVENANCE FIELDS =====
  # These fields enable verification of Linkup-extracted data
  source_urls:
    slot_uri: dcterms:source
    range: string
    multivalued: true
    description: |
      URLs of sources cited by Linkup API for this event.
      These are the URLs from the sources[].url array in API response.
      Used for manual verification - users can check these URLs
      to confirm the extracted information.

  linkup_query:
    slot_uri: hc:linkupQuery
    range: string
    required: true
    description: |
      The exact query sent to Linkup API.
      Essential for reproducibility and audit trail.
      Format typically: "{institution_name}" {location} {event_keywords}
      Example: "Regionaal Historisch Centrum Drents Archief" Assen opgericht OR gesticht

  linkup_answer:
    slot_uri: hc:linkupAnswer
    range: string
    required: true
    description: |
      The LLM-generated answer from Linkup API.
      This is the api_response.answer field from the JSON.
      IMPORTANT: This is LLM-generated content and may contain:
      - Hallucinations (fabricated facts)
      - Incorrect interpretations
      - Conflated information from multiple sources
      Always verify against source_urls before promotion to higher tiers.

  fetch_timestamp:
    slot_uri: prov:generatedAtTime
    range: datetime
    required: true
    description: |
      Timestamp when the Linkup API was called.
      ISO 8601 format with timezone.
      Example: "2025-12-15T16:04:38Z"

  archive_path:
    slot_uri: hc:archivePath
    range: string
    required: true
    description: |
      Relative path to the archived Linkup API response JSON file.
      Path is relative to the data/custodian/ directory.
      Format: web/{entry_number}/linkup/linkup_{event_type}_{timestamp}.json
      Example: web/0002/linkup/linkup_founding_20251215T160438Z.json
      This JSON file contains the complete API response for audit purposes.

  extraction_method:
    slot_uri: hc:extractionMethod
    range: LinkupExtractionMethodEnum
    required: true
    description: |
      Method used to extract the event information from Linkup response.

  extraction_timestamp:
    slot_uri: hc:extractionTimestamp
    range: datetime
    required: true
    description: |
      Timestamp when the event was extracted from the archived JSON.
      May differ from fetch_timestamp if extraction happens later.

  data_tier:
    slot_uri: hc:dataTier
    range: DataTierEnum
    required: true
    description: |
      Data quality tier for this event.
      Typical values:
      - TIER_4_INFERRED: Initial Linkup extraction (LLM-generated)
      - TIER_3_CROWD_SOURCED: Verified against Wikipedia/Wikidata
      - TIER_2_VERIFIED: Verified against institutional website
      - TIER_1_AUTHORITATIVE: Verified against official registry
# Class definitions for this module.
classes:
  # One organizational-change event derived from a Linkup API response,
  # carrying the query, answer, source URLs and archive path as provenance.
  LinkupTimelineEvent:
    class_uri: prov:Entity
    description: |
      A historical event extracted from Linkup API response with appropriate provenance.

      **PURPOSE**

      Captures organizational change events (founding, mergers, name changes, etc.)
      discovered through Linkup API queries, with full provenance chain.

      **PROVENANCE MODEL**

      Unlike WebClaim which requires XPath provenance to specific HTML elements,
      LinkupTimelineEvent acknowledges that Linkup data is LLM-generated:

      1. **Query**: What was asked (linkup_query)
      2. **Answer**: LLM-generated response (linkup_answer) - may contain errors
      3. **Sources**: URLs cited by the API (source_urls) - for verification
      4. **Archive**: Full API response JSON (archive_path) - for audit

      **DATA QUALITY**

      All Linkup-extracted events start at TIER_4_INFERRED because:

      - LLM answers may hallucinate facts
      - Dates may be incorrectly interpreted
      - Sources may be misquoted or conflated

      Events can be promoted to higher tiers after verification:

      - Check source_urls to confirm facts
      - Cross-reference with Wikidata, institutional websites
      - Update data_tier and add verification notes

      **EVENT TYPE MAPPING**

      Events are classified using OrganizationalChangeEventTypeEnum:

      - FOUNDING: Institution creation (opgericht, gesticht)
      - MERGER: Multiple institutions combining (fusie, samenvoeging)
      - DISSOLUTION: Institution closure (opgeheven, gesloten)
      - RENAMING: Name change only (hernoemd, naamswijziging)
      - TRANSFER: Physical relocation (verhuisd, verplaatst)
      - EXPANSION: Absorbing other units (uitgebreid, geabsorbeerd)
      - SPLIT: Division into multiple units (opgesplitst)
      - SPIN_OFF: Parts becoming independent (afgesplitst)
      - REDUCTION: Scope decrease (ingekrompen)
      - REORGANIZATION: Complex restructuring (herstructurering)

      **EXCLUDED EVENT TYPES**

      Some extraction patterns are NOT mapped to events:

      - predecessor: This is a relationship, not an event
      - friends_org: Separate organization (Vrienden van...)
      - reopening: Not in OrganizationalChangeEventTypeEnum

      **EXAMPLE USAGE**

      ```yaml
      linkup_enrichment:
        timeline_events:
          - event_type: FOUNDING
            event_date: "2005-04-30"
            date_precision: day
            approximate: false
            description: >-
              Het RHC Drents Archief werd opgericht op 30 april 2005.
              Het is de voortzetting van het Rijksarchief in Drenthe (sinds 2000).
            source_urls:
              - "https://nl.wikipedia.org/wiki/Drents_Archief"
              - "https://bizzy.ai/nl/nl/52454037/regionaal-historisch-centrum-rhc-drents-archief"
            linkup_query: >-
              "Regionaal Historisch Centrum (RHC) Drents Archief" Assen
              opgericht OR gesticht OR sinds
            linkup_answer: >-
              Het Regionaal Historisch Centrum (RHC) Drents Archief in Assen
              werd opgericht op 30 april 2005...
            fetch_timestamp: "2025-12-15T16:04:38Z"
            archive_path: web/0002/linkup/linkup_founding_20251215T160438Z.json
            extraction_method: linkup_answer_regex
            extraction_timestamp: "2025-12-16T10:00:00Z"
            data_tier: TIER_4_INFERRED
      ```
    exact_mappings:
      - prov:Entity
    close_mappings:
      - crm:E5_Event
      - tooi:Wijzigingsgebeurtenis
      - schema:Event
    slots:
      - event_type
      - event_date
      - date_precision
      - approximate
      - description
      - source_urls
      - linkup_query
      - linkup_answer
      - fetch_timestamp
      - archive_path
      - extraction_method
      - extraction_timestamp
      - data_tier
    # Per-class refinements of the slots listed above. Only event_date and
    # source_urls are optional; every other slot is required.
    slot_usage:
      event_type:
        required: true
        description: |
          Type of organizational change event.
          Must be a valid OrganizationalChangeEventTypeEnum value.
      event_date:
        required: false
        description: |
          Date of the event. Optional because some events may not have
          a determinable date from the Linkup response.
      date_precision:
        required: true
        description: |
          Always required to indicate how to interpret event_date.
          Use 'unknown' if date is mentioned but precision unclear.
      approximate:
        required: true
        description: |
          Always required. Default to false unless hedging language detected.
      description:
        required: true
        description: |
          Human-readable summary of the event.
          Derived from linkup_answer but may be edited for clarity.
      source_urls:
        required: false
        description: |
          URLs from Linkup sources array. May be empty if API
          returned no sources (unusual but possible).
      linkup_query:
        required: true
        description: |
          The query sent to Linkup. Required for reproducibility.
      linkup_answer:
        required: true
        description: |
          The LLM answer from Linkup. Required for provenance.
      fetch_timestamp:
        required: true
        description: |
          When Linkup was called. Required for temporal provenance.
      archive_path:
        required: true
        description: |
          Path to archived JSON. Required for audit trail.
      extraction_method:
        required: true
        description: |
          How the event was extracted. Required for transparency.
      extraction_timestamp:
        required: true
        description: |
          When extraction occurred. Required for temporal provenance.
      data_tier:
        required: true
        description: |
          Quality tier. Always TIER_4_INFERRED for initial extraction.
    rules:
      # date_precision is already `required: true` in slot_usage, so this rule
      # is redundant with that constraint; it documents the dependency of
      # date_precision on event_date.
      - preconditions:
          slot_conditions:
            event_date:
              value_presence: PRESENT
        postconditions:
          slot_conditions:
            date_precision:
              value_presence: PRESENT
        description: "If event_date is provided, date_precision must be specified"
    comments:
      - "Linkup data is TIER_4 (LLM-generated) until manually verified"
      - "source_urls enable verification against original sources"
      - "archive_path enables audit of complete API response"
      - "Event types constrained to OrganizationalChangeEventTypeEnum"
      - "Use CIDOC-CRM TimeSpan for fuzzy dates if needed"
    see_also:
      - "schemas/20251121/linkml/modules/enums/OrganizationalChangeEventTypeEnum.yaml"
      - "schemas/20251121/linkml/modules/classes/WebClaim.yaml"
      - "schemas/20251121/linkml/modules/classes/TimeSpan.yaml"
      - "scripts/extract_timeline_events.py"
    examples:
      # Day-precision date taken directly from the Linkup answer.
      - value:
          event_type: FOUNDING
          event_date: "2005-04-30"
          date_precision: day
          approximate: false
          description: "Het RHC Drents Archief werd opgericht op 30 april 2005."
          source_urls:
            - "https://nl.wikipedia.org/wiki/Drents_Archief"
          linkup_query: '"Drents Archief" Assen opgericht OR gesticht'
          linkup_answer: "Het RHC Drents Archief in Assen werd opgericht op 30 april 2005..."
          fetch_timestamp: "2025-12-15T16:04:38Z"
          archive_path: "web/0002/linkup/linkup_founding_20251215T160438Z.json"
          extraction_method: linkup_answer_regex
          extraction_timestamp: "2025-12-16T10:00:00Z"
          data_tier: TIER_4_INFERRED
        description: "Founding event with exact date from Linkup"
      # Only the year is known, so event_date is a bare YYYY string.
      - value:
          event_type: MERGER
          event_date: "2005"
          date_precision: year
          approximate: false
          description: "In 2005 ging het Gemeentearchief Assen op in het Drents Archief."
          source_urls:
            - "https://nl.wikipedia.org/wiki/Drents_Archief"
          linkup_query: '"Drents Archief" fusie OR samenvoeging'
          linkup_answer: "In 2005 ging het Gemeentearchief Assen hier ook in op..."
          fetch_timestamp: "2025-12-15T16:10:00Z"
          archive_path: "web/0002/linkup/linkup_merger_20251215T161000Z.json"
          extraction_method: linkup_answer_regex
          extraction_timestamp: "2025-12-16T10:05:00Z"
          data_tier: TIER_4_INFERRED
        description: "Merger event with year-only precision"
      # Hedged source text ("circa"), so approximate is true.
      - value:
          event_type: TRANSFER
          event_date: "1900"
          date_precision: year
          approximate: true
          description: "Het archief is sinds circa 1900 gevestigd aan de Brink in Assen."
          source_urls:
            - "https://nl.wikipedia.org/wiki/Drents_Archief"
          linkup_query: '"Drents Archief" verhuisd OR verplaatst'
          linkup_answer: "Het archief is sinds circa 1900 gevestigd in een gebouw..."
          fetch_timestamp: "2025-12-15T16:15:00Z"
          archive_path: "web/0002/linkup/linkup_relocation_20251215T161500Z.json"
          extraction_method: linkup_answer_regex
          extraction_timestamp: "2025-12-16T10:10:00Z"
          data_tier: TIER_4_INFERRED
        description: "Relocation event with approximate date (circa)"