---
# LinkupTimelineEvent Class
# Models timeline events extracted from Linkup API responses with API-appropriate provenance
#
# Created December 2025 for Dutch GLAM Timeline Event Enrichment Phase 2
#
# Key principle:
# Linkup API returns LLM-generated answers with source URLs, not XPath locations.
# Therefore, provenance is different from WebClaim:
# - Store the query that was sent to Linkup
# - Store the LLM answer (which may contain hallucinations)
# - Store source URLs (for manual verification)
# - Archive the complete API response JSON
#
# This acknowledges that Linkup data is TIER_4_INFERRED (LLM-generated)
# and requires manual verification before promotion to higher tiers.
#
# Event types are constrained to OrganizationalChangeEventTypeEnum values:
# FOUNDING, MERGER, DISSOLUTION, RENAMING, TRANSFER, EXPANSION,
# SPLIT, SPIN_OFF, REDUCTION, REORGANIZATION

# Schema identity for this LinkML module.
id: https://nde.nl/ontology/hc/class/LinkupTimelineEvent
name: LinkupTimelineEvent
title: Linkup Timeline Event Class - API-Extracted Historical Events

# CURIE prefixes available to slot_uri, class_uri and mapping values in this module.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/
  prov: http://www.w3.org/ns/prov#
  xsd: http://www.w3.org/2001/XMLSchema#
  crm: http://www.cidoc-crm.org/cidoc-crm/
  tooi: https://identifier.overheid.nl/tooi/def/ont/

# linkml:types provides the built-in ranges; the two enum modules are
# imported by relative path and referenced by name as slot ranges.
imports:
  - linkml:types
  - ../enums/OrganizationalChangeEventTypeEnum
  - ../enums/DataTierEnum

# Unprefixed element names in this module resolve under the hc: namespace.
default_prefix: hc

# Enumerations defined locally in this module.
enums:
  # Granularity of an extracted date value.
  DatePrecisionEnum:
    description: |
      Precision level of an extracted date.
      Used to indicate how specific the date information is.
    permissible_values:
      day:
        description: Full date with day precision (YYYY-MM-DD)
        comments:
          - "Example: 2005-04-30 (30 april 2005)"
      month:
        description: Month precision only (YYYY-MM)
        comments:
          - "Example: 2005-04 (april 2005)"
      year:
        description: Year precision only (YYYY)
        comments:
          - "Example: 2005 (in 2005)"
      decade:
        description: Decade precision (YYY0s)
        comments:
          - "Example: 2000s (begin 21e eeuw)"
      century:
        description: Century precision
        comments:
          - "Example: 1900s (begin 20e eeuw)"
      unknown:
        description: Date mentioned but precision cannot be determined

  # NOTE: Renamed from ExtractionMethodEnum to LinkupExtractionMethodEnum
  # to avoid naming conflict with ProfileExtractionMethodEnum in ExtractionMetadata.yaml
  # These are semantically different enums for different extraction contexts.
  LinkupExtractionMethodEnum:
    description: |
      Method used to extract the event from Linkup API response.
      Used specifically for Linkup API timeline event extraction.
    permissible_values:
      linkup_answer_regex:
        description: Date extracted via regex patterns from Linkup answer text
        comments:
          - "Patterns: YYYY-MM-DD, DD-MM-YYYY, 'in YYYY', 'op DD maand YYYY'"
      linkup_answer_llm:
        description: Date extracted using LLM analysis of Linkup answer
        comments:
          - "Used for complex temporal expressions"
      linkup_source_regex:
        description: Date extracted via regex from source snippets
        comments:
          - "Fallback when answer lacks date but sources have it"
      manual_verification:
        description: Event manually verified and corrected
        comments:
          - "Promoted from TIER_4 to higher tier after verification"

# Slot definitions for timeline events: event facts first, provenance fields after.
slots:
  event_type:
    slot_uri: hc:eventType
    range: OrganizationalChangeEventTypeEnum
    required: true
    description: |
      Type of organizational change event.
      Constrained to values from OrganizationalChangeEventTypeEnum.

      Mapping from extraction patterns:
      - founding, opgericht, gesticht → FOUNDING
      - merger, fusie, samengevoegd → MERGER
      - dissolution, opgeheven, gesloten → DISSOLUTION
      - name_change, hernoemd, naamswijziging → RENAMING
      - relocation, verhuisd, verplaatst → TRANSFER
      - expansion, uitgebreid, geabsorbeerd → EXPANSION
      - split, opgesplitst → SPLIT
      - spin_off, afgesplitst → SPIN_OFF
      - reduction, ingekrompen → REDUCTION
      - reorganization, herstructurering → REORGANIZATION

  event_date:
    slot_uri: schema:startDate
    # Kept as a string (not a date range) so partial dates such as "2005" or "2005-04" fit.
    range: string
    description: |
      The date of the event in ISO 8601 format.
      Precision indicated by date_precision slot.

      Format depends on precision:
      - day: YYYY-MM-DD (e.g., "2005-04-30")
      - month: YYYY-MM (e.g., "2005-04")
      - year: YYYY (e.g., "2005")
      - decade: YYYY (e.g., "2000" for 2000s)
      - century: YYYY (e.g., "1900" for 20th century)
    # Accepts YYYY, YYYY-MM or YYYY-MM-DD; only digit counts are checked,
    # not calendar validity (a month of 13 would still match).
    pattern: "^\\d{4}(-\\d{2})?(-\\d{2})?$"

  date_precision:
    slot_uri: hc:datePrecision
    range: DatePrecisionEnum
    required: true
    description: |
      Precision level of the event_date.
      Essential for proper interpretation and display.

  approximate:
    slot_uri: hc:approximate
    range: boolean
    required: true
    description: |
      Whether the date is approximate (circa, ongeveer, rond).
      True if source uses hedging language:
      - "circa 1900", "rond 2000", "ongeveer in 2005"
      - "begin 20e eeuw", "eind jaren '90"

  description:
    slot_uri: schema:description
    range: string
    required: true
    description: |
      Human-readable description of the event.
      Should summarize what happened, derived from Linkup answer.

  # ===== PROVENANCE FIELDS =====
  # These fields enable verification of Linkup-extracted data

  source_urls:
    slot_uri: dcterms:source
    range: string
    multivalued: true
    description: |
      URLs of sources cited by Linkup API for this event.
      These are the URLs from the sources[].url array in API response.

      Used for manual verification - users can check these URLs
      to confirm the extracted information.

  linkup_query:
    slot_uri: hc:linkupQuery
    range: string
    required: true
    description: |
      The exact query sent to Linkup API.
      Essential for reproducibility and audit trail.

      Format typically: "{institution_name}" {location} {event_keywords}
      Example: "Regionaal Historisch Centrum Drents Archief" Assen opgericht OR gesticht

  linkup_answer:
    slot_uri: hc:linkupAnswer
    range: string
    required: true
    description: |
      The LLM-generated answer from Linkup API.
      This is the api_response.answer field from the JSON.

      IMPORTANT: This is LLM-generated content and may contain:
      - Hallucinations (fabricated facts)
      - Incorrect interpretations
      - Conflated information from multiple sources

      Always verify against source_urls before promotion to higher tiers.

  fetch_timestamp:
    slot_uri: prov:generatedAtTime
    range: datetime
    required: true
    description: |
      Timestamp when the Linkup API was called.
      ISO 8601 format with timezone.

      Example: "2025-12-15T16:04:38Z"

  archive_path:
    slot_uri: hc:archivePath
    range: string
    required: true
    description: |
      Relative path to the archived Linkup API response JSON file.
      Path is relative to the data/custodian/ directory.

      Format: web/{entry_number}/linkup/linkup_{event_type}_{timestamp}.json
      Example: web/0002/linkup/linkup_founding_20251215T160438Z.json

      This JSON file contains the complete API response for audit purposes.

  extraction_method:
    slot_uri: hc:extractionMethod
    range: LinkupExtractionMethodEnum
    required: true
    description: |
      Method used to extract the event information from Linkup response.

  extraction_timestamp:
    slot_uri: hc:extractionTimestamp
    range: datetime
    required: true
    description: |
      Timestamp when the event was extracted from the archived JSON.
      May differ from fetch_timestamp if extraction happens later.

  data_tier:
    slot_uri: hc:dataTier
    range: DataTierEnum
    required: true
    description: |
      Data quality tier for this event.

      Typical values:
      - TIER_4_INFERRED: Initial Linkup extraction (LLM-generated)
      - TIER_3_CROWD_SOURCED: Verified against Wikipedia/Wikidata
      - TIER_2_VERIFIED: Verified against institutional website
      - TIER_1_AUTHORITATIVE: Verified against official registry

classes:
  # One organizational-change event extracted from a Linkup API response,
  # carrying the query, answer, source URLs and archive path as provenance.
  LinkupTimelineEvent:
    class_uri: prov:Entity
    description: |
      A historical event extracted from Linkup API response with appropriate provenance.

      **PURPOSE**

      Captures organizational change events (founding, mergers, name changes, etc.)
      discovered through Linkup API queries, with full provenance chain.

      **PROVENANCE MODEL**

      Unlike WebClaim which requires XPath provenance to specific HTML elements,
      LinkupTimelineEvent acknowledges that Linkup data is LLM-generated:

      1. **Query**: What was asked (linkup_query)
      2. **Answer**: LLM-generated response (linkup_answer) - may contain errors
      3. **Sources**: URLs cited by the API (source_urls) - for verification
      4. **Archive**: Full API response JSON (archive_path) - for audit

      **DATA QUALITY**

      All Linkup-extracted events start at TIER_4_INFERRED because:
      - LLM answers may hallucinate facts
      - Dates may be incorrectly interpreted
      - Sources may be misquoted or conflated

      Events can be promoted to higher tiers after verification:
      - Check source_urls to confirm facts
      - Cross-reference with Wikidata, institutional websites
      - Update data_tier and add verification notes

      **EVENT TYPE MAPPING**

      Events are classified using OrganizationalChangeEventTypeEnum:
      - FOUNDING: Institution creation (opgericht, gesticht)
      - MERGER: Multiple institutions combining (fusie, samenvoeging)
      - DISSOLUTION: Institution closure (opgeheven, gesloten)
      - RENAMING: Name change only (hernoemd, naamswijziging)
      - TRANSFER: Physical relocation (verhuisd, verplaatst)
      - EXPANSION: Absorbing other units (uitgebreid, geabsorbeerd)
      - SPLIT: Division into multiple units (opgesplitst)
      - SPIN_OFF: Parts becoming independent (afgesplitst)
      - REDUCTION: Scope decrease (ingekrompen)
      - REORGANIZATION: Complex restructuring (herstructurering)

      **EXCLUDED EVENT TYPES**

      Some extraction patterns are NOT mapped to events:
      - predecessor: This is a relationship, not an event
      - friends_org: Separate organization (Vrienden van...)
      - reopening: Not in OrganizationalChangeEventTypeEnum

      **EXAMPLE USAGE**

      ```yaml
      linkup_enrichment:
        timeline_events:
          - event_type: FOUNDING
            event_date: "2005-04-30"
            date_precision: day
            approximate: false
            description: >-
              Het RHC Drents Archief werd opgericht op 30 april 2005.
              Het is de voortzetting van het Rijksarchief in Drenthe (sinds 2000).
            source_urls:
              - "https://nl.wikipedia.org/wiki/Drents_Archief"
              - "https://bizzy.ai/nl/nl/52454037/regionaal-historisch-centrum-rhc-drents-archief"
            linkup_query: >-
              "Regionaal Historisch Centrum (RHC) Drents Archief" Assen
              opgericht OR gesticht OR sinds
            linkup_answer: >-
              Het Regionaal Historisch Centrum (RHC) Drents Archief in Assen
              werd opgericht op 30 april 2005...
            fetch_timestamp: "2025-12-15T16:04:38Z"
            archive_path: web/0002/linkup/linkup_founding_20251215T160438Z.json
            extraction_method: linkup_answer_regex
            extraction_timestamp: "2025-12-16T10:00:00Z"
            data_tier: TIER_4_INFERRED
      ```

    exact_mappings:
      - prov:Entity

    close_mappings:
      - crm:E5_Event
      - tooi:Wijzigingsgebeurtenis
      - schema:Event

    slots:
      - event_type
      - event_date
      - date_precision
      - approximate
      - description
      - source_urls
      - linkup_query
      - linkup_answer
      - fetch_timestamp
      - archive_path
      - extraction_method
      - extraction_timestamp
      - data_tier

    # Class-level overrides; only event_date and source_urls are optional.
    slot_usage:
      event_type:
        required: true
        description: |
          Type of organizational change event.
          Must be a valid OrganizationalChangeEventTypeEnum value.

      event_date:
        required: false
        description: |
          Date of the event. Optional because some events may not have
          a determinable date from the Linkup response.

      date_precision:
        required: true
        description: |
          Always required to indicate how to interpret event_date.
          Use 'unknown' if date is mentioned but precision unclear.

      approximate:
        required: true
        description: |
          Always required. Default to false unless hedging language detected.

      description:
        required: true
        description: |
          Human-readable summary of the event.
          Derived from linkup_answer but may be edited for clarity.

      source_urls:
        required: false
        description: |
          URLs from Linkup sources array. May be empty if API
          returned no sources (unusual but possible).

      linkup_query:
        required: true
        description: |
          The query sent to Linkup. Required for reproducibility.

      linkup_answer:
        required: true
        description: |
          The LLM answer from Linkup. Required for provenance.

      fetch_timestamp:
        required: true
        description: |
          When Linkup was called. Required for temporal provenance.

      archive_path:
        required: true
        description: |
          Path to archived JSON. Required for audit trail.

      extraction_method:
        required: true
        description: |
          How the event was extracted. Required for transparency.

      extraction_timestamp:
        required: true
        description: |
          When extraction occurred. Required for temporal provenance.

      data_tier:
        required: true
        description: |
          Quality tier. Always TIER_4_INFERRED for initial extraction.

    # This rule is already implied while date_precision is `required: true`
    # in slot_usage above; it only adds a constraint if that requirement is relaxed.
    rules:
      - preconditions:
          slot_conditions:
            event_date:
              value_presence: PRESENT
        postconditions:
          slot_conditions:
            date_precision:
              value_presence: PRESENT
        description: "If event_date is provided, date_precision must be specified"

    comments:
      - "Linkup data is TIER_4 (LLM-generated) until manually verified"
      - "source_urls enable verification against original sources"
      - "archive_path enables audit of complete API response"
      - "Event types constrained to OrganizationalChangeEventTypeEnum"
      - "Use CIDOC-CRM TimeSpan for fuzzy dates if needed"

    see_also:
      - "schemas/20251121/linkml/modules/enums/OrganizationalChangeEventTypeEnum.yaml"
      - "schemas/20251121/linkml/modules/classes/WebClaim.yaml"
      - "schemas/20251121/linkml/modules/classes/TimeSpan.yaml"
      - "scripts/extract_timeline_events.py"

    # NOTE(review): each example is a structured mapping under `value`; the
    # LinkML metamodel appears to type Example.value as a string and offer
    # `object` for structured instances - confirm against project convention
    # before changing.
    examples:
      - value:
          event_type: FOUNDING
          event_date: "2005-04-30"
          date_precision: day
          approximate: false
          description: "Het RHC Drents Archief werd opgericht op 30 april 2005."
          source_urls:
            - "https://nl.wikipedia.org/wiki/Drents_Archief"
          linkup_query: '"Drents Archief" Assen opgericht OR gesticht'
          linkup_answer: "Het RHC Drents Archief in Assen werd opgericht op 30 april 2005..."
          fetch_timestamp: "2025-12-15T16:04:38Z"
          archive_path: "web/0002/linkup/linkup_founding_20251215T160438Z.json"
          extraction_method: linkup_answer_regex
          extraction_timestamp: "2025-12-16T10:00:00Z"
          data_tier: TIER_4_INFERRED
        description: "Founding event with exact date from Linkup"

      - value:
          event_type: MERGER
          event_date: "2005"
          date_precision: year
          approximate: false
          description: "In 2005 ging het Gemeentearchief Assen op in het Drents Archief."
          source_urls:
            - "https://nl.wikipedia.org/wiki/Drents_Archief"
          linkup_query: '"Drents Archief" fusie OR samenvoeging'
          linkup_answer: "In 2005 ging het Gemeentearchief Assen hier ook in op..."
          fetch_timestamp: "2025-12-15T16:10:00Z"
          archive_path: "web/0002/linkup/linkup_merger_20251215T161000Z.json"
          extraction_method: linkup_answer_regex
          extraction_timestamp: "2025-12-16T10:05:00Z"
          data_tier: TIER_4_INFERRED
        description: "Merger event with year-only precision"

      - value:
          event_type: TRANSFER
          event_date: "1900"
          date_precision: year
          approximate: true
          description: "Het archief is sinds circa 1900 gevestigd aan de Brink in Assen."
          source_urls:
            - "https://nl.wikipedia.org/wiki/Drents_Archief"
          linkup_query: '"Drents Archief" verhuisd OR verplaatst'
          linkup_answer: "Het archief is sinds circa 1900 gevestigd in een gebouw..."
          fetch_timestamp: "2025-12-15T16:15:00Z"
          archive_path: "web/0002/linkup/linkup_relocation_20251215T161500Z.json"
          extraction_method: linkup_answer_regex
          extraction_timestamp: "2025-12-16T10:10:00Z"
          data_tier: TIER_4_INFERRED
        description: "Relocation event with approximate date (circa)"