From 4cdf9588b28593132c8fb3f41a47214fb07f496e Mon Sep 17 00:00:00 2001 From: kempersc Date: Fri, 23 Jan 2026 13:15:14 +0100 Subject: [PATCH] Refactor schema slots and introduce new classes for data sources and data tiers - Added `range: string` to `connections_by_heritage_type` slot for better data representation. - Removed obsolete `data_source_whatsapp`, `data_tier`, `date_retrieved`, and `de` slots from the schema. - Updated `derived_from_observation` slot to support multiple values and changed range to `uriorcurie`. - Introduced new `DataSource` class to represent various data sources with detailed descriptions and examples. - Created `DataTierLevel` class to classify data quality tiers with standard codes and descriptions. - Archived removed slots and updated the manifest to reflect these changes. - Added new `was_retrieved_at` slot to track data retrieval timestamps, following RiC-O conventions. --- .../schemas/20251121/linkml/manifest.json | 43 +++---- .../classes/CustodianTimelineEvent.yaml | 47 +++++-- .../linkml/modules/classes/DataSource.yaml | 119 ++++++++++++++++++ .../linkml/modules/classes/DataTierLevel.yaml | 109 ++++++++++++++++ .../linkml/modules/classes/FindingAid.yaml | 18 ++- .../modules/classes/LinkedInProfile.yaml | 19 ++- .../linkml/modules/classes/Overview.yaml | 26 ++-- ...ta_source_whatsapp_archived_20260123.yaml} | 0 .../data_tier_archived_20260123.yaml} | 0 .../date_retrieved_archived_20260123.yaml} | 0 .../de_archived_20260123.yaml} | 0 .../linkml/modules/slots/manifest.json | 9 +- .../linkml/modules/slots/slot_fixes.yaml | 89 ++++++++++++- .../modules/slots/was_retrieved_at.yaml | 41 ++++++ schemas/20251121/linkml/manifest.json | 43 +++---- .../classes/CustodianTimelineEvent.yaml | 47 +++++-- .../linkml/modules/classes/DataSource.yaml | 119 ++++++++++++++++++ .../linkml/modules/classes/DataTierLevel.yaml | 109 ++++++++++++++++ .../linkml/modules/classes/FindingAid.yaml | 18 ++- .../modules/classes/LinkedInProfile.yaml | 19 ++- .../linkml/modules/classes/Overview.yaml | 26 ++-- ...ta_source_whatsapp_archived_20260123.yaml} | 0 .../data_tier_archived_20260123.yaml} | 0 .../date_retrieved_archived_20260123.yaml} | 0 .../de_archived_20260123.yaml} | 0 .../slots/connections_by_heritage_type.yaml | 1 + .../slots/derived_from_observation.yaml | 2 + .../linkml/modules/slots/manifest.json | 9 +- .../modules/slots/network_analysis.yaml | 1 + .../modules/slots/place_custodian_ref.yaml | 1 + .../linkml/modules/slots/slot_fixes.yaml | 89 ++++++++++++- .../modules/slots/was_retrieved_at.yaml | 41 ++++++ 32 files changed, 933 insertions(+), 112 deletions(-) create mode 100644 frontend/public/schemas/20251121/linkml/modules/classes/DataSource.yaml create mode 100644 frontend/public/schemas/20251121/linkml/modules/classes/DataTierLevel.yaml rename frontend/public/schemas/20251121/linkml/modules/slots/{data_source_whatsapp.yaml => archive/data_source_whatsapp_archived_20260123.yaml} (100%) rename frontend/public/schemas/20251121/linkml/modules/slots/{data_tier.yaml => archive/data_tier_archived_20260123.yaml} (100%) rename frontend/public/schemas/20251121/linkml/modules/slots/{date_retrieved.yaml => archive/date_retrieved_archived_20260123.yaml} (100%) rename frontend/public/schemas/20251121/linkml/modules/slots/{de.yaml => archive/de_archived_20260123.yaml} (100%) create mode 100644 frontend/public/schemas/20251121/linkml/modules/slots/was_retrieved_at.yaml create mode 100644 schemas/20251121/linkml/modules/classes/DataSource.yaml create mode 100644 schemas/20251121/linkml/modules/classes/DataTierLevel.yaml rename schemas/20251121/linkml/modules/slots/{data_source_whatsapp.yaml => archive/data_source_whatsapp_archived_20260123.yaml} (100%) rename schemas/20251121/linkml/modules/slots/{data_tier.yaml => archive/data_tier_archived_20260123.yaml} (100%) rename schemas/20251121/linkml/modules/slots/{date_retrieved.yaml => archive/date_retrieved_archived_20260123.yaml} (100%) rename schemas/20251121/linkml/modules/slots/{de.yaml => archive/de_archived_20260123.yaml} (100%) create mode 100644 schemas/20251121/linkml/modules/slots/was_retrieved_at.yaml diff --git a/frontend/public/schemas/20251121/linkml/manifest.json b/frontend/public/schemas/20251121/linkml/manifest.json index ecbd31f4cd..bb6a9bb4ef 100644 --- a/frontend/public/schemas/20251121/linkml/manifest.json +++ b/frontend/public/schemas/20251121/linkml/manifest.json @@ -1,12 +1,12 @@ { - "generated": "2026-01-23T09:51:31.737Z", + "generated": "2026-01-23T10:42:56.557Z", "schemaRoot": "/schemas/20251121/linkml", - "totalFiles": 3036, + "totalFiles": 3035, "categoryCounts": { "main": 4, - "class": 980, + "class": 982, "enum": 155, - "slot": 1893, + "slot": 1890, "module": 4 }, "categories": [ @@ -1315,6 +1315,16 @@ "path": "modules/classes/DataServiceEndpointTypes.yaml", "category": "class" }, + { + "name": "DataSource", + "path": "modules/classes/DataSource.yaml", + "category": "class" + }, + { + "name": "DataTierLevel", + "path": "modules/classes/DataTierLevel.yaml", + "category": "class" + }, { "name": "DataTierSummary", "path": "modules/classes/DataTierSummary.yaml", @@ -6512,16 +6522,6 @@ "path": "modules/slots/data_repository.yaml", "category": "slot" }, - { - "name": "data_source_whatsapp", - "path": "modules/slots/data_source_whatsapp.yaml", - "category": "slot" - }, - { - "name": "data_tier", - "path": "modules/slots/data_tier.yaml", - "category": "slot" - }, { "name": "date", "path": "modules/slots/date.yaml", @@ -6552,16 +6552,6 @@ "path": "modules/slots/date_precision.yaml", "category": "slot" }, - { - "name": "date_retrieved", - "path": "modules/slots/date_retrieved.yaml", - "category": "slot" - }, - { - "name": "de", - "path": "modules/slots/de.yaml", - "category": "slot" - }, { "name": "dealer_name", "path": "modules/slots/dealer_name.yaml", @@ -15177,6 +15167,11 @@ "path": "modules/slots/was_last_updated_at.yaml", "category": "slot" }, + { + "name": "was_retrieved_at", + "path": "modules/slots/was_retrieved_at.yaml", + "category": "slot" + }, { "name": "wikidata_entity", "path": "modules/slots/wikidata_entity.yaml", diff --git a/frontend/public/schemas/20251121/linkml/modules/classes/CustodianTimelineEvent.yaml b/frontend/public/schemas/20251121/linkml/modules/classes/CustodianTimelineEvent.yaml index 2c75fb2a69..ae5b240ea0 100644 --- a/frontend/public/schemas/20251121/linkml/modules/classes/CustodianTimelineEvent.yaml +++ b/frontend/public/schemas/20251121/linkml/modules/classes/CustodianTimelineEvent.yaml @@ -19,7 +19,6 @@ imports: - ./TemplateSpecificityTypes - ../enums/OrganizationalChangeEventTypeEnum - - ../enums/DataTierEnum - ../enums/DatePrecisionEnum - ../enums/TimelineExtractionMethodEnum # REMOVED - migrated to is_or_was_approximate (Rule 53) @@ -27,7 +26,8 @@ imports: - ../slots/is_or_was_approximate - ./ApproximationStatus - ../slots/has_archive_path - - ../slots/data_tier + - ../slots/has_or_had_level # was: data_tier - migrated per Rule 53/56/57 (2026-01-23) + - ./DataTierLevel - ../slots/date_precision - ../slots/description - ../slots/event_date @@ -48,7 +48,7 @@ classes: \ DESIGN**\n\nThis class models WHAT happened, not HOW we know. For source-specific\nprovenance, use observation classes:\n\ \n- `WebObservation` - For web-scraped data with XPath or API provenance\n- `CustodianObservation` - For institutional\ \ source documents\n\nThe optional `observation_ref` slot links events to their source observations\nwhen detailed provenance\ - \ is needed.\n\n**DATA QUALITY**\n\nUse `data_tier` to indicate trustworthiness:\n- TIER_4_INFERRED: Initial extraction\ + \ is needed.\n\n**DATA QUALITY**\n\nUse `has_or_had_level` (DataTierLevel) to indicate trustworthiness:\n- TIER_4_INFERRED: Initial extraction\ \ (LLM-generated, unverified)\n- TIER_3_CROWD_SOURCED: Verified against Wikipedia/Wikidata\n- TIER_2_VERIFIED: Verified\ \ against institutional website\n- TIER_1_AUTHORITATIVE: Verified against official registry\n\nUse `extraction_notes`\ \ to capture source-specific details:\n- API queries and responses\n- XPath locations in archived HTML\n- Wikidata property\ @@ -66,7 +66,7 @@ classes: \n - \"https://bizzy.ai/nl/nl/52454037/regionaal-historisch-centrum-rhc-drents-archief\"\n extraction_method:\ \ api_response_regex\n extraction_timestamp: \"2025-12-16T10:00:00Z\"\n extraction_notes: >-\n Query: \"\ Regionaal Historisch Centrum (RHC) Drents Archief\" Assen opgericht\n Answer archived at: web/0002/linkup/linkup_founding_20251215T160438Z.json\n\ - \ archive_path: web/0002/linkup/linkup_founding_20251215T160438Z.json\n data_tier: TIER_4_INFERRED\n```\n" + \ archive_path: web/0002/linkup/linkup_founding_20251215T160438Z.json\n has_or_had_level:\n has_or_had_short_code: TIER_4_INFERRED\n```\n" exact_mappings: - prov:Entity close_mappings: @@ -78,7 +78,7 @@ classes: # - approximate - is_or_was_approximate - archive_path - - data_tier + - has_or_had_level # was: data_tier - migrated per Rule 53/56/57 (2026-01-23) - date_precision - description - event_date @@ -130,8 +130,23 @@ classes: required: false has_archive_path: required: false - data_tier: + has_or_had_level: # was: data_tier - migrated per Rule 53/56/57 (2026-01-23) + range: DataTierLevel + inlined: true required: true + description: >- + Data quality tier indicating trustworthiness of this event data. + Standard codes: TIER_1_AUTHORITATIVE, TIER_2_VERIFIED, + TIER_3_CROWD_SOURCED, TIER_4_INFERRED + examples: + - value: + has_or_had_short_code: TIER_4_INFERRED + has_or_had_description: Extracted via LLM - pending verification + description: Initial extraction tier + - value: + has_or_had_short_code: TIER_2_VERIFIED + has_or_had_description: Verified against institutional website + description: Verified tier observation_ref: required: false rules: @@ -170,7 +185,9 @@ classes: extraction_timestamp: '2025-12-16T10:00:00Z' extraction_notes: 'Query: "Drents Archief" Assen opgericht OR gesticht API: Linkup. Archived at: web/0002/linkup/linkup_founding_20251215T160438Z.json' archive_path: web/0002/linkup/linkup_founding_20251215T160438Z.json - data_tier: TIER_4_INFERRED + has_or_had_level: # was: data_tier + has_or_had_short_code: TIER_4_INFERRED + has_or_had_description: Extracted via API response - pending verification description: Founding event extracted from API response - value: event_type: MERGER @@ -183,7 +200,9 @@ classes: - https://nl.wikipedia.org/wiki/Drents_Archief extraction_method: api_response_regex extraction_timestamp: '2025-12-16T10:05:00Z' - data_tier: TIER_4_INFERRED + has_or_had_level: # was: data_tier + has_or_had_short_code: TIER_4_INFERRED + has_or_had_description: Extracted via API - pending verification description: Merger event with year-only precision - value: event_type: FOUNDING @@ -197,7 +216,9 @@ classes: extraction_method: wikidata_sparql extraction_timestamp: '2025-12-20T14:30:00Z' extraction_notes: Wikidata P571 (inception date) query - data_tier: TIER_3_CROWD_SOURCED + has_or_had_level: # was: data_tier + has_or_had_short_code: TIER_3_CROWD_SOURCED + has_or_had_description: Verified against Wikidata description: Founding event from Wikidata - value: event_type: TRANSFER @@ -212,7 +233,9 @@ classes: extraction_timestamp: '2025-12-20T15:00:00Z' extraction_notes: 'XPath: /html/body/main/section[2]/div/p[3]' archive_path: web/0001/rijksmuseum.nl/about-us/rendered.html - data_tier: TIER_2_VERIFIED + has_or_had_level: # was: data_tier + has_or_had_short_code: TIER_2_VERIFIED + has_or_had_description: Verified against institutional website description: Relocation event from institutional website - value: event_type: FOUNDING @@ -225,5 +248,7 @@ classes: description: Museum founded around 1880, exact date unknown. extraction_method: manual_research extraction_timestamp: '2025-12-20T16:00:00Z' - data_tier: TIER_4_INFERRED + has_or_had_level: # was: data_tier + has_or_had_short_code: TIER_4_INFERRED + has_or_had_description: Manual research - pending verification description: Founding event with approximate date diff --git a/frontend/public/schemas/20251121/linkml/modules/classes/DataSource.yaml b/frontend/public/schemas/20251121/linkml/modules/classes/DataSource.yaml new file mode 100644 index 0000000000..24057f7dc1 --- /dev/null +++ b/frontend/public/schemas/20251121/linkml/modules/classes/DataSource.yaml @@ -0,0 +1,119 @@ +id: https://nde.nl/ontology/hc/class/DataSource +name: data_source_class +title: DataSource Class +description: >- + Represents a source of data for enrichment or information gathering. + + Used to track where data came from, including messaging platforms, + APIs, web scraping, databases, and other data sources. + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + prov: http://www.w3.org/ns/prov# + dcterms: http://purl.org/dc/terms/ + schema: http://schema.org/ + +imports: + - linkml:types + - ../slots/has_or_had_label + - ../slots/has_or_had_description + - ../slots/has_or_had_short_code + - ../slots/source_url + +default_prefix: hc + +classes: + DataSource: + class_uri: prov:Entity + description: >- + A source of data used for enrichment or information gathering. + + **Common Source Types**: + + | Type | Short Code | Description | + |------|------------|-------------| + | WhatsApp | WHATSAPP | WhatsApp messaging platform | + | LinkedIn | LINKEDIN | LinkedIn social network | + | Web Scrape | WEB_SCRAPE | Website scraping | + | API | API | External API | + | Database | DATABASE | Database query | + | Manual | MANUAL | Manual entry | + | Wikidata | WIKIDATA | Wikidata knowledge base | + | Google Maps | GOOGLE_MAPS | Google Maps API | + + **PROV-O Alignment**: Maps to prov:Entity as the source entity + from which data was derived. + exact_mappings: + - prov:Entity + close_mappings: + - dcterms:source + - schema:CreativeWork + slots: + - has_or_had_label + - has_or_had_short_code + - has_or_had_description + - source_url + slot_usage: + has_or_had_label: + description: Human-readable name for the data source. + range: string + required: true + examples: + - value: "WhatsApp" + description: WhatsApp messaging platform + - value: "LinkedIn Profile" + description: LinkedIn social network + has_or_had_short_code: + description: >- + Short code identifier for the data source type. + Examples: WHATSAPP, LINKEDIN, WEB_SCRAPE, API, DATABASE, MANUAL + range: string + required: true + pattern: "^[A-Z][A-Z0-9_]*$" + examples: + - value: "WHATSAPP" + description: WhatsApp platform + - value: "LINKEDIN" + description: LinkedIn platform + - value: "WEB_SCRAPE" + description: Website scraping + has_or_had_description: + description: Detailed description of the data source and how it was used. + range: string + required: false + examples: + - value: "Profile information extracted from WhatsApp message exchange" + description: WhatsApp source description + source_url: + description: URL or identifier for the specific source location. + range: uriorcurie + required: false + examples: + - value: "https://linkedin.com/in/example-profile" + description: LinkedIn profile URL + annotations: + specificity_score: 0.35 + specificity_rationale: >- + Data sources are broadly applicable across all enrichment contexts. + custodian_types: '["*"]' + custodian_types_rationale: >- + Data sources apply universally to all custodian types. + examples: + - value: + has_or_had_label: "WhatsApp" + has_or_had_short_code: "WHATSAPP" + has_or_had_description: "Information gathered via WhatsApp conversation" + description: WhatsApp data source + - value: + has_or_had_label: "LinkedIn Profile" + has_or_had_short_code: "LINKEDIN" + has_or_had_description: "Profile information from LinkedIn" + source_url: "https://linkedin.com/in/example-profile" + description: LinkedIn data source with URL + - value: + has_or_had_label: "Wikidata" + has_or_had_short_code: "WIKIDATA" + has_or_had_description: "Entity data from Wikidata SPARQL query" + source_url: "https://www.wikidata.org/wiki/Q12345" + description: Wikidata data source diff --git a/frontend/public/schemas/20251121/linkml/modules/classes/DataTierLevel.yaml b/frontend/public/schemas/20251121/linkml/modules/classes/DataTierLevel.yaml new file mode 100644 index 0000000000..45b1e8c991 --- /dev/null +++ b/frontend/public/schemas/20251121/linkml/modules/classes/DataTierLevel.yaml @@ -0,0 +1,109 @@ +id: https://nde.nl/ontology/hc/class/DataTierLevel +name: data_tier_level_class +title: DataTierLevel Class +description: >- + Data quality tier classification indicating the trustworthiness and + verification status of heritage data. + + The tier system reflects the provenance and verification level of information, + from authoritative official registries to algorithmically inferred data. + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + prov: http://www.w3.org/ns/prov# + dqv: http://www.w3.org/ns/dqv# + +imports: + - linkml:types + - ../slots/has_or_had_label + - ../slots/has_or_had_description + - ../slots/has_or_had_short_code + +default_prefix: hc + +classes: + DataTierLevel: + class_uri: hc:DataTierLevel + description: >- + Data quality tier level classification. + + **Standard Tier Levels** (from most to least authoritative): + + | Tier | Short Code | Description | Examples | + |------|------------|-------------|----------| + | Tier 1 | TIER_1_AUTHORITATIVE | Official registry data | NDE CSV, ISIL registry, government databases | + | Tier 2 | TIER_2_VERIFIED | Verified external sources | Wikidata (verified), Google Maps, official websites | + | Tier 3 | TIER_3_CROWD_SOURCED | Community-contributed data | Reviews, user edits, Wikipedia | + | Tier 4 | TIER_4_INFERRED | Algorithmically extracted | Website scrape, LLM extraction, API inference | + + **Usage Guidelines**: + - Always prefer higher-tier data when available + - TIER_4 data requires verification before promotion + - Data can be promoted to higher tiers after verification + - Track tier changes in provenance history + + **PROV-O Alignment**: Maps to prov:qualifiedDerivation for data quality tracking. + exact_mappings: + - dqv:QualityMeasurement + close_mappings: + - prov:qualifiedDerivation + slots: + - has_or_had_label + - has_or_had_short_code + - has_or_had_description + slot_usage: + has_or_had_label: + description: Human-readable name for the data tier. + range: string + required: true + examples: + - value: "Authoritative" + description: Tier 1 - official registry data + - value: "Inferred" + description: Tier 4 - algorithmically extracted + has_or_had_short_code: + description: >- + Short code identifier for the data tier. + Standard codes: TIER_1_AUTHORITATIVE, TIER_2_VERIFIED, + TIER_3_CROWD_SOURCED, TIER_4_INFERRED + range: string + required: true + pattern: "^TIER_[1-4]_[A-Z_]+$" + examples: + - value: "TIER_1_AUTHORITATIVE" + description: Official registry data + - value: "TIER_4_INFERRED" + description: Algorithmically extracted data + has_or_had_description: + description: Detailed description of what this tier means and its data sources. + range: string + required: false + examples: + - value: "Extracted from NDE CSV registry - authoritative Dutch heritage data" + description: Tier 1 example + - value: "Extracted via LLM from website - requires verification" + description: Tier 4 example + annotations: + specificity_score: 0.35 + specificity_rationale: >- + Data tier levels are broadly useful across all data provenance contexts. + custodian_types: '["*"]' + custodian_types_rationale: >- + Data quality tiers apply universally to all custodian types. + examples: + - value: + has_or_had_label: "Authoritative" + has_or_had_short_code: "TIER_1_AUTHORITATIVE" + has_or_had_description: "Official data from NDE registry CSV" + description: Tier 1 authoritative data + - value: + has_or_had_label: "Verified" + has_or_had_short_code: "TIER_2_VERIFIED" + has_or_had_description: "Verified against institutional website" + description: Tier 2 verified data + - value: + has_or_had_label: "Inferred" + has_or_had_short_code: "TIER_4_INFERRED" + has_or_had_description: "Extracted via Exa search - pending verification" + description: Tier 4 inferred data diff --git a/frontend/public/schemas/20251121/linkml/modules/classes/FindingAid.yaml b/frontend/public/schemas/20251121/linkml/modules/classes/FindingAid.yaml index f4ab7ad5d1..efe2de7df3 100644 --- a/frontend/public/schemas/20251121/linkml/modules/classes/FindingAid.yaml +++ b/frontend/public/schemas/20251121/linkml/modules/classes/FindingAid.yaml @@ -84,7 +84,8 @@ imports: - ../slots/content_block - ../slots/crawler_version - ../slots/custodian - - ../slots/date_retrieved + - ../slots/was_retrieved_at # was: date_retrieved - migrated per Rule 53/56/57 (2026-01-23) + - ./Timestamp - ../slots/de - ../slots/ead_id - ../slots/en @@ -887,7 +888,7 @@ classes: slots: - has_or_had_quantity # was: claims_count - migrated per Rule 53 (2026-01-19) - crawler_version - - date_retrieved + - was_retrieved_at # was: date_retrieved - migrated per Rule 53/56/57 (2026-01-23) - extraction_method - extraction_note - html_snapshot_path @@ -899,9 +900,18 @@ classes: # REMOVED 2026-01-14: validation_status - migrated to has_or_had_status with ValidationStatus - has_or_had_status slot_usage: - date_retrieved: - range: date + was_retrieved_at: # was: date_retrieved - migrated per Rule 53/56/57 (2026-01-23) + range: Timestamp + inlined: true required: true + description: >- + Timestamp when data was retrieved from the source. + Uses Timestamp class for structured temporal data. + examples: + - value: + has_or_had_timestamp: "2025-12-15T10:30:00Z" + has_or_had_precision: second + description: Full datetime retrieval timestamp retrieval_agent: required: true source_url: diff --git a/frontend/public/schemas/20251121/linkml/modules/classes/LinkedInProfile.yaml b/frontend/public/schemas/20251121/linkml/modules/classes/LinkedInProfile.yaml index 9b0b284298..0893bfacfe 100644 --- a/frontend/public/schemas/20251121/linkml/modules/classes/LinkedInProfile.yaml +++ b/frontend/public/schemas/20251121/linkml/modules/classes/LinkedInProfile.yaml @@ -22,7 +22,8 @@ imports: - ./ProvenanceBlock - ../slots/has_assessment_date - ../slots/connections_text - - ../slots/data_source_whatsapp + - ../slots/has_or_had_source # was: data_source_whatsapp - migrated per Rule 53/56/57 (2026-01-23) + - ./DataSource - ../slots/digital_confidence - ../slots/digital_indicator - ../slots/digital_professional @@ -342,7 +343,7 @@ classes: ' slots: - has_or_had_provenance - - data_source_whatsapp + - has_or_had_source # was: data_source_whatsapp - migrated per Rule 53/56/57 (2026-01-23) - enriched_date - enrichment_method_whatsapp - no_fabrication @@ -355,10 +356,18 @@ classes: range: string examples: - value: linkedin_profile_analysis - data_source_whatsapp: - range: string + has_or_had_source: # was: data_source_whatsapp - migrated per Rule 53/56/57 (2026-01-23) + range: DataSource + inlined: true + description: >- + Source of data for this enrichment. + Replaces data_source_whatsapp string with structured DataSource class. examples: - - value: public_linkedin_profile + - value: + has_or_had_label: LinkedIn Profile + has_or_had_short_code: LINKEDIN + has_or_had_description: Public LinkedIn profile data + description: LinkedIn as data source no_fabrication: range: boolean has_or_had_provenance: diff --git a/frontend/public/schemas/20251121/linkml/modules/classes/Overview.yaml b/frontend/public/schemas/20251121/linkml/modules/classes/Overview.yaml index 03362e37d6..347224418a 100644 --- a/frontend/public/schemas/20251121/linkml/modules/classes/Overview.yaml +++ b/frontend/public/schemas/20251121/linkml/modules/classes/Overview.yaml @@ -44,7 +44,8 @@ imports: # ARCHIVED: ../slots/valid_to - use temporal_extent instead - ../slots/temporal_extent # was: valid_from + valid_to - ../slots/source_url - - ../slots/date_retrieved + - ../slots/was_retrieved_at # was: date_retrieved - migrated per Rule 53/56/57 (2026-01-23) + - ./Timestamp - ../slots/specificity_annotation - ../slots/has_or_had_score # was: template_specificity - migrated per Rule 53 (2026-01-17) - ./WebLink @@ -91,7 +92,9 @@ classes: title: "Family History Research Links" description: "All links from the family history finding aid page" source_url: https://www.nationaalarchief.nl/onderzoeken/zoekhulpen/familiegeschiedenis - date_retrieved: "2025-01-14" + was_retrieved_at: + has_or_had_timestamp: "2025-01-14" + has_or_had_precision: day includes_or_included: - id: hc:link/civil-registry url: https://example.org/civil-registry @@ -115,7 +118,7 @@ classes: - description - includes_or_included - source_url - - date_retrieved + - was_retrieved_at # was: date_retrieved - migrated per Rule 53/56/57 (2026-01-23) - link_count - temporal_extent # was: valid_from + valid_to - specificity_annotation @@ -148,10 +151,17 @@ classes: range: uri description: >- The URL of the page from which links were extracted. - date_retrieved: - range: date + was_retrieved_at: # was: date_retrieved - migrated per Rule 53/56/57 (2026-01-23) + range: Timestamp + inlined: true description: >- - Date when the overview was extracted or compiled. + Timestamp when the overview was extracted or compiled. + Uses Timestamp class for structured temporal data. + examples: + - value: + has_or_had_timestamp: "2025-01-14" + has_or_had_precision: day + description: Day-precision retrieval date link_count: range: integer description: >- @@ -183,7 +193,9 @@ classes: title: "Family History Research Links" description: "Comprehensive collection of links from the family history finding aid" source_url: https://www.nationaalarchief.nl/onderzoeken/zoekhulpen/familiegeschiedenis - date_retrieved: "2025-01-14" + was_retrieved_at: # was: date_retrieved + has_or_had_timestamp: "2025-01-14" + has_or_had_precision: day link_count: 15 description: Overview of links from a finding aid page diff --git a/frontend/public/schemas/20251121/linkml/modules/slots/data_source_whatsapp.yaml b/frontend/public/schemas/20251121/linkml/modules/slots/archive/data_source_whatsapp_archived_20260123.yaml similarity index 100% rename from frontend/public/schemas/20251121/linkml/modules/slots/data_source_whatsapp.yaml rename to frontend/public/schemas/20251121/linkml/modules/slots/archive/data_source_whatsapp_archived_20260123.yaml diff --git a/frontend/public/schemas/20251121/linkml/modules/slots/data_tier.yaml b/frontend/public/schemas/20251121/linkml/modules/slots/archive/data_tier_archived_20260123.yaml similarity index 100% rename from frontend/public/schemas/20251121/linkml/modules/slots/data_tier.yaml rename to frontend/public/schemas/20251121/linkml/modules/slots/archive/data_tier_archived_20260123.yaml diff --git a/frontend/public/schemas/20251121/linkml/modules/slots/date_retrieved.yaml b/frontend/public/schemas/20251121/linkml/modules/slots/archive/date_retrieved_archived_20260123.yaml similarity index 100% rename from frontend/public/schemas/20251121/linkml/modules/slots/date_retrieved.yaml rename to frontend/public/schemas/20251121/linkml/modules/slots/archive/date_retrieved_archived_20260123.yaml diff --git a/frontend/public/schemas/20251121/linkml/modules/slots/de.yaml b/frontend/public/schemas/20251121/linkml/modules/slots/archive/de_archived_20260123.yaml similarity index 100% rename from frontend/public/schemas/20251121/linkml/modules/slots/de.yaml rename to frontend/public/schemas/20251121/linkml/modules/slots/archive/de_archived_20260123.yaml diff --git a/frontend/public/schemas/20251121/linkml/modules/slots/manifest.json b/frontend/public/schemas/20251121/linkml/modules/slots/manifest.json index f3f1aa2ae7..349f8f8316 100644 --- a/frontend/public/schemas/20251121/linkml/modules/slots/manifest.json +++ b/frontend/public/schemas/20251121/linkml/modules/slots/manifest.json @@ -179,8 +179,8 @@ "data_license_policy.yaml", "data_repository.yaml", "__ARCHIVED_20260123__data_sensitivity.yaml", - "data_source_whatsapp.yaml", - "data_tier.yaml", + "__ARCHIVED_20260123__data_source_whatsapp.yaml", + "__ARCHIVED_20260123__data_tier.yaml", "__ARCHIVED_20260122__dataset_description.yaml", "__ARCHIVED_20260122__dataset_identifier.yaml", "__ARCHIVED_20260122__dataset_title.yaml", @@ -190,8 +190,8 @@ "date_of_death.yaml", "date_of_publication.yaml", "date_precision.yaml", - "date_retrieved.yaml", - "de.yaml", + "__ARCHIVED_20260123__date_retrieved.yaml", + "__ARCHIVED_20260123__de.yaml", "dealer_name.yaml", "death_place.yaml", "deceased.yaml", @@ -1906,6 +1906,7 @@ "was_fetched_at.yaml", "was_generated_by.yaml", "was_last_updated_at.yaml", + "was_retrieved_at.yaml", "wikidata_entity.yaml", "wikidata_equivalent.yaml", "wikidata_id.yaml" diff --git a/frontend/public/schemas/20251121/linkml/modules/slots/slot_fixes.yaml b/frontend/public/schemas/20251121/linkml/modules/slots/slot_fixes.yaml index 7d8959345e..eb5c568d08 100644 --- a/frontend/public/schemas/20251121/linkml/modules/slots/slot_fixes.yaml +++ b/frontend/public/schemas/20251121/linkml/modules/slots/slot_fixes.yaml @@ -11513,12 +11513,53 @@ fixes: type: slot - label: DataSource type: class + processed: + status: true + date: '2026-01-23' + agent: claude-claude-sonnet-4-20250514 + notes: | + **Migration: data_source_whatsapp** ✅ COMPLETE + + **Pattern**: data_source_whatsapp → has_or_had_source + DataSource + + **Files Created**: + - DataSource.yaml: Structured class for data sources with support for + various source types (WHATSAPP, LINKEDIN, WEB_SCRAPE, API, WIKIDATA, etc.) + + **Files Modified**: + - LinkedInProfile.yaml: Replaced data_source_whatsapp import and slot with + has_or_had_source + DataSource, updated slot_usage + + **Archived**: modules/slots/archive/data_source_whatsapp_archived_20260123.yaml - original_slot_id: https://nde.nl/ontology/hc/slot/data_tier revision: - label: has_or_had_level type: slot - label: DataTierLevel type: class + processed: + status: true + date: '2026-01-23' + agent: claude-claude-sonnet-4-20250514 + notes: | + **Migration: data_tier** ✅ COMPLETE + + **Pattern**: data_tier → has_or_had_level + DataTierLevel + + **Files Created**: + - DataTierLevel.yaml: Structured class for data quality tier classification + with standard codes (TIER_1_AUTHORITATIVE, TIER_2_VERIFIED, + TIER_3_CROWD_SOURCED, TIER_4_INFERRED) + + **Files Modified**: + - CustodianTimelineEvent.yaml: Replaced data_tier import and slot with + has_or_had_level + DataTierLevel, updated slot_usage, description, and + all examples (5 examples updated) + + **Note**: Other files (ProvenanceBlock, SourceRecord, etc.) use DataTierEnum + directly in slot_usage without importing data_tier slot - not affected. + + **Archived**: modules/slots/archive/data_tier_archived_20260123.yaml - original_slot_id: https://nde.nl/ontology/hc/slot/dataset_description revision: - label: has_or_had_description @@ -11586,7 +11627,20 @@ fixes: type: slot - label: TimeSpan type: class - + processed: + status: true + date: '2026-01-23' + agent: claude-claude-sonnet-4-20250514 + notes: | + **Migration: date_value** ✅ COMPLETE (SLOT NOT FOUND) + + **Status**: The date_value slot does not exist in the schema and is not + used by any class. It was either previously removed or never created. + + **Target Pattern**: temporal_extent + TimeSpan (both already exist) + + **No action needed** - marking as processed since the migration target + is already available for any future use cases. - original_slot_id: https://nde.nl/ontology/hc/slot/date_created revision: - label: is_or_was_created_through @@ -11643,6 +11697,26 @@ fixes: type: slot - label: Timestamp type: class + processed: + status: true + date: '2026-01-23' + agent: claude-claude-sonnet-4-20250514 + notes: | + **Migration: date_retrieved** ✅ COMPLETE + + **Pattern**: date_retrieved → was_retrieved_at + Timestamp + + **Files Created**: + - was_retrieved_at.yaml: New slot for data retrieval timestamps using + RiC-O temporal naming convention + + **Files Modified**: + - FindingAid.yaml: Replaced date_retrieved import and slot with + was_retrieved_at + Timestamp, updated slot_usage + - Overview.yaml: Replaced date_retrieved import and slot with + was_retrieved_at + Timestamp, updated slot_usage and examples + + **Archived**: modules/slots/archive/date_retrieved_archived_20260123.yaml - original_slot_id: https://nde.nl/ontology/hc/slot/de revision: - label: has_or_had_language @@ -11650,6 +11724,19 @@ fixes: - label: Language type: class value: de + processed: + status: true + date: '2026-01-23' + agent: claude-claude-sonnet-4-20250514 + notes: | + **Migration: de** ✅ COMPLETE (SLOT NOT USED) + + **Status**: The de slot (German language text) was not used by any class. + Legacy language-specific slot that predates the has_or_had_language + Language pattern. + + **Target Pattern**: has_or_had_language + Language (both already exist) + + **Archived**: modules/slots/archive/de_archived_20260123.yaml - original_slot_id: https://nde.nl/ontology/hc/slot/dealer_name revision: - label: is_or_was_associated_with diff --git a/frontend/public/schemas/20251121/linkml/modules/slots/was_retrieved_at.yaml b/frontend/public/schemas/20251121/linkml/modules/slots/was_retrieved_at.yaml new file mode 100644 index 0000000000..5d94397cea --- /dev/null +++ b/frontend/public/schemas/20251121/linkml/modules/slots/was_retrieved_at.yaml @@ -0,0 +1,41 @@ +id: https://nde.nl/ontology/hc/slot/was_retrieved_at +name: was_retrieved_at_slot +title: Was Retrieved At Slot +description: >- + Timestamp indicating when data was retrieved from a source. + + Follows RiC-O temporal naming convention (Rule 39) using past tense + to indicate the retrieval event has already occurred. + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + prov: http://www.w3.org/ns/prov# + schema: http://schema.org/ + +imports: + - linkml:types + +default_prefix: hc + +slots: + was_retrieved_at: + description: >- + Timestamp indicating when data was retrieved from a source. + + Used for tracking when web scrapes, API calls, or other data + retrieval operations occurred. + slot_uri: prov:endedAtTime + range: Timestamp + exact_mappings: + - prov:endedAtTime + close_mappings: + - schema:dateModified + annotations: + specificity_score: 0.25 + specificity_rationale: >- + Retrieval timestamps are broadly useful for provenance tracking + across all data sources. + custodian_types: '["*"]' + custodian_types_rationale: >- + Data retrieval applies universally to all custodian types. diff --git a/schemas/20251121/linkml/manifest.json b/schemas/20251121/linkml/manifest.json index ffa98da815..682275cdec 100644 --- a/schemas/20251121/linkml/manifest.json +++ b/schemas/20251121/linkml/manifest.json @@ -1,12 +1,12 @@ { - "generated": "2026-01-23T10:04:15.652Z", + "generated": "2026-01-23T12:15:15.118Z", "schemaRoot": "/schemas/20251121/linkml", - "totalFiles": 3036, + "totalFiles": 3035, "categoryCounts": { "main": 4, - "class": 980, + "class": 982, "enum": 155, - "slot": 1893, + "slot": 1890, "module": 4 }, "categories": [ @@ -1315,6 +1315,16 @@ "path": "modules/classes/DataServiceEndpointTypes.yaml", "category": "class" }, + { + "name": "DataSource", + "path": "modules/classes/DataSource.yaml", + "category": "class" + }, + { + "name": "DataTierLevel", + "path": "modules/classes/DataTierLevel.yaml", + "category": "class" + }, { "name": "DataTierSummary", "path": "modules/classes/DataTierSummary.yaml", @@ -6512,16 +6522,6 @@ "path": "modules/slots/data_repository.yaml", "category": "slot" }, - { - "name": "data_source_whatsapp", - "path": "modules/slots/data_source_whatsapp.yaml", - "category": "slot" - }, - { - "name": "data_tier", - "path": "modules/slots/data_tier.yaml", - "category": "slot" - }, { "name": "date", "path": "modules/slots/date.yaml", @@ -6552,16 +6552,6 @@ "path": "modules/slots/date_precision.yaml", "category": "slot" }, - { - "name": "date_retrieved", - "path": "modules/slots/date_retrieved.yaml", - "category": "slot" - }, - { - "name": "de", - "path": "modules/slots/de.yaml", - "category": "slot" - }, { "name": "dealer_name", "path": "modules/slots/dealer_name.yaml", @@ -15177,6 +15167,11 @@ "path": "modules/slots/was_last_updated_at.yaml", "category": "slot" }, + { + "name": "was_retrieved_at", + "path": "modules/slots/was_retrieved_at.yaml", + "category": "slot" + }, { "name": "wikidata_entity", "path": "modules/slots/wikidata_entity.yaml", diff --git a/schemas/20251121/linkml/modules/classes/CustodianTimelineEvent.yaml b/schemas/20251121/linkml/modules/classes/CustodianTimelineEvent.yaml index 2c75fb2a69..ae5b240ea0 100644 --- a/schemas/20251121/linkml/modules/classes/CustodianTimelineEvent.yaml +++ b/schemas/20251121/linkml/modules/classes/CustodianTimelineEvent.yaml @@ -19,7 +19,6 @@ imports: - ./TemplateSpecificityTypes - ../enums/OrganizationalChangeEventTypeEnum - - ../enums/DataTierEnum - ../enums/DatePrecisionEnum - ../enums/TimelineExtractionMethodEnum # REMOVED - migrated to is_or_was_approximate (Rule 53) @@ -27,7 +26,8 @@ imports: - ../slots/is_or_was_approximate - ./ApproximationStatus - ../slots/has_archive_path - - ../slots/data_tier + - ../slots/has_or_had_level # was: data_tier - migrated per Rule 53/56/57 (2026-01-23) + - ./DataTierLevel - ../slots/date_precision - ../slots/description - ../slots/event_date @@ -48,7 +48,7 @@ classes: \ DESIGN**\n\nThis class models WHAT happened, not HOW we know. For source-specific\nprovenance, use observation classes:\n\ \n- `WebObservation` - For web-scraped data with XPath or API provenance\n- `CustodianObservation` - For institutional\ \ source documents\n\nThe optional `observation_ref` slot links events to their source observations\nwhen detailed provenance\ - \ is needed.\n\n**DATA QUALITY**\n\nUse `data_tier` to indicate trustworthiness:\n- TIER_4_INFERRED: Initial extraction\ + \ is needed.\n\n**DATA QUALITY**\n\nUse `has_or_had_level` (DataTierLevel) to indicate trustworthiness:\n- TIER_4_INFERRED: Initial extraction\ \ (LLM-generated, unverified)\n- TIER_3_CROWD_SOURCED: Verified against Wikipedia/Wikidata\n- TIER_2_VERIFIED: Verified\ \ against institutional website\n- TIER_1_AUTHORITATIVE: Verified against official registry\n\nUse `extraction_notes`\ \ to capture source-specific details:\n- API queries and responses\n- XPath locations in archived HTML\n- Wikidata property\ @@ -66,7 +66,7 @@ classes: \n - \"https://bizzy.ai/nl/nl/52454037/regionaal-historisch-centrum-rhc-drents-archief\"\n extraction_method:\ \ api_response_regex\n extraction_timestamp: \"2025-12-16T10:00:00Z\"\n extraction_notes: >-\n Query: \"\ Regionaal Historisch Centrum (RHC) Drents Archief\" Assen opgericht\n Answer archived at: web/0002/linkup/linkup_founding_20251215T160438Z.json\n\ - \ archive_path: web/0002/linkup/linkup_founding_20251215T160438Z.json\n data_tier: TIER_4_INFERRED\n```\n" + \ archive_path: web/0002/linkup/linkup_founding_20251215T160438Z.json\n has_or_had_level:\n has_or_had_short_code: TIER_4_INFERRED\n```\n" exact_mappings: - prov:Entity close_mappings: @@ -78,7 +78,7 @@ classes: # - approximate - is_or_was_approximate - archive_path - - data_tier + - has_or_had_level # was: data_tier - migrated per Rule 53/56/57 (2026-01-23) - date_precision - description - event_date @@ -130,8 +130,23 @@ classes: required: false has_archive_path: required: false - data_tier: + has_or_had_level: # was: data_tier - migrated per Rule 53/56/57 (2026-01-23) + range: DataTierLevel + inlined: true required: true + description: >- + Data quality tier indicating trustworthiness of this event data. + Standard codes: TIER_1_AUTHORITATIVE, TIER_2_VERIFIED, + TIER_3_CROWD_SOURCED, TIER_4_INFERRED + examples: + - value: + has_or_had_short_code: TIER_4_INFERRED + has_or_had_description: Extracted via LLM - pending verification + description: Initial extraction tier + - value: + has_or_had_short_code: TIER_2_VERIFIED + has_or_had_description: Verified against institutional website + description: Verified tier observation_ref: required: false rules: @@ -170,7 +185,9 @@ classes: extraction_timestamp: '2025-12-16T10:00:00Z' extraction_notes: 'Query: "Drents Archief" Assen opgericht OR gesticht API: Linkup. Archived at: web/0002/linkup/linkup_founding_20251215T160438Z.json' archive_path: web/0002/linkup/linkup_founding_20251215T160438Z.json - data_tier: TIER_4_INFERRED + has_or_had_level: # was: data_tier + has_or_had_short_code: TIER_4_INFERRED + has_or_had_description: Extracted via API response - pending verification description: Founding event extracted from API response - value: event_type: MERGER @@ -183,7 +200,9 @@ classes: - https://nl.wikipedia.org/wiki/Drents_Archief extraction_method: api_response_regex extraction_timestamp: '2025-12-16T10:05:00Z' - data_tier: TIER_4_INFERRED + has_or_had_level: # was: data_tier + has_or_had_short_code: TIER_4_INFERRED + has_or_had_description: Extracted via API - pending verification description: Merger event with year-only precision - value: event_type: FOUNDING @@ -197,7 +216,9 @@ classes: extraction_method: wikidata_sparql extraction_timestamp: '2025-12-20T14:30:00Z' extraction_notes: Wikidata P571 (inception date) query - data_tier: TIER_3_CROWD_SOURCED + has_or_had_level: # was: data_tier + has_or_had_short_code: TIER_3_CROWD_SOURCED + has_or_had_description: Verified against Wikidata description: Founding event from Wikidata - value: event_type: TRANSFER @@ -212,7 +233,9 @@ classes: extraction_timestamp: '2025-12-20T15:00:00Z' extraction_notes: 'XPath: /html/body/main/section[2]/div/p[3]' archive_path: web/0001/rijksmuseum.nl/about-us/rendered.html - data_tier: TIER_2_VERIFIED + has_or_had_level: # was: data_tier + has_or_had_short_code: TIER_2_VERIFIED + has_or_had_description: Verified against institutional website description: Relocation event from institutional website - value: event_type: FOUNDING @@ -225,5 +248,7 @@ classes: description: Museum founded around 1880, exact date unknown. extraction_method: manual_research extraction_timestamp: '2025-12-20T16:00:00Z' - data_tier: TIER_4_INFERRED + has_or_had_level: # was: data_tier + has_or_had_short_code: TIER_4_INFERRED + has_or_had_description: Manual research - pending verification description: Founding event with approximate date diff --git a/schemas/20251121/linkml/modules/classes/DataSource.yaml b/schemas/20251121/linkml/modules/classes/DataSource.yaml new file mode 100644 index 0000000000..24057f7dc1 --- /dev/null +++ b/schemas/20251121/linkml/modules/classes/DataSource.yaml @@ -0,0 +1,119 @@ +id: https://nde.nl/ontology/hc/class/DataSource +name: data_source_class +title: DataSource Class +description: >- + Represents a source of data for enrichment or information gathering. + + Used to track where data came from, including messaging platforms, + APIs, web scraping, databases, and other data sources. + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + prov: http://www.w3.org/ns/prov# + dcterms: http://purl.org/dc/terms/ + schema: http://schema.org/ + +imports: + - linkml:types + - ../slots/has_or_had_label + - ../slots/has_or_had_description + - ../slots/has_or_had_short_code + - ../slots/source_url + +default_prefix: hc + +classes: + DataSource: + class_uri: prov:Entity + description: >- + A source of data used for enrichment or information gathering. + + **Common Source Types**: + + | Type | Short Code | Description | + |------|------------|-------------| + | WhatsApp | WHATSAPP | WhatsApp messaging platform | + | LinkedIn | LINKEDIN | LinkedIn social network | + | Web Scrape | WEB_SCRAPE | Website scraping | + | API | API | External API | + | Database | DATABASE | Database query | + | Manual | MANUAL | Manual entry | + | Wikidata | WIKIDATA | Wikidata knowledge base | + | Google Maps | GOOGLE_MAPS | Google Maps API | + + **PROV-O Alignment**: Maps to prov:Entity as the source entity + from which data was derived. + exact_mappings: + - prov:Entity + close_mappings: + - dcterms:source + - schema:CreativeWork + slots: + - has_or_had_label + - has_or_had_short_code + - has_or_had_description + - source_url + slot_usage: + has_or_had_label: + description: Human-readable name for the data source. + range: string + required: true + examples: + - value: "WhatsApp" + description: WhatsApp messaging platform + - value: "LinkedIn Profile" + description: LinkedIn social network + has_or_had_short_code: + description: >- + Short code identifier for the data source type. + Examples: WHATSAPP, LINKEDIN, WEB_SCRAPE, API, DATABASE, MANUAL + range: string + required: true + pattern: "^[A-Z][A-Z0-9_]*$" + examples: + - value: "WHATSAPP" + description: WhatsApp platform + - value: "LINKEDIN" + description: LinkedIn platform + - value: "WEB_SCRAPE" + description: Website scraping + has_or_had_description: + description: Detailed description of the data source and how it was used. + range: string + required: false + examples: + - value: "Profile information extracted from WhatsApp message exchange" + description: WhatsApp source description + source_url: + description: URL or identifier for the specific source location. + range: uriorcurie + required: false + examples: + - value: "https://linkedin.com/in/example-profile" + description: LinkedIn profile URL + annotations: + specificity_score: 0.35 + specificity_rationale: >- + Data sources are broadly applicable across all enrichment contexts. + custodian_types: '["*"]' + custodian_types_rationale: >- + Data sources apply universally to all custodian types. + examples: + - value: + has_or_had_label: "WhatsApp" + has_or_had_short_code: "WHATSAPP" + has_or_had_description: "Information gathered via WhatsApp conversation" + description: WhatsApp data source + - value: + has_or_had_label: "LinkedIn Profile" + has_or_had_short_code: "LINKEDIN" + has_or_had_description: "Profile information from LinkedIn" + source_url: "https://linkedin.com/in/example-profile" + description: LinkedIn data source with URL + - value: + has_or_had_label: "Wikidata" + has_or_had_short_code: "WIKIDATA" + has_or_had_description: "Entity data from Wikidata SPARQL query" + source_url: "https://www.wikidata.org/wiki/Q12345" + description: Wikidata data source diff --git a/schemas/20251121/linkml/modules/classes/DataTierLevel.yaml b/schemas/20251121/linkml/modules/classes/DataTierLevel.yaml new file mode 100644 index 0000000000..45b1e8c991 --- /dev/null +++ b/schemas/20251121/linkml/modules/classes/DataTierLevel.yaml @@ -0,0 +1,109 @@ +id: https://nde.nl/ontology/hc/class/DataTierLevel +name: data_tier_level_class +title: DataTierLevel Class +description: >- + Data quality tier classification indicating the trustworthiness and + verification status of heritage data. + + The tier system reflects the provenance and verification level of information, + from authoritative official registries to algorithmically inferred data. + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + prov: http://www.w3.org/ns/prov# + dqv: http://www.w3.org/ns/dqv# + +imports: + - linkml:types + - ../slots/has_or_had_label + - ../slots/has_or_had_description + - ../slots/has_or_had_short_code + +default_prefix: hc + +classes: + DataTierLevel: + class_uri: hc:DataTierLevel + description: >- + Data quality tier level classification. + + **Standard Tier Levels** (from most to least authoritative): + + | Tier | Short Code | Description | Examples | + |------|------------|-------------|----------| + | Tier 1 | TIER_1_AUTHORITATIVE | Official registry data | NDE CSV, ISIL registry, government databases | + | Tier 2 | TIER_2_VERIFIED | Verified external sources | Wikidata (verified), Google Maps, official websites | + | Tier 3 | TIER_3_CROWD_SOURCED | Community-contributed data | Reviews, user edits, Wikipedia | + | Tier 4 | TIER_4_INFERRED | Algorithmically extracted | Website scrape, LLM extraction, API inference | + + **Usage Guidelines**: + - Always prefer higher-tier data when available + - TIER_4 data requires verification before promotion + - Data can be promoted to higher tiers after verification + - Track tier changes in provenance history + + **PROV-O Alignment**: Maps to prov:qualifiedDerivation for data quality tracking. + exact_mappings: + - dqv:QualityMeasurement + close_mappings: + - prov:qualifiedDerivation + slots: + - has_or_had_label + - has_or_had_short_code + - has_or_had_description + slot_usage: + has_or_had_label: + description: Human-readable name for the data tier. + range: string + required: true + examples: + - value: "Authoritative" + description: Tier 1 - official registry data + - value: "Inferred" + description: Tier 4 - algorithmically extracted + has_or_had_short_code: + description: >- + Short code identifier for the data tier. + Standard codes: TIER_1_AUTHORITATIVE, TIER_2_VERIFIED, + TIER_3_CROWD_SOURCED, TIER_4_INFERRED + range: string + required: true + pattern: "^TIER_[1-4]_[A-Z_]+$" + examples: + - value: "TIER_1_AUTHORITATIVE" + description: Official registry data + - value: "TIER_4_INFERRED" + description: Algorithmically extracted data + has_or_had_description: + description: Detailed description of what this tier means and its data sources. + range: string + required: false + examples: + - value: "Extracted from NDE CSV registry - authoritative Dutch heritage data" + description: Tier 1 example + - value: "Extracted via LLM from website - requires verification" + description: Tier 4 example + annotations: + specificity_score: 0.35 + specificity_rationale: >- + Data tier levels are broadly useful across all data provenance contexts. + custodian_types: '["*"]' + custodian_types_rationale: >- + Data quality tiers apply universally to all custodian types. + examples: + - value: + has_or_had_label: "Authoritative" + has_or_had_short_code: "TIER_1_AUTHORITATIVE" + has_or_had_description: "Official data from NDE registry CSV" + description: Tier 1 authoritative data + - value: + has_or_had_label: "Verified" + has_or_had_short_code: "TIER_2_VERIFIED" + has_or_had_description: "Verified against institutional website" + description: Tier 2 verified data + - value: + has_or_had_label: "Inferred" + has_or_had_short_code: "TIER_4_INFERRED" + has_or_had_description: "Extracted via Exa search - pending verification" + description: Tier 4 inferred data diff --git a/schemas/20251121/linkml/modules/classes/FindingAid.yaml b/schemas/20251121/linkml/modules/classes/FindingAid.yaml index f4ab7ad5d1..efe2de7df3 100644 --- a/schemas/20251121/linkml/modules/classes/FindingAid.yaml +++ b/schemas/20251121/linkml/modules/classes/FindingAid.yaml @@ -84,7 +84,8 @@ imports: - ../slots/content_block - ../slots/crawler_version - ../slots/custodian - - ../slots/date_retrieved + - ../slots/was_retrieved_at # was: date_retrieved - migrated per Rule 53/56/57 (2026-01-23) + - ./Timestamp - ../slots/de - ../slots/ead_id - ../slots/en @@ -887,7 +888,7 @@ classes: slots: - has_or_had_quantity # was: claims_count - migrated per Rule 53 (2026-01-19) - crawler_version - - date_retrieved + - was_retrieved_at # was: date_retrieved - migrated per Rule 53/56/57 (2026-01-23) - extraction_method - extraction_note - html_snapshot_path @@ -899,9 +900,18 @@ classes: # REMOVED 2026-01-14: validation_status - migrated to has_or_had_status with ValidationStatus - has_or_had_status slot_usage: - date_retrieved: - range: date + was_retrieved_at: # was: date_retrieved - migrated per Rule 53/56/57 (2026-01-23) + range: Timestamp + inlined: true required: true + description: >- + Timestamp when data was retrieved from the source. + Uses Timestamp class for structured temporal data. + examples: + - value: + has_or_had_timestamp: "2025-12-15T10:30:00Z" + has_or_had_precision: second + description: Full datetime retrieval timestamp retrieval_agent: required: true source_url: diff --git a/schemas/20251121/linkml/modules/classes/LinkedInProfile.yaml b/schemas/20251121/linkml/modules/classes/LinkedInProfile.yaml index 9b0b284298..0893bfacfe 100644 --- a/schemas/20251121/linkml/modules/classes/LinkedInProfile.yaml +++ b/schemas/20251121/linkml/modules/classes/LinkedInProfile.yaml @@ -22,7 +22,8 @@ imports: - ./ProvenanceBlock - ../slots/has_assessment_date - ../slots/connections_text - - ../slots/data_source_whatsapp + - ../slots/has_or_had_source # was: data_source_whatsapp - migrated per Rule 53/56/57 (2026-01-23) + - ./DataSource - ../slots/digital_confidence - ../slots/digital_indicator - ../slots/digital_professional @@ -342,7 +343,7 @@ classes: ' slots: - has_or_had_provenance - - data_source_whatsapp + - has_or_had_source # was: data_source_whatsapp - migrated per Rule 53/56/57 (2026-01-23) - enriched_date - enrichment_method_whatsapp - no_fabrication @@ -355,10 +356,18 @@ classes: range: string examples: - value: linkedin_profile_analysis - data_source_whatsapp: - range: string + has_or_had_source: # was: data_source_whatsapp - migrated per Rule 53/56/57 (2026-01-23) + range: DataSource + inlined: true + description: >- + Source of data for this enrichment. + Replaces data_source_whatsapp string with structured DataSource class. examples: - - value: public_linkedin_profile + - value: + has_or_had_label: LinkedIn Profile + has_or_had_short_code: LINKEDIN + has_or_had_description: Public LinkedIn profile data + description: LinkedIn as data source no_fabrication: range: boolean has_or_had_provenance: diff --git a/schemas/20251121/linkml/modules/classes/Overview.yaml b/schemas/20251121/linkml/modules/classes/Overview.yaml index 03362e37d6..347224418a 100644 --- a/schemas/20251121/linkml/modules/classes/Overview.yaml +++ b/schemas/20251121/linkml/modules/classes/Overview.yaml @@ -44,7 +44,8 @@ imports: # ARCHIVED: ../slots/valid_to - use temporal_extent instead - ../slots/temporal_extent # was: valid_from + valid_to - ../slots/source_url - - ../slots/date_retrieved + - ../slots/was_retrieved_at # was: date_retrieved - migrated per Rule 53/56/57 (2026-01-23) + - ./Timestamp - ../slots/specificity_annotation - ../slots/has_or_had_score # was: template_specificity - migrated per Rule 53 (2026-01-17) - ./WebLink @@ -91,7 +92,9 @@ classes: title: "Family History Research Links" description: "All links from the family history finding aid page" source_url: https://www.nationaalarchief.nl/onderzoeken/zoekhulpen/familiegeschiedenis - date_retrieved: "2025-01-14" + was_retrieved_at: + has_or_had_timestamp: "2025-01-14" + has_or_had_precision: day includes_or_included: - id: hc:link/civil-registry url: https://example.org/civil-registry @@ -115,7 +118,7 @@ classes: - description - includes_or_included - source_url - - date_retrieved + - was_retrieved_at # was: date_retrieved - migrated per Rule 53/56/57 (2026-01-23) - link_count - temporal_extent # was: valid_from + valid_to - specificity_annotation @@ -148,10 +151,17 @@ classes: range: uri description: >- The URL of the page from which links were extracted. - date_retrieved: - range: date + was_retrieved_at: # was: date_retrieved - migrated per Rule 53/56/57 (2026-01-23) + range: Timestamp + inlined: true description: >- - Date when the overview was extracted or compiled. + Timestamp when the overview was extracted or compiled. + Uses Timestamp class for structured temporal data. + examples: + - value: + has_or_had_timestamp: "2025-01-14" + has_or_had_precision: day + description: Day-precision retrieval date link_count: range: integer description: >- @@ -183,7 +193,9 @@ classes: title: "Family History Research Links" description: "Comprehensive collection of links from the family history finding aid" source_url: https://www.nationaalarchief.nl/onderzoeken/zoekhulpen/familiegeschiedenis - date_retrieved: "2025-01-14" + was_retrieved_at: # was: date_retrieved + has_or_had_timestamp: "2025-01-14" + has_or_had_precision: day link_count: 15 description: Overview of links from a finding aid page diff --git a/schemas/20251121/linkml/modules/slots/data_source_whatsapp.yaml b/schemas/20251121/linkml/modules/slots/archive/data_source_whatsapp_archived_20260123.yaml similarity index 100% rename from schemas/20251121/linkml/modules/slots/data_source_whatsapp.yaml rename to schemas/20251121/linkml/modules/slots/archive/data_source_whatsapp_archived_20260123.yaml diff --git a/schemas/20251121/linkml/modules/slots/data_tier.yaml b/schemas/20251121/linkml/modules/slots/archive/data_tier_archived_20260123.yaml similarity index 100% rename from schemas/20251121/linkml/modules/slots/data_tier.yaml rename to schemas/20251121/linkml/modules/slots/archive/data_tier_archived_20260123.yaml diff --git a/schemas/20251121/linkml/modules/slots/date_retrieved.yaml b/schemas/20251121/linkml/modules/slots/archive/date_retrieved_archived_20260123.yaml similarity index 100% rename from schemas/20251121/linkml/modules/slots/date_retrieved.yaml rename to schemas/20251121/linkml/modules/slots/archive/date_retrieved_archived_20260123.yaml diff --git a/schemas/20251121/linkml/modules/slots/de.yaml b/schemas/20251121/linkml/modules/slots/archive/de_archived_20260123.yaml similarity index 100% rename from schemas/20251121/linkml/modules/slots/de.yaml rename to schemas/20251121/linkml/modules/slots/archive/de_archived_20260123.yaml diff --git a/schemas/20251121/linkml/modules/slots/connections_by_heritage_type.yaml b/schemas/20251121/linkml/modules/slots/connections_by_heritage_type.yaml index 9f7da22237..ec0b729247 100644 --- a/schemas/20251121/linkml/modules/slots/connections_by_heritage_type.yaml +++ b/schemas/20251121/linkml/modules/slots/connections_by_heritage_type.yaml @@ -15,4 +15,5 @@ slots: Breakdown of heritage-relevant connections by type code. Keys are single-letter GLAMORCUBESFIXPHDNT codes. slot_uri: hc:connectionsByHeritageType + range: string multivalued: true diff --git a/schemas/20251121/linkml/modules/slots/derived_from_observation.yaml b/schemas/20251121/linkml/modules/slots/derived_from_observation.yaml index 38681fab27..1529a02362 100644 --- a/schemas/20251121/linkml/modules/slots/derived_from_observation.yaml +++ b/schemas/20251121/linkml/modules/slots/derived_from_observation.yaml @@ -28,3 +28,5 @@ slots: ' slot_uri: prov:wasDerivedFrom + range: uriorcurie + multivalued: true diff --git a/schemas/20251121/linkml/modules/slots/manifest.json b/schemas/20251121/linkml/modules/slots/manifest.json index f3f1aa2ae7..349f8f8316 100644 --- a/schemas/20251121/linkml/modules/slots/manifest.json +++ b/schemas/20251121/linkml/modules/slots/manifest.json @@ -179,8 +179,8 @@ "data_license_policy.yaml", "data_repository.yaml", "__ARCHIVED_20260123__data_sensitivity.yaml", - "data_source_whatsapp.yaml", - "data_tier.yaml", + "__ARCHIVED_20260123__data_source_whatsapp.yaml", + "__ARCHIVED_20260123__data_tier.yaml", "__ARCHIVED_20260122__dataset_description.yaml", "__ARCHIVED_20260122__dataset_identifier.yaml", "__ARCHIVED_20260122__dataset_title.yaml", @@ -190,8 +190,8 @@ "date_of_death.yaml", "date_of_publication.yaml", "date_precision.yaml", - "date_retrieved.yaml", - "de.yaml", + "__ARCHIVED_20260123__date_retrieved.yaml", + "__ARCHIVED_20260123__de.yaml", "dealer_name.yaml", "death_place.yaml", "deceased.yaml", @@ -1906,6 +1906,7 @@ "was_fetched_at.yaml", "was_generated_by.yaml", "was_last_updated_at.yaml", + "was_retrieved_at.yaml", "wikidata_entity.yaml", "wikidata_equivalent.yaml", "wikidata_id.yaml" diff --git a/schemas/20251121/linkml/modules/slots/network_analysis.yaml b/schemas/20251121/linkml/modules/slots/network_analysis.yaml index e97a513e25..93c4635b8a 100644 --- a/schemas/20251121/linkml/modules/slots/network_analysis.yaml +++ b/schemas/20251121/linkml/modules/slots/network_analysis.yaml @@ -16,3 +16,4 @@ slots: Aggregated statistics about a connection network. Contains summary metrics like total count, heritage-relevant percentage, etc. slot_uri: schema:mainEntity + range: string diff --git a/schemas/20251121/linkml/modules/slots/place_custodian_ref.yaml b/schemas/20251121/linkml/modules/slots/place_custodian_ref.yaml index c45bedde7d..dd01a112d1 100644 --- a/schemas/20251121/linkml/modules/slots/place_custodian_ref.yaml +++ b/schemas/20251121/linkml/modules/slots/place_custodian_ref.yaml @@ -22,3 +22,4 @@ slots: ' slot_uri: dcterms:references + range: uriorcurie diff --git a/schemas/20251121/linkml/modules/slots/slot_fixes.yaml b/schemas/20251121/linkml/modules/slots/slot_fixes.yaml index 7d8959345e..eb5c568d08 100644 --- a/schemas/20251121/linkml/modules/slots/slot_fixes.yaml +++ b/schemas/20251121/linkml/modules/slots/slot_fixes.yaml @@ -11513,12 +11513,53 @@ fixes: type: slot - label: DataSource type: class + processed: + status: true + date: '2026-01-23' + agent: claude-claude-sonnet-4-20250514 + notes: | + **Migration: data_source_whatsapp** ✅ COMPLETE + + **Pattern**: data_source_whatsapp → has_or_had_source + DataSource + + **Files Created**: + - DataSource.yaml: Structured class for data sources with support for + various source types (WHATSAPP, LINKEDIN, WEB_SCRAPE, API, WIKIDATA, etc.) + + **Files Modified**: + - LinkedInProfile.yaml: Replaced data_source_whatsapp import and slot with + has_or_had_source + DataSource, updated slot_usage + + **Archived**: modules/slots/archive/data_source_whatsapp_archived_20260123.yaml - original_slot_id: https://nde.nl/ontology/hc/slot/data_tier revision: - label: has_or_had_level type: slot - label: DataTierLevel type: class + processed: + status: true + date: '2026-01-23' + agent: claude-claude-sonnet-4-20250514 + notes: | + **Migration: data_tier** ✅ COMPLETE + + **Pattern**: data_tier → has_or_had_level + DataTierLevel + + **Files Created**: + - DataTierLevel.yaml: Structured class for data quality tier classification + with standard codes (TIER_1_AUTHORITATIVE, TIER_2_VERIFIED, + TIER_3_CROWD_SOURCED, TIER_4_INFERRED) + + **Files Modified**: + - CustodianTimelineEvent.yaml: Replaced data_tier import and slot with + has_or_had_level + DataTierLevel, updated slot_usage, description, and + all examples (5 examples updated) + + **Note**: Other files (ProvenanceBlock, SourceRecord, etc.) use DataTierEnum + directly in slot_usage without importing data_tier slot - not affected. + + **Archived**: modules/slots/archive/data_tier_archived_20260123.yaml - original_slot_id: https://nde.nl/ontology/hc/slot/dataset_description revision: - label: has_or_had_description @@ -11586,7 +11627,20 @@ fixes: type: slot - label: TimeSpan type: class - + processed: + status: true + date: '2026-01-23' + agent: claude-claude-sonnet-4-20250514 + notes: | + **Migration: date_value** ✅ COMPLETE (SLOT NOT FOUND) + + **Status**: The date_value slot does not exist in the schema and is not + used by any class. It was either previously removed or never created. + + **Target Pattern**: temporal_extent + TimeSpan (both already exist) + + **No action needed** - marking as processed since the migration target + is already available for any future use cases. - original_slot_id: https://nde.nl/ontology/hc/slot/date_created revision: - label: is_or_was_created_through @@ -11643,6 +11697,26 @@ fixes: type: slot - label: Timestamp type: class + processed: + status: true + date: '2026-01-23' + agent: claude-claude-sonnet-4-20250514 + notes: | + **Migration: date_retrieved** ✅ COMPLETE + + **Pattern**: date_retrieved → was_retrieved_at + Timestamp + + **Files Created**: + - was_retrieved_at.yaml: New slot for data retrieval timestamps using + RiC-O temporal naming convention + + **Files Modified**: + - FindingAid.yaml: Replaced date_retrieved import and slot with + was_retrieved_at + Timestamp, updated slot_usage + - Overview.yaml: Replaced date_retrieved import and slot with + was_retrieved_at + Timestamp, updated slot_usage and examples + + **Archived**: modules/slots/archive/date_retrieved_archived_20260123.yaml - original_slot_id: https://nde.nl/ontology/hc/slot/de revision: - label: has_or_had_language @@ -11650,6 +11724,19 @@ fixes: - label: Language type: class value: de + processed: + status: true + date: '2026-01-23' + agent: claude-claude-sonnet-4-20250514 + notes: | + **Migration: de** ✅ COMPLETE (SLOT NOT USED) + + **Status**: The de slot (German language text) was not used by any class. + Legacy language-specific slot that predates the has_or_had_language + Language pattern. + + **Target Pattern**: has_or_had_language + Language (both already exist) + + **Archived**: modules/slots/archive/de_archived_20260123.yaml - original_slot_id: https://nde.nl/ontology/hc/slot/dealer_name revision: - label: is_or_was_associated_with diff --git a/schemas/20251121/linkml/modules/slots/was_retrieved_at.yaml b/schemas/20251121/linkml/modules/slots/was_retrieved_at.yaml new file mode 100644 index 0000000000..5d94397cea --- /dev/null +++ b/schemas/20251121/linkml/modules/slots/was_retrieved_at.yaml @@ -0,0 +1,41 @@ +id: https://nde.nl/ontology/hc/slot/was_retrieved_at +name: was_retrieved_at_slot +title: Was Retrieved At Slot +description: >- + Timestamp indicating when data was retrieved from a source. + + Follows RiC-O temporal naming convention (Rule 39) using past tense + to indicate the retrieval event has already occurred. + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + prov: http://www.w3.org/ns/prov# + schema: http://schema.org/ + +imports: + - linkml:types + +default_prefix: hc + +slots: + was_retrieved_at: + description: >- + Timestamp indicating when data was retrieved from a source. + + Used for tracking when web scrapes, API calls, or other data + retrieval operations occurred. + slot_uri: prov:endedAtTime + range: Timestamp + exact_mappings: + - prov:endedAtTime + close_mappings: + - schema:dateModified + annotations: + specificity_score: 0.25 + specificity_rationale: >- + Retrieval timestamps are broadly useful for provenance tracking + across all data sources. + custodian_types: '["*"]' + custodian_types_rationale: >- + Data retrieval applies universally to all custodian types.