# glam/schemas/initial/hyponyms_curated_full.yaml
# 2025-11-21 22:12:33 +01:00
#
# 533 lines
# 15 KiB
# YAML

# Schema-level metadata: identity, licence, CURIE prefixes, defaults, imports.
id: https://w3id.org/heritage/custodian/hyponyms-curated-full
name: heritage-custodian-hyponyms-curated-full
title: Heritage Custodian Hyponyms Curated Full (Wikidata Enriched) Schema
description: >-
  Schema for Wikidata-enriched hyponyms curated data. Preserves manual
  curation metadata while adding complete Wikidata entity information
  including all labels, descriptions, aliases (in all languages), claims
  (all properties), sitelinks, and entity metadata. Output format of
  enrich_hyponyms_with_wikidata.py script.
license: https://creativecommons.org/publicdomain/zero/1.0/
# Quoted so the version is always loaded as a string.
version: '0.1.0'
# CURIE prefix -> namespace IRI. Quoted because several IRIs end in '#'.
prefixes:
  linkml: 'https://w3id.org/linkml/'
  heritage: 'https://w3id.org/heritage/custodian/'
  wikidata: 'http://www.wikidata.org/entity/'
  wikibase: 'http://wikiba.se/ontology#'
  dcterms: 'http://purl.org/dc/terms/'
  prov: 'http://www.w3.org/ns/prov#'
  rico: 'https://www.ica.org/standards/RiC/ontology#'
# Unprefixed names resolve under `heritage`; a slot that declares no range
# is treated as a string.
default_prefix: heritage
default_range: string
imports:
  - linkml:types
  # Curated base schema that the section banners in this file refer to.
  - hyponyms_curated
# =============================================================================
# CORE CLASSES (Extended from hyponyms_curated.yaml)
# =============================================================================
classes:
# Tree root of an enriched document: the source list, five enriched sections
# and the exclusion list.
# NOTE(review): `sources` and `exclude` are not declared in this file;
# presumably they come from the imported hyponyms_curated schema - confirm.
# NOTE(review): the description says "Extends HyponymsCurated" but no `is_a`
# is declared here - confirm whether inheritance was intended.
HyponymsCuratedFull:
  description: >-
    Root container for Wikidata-enriched hyponyms curated data. Extends
    HyponymsCurated with full Wikidata entity information.
  tree_root: true
  slots:
    - sources
    - hypernym_enriched
    - entity_enriched
    - entity_list_enriched
    - standards_enriched
    - collection_enriched
    - exclude
  slot_usage:
    sources:
      required: true
    # Each enriched section is a multivalued slot of EnrichedEntity records.
    hypernym_enriched:
      description: 'Enriched hypernym section with Wikidata data'
      range: EnrichedEntity
      multivalued: true
    entity_enriched:
      description: 'Enriched named entity section with Wikidata data'
      range: EnrichedEntity
      multivalued: true
    entity_list_enriched:
      description: 'Enriched entity list section with Wikidata data'
      range: EnrichedEntity
      multivalued: true
    standards_enriched:
      description: 'Enriched standards section with Wikidata data'
      range: EnrichedEntity
      multivalued: true
    collection_enriched:
      description: 'Enriched collection section with Wikidata data'
      range: EnrichedEntity
      multivalued: true
    exclude:
      description: 'Excluded Q-numbers (unchanged from curated file)'
# One enriched record: the untouched curated entry next to the Wikidata
# payload fetched for it, plus bookkeeping about the enrichment run.
# NOTE(review): CuratedEntity is not declared in this file; presumably it
# comes from the imported hyponyms_curated schema - confirm.
EnrichedEntity:
  description: >-
    Entity with both manual curation metadata and complete Wikidata
    information. Separates human-curated data (country, time, type) from
    fetched Wikidata data (labels, descriptions, claims) for transparency.
  slots:
    - curated
    - wikidata
    - qid
    - enrichment_status
    - enrichment_date
  slot_usage:
    curated:
      description: 'Original manual curation metadata (preserved from input)'
      range: CuratedEntity
      inlined: true
      required: true
    # Not marked required: only `curated` and `enrichment_status` are.
    wikidata:
      description: 'Complete Wikidata entity data (fetched via API)'
      range: WikidataEntity
      inlined: true
    qid:
      description: 'Extracted Wikidata Q-number for reference'
      pattern: '^Q[0-9]+$'
    enrichment_status:
      # The value list mirrors every permissible value of
      # EnrichmentStatusEnum, including `cached`.
      description: 'Status of Wikidata enrichment (success, fetch_failed, no_qid, cached)'
      range: EnrichmentStatusEnum
      required: true
    enrichment_date:
      description: 'Timestamp when enrichment was performed (ISO 8601)'
      range: datetime
# The Wikidata payload for one entity. Map-like members (labels, claims, ...)
# are each given their own class as range and are inlined.
# NOTE(review): the `type` slot is not declared in this file's slots section;
# presumably it is imported from hyponyms_curated - confirm.
WikidataEntity:
  description: >-
    Complete Wikidata entity information fetched via Wikibase API. Includes
    all labels, descriptions, aliases (all languages), claims (all properties
    with qualifiers and references), sitelinks, and metadata.
  slots:
    - id
    - type
    - modified
    - labels
    - descriptions
    - aliases
    - claims
    - sitelinks
    - metadata
  slot_usage:
    id:
      description: 'Wikidata Q-number'
      slot_uri: dcterms:identifier
      pattern: '^Q[0-9]+$'
      identifier: true
      required: true
    type:
      description: "Wikibase entity type (typically 'item')"
      slot_uri: wikibase:entityType
    modified:
      description: 'Last modification timestamp from Wikidata'
      slot_uri: dcterms:modified
    labels:
      description: 'Labels in all available languages'
      slot_uri: wikibase:label
      range: LanguageMap
      inlined: true
      required: true
    descriptions:
      description: 'Descriptions in all available languages'
      slot_uri: wikibase:description
      range: LanguageMap
      inlined: true
    aliases:
      description: 'Alternative names in all available languages'
      slot_uri: wikibase:alias
      range: LanguageAliasMap
      inlined: true
    claims:
      description: 'All Wikidata statements (properties with values)'
      slot_uri: wikibase:claim
      range: ClaimMap
      inlined: true
    sitelinks:
      description: 'Links to Wikipedia and other Wikimedia projects'
      slot_uri: wikibase:sitelink
      range: SitelinkMap
      inlined: true
    metadata:
      description: 'Additional entity metadata (pageid, namespace, revision)'
      range: EntityMetadata
      inlined: true
# Language-code -> text map. Only eleven codes are declared as attributes
# below; the `comments` entries state that other codes may also appear.
LanguageMap:
  description: >-
    Map of language codes to translated strings. Used for labels and
    descriptions in multiple languages.
  attributes:
    en:
      range: string
      description: 'English text'
    nl:
      range: string
      description: 'Dutch text'
    de:
      range: string
      description: 'German text'
    fr:
      range: string
      description: 'French text'
    es:
      range: string
      description: 'Spanish text'
    pt:
      range: string
      description: 'Portuguese text'
    it:
      range: string
      description: 'Italian text'
    ja:
      range: string
      description: 'Japanese text'
    zh:
      range: string
      description: 'Chinese text'
    ar:
      range: string
      description: 'Arabic text'
    ru:
      range: string
      description: 'Russian text'
  comments:
    - 'Additional language codes may be present beyond listed attributes'
    - 'All ISO 639-1 language codes are supported'
# Language-code -> list-of-aliases map. No slots or attributes are declared;
# the expected shape is described only in `comments`.
LanguageAliasMap:
  description: >-
    Map of language codes to lists of alternative names/aliases. Similar to
    LanguageMap but values are arrays of strings.
  comments:
    - 'Each language code maps to a list of alias strings'
    - "Example: {'en': ['British Museum', 'BM'], 'de': ['Britisches Museum']}"
# Property-ID -> list-of-claims map. No slots or attributes are declared; the
# key and value shapes are described only in `comments`.
ClaimMap:
  description: >-
    Map of Wikidata property IDs (P-numbers) to lists of claims/statements.
    Contains all property values including qualifiers and references.
  comments:
    - "Keys are property IDs like 'P31' (instance of), 'P279' (subclass of)"
    - 'Values are lists of claim objects with datavalues, qualifiers, references'
    - 'See WikidataClaim for structure of individual claims'
# A single statement: main snak, optional qualifier and reference lists,
# a rank and a statement ID.
WikidataClaim:
  description: >-
    Individual Wikidata claim/statement with main value, qualifiers, and
    references. Preserves complete statement structure from Wikibase API.
  slots:
    - mainsnak
    - qualifiers
    - references
    - rank
    - id
  slot_usage:
    # NOTE(review): the description calls this the required part of a claim,
    # but `required: true` is not set - confirm which is intended.
    mainsnak:
      description: 'Main statement value (required part of claim)'
      range: WikidataSnak
      inlined: true
    qualifiers:
      description: 'Qualifiers modifying the main statement'
      range: WikidataSnak
      multivalued: true
      inlined_as_list: true
    references:
      description: 'References supporting the statement'
      range: WikidataReference
      multivalued: true
      inlined_as_list: true
    rank:
      description: 'Statement rank (preferred, normal, deprecated)'
      range: RankEnum
    id:
      description: 'Unique statement ID'
# Property/value pair, used as the range of mainsnak, qualifiers and
# reference snaks.
WikidataSnak:
  description: >-
    A snak represents a property-value pair in Wikidata. Used for both main
    values and qualifiers.
  slots:
    - property
    - datatype
    - datavalue
    - snaktype
  slot_usage:
    property:
      description: 'Property ID (P-number)'
      pattern: '^P[0-9]+$'
    datatype:
      description: 'Wikibase datatype (wikibase-item, string, time, quantity, etc.)'
    # NOTE(review): no range is given, so the schema default (string) applies,
    # while the description says the structure varies by datatype - confirm.
    datavalue:
      description: 'The actual value (structure depends on datatype)'
    snaktype:
      description: 'Type of snak (value, somevalue, novalue)'
      range: SnaktypeEnum
# One reference attached to a statement: a list of snaks plus its hash.
WikidataReference:
  description: >-
    Reference for a Wikidata statement. Contains source information for
    verifying the claim.
  slots:
    - snaks
    - hash
  slot_usage:
    snaks:
      description: 'List of snaks providing reference information'
      range: WikidataSnak
      multivalued: true
      inlined_as_list: true
    hash:
      description: 'Reference hash (unique identifier)'
# Site-code -> sitelink map. No slots or attributes are declared; the key and
# value shapes are described only in `comments`.
SitelinkMap:
  description: >-
    Map of Wikimedia project site codes to article links. Includes Wikipedia
    articles in all languages, Wikimedia Commons, etc.
  comments:
    - "Keys are site codes like 'enwiki', 'nlwiki', 'commonswiki'"
    - 'See Sitelink for structure of individual sitelink objects'
# A single sitelink: page title, URL and a list of badge Q-numbers.
Sitelink:
  description: 'Link to an article on a Wikimedia project (Wikipedia, Commons, etc.).'
  slots:
    - title
    - url
    - badges
  slot_usage:
    title:
      description: 'Page title on the target wiki'
    url:
      description: 'Full URL to the page'
      range: uri
    # The pattern restricts each badge value to a Q-number.
    badges:
      description: 'Quality badges (featured article, good article, etc.)'
      pattern: '^Q[0-9]+$'
      multivalued: true
# Page-level bookkeeping for the entity: three integer fields and a title.
EntityMetadata:
  description: >-
    Additional metadata about the Wikidata entity from MediaWiki API.
    Includes page ID, namespace, revision information.
  slots:
    - pageid
    - ns
    - title
    - lastrevid
  slot_usage:
    pageid:
      range: integer
      description: 'MediaWiki page ID'
    ns:
      range: integer
      description: 'MediaWiki namespace (0 for main namespace)'
    title:
      description: 'Page title (typically same as Q-number)'
    lastrevid:
      range: integer
      description: 'Latest revision ID'
# =============================================================================
# SLOTS (Additional to those imported from hyponyms_curated.yaml)
# =============================================================================
slots:
# Enrichment metadata
# The five section slots share one shape: an inlined list of EnrichedEntity.
hypernym_enriched:
  description: 'Enriched hypernym entities'
  multivalued: true
  inlined_as_list: true
  range: EnrichedEntity
entity_enriched:
  description: 'Enriched named entities'
  multivalued: true
  inlined_as_list: true
  range: EnrichedEntity
entity_list_enriched:
  description: 'Enriched entity list entries'
  multivalued: true
  inlined_as_list: true
  range: EnrichedEntity
standards_enriched:
  description: 'Enriched metadata standards'
  multivalued: true
  inlined_as_list: true
  range: EnrichedEntity
collection_enriched:
  description: 'Enriched collection types'
  multivalued: true
  inlined_as_list: true
  range: EnrichedEntity
# Per-record slots used by EnrichedEntity.
curated:
  description: 'Original curated entity metadata'
  slot_uri: heritage:curationData
  range: CuratedEntity
wikidata:
  description: 'Wikidata entity information'
  slot_uri: wikidata:entityData
  range: WikidataEntity
qid:
  description: 'Wikidata Q-number'
  pattern: '^Q[0-9]+$'
  range: string
enrichment_status:
  description: 'Status of enrichment process'
  range: EnrichmentStatusEnum
enrichment_date:
  description: 'Timestamp when enrichment was performed'
  slot_uri: prov:generatedAtTime
  range: datetime
# Wikidata entity fields
# `id` is flagged as the identifier here, at slot level, so every class that
# lists the `id` slot picks the flag up.
id:
  identifier: true
  description: 'Wikidata entity ID'
modified:
  range: datetime
  description: 'Last modification timestamp'
labels:
  range: LanguageMap
  description: 'Entity labels in multiple languages'
descriptions:
  range: LanguageMap
  description: 'Entity descriptions in multiple languages'
aliases:
  range: LanguageAliasMap
  description: 'Alternative names in multiple languages'
claims:
  range: ClaimMap
  description: 'Wikidata statements (property-value pairs)'
sitelinks:
  range: SitelinkMap
  description: 'Links to Wikimedia project pages'
metadata:
  range: EntityMetadata
  description: 'Additional entity metadata'
# Claim/Snak fields
# Slots without an explicit range (property, datatype, datavalue, hash) use
# the schema-wide default range.
mainsnak:
  range: WikidataSnak
  description: 'Main statement value'
qualifiers:
  range: WikidataSnak
  multivalued: true
  description: 'Statement qualifiers'
references:
  range: WikidataReference
  multivalued: true
  description: 'Statement references'
rank:
  range: RankEnum
  description: 'Statement rank'
property:
  pattern: '^P[0-9]+$'
  description: 'Wikidata property ID (P-number)'
datatype:
  description: 'Wikibase datatype'
datavalue:
  description: 'Statement value'
snaktype:
  range: SnaktypeEnum
  description: 'Snak type'
snaks:
  range: WikidataSnak
  multivalued: true
  description: 'Reference snaks'
hash:
  description: 'Reference hash'
# Sitelink fields
# `title` is listed by both Sitelink and EntityMetadata.
title:
  description: 'Page title'
url:
  range: uri
  description: 'Page URL'
badges:
  multivalued: true
  description: 'Quality badges'
# Metadata fields
# All three are integer-valued.
pageid:
  range: integer
  description: 'MediaWiki page ID'
ns:
  range: integer
  description: 'MediaWiki namespace'
lastrevid:
  range: integer
  description: 'Latest revision ID'
# =============================================================================
# ENUMERATIONS
# =============================================================================
enums:
# Outcome of enriching one record; used as the range of `enrichment_status`.
EnrichmentStatusEnum:
  description: 'Status of Wikidata enrichment process'
  permissible_values:
    success:
      description: 'Entity successfully enriched with Wikidata data'
    fetch_failed:
      description: 'Wikidata API fetch failed (network error, invalid response)'
    no_qid:
      description: 'No valid Q-number found in label field'
    cached:
      description: 'Data retrieved from local cache (not counted as new enrichment)'
# The three statement ranks; used as the range of `rank`.
RankEnum:
  description: 'Wikidata statement rank'
  permissible_values:
    preferred:
      description: 'Preferred statement (highest priority)'
    normal:
      description: 'Normal statement (default rank)'
    deprecated:
      description: 'Deprecated statement (outdated or incorrect)'
# The three snak kinds; used as the range of `snaktype`.
SnaktypeEnum:
  description: 'Type of Wikidata snak'
  permissible_values:
    value:
      description: 'Snak with a specific value'
    somevalue:
      description: 'Value exists but is unknown'
    novalue:
      description: 'Property explicitly has no value'