# File-listing metadata captured with the source: 533 lines, 15 KiB, YAML
# LinkML schema header: identity, licensing, prefix map, defaults and imports.
id: https://w3id.org/heritage/custodian/hyponyms-curated-full
name: heritage-custodian-hyponyms-curated-full
title: Heritage Custodian Hyponyms Curated Full (Wikidata Enriched) Schema
description: >-
  Schema for Wikidata-enriched hyponyms curated data. Preserves manual curation
  metadata while adding complete Wikidata entity information including all labels,
  descriptions, aliases (in all languages), claims (all properties), sitelinks,
  and entity metadata. Output format of enrich_hyponyms_with_wikidata.py script.

license: https://creativecommons.org/publicdomain/zero/1.0/
# Quoted so the version is always read as a string, never as a number.
version: "0.1.0"

# CURIE prefixes used by slot_uri values and imports in this schema.
prefixes:
  linkml: https://w3id.org/linkml/
  heritage: https://w3id.org/heritage/custodian/
  wikidata: http://www.wikidata.org/entity/
  wikibase: http://wikiba.se/ontology#
  dcterms: http://purl.org/dc/terms/
  prov: http://www.w3.org/ns/prov#
  rico: https://www.ica.org/standards/RiC/ontology#

default_prefix: heritage
# Slots that declare no explicit range are treated as strings.
default_range: string

imports:
  - linkml:types
  - hyponyms_curated

# =============================================================================
# CORE CLASSES (Extended from hyponyms_curated.yaml)
# =============================================================================

classes:
  # Document root. `sources` and `exclude` are not declared in this file's
  # slots section; presumably they come from the hyponyms_curated import
  # (TODO confirm).
  HyponymsCuratedFull:
    description: >-
      Root container for Wikidata-enriched hyponyms curated data.
      Extends HyponymsCurated with full Wikidata entity information.
    tree_root: true
    slots:
      - sources
      - hypernym_enriched
      - entity_enriched
      - entity_list_enriched
      - standards_enriched
      - collection_enriched
      - exclude
    slot_usage:
      sources:
        required: true
      hypernym_enriched:
        description: "Enriched hypernym section with Wikidata data"
        multivalued: true
        range: EnrichedEntity
      entity_enriched:
        description: "Enriched named entity section with Wikidata data"
        multivalued: true
        range: EnrichedEntity
      entity_list_enriched:
        description: "Enriched entity list section with Wikidata data"
        multivalued: true
        range: EnrichedEntity
      standards_enriched:
        description: "Enriched standards section with Wikidata data"
        multivalued: true
        range: EnrichedEntity
      collection_enriched:
        description: "Enriched collection section with Wikidata data"
        multivalued: true
        range: EnrichedEntity
      exclude:
        description: "Excluded Q-numbers (unchanged from curated file)"

  # Pairs the curated record with the fetched Wikidata record.
  # `CuratedEntity` is not defined in this file; presumably imported from
  # hyponyms_curated (TODO confirm).
  EnrichedEntity:
    description: >-
      Entity with both manual curation metadata and complete Wikidata information.
      Separates human-curated data (country, time, type) from fetched Wikidata
      data (labels, descriptions, claims) for transparency.
    slots:
      - curated
      - wikidata
      - qid
      - enrichment_status
      - enrichment_date
    slot_usage:
      curated:
        required: true
        range: CuratedEntity
        inlined: true
        description: "Original manual curation metadata (preserved from input)"
      wikidata:
        range: WikidataEntity
        inlined: true
        description: "Complete Wikidata entity data (fetched via API)"
      qid:
        description: "Extracted Wikidata Q-number for reference"
        pattern: '^Q[0-9]+$'
      enrichment_status:
        required: true
        range: EnrichmentStatusEnum
        # Value list kept in sync with EnrichmentStatusEnum, which also
        # defines `cached`.
        description: "Status of Wikidata enrichment (success, fetch_failed, no_qid, cached)"
      enrichment_date:
        range: datetime
        description: "Timestamp when enrichment was performed (ISO 8601)"

  # The `type` slot is not declared in this file's slots section; presumably
  # it is provided by the hyponyms_curated import (TODO confirm).
  WikidataEntity:
    description: >-
      Complete Wikidata entity information fetched via Wikibase API.
      Includes all labels, descriptions, aliases (all languages), claims
      (all properties with qualifiers and references), sitelinks, and metadata.
    slots:
      - id
      - type
      - modified
      - labels
      - descriptions
      - aliases
      - claims
      - sitelinks
      - metadata
    slot_usage:
      id:
        required: true
        identifier: true
        description: "Wikidata Q-number"
        pattern: '^Q[0-9]+$'
        slot_uri: dcterms:identifier
      type:
        description: "Wikibase entity type (typically 'item')"
        slot_uri: wikibase:entityType
      modified:
        description: "Last modification timestamp from Wikidata"
        slot_uri: dcterms:modified
      labels:
        required: true
        range: LanguageMap
        inlined: true
        description: "Labels in all available languages"
        slot_uri: wikibase:label
      descriptions:
        range: LanguageMap
        inlined: true
        description: "Descriptions in all available languages"
        slot_uri: wikibase:description
      aliases:
        range: LanguageAliasMap
        inlined: true
        description: "Alternative names in all available languages"
        slot_uri: wikibase:alias
      claims:
        range: ClaimMap
        inlined: true
        description: "All Wikidata statements (properties with values)"
        slot_uri: wikibase:claim
      sitelinks:
        range: SitelinkMap
        inlined: true
        description: "Links to Wikipedia and other Wikimedia projects"
        slot_uri: wikibase:sitelink
      metadata:
        range: EntityMetadata
        inlined: true
        description: "Additional entity metadata (pageid, namespace, revision)"

  # Only eleven language codes are modelled as explicit attributes; the
  # comments below state that other codes may also occur.
  LanguageMap:
    description: >-
      Map of language codes to translated strings.
      Used for labels and descriptions in multiple languages.
    attributes:
      en:
        description: "English text"
        range: string
      nl:
        description: "Dutch text"
        range: string
      de:
        description: "German text"
        range: string
      fr:
        description: "French text"
        range: string
      es:
        description: "Spanish text"
        range: string
      pt:
        description: "Portuguese text"
        range: string
      it:
        description: "Italian text"
        range: string
      ja:
        description: "Japanese text"
        range: string
      zh:
        description: "Chinese text"
        range: string
      ar:
        description: "Arabic text"
        range: string
      ru:
        description: "Russian text"
        range: string
    comments:
      - "Additional language codes may be present beyond listed attributes"
      - "All ISO 639-1 language codes are supported"

  # Declares no slots or attributes; the expected shape is described only in
  # the comments below.
  LanguageAliasMap:
    description: >-
      Map of language codes to lists of alternative names/aliases.
      Similar to LanguageMap but values are arrays of strings.
    comments:
      - "Each language code maps to a list of alias strings"
      - "Example: {'en': ['British Museum', 'BM'], 'de': ['Britisches Museum']}"

  # Declares no slots or attributes; the expected shape is described only in
  # the comments below.
  ClaimMap:
    description: >-
      Map of Wikidata property IDs (P-numbers) to lists of claims/statements.
      Contains all property values including qualifiers and references.
    comments:
      - "Keys are property IDs like 'P31' (instance of), 'P279' (subclass of)"
      - "Values are lists of claim objects with datavalues, qualifiers, references"
      - "See WikidataClaim for structure of individual claims"

  WikidataClaim:
    description: >-
      Individual Wikidata claim/statement with main value, qualifiers, and references.
      Preserves complete statement structure from Wikibase API.
    slots:
      - mainsnak
      - qualifiers
      - references
      - rank
      - id
    slot_usage:
      mainsnak:
        description: "Main statement value (required part of claim)"
        range: WikidataSnak
        inlined: true
      qualifiers:
        description: "Qualifiers modifying the main statement"
        multivalued: true
        range: WikidataSnak
        inlined_as_list: true
      references:
        description: "References supporting the statement"
        multivalued: true
        range: WikidataReference
        inlined_as_list: true
      rank:
        description: "Statement rank (preferred, normal, deprecated)"
        range: RankEnum
      id:
        description: "Unique statement ID"

  WikidataSnak:
    description: >-
      A snak represents a property-value pair in Wikidata.
      Used for both main values and qualifiers.
    slots:
      - property
      - datatype
      - datavalue
      - snaktype
    slot_usage:
      property:
        description: "Property ID (P-number)"
        pattern: '^P[0-9]+$'
      datatype:
        description: "Wikibase datatype (wikibase-item, string, time, quantity, etc.)"
      datavalue:
        description: "The actual value (structure depends on datatype)"
      snaktype:
        description: "Type of snak (value, somevalue, novalue)"
        range: SnaktypeEnum

  WikidataReference:
    description: >-
      Reference for a Wikidata statement. Contains source information
      for verifying the claim.
    slots:
      - snaks
      - hash
    slot_usage:
      snaks:
        description: "List of snaks providing reference information"
        multivalued: true
        range: WikidataSnak
        inlined_as_list: true
      hash:
        description: "Reference hash (unique identifier)"

  # Declares no slots or attributes; the expected shape is described only in
  # the comments below.
  SitelinkMap:
    description: >-
      Map of Wikimedia project site codes to article links.
      Includes Wikipedia articles in all languages, Wikimedia Commons, etc.
    comments:
      - "Keys are site codes like 'enwiki', 'nlwiki', 'commonswiki'"
      - "See Sitelink for structure of individual sitelink objects"

  Sitelink:
    description: >-
      Link to an article on a Wikimedia project (Wikipedia, Commons, etc.).
    slots:
      - title
      - url
      - badges
    slot_usage:
      title:
        description: "Page title on the target wiki"
      url:
        description: "Full URL to the page"
        range: uri
      badges:
        description: "Quality badges (featured article, good article, etc.)"
        multivalued: true
        pattern: '^Q[0-9]+$'

  EntityMetadata:
    description: >-
      Additional metadata about the Wikidata entity from MediaWiki API.
      Includes page ID, namespace, revision information.
    slots:
      - pageid
      - ns
      - title
      - lastrevid
    slot_usage:
      pageid:
        description: "MediaWiki page ID"
        range: integer
      ns:
        description: "MediaWiki namespace (0 for main namespace)"
        range: integer
      title:
        description: "Page title (typically same as Q-number)"
      lastrevid:
        description: "Latest revision ID"
        range: integer

# =============================================================================
# SLOTS (Additional to those imported from hyponyms_curated.yaml)
# =============================================================================

# Schema-wide slot definitions. Classes refine these through slot_usage;
# slots with no explicit range fall back to the schema's default_range.
slots:
  # Enrichment metadata
  hypernym_enriched:
    description: "Enriched hypernym entities"
    range: EnrichedEntity
    multivalued: true
    inlined_as_list: true

  entity_enriched:
    description: "Enriched named entities"
    range: EnrichedEntity
    multivalued: true
    inlined_as_list: true

  entity_list_enriched:
    description: "Enriched entity list entries"
    range: EnrichedEntity
    multivalued: true
    inlined_as_list: true

  standards_enriched:
    description: "Enriched metadata standards"
    range: EnrichedEntity
    multivalued: true
    inlined_as_list: true

  collection_enriched:
    description: "Enriched collection types"
    range: EnrichedEntity
    multivalued: true
    inlined_as_list: true

  curated:
    description: "Original curated entity metadata"
    range: CuratedEntity
    slot_uri: heritage:curationData

  wikidata:
    description: "Wikidata entity information"
    range: WikidataEntity
    slot_uri: wikidata:entityData

  qid:
    description: "Wikidata Q-number"
    range: string
    pattern: '^Q[0-9]+$'

  enrichment_status:
    description: "Status of enrichment process"
    range: EnrichmentStatusEnum

  enrichment_date:
    description: "Timestamp when enrichment was performed"
    range: datetime
    slot_uri: prov:generatedAtTime

  # Wikidata entity fields
  # `id` is flagged as an identifier here, so that applies to every class
  # that lists the slot.
  id:
    description: "Wikidata entity ID"
    identifier: true

  modified:
    description: "Last modification timestamp"
    range: datetime

  labels:
    description: "Entity labels in multiple languages"
    range: LanguageMap

  descriptions:
    description: "Entity descriptions in multiple languages"
    range: LanguageMap

  aliases:
    description: "Alternative names in multiple languages"
    range: LanguageAliasMap

  claims:
    description: "Wikidata statements (property-value pairs)"
    range: ClaimMap

  sitelinks:
    description: "Links to Wikimedia project pages"
    range: SitelinkMap

  metadata:
    description: "Additional entity metadata"
    range: EntityMetadata

  # Claim/Snak fields
  mainsnak:
    description: "Main statement value"
    range: WikidataSnak

  qualifiers:
    description: "Statement qualifiers"
    range: WikidataSnak
    multivalued: true

  references:
    description: "Statement references"
    range: WikidataReference
    multivalued: true

  rank:
    description: "Statement rank"
    range: RankEnum

  property:
    description: "Wikidata property ID (P-number)"
    pattern: '^P[0-9]+$'

  datatype:
    description: "Wikibase datatype"

  datavalue:
    description: "Statement value"

  snaktype:
    description: "Snak type"
    range: SnaktypeEnum

  snaks:
    description: "Reference snaks"
    range: WikidataSnak
    multivalued: true

  hash:
    description: "Reference hash"

  # Sitelink fields
  title:
    description: "Page title"

  url:
    description: "Page URL"
    range: uri

  badges:
    description: "Quality badges"
    multivalued: true

  # Metadata fields
  pageid:
    description: "MediaWiki page ID"
    range: integer

  ns:
    description: "MediaWiki namespace"
    range: integer

  lastrevid:
    description: "Latest revision ID"
    range: integer

# =============================================================================
# ENUMERATIONS
# =============================================================================

# Closed value sets used as slot ranges by the enrichment_status, rank and
# snaktype slots.
enums:
  EnrichmentStatusEnum:
    description: "Status of Wikidata enrichment process"
    permissible_values:
      success:
        description: "Entity successfully enriched with Wikidata data"
      fetch_failed:
        description: "Wikidata API fetch failed (network error, invalid response)"
      no_qid:
        description: "No valid Q-number found in label field"
      cached:
        description: "Data retrieved from local cache (not counted as new enrichment)"

  RankEnum:
    description: "Wikidata statement rank"
    permissible_values:
      preferred:
        description: "Preferred statement (highest priority)"
      normal:
        description: "Normal statement (default rank)"
      deprecated:
        description: "Deprecated statement (outdated or incorrect)"

  SnaktypeEnum:
    description: "Type of Wikidata snak"
    permissible_values:
      value:
        description: "Snak with a specific value"
      somevalue:
        description: "Value exists but is unknown"
      novalue:
        description: "Property explicitly has no value"