---
# LinkML schema for the Wikidata-enriched variant of the curated hyponyms
# file. It layers enrichment classes/slots/enums on top of the imported
# `hyponyms_curated` schema.
#
# Names referenced here but not defined in this file (`sources`, `exclude`,
# `type`, `CuratedEntity`) are presumably provided by the `hyponyms_curated`
# import -- TODO confirm against that schema.
id: https://w3id.org/heritage/custodian/hyponyms-curated-full
name: heritage-custodian-hyponyms-curated-full
title: Heritage Custodian Hyponyms Curated Full (Wikidata Enriched) Schema
description: >-
  Schema for Wikidata-enriched hyponyms curated data. Preserves manual
  curation metadata while adding complete Wikidata entity information
  including all labels, descriptions, aliases (in all languages), claims
  (all properties), sitelinks, and entity metadata. Output format of
  enrich_hyponyms_with_wikidata.py script.
license: https://creativecommons.org/publicdomain/zero/1.0/
version: 0.1.0

prefixes:
  linkml: https://w3id.org/linkml/
  heritage: https://w3id.org/heritage/custodian/
  wikidata: http://www.wikidata.org/entity/
  wikibase: http://wikiba.se/ontology#
  dcterms: http://purl.org/dc/terms/
  prov: http://www.w3.org/ns/prov#
  rico: https://www.ica.org/standards/RiC/ontology#

default_prefix: heritage
default_range: string

imports:
  - linkml:types
  - hyponyms_curated

# =============================================================================
# CORE CLASSES (Extended from hyponyms_curated.yaml)
# =============================================================================

classes:

  # Root of an enriched document: one multivalued EnrichedEntity slot per
  # curated section, plus the pass-through `sources` and `exclude` slots.
  # NOTE(review): the description says this "extends HyponymsCurated", but no
  # `is_a` is declared here -- confirm whether inheritance is intended.
  HyponymsCuratedFull:
    description: >-
      Root container for Wikidata-enriched hyponyms curated data. Extends
      HyponymsCurated with full Wikidata entity information.
    tree_root: true
    slots:
      - sources
      - hypernym_enriched
      - entity_enriched
      - entity_list_enriched
      - standards_enriched
      - collection_enriched
      - exclude
    slot_usage:
      sources:
        required: true
      hypernym_enriched:
        description: "Enriched hypernym section with Wikidata data"
        multivalued: true
        range: EnrichedEntity
      entity_enriched:
        description: "Enriched named entity section with Wikidata data"
        multivalued: true
        range: EnrichedEntity
      entity_list_enriched:
        description: "Enriched entity list section with Wikidata data"
        multivalued: true
        range: EnrichedEntity
      standards_enriched:
        description: "Enriched standards section with Wikidata data"
        multivalued: true
        range: EnrichedEntity
      collection_enriched:
        description: "Enriched collection section with Wikidata data"
        multivalued: true
        range: EnrichedEntity
      exclude:
        description: "Excluded Q-numbers (unchanged from curated file)"

  # Wrapper pairing the curated record (`curated`, required) with the fetched
  # Wikidata record (`wikidata`, optional) and the enrichment bookkeeping.
  EnrichedEntity:
    description: >-
      Entity with both manual curation metadata and complete Wikidata
      information. Separates human-curated data (country, time, type) from
      fetched Wikidata data (labels, descriptions, claims) for transparency.
    slots:
      - curated
      - wikidata
      - qid
      - enrichment_status
      - enrichment_date
    slot_usage:
      curated:
        required: true
        range: CuratedEntity
        inlined: true
        description: "Original manual curation metadata (preserved from input)"
      wikidata:
        range: WikidataEntity
        inlined: true
        description: "Complete Wikidata entity data (fetched via API)"
      qid:
        description: "Extracted Wikidata Q-number for reference"
        pattern: '^Q[0-9]+$'
      enrichment_status:
        required: true
        range: EnrichmentStatusEnum
        # Lists every permissible value of EnrichmentStatusEnum, including
        # `cached`.
        description: "Status of Wikidata enrichment (success, fetch_failed, no_qid, cached)"
      enrichment_date:
        range: datetime
        description: "Timestamp when enrichment was performed (ISO 8601)"

  # The fetched Wikidata item. `id` and `labels` are the only required slots;
  # all structured members are inlined.
  WikidataEntity:
    description: >-
      Complete Wikidata entity information fetched via Wikibase API. Includes
      all labels, descriptions, aliases (all languages), claims (all
      properties with qualifiers and references), sitelinks, and metadata.
    slots:
      - id
      - type
      - modified
      - labels
      - descriptions
      - aliases
      - claims
      - sitelinks
      - metadata
    slot_usage:
      id:
        required: true
        identifier: true
        description: "Wikidata Q-number"
        pattern: '^Q[0-9]+$'
        slot_uri: dcterms:identifier
      type:
        description: "Wikibase entity type (typically 'item')"
        slot_uri: wikibase:entityType
      modified:
        description: "Last modification timestamp from Wikidata"
        slot_uri: dcterms:modified
      labels:
        required: true
        range: LanguageMap
        inlined: true
        description: "Labels in all available languages"
        slot_uri: wikibase:label
      descriptions:
        range: LanguageMap
        inlined: true
        description: "Descriptions in all available languages"
        slot_uri: wikibase:description
      aliases:
        range: LanguageAliasMap
        inlined: true
        description: "Alternative names in all available languages"
        slot_uri: wikibase:alias
      claims:
        range: ClaimMap
        inlined: true
        description: "All Wikidata statements (properties with values)"
        slot_uri: wikibase:claim
      sitelinks:
        range: SitelinkMap
        inlined: true
        description: "Links to Wikipedia and other Wikimedia projects"
        slot_uri: wikibase:sitelink
      metadata:
        range: EntityMetadata
        inlined: true
        description: "Additional entity metadata (pageid, namespace, revision)"

  # Language-code -> string map, modelled as eleven explicit attributes.
  # NOTE(review): the comments below promise additional language codes, but
  # only these attributes are declared -- confirm the validator in use accepts
  # undeclared keys.
  LanguageMap:
    description: >-
      Map of language codes to translated strings. Used for labels and
      descriptions in multiple languages.
    attributes:
      en:
        description: "English text"
        range: string
      nl:
        description: "Dutch text"
        range: string
      de:
        description: "German text"
        range: string
      fr:
        description: "French text"
        range: string
      es:
        description: "Spanish text"
        range: string
      pt:
        description: "Portuguese text"
        range: string
      it:
        description: "Italian text"
        range: string
      ja:
        description: "Japanese text"
        range: string
      zh:
        description: "Chinese text"
        range: string
      ar:
        description: "Arabic text"
        range: string
      ru:
        description: "Russian text"
        range: string
    comments:
      - "Additional language codes may be present beyond listed attributes"
      - "All ISO 639-1 language codes are supported"

  # NOTE(review): LanguageAliasMap, ClaimMap and SitelinkMap declare no slots
  # or attributes; their key/value structure is described only in `comments`.
  LanguageAliasMap:
    description: >-
      Map of language codes to lists of alternative names/aliases. Similar to
      LanguageMap but values are arrays of strings.
    comments:
      - "Each language code maps to a list of alias strings"
      - "Example: {'en': ['British Museum', 'BM'], 'de': ['Britisches Museum']}"

  ClaimMap:
    description: >-
      Map of Wikidata property IDs (P-numbers) to lists of claims/statements.
      Contains all property values including qualifiers and references.
    comments:
      - "Keys are property IDs like 'P31' (instance of), 'P279' (subclass of)"
      - "Values are lists of claim objects with datavalues, qualifiers, references"
      - "See WikidataClaim for structure of individual claims"

  # A single statement: one main snak plus lists of qualifier snaks and
  # references, a rank and a statement id.
  # NOTE(review): `id` is declared with `identifier: true` at schema level, so
  # that flag applies here as well -- confirm this is intended for statements.
  WikidataClaim:
    description: >-
      Individual Wikidata claim/statement with main value, qualifiers, and
      references. Preserves complete statement structure from Wikibase API.
    slots:
      - mainsnak
      - qualifiers
      - references
      - rank
      - id
    slot_usage:
      mainsnak:
        description: "Main statement value (required part of claim)"
        range: WikidataSnak
        inlined: true
      qualifiers:
        description: "Qualifiers modifying the main statement"
        multivalued: true
        range: WikidataSnak
        inlined_as_list: true
      references:
        description: "References supporting the statement"
        multivalued: true
        range: WikidataReference
        inlined_as_list: true
      rank:
        description: "Statement rank (preferred, normal, deprecated)"
        range: RankEnum
      id:
        description: "Unique statement ID"

  # Property/value pair shared by main values, qualifiers and reference snaks.
  WikidataSnak:
    description: >-
      A snak represents a property-value pair in Wikidata. Used for both main
      values and qualifiers.
    slots:
      - property
      - datatype
      - datavalue
      - snaktype
    slot_usage:
      property:
        description: "Property ID (P-number)"
        pattern: '^P[0-9]+$'
      datatype:
        description: "Wikibase datatype (wikibase-item, string, time, quantity, etc.)"
      datavalue:
        description: "The actual value (structure depends on datatype)"
      snaktype:
        description: "Type of snak (value, somevalue, novalue)"
        range: SnaktypeEnum

  # Source citation for a statement: a list of snaks plus a hash.
  WikidataReference:
    description: >-
      Reference for a Wikidata statement. Contains source information for
      verifying the claim.
    slots:
      - snaks
      - hash
    slot_usage:
      snaks:
        description: "List of snaks providing reference information"
        multivalued: true
        range: WikidataSnak
        inlined_as_list: true
      hash:
        description: "Reference hash (unique identifier)"

  SitelinkMap:
    description: >-
      Map of Wikimedia project site codes to article links. Includes Wikipedia
      articles in all languages, Wikimedia Commons, etc.
    comments:
      - "Keys are site codes like 'enwiki', 'nlwiki', 'commonswiki'"
      - "See Sitelink for structure of individual sitelink objects"

  # One sitelink entry; `badges` values are constrained to Q-numbers here.
  Sitelink:
    description: >-
      Link to an article on a Wikimedia project (Wikipedia, Commons, etc.).
    slots:
      - title
      - url
      - badges
    slot_usage:
      title:
        description: "Page title on the target wiki"
      url:
        description: "Full URL to the page"
        range: uri
      badges:
        description: "Quality badges (featured article, good article, etc.)"
        multivalued: true
        pattern: '^Q[0-9]+$'

  # MediaWiki page-level bookkeeping; shares the `title` slot with Sitelink.
  EntityMetadata:
    description: >-
      Additional metadata about the Wikidata entity from MediaWiki API.
      Includes page ID, namespace, revision information.
    slots:
      - pageid
      - ns
      - title
      - lastrevid
    slot_usage:
      pageid:
        description: "MediaWiki page ID"
        range: integer
      ns:
        description: "MediaWiki namespace (0 for main namespace)"
        range: integer
      title:
        description: "Page title (typically same as Q-number)"
      lastrevid:
        description: "Latest revision ID"
        range: integer

# =============================================================================
# SLOTS (Additional to those imported from hyponyms_curated.yaml)
# =============================================================================

slots:

  # Enrichment metadata
  hypernym_enriched:
    description: "Enriched hypernym entities"
    range: EnrichedEntity
    multivalued: true
    inlined_as_list: true
  entity_enriched:
    description: "Enriched named entities"
    range: EnrichedEntity
    multivalued: true
    inlined_as_list: true
  entity_list_enriched:
    description: "Enriched entity list entries"
    range: EnrichedEntity
    multivalued: true
    inlined_as_list: true
  standards_enriched:
    description: "Enriched metadata standards"
    range: EnrichedEntity
    multivalued: true
    inlined_as_list: true
  collection_enriched:
    description: "Enriched collection types"
    range: EnrichedEntity
    multivalued: true
    inlined_as_list: true
  curated:
    description: "Original curated entity metadata"
    range: CuratedEntity
    slot_uri: heritage:curationData
  wikidata:
    description: "Wikidata entity information"
    range: WikidataEntity
    slot_uri: wikidata:entityData
  qid:
    description: "Wikidata Q-number"
    range: string
    pattern: '^Q[0-9]+$'
  enrichment_status:
    description: "Status of enrichment process"
    range: EnrichmentStatusEnum
  enrichment_date:
    description: "Timestamp when enrichment was performed"
    range: datetime
    slot_uri: prov:generatedAtTime

  # Wikidata entity fields
  id:
    description: "Wikidata entity ID"
    identifier: true
  modified:
    description: "Last modification timestamp"
    range: datetime
  labels:
    description: "Entity labels in multiple languages"
    range: LanguageMap
  descriptions:
    description: "Entity descriptions in multiple languages"
    range: LanguageMap
  aliases:
    description: "Alternative names in multiple languages"
    range: LanguageAliasMap
  claims:
    description: "Wikidata statements (property-value pairs)"
    range: ClaimMap
  sitelinks:
    description: "Links to Wikimedia project pages"
    range: SitelinkMap
  metadata:
    description: "Additional entity metadata"
    range: EntityMetadata

  # Claim/Snak fields
  mainsnak:
    description: "Main statement value"
    range: WikidataSnak
  qualifiers:
    description: "Statement qualifiers"
    range: WikidataSnak
    multivalued: true
  references:
    description: "Statement references"
    range: WikidataReference
    multivalued: true
  rank:
    description: "Statement rank"
    range: RankEnum
  property:
    description: "Wikidata property ID (P-number)"
    pattern: '^P[0-9]+$'
  datatype:
    description: "Wikibase datatype"
  datavalue:
    description: "Statement value"
  snaktype:
    description: "Snak type"
    range: SnaktypeEnum
  snaks:
    description: "Reference snaks"
    range: WikidataSnak
    multivalued: true
  hash:
    description: "Reference hash"

  # Sitelink fields
  title:
    description: "Page title"
  url:
    description: "Page URL"
    range: uri
  badges:
    description: "Quality badges"
    multivalued: true

  # Metadata fields
  pageid:
    description: "MediaWiki page ID"
    range: integer
  ns:
    description: "MediaWiki namespace"
    range: integer
  lastrevid:
    description: "Latest revision ID"
    range: integer

# =============================================================================
# ENUMERATIONS
# =============================================================================

enums:

  # Outcome recorded per EnrichedEntity in `enrichment_status`.
  EnrichmentStatusEnum:
    description: "Status of Wikidata enrichment process"
    permissible_values:
      success:
        description: "Entity successfully enriched with Wikidata data"
      fetch_failed:
        description: "Wikidata API fetch failed (network error, invalid response)"
      no_qid:
        description: "No valid Q-number found in label field"
      cached:
        description: "Data retrieved from local cache (not counted as new enrichment)"

  # Values accepted by WikidataClaim.rank.
  RankEnum:
    description: "Wikidata statement rank"
    permissible_values:
      preferred:
        description: "Preferred statement (highest priority)"
      normal:
        description: "Normal statement (default rank)"
      deprecated:
        description: "Deprecated statement (outdated or incorrect)"

  # Values accepted by WikidataSnak.snaktype.
  SnaktypeEnum:
    description: "Type of Wikidata snak"
    permissible_values:
      value:
        description: "Snak with a specific value"
      somevalue:
        description: "Value exists but is unknown"
      novalue:
        description: "Property explicitly has no value"