glam/schemas/initial/hyponyms_curated.yaml
2025-11-21 22:12:33 +01:00

350 lines
11 KiB
YAML

# Schema identity: canonical URI, machine-readable name, and display title.
id: https://w3id.org/heritage/custodian/hyponyms-curated
name: heritage-custodian-hyponyms-curated
title: Heritage Custodian Hyponyms Curated Schema
description: >-
  Schema for manually curated Wikidata entity classifications for heritage
  institutions. Documents entity types (hyponyms), geographic scope, temporal
  context, and classification metadata for GLAMORCUBESFIXPHDNT taxonomy. Used
  as input for Wikidata enrichment pipeline.
license: https://creativecommons.org/publicdomain/zero/1.0/
# Quoted so the version is always read as a string.
version: "0.1.0"

# CURIE prefixes available to this schema (e.g. prov:, rico: in slot_uri).
prefixes:
  linkml: https://w3id.org/linkml/
  heritage: https://w3id.org/heritage/custodian/
  wikidata: http://www.wikidata.org/entity/
  dcterms: http://purl.org/dc/terms/
  prov: http://www.w3.org/ns/prov#
  rico: https://www.ica.org/standards/RiC/ontology#

# Elements without an explicit prefix live under `heritage:`; slots that
# declare no range are treated as strings.
default_prefix: heritage
default_range: string

imports:
  - linkml:types
# =============================================================================
# CORE CLASSES
# =============================================================================
classes:
  # Root of a curated file: a `sources` provenance list plus one list per
  # section type. tree_root marks this as the class a whole document is
  # validated against.
  HyponymsCurated:
    description: >-
      Root container for curated Wikidata entity classifications organized by
      section type (hypernym, entity, entity_list, standards, collection, exclude).
    tree_root: true
    slots:
      - sources
      - hypernym
      - entity
      - entity_list
      - standards
      - collection
      - exclude
    slot_usage:
      sources:
        required: true
      hypernym:
        description: "Main classification section for entity types with hierarchical context"
        multivalued: true
      entity:
        description: "Named entity classifications (single institutions, specific places)"
        multivalued: true
      entity_list:
        description: "Category or list classifications (Wikipedia categories, Wikidata lists)"
        multivalued: true
      standards:
        description: "Metadata standards, properties, or technical specifications"
        multivalued: true
      collection:
        description: "Collection types or aggregation categories"
        multivalued: true
      exclude:
        description: "Q-numbers explicitly excluded from taxonomy (no enrichment needed)"
        multivalued: true

  # One provenance record per source file; all three slots are mandatory.
  DataSource:
    description: >-
      Provenance metadata for source files (SPARQL queries, manual curation).
      Documents where curated data originated and when it was created.
    slots:
      - label
      - type
      - date
    slot_usage:
      label:
        required: true
        description: "File path or URL of source data"
      type:
        required: true
        description: "Type of source (sparql_results, sparql_query, manual_curation)"
      date:
        required: true
        description: "Timestamp when source was created (ISO 8601 format)"

  # A single curated entry. Only `label` is required; every other slot is an
  # optional annotation.
  CuratedEntity:
    description: >-
      Base class for curated entity metadata. Contains manual classifications
      including geographic scope, temporal context, GLAMORCUBESFIXPHDNT type codes,
      and hierarchical hypernyms.
    slots:
      - label
      - hypernym
      - type
      - country
      - subregion
      - settlement
      - time
      - duplicate
      - rico
      - notes
    slot_usage:
      label:
        required: true
        identifier: true
        description: "Wikidata Q-number (e.g., Q12345) or other identifier"
        # Accepts Q-ids, P-ids, "Category:..." and "List_of_..." labels.
        pattern: '^(Q[0-9]+|P[0-9]+|Category:.+|List_of_.+)$'
      hypernym:
        description: "Parent concept(s) in taxonomy hierarchy"
        # The shared `hypernym` slot is declared with range CuratedEntity and
        # inlined_as_list because it also serves as a root-level section.
        # Inside an entity the values are plain concept names (see the example
        # below), so the range is overridden to string; without this, a value
        # such as 'museum' would have to be a nested entity whose label matches
        # the Q/P/Category/List pattern.
        range: string
        inlined_as_list: false
        multivalued: true
        comments:
          - "Example: ['museum', 'art institution']"
          - "Multiple hypernyms for multi-faceted entities"
      type:
        description: "GLAMORCUBESFIXPHDNT type code(s)"
        multivalued: true
        range: GlamTypeCode
        comments:
          - "Allowed: G, L, A, M, O, R, C, U, B, E, S, F, I, X, P, H, D, N, T"
      country:
        description: "Geographic scope: country name(s)"
        multivalued: true
        comments:
          - "Use for entities specific to one or more countries"
          - "Example: ['Netherlands', 'Belgium']"
      subregion:
        description: "Geographic scope: subregion, province, or state"
        multivalued: true
        comments:
          - "Example: ['North Holland', 'Utrecht']"
          - "More granular than country, less than settlement"
      settlement:
        description: "Geographic scope: city, town, or locality"
        multivalued: true
        comments:
          - "Example: ['Amsterdam', 'Rotterdam']"
          - "Most specific geographic level"
      time:
        description: "Temporal scope: time period(s) this entity applies to"
        multivalued: true
        range: TemporalScope
        comments:
          - "Use for historically bounded concepts"
          - "Example: 17th century museums, 1960s art centers"
      duplicate:
        description: "Flag indicating entity is a duplicate of another Q-number"
        range: boolean
        comments:
          - "Set to true if this entity duplicates another in the taxonomy"
      rico:
        description: "Records in Contexts (RiC-O) classification"
        multivalued: true
        comments:
          - "Example: ['recordSetTypes'] for archival record types"
      notes:
        description: "Free-text notes about curation decisions or entity ambiguity"

  # A named period (`label`) with optional start/end/point markers (`type`).
  TemporalScope:
    description: >-
      Temporal context for an entity. Specifies the time period an entity
      applies to or was active during, with start/end markers.
    slots:
      - label
      - type
    slot_usage:
      label:
        required: true
        description: "Human-readable time period label"
        comments:
          - "Example: '17th century', '1960s', 'Medieval period'"
      type:
        description: "List of temporal markers (start, end, point)"
        multivalued: true
        # NOTE(review): TemporalMarker inherits an identifier from the shared
        # `label` slot and no inlining is declared here, so LinkML may treat
        # these values as id references (plain 'start'/'end'/'point' strings)
        # rather than nested objects - confirm against the curated data.
        range: TemporalMarker
        comments:
          - "Use 'start' for beginning of period"
          - "Use 'end' for terminus of period"
          - "Use 'point' for specific date/year"

  # Wrapper whose only slot, `label`, is restricted to TemporalMarkerEnum.
  TemporalMarker:
    description: >-
      Marker indicating temporal position (start, end, or point in time).
    slots:
      - label
    slot_usage:
      label:
        required: true
        range: TemporalMarkerEnum
        description: "Type of temporal marker: start, end, or point"
# =============================================================================
# SLOTS
# =============================================================================
slots:
  # ---------------------------------------------------------------------------
  # Root container slots (the sections of HyponymsCurated)
  # ---------------------------------------------------------------------------
  # Every class-ranged section is a multivalued list of inlined objects.
  sources:
    description: "List of data sources for curated entities"
    range: DataSource
    multivalued: true
    inlined_as_list: true
    slot_uri: prov:wasDerivedFrom
  hypernym:
    description: "Main classification section for hyponyms with hierarchical context"
    range: CuratedEntity
    multivalued: true
    inlined_as_list: true
  entity:
    description: "Named entity classifications"
    range: CuratedEntity
    multivalued: true
    inlined_as_list: true
  entity_list:
    description: "Category or list classifications"
    range: CuratedEntity
    multivalued: true
    inlined_as_list: true
  standards:
    description: "Metadata standards and properties"
    range: CuratedEntity
    multivalued: true
    inlined_as_list: true
  collection:
    description: "Collection types and aggregation categories"
    range: CuratedEntity
    multivalued: true
    inlined_as_list: true
  # Unlike the sections above, this is a flat list of Q-ids, not objects.
  exclude:
    description: "Q-numbers excluded from taxonomy (plain list)"
    range: string
    multivalued: true
    pattern: '^Q[0-9]+$'
    comments:
      - "No enrichment performed on excluded entities"

  # ---------------------------------------------------------------------------
  # Shared slots
  # ---------------------------------------------------------------------------
  # `label` is used by DataSource, CuratedEntity, TemporalScope and
  # TemporalMarker; `type` by DataSource, CuratedEntity and TemporalScope;
  # `date` by DataSource. Each class narrows the meaning (and sometimes the
  # range) through its own slot_usage.
  label:
    description: "Identifier, file path, or time period label"
    range: string
    identifier: true
  type:
    description: "Type classification (data source type, temporal marker, GLAM type)"
    range: string
  date:
    description: "Date or timestamp (ISO 8601 format)"
    range: string
    # Only second-precision UTC timestamps with a literal trailing 'Z' match.
    pattern: '^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$'
    comments:
      - "Format: YYYY-MM-DDTHH:mm:ssZ"

  # ---------------------------------------------------------------------------
  # CuratedEntity slots
  # ---------------------------------------------------------------------------
  country:
    description: "Country name(s) for geographic scope"
    range: string
    multivalued: true
  subregion:
    description: "Subregion, province, or state name(s)"
    range: string
    multivalued: true
  settlement:
    description: "City, town, or locality name(s)"
    range: string
    multivalued: true
  time:
    description: "Temporal scope: time period(s)"
    range: TemporalScope
    multivalued: true
    # TemporalScope has an identifier (via `label`), so without this LinkML
    # would default to referencing scopes by id and the nested `type` markers
    # could not be carried inside the entity. Matches the other class-ranged
    # slots in this file.
    inlined_as_list: true
  duplicate:
    description: "Flag for duplicate entities"
    range: boolean
  rico:
    description: "Records in Contexts (RiC-O) classification"
    range: string
    multivalued: true
    slot_uri: rico:hasRecordSetType
  notes:
    description: "Free-text curation notes"
    range: string
# =============================================================================
# ENUMERATIONS
# =============================================================================
enums:
  # One permissible value per letter of GLAMORCUBESFIXPHDNT, in acronym order.
  GlamTypeCode:
    description: >-
      GLAMORCUBESFIXPHDNT taxonomy type codes (19 types).
      Single-letter codes for heritage institution classification.
    permissible_values:
      G:
        description: "Gallery - Art gallery or exhibition space"
      L:
        description: "Library - Public, academic, or specialized library"
      A:
        description: "Archive - Government, corporate, or personal archive"
      M:
        description: "Museum - Art, history, science, or specialized museum"
      O:
        description: "Official Institution - Government heritage agencies"
      R:
        description: "Research Center - Research institutes and documentation centers"
      C:
        description: "Corporation - Corporate heritage collections"
      U:
        description: "Unknown - Institution type cannot be determined"
      B:
        description: "Botanical/Zoo - Botanical gardens and zoological parks"
      E:
        description: "Education Provider - Educational institutions with collections"
      S:
        description: "Collecting Society - Societies collecting specialized materials"
      F:
        description: "Features - Physical landscape features with heritage significance"
      I:
        description: "Intangible Heritage Group - Organizations preserving intangible heritage"
      X:
        description: "Mixed - Multiple types (uses X code)"
      P:
        description: "Personal Collection - Private personal collections"
      H:
        description: "Holy Sites - Religious heritage sites and institutions"
      D:
        description: "Digital Platform - Digital heritage platforms and repositories"
      # Quoted on purpose: a bare N is a boolean literal in the YAML 1.1 spec
      # and spec-strict parsers would load this key as `false`.
      'N':
        description: "NGO - Non-governmental heritage organizations"
      T:
        description: "Taste/Smell - Culinary and olfactory heritage institutions"

  # Allowed values for TemporalMarker.label.
  TemporalMarkerEnum:
    description: "Type of temporal marker"
    permissible_values:
      start:
        description: "Beginning of time period"
      end:
        description: "End of time period"
      point:
        description: "Specific point in time"