# glam/data/entity_annotation/modules/advanced/tei/nets.yaml
# 2025-12-05 15:30:23 +01:00
#
# 980 lines
# 30 KiB
# YAML

# =============================================================================
# GLAM-NER: TEI P5 GRAPHS, NETWORKS, AND TREES MODULE
# =============================================================================
# Module: modules/advanced/tei/nets.yaml
# Parent: entity_annotation_rules_v1.7.0_unified.yaml
# Purpose: LinkML schema for TEI P5 Chapter 20 - Graphs, Networks, and Trees
# Source: TEI P5 4.10.2 (September 2025) - nets module
# =============================================================================
# This module provides LinkML class definitions for encoding graphs, networks,
# and tree structures. Essential for entity relationship modeling, knowledge
# graphs, dependency trees, and stemmatology in heritage document analysis.
# =============================================================================
# Schema identity: canonical URI, machine name, human title, version, license.
id: https://w3id.org/glam/ner/tei/nets
name: glam-ner-tei-nets
title: TEI P5 Graphs, Networks, and Trees Module for GLAM-NER
version: "1.0.0"
license: https://creativecommons.org/licenses/by/4.0/
# CURIE prefixes used by class_uri, slot_uri, meaning and ontology_mapping
# values throughout this schema.
prefixes:
  tei: http://www.tei-c.org/ns/1.0/
  glam: https://w3id.org/glam/ner/
  linkml: https://w3id.org/linkml/
  crm: http://www.cidoc-crm.org/cidoc-crm/
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/
  skos: http://www.w3.org/2004/02/skos/core#
  rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
  rdfs: http://www.w3.org/2000/01/rdf-schema#
  owl: http://www.w3.org/2002/07/owl#
  oa: http://www.w3.org/ns/oa#
  xsd: http://www.w3.org/2001/XMLSchema#
  # Added: DependencyTree's ontology_mapping annotation references
  # nif:Sentence, which had no prefix declaration to expand against.
  nif: http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#
# Names without an explicit prefix belong to the glam namespace.
default_prefix: glam
# Slots and attributes that omit `range` are typed as string.
default_range: string
# =============================================================================
# IMPORTS
# =============================================================================
imports:
- linkml:types
# =============================================================================
# ENUMS
# =============================================================================
enums:
# ---------------------------------------------------------------------------
# Graph Types
# ---------------------------------------------------------------------------
# Value set for the `graph_type` slot.
GraphTypeEnum:
  description: Types of graph structures
  permissible_values:
    directed:
      description: Directed graph (digraph) - arcs have direction
    undirected:
      description: Undirected graph - edges have no direction
    mixed:
      description: Mixed graph with both directed and undirected connections
# ---------------------------------------------------------------------------
# Tree Types
# ---------------------------------------------------------------------------
# Value set for the `tree_type` slot. The values describe rootedness,
# branching factor and sibling ordering.
TreeTypeEnum:
  description: Types of tree structures
  permissible_values:
    rooted:
      description: Tree with designated root node
    unrooted:
      description: Tree without designated root
    binary:
      description: Binary tree (max 2 children per node)
    n-ary:
      description: N-ary tree (variable children per node)
    ordered:
      description: Ordered tree (child order significant)
    unordered:
      description: Unordered tree (child order not significant)
# ---------------------------------------------------------------------------
# Node Types for NER
# ---------------------------------------------------------------------------
# Value set for the `node_type` slot. Every value carries a `meaning`
# CURIE pointing at a glam, SKOS or CIDOC-CRM term.
EntityNodeTypeEnum:
  description: Types of nodes in entity relationship graphs
  permissible_values:
    entity:
      meaning: glam:NamedEntity
      description: Named entity node
    mention:
      meaning: glam:EntityMention
      description: Entity mention in text
    concept:
      meaning: skos:Concept
      description: Abstract concept node
    event:
      meaning: crm:E5_Event
      description: Event node
    place:
      meaning: crm:E53_Place
      description: Place/location node
    time:
      meaning: crm:E52_Time-Span
      description: Temporal node
    document:
      meaning: crm:E31_Document
      description: Document node
# ---------------------------------------------------------------------------
# Arc/Edge Types for Relations
# ---------------------------------------------------------------------------
# Value set for the `relation_type` slot: semantic, temporal, spatial, agent,
# document and syntactic-dependency relation labels. Values without a
# `meaning` have no ontology mapping in this schema.
RelationTypeEnum:
  description: Types of relations/edges in entity graphs
  permissible_values:
    # Semantic relations
    same_as:
      description: Identity/coreference relation
      meaning: owl:sameAs
    related_to:
      description: General relatedness
      meaning: skos:related
    broader:
      description: Broader/hypernym relation
      meaning: skos:broader
    narrower:
      description: Narrower/hyponym relation
      meaning: skos:narrower
    part_of:
      description: Part-whole relation
      meaning: dcterms:isPartOf
    # Temporal relations
    before:
      description: Temporal precedence
    after:
      description: Temporal succession
    during:
      description: Temporal inclusion
    # Spatial relations
    located_in:
      description: Spatial containment
      meaning: crm:P89_falls_within
    near:
      description: Spatial proximity
    # Agent relations
    created_by:
      description: Creator relation
      meaning: dcterms:creator
    participated_in:
      description: Participation in event
      # Inverse property used on purpose: P11_had_participant runs from the
      # event to the actor, while this relation reads actor -> event.
      meaning: crm:P11i_participated_in
    # Document relations
    mentions:
      description: Document mentions entity
    referenced_in:
      description: Entity referenced in document
    # Dependency relations (for syntax trees)
    nsubj:
      description: Nominal subject
    dobj:
      description: Direct object
    iobj:
      description: Indirect object
    nmod:
      description: Nominal modifier
    amod:
      description: Adjectival modifier
# ---------------------------------------------------------------------------
# Stemma Types (Textual Transmission)
# ---------------------------------------------------------------------------
# Value set for the `witness_type` slot.
WitnessTypeEnum:
  description: Types of textual witnesses in stemmatology
  permissible_values:
    archetype:
      description: Hypothetical original or archetype
    hyparchetype:
      description: Hypothetical intermediate ancestor
    extant:
      description: Extant manuscript witness
    lost:
      description: Lost manuscript (inferred)
    edition:
      description: Published edition
    fragment:
      description: Fragmentary witness
# =============================================================================
# SLOTS
# =============================================================================
slots:
# ---------------------------------------------------------------------------
# Graph Properties
# ---------------------------------------------------------------------------
# Graph-level slots: the kind of graph plus its node and arc counts.
graph_type:
  range: GraphTypeEnum
  description: Type of graph (directed, undirected, mixed)
graph_order:
  range: integer
  description: Number of nodes in graph
graph_size:
  range: integer
  description: Number of arcs/edges in graph
# ---------------------------------------------------------------------------
# Node Properties
# ---------------------------------------------------------------------------
# Node-level slots. `node_id` is the identifier slot for every class that
# lists it; the remaining slots hold label, type and degree counts.
node_id:
  identifier: true
  range: string
  description: Unique identifier for node
node_label:
  range: string
  description: Display label for node
node_type:
  range: EntityNodeTypeEnum
  description: Type classification of node
node_degree:
  range: integer
  description: Number of incident arcs (undirected)
in_degree:
  range: integer
  description: Number of incoming arcs (directed)
out_degree:
  range: integer
  description: Number of outgoing arcs (directed)
# ---------------------------------------------------------------------------
# Arc/Edge Properties
# ---------------------------------------------------------------------------
# Arc-level slots. Endpoints are plain strings, not typed Node references.
arc_from:
  range: string
  description: Source node of arc
arc_to:
  range: string
  description: Target node of arc
arc_label:
  range: string
  description: Label for arc/edge
relation_type:
  range: RelationTypeEnum
  description: Type of relation represented by arc
arc_weight:
  range: float
  description: Numeric weight of arc
# ---------------------------------------------------------------------------
# Tree Properties
# ---------------------------------------------------------------------------
# Tree-level slots: tree kind, a string reference to the root, and depth.
tree_type:
  range: TreeTypeEnum
  description: Type of tree structure
tree_root:
  range: string
  description: Reference to root node
tree_depth:
  range: integer
  description: Maximum depth of tree
# ---------------------------------------------------------------------------
# Stemma Properties
# ---------------------------------------------------------------------------
# Stemmatology slots used by the Witness class.
witness_type:
  range: WitnessTypeEnum
  description: Type of textual witness
siglum:
  range: string
  description: Manuscript siglum/abbreviation
# =============================================================================
# CLASSES
# =============================================================================
classes:
# ---------------------------------------------------------------------------
# Graph (graph)
# ---------------------------------------------------------------------------
# Graph: container of Node and Arc objects, mapped to the TEI <graph> element.
Graph:
  class_uri: tei:graph
  description: >-
    A graph structure consisting of nodes connected by arcs. Can be
    directed or undirected. Used for representing relationships,
    networks, and hierarchies.
  mixins:
    - TEIElement
  slots:
    - graph_type
    - graph_order
    - graph_size
  attributes:
    label:
      range: string
      description: Label or title for the graph
    # `nodes` is the only required member; `arcs` may be absent.
    nodes:
      range: Node
      multivalued: true
      required: true
      description: Nodes in the graph
    arcs:
      range: Arc
      multivalued: true
      description: Arcs connecting nodes
  annotations:
    tei_element: graph
    tei_module: nets
    glam_hypernym: graph_structure
    # NOTE(review): confirm rdf:Graph is a defined term in the RDF vocabulary.
    ontology_mapping: rdf:Graph
# ---------------------------------------------------------------------------
# Node (node)
# ---------------------------------------------------------------------------
# Node: a graph vertex, mapped to the TEI <node> element. Adjacency and
# tree links are held as string references rather than inlined objects.
Node:
  class_uri: tei:node
  description: >-
    A node (vertex) in a graph. May be labeled and connected to
    other nodes via arcs.
  mixins:
    - TEIElement
  slots:
    - node_id
    - node_label
    - node_degree
    - in_degree
    - out_degree
  attributes:
    label:
      range: Label
      description: Display label for node
    value:
      range: string
      description: Value or content of node
    adj:
      range: string
      multivalued: true
      description: Adjacent nodes (undirected)
    adj_to:
      range: string
      multivalued: true
      description: Nodes this node points to (directed)
    adj_from:
      range: string
      multivalued: true
      description: Nodes pointing to this node (directed)
    children:
      range: string
      multivalued: true
      description: Child nodes (for trees)
    parent:
      range: string
      description: Parent node reference (for trees)
  annotations:
    tei_element: node
    tei_module: nets
    glam_hypernym: graph_node
    ontology_mapping: rdfs:Resource
# ---------------------------------------------------------------------------
# Arc (arc)
# ---------------------------------------------------------------------------
# Arc: an edge between two nodes, mapped to the TEI <arc> element.
# Endpoints come from the arc_from / arc_to slots.
Arc:
  class_uri: tei:arc
  description: >-
    An arc (edge) connecting two nodes in a graph. In directed
    graphs, represents a connection from source to target.
  mixins:
    - TEIElement
  slots:
    - arc_from
    - arc_to
    - arc_label
    - arc_weight
  attributes:
    label:
      range: Label
      description: Label for the arc
  annotations:
    tei_element: arc
    tei_module: nets
    glam_hypernym: graph_edge
    ontology_mapping: rdf:Property
# ---------------------------------------------------------------------------
# Tree (tree)
# ---------------------------------------------------------------------------
# Tree: mapped to the TEI <tree> element. The root can be given either as a
# string reference (tree_root slot) or as an inlined TreeNode (root attribute).
Tree:
  class_uri: tei:tree
  description: >-
    A tree structure - a connected acyclic graph with a designated
    root. Used for hierarchies, parse trees, and organizational
    structures.
  mixins:
    - TEIElement
  slots:
    - tree_type
    - tree_root
    - tree_depth
    - graph_order
  attributes:
    label:
      range: string
      description: Label for the tree
    root:
      range: TreeNode
      description: Root node of the tree
    nodes:
      range: TreeNode
      multivalued: true
      description: All nodes in tree (alternative to root-first traversal)
  annotations:
    tei_element: tree
    tei_module: nets
    glam_hypernym: tree_structure
# ---------------------------------------------------------------------------
# Tree Node (root, iNode, leaf)
# ---------------------------------------------------------------------------
# TreeNode: parent class of RootNode, InternalNode and LeafNode. It is
# recursive: `children` holds further TreeNode objects.
TreeNode:
  class_uri: tei:eTree
  description: >-
    A node in a tree structure. Can be root, internal node (iNode),
    or leaf.
  mixins:
    - TEIElement
  slots:
    - node_id
    - node_label
  attributes:
    value:
      range: string
      description: Value or label at this node
    children:
      range: TreeNode
      multivalued: true
      description: Child nodes
    parent_ref:
      range: string
      description: Reference to parent node
    ord:
      range: integer
      description: Order among siblings
    is_leaf:
      range: boolean
      description: Whether this is a leaf node
  annotations:
    tei_element: eTree
    tei_module: nets
    glam_hypernym: tree_node
# ---------------------------------------------------------------------------
# Root Node (root)
# ---------------------------------------------------------------------------
# RootNode: TreeNode specialisation mapped to the TEI <root> element.
# It adds no slots or attributes of its own.
RootNode:
  is_a: TreeNode
  class_uri: tei:root
  description: The root node of a tree structure.
  annotations:
    tei_element: root
    tei_module: nets
    glam_hypernym: root_node
# ---------------------------------------------------------------------------
# Internal Node (iNode)
# ---------------------------------------------------------------------------
# InternalNode: TreeNode specialisation mapped to the TEI <iNode> element.
# It adds no slots or attributes of its own.
InternalNode:
  is_a: TreeNode
  class_uri: tei:iNode
  description: An internal (non-leaf, non-root) node in a tree.
  annotations:
    tei_element: iNode
    tei_module: nets
    glam_hypernym: internal_node
# ---------------------------------------------------------------------------
# Leaf Node (leaf)
# ---------------------------------------------------------------------------
# LeafNode: TreeNode specialisation mapped to the TEI <leaf> element.
# It re-declares `is_leaf` so the value defaults to true when omitted.
LeafNode:
  is_a: TreeNode
  class_uri: tei:leaf
  description: A leaf (terminal) node in a tree with no children.
  attributes:
    is_leaf:
      range: boolean
      ifabsent: "true"
      description: Always true for leaf nodes
  annotations:
    tei_element: leaf
    tei_module: nets
    glam_hypernym: leaf_node
# ---------------------------------------------------------------------------
# Label (label)
# ---------------------------------------------------------------------------
# Label: mapped to the TEI <label> element. It is the range of the `label`
# attribute on Node and Arc.
Label:
  class_uri: tei:label
  description: A label or heading for a node, arc, or graph component.
  mixins:
    - TEIElement
  attributes:
    text:
      range: string
      description: Text content of label
    type:
      range: string
      description: Type of label
  annotations:
    tei_element: label
    tei_module: nets
    glam_hypernym: label
# ---------------------------------------------------------------------------
# Forest (forest) - Collection of Trees
# ---------------------------------------------------------------------------
# Forest: a set of Tree objects, mapped to the TEI <forest> element.
Forest:
  class_uri: tei:forest
  description: A forest - a collection of disjoint trees.
  mixins:
    - TEIElement
  attributes:
    trees:
      range: Tree
      multivalued: true
      description: Trees in the forest
    tree_count:
      range: integer
      description: Number of trees
  annotations:
    tei_element: forest
    tei_module: nets
    glam_hypernym: tree_forest
# ---------------------------------------------------------------------------
# STEMMATOLOGY: Textual Transmission Trees
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# Stemma (listWit + graph structure)
# ---------------------------------------------------------------------------
# Stemma: Graph specialisation for textual transmission. It adds the
# tree_root slot and witness-specific attributes on top of Graph's members.
Stemma:
  is_a: Graph
  class_uri: glam:Stemma
  description: >-
    A stemma codicum - a tree or graph representing the transmission
    history of a text through manuscript witnesses.
  slots:
    - tree_root
  attributes:
    archetype:
      range: Witness
      description: Hypothetical archetype/original
    witnesses:
      range: Witness
      multivalued: true
      description: All witnesses in stemma
    relationships:
      range: TransmissionRelation
      multivalued: true
      description: Transmission relationships
    is_contaminated:
      range: boolean
      description: Whether stemma shows contamination
  annotations:
    tei_element: graph
    tei_module: nets
    glam_hypernym: stemma_codicum
    ontology_mapping: crm:E33_Linguistic_Object
# ---------------------------------------------------------------------------
# Witness (witness)
# ---------------------------------------------------------------------------
# Witness: mapped to the TEI <witness> element. Its tei_module annotation
# is textcrit, not nets. `witness_id` is the identifier for this class.
Witness:
  class_uri: tei:witness
  description: >-
    A textual witness - a manuscript, edition, or other source in a
    stemma.
  mixins:
    - TEIElement
  slots:
    - witness_type
    - siglum
  attributes:
    witness_id:
      identifier: true
      range: string
      description: Unique identifier
    label:
      range: string
      description: Full name/description
    date:
      range: string
      description: Date of witness
    location:
      range: string
      description: Current location/repository
    shelfmark:
      range: string
      description: Shelfmark or call number
    is_hypothetical:
      range: boolean
      description: Whether witness is hypothetical (lost/inferred)
  annotations:
    tei_element: witness
    tei_module: textcrit
    glam_hypernym: textual_witness
    ontology_mapping: crm:E22_Human-Made_Object
# ---------------------------------------------------------------------------
# Transmission Relation
# ---------------------------------------------------------------------------
# TransmissionRelation: Arc specialisation linking two witnesses.
# Its class-local `relation_type` attribute ranges over TransmissionTypeEnum,
# unlike the schema-level `relation_type` slot (RelationTypeEnum).
TransmissionRelation:
  is_a: Arc
  class_uri: glam:TransmissionRelation
  description: >-
    A relationship between witnesses in textual transmission
    (copying, derivation, contamination).
  attributes:
    ancestor:
      range: string
      required: true
      description: Source/ancestor witness
    descendant:
      range: string
      required: true
      description: Derived/descendant witness
    relation_type:
      range: TransmissionTypeEnum
      description: Type of transmission
    is_direct:
      range: boolean
      description: Whether copying was direct
    contamination_source:
      range: string
      description: Source of contamination (if applicable)
  annotations:
    glam_hypernym: textual_transmission
# ---------------------------------------------------------------------------
# GLAM-NER EXTENSIONS: Entity Relationship Graphs
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# Entity Relationship Graph
# ---------------------------------------------------------------------------
# EntityRelationGraph: Graph specialisation whose members are EntityNode
# and RelationArc objects, plus extraction provenance and counts.
EntityRelationGraph:
  is_a: Graph
  class_uri: glam:EntityRelationGraph
  description: >-
    A knowledge graph of entities and their relationships extracted
    from heritage documents. Nodes are entities, arcs are typed
    relations.
  slots:
    - graph_type
  attributes:
    source_document:
      range: string
      multivalued: true
      description: Source document(s) for extraction
    extraction_date:
      range: datetime
      description: Date of extraction
    entity_nodes:
      range: EntityNode
      multivalued: true
      description: Entity nodes in graph
    relation_arcs:
      range: RelationArc
      multivalued: true
      description: Relation arcs between entities
    entity_count:
      range: integer
      description: Total number of entities
    relation_count:
      range: integer
      description: Total number of relations
  annotations:
    glam_hypernym: knowledge_graph
    ontology_mapping: rdf:Graph
# ---------------------------------------------------------------------------
# Entity Node
# ---------------------------------------------------------------------------
# EntityNode: Node specialisation for a named entity. It adds the node_type
# slot plus NER attributes (type, names, Wikidata link, confidence).
EntityNode:
  class_uri: glam:EntityNode
  is_a: Node
  description: >-
    A node representing a named entity in a knowledge graph.
  slots:
    - node_type
  attributes:
    entity_id:
      description: Entity identifier
      range: string
      # Not flagged `identifier`: this class already inherits the identifier
      # slot node_id from Node, and a class may carry only one identifier.
      # `required` keeps the field mandatory, as the identifier flag implied.
      required: true
    entity_type:
      description: NER entity type (PERSON, ORG, GPE, etc.)
      range: string
      required: true
    canonical_name:
      description: Canonical/normalized name
      range: string
    aliases:
      description: Alternative names/aliases
      range: string
      multivalued: true
    wikidata_id:
      description: Wikidata Q-number
      range: string
    confidence:
      description: Extraction confidence score
      range: float
  annotations:
    glam_hypernym: entity_node
    ontology_mapping: crm:E1_CRM_Entity
# ---------------------------------------------------------------------------
# Relation Arc
# ---------------------------------------------------------------------------
# RelationArc: Arc specialisation for a typed relation between two
# entities; both endpoint IDs are required.
RelationArc:
  is_a: Arc
  class_uri: glam:RelationArc
  description: An arc representing a relationship between two entities.
  slots:
    - relation_type
    - arc_weight
  attributes:
    relation_id:
      range: string
      description: Relation identifier
    source_entity:
      range: string
      required: true
      description: Source entity ID
    target_entity:
      range: string
      required: true
      description: Target entity ID
    relation_label:
      range: string
      description: Human-readable relation label
    evidence_spans:
      range: string
      multivalued: true
      description: Text spans supporting this relation
    confidence:
      range: float
      description: Relation extraction confidence
    is_symmetric:
      range: boolean
      description: Whether relation is symmetric
  annotations:
    glam_hypernym: relation_edge
    ontology_mapping: rdf:Property
# ---------------------------------------------------------------------------
# Coreference Chain
# ---------------------------------------------------------------------------
# CoreferenceChain: groups EntityMention objects under one chain
# identifier, with a string reference to the canonical entity.
CoreferenceChain:
  class_uri: glam:CoreferenceChain
  description: A chain of entity mentions that refer to the same entity.
  mixins:
    - TEIElement
  attributes:
    chain_id:
      identifier: true
      range: string
      description: Chain identifier
    canonical_entity:
      range: string
      description: Reference to canonical entity
    mentions:
      range: EntityMention
      multivalued: true
      description: Ordered list of coreferent mentions
    mention_count:
      range: integer
      description: Number of mentions in chain
  annotations:
    glam_hypernym: coreference_chain
    ontology_mapping: oa:Annotation
# ---------------------------------------------------------------------------
# Entity Mention (for coreference)
# ---------------------------------------------------------------------------
# EntityMention: one textual occurrence of an entity. `surface_form` is
# required; the offsets and sentence reference are optional.
EntityMention:
  class_uri: glam:EntityMention
  description: A mention of an entity in text, part of a coreference chain.
  mixins:
    - TEIElement
  attributes:
    mention_id:
      identifier: true
      range: string
      description: Mention identifier
    surface_form:
      range: string
      required: true
      description: Text as it appears in document
    mention_type:
      range: MentionTypeEnum
      description: Type of mention (proper name, pronoun, nominal, etc.)
    start_offset:
      range: integer
      description: Character offset start
    end_offset:
      range: integer
      description: Character offset end
    sentence_id:
      range: string
      description: Reference to containing sentence
  annotations:
    glam_hypernym: entity_mention
    ontology_mapping: oa:TextQuoteSelector
# ---------------------------------------------------------------------------
# Dependency Parse Tree
# ---------------------------------------------------------------------------
# DependencyTree: Tree specialisation for one parsed sentence. Tokens are
# TokenNode objects and relations are DependencyArc objects.
DependencyTree:
  is_a: Tree
  class_uri: glam:DependencyTree
  description: >-
    A dependency parse tree representing syntactic structure of a
    sentence. Nodes are tokens, arcs are typed dependency relations.
  attributes:
    sentence_id:
      range: string
      description: Reference to source sentence
    sentence_text:
      range: string
      description: Original sentence text
    root_token:
      range: TokenNode
      description: Root token of the tree
    tokens:
      range: TokenNode
      multivalued: true
      description: All tokens as nodes
    dependencies:
      range: DependencyArc
      multivalued: true
      description: Dependency relations as arcs
  annotations:
    glam_hypernym: dependency_tree
    ontology_mapping: nif:Sentence
# ---------------------------------------------------------------------------
# Token Node (for dependency trees)
# ---------------------------------------------------------------------------
# TokenNode: Node specialisation for one token of a dependency tree, with
# per-token fields for form, lemma, POS tag, features, head and relation.
TokenNode:
  class_uri: glam:TokenNode
  is_a: Node
  description: >-
    A token node in a dependency tree.
  attributes:
    token_id:
      description: Token position (1-indexed)
      range: integer
      # Not flagged `identifier`: this class already inherits the identifier
      # slot node_id from Node, and a class may carry only one identifier.
      # `required` keeps the field mandatory, as the identifier flag implied.
      required: true
    form:
      description: Surface form of token
      range: string
      required: true
    lemma:
      description: Lemma/base form
      range: string
    pos_tag:
      description: Part-of-speech tag
      range: string
    features:
      description: Morphological features
      range: string
    head:
      description: ID of head token (0 for root)
      range: integer
    deprel:
      description: Dependency relation to head
      range: string
  annotations:
    glam_hypernym: token_node
# ---------------------------------------------------------------------------
# Dependency Arc
# ---------------------------------------------------------------------------
# DependencyArc: Arc specialisation between two tokens. Head ID, dependent
# ID and relation label are all required.
DependencyArc:
  is_a: Arc
  class_uri: glam:DependencyArc
  description: A dependency relation arc between tokens.
  attributes:
    head_id:
      range: integer
      required: true
      description: Head token ID
    dependent_id:
      range: integer
      required: true
      description: Dependent token ID
    deprel:
      range: string
      required: true
      description: Dependency relation type (Universal Dependencies)
    enhanced:
      range: string
      description: Enhanced dependency information
  annotations:
    glam_hypernym: dependency_arc
# ---------------------------------------------------------------------------
# Mixin: TEI Element
# ---------------------------------------------------------------------------
# TEIElement: mixin supplying attributes shared by the TEI-mapped classes
# in this schema (listed under their `mixins`).
TEIElement:
  description: Base mixin for all TEI elements
  mixin: true
  attributes:
    xml_id:
      description: XML identifier
      slot_uri: dcterms:identifier
      range: string
    xml_lang:
      description: Language code
      slot_uri: dcterms:language
      range: string
    rend:
      description: Rendition or appearance
      range: string
    corresp:
      description: Correspondence to another element
      range: string
    # Quoted: a bare `n` is in the YAML 1.1 boolean set, so some loaders
    # would read the key as false instead of the string "n".
    "n":
      description: Number or label
      range: string
# =============================================================================
# ADDITIONAL ENUMS
# =============================================================================
# Value set for TransmissionRelation's `relation_type` attribute.
# NOTE(review): this enum follows the class definitions and no `enums:` key
# is visible between `classes:` and here - confirm it is nested under the
# schema's enums section rather than being read as a class.
TransmissionTypeEnum:
  description: Types of textual transmission
  permissible_values:
    direct_copy:
      description: Direct copying from exemplar
    indirect_copy:
      description: Copying through lost intermediate
    contamination:
      description: Readings from multiple sources
    correction:
      description: Corrected/emended text
    excerpt:
      description: Excerpted/abridged text
    translation:
      description: Translation to another language
# Value set for EntityMention's `mention_type` attribute.
# NOTE(review): this enum follows the class definitions and no `enums:` key
# is visible between `classes:` and here - confirm it is nested under the
# schema's enums section rather than being read as a class.
MentionTypeEnum:
  description: Types of entity mentions
  permissible_values:
    proper_name:
      description: Proper noun mention (John Smith)
    nominal:
      description: Nominal mention (the president)
    pronominal:
      description: Pronominal mention (he, she, it)
    demonstrative:
      description: Demonstrative mention (this person)
    relative:
      description: Relative pronoun (who, which)