1393 lines
38 KiB
YAML
1393 lines
38 KiB
YAML
# =============================================================================
# GLAM-NER: TEI P5 LINKING, SEGMENTATION, AND ALIGNMENT MODULE
# =============================================================================
# Module: modules/advanced/tei/linking.yaml
# Parent: entity_annotation_rules_v1.7.0_unified.yaml
# Purpose: LinkML schema for TEI P5 Chapter 17 - Linking, Segmentation, Alignment
# Source: TEI P5 4.10.2 (September 2025) - linking module
# =============================================================================
# This module provides LinkML class definitions for TEI standoff markup,
# linking, segmentation, and alignment elements. Essential for:
# - Standoff annotation (Web Annotation Data Model compatible)
# - Cross-document linking
# - Text segmentation and anchoring
# - Parallel text alignment
# - NER annotation pipelines
#
# Key TEI Elements Covered (14 core + related elements):
# - standOff: Container for standoff annotations
# - annotation: W3C Web Annotation compatible
# - listAnnotation: Annotation collections
# - annotationBlock: Grouped annotations
# - link: Hyperlinks between elements
# - linkGrp: Link groups
# - seg: Arbitrary text segments
# - anchor: Anchor points for linking
# - join: Aggregation of fragments
# - alt: Alternative interpretations
# - timeline/when: Temporal alignment
#
# Ontology Alignments:
# - W3C Web Annotation (oa:Annotation)
# - NIF (Natural Language Processing Interchange Format)
# - CIDOC-CRM: E13_Attribute_Assignment
# - Schema.org: Action, Comment
# =============================================================================
|
|
|
|
# Schema identity and namespace declarations.
# The prefix entries must be nested under `prefixes:`; at column 0 they would
# be read as unrelated top-level schema keys.
id: https://w3id.org/glam/ner/tei/linking
name: glam-ner-tei-linking
title: TEI P5 Linking, Segmentation, and Alignment Module for GLAM-NER
version: "1.0.0"
license: https://creativecommons.org/licenses/by/4.0/

prefixes:
  tei: http://www.tei-c.org/ns/1.0/
  glam: https://w3id.org/glam/ner/
  linkml: https://w3id.org/linkml/
  oa: http://www.w3.org/ns/oa#
  as: https://www.w3.org/ns/activitystreams#
  nif: http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#
  crm: http://www.cidoc-crm.org/cidoc-crm/
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/
  prov: http://www.w3.org/ns/prov#
  xsd: http://www.w3.org/2001/XMLSchema#
  rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#

# Unprefixed element names resolve against `glam`; slots without an explicit
# range default to `string`.
default_prefix: glam
default_range: string
|
|
|
|
# =============================================================================
# IMPORTS
# =============================================================================
imports:
  - linkml:types
  - ./namesdates  # Import shared types and mixins
|
|
|
|
# =============================================================================
# ENUMS
# =============================================================================
enums:

  # ---------------------------------------------------------------------------
  # Annotation Motivation (W3C Web Annotation aligned)
  # ---------------------------------------------------------------------------
  AnnotationMotivation:
    description: |
      Motivation for creating an annotation.
      Aligned with W3C Web Annotation motivations.
    permissible_values:
      assessing:
        description: Quality assessment
        meaning: oa:assessing
      bookmarking:
        description: Marking for future reference
        meaning: oa:bookmarking
      classifying:
        description: Categorization or tagging
        meaning: oa:classifying
      commenting:
        description: Commentary or discussion
        meaning: oa:commenting
      describing:
        description: Descriptive note
        meaning: oa:describing
      editing:
        description: Editorial suggestion
        meaning: oa:editing
      highlighting:
        description: Visual emphasis
        meaning: oa:highlighting
      identifying:
        description: Entity identification (NER)
        meaning: oa:identifying
      linking:
        description: Linking to related resource
        meaning: oa:linking
      moderating:
        description: Content moderation
        meaning: oa:moderating
      questioning:
        description: Raising a question
        meaning: oa:questioning
      replying:
        description: Reply to another annotation
        meaning: oa:replying
      tagging:
        description: Adding keywords/tags
        meaning: oa:tagging

  # ---------------------------------------------------------------------------
  # Link Function Types
  # ---------------------------------------------------------------------------
  LinkFunctionType:
    description: Function or purpose of a link
    permissible_values:
      pointer:
        description: Simple reference
      aggregation:
        description: Aggregation of fragments
      alignment:
        description: Parallel text alignment
      correspondence:
        description: Correspondence relation
      translation:
        description: Translation relationship
      alternative:
        description: Alternative reading
      exclusion:
        description: Mutual exclusion
      temporal:
        description: Temporal synchronization
      citation:
        description: Citation reference
      cross_reference:
        description: Cross-reference

  # ---------------------------------------------------------------------------
  # Segment Types
  # ---------------------------------------------------------------------------
  SegmentType:
    description: Types of text segments
    permissible_values:
      phrase:
        description: Phrasal unit
      clause:
        description: Clausal unit
      sentence:
        description: Sentence unit
      paragraph:
        description: Paragraph unit
      token:
        description: Token (word/punctuation)
      entity:
        description: Named entity span
      annotation:
        description: Annotation target span
      quotation:
        description: Quoted text span
      arbitrary:
        description: Arbitrary segment

  # ---------------------------------------------------------------------------
  # Selector Types (Web Annotation aligned)
  # ---------------------------------------------------------------------------
  SelectorType:
    description: |
      Types of selectors for identifying annotation targets.
      Aligned with W3C Web Annotation selectors.
    permissible_values:
      text_quote:
        description: Exact text match
        meaning: oa:TextQuoteSelector
      text_position:
        description: Character offset position
        meaning: oa:TextPositionSelector
      xpath:
        description: XPath expression
        meaning: oa:XPathSelector
      css:
        description: CSS selector
        meaning: oa:CssSelector
      fragment:
        description: Media fragment
        meaning: oa:FragmentSelector
      range:
        description: Range selector (start/end)
        meaning: oa:RangeSelector
      svg:
        description: SVG shape selector
        meaning: oa:SvgSelector
      data_position:
        description: Data position selector
        meaning: oa:DataPositionSelector

  # ---------------------------------------------------------------------------
  # Certainty Types
  # ---------------------------------------------------------------------------
  CertaintyDegree:
    description: Degree of certainty for annotations
    permissible_values:
      high:
        description: High confidence
      medium:
        description: Medium confidence
      low:
        description: Low confidence
      unknown:
        description: Confidence not specified

  # ---------------------------------------------------------------------------
  # Part Types (for fragmented elements)
  # ---------------------------------------------------------------------------
  PartType:
    description: Part indicator for fragmented elements
    permissible_values:
      initial:
        description: Initial fragment
      medial:
        description: Middle fragment
      final:
        description: Final fragment
      # Quoted on purpose: YAML 1.1 loaders resolve bare yes/no to the booleans
      # true/false, which would turn these two keys into non-string values.
      "yes":
        description: Fragment (unspecified position)
      "no":
        description: Complete (not fragmented)

  # ---------------------------------------------------------------------------
  # Time Units
  # ---------------------------------------------------------------------------
  TimeUnit:
    description: Units for temporal measurement
    permissible_values:
      d:
        description: Days
      h:
        description: Hours
      min:
        description: Minutes
      s:
        description: Seconds
      ms:
        description: Milliseconds
|
|
|
|
# =============================================================================
# TYPES
# =============================================================================
# All four types specialise `string`; only TEIPointerList adds a validation
# pattern, the others are semantic aliases.
types:

  TEIPointerList:
    uri: tei:pointerList
    typeof: string
    description: |
      Space-separated list of TEI pointers/URIs.
      Used for multi-target references.
    # One or more whitespace-separated tokens, each either an optionally
    # '#'-prefixed name (word chars, '-', '.') or an http(s) URL.
    pattern: "^(#?[\\w\\-\\.]+|https?://[^\\s]+)(\\s+(#?[\\w\\-\\.]+|https?://[^\\s]+))*$"

  XPathExpression:
    uri: xsd:string
    typeof: string
    description: |
      XPath expression for element selection.
      TEI uses xpath() pointer scheme.

  CSSSelector:
    uri: xsd:string
    typeof: string
    description: CSS selector expression.

  MediaFragment:
    uri: xsd:string
    typeof: string
    description: |
      Media fragment identifier (RFC 5147 for text, W3C for media).
      Format: #char=start,end or #t=start,end
|
|
|
|
# =============================================================================
# MIXINS
# =============================================================================
# NOTE(review): the LinkML metamodel has no top-level `mixins` key on a schema;
# mixin classes are normally declared under `classes:` with `mixin: true`.
# Confirm whether the loader in use accepts this section or whether these three
# definitions need to move into `classes:`.
mixins:

  # ---------------------------------------------------------------------------
  # Pointing Attributes
  # ---------------------------------------------------------------------------
  TEIPointingAttributes:
    description: |
      Attributes for elements that point to other elements.
      TEI att.pointing class.
    mixin: true
    slots:
      - target
      - target_lang
      - evaluate
    slot_usage:
      target:
        description: Target(s) of pointer (URI or space-separated URIs)
        range: TEIPointerList
        slot_uri: tei:target

  # ---------------------------------------------------------------------------
  # Segmentation Attributes
  # ---------------------------------------------------------------------------
  TEISegmentationAttributes:
    description: |
      Attributes for segmentable elements.
      TEI att.segLike class.
    mixin: true
    slots:
      - function
      - part
    slot_usage:
      function:
        description: Linguistic function of segment
        range: string
      part:
        description: Fragment indicator (I/M/F/Y/N)
        range: PartType

  # ---------------------------------------------------------------------------
  # Typed Pointing Attributes
  # ---------------------------------------------------------------------------
  TEITypedPointingAttributes:
    description: |
      Typed pointer attributes.
      TEI att.typed class combined with att.pointing.
    mixin: true
    slots:
      - link_type
      - subtype
    slot_usage:
      link_type:
        description: Type classification of link
        range: string
        slot_uri: tei:type
|
|
|
|
# =============================================================================
# SLOTS
# =============================================================================
slots:

  # ---------------------------------------------------------------------------
  # Core Pointing Slots
  # ---------------------------------------------------------------------------
  target:
    description: Target(s) of pointer reference
    range: TEIPointerList
    slot_uri: tei:target

  target_lang:
    description: Language of target resource
    range: string
    slot_uri: tei:targetLang

  evaluate:
    description: Evaluation context for XPointer
    range: string
    slot_uri: tei:evaluate

  # ---------------------------------------------------------------------------
  # Annotation Slots
  # ---------------------------------------------------------------------------
  # NOTE(review): `annotation_id` and `xml_id` (end of this section) are both
  # marked `identifier: true` and share `slot_uri: tei:id`; a class may carry
  # only one identifier slot, so they must never be used together.
  annotation_id:
    description: Unique identifier for annotation
    range: string
    identifier: true
    slot_uri: tei:id

  motivation:
    description: Motivation for annotation
    range: AnnotationMotivation
    slot_uri: oa:motivatedBy

  annotation_body:
    description: Body content of annotation
    range: AnnotationBody
    inlined: true
    slot_uri: oa:hasBody

  annotation_target:
    description: Target of annotation
    range: AnnotationTarget
    inlined: true
    slot_uri: oa:hasTarget

  creator:
    description: Creator of annotation
    range: string
    slot_uri: dcterms:creator

  created:
    description: Creation timestamp
    range: datetime
    slot_uri: dcterms:created

  modified:
    description: Modification timestamp
    range: datetime
    slot_uri: dcterms:modified

  generator:
    description: Software/tool that generated annotation
    range: string
    slot_uri: as:generator

  # ---------------------------------------------------------------------------
  # Selector Slots
  # ---------------------------------------------------------------------------
  selector:
    description: Selector for identifying target
    range: Selector
    inlined: true
    slot_uri: oa:hasSelector

  selector_type:
    description: Type of selector
    range: SelectorType

  exact_match:
    description: Exact text to match
    range: string
    slot_uri: oa:exact

  prefix_context:
    description: Text before the match
    range: string
    slot_uri: oa:prefix

  suffix_context:
    description: Text after the match
    range: string
    slot_uri: oa:suffix

  start_position:
    description: Start character offset (0-based)
    range: integer
    slot_uri: oa:start

  end_position:
    description: End character offset
    range: integer
    slot_uri: oa:end

  xpath_value:
    description: XPath expression
    range: XPathExpression

  css_value:
    description: CSS selector expression
    range: CSSSelector

  # ---------------------------------------------------------------------------
  # Link Slots
  # ---------------------------------------------------------------------------
  link_type:
    description: Type of link
    range: string
    slot_uri: tei:type

  subtype:
    description: Subtype classification
    range: string
    slot_uri: tei:subtype

  link_function:
    description: Function of link
    range: LinkFunctionType

  # Shares slot_uri tei:target with the single-valued `target` slot above.
  targets:
    description: Multiple targets for link
    range: TEIPointerList
    slot_uri: tei:target

  mutual:
    description: Whether link is bidirectional
    range: boolean
    slot_uri: tei:mutual

  # ---------------------------------------------------------------------------
  # Segment Slots
  # ---------------------------------------------------------------------------
  segment_type:
    description: Type of segment
    range: SegmentType
    slot_uri: tei:type

  function:
    description: Linguistic function
    range: string
    slot_uri: tei:function

  part:
    description: Part indicator for fragments
    range: PartType
    slot_uri: tei:part

  # ---------------------------------------------------------------------------
  # Anchor Slots
  # ---------------------------------------------------------------------------
  anchor_type:
    description: Type of anchor point
    range: string

  # ---------------------------------------------------------------------------
  # Join/Aggregation Slots
  # ---------------------------------------------------------------------------
  result:
    description: Element type resulting from join
    range: string
    slot_uri: tei:result

  scope:
    description: Scope of join (root/branches)
    range: string
    slot_uri: tei:scope

  # ---------------------------------------------------------------------------
  # Alternative Slots
  # ---------------------------------------------------------------------------
  mode:
    description: Mode of alternation (excl/incl)
    range: string
    slot_uri: tei:mode

  weights:
    description: Probability weights for alternatives
    range: string
    slot_uri: tei:weights

  # ---------------------------------------------------------------------------
  # Timeline Slots
  # ---------------------------------------------------------------------------
  origin_point:
    description: Origin point of timeline
    range: string
    slot_uri: tei:origin

  interval:
    description: Time interval value
    range: float
    slot_uri: tei:interval

  time_unit:
    description: Unit of time measurement
    range: TimeUnit
    slot_uri: tei:unit

  absolute_time:
    description: Absolute time value
    range: datetime
    slot_uri: tei:absolute

  since:
    description: Reference point for relative time
    range: string
    slot_uri: tei:since

  # ---------------------------------------------------------------------------
  # Certainty Slots
  # ---------------------------------------------------------------------------
  certainty_degree:
    description: Degree of certainty
    range: CertaintyDegree
    slot_uri: tei:cert

  certainty_locus:
    description: Aspect affected by uncertainty
    range: string
    slot_uri: tei:locus

  responsible:
    description: Entity responsible for assertion
    range: string
    slot_uri: tei:resp

  # ---------------------------------------------------------------------------
  # Standoff Container Slots
  # ---------------------------------------------------------------------------
  standoff_type:
    description: Type of standoff content
    range: string

  # A user slot that happens to share its name with the LinkML `annotations`
  # metaslot used on classes and on `glam_hypernym` below; the two are unrelated.
  annotations:
    description: Contained annotations
    range: TEIAnnotation
    multivalued: true
    inlined: true

  annotation_blocks:
    description: Grouped annotation blocks
    range: TEIAnnotationBlock
    multivalued: true
    inlined: true

  links:
    description: Link elements
    range: TEILink
    multivalued: true
    inlined: true

  link_groups:
    description: Link group elements
    range: TEILinkGrp
    multivalued: true
    inlined: true

  # ---------------------------------------------------------------------------
  # GLAM-NER Slots
  # ---------------------------------------------------------------------------
  glam_hypernym:
    description: GLAM-NER hypernym for entity type
    range: string
    annotations:
      glam_ner: true

  nif_context:
    description: NIF context URI for NLP interop
    range: uriorcurie
    # nif:Context is a class, not a property; the NIF property that points at
    # a context resource is nif:referenceContext.
    slot_uri: nif:referenceContext

  # ---------------------------------------------------------------------------
  # Common Slots
  # ---------------------------------------------------------------------------
  # NOTE(review): TEI's @xml:id lives in the XML namespace, not the TEI one;
  # `tei:id` is kept here because no `xml` prefix is declared in this schema.
  xml_id:
    description: XML identifier
    range: string
    identifier: true
    slot_uri: tei:id
|
|
|
|
# =============================================================================
|
|
# CLASSES
|
|
# =============================================================================
|
|
classes:
|
|
|
|
# ===========================================================================
|
|
# STANDOFF CONTAINER
|
|
# ===========================================================================
|
|
|
|
# <standOff> container class. TEIGlobalAttributes is not defined in the visible
# part of this module; presumably it comes from the ./namesdates import — confirm.
TEIStandOff:
  description: |
    Container for standoff markup and annotations.
    TEI element: <standOff>

    Holds content that doesn't fit in <text> or <teiHeader>, including:
    - Contextual information (listPerson, listPlace, listOrg)
    - Annotations pointing to text
    - Linked data and external references

    Ontology Mappings:
    - W3C Web Annotation: oa:AnnotationCollection
    - NIF: nif:Context
  class_uri: tei:standOff
  mixins:
    - TEIGlobalAttributes
  slots:
    - xml_id
    - standoff_type
    - annotations
    - annotation_blocks
    - links
    - link_groups
  # Metamodel annotations (distinct from the `annotations` slot listed above).
  annotations:
    tei_chapter: "17.10"
    oa_class: oa:AnnotationCollection
|
|
|
|
# ===========================================================================
|
|
# ANNOTATION CLASSES (W3C Web Annotation aligned)
|
|
# ===========================================================================
|
|
|
|
# <annotation> class: one body/target pair plus provenance metadata.
TEIAnnotation:
  description: |
    Annotation element compatible with W3C Web Annotation Data Model.
    TEI element: <annotation>

    Represents a single annotation with body, target, and metadata.

    GLAM-NER: Core class for NER annotation output.

    Ontology Mappings:
    - W3C Web Annotation: oa:Annotation
    - CIDOC-CRM: crm:E13_Attribute_Assignment
    - NIF: nif:Annotation
  class_uri: tei:annotation
  mixins:
    - TEIGlobalAttributes
    - TEIPointingAttributes
  slots:
    - xml_id
    - glam_hypernym
    - motivation
    - annotation_body
    - annotation_target
    - creator
    - created
    - modified
    - generator
    - certainty_degree
  slot_usage:
    motivation:
      description: Reason for creating annotation (typically 'identifying' for NER)
  annotations:
    tei_chapter: "17.11"
    oa_class: oa:Annotation
    cidoc_crm: crm:E13_Attribute_Assignment
    nif_class: nif:Annotation
|
|
|
|
# <listAnnotation> class: holds annotations and annotation blocks.
TEIListAnnotation:
  description: |
    Container for multiple annotations.
    TEI element: <listAnnotation>

    Groups related annotations, e.g., all NER annotations for a document.

    Ontology Mapping: oa:AnnotationCollection
  class_uri: tei:listAnnotation
  mixins:
    - TEIGlobalAttributes
  slots:
    - xml_id
    - annotations
    - annotation_blocks
  annotations:
    tei_chapter: "17.11"
    oa_class: oa:AnnotationCollection
|
|
|
|
# <annotationBlock> class: annotations plus the metadata they share.
TEIAnnotationBlock:
  description: |
    Block of annotations with shared properties.
    TEI element: <annotationBlock>

    Groups annotations that share creator, motivation, or other metadata.
  class_uri: tei:annotationBlock
  mixins:
    - TEIGlobalAttributes
  slots:
    - xml_id
    - creator
    - created
    - motivation
    - annotations
  annotations:
    tei_chapter: "17.11"
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Annotation Body and Target
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Range class of the `annotation_body` slot (oa:hasBody).
AnnotationBody:
  description: |
    Body of an annotation (the content/assertion).

    For NER: Typically contains the entity type and any additional metadata.

    Ontology Mapping: oa:hasBody range
  # The Web Annotation vocabulary defines no `oa:Body` term. A body carrying a
  # value, format and language corresponds to oa:TextualBody.
  class_uri: oa:TextualBody
  slots:
    - body_value
    - body_type
    - body_format
    - body_language
    - entity_type
    - entity_ref
  annotations:
    oa_property: oa:hasBody
|
|
|
|
# Range class of the `annotation_target` slot (oa:hasTarget).
AnnotationTarget:
  description: |
    Target of an annotation (what is being annotated).

    For NER: The text span containing the entity mention.

    Ontology Mapping: oa:hasTarget range
  # The Web Annotation vocabulary defines no `oa:Target` term. A resource
  # described by a source, a selector and a state is an oa:SpecificResource.
  class_uri: oa:SpecificResource
  slots:
    - source_uri
    - selector
    - state
  annotations:
    oa_property: oa:hasTarget
|
|
|
|
# Generic selector: `selector_type` names the kind, the remaining slots carry
# the quote / position / XPath / CSS payloads.
Selector:
  description: |
    Selector identifying specific part of target resource.

    W3C Web Annotation aligned selector types.
  class_uri: oa:Selector
  slots:
    - selector_type
    - exact_match
    - prefix_context
    - suffix_context
    - start_position
    - end_position
    - xpath_value
    - css_value
  annotations:
    oa_class: oa:Selector
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Annotation Body/Target Slots
|
|
# ---------------------------------------------------------------------------
|
|
# NOTE(review): these are slot definitions (they carry `range` / `slot_uri`),
# yet they sit in the classes region of the file with no `slots:` key above
# them. They must be nested under the top-level `slots:` mapping to be read as
# slots — confirm the intended placement.
body_value:
  description: Textual value of body
  range: string
  slot_uri: rdf:value

body_type:
  description: Type of body content
  range: string
  slot_uri: dcterms:type

body_format:
  description: MIME type of body
  range: string
  slot_uri: dcterms:format

body_language:
  description: Language of body content
  range: string
  slot_uri: dcterms:language

entity_type:
  description: GLAM-NER entity type (hypernym)
  range: string

entity_ref:
  description: Authority URI for entity
  range: uriorcurie

source_uri:
  description: Source document URI
  range: uriorcurie
  slot_uri: oa:hasSource

state:
  description: State of target at annotation time
  range: string
  slot_uri: oa:hasState
|
|
|
|
# ===========================================================================
|
|
# LINK CLASSES
|
|
# ===========================================================================
|
|
|
|
# <link> class: `targets` is mandatory here via slot_usage.
TEILink:
  description: |
    Link between elements or passages.
    TEI element: <link>

    Creates associations between two or more targets.

    GLAM-NER: Used for relationship annotations between entities.
  class_uri: tei:link
  mixins:
    - TEIGlobalAttributes
    - TEITypedPointingAttributes
  slots:
    - xml_id
    - targets
    - link_function
    - mutual
  slot_usage:
    targets:
      required: true
      description: Space-separated list of target references
  annotations:
    tei_chapter: "17.1"
|
|
|
|
# <linkGrp> class. `link_type` is listed directly and also arrives through the
# TEITypedPointingAttributes mixin; the duplication is kept as in the original.
TEILinkGrp:
  description: |
    Group of related links.
    TEI element: <linkGrp>

    Collects links that share common properties or form a coherent set.
  class_uri: tei:linkGrp
  mixins:
    - TEIGlobalAttributes
    - TEITypedPointingAttributes
  slots:
    - xml_id
    - links
    - link_type
  annotations:
    tei_chapter: "17.1"
|
|
|
|
# <ptr> class: `target` is mandatory here via slot_usage.
TEIPtr:
  description: |
    Pointer to another location.
    TEI element: <ptr>

    Empty element pointing to a target without inline content.
  class_uri: tei:ptr
  mixins:
    - TEIGlobalAttributes
    - TEIPointingAttributes
    - TEITypedPointingAttributes
  slots:
    - xml_id
    - target
  slot_usage:
    target:
      required: true
  annotations:
    tei_chapter: "3.7"
|
|
|
|
# <ref> class: like TEIPtr but with text content (`ref_text`) and an optional
# target (no `required` override).
TEIRef:
  description: |
    Reference with inline content.
    TEI element: <ref>

    Contains text that links to a target.

    GLAM-NER Hypernym: May contain entity mentions with authority links.
  class_uri: tei:ref
  mixins:
    - TEIGlobalAttributes
    - TEIPointingAttributes
    - TEITypedPointingAttributes
  slots:
    - xml_id
    - glam_hypernym
    - target
    - ref_text
  annotations:
    tei_chapter: "3.7"
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Ref Slots
|
|
# ---------------------------------------------------------------------------
|
|
# NOTE(review): slot definition located in the classes region of the file; it
# must be nested under the top-level `slots:` mapping — confirm placement.
ref_text:
  description: Text content of reference
  range: string
|
|
|
|
# ===========================================================================
|
|
# SEGMENTATION CLASSES
|
|
# ===========================================================================
|
|
|
|
# <seg> class: typed text span; `function` and `part` come from the
# TEISegmentationAttributes mixin.
TEISeg:
  description: |
    Arbitrary segment of text.
    TEI element: <seg>

    Marks spans for annotation, analysis, or reference.

    GLAM-NER: Used to delimit entity mention spans.

    Ontology Mappings:
    - NIF: nif:String, nif:Phrase
  class_uri: tei:seg
  mixins:
    - TEIGlobalAttributes
    - TEISegmentationAttributes
  slots:
    - xml_id
    - glam_hypernym
    - segment_type
    - seg_text
    - subtype
  annotations:
    tei_chapter: "17.3"
    nif_class: nif:String
|
|
|
|
# <anchor> class: identifier plus an optional type, no content slots.
TEIAnchor:
  description: |
    Anchor point for linking.
    TEI element: <anchor>

    Empty element providing attachment point for links.
  class_uri: tei:anchor
  mixins:
    - TEIGlobalAttributes
  slots:
    - xml_id
    - anchor_type
  annotations:
    tei_chapter: "17.3"
|
|
|
|
# <milestone> class: boundary marker described by a unit and an n value.
TEIMilestone:
  description: |
    Milestone marker in text.
    TEI element: <milestone>

    Marks boundary point for structures that don't nest properly.
  class_uri: tei:milestone
  mixins:
    - TEIGlobalAttributes
  slots:
    - xml_id
    - milestone_unit
    - milestone_n
  annotations:
    tei_chapter: "3.11"
|
|
|
|
# <ab> class: block-level counterpart of TEISeg, sharing the segmentation mixin.
TEIAb:
  description: |
    Anonymous block.
    TEI element: <ab>

    Generic block-level container for segmentation.
  class_uri: tei:ab
  mixins:
    - TEIGlobalAttributes
    - TEISegmentationAttributes
  slots:
    - xml_id
    - ab_type
    - ab_content
  annotations:
    tei_chapter: "17.3"
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Segmentation Slots
|
|
# ---------------------------------------------------------------------------
|
|
# NOTE(review): slot definitions located in the classes region of the file;
# they must be nested under the top-level `slots:` mapping — confirm placement.
seg_text:
  description: Text content of segment
  range: string

milestone_unit:
  description: Unit marked by milestone
  range: string
  slot_uri: tei:unit

milestone_n:
  description: Number/identifier for milestone
  range: string
  slot_uri: tei:n

ab_type:
  description: Type of anonymous block
  range: string

ab_content:
  description: Content of anonymous block
  range: string
|
|
|
|
# ===========================================================================
|
|
# AGGREGATION AND ALTERNATION CLASSES
|
|
# ===========================================================================
|
|
|
|
# <join> class. It lists `targets` directly and also receives the single
# `target` slot through the TEIPointingAttributes mixin.
TEIJoin:
  description: |
    Join element for aggregating fragments.
    TEI element: <join>

    Identifies fragments that should be read as a single unit.
  class_uri: tei:join
  mixins:
    - TEIGlobalAttributes
    - TEIPointingAttributes
  slots:
    - xml_id
    - targets
    - result
    - scope
  annotations:
    tei_chapter: "17.7"
|
|
|
|
# <joinGrp> class: a list of joins with a shared `result`.
TEIJoinGrp:
  description: |
    Group of joins.
    TEI element: <joinGrp>
  class_uri: tei:joinGrp
  mixins:
    - TEIGlobalAttributes
  slots:
    - xml_id
    - joins
    - result
  annotations:
    tei_chapter: "17.7"
|
|
|
|
# <alt> class: targets plus alternation mode and weights.
TEIAlt:
  description: |
    Alternative readings or interpretations.
    TEI element: <alt>

    Marks elements as alternatives (exclusive or inclusive).
  class_uri: tei:alt
  mixins:
    - TEIGlobalAttributes
    - TEIPointingAttributes
  slots:
    - xml_id
    - targets
    - mode
    - weights
  annotations:
    tei_chapter: "17.8"
|
|
|
|
# <altGrp> class: a list of alternatives with a shared `mode`.
TEIAltGrp:
  description: |
    Group of alternatives.
    TEI element: <altGrp>
  class_uri: tei:altGrp
  mixins:
    - TEIGlobalAttributes
  slots:
    - xml_id
    - alternatives
    - mode
  annotations:
    tei_chapter: "17.8"
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Aggregation Slots
|
|
# ---------------------------------------------------------------------------
|
|
# NOTE(review): slot definitions located in the classes region of the file;
# they must be nested under the top-level `slots:` mapping — confirm placement.
joins:
  description: Join elements in group
  range: TEIJoin
  multivalued: true
  inlined: true

alternatives:
  description: Alternative elements
  range: TEIAlt
  multivalued: true
  inlined: true
|
|
|
|
# ===========================================================================
|
|
# TEMPORAL ALIGNMENT CLASSES
|
|
# ===========================================================================
|
|
|
|
# <timeline> class: origin, default interval/unit, and its <when> points.
TEITimeline:
  description: |
    Timeline for temporal alignment.
    TEI element: <timeline>

    Provides temporal framework for synchronizing text with time.
  class_uri: tei:timeline
  mixins:
    - TEIGlobalAttributes
  slots:
    - xml_id
    - origin_point
    - interval
    - time_unit
    - when_points
  annotations:
    tei_chapter: "17.4"
|
|
|
|
# <when> class: either an absolute time or an interval/unit relative to `since`.
TEIWhen:
  description: |
    Point in time on a timeline.
    TEI element: <when>

    Specifies absolute or relative time point for alignment.
  class_uri: tei:when
  mixins:
    - TEIGlobalAttributes
  slots:
    - xml_id
    - absolute_time
    - interval
    - time_unit
    - since
  annotations:
    tei_chapter: "17.4"
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Timeline Slots
|
|
# ---------------------------------------------------------------------------
|
|
# Multivalued slot holding TEIWhen objects nested inside their owner
# (listed by TEITimeline).
when_points:
  range: TEIWhen
  multivalued: true
  inlined: true
  description: Time points in timeline
|
|
|
|
# ===========================================================================
|
|
# CERTAINTY AND RESPONSIBILITY
|
|
# ===========================================================================
|
|
|
|
# Class for the TEI <certainty> element. GLAM-NER uses it to attach
# confidence information to entity annotations.
TEICertainty:
  class_uri: tei:certainty
  description: |
    Certainty annotation.
    TEI element: <certainty>

    Expresses degree of confidence in encoding.

    GLAM-NER: Used for confidence scores on entity annotations.
  mixins:
    - TEIGlobalAttributes
    - TEIPointingAttributes
  slots:
    - xml_id
    - certainty_degree
    - certainty_locus
    - target
    - responsible
    - certainty_description
  annotations:
    # <certainty> is documented under "Levels of Certainty" (22.1);
    # section 22.2 covers <precision>.
    tei_chapter: "22.1"
|
|
|
|
# Class for the TEI <respStmt> element: pairs a description of the
# responsibility with the person and/or organization holding it.
TEIRespStmt:
  class_uri: tei:respStmt
  description: |
    Responsibility statement.
    TEI element: <respStmt>

    Documents who is responsible for what.
  # <respStmt> belongs to att.global like the other TEI element classes in
  # this module, so it takes the same global-attribute mixin.
  mixins:
    - TEIGlobalAttributes
  slots:
    - xml_id
    - resp_description
    - person_name
    - org_name
  annotations:
    tei_chapter: "3.12"
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Certainty Slots
|
|
# ---------------------------------------------------------------------------
|
|
# Free-text slot listed by TEICertainty.
certainty_description:
  range: string
  description: Description of certainty
|
|
|
|
# String slot mapped to tei:resp; listed by TEIRespStmt.
resp_description:
  slot_uri: tei:resp
  range: string
  description: Nature of responsibility
|
|
|
|
# String slot mapped to tei:persName; listed by TEIRespStmt.
person_name:
  slot_uri: tei:persName
  range: string
  description: Person responsible
|
|
|
|
# String slot mapped to tei:orgName; listed by TEIRespStmt.
org_name:
  slot_uri: tei:orgName
  range: string
  description: Organization responsible
|
|
|
|
# ===========================================================================
|
|
# NER-SPECIFIC ANNOTATION CLASSES
|
|
# ===========================================================================
|
|
|
|
# Subclass of TEIAnnotation for NER pipeline output. It adds NER slots,
# requires `glam_hypernym`, and defaults `motivation` to "identifying".
NERAnnotation:
  is_a: TEIAnnotation
  description: |
    Named Entity Recognition annotation.

    Specialized annotation class for NER pipeline output.
    Extends TEIAnnotation with NER-specific fields.

    GLAM-NER Hypernyms: All entity types (AGT, GRP, GEO, TMP, etc.)
  slots:
    - glam_hypernym
    - entity_type
    - entity_ref
    - confidence_score
    - ner_method
    - entity_label
  slot_usage:
    # Default applied when no motivation is supplied.
    motivation:
      ifabsent: string(identifying)
    # Every NER annotation must carry a hypernym code.
    glam_hypernym:
      required: true
      description: GLAM-NER hypernym (e.g., AGT.PER, GEO.SET)
  annotations:
    purpose: NER pipeline output
    glam_ner: true
|
|
|
|
# Text span in which an entity is mentioned, modelled as a nif:String with
# character offsets, the mention text and a link to the referenced entity.
EntityMention:
  class_uri: nif:String
  description: |
    Entity mention in text.

    Represents the textual span where an entity is mentioned.
    Aligned with NIF (NLP Interchange Format).

    Ontology Mappings:
    - NIF: nif:String
    - Schema.org: schema:Thing (via entity type)
  slots:
    - xml_id
    - glam_hypernym
    - mention_text
    - begin_index
    - end_index
    - anchor_of
    - reference_context
    - entity_ref
    - confidence_score
  annotations:
    nif_class: nif:String
    glam_ner: true
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# NER-Specific Slots
|
|
# ---------------------------------------------------------------------------
|
|
# Float slot bounded to the closed interval [0.0, 1.0].
confidence_score:
  range: float
  minimum_value: 0.0
  maximum_value: 1.0
  description: Confidence score (0.0-1.0)
|
|
|
|
# Free-text identifier of the NER method or model behind an annotation.
ner_method:
  range: string
  description: NER method/model used
|
|
|
|
# Display label for the entity, as plain text.
entity_label:
  range: string
  description: Human-readable entity label
|
|
|
|
# Surface string of the mention, mapped to nif:anchorOf.
mention_text:
  slot_uri: nif:anchorOf
  range: string
  description: Exact text of entity mention
|
|
|
|
# Start offset of the mention, mapped to nif:beginIndex.
# NIF offsets are non-negative, so negative values are rejected.
begin_index:
  description: Start character offset
  range: integer
  minimum_value: 0
  slot_uri: nif:beginIndex
|
|
|
|
# End offset of the mention, mapped to nif:endIndex.
# NIF offsets are non-negative, so negative values are rejected.
end_index:
  description: End character offset
  range: integer
  minimum_value: 0
  slot_uri: nif:endIndex
|
|
|
|
# nif:anchorOf carries the anchored string itself, not its surrounding
# context; the containing context is referenced through reference_context.
# NOTE(review): mention_text maps to the same property (nif:anchorOf);
# confirm whether both slots are needed or should be consolidated.
anchor_of:
  description: String anchored by this mention (the text the mention denotes)
  range: string
  slot_uri: nif:anchorOf
|
|
|
|
# URI/CURIE pointer to the context that contains the mention, mapped to
# nif:referenceContext.
reference_context:
  slot_uri: nif:referenceContext
  range: uriorcurie
  description: URI of containing context
|
|
|
|
# =============================================================================
|
|
# INTEGRATION NOTES
|
|
# =============================================================================
|
|
#
|
|
# W3C Web Annotation Data Model Integration:
|
|
#
|
|
# TEI Element → Web Annotation Class
|
|
# -------------------------------------------------
|
|
# annotation → oa:Annotation
|
|
# listAnnotation → oa:AnnotationCollection
|
|
# annotationBlock → oa:AnnotationPage (partial)
|
|
#
|
|
# Motivation values align with oa:Motivation subclasses.
|
|
# Selector types align with oa:Selector subclasses.
|
|
#
|
|
# NIF (NLP Interchange Format) Integration:
|
|
#
|
|
# For NER pipeline interoperability:
|
|
# - EntityMention → nif:String
|
|
# - begin_index → nif:beginIndex
|
|
# - end_index → nif:endIndex
|
|
# - mention_text → nif:anchorOf
|
|
# - reference_context → nif:referenceContext
|
|
#
|
|
# GLAM-NER Hypernym Usage:
|
|
#
|
|
# All entity annotations should include glam_hypernym:
|
|
# - annotation.body.entity_type → hypernym code
|
|
# - seg[@type] → hypernym for span markup
|
|
# - ref[@type] → hypernym for inline references
|
|
#
|
|
# Standoff vs Inline Annotation:
|
|
#
|
|
# Standoff (preferred for NER):
|
|
# - Annotations stored in <standOff>
|
|
# - Target text via selectors (XPath, offset, text quote)
|
|
# - Non-invasive, multiple annotation layers possible
|
|
#
|
|
# Inline:
|
|
# - Entity mentions wrapped in <persName>, <placeName>, etc.
|
|
# - Immediate context, simpler processing
|
|
# - Hierarchical constraints apply
|
|
#
|
|
# Example NER Annotation Output:
|
|
#
|
|
# <standOff>
|
|
# <listAnnotation>
|
|
# <annotation xml:id="ann1"
|
|
# target="#string-range(//p[1],0,19)">
|
|
# <note type="glam_hypernym">AGT.PER</note>
|
|
# <rs type="person" ref="https://viaf.org/viaf/123456">
|
|
# William Shakespeare
|
|
# </rs>
|
|
# <certainty locus="value" cert="high" resp="#ner-model-v1"/>
|
|
# </annotation>
|
|
# </listAnnotation>
|
|
# </standOff>
|
|
#
|
|
# =============================================================================
|