glam/data/entity_annotation/modules/advanced/tei/linking.yaml
2025-12-05 15:30:23 +01:00

1393 lines
38 KiB
YAML

# =============================================================================
# GLAM-NER: TEI P5 LINKING, SEGMENTATION, AND ALIGNMENT MODULE
# =============================================================================
# Module: modules/advanced/tei/linking.yaml
# Parent: entity_annotation_rules_v1.7.0_unified.yaml
# Purpose: LinkML schema for TEI P5 Chapter 17 - Linking, Segmentation, Alignment
# Source: TEI P5 4.10.2 (September 2025) - linking module
# =============================================================================
# This module provides LinkML class definitions for TEI standoff markup,
# linking, segmentation, and alignment elements. Essential for:
# - Standoff annotation (Web Annotation Data Model compatible)
# - Cross-document linking
# - Text segmentation and anchoring
# - Parallel text alignment
# - NER annotation pipelines
#
# Key TEI Elements Covered (related elements such as ptr, ref, milestone, ab,
# joinGrp, altGrp, certainty, and respStmt are modelled as well):
# - standOff: Container for standoff annotations
# - annotation: W3C Web Annotation compatible
# - listAnnotation: Annotation collections
# - annotationBlock: Grouped annotations
# - link: Hyperlinks between elements
# - linkGrp: Link groups
# - seg: Arbitrary text segments
# - anchor: Anchor points for linking
# - join: Aggregation of fragments
# - alt: Alternative interpretations
# - timeline/when: Temporal alignment
#
# Ontology Alignments:
# - W3C Web Annotation (oa:Annotation)
# - NIF (Natural Language Processing Interchange Format)
# - CIDOC-CRM: E13_Attribute_Assignment
# - Schema.org: Action, Comment
# =============================================================================
# Schema identity and release metadata.
id: https://w3id.org/glam/ner/tei/linking
name: glam-ner-tei-linking
title: TEI P5 Linking, Segmentation, and Alignment Module for GLAM-NER
version: "1.0.0"
license: https://creativecommons.org/licenses/by/4.0/

# CURIE prefix -> namespace expansions for the `class_uri`, `slot_uri`, `uri`
# and `meaning` values used throughout this module.
prefixes:
  tei: http://www.tei-c.org/ns/1.0/
  glam: https://w3id.org/glam/ner/
  linkml: https://w3id.org/linkml/
  oa: http://www.w3.org/ns/oa#
  as: https://www.w3.org/ns/activitystreams#
  nif: http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#
  crm: http://www.cidoc-crm.org/cidoc-crm/
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/
  prov: http://www.w3.org/ns/prov#
  xsd: http://www.w3.org/2001/XMLSchema#
  rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#

# Elements without an explicit URI are minted in the `glam` namespace.
default_prefix: glam
# Slots that do not declare a range fall back to string.
default_range: string
# =============================================================================
# IMPORTS
# =============================================================================
imports:
- linkml:types
- ./namesdates # Import shared types and mixins
# =============================================================================
# ENUMS
# =============================================================================
# Controlled vocabularies referenced as slot ranges by this module.
enums:
  # ---------------------------------------------------------------------------
  # Annotation Motivation (W3C Web Annotation aligned)
  # ---------------------------------------------------------------------------
  # Each value carries a `meaning` that maps it onto the matching oa: term.
  AnnotationMotivation:
    description: |
      Motivation for creating an annotation.
      Aligned with W3C Web Annotation motivations.
    permissible_values:
      assessing:
        description: Quality assessment
        meaning: oa:assessing
      bookmarking:
        description: Marking for future reference
        meaning: oa:bookmarking
      classifying:
        description: Categorization or tagging
        meaning: oa:classifying
      commenting:
        description: Commentary or discussion
        meaning: oa:commenting
      describing:
        description: Descriptive note
        meaning: oa:describing
      editing:
        description: Editorial suggestion
        meaning: oa:editing
      highlighting:
        description: Visual emphasis
        meaning: oa:highlighting
      identifying:
        description: Entity identification (NER)
        meaning: oa:identifying
      linking:
        description: Linking to related resource
        meaning: oa:linking
      moderating:
        description: Content moderation
        meaning: oa:moderating
      questioning:
        description: Raising a question
        meaning: oa:questioning
      replying:
        description: Reply to another annotation
        meaning: oa:replying
      tagging:
        description: Adding keywords/tags
        meaning: oa:tagging

  # ---------------------------------------------------------------------------
  # Link Function Types
  # ---------------------------------------------------------------------------
  # Range of the `link_function` slot; no external `meaning` mappings.
  LinkFunctionType:
    description: Function or purpose of a link
    permissible_values:
      pointer:
        description: Simple reference
      aggregation:
        description: Aggregation of fragments
      alignment:
        description: Parallel text alignment
      correspondence:
        description: Correspondence relation
      translation:
        description: Translation relationship
      alternative:
        description: Alternative reading
      exclusion:
        description: Mutual exclusion
      temporal:
        description: Temporal synchronization
      citation:
        description: Citation reference
      cross_reference:
        description: Cross-reference

  # ---------------------------------------------------------------------------
  # Segment Types
  # ---------------------------------------------------------------------------
  # Range of the `segment_type` slot.
  SegmentType:
    description: Types of text segments
    permissible_values:
      phrase:
        description: Phrasal unit
      clause:
        description: Clausal unit
      sentence:
        description: Sentence unit
      paragraph:
        description: Paragraph unit
      token:
        description: Token (word/punctuation)
      entity:
        description: Named entity span
      annotation:
        description: Annotation target span
      quotation:
        description: Quoted text span
      arbitrary:
        description: Arbitrary segment

  # ---------------------------------------------------------------------------
  # Selector Types (Web Annotation aligned)
  # ---------------------------------------------------------------------------
  # Range of the `selector_type` slot; each value maps to an oa: selector class.
  SelectorType:
    description: |
      Types of selectors for identifying annotation targets.
      Aligned with W3C Web Annotation selectors.
    permissible_values:
      text_quote:
        description: Exact text match
        meaning: oa:TextQuoteSelector
      text_position:
        description: Character offset position
        meaning: oa:TextPositionSelector
      xpath:
        description: XPath expression
        meaning: oa:XPathSelector
      css:
        description: CSS selector
        meaning: oa:CssSelector
      fragment:
        description: Media fragment
        meaning: oa:FragmentSelector
      range:
        description: Range selector (start/end)
        meaning: oa:RangeSelector
      svg:
        description: SVG shape selector
        meaning: oa:SvgSelector
      data_position:
        description: Data position selector
        meaning: oa:DataPositionSelector

  # ---------------------------------------------------------------------------
  # Certainty Types
  # ---------------------------------------------------------------------------
  # Range of the `certainty_degree` slot.
  CertaintyDegree:
    description: Degree of certainty for annotations
    permissible_values:
      high:
        description: High confidence
      medium:
        description: Medium confidence
      low:
        description: Low confidence
      unknown:
        description: Confidence not specified

  # ---------------------------------------------------------------------------
  # Part Types (for fragmented elements)
  # ---------------------------------------------------------------------------
  # Range of the `part` slot.
  PartType:
    description: Part indicator for fragmented elements
    permissible_values:
      initial:
        description: Initial fragment
      medial:
        description: Middle fragment
      final:
        description: Final fragment
      # "yes" and "no" MUST stay quoted: YAML 1.1 loaders (e.g. PyYAML) resolve
      # the bare words to the booleans true/false, which would silently replace
      # these two permissible values with non-string keys.
      "yes":
        description: Fragment (unspecified position)
      "no":
        description: Complete (not fragmented)

  # ---------------------------------------------------------------------------
  # Time Units
  # ---------------------------------------------------------------------------
  # Range of the `time_unit` slot.
  TimeUnit:
    description: Units for temporal measurement
    permissible_values:
      d:
        description: Days
      h:
        description: Hours
      min:
        description: Minutes
      s:
        description: Seconds
      ms:
        description: Milliseconds
# =============================================================================
# TYPES
# =============================================================================
# Custom scalar types; all of them are refinements of `string`.
types:
  TEIPointerList:
    uri: tei:pointerList
    typeof: string
    description: |
      Space-separated list of TEI pointers/URIs.
      Used for multi-target references.
    # One or more whitespace-separated, non-blank tokens. Each token is left
    # unconstrained so that every URI-reference form is accepted: bare and
    # '#'-prefixed identifiers, absolute http(s) URIs, relative paths, other
    # schemes, and XPointer-scheme fragments such as
    # "#string-range(//p[1],0,12)". The previous pattern only allowed
    # [\w.-] identifiers and http(s) URLs and therefore rejected the last three.
    pattern: "^\\S+(\\s+\\S+)*$"

  XPathExpression:
    uri: xsd:string
    typeof: string
    description: |
      XPath expression for element selection.
      TEI uses xpath() pointer scheme.

  # No pattern is declared, so the expression syntax is not validated here.
  CSSSelector:
    uri: xsd:string
    typeof: string
    description: CSS selector expression.

  # Declared but not used as a range by any slot visible in this module.
  MediaFragment:
    uri: xsd:string
    typeof: string
    description: |
      Media fragment identifier (RFC 5147 for text, W3C for media).
      Format: #char=start,end or #t=start,end
# =============================================================================
# SLOTS
# =============================================================================
# All slot definitions of this module live in this one top-level `slots:`
# mapping. LinkML reads slot definitions only from here, so the topic-specific
# slot groups (annotation body/target, ref, segmentation, aggregation,
# timeline, certainty, NER) are collected below rather than being interleaved
# with the class definitions.
slots:
  # ---------------------------------------------------------------------------
  # Core Pointing Slots
  # ---------------------------------------------------------------------------
  # NOTE(review): `target` and `targets` (Link Slots) both map to tei:target.
  target:
    description: Target(s) of pointer reference
    range: TEIPointerList
    slot_uri: tei:target
  target_lang:
    description: Language of target resource
    range: string
    slot_uri: tei:targetLang
  evaluate:
    description: Evaluation context for XPointer
    range: string
    slot_uri: tei:evaluate

  # ---------------------------------------------------------------------------
  # Annotation Slots
  # ---------------------------------------------------------------------------
  # NOTE(review): `annotation_id` and `xml_id` are both identifier slots mapped
  # to tei:id; no class in this module lists `annotation_id`.
  annotation_id:
    description: Unique identifier for annotation
    range: string
    identifier: true
    slot_uri: tei:id
  motivation:
    description: Motivation for annotation
    range: AnnotationMotivation
    slot_uri: oa:motivatedBy
  annotation_body:
    description: Body content of annotation
    range: AnnotationBody
    inlined: true
    slot_uri: oa:hasBody
  annotation_target:
    description: Target of annotation
    range: AnnotationTarget
    inlined: true
    slot_uri: oa:hasTarget
  creator:
    description: Creator of annotation
    range: string
    slot_uri: dcterms:creator
  created:
    description: Creation timestamp
    range: datetime
    slot_uri: dcterms:created
  modified:
    description: Modification timestamp
    range: datetime
    slot_uri: dcterms:modified
  generator:
    description: Software/tool that generated annotation
    range: string
    slot_uri: as:generator

  # ---------------------------------------------------------------------------
  # Selector Slots
  # ---------------------------------------------------------------------------
  selector:
    description: Selector for identifying target
    range: Selector
    inlined: true
    slot_uri: oa:hasSelector
  selector_type:
    description: Type of selector
    range: SelectorType
  exact_match:
    description: Exact text to match
    range: string
    slot_uri: oa:exact
  prefix_context:
    description: Text before the match
    range: string
    slot_uri: oa:prefix
  suffix_context:
    description: Text after the match
    range: string
    slot_uri: oa:suffix
  start_position:
    description: Start character offset (0-based)
    range: integer
    slot_uri: oa:start
  end_position:
    description: End character offset
    range: integer
    slot_uri: oa:end
  xpath_value:
    description: XPath expression
    range: XPathExpression
  css_value:
    description: CSS selector expression
    range: CSSSelector

  # ---------------------------------------------------------------------------
  # Link Slots
  # ---------------------------------------------------------------------------
  link_type:
    description: Type of link
    range: string
    slot_uri: tei:type
  subtype:
    description: Subtype classification
    range: string
    slot_uri: tei:subtype
  link_function:
    description: Function of link
    range: LinkFunctionType
  targets:
    description: Multiple targets for link
    range: TEIPointerList
    slot_uri: tei:target
  mutual:
    description: Whether link is bidirectional
    range: boolean
    slot_uri: tei:mutual

  # ---------------------------------------------------------------------------
  # Segment Slots
  # ---------------------------------------------------------------------------
  segment_type:
    description: Type of segment
    range: SegmentType
    slot_uri: tei:type
  function:
    description: Linguistic function
    range: string
    slot_uri: tei:function
  part:
    description: Part indicator for fragments
    range: PartType
    slot_uri: tei:part

  # ---------------------------------------------------------------------------
  # Anchor Slots
  # ---------------------------------------------------------------------------
  anchor_type:
    description: Type of anchor point
    range: string

  # ---------------------------------------------------------------------------
  # Join/Aggregation Slots
  # ---------------------------------------------------------------------------
  result:
    description: Element type resulting from join
    range: string
    slot_uri: tei:result
  scope:
    description: Scope of join (root/branches)
    range: string
    slot_uri: tei:scope

  # ---------------------------------------------------------------------------
  # Alternative Slots
  # ---------------------------------------------------------------------------
  mode:
    description: Mode of alternation (excl/incl)
    range: string
    slot_uri: tei:mode
  weights:
    description: Probability weights for alternatives
    range: string
    slot_uri: tei:weights

  # ---------------------------------------------------------------------------
  # Timeline Slots
  # ---------------------------------------------------------------------------
  origin_point:
    description: Origin point of timeline
    range: string
    slot_uri: tei:origin
  interval:
    description: Time interval value
    range: float
    slot_uri: tei:interval
  time_unit:
    description: Unit of time measurement
    range: TimeUnit
    slot_uri: tei:unit
  absolute_time:
    description: Absolute time value
    range: datetime
    slot_uri: tei:absolute
  since:
    description: Reference point for relative time
    range: string
    slot_uri: tei:since
  when_points:
    description: Time points in timeline
    range: TEIWhen
    multivalued: true
    inlined: true

  # ---------------------------------------------------------------------------
  # Certainty Slots
  # ---------------------------------------------------------------------------
  certainty_degree:
    description: Degree of certainty
    range: CertaintyDegree
    slot_uri: tei:cert
  certainty_locus:
    description: Aspect affected by uncertainty
    range: string
    slot_uri: tei:locus
  responsible:
    description: Entity responsible for assertion
    range: string
    slot_uri: tei:resp
  certainty_description:
    description: Description of certainty
    range: string
  resp_description:
    description: Nature of responsibility
    range: string
    slot_uri: tei:resp
  person_name:
    description: Person responsible
    range: string
    slot_uri: tei:persName
  org_name:
    description: Organization responsible
    range: string
    slot_uri: tei:orgName

  # ---------------------------------------------------------------------------
  # Standoff Container Slots
  # ---------------------------------------------------------------------------
  standoff_type:
    description: Type of standoff content
    range: string
  # This is a schema slot named `annotations`; it is distinct from the
  # `annotations:` metadata key that appears on classes and slots.
  annotations:
    description: Contained annotations
    range: TEIAnnotation
    multivalued: true
    inlined: true
  annotation_blocks:
    description: Grouped annotation blocks
    range: TEIAnnotationBlock
    multivalued: true
    inlined: true
  links:
    description: Link elements
    range: TEILink
    multivalued: true
    inlined: true
  link_groups:
    description: Link group elements
    range: TEILinkGrp
    multivalued: true
    inlined: true

  # ---------------------------------------------------------------------------
  # GLAM-NER Slots
  # ---------------------------------------------------------------------------
  glam_hypernym:
    description: GLAM-NER hypernym for entity type
    range: string
    annotations:
      glam_ner: true
  # NOTE(review): nif:Context looks like a class IRI rather than a property;
  # confirm the intended slot_uri. No class in this module lists this slot.
  nif_context:
    description: NIF context URI for NLP interop
    range: uriorcurie
    slot_uri: nif:Context

  # ---------------------------------------------------------------------------
  # Common Slots
  # ---------------------------------------------------------------------------
  xml_id:
    description: XML identifier
    range: string
    identifier: true
    slot_uri: tei:id

  # ---------------------------------------------------------------------------
  # Annotation Body/Target Slots
  # ---------------------------------------------------------------------------
  body_value:
    description: Textual value of body
    range: string
    slot_uri: rdf:value
  body_type:
    description: Type of body content
    range: string
    slot_uri: dcterms:type
  body_format:
    description: MIME type of body
    range: string
    slot_uri: dcterms:format
  body_language:
    description: Language of body content
    range: string
    slot_uri: dcterms:language
  entity_type:
    description: GLAM-NER entity type (hypernym)
    range: string
  entity_ref:
    description: Authority URI for entity
    range: uriorcurie
  source_uri:
    description: Source document URI
    range: uriorcurie
    slot_uri: oa:hasSource
  state:
    description: State of target at annotation time
    range: string
    slot_uri: oa:hasState

  # ---------------------------------------------------------------------------
  # Ref Slots
  # ---------------------------------------------------------------------------
  ref_text:
    description: Text content of reference
    range: string

  # ---------------------------------------------------------------------------
  # Segmentation Slots
  # ---------------------------------------------------------------------------
  seg_text:
    description: Text content of segment
    range: string
  milestone_unit:
    description: Unit marked by milestone
    range: string
    slot_uri: tei:unit
  milestone_n:
    description: Number/identifier for milestone
    range: string
    slot_uri: tei:n
  ab_type:
    description: Type of anonymous block
    range: string
  ab_content:
    description: Content of anonymous block
    range: string

  # ---------------------------------------------------------------------------
  # Aggregation Slots
  # ---------------------------------------------------------------------------
  joins:
    description: Join elements in group
    range: TEIJoin
    multivalued: true
    inlined: true
  alternatives:
    description: Alternative elements
    range: TEIAlt
    multivalued: true
    inlined: true

  # ---------------------------------------------------------------------------
  # NER-Specific Slots
  # ---------------------------------------------------------------------------
  confidence_score:
    description: Confidence score (0.0-1.0)
    range: float
    minimum_value: 0.0
    maximum_value: 1.0
  ner_method:
    description: NER method/model used
    range: string
  entity_label:
    description: Human-readable entity label
    range: string
  mention_text:
    description: Exact text of entity mention
    range: string
    slot_uri: nif:anchorOf
  begin_index:
    description: Start character offset
    range: integer
    slot_uri: nif:beginIndex
  end_index:
    description: End character offset
    range: integer
    slot_uri: nif:endIndex
  # NOTE(review): shares slot_uri nif:anchorOf with `mention_text`, and both
  # slots are listed on EntityMention; confirm which mapping is intended.
  anchor_of:
    description: Context containing this mention
    range: string
    slot_uri: nif:anchorOf
  reference_context:
    description: URI of containing context
    range: uriorcurie
    slot_uri: nif:referenceContext

# =============================================================================
# CLASSES
# =============================================================================
# `TEIGlobalAttributes` is referenced as a mixin below but is not defined in
# this module; presumably it is supplied by the `./namesdates` import.
classes:
  # ===========================================================================
  # MIXINS
  # ===========================================================================
  # LinkML has no top-level `mixins:` section: a mixin is an ordinary class
  # flagged with `mixin: true`, so the three attribute classes are defined here.

  # ---------------------------------------------------------------------------
  # Pointing Attributes
  # ---------------------------------------------------------------------------
  TEIPointingAttributes:
    description: |
      Attributes for elements that point to other elements.
      TEI att.pointing class.
    mixin: true
    slots:
      - target
      - target_lang
      - evaluate
    slot_usage:
      target:
        description: Target(s) of pointer (URI or space-separated URIs)
        range: TEIPointerList
        slot_uri: tei:target

  # ---------------------------------------------------------------------------
  # Segmentation Attributes
  # ---------------------------------------------------------------------------
  TEISegmentationAttributes:
    description: |
      Attributes for segmentable elements.
      TEI att.segLike class.
    mixin: true
    slots:
      - function
      - part
    slot_usage:
      function:
        description: Linguistic function of segment
        range: string
      part:
        description: Fragment indicator (I/M/F/Y/N)
        range: PartType

  # ---------------------------------------------------------------------------
  # Typed Pointing Attributes
  # ---------------------------------------------------------------------------
  TEITypedPointingAttributes:
    description: |
      Typed pointer attributes.
      TEI att.typed class combined with att.pointing.
    mixin: true
    slots:
      - link_type
      - subtype
    slot_usage:
      link_type:
        description: Type classification of link
        range: string
        slot_uri: tei:type

  # ===========================================================================
  # STANDOFF CONTAINER
  # ===========================================================================
  TEIStandOff:
    description: |
      Container for standoff markup and annotations.
      TEI element: <standOff>
      Holds content that doesn't fit in <text> or <teiHeader>, including:
      - Contextual information (listPerson, listPlace, listOrg)
      - Annotations pointing to text
      - Linked data and external references
      Ontology Mappings:
      - W3C Web Annotation: oa:AnnotationCollection
      - NIF: nif:Context
    class_uri: tei:standOff
    mixins:
      - TEIGlobalAttributes
    slots:
      - xml_id
      - standoff_type
      - annotations
      - annotation_blocks
      - links
      - link_groups
    annotations:
      tei_chapter: "17.10"
      oa_class: oa:AnnotationCollection

  # ===========================================================================
  # ANNOTATION CLASSES (W3C Web Annotation aligned)
  # ===========================================================================
  TEIAnnotation:
    description: |
      Annotation element compatible with W3C Web Annotation Data Model.
      TEI element: <annotation>
      Represents a single annotation with body, target, and metadata.
      GLAM-NER: Core class for NER annotation output.
      Ontology Mappings:
      - W3C Web Annotation: oa:Annotation
      - CIDOC-CRM: crm:E13_Attribute_Assignment
      - NIF: nif:Annotation
    class_uri: tei:annotation
    mixins:
      - TEIGlobalAttributes
      - TEIPointingAttributes
    slots:
      - xml_id
      - glam_hypernym
      - motivation
      - annotation_body
      - annotation_target
      - creator
      - created
      - modified
      - generator
      - certainty_degree
    slot_usage:
      motivation:
        description: Reason for creating annotation (typically 'identifying' for NER)
    annotations:
      tei_chapter: "17.11"
      oa_class: oa:Annotation
      cidoc_crm: crm:E13_Attribute_Assignment
      nif_class: nif:Annotation

  TEIListAnnotation:
    description: |
      Container for multiple annotations.
      TEI element: <listAnnotation>
      Groups related annotations, e.g., all NER annotations for a document.
      Ontology Mapping: oa:AnnotationCollection
    class_uri: tei:listAnnotation
    mixins:
      - TEIGlobalAttributes
    slots:
      - xml_id
      - annotations
      - annotation_blocks
    annotations:
      tei_chapter: "17.11"
      oa_class: oa:AnnotationCollection

  TEIAnnotationBlock:
    description: |
      Block of annotations with shared properties.
      TEI element: <annotationBlock>
      Groups annotations that share creator, motivation, or other metadata.
    class_uri: tei:annotationBlock
    mixins:
      - TEIGlobalAttributes
    slots:
      - xml_id
      - creator
      - created
      - motivation
      - annotations
    annotations:
      tei_chapter: "17.11"

  # ---------------------------------------------------------------------------
  # Annotation Body and Target
  # ---------------------------------------------------------------------------
  # NOTE(review): oa:Body and oa:Target do not appear to be terms of the W3C
  # Web Annotation vocabulary (oa:TextualBody / oa:SpecificResource are);
  # confirm the intended class_uri values before relying on RDF output.
  AnnotationBody:
    description: |
      Body of an annotation (the content/assertion).
      For NER: Typically contains the entity type and any additional metadata.
      Ontology Mapping: oa:hasBody range
    class_uri: oa:Body
    slots:
      - body_value
      - body_type
      - body_format
      - body_language
      - entity_type
      - entity_ref
    annotations:
      oa_property: oa:hasBody

  AnnotationTarget:
    description: |
      Target of an annotation (what is being annotated).
      For NER: The text span containing the entity mention.
      Ontology Mapping: oa:hasTarget range
    class_uri: oa:Target
    slots:
      - source_uri
      - selector
      - state
    annotations:
      oa_property: oa:hasTarget

  # One class carries the fields of several selector kinds; `selector_type`
  # records which kind a given instance represents.
  Selector:
    description: |
      Selector identifying specific part of target resource.
      W3C Web Annotation aligned selector types.
    class_uri: oa:Selector
    slots:
      - selector_type
      - exact_match
      - prefix_context
      - suffix_context
      - start_position
      - end_position
      - xpath_value
      - css_value
    annotations:
      oa_class: oa:Selector

  # ===========================================================================
  # LINK CLASSES
  # ===========================================================================
  TEILink:
    description: |
      Link between elements or passages.
      TEI element: <link>
      Creates associations between two or more targets.
      GLAM-NER: Used for relationship annotations between entities.
    class_uri: tei:link
    mixins:
      - TEIGlobalAttributes
      - TEITypedPointingAttributes
    slots:
      - xml_id
      - targets
      - link_function
      - mutual
    slot_usage:
      targets:
        required: true
        description: Space-separated list of target references
    annotations:
      tei_chapter: "17.1"

  TEILinkGrp:
    description: |
      Group of related links.
      TEI element: <linkGrp>
      Collects links that share common properties or form a coherent set.
    class_uri: tei:linkGrp
    mixins:
      - TEIGlobalAttributes
      - TEITypedPointingAttributes
    slots:
      - xml_id
      - links
      - link_type
    annotations:
      tei_chapter: "17.1"

  TEIPtr:
    description: |
      Pointer to another location.
      TEI element: <ptr>
      Empty element pointing to a target without inline content.
    class_uri: tei:ptr
    mixins:
      - TEIGlobalAttributes
      - TEIPointingAttributes
      - TEITypedPointingAttributes
    slots:
      - xml_id
      - target
    slot_usage:
      target:
        required: true
    annotations:
      tei_chapter: "3.7"

  TEIRef:
    description: |
      Reference with inline content.
      TEI element: <ref>
      Contains text that links to a target.
      GLAM-NER Hypernym: May contain entity mentions with authority links.
    class_uri: tei:ref
    mixins:
      - TEIGlobalAttributes
      - TEIPointingAttributes
      - TEITypedPointingAttributes
    slots:
      - xml_id
      - glam_hypernym
      - target
      - ref_text
    annotations:
      tei_chapter: "3.7"

  # ===========================================================================
  # SEGMENTATION CLASSES
  # ===========================================================================
  TEISeg:
    description: |
      Arbitrary segment of text.
      TEI element: <seg>
      Marks spans for annotation, analysis, or reference.
      GLAM-NER: Used to delimit entity mention spans.
      Ontology Mappings:
      - NIF: nif:String, nif:Phrase
    class_uri: tei:seg
    mixins:
      - TEIGlobalAttributes
      - TEISegmentationAttributes
    slots:
      - xml_id
      - glam_hypernym
      - segment_type
      - seg_text
      - subtype
    annotations:
      tei_chapter: "17.3"
      nif_class: nif:String

  TEIAnchor:
    description: |
      Anchor point for linking.
      TEI element: <anchor>
      Empty element providing attachment point for links.
    class_uri: tei:anchor
    mixins:
      - TEIGlobalAttributes
    slots:
      - xml_id
      - anchor_type
    annotations:
      tei_chapter: "17.3"

  TEIMilestone:
    description: |
      Milestone marker in text.
      TEI element: <milestone>
      Marks boundary point for structures that don't nest properly.
    class_uri: tei:milestone
    mixins:
      - TEIGlobalAttributes
    slots:
      - xml_id
      - milestone_unit
      - milestone_n
    annotations:
      tei_chapter: "3.11"

  TEIAb:
    description: |
      Anonymous block.
      TEI element: <ab>
      Generic block-level container for segmentation.
    class_uri: tei:ab
    mixins:
      - TEIGlobalAttributes
      - TEISegmentationAttributes
    slots:
      - xml_id
      - ab_type
      - ab_content
    annotations:
      tei_chapter: "17.3"

  # ===========================================================================
  # AGGREGATION AND ALTERNATION CLASSES
  # ===========================================================================
  TEIJoin:
    description: |
      Join element for aggregating fragments.
      TEI element: <join>
      Identifies fragments that should be read as a single unit.
    class_uri: tei:join
    mixins:
      - TEIGlobalAttributes
      - TEIPointingAttributes
    slots:
      - xml_id
      - targets
      - result
      - scope
    annotations:
      tei_chapter: "17.7"

  TEIJoinGrp:
    description: |
      Group of joins.
      TEI element: <joinGrp>
    class_uri: tei:joinGrp
    mixins:
      - TEIGlobalAttributes
    slots:
      - xml_id
      - joins
      - result
    annotations:
      tei_chapter: "17.7"

  TEIAlt:
    description: |
      Alternative readings or interpretations.
      TEI element: <alt>
      Marks elements as alternatives (exclusive or inclusive).
    class_uri: tei:alt
    mixins:
      - TEIGlobalAttributes
      - TEIPointingAttributes
    slots:
      - xml_id
      - targets
      - mode
      - weights
    annotations:
      tei_chapter: "17.8"

  TEIAltGrp:
    description: |
      Group of alternatives.
      TEI element: <altGrp>
    class_uri: tei:altGrp
    mixins:
      - TEIGlobalAttributes
    slots:
      - xml_id
      - alternatives
      - mode
    annotations:
      tei_chapter: "17.8"

  # ===========================================================================
  # TEMPORAL ALIGNMENT CLASSES
  # ===========================================================================
  TEITimeline:
    description: |
      Timeline for temporal alignment.
      TEI element: <timeline>
      Provides temporal framework for synchronizing text with time.
    class_uri: tei:timeline
    mixins:
      - TEIGlobalAttributes
    slots:
      - xml_id
      - origin_point
      - interval
      - time_unit
      - when_points
    annotations:
      tei_chapter: "17.4"

  TEIWhen:
    description: |
      Point in time on a timeline.
      TEI element: <when>
      Specifies absolute or relative time point for alignment.
    class_uri: tei:when
    mixins:
      - TEIGlobalAttributes
    slots:
      - xml_id
      - absolute_time
      - interval
      - time_unit
      - since
    annotations:
      tei_chapter: "17.4"

  # ===========================================================================
  # CERTAINTY AND RESPONSIBILITY
  # ===========================================================================
  TEICertainty:
    description: |
      Certainty annotation.
      TEI element: <certainty>
      Expresses degree of confidence in encoding.
      GLAM-NER: Used for confidence scores on entity annotations.
    class_uri: tei:certainty
    mixins:
      - TEIGlobalAttributes
      - TEIPointingAttributes
    slots:
      - xml_id
      - certainty_degree
      - certainty_locus
      - target
      - responsible
      - certainty_description
    annotations:
      tei_chapter: "22.2"

  # Unlike the other TEI element classes here, this one declares no mixins.
  TEIRespStmt:
    description: |
      Responsibility statement.
      TEI element: <respStmt>
      Documents who is responsible for what.
    class_uri: tei:respStmt
    slots:
      - xml_id
      - resp_description
      - person_name
      - org_name
    annotations:
      tei_chapter: "3.12"

  # ===========================================================================
  # NER-SPECIFIC ANNOTATION CLASSES
  # ===========================================================================
  NERAnnotation:
    description: |
      Named Entity Recognition annotation.
      Specialized annotation class for NER pipeline output.
      Extends TEIAnnotation with NER-specific fields.
      GLAM-NER Hypernyms: All entity types (AGT, GRP, GEO, TMP, etc.)
    is_a: TEIAnnotation
    slots:
      - glam_hypernym
      - entity_type
      - entity_ref
      - confidence_score
      - ner_method
      - entity_label
    slot_usage:
      motivation:
        # `motivation` has the enum range AnnotationMotivation, so the default
        # is written in the EnumName(PermissibleValue) form, not string(...).
        ifabsent: AnnotationMotivation(identifying)
      glam_hypernym:
        required: true
        description: GLAM-NER hypernym (e.g., AGT.PER, GEO.SET)
    annotations:
      purpose: NER pipeline output
      glam_ner: true

  EntityMention:
    description: |
      Entity mention in text.
      Represents the textual span where an entity is mentioned.
      Aligned with NIF (NLP Interchange Format).
      Ontology Mappings:
      - NIF: nif:String
      - Schema.org: schema:Thing (via entity type)
    class_uri: nif:String
    slots:
      - xml_id
      - glam_hypernym
      - mention_text
      - begin_index
      - end_index
      - anchor_of
      - reference_context
      - entity_ref
      - confidence_score
    annotations:
      nif_class: nif:String
      glam_ner: true
# =============================================================================
# INTEGRATION NOTES
# =============================================================================
#
# W3C Web Annotation Data Model Integration:
#
# TEI Element → Web Annotation Class
# -------------------------------------------------
# annotation → oa:Annotation
# listAnnotation → oa:AnnotationCollection
# annotationBlock → oa:AnnotationPage (partial)
#
# Motivation values align with the W3C-defined instances of oa:Motivation.
# Selector types align with oa:Selector subclasses.
#
# NIF (NLP Interchange Format) Integration:
#
# For NER pipeline interoperability:
# - EntityMention → nif:String
# - begin_index → nif:beginIndex
# - end_index → nif:endIndex
# - mention_text → nif:anchorOf
# - reference_context → nif:referenceContext
#
# GLAM-NER Hypernym Usage:
#
# All entity annotations should include glam_hypernym:
# - annotation.body.entity_type → hypernym code
# - seg[@type] → hypernym for span markup
# - ref[@type] → hypernym for inline references
#
# Standoff vs Inline Annotation:
#
# Standoff (preferred for NER):
# - Annotations stored in <standOff>
# - Target text via selectors (XPath, offset, text quote)
# - Non-invasive, multiple annotation layers possible
#
# Inline:
# - Entity mentions wrapped in <persName>, <placeName>, etc.
# - Immediate context, simpler processing
# - Hierarchical constraints apply
#
# Example NER Annotation Output:
#
# <standOff>
# <listAnnotation>
# <annotation xml:id="ann1"
# target="#string-range(//p[1],0,12)">
# <note type="glam_hypernym">AGT.PER</note>
# <rs type="person" ref="https://viaf.org/viaf/123456">
# William Shakespeare
# </rs>
# <certainty cert="high" resp="#ner-model-v1"/>
# </annotation>
# </listAnnotation>
# </standOff>
#
# =============================================================================