# Listing metadata (from the code viewer): 976 lines, 28 KiB, YAML
# =============================================================================
# GLAM-NER: TEI Analysis Module (LinkML)
# =============================================================================
# Module: modules/advanced/tei/analysis.yaml
# TEI Chapter: 18 - Simple Analytic Mechanisms
# TEI Module: analysis
# Version: 1.0.0
# =============================================================================
#
# This module defines LinkML classes for TEI P5 Chapter 18 elements used for
# simple analytic mechanisms including linguistic segmentation, POS tagging,
# morphological analysis, syntactic annotation, and interpretive markup.
#
# Key Element Groups:
# - Linguistic Segments: s, cl, phr, w, m, c, pc
# - Spans and Interpretations: span, spanGrp, interp, interpGrp
# - Analysis Attributes: @ana, @lemma, @pos, @msd, @join
#
# GLAM-NER Integration:
# - Linguistic annotation for NLP preprocessing
# - POS tagging and lemmatization output
# - Syntactic structure annotation
# - Interpretive annotation for entity classification
#
# Ontology Alignments:
# - NIF (NLP Interchange Format): Linguistic annotation
# - OntoLex-Lemon: Lexical entries and forms
# - OLiA (Ontologies of Linguistic Annotation): POS tags
# - CIDOC-CRM: Interpretive assertions
# - Web Annotation: Span annotations
#
# =============================================================================
|
|
|
|
# Schema identity and versioning.
id: https://w3id.org/glam/ner/tei/analysis
name: glam-ner-tei-analysis
title: 'TEI Analysis Module for GLAM-NER'
version: '1.0.0'
license: https://creativecommons.org/licenses/by/4.0/

# External documentation link for this module.
see_also:
  - https://tei-c.org/release/doc/tei-p5-doc/en/html/AI.html

# Schema imports.
imports:
  - linkml:types

# Defaults applied when a prefix or a slot range is not given explicitly.
default_prefix: glam
default_range: string

# CURIE prefix expansions, listed alphabetically.
prefixes:
  crm: http://www.cidoc-crm.org/cidoc-crm/
  glam: https://w3id.org/glam/ner/
  linkml: https://w3id.org/linkml/
  nif: http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#
  oa: http://www.w3.org/ns/oa#
  olia: http://purl.org/olia/olia.owl#
  ontolex: http://www.w3.org/ns/lemon/ontolex#
  schema: http://schema.org/
  skos: http://www.w3.org/2004/02/skos/core#
  tei: http://www.tei-c.org/ns/1.0/
|
|
|
|
|
|
# =============================================================================
|
|
# ENUMERATIONS
|
|
# =============================================================================
|
|
|
|
enums:
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Linguistic Unit Type Enum
|
|
# ---------------------------------------------------------------------------
|
|
# Segmentation granularities, from sentence down to single character.
# Each value is linked to a NIF or OLiA term through `meaning`.
LinguisticUnitTypeEnum:
  description: 'Types of linguistic units for segmentation.'
  permissible_values:
    sentence:
      meaning: nif:Sentence
      description: 'Sentence unit (s-unit)'
    clause:
      meaning: olia:Clause
      description: 'Clause (syntactic unit)'
    phrase:
      meaning: olia:Phrase
      description: 'Phrase (noun phrase, verb phrase, etc.)'
    word:
      meaning: nif:Word
      description: 'Word (grammatical word)'
      # "token" is accepted as an alternative name for this value.
      aliases:
        - token
    morpheme:
      meaning: olia:Morpheme
      description: 'Morpheme (minimal meaningful unit)'
    character:
      meaning: nif:Character
      description: 'Character (single grapheme)'
    punctuation:
      meaning: olia:Punctuation
      description: 'Punctuation mark'
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Part of Speech Enum (Universal Dependencies tagset)
|
|
# ---------------------------------------------------------------------------
|
|
# Part-of-speech tag vocabulary; every value is mapped to an OLiA term.
POSTagEnum:
  description: >-
    Part of speech tags based on Universal Dependencies (UD) tagset. Extended
    with additional categories for compatibility with other tagsets.
  permissible_values:
    # --- Open class words ---
    ADJ:
      meaning: olia:Adjective
      description: 'Adjective'
    ADV:
      meaning: olia:Adverb
      description: 'Adverb'
    INTJ:
      meaning: olia:Interjection
      description: 'Interjection'
    NOUN:
      meaning: olia:Noun
      description: 'Noun'
    PROPN:
      meaning: olia:ProperNoun
      description: 'Proper noun'
    VERB:
      meaning: olia:Verb
      description: 'Verb'
    # --- Closed class words ---
    ADP:
      meaning: olia:Adposition
      description: 'Adposition (preposition/postposition)'
    AUX:
      meaning: olia:AuxiliaryVerb
      description: 'Auxiliary verb'
    CCONJ:
      meaning: olia:CoordinatingConjunction
      description: 'Coordinating conjunction'
    DET:
      meaning: olia:Determiner
      description: 'Determiner'
    NUM:
      meaning: olia:Numeral
      description: 'Numeral'
    PART:
      meaning: olia:Particle
      description: 'Particle'
    PRON:
      meaning: olia:Pronoun
      description: 'Pronoun'
    SCONJ:
      meaning: olia:SubordinatingConjunction
      description: 'Subordinating conjunction'
    # --- Other ---
    PUNCT:
      meaning: olia:Punctuation
      description: 'Punctuation'
    SYM:
      meaning: olia:Symbol
      description: 'Symbol'
    X:
      meaning: olia:Residual
      description: 'Other/Unknown'
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Phrase Type Enum
|
|
# ---------------------------------------------------------------------------
|
|
# Syntactic phrase categories. The last three values (CP, DP, IP) carry no
# `meaning` mapping.
PhraseTypeEnum:
  description: 'Types of syntactic phrases.'
  permissible_values:
    NP:
      meaning: olia:NounPhrase
      description: 'Noun phrase'
    VP:
      meaning: olia:VerbPhrase
      description: 'Verb phrase'
    PP:
      meaning: olia:PrepositionalPhrase
      description: 'Prepositional phrase'
    AP:
      meaning: olia:AdjectivePhrase
      description: 'Adjective phrase'
    ADVP:
      meaning: olia:AdverbPhrase
      description: 'Adverb phrase'
    S:
      meaning: olia:Sentence
      description: 'Sentence/clause'
    SBAR:
      meaning: olia:SubordinateClause
      description: 'Subordinate clause'
    CP:
      description: 'Complementizer phrase'
    DP:
      description: 'Determiner phrase'
    IP:
      description: 'Inflectional phrase'
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Join Direction Enum
|
|
# ---------------------------------------------------------------------------
|
|
# Values for the TEI @join attribute (used by the `join_direction` slot).
# The existing values keep their original order; "no" is appended so that the
# enum covers the full TEI value list.
JoinDirectionEnum:
  description: >-
    Direction of token joining (for handling whitespace).
  permissible_values:
    left:
      description: Join to preceding token (no space before)
    right:
      description: Join to following token (no space after)
    both:
      description: Join to both adjacent tokens
    overlap:
      description: Token overlaps with neighbors
    # Must stay quoted: a bare `no` is read as boolean false by YAML 1.1
    # parsers, which would corrupt the permissible value name.
    "no":
      description: Token is not joined to an adjacent token
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Morphological Feature Type Enum
|
|
# ---------------------------------------------------------------------------
|
|
# Names of morphological features usable as `feature_name` in MorphFeature.
# Poss, PronType and Reflex carry no `meaning` mapping.
MorphFeatureTypeEnum:
  description: 'Types of morphological features (Universal Features).'
  permissible_values:
    # --- Nominal features ---
    Case:
      meaning: olia:hasCase
      description: 'Grammatical case'
    Definite:
      meaning: olia:hasDefiniteness
      description: 'Definiteness'
    Gender:
      meaning: olia:hasGender
      description: 'Grammatical gender'
    Number:
      meaning: olia:hasNumber
      description: 'Grammatical number'
    # --- Verbal features ---
    Aspect:
      meaning: olia:hasAspect
      description: 'Grammatical aspect'
    Mood:
      meaning: olia:hasMood
      description: 'Grammatical mood'
    Person:
      meaning: olia:hasPerson
      description: 'Grammatical person'
    Tense:
      meaning: olia:hasTense
      description: 'Grammatical tense'
    VerbForm:
      meaning: olia:hasVerbForm
      description: 'Verb form (finite, infinitive, participle, etc.)'
    Voice:
      meaning: olia:hasVoice
      description: 'Grammatical voice'
    # --- Other features ---
    Degree:
      meaning: olia:hasDegree
      description: 'Degree of comparison'
    Polarity:
      meaning: olia:hasPolarity
      description: 'Polarity (affirmative/negative)'
    Poss:
      description: 'Possessive'
    PronType:
      description: 'Pronoun type'
    Reflex:
      description: 'Reflexive'
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Interpretation Type Enum
|
|
# ---------------------------------------------------------------------------
|
|
# Categories of interpretive annotation (range of the `interp_type` slot).
# Only `entity` is mapped to an external term.
InterpretationTypeEnum:
  description: 'Types of interpretive annotation.'
  permissible_values:
    semantic:
      description: 'Semantic interpretation'
    thematic:
      description: 'Thematic/topic interpretation'
    stylistic:
      description: 'Stylistic interpretation'
    pragmatic:
      description: 'Pragmatic interpretation'
    discourse:
      description: 'Discourse-level interpretation'
    rhetorical:
      description: 'Rhetorical interpretation'
    cultural:
      description: 'Cultural/historical interpretation'
    entity:
      meaning: oa:identifying
      description: 'Named entity interpretation'
|
|
|
|
|
|
# =============================================================================
|
|
# SLOTS (Attributes)
|
|
# =============================================================================
|
|
|
|
slots:
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Global Analysis Attributes
|
|
# ---------------------------------------------------------------------------
|
|
# TEI @ana: multivalued URI/CURIE references to interpretation elements.
ana:
  slot_uri: tei:ana
  range: uriorcurie
  multivalued: true
  description: >-
    Analysis reference - points to interpretation element(s) for this
    segment. TEI @ana attribute, equivalent to NIF annotation reference.
  annotations:
    nif_mapping: nif:annotation
    tei_attribute: ana
|
|
|
|
# TEI @lemma: free-text base form of a word.
lemma:
  slot_uri: tei:lemma
  range: string
  description: >-
    Base form (lemma/dictionary form) of a word. TEI @lemma
    attribute.
  annotations:
    ontolex_mapping: ontolex:canonicalForm
    tei_attribute: lemma
|
|
|
|
# TEI @pos: part-of-speech tag kept as a plain string (not bound to
# POSTagEnum), so any tagset can be stored.
pos:
  slot_uri: tei:pos
  range: string
  description: >-
    Part of speech tag. Can use any tagset but Universal
    Dependencies recommended for interoperability.
  annotations:
    olia_mapping: olia:hasTag
    tei_attribute: pos
|
|
|
|
# TEI @msd: morphosyntactic description stored as one string.
msd:
  slot_uri: tei:msd
  range: string
  description: >-
    Morphosyntactic description - detailed morphological
    features. Typically in CONLL-U or similar format (e.g.,
    "Case=Nom|Number=Sing").
  annotations:
    tei_attribute: msd
|
|
|
|
# TEI @join: value drawn from JoinDirectionEnum.
join_direction:
  slot_uri: tei:join
  range: JoinDirectionEnum
  description: >-
    How this token joins with adjacent tokens (whitespace
    handling). TEI @join attribute.
  annotations:
    tei_attribute: join
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Segmentation Attributes
|
|
# ---------------------------------------------------------------------------
|
|
# Kind of linguistic unit, constrained to LinguisticUnitTypeEnum.
segment_type:
  range: LinguisticUnitTypeEnum
  description: 'Type of linguistic segment.'
|
|
|
|
# Free-text functional role of a segment.
segment_function:
  range: string
  description: 'Grammatical or functional role of segment.'
|
|
|
|
# Boolean flag marking whether a segment actually occurs in the source.
real:
  slot_uri: tei:real
  range: boolean
  description: >-
    Whether segment represents actual occurrence in source. False
    for editorial/analytical additions.
  annotations:
    tei_attribute: real
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Word/Token Attributes
|
|
# ---------------------------------------------------------------------------
|
|
# TEI @norm: normalized spelling of a word.
norm:
  slot_uri: tei:norm
  range: string
  description: 'Normalized/regularized form of word.'
  annotations:
    tei_attribute: norm
|
|
|
|
# Counterpart of `norm`: the form as it stood before normalization.
orig:
  slot_uri: tei:orig
  range: string
  description: 'Original form (before normalization).'
  annotations:
    tei_attribute: orig
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Span Attributes
|
|
# ---------------------------------------------------------------------------
|
|
# TEI @from: URI/CURIE reference to where a span starts.
span_from:
  slot_uri: tei:from
  range: uriorcurie
  description: 'Start point of span annotation (TEI @from).'
  annotations:
    tei_attribute: from
|
|
|
|
# TEI @to: URI/CURIE reference to where a span ends.
span_to:
  slot_uri: tei:to
  range: uriorcurie
  description: 'End point of span annotation (TEI @to).'
  annotations:
    tei_attribute: to
|
|
|
|
# Multivalued URI/CURIE references to the elements a span annotates.
span_target:
  range: uriorcurie
  multivalued: true
  description: 'Target element(s) of span annotation.'
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Interpretation Attributes
|
|
# ---------------------------------------------------------------------------
|
|
# Category of an interpretation, constrained to InterpretationTypeEnum.
interp_type:
  range: InterpretationTypeEnum
  description: 'Type of interpretation.'
|
|
|
|
# Free-text payload of an interpretation.
interp_value:
  range: string
  description: 'Value or content of interpretation.'
|
|
|
|
# TEI @inst: multivalued URI/CURIE references to the annotated elements.
inst:
  slot_uri: tei:inst
  range: uriorcurie
  multivalued: true
  description: >-
    Instances (element IDs) to which interpretation applies. TEI
    @inst attribute.
  annotations:
    tei_attribute: inst
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Phrase Attributes
|
|
# ---------------------------------------------------------------------------
|
|
# Syntactic category of a phrase, constrained to PhraseTypeEnum.
phrase_type:
  range: PhraseTypeEnum
  description: 'Type of syntactic phrase.'
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Morphological Feature Slots
|
|
# ---------------------------------------------------------------------------
|
|
# List of MorphFeature objects embedded inline in the owning record.
morph_features:
  range: MorphFeature
  multivalued: true
  inlined: true
  inlined_as_list: true
  description: 'Collection of morphological features as key-value pairs.'
|
|
|
|
|
|
# =============================================================================
|
|
# CLASSES
|
|
# =============================================================================
|
|
|
|
classes:
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# BASE CLASSES
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Abstract base class; contributes the `ana` slot to its subclasses.
AnalysisElement:
  abstract: true
  class_uri: tei:AnalysisElement
  description: 'Abstract base class for all analysis module elements.'
  slots:
    - ana
  annotations:
    tei_module: analysis
|
|
|
|
# Abstract parent of the segmentation classes; adds segment_type,
# segment_function and real on top of what AnalysisElement provides.
LinguisticSegment:
  abstract: true
  is_a: AnalysisElement
  class_uri: nif:String
  description: >-
    Abstract base class for linguistic segmentation elements. Maps to
    NIF String for interoperability.
  slots:
    - segment_type
    - segment_function
    - real
  annotations:
    nif_mapping: nif:String
    tei_module: analysis
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# SENTENCE AND CLAUSE ELEMENTS
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# TEI <s>: sentence-level segment.
# `segment_type` is inherited from LinguisticSegment, so it is not listed
# again under `slots`; only its default is set here via slot_usage.
Sentence:
  description: >-
    Sentence or s-unit - a grammatical sentence or equivalent unit.
    TEI <s> element. Maps to NIF Sentence.
  is_a: LinguisticSegment
  slot_usage:
    segment_type:
      # The slot's range is LinguisticUnitTypeEnum, so the default is written
      # in the EnumName(PermissibleValue) form rather than string(...).
      ifabsent: LinguisticUnitTypeEnum(sentence)
  class_uri: tei:s
  annotations:
    tei_element: s
    tei_module: analysis
    glam_hypernym: TXT.ANA.SEN
    nif_mapping: nif:Sentence
|
|
|
|
# TEI <cl>: clause-level segment; adds `phrase_type` to the inherited slots.
Clause:
  description: >-
    Clause - a syntactic unit including a finite or non-finite verb.
    TEI <cl> element.
  is_a: LinguisticSegment
  slots:
    - phrase_type
  slot_usage:
    segment_type:
      # The slot's range is LinguisticUnitTypeEnum, so the default is written
      # in the EnumName(PermissibleValue) form rather than string(...).
      ifabsent: LinguisticUnitTypeEnum(clause)
  class_uri: tei:cl
  annotations:
    tei_element: cl
    tei_module: analysis
    glam_hypernym: TXT.ANA.CLS
    olia_mapping: olia:Clause
|
|
|
|
# TEI <phr>: phrase-level segment; adds `phrase_type` to the inherited slots.
Phrase:
  description: >-
    Phrase - a syntactic phrase (NP, VP, PP, etc.).
    TEI <phr> element.
  is_a: LinguisticSegment
  slots:
    - phrase_type
  slot_usage:
    segment_type:
      # The slot's range is LinguisticUnitTypeEnum, so the default is written
      # in the EnumName(PermissibleValue) form rather than string(...).
      ifabsent: LinguisticUnitTypeEnum(phrase)
  class_uri: tei:phr
  annotations:
    tei_element: phr
    tei_module: analysis
    glam_hypernym: TXT.ANA.PHR
    olia_mapping: olia:Phrase
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# TOKEN-LEVEL ELEMENTS
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# TEI <w>: word-level segment carrying the token attributes (lemma, pos, msd,
# join_direction, norm, orig) plus inlined morphological features.
Word:
  description: >-
    Word - a grammatical (not necessarily orthographic) word.
    TEI <w> element. Core element for POS tagging and lemmatization.
    Maps to NIF Word and OntoLex Form.
  is_a: LinguisticSegment
  slots:
    - lemma
    - pos
    - msd
    - join_direction
    - norm
    - orig
    - morph_features
  slot_usage:
    segment_type:
      # The slot's range is LinguisticUnitTypeEnum, so the default is written
      # in the EnumName(PermissibleValue) form rather than string(...).
      ifabsent: LinguisticUnitTypeEnum(word)
  class_uri: tei:w
  annotations:
    tei_element: w
    tei_module: analysis
    glam_hypernym: TXT.ANA.WRD
    nif_mapping: nif:Word
    ontolex_mapping: ontolex:Form
|
|
|
|
# TEI <m>: morpheme-level segment with lemma/pos/msd and a class-local
# `base_form` attribute (TEI @baseForm).
Morpheme:
  description: >-
    Morpheme - a minimal meaningful unit within a word.
    TEI <m> element.
  is_a: LinguisticSegment
  slots:
    - lemma
    - pos
    - msd
  attributes:
    base_form:
      description: >-
        Base form of the morpheme.
      range: string
      annotations:
        tei_attribute: baseForm
  slot_usage:
    segment_type:
      # The slot's range is LinguisticUnitTypeEnum, so the default is written
      # in the EnumName(PermissibleValue) form rather than string(...).
      ifabsent: LinguisticUnitTypeEnum(morpheme)
  class_uri: tei:m
  annotations:
    tei_element: m
    tei_module: analysis
    glam_hypernym: TXT.ANA.MOR
    olia_mapping: olia:Morpheme
|
|
|
|
# TEI <c>: character-level segment; declares no slots beyond the inherited
# ones.
Character:
  description: >-
    Character - a single character (grapheme).
    TEI <c> element. Used for character-level annotation.
  is_a: LinguisticSegment
  slot_usage:
    segment_type:
      # The slot's range is LinguisticUnitTypeEnum, so the default is written
      # in the EnumName(PermissibleValue) form rather than string(...).
      ifabsent: LinguisticUnitTypeEnum(character)
  class_uri: tei:c
  annotations:
    tei_element: c
    tei_module: analysis
    glam_hypernym: TXT.ANA.CHR
    nif_mapping: nif:Character
|
|
|
|
# TEI <pc>: punctuation segment with pos/join_direction and three class-local
# attributes (unit, pre, force).
# The attribute descriptions follow the TEI <pc> attribute definitions; all
# three ranges stay `string` so existing data remains valid.
Punctuation:
  description: >-
    Punctuation character or string regarded as a single punctuation mark.
    TEI <pc> element.
  is_a: LinguisticSegment
  slots:
    - pos
    - join_direction
  attributes:
    unit:
      description: >-
        Name for the kind of unit delimited by this punctuation mark.
      range: string
      annotations:
        tei_attribute: unit
    pre:
      # NOTE(review): TEI types @pre as a truth value; kept as string here
      # for backward compatibility - confirm before tightening to boolean.
      description: >-
        Whether this punctuation mark precedes (true) or follows (false)
        the unit it delimits.
      range: string
      annotations:
        tei_attribute: pre
    force:
      description: >-
        Extent to which this punctuation mark conventionally separates
        words or phrases (strong, weak, inter).
      range: string
      annotations:
        tei_attribute: force
  slot_usage:
    segment_type:
      # The slot's range is LinguisticUnitTypeEnum, so the default is written
      # in the EnumName(PermissibleValue) form rather than string(...).
      ifabsent: LinguisticUnitTypeEnum(punctuation)
  class_uri: tei:pc
  annotations:
    tei_element: pc
    tei_module: analysis
    glam_hypernym: TXT.ANA.PNC
    olia_mapping: olia:Punctuation
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# MORPHOLOGICAL FEATURES
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# One name/value pair of morphological annotation; both parts are required.
MorphFeature:
  class_uri: olia:MorphologicalFeature
  description: >-
    A single morphological feature (key-value pair). Used to represent
    Universal Dependencies-style morphological annotation.
  attributes:
    feature_name:
      range: MorphFeatureTypeEnum
      required: true
      description: 'Name of morphological feature (e.g., Case, Number, Gender).'
    feature_value:
      range: string
      required: true
      description: 'Value of morphological feature (e.g., Nom, Sing, Masc).'
  annotations:
    glam_hypernym: TXT.ANA.MFT
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# SPAN AND INTERPRETATION ELEMENTS
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# TEI <span>: locates an annotation via from/to/target references and
# carries its type and value.
Span:
  is_a: AnalysisElement
  class_uri: tei:span
  description: >-
    Span - associates interpretive annotation with a span of text. TEI
    <span> element. Maps to Web Annotation for standoff annotation.
  slots:
    - span_from
    - span_to
    - span_target
    - interp_type
    - interp_value
  annotations:
    glam_hypernym: TXT.ANA.SPN
    oa_mapping: oa:Annotation
    tei_element: span
    tei_module: analysis
|
|
|
|
# TEI <spanGrp>: typed container holding an inlined list of Span objects.
SpanGroup:
  is_a: AnalysisElement
  class_uri: tei:spanGrp
  description: >-
    Span group - groups related span annotations. TEI <spanGrp>
    element.
  slots:
    - interp_type
  attributes:
    spans:
      range: Span
      multivalued: true
      inlined: true
      inlined_as_list: true
      description: 'Collection of span annotations in this group.'
  annotations:
    glam_hypernym: TXT.ANA.SPG
    tei_element: spanGrp
    tei_module: analysis
|
|
|
|
# TEI <interp>: a typed interpretation value plus the instances it applies to.
Interp:
  is_a: AnalysisElement
  class_uri: tei:interp
  description: >-
    Interpretation - summarizes an interpretive annotation. TEI <interp>
    element. Used for entity classification, thematic coding, etc.
  slots:
    - interp_type
    - interp_value
    - inst
  annotations:
    crm_mapping: crm:E13_Attribute_Assignment
    glam_hypernym: TXT.ANA.INT
    tei_element: interp
    tei_module: analysis
|
|
|
|
# TEI <interpGrp>: typed container holding an inlined list of Interp objects.
InterpGroup:
  is_a: AnalysisElement
  class_uri: tei:interpGrp
  description: >-
    Interpretation group - collects related interpretation elements. TEI
    <interpGrp> element. Can represent a tagset or coding scheme.
  slots:
    - interp_type
  attributes:
    interps:
      range: Interp
      multivalued: true
      inlined: true
      inlined_as_list: true
      description: 'Collection of interpretation definitions.'
  annotations:
    glam_hypernym: TXT.ANA.IGP
    skos_mapping: skos:ConceptScheme
    tei_element: interpGrp
    tei_module: analysis
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# NLP OUTPUT CLASSES
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Top-level container: an inlined list of TokenizedSentence objects plus the
# tokenizer name and a timestamp.
TokenizedText:
  class_uri: nif:Context
  description: >-
    Container for tokenized text output from NLP pipeline. Holds a
    sequence of sentences, each containing tokens.
  attributes:
    sentences:
      range: TokenizedSentence
      multivalued: true
      inlined: true
      inlined_as_list: true
      description: 'Tokenized sentences.'
    tokenizer:
      range: string
      description: 'Name of tokenizer used (e.g., "spacy-en_core_web_sm").'
    tokenization_date:
      range: datetime
      description: 'Date/time of tokenization.'
  annotations:
    glam_hypernym: TXT.ANA.TOK
|
|
|
|
# Sentence subclass that adds an inlined list of Word tokens, the raw
# sentence text, and integer begin/end offsets.
TokenizedSentence:
  is_a: Sentence
  class_uri: nif:Sentence
  description: >-
    A tokenized sentence with word/token sequence. Extends TEI Sentence
    with NLP-specific fields.
  attributes:
    tokens:
      range: Word
      multivalued: true
      inlined: true
      inlined_as_list: true
      description: 'Sequence of tokens in sentence.'
    sentence_text:
      range: string
      description: 'Original sentence text.'
    begin_offset:
      range: integer
      description: 'Character offset of sentence start in source document.'
    end_offset:
      range: integer
      description: 'Character offset of sentence end in source document.'
  annotations:
    glam_hypernym: TXT.ANA.TSN
|
|
|
|
# Word subclass for tagger output: an enum-typed tag, a confidence bounded
# to [0.0, 1.0], and an inlined list of alternative tags.
POSTaggedToken:
  is_a: Word
  class_uri: olia:Token
  description: >-
    A token with POS tag and optional morphological analysis. Specialized
    Word class for POS tagger output.
  attributes:
    pos_tag:
      range: POSTagEnum
      description: 'Part of speech tag.'
    pos_confidence:
      range: float
      minimum_value: 0.0
      maximum_value: 1.0
      description: 'Confidence score for POS tag (0.0-1.0).'
    pos_alternatives:
      range: POSAlternative
      multivalued: true
      inlined: true
      inlined_as_list: true
      description: 'Alternative POS tags with scores.'
  annotations:
    glam_hypernym: TXT.ANA.POS
|
|
|
|
# A candidate tag paired with a score bounded to [0.0, 1.0].
POSAlternative:
  description: 'An alternative POS tag with confidence score.'
  attributes:
    tag:
      range: POSTagEnum
      description: 'Alternative POS tag.'
    score:
      range: float
      minimum_value: 0.0
      maximum_value: 1.0
      description: 'Confidence score (0.0-1.0).'
  annotations:
    glam_hypernym: TXT.ANA.PAL
|
|
|
|
# Parse result for one sentence: a reference to the sentence, an inlined
# list of DependencyRelation objects, and the parser name.
DependencyParse:
  class_uri: nif:DependencyTree
  description: >-
    Dependency parse tree for a sentence. Represents syntactic
    dependencies between tokens.
  attributes:
    sentence_ref:
      range: uriorcurie
      description: 'Reference to source sentence.'
    dependencies:
      range: DependencyRelation
      multivalued: true
      inlined: true
      inlined_as_list: true
      description: 'Dependency relations.'
    parse_method:
      range: string
      description: 'Parser used (e.g., "spacy", "stanza", "stanford").'
  annotations:
    glam_hypernym: TXT.ANA.DEP
|
|
|
|
# One head -> dependent edge. head, dependent and relation are required;
# `enhanced` is an optional boolean flag.
DependencyRelation:
  description: 'A single dependency relation between two tokens.'
  attributes:
    head:
      range: integer
      required: true
      description: 'Index of head token (0 = root).'
    dependent:
      range: integer
      required: true
      description: 'Index of dependent token.'
    relation:
      range: string
      required: true
      description: 'Dependency relation type (e.g., nsubj, dobj, amod).'
    enhanced:
      range: boolean
      description: 'Whether this is an enhanced dependency.'
  annotations:
    glam_hypernym: TXT.ANA.DRL
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# LINGUISTIC ANNOTATION SCHEMES
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Describes a tagset: identified by `scheme_id`, named by the required
# `scheme_name`, and holding an inlined list of TagDefinition objects.
AnnotationScheme:
  class_uri: skos:ConceptScheme
  description: >-
    Definition of a linguistic annotation scheme (tagset, coding scheme).
    Used to document the vocabulary for @ana, @pos, etc.
  attributes:
    scheme_id:
      range: uriorcurie
      identifier: true
      description: 'Unique identifier for scheme.'
    scheme_name:
      range: string
      required: true
      description: 'Human-readable name.'
    scheme_uri:
      range: uri
      description: 'URI of external scheme definition.'
    scheme_type:
      range: string
      description: >-
        Type of scheme (pos_tagset, ner_tagset, dependency_scheme,
        etc.).
    tags:
      range: TagDefinition
      multivalued: true
      inlined: true
      inlined_as_list: true
      description: 'Tag definitions in scheme.'
  annotations:
    glam_hypernym: DOC.MET.SCH
|
|
|
|
# One entry of an annotation scheme. Only `tag_id` is required; `tag_parent`
# is a plain string, not an object reference.
TagDefinition:
  class_uri: skos:Concept
  description: 'Definition of a single tag in an annotation scheme.'
  attributes:
    tag_id:
      range: string
      required: true
      description: 'Tag identifier/code.'
    tag_label:
      range: string
      description: 'Human-readable label.'
    tag_description:
      range: string
      description: 'Full description of tag meaning.'
    tag_uri:
      range: uri
      description: 'URI linking to external definition (OLiA, etc.).'
    tag_parent:
      range: string
      description: 'Parent tag in hierarchy (if applicable).'
  annotations:
    glam_hypernym: DOC.MET.TAG
|