# glam/data/entity_annotation/modules/advanced/tei/transcr.yaml
# 2025-12-05 15:30:23 +01:00
#
# 1745 lines
# 54 KiB
# YAML

# =============================================================================
# GLAM-NER: TEI P5 TRANSCRIPTION MODULE
# =============================================================================
# Module: modules/advanced/tei/transcr.yaml
# Parent: entity_annotation_rules_v1.7.0_unified.yaml
# Purpose: LinkML schema for TEI P5 Chapter 12 - Representation of Primary Sources
# Source: TEI P5 4.10.2 (September 2025) - transcr module
# =============================================================================
# This module provides LinkML class definitions for transcribing primary source
# materials including facsimiles, surfaces, zones, damage, additions, deletions,
# substitutions, abbreviations, and editorial interventions. Essential for
# manuscript studies, diplomatic editions, genetic criticism, and archival work.
# =============================================================================
# Schema identity: canonical URI, machine name, human title, version, license.
id: https://w3id.org/glam/ner/tei/transcr
name: glam-ner-tei-transcr
title: TEI P5 Primary Source Transcription Module for GLAM-NER
version: "1.0.0"
license: https://creativecommons.org/licenses/by/4.0/

# CURIE prefixes usable in class_uri, slot_uri and mapping values.
# Namespaces ending in '#' are quoted so the hash can never be read as a comment.
prefixes:
  tei: http://www.tei-c.org/ns/1.0/
  glam: https://w3id.org/glam/ner/
  linkml: https://w3id.org/linkml/
  crm: http://www.cidoc-crm.org/cidoc-crm/
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/
  foaf: http://xmlns.com/foaf/0.1/
  prov: 'http://www.w3.org/ns/prov#'
  oa: 'http://www.w3.org/ns/oa#'
  xsd: 'http://www.w3.org/2001/XMLSchema#'
  iiif: 'http://iiif.io/api/presentation/3#'
  sc: 'http://iiif.io/api/presentation/2#'

# Unprefixed names resolve under `glam`; a slot without a range is a string.
default_prefix: glam
default_range: string
# =============================================================================
# IMPORTS
# =============================================================================
imports:
- linkml:types
# =============================================================================
# ENUMS
# =============================================================================
enums:
# ---------------------------------------------------------------------------
# Damage Agent Type
# ---------------------------------------------------------------------------
DamageAgentType:
  # Vocabulary of damage causes; the `agent` slot takes this enum as its range.
  description: |
    Categorizes the cause of damage to a text witness.
    Used on @agent attribute of <damage> and <gap> elements.
  permissible_values:
    # Mechanical and environmental causes
    rubbing:
      description: Damage from rubbing or abrasion
    mildew:
      description: Damage from mildew or mold
    smoke:
      description: Damage from smoke or fire
    water:
      description: Water damage (staining, ink running)
    fading:
      description: Ink or pigment fading over time
    tearing:
      description: Physical tearing of the support
    trimming:
      description: Text loss from trimming during binding
    # Biological and chemical causes
    rodent:
      description: Damage from rodents or vermin
    insect:
      description: Insect damage (bookworm, etc.)
    chemical:
      description: Chemical deterioration
    # Ink-related and deliberate causes
    blotting:
      description: Ink blots obscuring text
    erasure:
      description: Deliberate erasure (scraping, washing)
    # Fallback when the cause cannot be determined
    unknown:
      description: Cause of damage unknown
# ---------------------------------------------------------------------------
# Deletion Rendering Type
# ---------------------------------------------------------------------------
DeletionRenderType:
  # Vocabulary for the physical means by which text was struck out.
  description: |
    How a deletion was effected in the source.
    Used on @rend attribute of <del> element.
  permissible_values:
    # Line-based cancellations
    strikethrough:
      description: Single horizontal line through text
    overstrike:
      description: Multiple lines through text
    crosshatch:
      description: Cross-hatched lines
    # Replacement or removal of the ink itself
    overwritten:
      description: New text written over old
    erased:
      description: Text erased (scraped, washed)
    # Marks that flag text as deleted without obscuring it
    expunctuated:
      description: Dots placed below letters (medieval)
    bracketed:
      description: Enclosed in deletion brackets
    underline:
      description: Underlined for deletion
    blotted:
      description: Blotted out with ink
    circled:
      description: Circled for deletion
    vertical_stroke:
      description: Vertical stroke through text
# ---------------------------------------------------------------------------
# Addition Place Type
# ---------------------------------------------------------------------------
AdditionPlaceType:
  # The `place` slot is constrained to this enum, so every value that TEI
  # att.placement suggests for @place must be listed or valid TEI data fails
  # validation. Existing values are unchanged; the missing ones are appended.
  description: |
    Where an addition was placed relative to the main text.
    Used on @place attribute of <add> element.
  permissible_values:
    above:
      description: Interlinear, above the line
    below:
      description: Interlinear, below the line
    inline:
      description: Inline with surrounding text
    margin:
      description: In the margin (unspecified)
    margin_left:
      description: Left margin
    margin_right:
      description: Right margin
    margin_top:
      description: Top margin
    margin_bottom:
      description: Bottom margin
    foot:
      description: At foot of page
    end:
      description: At end of text block
    superimposed:
      description: Written over existing text
    opposite:
      description: On facing page
    overleaf:
      description: On verso of current leaf
    # TEI att.placement suggested values that were previously rejected
    top:
      description: At the top of the page
    bottom:
      description: At the bottom of the page
    left:
      description: To the left of the text or figure
    right:
      description: To the right of the text or figure
    inspace:
      description: In a predefined space (e.g. left blank by an earlier scribe)
# ---------------------------------------------------------------------------
# Gap Reason Type
# ---------------------------------------------------------------------------
GapReasonType:
  # Vocabulary explaining why material is absent from the transcription.
  description: |
    Reason for omission in transcription.
    Used on @reason attribute of <gap> element.
  permissible_values:
    # Omissions that reflect the state of the source
    cancelled:
      description: Text cancelled/deleted and illegible
    deleted:
      description: Text deleted in source
    # Omissions chosen by the transcriber
    editorial:
      description: Editorial decision to omit
    illegible:
      description: Text illegible in source
    inaudible:
      description: Speech inaudible (for audio sources)
    irrelevant:
      description: Content deemed irrelevant
    sampling:
      description: Sampling practice (partial transcription)
    # Physical loss
    lost:
      description: Text lost through physical damage
    missing:
      description: Expected text is missing
# ---------------------------------------------------------------------------
# Hand Scope Type
# ---------------------------------------------------------------------------
HandScopeType:
  # Three-level scale of how much of the document a hand wrote.
  description: |
    Scope of a hand's contribution to the document.
    Used on @scope attribute of <handNote> element.
  permissible_values:
    sole:
      description: Only hand in the document
    major:
      description: Main/principal hand
    minor:
      description: Secondary/occasional hand
# ---------------------------------------------------------------------------
# Metamark Function Type
# ---------------------------------------------------------------------------
MetamarkFunctionType:
  # Vocabulary for what a scribal or authorial metamark instructs the reader to do.
  description: |
    Function of a metamark in the document.
    Used on @function attribute of <metamark> element.
  permissible_values:
    reorder:
      description: Indicates reordering/transposition
    flag:
      description: Flags text for attention
    delete:
      description: Marks text for deletion
    insert:
      description: Marks insertion point
    used:
      description: Indicates text has been used/copied
    sequence:
      description: Indicates sequence number
    note:
      description: Points to annotation
    # Overlaps in meaning with `reorder`; both are kept as listed in the source.
    transposition:
      description: Indicates text transposition
# ---------------------------------------------------------------------------
# Forme Work Type
# ---------------------------------------------------------------------------
FormeWorkType:
  # Vocabulary for running heads, page numbers and similar page furniture.
  # `head`/`header` and `foot`/`footer` overlap; all four are kept so existing
  # data stays valid.
  description: |
    Types of forme work elements in printed texts.
    Used on @type attribute of <fw> element.
  permissible_values:
    head:
      description: Running head/header
    foot:
      description: Running foot/footer
    pageNum:
      description: Page number
    sig:
      description: Signature (gathering mark)
    catch:
      description: Catchword
    header:
      description: Header text
    footer:
      description: Footer text
    # TEI lists lineNum among the sample values for fw/@type; it was missing.
    lineNum:
      description: Line number (of prose or poetry)
# ---------------------------------------------------------------------------
# Supplied Reason Type
# ---------------------------------------------------------------------------
SuppliedReasonType:
  # Vocabulary explaining why an editor had to supply text.
  # Multi-word values here are hyphenated, unlike the underscore style of the
  # other enums; the spellings are kept exactly as given.
  description: |
    Reason text was supplied by editor.
    Used on @reason attribute of <supplied> element.
  permissible_values:
    illegible:
      description: Original illegible
    damage:
      description: Original damaged
    omitted-in-original:
      description: Omission by scribe/author
    lost-folio:
      description: Lost leaf/folio
    faded-ink:
      description: Ink has faded
    overbinding:
      description: Hidden by binding
    lost:
      description: Lost due to physical damage
    lacuna:
      description: Gap in text tradition
# ---------------------------------------------------------------------------
# Surface Type
# ---------------------------------------------------------------------------
SurfaceType:
  # Vocabulary of physical surfaces; range of Surface.surface_type.
  description: |
    Type of written surface.
    Used on @type attribute of <surface> element.
  permissible_values:
    # Sides of a leaf
    recto:
      description: Front side of leaf
    verso:
      description: Back side of leaf
    # Exterior parts of a codex
    cover:
      description: Cover (front or back)
    spine:
      description: Spine of codex
    fore_edge:
      description: Fore-edge of codex
    top_edge:
      description: Top edge of codex
    bottom_edge:
      description: Bottom edge of codex
    # Attached or ancillary surfaces
    patch:
      description: Attached patch/slip
    flyleaf:
      description: Flyleaf
    pastedown:
      description: Pastedown
    seal:
      description: Seal attached to document
# =============================================================================
# SLOTS (Attributes)
# =============================================================================
slots:
# ---------------------------------------------------------------------------
# Global TEI Attributes
# ---------------------------------------------------------------------------
# Attributes written as xml:id / xml:lang in TEI documents.
xml_id:
  slot_uri: tei:id
  range: string
  description: Unique identifier for the element (xml:id)
xml_lang:
  slot_uri: tei:lang
  range: string
  description: Language of the element content (xml:lang)
# ---------------------------------------------------------------------------
# Facsimile/Coordinate Attributes
# ---------------------------------------------------------------------------
# Link from an element to its image, or to a region of an image.
facs:
  slot_uri: tei:facs
  range: uriorcurie
  description: |
    Points to image or portion of image corresponding to element (@facs).
    URI reference to facsimile.
# Bounding box: (ulx, uly) is the upper-left corner, (lrx, lry) the lower-right.
ulx:
  slot_uri: tei:ulx
  range: float
  description: X coordinate of upper left corner
uly:
  slot_uri: tei:uly
  range: float
  description: Y coordinate of upper left corner
lrx:
  slot_uri: tei:lrx
  range: float
  description: X coordinate of lower right corner
lry:
  slot_uri: tei:lry
  range: float
  description: Y coordinate of lower right corner
# Polygon outline, kept as one raw string rather than a parsed coordinate list.
points:
  slot_uri: tei:points
  range: string
  description: |
    Coordinates defining non-rectangular zone (@points).
    Space-separated list of x,y coordinate pairs.
# ---------------------------------------------------------------------------
# Hand/Responsibility Attributes
# ---------------------------------------------------------------------------
# Who wrote or is responsible for a feature, and how certain the encoder is.
hand:
  slot_uri: tei:hand
  range: uriorcurie
  description: |
    Reference to handNote describing responsible hand (@hand).
    Points to handNote element in header.
resp:
  slot_uri: tei:resp
  range: uriorcurie
  description: |
    Person responsible for intervention (@resp).
    Points to person/respStmt in header.
# Free string: the listed levels are described, not enforced by an enum.
cert:
  slot_uri: tei:cert
  range: string
  description: |
    Degree of certainty (high, medium, low) (@cert).
# ---------------------------------------------------------------------------
# Change/Revision Attributes
# ---------------------------------------------------------------------------
# Revision tracking: which campaign an intervention belongs to, and its order.
change_ref:
  slot_uri: tei:change
  range: uriorcurie
  description: |
    Reference to change/revision campaign (@change).
    Points to change element in header.
seq:
  slot_uri: tei:seq
  range: integer
  description: |
    Sequence number indicating order of interventions (@seq).
# ---------------------------------------------------------------------------
# Damage/Extent Attributes
# ---------------------------------------------------------------------------
# Cause of damage; the only slot in this group constrained by an enum.
agent:
  slot_uri: tei:agent
  range: DamageAgentType
  description: |
    Agent/cause of damage (@agent).
# Kept as a string because both numeric and verbal degrees are described.
degree:
  slot_uri: tei:degree
  range: string
  description: |
    Degree of damage (0-1 or high/medium/low) (@degree).
# Size of a feature: a free-text `extent`, or a numeric `quantity` with a `unit`.
extent:
  slot_uri: tei:extent
  range: string
  description: |
    Extent of element (e.g., "3 lines", "1 word") (@extent).
quantity:
  slot_uri: tei:quantity
  range: float
  description: |
    Numeric quantity (@quantity).
unit:
  slot_uri: tei:unit
  range: string
  description: |
    Unit of measurement (@unit).
    Values: cm, mm, in, line, char, word, etc.
# ---------------------------------------------------------------------------
# Spanning Attribute
# ---------------------------------------------------------------------------
# End pointer carried by the *Span classes (AddSpan, DelSpan, DamageSpan).
span_to:
  slot_uri: tei:spanTo
  range: uriorcurie
  description: |
    Points to end of spanned text (@spanTo).
    URI reference to anchor element.
# ---------------------------------------------------------------------------
# Place/Position Attribute
# ---------------------------------------------------------------------------
# Values are restricted to the AdditionPlaceType enum.
place:
  slot_uri: tei:place
  range: AdditionPlaceType
  description: |
    Location of addition or note (@place).
# ---------------------------------------------------------------------------
# Rendering Attribute
# ---------------------------------------------------------------------------
# Generic free-text rendering; not tied to DeletionRenderType at slot level.
rend:
  slot_uri: tei:rend
  range: string
  description: |
    Rendering/appearance of element (@rend).
# ---------------------------------------------------------------------------
# Type Attribute
# ---------------------------------------------------------------------------
# Named element_type in the schema; maps to the TEI @type attribute.
element_type:
  slot_uri: tei:type
  range: string
  description: |
    Classification of element (@type).
# ---------------------------------------------------------------------------
# Source Attribute
# ---------------------------------------------------------------------------
# Provenance pointer for a reading.
source:
  slot_uri: tei:source
  range: uriorcurie
  description: |
    Source from which reading is drawn (@source).
    Points to witness or external source.
# ---------------------------------------------------------------------------
# Reason Attribute
# ---------------------------------------------------------------------------
# Shared free-text reason; neither reason enum is applied at slot level.
reason:
  slot_uri: tei:reason
  range: string
  description: |
    Reason for gap, supplied text, etc. (@reason).
# ---------------------------------------------------------------------------
# Target Attribute
# ---------------------------------------------------------------------------
# One raw string holding all pointers; not modelled as a multivalued URI slot.
target:
  slot_uri: tei:target
  range: string
  description: |
    Target element(s) for annotation (@target).
    Space-separated list of URIs.
# =============================================================================
# CLASSES - FACSIMILE AND SURFACE ELEMENTS
# =============================================================================
classes:
# ---------------------------------------------------------------------------
# facsimile - Digital Facsimile Container
# ---------------------------------------------------------------------------
Facsimile:
  # Image-only representation of a source (TEI <facsimile>).
  class_uri: tei:facsimile
  description: |
    Facsimile (facsimile) contains a representation of some written source
    in the form of a set of images rather than as transcribed or encoded text.
    The facsimile element may contain:
    - front/back matter
    - surface elements defining written surfaces
    - graphic elements for images
    TEI P5 Chapter 12.1 (Digital Facsimiles)
  slots:
    - xml_id
  attributes:
    surfaces:
      description: Written surfaces in this facsimile
      range: Surface
      multivalued: true
    # Added: no class had an attribute with range SurfaceGrp, so surface
    # groups could not be attached to any container.
    surface_groups:
      description: Groups of written surfaces (surfaceGrp) in this facsimile
      range: SurfaceGrp
      multivalued: true
    # Added: the description lists graphic children, but nothing held them.
    graphics:
      description: Images placed directly in the facsimile, outside any surface
      range: Graphic
      multivalued: true
    front_matter:
      description: Front matter for facsimile
      range: string
    back_matter:
      description: Back matter for facsimile
      range: string
  exact_mappings:
    - iiif:Manifest
  # schema:ImageObject is the exact mapping of Graphic (a single image). A
  # facsimile is a set of images, so it is related to that term, not equal to it.
  related_mappings:
    - schema:ImageObject
  annotations:
    tei_element: facsimile
    tei_module: transcr
    glam_hypernym: DOC.FAC
# ---------------------------------------------------------------------------
# sourceDoc - Source Document Container
# ---------------------------------------------------------------------------
SourceDoc:
  # Container for a single source document with embedded transcription.
  class_uri: tei:sourceDoc
  description: |
    Source document (sourceDoc) contains a transcription or other
    representation of a single source document potentially forming
    part of a dossier génétique or collection of sources.
    Unlike facsimile (images only), sourceDoc can contain embedded
    transcriptions within surface and zone elements.
    TEI P5 Chapter 12.2 (Combining Transcription with Facsimile)
  slots:
    - xml_id
  attributes:
    # Still required, so documents that validated before remain valid.
    surfaces:
      description: Written surfaces in this source document
      range: Surface
      multivalued: true
      required: true
    # Added (optional): lets surfaces be organised into leaves or quires.
    # No class previously offered an attribute with range SurfaceGrp.
    surface_groups:
      description: Groups of written surfaces (surfaceGrp) in this source document
      range: SurfaceGrp
      multivalued: true
  exact_mappings:
    - crm:E22_Human-Made_Object
    - schema:ArchiveComponent
  annotations:
    tei_element: sourceDoc
    tei_module: transcr
    glam_hypernym: DOC.SRC
# ---------------------------------------------------------------------------
# surface - Written Surface
# ---------------------------------------------------------------------------
Surface:
  # A written surface treated as a 2-D coordinate space (TEI <surface>).
  class_uri: tei:surface
  description: |
    Surface (surface) defines a written surface as a two-dimensional
    coordinate space, optionally grouping one or more graphic
    representations of that space, zones of interest within that
    space, and transcriptions of the writing within them.
    Surfaces can represent:
    - One side of a leaf (recto/verso)
    - A page spread (two-page opening)
    - A face of a monument
    - A patch or slip attached to a page
    TEI P5 Chapter 12.1 (Digital Facsimiles)
  slots:
    - xml_id
    # Bounding box of the coordinate space
    - ulx
    - uly
    - lrx
    - lry
    - facs
  attributes:
    surface_type:
      description: Type of surface (recto, verso, patch, etc.)
      range: SurfaceType
    start_ref:
      description: Reference to start of transcribed text (@start)
      range: uriorcurie
    attachment:
      description: How surface is attached (glue, pin, etc.)
      range: string
    flipping:
      description: Whether surface can be flipped
      range: boolean
    graphic:
      description: Image of this surface
      range: Graphic
    zones:
      description: Zones within this surface
      range: Zone
      multivalued: true
    nested_surfaces:
      description: Surfaces nested within this one (patches, slips)
      range: Surface
      multivalued: true
    lines:
      description: Lines of text on this surface (embedded transcription)
      range: Line
      multivalued: true
    # Added (optional): Path is described as a line "within a surface element",
    # yet no class had an attribute with range Path, leaving it unreachable.
    paths:
      description: Paths (ruling lines, decoration, trajectories) on this surface
      range: Path
      multivalued: true
  exact_mappings:
    - iiif:Canvas
    - crm:E25_Human-Made_Feature
  annotations:
    tei_element: surface
    tei_module: transcr
    glam_hypernym: THG.SRF
# ---------------------------------------------------------------------------
# surfaceGrp - Surface Group
# ---------------------------------------------------------------------------
SurfaceGrp:
  # Arbitrary grouping of surfaces, e.g. a leaf or a quire (TEI <surfaceGrp>).
  class_uri: tei:surfaceGrp
  description: |
    Surface group (surfaceGrp) defines any kind of useful grouping
    of written surfaces, for example the recto and verso of a single
    leaf, or all surfaces in a quire/gathering.
    TEI P5 Chapter 12.1 (Digital Facsimiles)
  slots:
    - xml_id
  attributes:
    group_type:
      description: Type of group (leaf, quire, gathering, etc.)
      range: string
    group_n:
      description: Number/identifier for group
      range: string
    # Still required, so existing instances remain valid.
    surfaces:
      description: Surfaces in this group
      range: Surface
      multivalued: true
      required: true
    # Added (optional): groups can contain groups, e.g. leaves within a quire.
    nested_groups:
      description: Surface groups nested within this one (e.g. leaves in a quire)
      range: SurfaceGrp
      multivalued: true
  exact_mappings:
    - crm:E78_Curated_Holding
  annotations:
    tei_element: surfaceGrp
    tei_module: transcr
    glam_hypernym: GRP.SRF
# ---------------------------------------------------------------------------
# zone - Zone of Interest
# ---------------------------------------------------------------------------
Zone:
  # Region of a surface: a rectangle (ulx/uly/lrx/lry) or a polygon (points).
  class_uri: tei:zone
  description: |
    Zone (zone) defines any two-dimensional area within a surface
    element. Zones can be rectangular (ulx/uly/lrx/lry) or
    non-rectangular (points attribute for polygons).
    Zones can contain:
    - Graphics (images of the zone)
    - Lines of text (embedded transcription)
    - Nested zones
    - Arbitrary text content
    TEI P5 Chapter 12.1 (Digital Facsimiles)
  slots:
    - xml_id
    # Rectangular outline
    - ulx
    - uly
    - lrx
    - lry
    # Polygonal outline
    - points
  attributes:
    zone_type:
      description: Type of zone (text, image, decoration, etc.)
      range: string
    rotate:
      description: Rotation of zone content in degrees
      range: float
    graphic:
      description: Image of this zone
      range: Graphic
    lines:
      description: Lines of text in this zone
      range: Line
      multivalued: true
    # Self-referencing range, so zones can nest
    nested_zones:
      description: Zones nested within this one
      range: Zone
      multivalued: true
    zone_content:
      description: Text content of zone (if not structured as lines)
      range: string
  exact_mappings:
    - oa:FragmentSelector
  annotations:
    tei_element: zone
    tei_module: transcr
    glam_hypernym: THG.ZON
# ---------------------------------------------------------------------------
# path - Line Path
# ---------------------------------------------------------------------------
Path:
  # Open line through points on a surface; its geometry is in the `points` slot.
  class_uri: tei:path
  description: |
    Path (path) defines any line passing through two or more points
    within a surface element. Used to identify specific lines on an
    object, such as ruling lines, decorative elements, or trajectories.
    TEI P5 Chapter 12.1 (Digital Facsimiles)
  slots:
    - xml_id
    - points
  attributes:
    path_type:
      description: Type of path (ruling, decoration, etc.)
      range: string
  annotations:
    tei_element: path
    tei_module: transcr
    glam_hypernym: THG.PTH
# ---------------------------------------------------------------------------
# line - Topographic Line
# ---------------------------------------------------------------------------
Line:
  # One topographic line of writing in an embedded transcription (TEI <line>).
  class_uri: tei:line
  description: |
    Line (line) contains the transcription of a topographic line in
    the source document. Used in embedded transcription to represent
    lines of writing as they appear on the page.
    TEI P5 Chapter 12.2.2 (Embedded Transcription)
  slots:
    - xml_id
    - facs
    - hand
    - change_ref
  attributes:
    line_content:
      description: Text content of the line
      range: string
    # Quoted: a bare `n` is a boolean under YAML 1.1 resolvers that honour
    # y/n, which would turn this attribute name into `false`.
    "n":
      description: Line number
      range: string
  exact_mappings:
    - crm:E33_Linguistic_Object
  annotations:
    tei_element: line
    tei_module: transcr
    glam_hypernym: TXT.LIN
# ---------------------------------------------------------------------------
# graphic - Graphic Element
# ---------------------------------------------------------------------------
Graphic:
  # Pointer to an image file; annotated as belonging to the TEI core module.
  class_uri: tei:graphic
  description: |
    Graphic (graphic) indicates the location of a graphic or
    illustration, either forming part of a text, or providing an
    image of it.
    TEI P5 Chapter 3.10 (Graphics and Other Non-textual Components)
  slots:
    - xml_id
  attributes:
    # The only mandatory attribute of this class
    url:
      description: URL of the graphic file
      range: uriorcurie
      required: true
    # Dimensions are strings, not numbers
    width:
      description: Width of graphic
      range: string
    height:
      description: Height of graphic
      range: string
    mime_type:
      description: MIME type of graphic
      range: string
  exact_mappings:
    - schema:ImageObject
    - iiif:Image
  annotations:
    tei_element: graphic
    tei_module: core
    glam_hypernym: THG.IMG
# =============================================================================
# CLASSES - EDITORIAL INTERVENTION ELEMENTS
# =============================================================================
# ---------------------------------------------------------------------------
# add - Addition
# ---------------------------------------------------------------------------
Add:
  # Text inserted into the source (TEI <add>, core module).
  class_uri: tei:add
  description: |
    Addition (add) contains letters, words, or phrases inserted in
    the source text by an author, scribe, or a previous annotator
    or corrector.
    TEI P5 Chapter 12.3.1.4 (Additions and Deletions)
  slots:
    - xml_id
    # Who added it and where
    - hand
    - place
    - resp
    - cert
    # When, relative to other interventions
    - change_ref
    - seq
  attributes:
    # Mandatory: an addition must carry its text
    addition_content:
      description: The added text
      range: string
      required: true
    instant:
      description: Whether addition was instant (same moment as main text)
      range: boolean
  exact_mappings:
    - crm:E13_Attribute_Assignment
  annotations:
    tei_element: add
    tei_module: core
    glam_hypernym: TXT.ADD
# ---------------------------------------------------------------------------
# addSpan - Addition Span
# ---------------------------------------------------------------------------
AddSpan:
  # Start marker of a long addition; the end is the anchor named by span_to.
  class_uri: tei:addSpan
  description: |
    Addition span (addSpan) marks the beginning of a longer sequence
    of text added by an author, scribe, annotator or corrector.
    Used when addition crosses structural boundaries.
    TEI P5 Chapter 12.3.1.4 (Additions and Deletions)
  slots:
    - xml_id
    - span_to
    - hand
    - place
    - resp
    # Added for parity with Add, which carries cert and seq
    - cert
    - change_ref
    - seq
  slot_usage:
    # A span marker without an end point is meaningless; TEI enforces
    # @spanTo on addSpan through a Schematron constraint.
    span_to:
      required: true
  annotations:
    tei_element: addSpan
    tei_module: transcr
    glam_hypernym: TXT.ADD
# ---------------------------------------------------------------------------
# del - Deletion
# ---------------------------------------------------------------------------
Del:
  # Text struck out or marked superfluous (TEI <del>, core module).
  class_uri: tei:del
  description: |
    Deletion (del) contains a letter, word, or passage deleted,
    marked as deleted, or otherwise indicated as superfluous or
    spurious in the copy text by an author, scribe, or a previous
    annotator or corrector.
    TEI P5 Chapter 12.3.1.4 (Additions and Deletions)
  slots:
    - xml_id
    # Who deleted it and how it looks
    - hand
    - rend
    - resp
    - cert
    # When, relative to other interventions
    - change_ref
    - seq
  attributes:
    # Optional, unlike Add.addition_content
    deleted_content:
      description: The deleted text (if legible)
      range: string
    status:
      description: Status of deletion (complete, partial, etc.)
      range: string
    instant:
      description: Whether deletion was instant (false start)
      range: boolean
  exact_mappings:
    - crm:E79_Part_Removal
  annotations:
    tei_element: del
    tei_module: core
    glam_hypernym: TXT.DEL
# ---------------------------------------------------------------------------
# delSpan - Deletion Span
# ---------------------------------------------------------------------------
DelSpan:
  # Start marker of a long deletion; the end is the anchor named by span_to.
  class_uri: tei:delSpan
  description: |
    Deletion span (delSpan) marks the beginning of a longer sequence
    of text deleted, marked as deleted, or otherwise signaled as
    superfluous or spurious by an author, scribe, annotator, or
    corrector. Used when deletion crosses structural boundaries.
    TEI P5 Chapter 12.3.1.4 (Additions and Deletions)
  slots:
    - xml_id
    - span_to
    - hand
    - rend
    - resp
    # Added for parity with Del, which carries cert and seq
    - cert
    - change_ref
    - seq
  slot_usage:
    # A span marker without an end point is meaningless; TEI enforces
    # @spanTo on delSpan through a Schematron constraint.
    span_to:
      required: true
  annotations:
    tei_element: delSpan
    tei_module: transcr
    glam_hypernym: TXT.DEL
# ---------------------------------------------------------------------------
# subst - Substitution
# ---------------------------------------------------------------------------
Subst:
  # Pairs deletions with additions as one editorial act (TEI <subst>).
  class_uri: tei:subst
  description: |
    Substitution (subst) groups one or more deletions with one or
    more additions when the combination is to be regarded as a
    single intervention in the text.
    TEI P5 Chapter 12.3.1.5 (Substitutions)
  slots:
    - xml_id
    - hand
    - change_ref
    - seq
  attributes:
    # Both sides are multivalued and neither is marked required
    deletions:
      description: Deleted text in substitution
      range: Del
      multivalued: true
    additions:
      description: Added text in substitution
      range: Add
      multivalued: true
  exact_mappings:
    - crm:E13_Attribute_Assignment
  annotations:
    tei_element: subst
    tei_module: transcr
    glam_hypernym: TXT.SUB
# ---------------------------------------------------------------------------
# substJoin - Substitution Join
# ---------------------------------------------------------------------------
SubstJoin:
  # Links non-adjacent additions and deletions into one substitution; the
  # members are named through the `target` slot.
  class_uri: tei:substJoin
  description: |
    Substitution join (substJoin) identifies a series of possibly
    fragmented additions, deletions, or other revisions on a
    manuscript that combine to make up a single intervention in
    the text. Used when additions and deletions are not contiguous.
    TEI P5 Chapter 12.3.1.5 (Substitutions)
  slots:
    - xml_id
    - target
    - change_ref
  annotations:
    tei_element: substJoin
    tei_module: transcr
    glam_hypernym: TXT.SUB
# =============================================================================
# CLASSES - DAMAGE AND ILLEGIBILITY ELEMENTS
# =============================================================================
# ---------------------------------------------------------------------------
# damage - Damaged Text
# ---------------------------------------------------------------------------
Damage:
  # Damaged but still readable text (TEI <damage>); unreadable text uses Gap.
  class_uri: tei:damage
  description: |
    Damage (damage) contains an area of damage to the text witness.
    The text within damage can still be read; if completely illegible,
    use gap instead.
    TEI P5 Chapter 12.3.3.1 (Damage, Illegibility, and Supplied Text)
  slots:
    - xml_id
    # Cause and severity
    - agent
    - degree
    # Size of the damaged area
    - extent
    - unit
    - quantity
    # Attribution
    - hand
    - resp
  attributes:
    damaged_content:
      description: The damaged text (if legible)
      range: string
    group_id:
      description: Links stretches of damage from same physical cause
      range: string
  exact_mappings:
    - crm:E14_Condition_Assessment
  annotations:
    tei_element: damage
    tei_module: transcr
    glam_hypernym: DOC.DMG
# ---------------------------------------------------------------------------
# damageSpan - Damage Span
# ---------------------------------------------------------------------------
DamageSpan:
  # Models TEI <damageSpan>: the start of a longer damaged-but-legible
  # sequence; span_to points at where the sequence ends.
  class_uri: tei:damageSpan
  description: |
    Damage span (damageSpan) marks the beginning of a longer sequence
    of text which is damaged in some way but still legible.
    Used when damage crosses structural boundaries.
    TEI P5 Chapter 12.3.3.1 (Damage, Illegibility, and Supplied Text)
  # Slots referenced by name; their definitions are not part of this block.
  # unit, quantity and hand are carried here as well as on Damage, because
  # TEI gives <damage> and <damageSpan> the same damage-related attributes
  # and an extent is not interpretable without its unit/quantity companions.
  slots:
    - xml_id
    - span_to
    - agent
    - degree
    - extent
    - unit
    - quantity
    - hand
    - resp
  attributes:
    # Optional, mirrors Damage: lets a span be grouped with other damage
    # that has the same physical cause.
    group_id:
      description: Links stretches of damage from same physical cause
      range: string
  # Shares the DOC.DMG hypernym with Damage.
  annotations:
    tei_element: damageSpan
    tei_module: transcr
    glam_hypernym: DOC.DMG
# ---------------------------------------------------------------------------
# gap - Gap in Text
# ---------------------------------------------------------------------------
Gap:
  # Models TEI <gap>: a point where material was left out of the
  # transcription.
  class_uri: tei:gap
  description: |
    Gap (gap) indicates a point where material has been omitted in
    a transcription, whether for editorial reasons, as part of
    sampling practice, or because the material is illegible,
    invisible, or inaudible.
    TEI P5 Chapter 12.3.1.7 (Text Omitted from or Supplied in the Transcription)
  # No content attribute: the class only describes the omission through
  # named slots, whose definitions are not part of this block.
  slots:
    - xml_id
    - reason
    - agent
    - extent
    - unit
    - quantity
    - resp
  # NOTE(review): E79 Part Removal describes removing a physical part from
  # an object; confirm it is the intended target for editorial omissions.
  exact_mappings:
    - crm:E79_Part_Removal
  # Annotated as belonging to the TEI core module rather than transcr.
  annotations:
    tei_element: gap
    tei_module: core
    glam_hypernym: TXT.GAP
# ---------------------------------------------------------------------------
# unclear - Unclear Text
# ---------------------------------------------------------------------------
Unclear:
  # Models TEI <unclear>: text that cannot be transcribed with certainty.
  class_uri: tei:unclear
  description: |
    Unclear (unclear) contains a word, phrase, or passage which
    cannot be transcribed with certainty because it is illegible
    or inaudible in the source.
    TEI P5 Chapter 12.3.3.1 (Damage, Illegibility, and Supplied Text)
  # Slots referenced by name; their definitions are not part of this block.
  slots:
    - xml_id
    - reason
    - agent
    - resp
    - cert
  attributes:
    # Optional: the transcriber's tentative reading.
    unclear_content:
      description: Best-guess transcription
      range: string
  exact_mappings:
    - crm:E33_Linguistic_Object
  # Annotated as belonging to the TEI core module rather than transcr.
  annotations:
    tei_element: unclear
    tei_module: core
    glam_hypernym: TXT.UNC
# ---------------------------------------------------------------------------
# supplied - Supplied Text
# ---------------------------------------------------------------------------
Supplied:
  # Models TEI <supplied>: text provided by the transcriber or editor
  # rather than read from the source.
  class_uri: tei:supplied
  description: |
    Supplied (supplied) signifies text supplied by the transcriber
    or editor for any reason; for example because the original
    cannot be read due to physical damage, or because of an
    obvious omission by the author or scribe.
    TEI P5 Chapter 12.3.1.7 (Text Omitted from or Supplied in the Transcription)
  # Slots referenced by name; their definitions are not part of this block.
  slots:
    - xml_id
    - reason
    - source
    - resp
    - cert
  attributes:
    # Mandatory: an instance must carry the supplied text.
    supplied_content:
      description: The supplied text
      range: string
      required: true
  exact_mappings:
    - crm:E13_Attribute_Assignment
  # Provenance of the class: source TEI element/module and GLAM-NER hypernym.
  annotations:
    tei_element: supplied
    tei_module: transcr
    glam_hypernym: TXT.SUP
# ---------------------------------------------------------------------------
# surplus - Surplus Text
# ---------------------------------------------------------------------------
Surplus:
  # Models TEI <surplus>: text in the source that the editor judges
  # superfluous or redundant.
  class_uri: tei:surplus
  description: |
    Surplus (surplus) marks text present in the source which the
    editor believes to be superfluous or redundant, such as scribal
    dittography or interpolation.
    TEI P5 Chapter 12.3.1.7 (Text Omitted from or Supplied in the Transcription)
  # Slots referenced by name; their definitions are not part of this block.
  slots:
    - xml_id
    - reason
    - resp
    - cert
  attributes:
    # Mandatory: an instance must carry the text judged surplus.
    surplus_content:
      description: The surplus text
      range: string
      required: true
  # No ontology mapping is declared for this class.
  annotations:
    tei_element: surplus
    tei_module: transcr
    glam_hypernym: TXT.SUR
# ---------------------------------------------------------------------------
# secl - Secluded Text
# ---------------------------------------------------------------------------
Secl:
  # Models TEI <secl>: text judged genuine but displaced from its
  # original, unknown position.
  class_uri: tei:secl
  description: |
    Secluded text (secl) marks text present in the source which the
    editor believes to be genuine but out of its original place
    (which is unknown). The text is secluded, not deleted.
    TEI P5 Chapter 12.3.1.7 (Text Omitted from or Supplied in the Transcription)
  # Same slot set as Surplus; definitions are not part of this block.
  slots:
    - xml_id
    - reason
    - resp
    - cert
  attributes:
    # Mandatory: an instance must carry the secluded text.
    secluded_content:
      description: The secluded text
      range: string
      required: true
  # No ontology mapping is declared for this class.
  annotations:
    tei_element: secl
    tei_module: transcr
    glam_hypernym: TXT.SEC
# =============================================================================
# CLASSES - ABBREVIATION ELEMENTS
# =============================================================================
# ---------------------------------------------------------------------------
# abbr - Abbreviation
# ---------------------------------------------------------------------------
Abbr:
  # Models TEI <abbr>: an abbreviation as written in the source.
  class_uri: tei:abbr
  description: |
    Abbreviation (abbr) contains an abbreviation of any sort.
    May contain the literal abbreviation as written in the source.
    TEI P5 Chapter 12.3.1.2 (Abbreviation and Expansion)
  # Slots referenced by name; their definitions are not part of this block.
  slots:
    - xml_id
    - resp
    - cert
  attributes:
    # Mandatory: the abbreviated form itself.
    abbr_content:
      description: The abbreviated text
      range: string
      required: true
    # Optional free-text classification; not constrained to an enum here.
    abbr_type:
      description: Type of abbreviation (suspension, contraction, etc.)
      range: string
  exact_mappings:
    - crm:E33_Linguistic_Object
  # Annotated as belonging to the TEI core module rather than transcr.
  annotations:
    tei_element: abbr
    tei_module: core
    glam_hypernym: TXT.ABR
# ---------------------------------------------------------------------------
# expan - Expansion
# ---------------------------------------------------------------------------
Expan:
  # Models TEI <expan>: the expanded form of an abbreviation.
  class_uri: tei:expan
  description: |
    Expansion (expan) contains the expansion of an abbreviation.
    TEI P5 Chapter 12.3.1.2 (Abbreviation and Expansion)
  # Same slot set as Abbr; definitions are not part of this block.
  slots:
    - xml_id
    - resp
    - cert
  attributes:
    # Mandatory: the expanded form itself.
    expan_content:
      description: The expanded text
      range: string
      required: true
  exact_mappings:
    - crm:E33_Linguistic_Object
  # Annotated as belonging to the TEI core module rather than transcr.
  annotations:
    tei_element: expan
    tei_module: core
    glam_hypernym: TXT.EXP
# ---------------------------------------------------------------------------
# am - Abbreviation Marker
# ---------------------------------------------------------------------------
Am:
  # Models TEI <am>: the signs or letters of an abbreviation that vanish
  # or are replaced when it is expanded.
  class_uri: tei:am
  description: |
    Abbreviation marker (am) contains a sequence of letters or signs
    present in an abbreviation which are omitted or replaced in the
    expanded form of the abbreviation.
    TEI P5 Chapter 12.3.1.2 (Abbreviation and Expansion)
  # Only the identifier slot is referenced; resp/cert are not listed here.
  slots:
    - xml_id
  attributes:
    # Optional: the marker as transcribed.
    marker_content:
      description: The abbreviation marker (tittle, tilde, etc.)
      range: string
  # Uses the TXT.ABR.MRK subtype, not the plain TXT.ABR hypernym of Abbr.
  annotations:
    tei_element: am
    tei_module: transcr
    glam_hypernym: TXT.ABR.MRK
# ---------------------------------------------------------------------------
# ex - Editorial Expansion
# ---------------------------------------------------------------------------
Ex:
  # Models TEI <ex>: letters an editor or transcriber adds while expanding
  # an abbreviation.
  class_uri: tei:ex
  description: |
    Editorial expansion (ex) contains a sequence of letters added
    by an editor or transcriber when expanding an abbreviation.
    TEI P5 Chapter 12.3.1.2 (Abbreviation and Expansion)
  # Slots referenced by name; their definitions are not part of this block.
  slots:
    - xml_id
    - resp
    - cert
  attributes:
    # Mandatory: the editorially supplied letters.
    expanded_letters:
      description: The letters supplied by editor
      range: string
      required: true
  # Uses the TXT.EXP.ED subtype, not the plain TXT.EXP hypernym of Expan.
  annotations:
    tei_element: ex
    tei_module: transcr
    glam_hypernym: TXT.EXP.ED
# =============================================================================
# CLASSES - HAND AND CHANGE TRACKING ELEMENTS
# =============================================================================
# ---------------------------------------------------------------------------
# handNotes - Hand Notes Container
# ---------------------------------------------------------------------------
HandNotes:
  # Models TEI <handNotes>: container for the descriptions of the hands
  # found in the source.
  class_uri: tei:handNotes
  description: |
    Hand notes (handNotes) contains one or more handNote elements
    documenting the different hands identified within the source texts.
    TEI P5 Chapter 12.3.2.1 (Document Hands)
  slots:
    - xml_id
  attributes:
    # Mandatory and multivalued: the container needs at least one entry.
    # HandNote is referenced by name; it is not defined in this block.
    hand_notes:
      description: Individual hand descriptions
      range: HandNote
      multivalued: true
      required: true
  # Classified as document metadata (DOC.MET) rather than a text feature.
  annotations:
    tei_element: handNotes
    tei_module: transcr
    glam_hypernym: DOC.MET
# ---------------------------------------------------------------------------
# handShift - Hand Shift
# ---------------------------------------------------------------------------
HandShift:
  # Models TEI <handShift>: the point where a new hand or scribal stint
  # begins.
  class_uri: tei:handShift
  description: |
    Hand shift (handShift) marks the beginning of a sequence of text
    written in a new hand, or the beginning of a scribal stint.
    TEI P5 Chapter 12.3.2.1 (Document Hands)
  # Slots referenced by name; their definitions are not part of this block.
  slots:
    - xml_id
    - resp
  attributes:
    new_hand:
      description: Reference to new hand (@new)
      range: uriorcurie
      # TEI lists @new on handShift as recommended, not mandatory, so a
      # hand shift whose hand has not been identified must still validate.
      recommended: true
    # Optional free-text descriptors of the new hand.
    medium:
      description: Writing medium (ink color, pencil, etc.)
      range: string
    script:
      description: Script type (secretary, copperplate, etc.)
      range: string
  # Provenance of the class: source TEI element/module and GLAM-NER hypernym.
  annotations:
    tei_element: handShift
    tei_module: transcr
    glam_hypernym: TXT.HND
# =============================================================================
# CLASSES - MODIFICATION AND METAMARK ELEMENTS
# =============================================================================
# ---------------------------------------------------------------------------
# mod - Generic Modification
# ---------------------------------------------------------------------------
Mod:
  # Models TEI <mod>: a generic modification, for when add/del and similar
  # elements would assert too much.
  class_uri: tei:mod
  description: |
    Modification (mod) represents any kind of modification identified
    within a single document. A generic element for cases where
    specific elements (add, del, etc.) involve too much interpretation.
    TEI P5 Chapter 12.3.4.1 (Generic Modification)
  # Slots referenced by name; their definitions are not part of this block.
  slots:
    - xml_id
    - element_type
    - rend
    - span_to
    - hand
    - change_ref
  attributes:
    # Optional: the text affected by the modification.
    mod_content:
      description: Content of modification
      range: string
  # No ontology mapping is declared for this class.
  annotations:
    tei_element: mod
    tei_module: transcr
    glam_hypernym: TXT.MOD
# ---------------------------------------------------------------------------
# metamark - Metamark
# ---------------------------------------------------------------------------
Metamark:
  # Models TEI <metamark>: a sign that tells the reader how to read the
  # document rather than being part of its content.
  class_uri: tei:metamark
  description: |
    Metamark (metamark) contains or describes any kind of graphic
    or written signal within a document the function of which is
    to determine how it should be read rather than forming part
    of the actual content of the document.
    Examples: arrows, asterisks, numbers indicating transposition,
    deletion marks, insertion marks.
    TEI P5 Chapter 12.3.4.2 (Metamarks)
  # Slots referenced by name; their definitions are not part of this block.
  slots:
    - xml_id
    - target
    - span_to
    - rend
    - place
    - hand
    - change_ref
  attributes:
    # Optional. MetamarkFunctionType is referenced by name and not defined
    # in this block (presumably an enum elsewhere in the schema).
    function:
      description: Function of the metamark
      range: MetamarkFunctionType
    # Optional: the mark itself or a description of it.
    metamark_content:
      description: Content/description of metamark
      range: string
  exact_mappings:
    - crm:E37_Mark
  # Provenance of the class: source TEI element/module and GLAM-NER hypernym.
  annotations:
    tei_element: metamark
    tei_module: transcr
    glam_hypernym: TXT.MRK
# ---------------------------------------------------------------------------
# restore - Restore
# ---------------------------------------------------------------------------
Restore:
  # Models TEI <restore>: text returned to an earlier state because a
  # marking or instruction (e.g. a deletion) was cancelled.
  class_uri: tei:restore
  description: |
    Restore (restore) indicates restoration of text to an earlier
    state by cancellation of an editorial or authorial marking or
    instruction. Used when a deletion is later cancelled ("stet").
    TEI P5 Chapter 12.3.1.6 (Cancellation of Deletions and Other Markings)
  # Slots referenced by name; their definitions are not part of this block.
  slots:
    - xml_id
    - element_type
    - hand
    - resp
    - change_ref
  attributes:
    # Optional: the text that was restored.
    restored_content:
      description: The restored text
      range: string
  exact_mappings:
    - crm:E13_Attribute_Assignment
  # Provenance of the class: source TEI element/module and GLAM-NER hypernym.
  annotations:
    tei_element: restore
    tei_module: transcr
    glam_hypernym: TXT.RST
# ---------------------------------------------------------------------------
# retrace - Retraced Text
# ---------------------------------------------------------------------------
Retrace:
  # Models TEI <retrace>: writing that was gone over again to clarify or
  # fix it.
  class_uri: tei:retrace
  description: |
    Retrace (retrace) contains a sequence of writing which has been
    retraced, for example by over-inking, to clarify or fix it.
    TEI P5 Chapter 12.3.4.3 (Fixation and Clarification)
  # Slots referenced by name; their definitions are not part of this block.
  slots:
    - xml_id
    - hand
    - change_ref
  attributes:
    # Mandatory: the retraced writing itself.
    retraced_content:
      description: The retraced text
      range: string
      required: true
    # Optional free-text reason; not constrained to an enum here.
    cause:
      description: Reason for retracing (unclear, fixation)
      range: string
  # No ontology mapping is declared for this class.
  annotations:
    tei_element: retrace
    tei_module: transcr
    glam_hypernym: TXT.RTR
# ---------------------------------------------------------------------------
# undo - Undo Intervention
# ---------------------------------------------------------------------------
Undo:
  # Models TEI <undo>: earlier interventions that were later marked for
  # cancellation.
  class_uri: tei:undo
  description: |
    Undo (undo) indicates one or more marked-up interventions in a
    document which have subsequently been marked for cancellation.
    More general than restore.
    TEI P5 Chapter 12.3.4.4 (Confirmation, Cancellation, and Reinstatement)
  # No class-local attributes: the cancelled interventions are identified
  # through named slots, whose definitions are not part of this block.
  slots:
    - xml_id
    - target
    - span_to
    - rend
    - change_ref
  # Provenance of the class: source TEI element/module and GLAM-NER hypernym.
  annotations:
    tei_element: undo
    tei_module: transcr
    glam_hypernym: TXT.UND
# ---------------------------------------------------------------------------
# redo - Redo Intervention
# ---------------------------------------------------------------------------
Redo:
  # Models TEI <redo>: cancelled interventions that were later reaffirmed
  # or repeated.
  class_uri: tei:redo
  description: |
    Redo (redo) indicates one or more cancelled interventions in a
    document which have subsequently been marked as reaffirmed or
    repeated.
    TEI P5 Chapter 12.3.4.4 (Confirmation, Cancellation, and Reinstatement)
  # Same slot set as Undo; definitions are not part of this block.
  slots:
    - xml_id
    - target
    - span_to
    - rend
    - change_ref
  # Provenance of the class: source TEI element/module and GLAM-NER hypernym.
  annotations:
    tei_element: redo
    tei_module: transcr
    glam_hypernym: TXT.RDO
# =============================================================================
# CLASSES - TRANSPOSITION ELEMENTS
# =============================================================================
# ---------------------------------------------------------------------------
# listTranspose - List of Transpositions
# ---------------------------------------------------------------------------
ListTranspose:
  # Models TEI <listTranspose>: container for the transpositions indicated
  # in a document.
  class_uri: tei:listTranspose
  description: |
    List transpose (listTranspose) supplies a list of transpositions,
    each of which is indicated at some point in a document typically
    by means of metamarks.
    TEI P5 Chapter 12.3.4.5 (Transpositions)
  slots:
    - xml_id
  attributes:
    # Mandatory and multivalued: the list needs at least one Transpose.
    transpositions:
      description: Individual transposition instructions
      range: Transpose
      multivalued: true
      required: true
  # Shares the TXT.TRN hypernym with Transpose.
  annotations:
    tei_element: listTranspose
    tei_module: transcr
    glam_hypernym: TXT.TRN
# ---------------------------------------------------------------------------
# transpose - Transposition
# ---------------------------------------------------------------------------
Transpose:
  # Models TEI <transpose>: one transposition, given as the pointers to the
  # affected elements in their intended new order.
  class_uri: tei:transpose
  description: |
    Transpose (transpose) describes a single textual transposition
    as an ordered list of at least two pointers specifying the order
    in which the elements indicated should be re-combined.
    TEI P5 Chapter 12.3.4.5 (Transpositions)
  slots:
    - xml_id
  attributes:
    pointer_sequence:
      description: Ordered pointers to transposed elements
      range: string
      multivalued: true
      required: true
      # A transposition of fewer than two elements is meaningless, and the
      # sequence order is the instruction itself, so both constraints are
      # stated explicitly instead of living only in the description.
      minimum_cardinality: 2
      list_elements_ordered: true
  # Shares the TXT.TRN hypernym with ListTranspose.
  annotations:
    tei_element: transpose
    tei_module: transcr
    glam_hypernym: TXT.TRN
# =============================================================================
# CLASSES - FORME WORK AND SPACE ELEMENTS
# =============================================================================
# ---------------------------------------------------------------------------
# fw - Forme Work
# ---------------------------------------------------------------------------
FormeWork:
  # Models TEI <fw>: running heads, catchwords and similar page furniture.
  class_uri: tei:fw
  description: |
    Forme work (fw) contains a running head (e.g. a header, footer),
    catchword, or similar material appearing on the current page.
    Named after the "forme" used to hold movable type.
    TEI P5 Chapter 12.6 (Headers, Footers, and Similar Matter)
  # Slots referenced by name; their definitions are not part of this block.
  slots:
    - xml_id
    - place
  attributes:
    # Optional. FormeWorkType is referenced by name and not defined in this
    # block (presumably an enum elsewhere in the schema).
    fw_type:
      description: Type of forme work
      range: FormeWorkType
    # Mandatory: the text of the header, footer, catchword, etc.
    fw_content:
      description: Content of forme work
      range: string
      required: true
  # No ontology mapping is declared for this class.
  annotations:
    tei_element: fw
    tei_module: transcr
    glam_hypernym: TXT.FW
# ---------------------------------------------------------------------------
# space - Significant Space
# ---------------------------------------------------------------------------
Space:
  # Models TEI <space>: a significant blank left in the source, such as
  # room for a word or initial that was never filled in.
  class_uri: tei:space
  # The section reference is 12.5.1: Space sits under "Aspects of Layout"
  # (12.5), which precedes 12.6 "Headers, Footers, and Similar Matter";
  # 12.4 is "Advanced Uses of surface and zone".
  description: |
    Space (space) indicates the location of a significant space in
    the text, such as space left for a word or initial capital that
    was never filled in.
    TEI P5 Chapter 12.5.1 (Space)
  # No content attribute: the blank is described only by its dimensions
  # and responsibility, via slots whose definitions are not in this block.
  slots:
    - xml_id
    - quantity
    - unit
    - extent
    - resp
  exact_mappings:
    - crm:E25_Human-Made_Feature
  # Provenance of the class: source TEI element/module and GLAM-NER hypernym.
  annotations:
    tei_element: space
    tei_module: transcr
    glam_hypernym: TXT.SPC
# =============================================================================
# GLAM-NER HYPERNYM MAPPINGS SUMMARY
# =============================================================================
#
# This module covers primary source transcription from TEI Chapter 12.
# Primary GLAM-NER hypernym mappings:
#
# DOC.FAC (Digital Facsimile):
# - Facsimile - Digital image representation
#
# DOC.SRC (Source Document):
# - SourceDoc - Source document with embedded transcription
#
# THG.SRF (Written Surface):
# - Surface - Physical written surface
# - SurfaceGrp - Group of surfaces (leaf, quire)
#
# THG.ZON (Zone):
# - Zone - Area of interest on surface
#
# THG.IMG (Image):
# - Graphic - Image file reference
#
# TXT.LIN (Line):
# - Line - Topographic line of text
#
# TXT.ADD (Addition):
# - Add, AddSpan - Text additions
#
# TXT.DEL (Deletion):
# - Del, DelSpan - Text deletions
#
# TXT.SUB (Substitution):
# - Subst, SubstJoin - Substitutions
#
# TXT.GAP (Gap):
# - Gap - Omission in transcription
#
# TXT.UNC (Unclear):
# - Unclear - Illegible/uncertain text
#
# TXT.SUP (Supplied):
# - Supplied - Editorially supplied text
#
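# TXT.ABR (Abbreviation):
# - Abbr - Abbreviations
# - Am - Abbreviation marker (annotated with the subtype TXT.ABR.MRK)
#
# TXT.EXP (Expansion):
# - Expan - Expansions
# - Ex - Editorial expansion (annotated with the subtype TXT.EXP.ED)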
#
# TXT.MOD (Modification):
# - Mod - Generic modification
#
# TXT.MRK (Metamark):
# - Metamark - Writing system markup
#
# DOC.DMG (Damage):
# - Damage, DamageSpan - Physical damage
#
# DOC.MET (Document Metadata):
# - HandNotes - Hand descriptions
#
# =============================================================================
# ONTOLOGY MAPPINGS
# =============================================================================
#
# CIDOC-CRM:
# - Facsimile → crm:E22_Human-Made_Object
# - Surface, Space → crm:E25_Human-Made_Feature
# - Add, Subst, Supplied, Restore → crm:E13_Attribute_Assignment
# - Del, Gap → crm:E79_Part_Removal
# - Damage → crm:E14_Condition_Assessment
# - Unclear, Abbr, Expan → crm:E33_Linguistic_Object
# - Metamark → crm:E37_Mark
#
# Schema.org:
# - Facsimile → schema:ImageObject
# - SourceDoc → schema:ArchiveComponent
# - Graphic → schema:ImageObject
#
# IIIF:
# - Facsimile → iiif:Manifest
# - Surface → iiif:Canvas
# - Graphic → iiif:Image
#
# Web Annotation:
# - Zone → oa:FragmentSelector
#
# =============================================================================
# USAGE NOTES
# =============================================================================
#
# Primary source transcription is used for:
# 1. Manuscript transcription and description
# 2. Diplomatic editions
# 3. Genetic criticism and textual scholarship
# 4. Archival finding aid creation
# 5. Digital facsimile editions
# 6. Critical apparatus preparation
# 7. Palaeographic analysis
# 8. Conservation documentation
#
# Key patterns:
# - <facsimile>/<surface> for image-based editions
# - <sourceDoc>/<surface>/<zone>/<line> for embedded transcription
# - <add>/<del>/<subst> for editorial interventions
# - <damage>/<gap>/<unclear>/<supplied> for illegibility
# - <abbr>/<expan>/<am>/<ex> for abbreviations
# - <metamark> for authorial/scribal markup
# - <handShift> for change of hand
# - <listTranspose>/<transpose> for indicated transpositions
#
# Integration with NER:
# - Person names in transcribed text → AGT.PER
# - Place names → GEO
# - Organization names → GRP.ORG
# - Date references → TMP.DAT
# - Transcription metadata → DOC.MET
#
# Heritage Institution Applications:
# - Manuscript catalog production
# - Digital editions of archival materials
# - Transcription of historical documents
# - Conservation condition reporting
# - Provenance documentation
# - Scholarly critical editions
#
# =============================================================================
# VERSION HISTORY
# =============================================================================
#
# Version 1.0.0 (2025-12-03):
# - Initial release covering TEI P5 Chapter 12 (Primary Sources)
# - 38 class definitions for transcription elements
# - 11 enum definitions for classification
# - Ontology mappings to CIDOC-CRM, Schema.org, IIIF, Web Annotation
# - GLAM-NER hypernym annotations for all relevant elements
# - Facsimile and surface support for digital editions
# - Editorial intervention elements (add, del, subst, etc.)
# - Damage and illegibility handling
# - Abbreviation expansion support
# - Metamark and transposition elements
# - Hand and change tracking
#
# =============================================================================