# glam/data/entity_annotation/modules/advanced/tei/textstructure.yaml
# 2025-12-05 15:30:23 +01:00

# 767 lines
# 21 KiB
# YAML

# =============================================================================
# GLAM-NER: TEI Text Structure Module (LinkML)
# =============================================================================
# Module: modules/advanced/tei/textstructure.yaml
# TEI Chapter: 4 - Default Text Structure
# TEI Module: textstructure
# Version: 1.0.0
# =============================================================================
#
# This module defines LinkML classes for TEI P5 Chapter 4 elements used for
# the default structure of texts including divisions, front/back matter,
# title pages, and document organization.
#
# Key Element Groups:
# - Document Structure: TEI, text, body, front, back, group
# - Divisions: div, div1-div7
# - Front Matter: titlePage, docTitle, docAuthor, dedication, preface
# - Back Matter: appendix, trailer, colophon
# - Components: head, opener, closer, salute, signed, dateline
#
# GLAM-NER Integration:
# - Document structure for NER scope
# - Front matter metadata extraction
# - Author/editor attribution
# - Publication information
#
# Ontology Alignments:
# - Schema.org: Book, Chapter, Article
# - Dublin Core: title, creator, publisher
# - BIBO: Document structure
# - CIDOC-CRM: E33_Linguistic_Object
#
# =============================================================================
id: https://w3id.org/glam/ner/tei/textstructure
# Schema identity and packaging metadata. `name` is the schema's short
# machine name; `title` is the human-readable label.
name: glam-ner-tei-textstructure
title: "TEI Text Structure Module for GLAM-NER"
description: >-
  LinkML classes for TEI P5 Chapter 4 elements used for the default
  structure of texts, including divisions, front/back matter, title pages,
  and document organization.
# Quoted so the value stays a string rather than being type-inferred.
version: "1.0.0"
license: https://creativecommons.org/licenses/by/4.0/
see_also:
  - https://tei-c.org/release/doc/tei-p5-doc/en/html/DS.html

# CURIE prefixes. Each entry must be nested under `prefixes`; at column 0
# these would be parsed as unrelated top-level schema keys.
#   tei     - class_uri / slot_uri values in this module
#   bibo    - `meaning` of DivisionTypeEnum values and bibo_mapping annotations
#   schema, dcterms, crm, foaf - appear only inside annotation values here
prefixes:
  linkml: https://w3id.org/linkml/
  tei: http://www.tei-c.org/ns/1.0/
  glam: https://w3id.org/glam/ner/
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/
  bibo: http://purl.org/ontology/bibo/
  crm: http://www.cidoc-crm.org/cidoc-crm/
  foaf: http://xmlns.com/foaf/0.1/

# Elements without an explicit URI fall into the `glam` namespace, and
# slots/attributes without an explicit `range` are strings.
default_prefix: glam
default_range: string

imports:
  - linkml:types
# =============================================================================
# ENUMERATIONS
# =============================================================================
enums:
  # Controlled vocabularies for this module. Structure is
  #   enums -> <EnumName> -> permissible_values -> <value> -> description/meaning
  # and must be expressed through indentation; flattening it produces
  # duplicate `description` keys and loses the value lists.

  # ---------------------------------------------------------------------------
  # Division Type Enum
  # ---------------------------------------------------------------------------
  # Range of the `div_type` slot. Only some values carry a `meaning`
  # (all of them BIBO terms); the rest are plain labels.
  DivisionTypeEnum:
    description: >-
      Types of text divisions.
    permissible_values:
      book:
        description: Book or major division
        meaning: bibo:Book
      chapter:
        description: Chapter
        meaning: bibo:Chapter
      part:
        description: Part of a work
        meaning: bibo:DocumentPart
      section:
        description: Section
      subsection:
        description: Subsection
      article:
        description: Article
        meaning: bibo:Article
      entry:
        description: Dictionary/encyclopedia entry
      act:
        description: Act of a play
      scene:
        description: Scene of a play
      canto:
        description: Canto of a poem
      letter:
        description: Letter/epistle
        meaning: bibo:Letter
      dedication:
        description: Dedication
      preface:
        description: Preface
      introduction:
        description: Introduction
      conclusion:
        description: Conclusion
      appendix:
        description: Appendix
      index:
        description: Index
      bibliography:
        description: Bibliography
      glossary:
        description: Glossary
      notes:
        description: Notes section

  # ---------------------------------------------------------------------------
  # Title Type Enum
  # ---------------------------------------------------------------------------
  # Range of the `title_type` attribute on the TitlePart class.
  TitleTypeEnum:
    description: >-
      Types of titles on title page.
    permissible_values:
      main:
        description: Main title
      sub:
        description: Subtitle
      alt:
        description: Alternative title
      short:
        description: Short/abbreviated title
      desc:
        description: Descriptive title

  # ---------------------------------------------------------------------------
  # Front Matter Type Enum
  # ---------------------------------------------------------------------------
  # NOTE(review): no slot or attribute in this module uses this enum as its
  # range; presumably it is consumed by another module - confirm.
  FrontMatterTypeEnum:
    description: >-
      Types of front matter elements.
    permissible_values:
      titlePage:
        description: Title page
      dedication:
        description: Dedication
      preface:
        description: Preface/foreword
      acknowledgements:
        description: Acknowledgements
      abstract:
        description: Abstract/summary
      contents:
        description: Table of contents
      listFigures:
        description: List of figures
      listTables:
        description: List of tables

  # ---------------------------------------------------------------------------
  # Back Matter Type Enum
  # ---------------------------------------------------------------------------
  # NOTE(review): like FrontMatterTypeEnum, not used as a range anywhere in
  # this module - confirm the intended consumer.
  BackMatterTypeEnum:
    description: >-
      Types of back matter elements.
    permissible_values:
      appendix:
        description: Appendix
      notes:
        description: Notes/endnotes
      bibliography:
        description: Bibliography
      index:
        description: Index
      glossary:
        description: Glossary
      colophon:
        description: Colophon
      errata:
        description: Errata list
# =============================================================================
# SLOTS (Attributes)
# =============================================================================
slots:
  # Schema-level (reusable) slots. Classes opt in through their own `slots:`
  # list; everything else in this module is declared as class-local
  # `attributes`. Each slot's properties must be nested beneath the slot name.

  # ---------------------------------------------------------------------------
  # Division Attributes
  # ---------------------------------------------------------------------------
  # Listed by the Division and FloatingText classes.
  div_type:
    description: >-
      Type of division.
    range: DivisionTypeEnum
    slot_uri: tei:type
    annotations:
      tei_attribute: type

  # Listed by the Division class only.
  div_n:
    description: >-
      Number or label for division.
    range: string
    slot_uri: tei:n
    annotations:
      tei_attribute: n

  # ---------------------------------------------------------------------------
  # Document Attributes
  # ---------------------------------------------------------------------------
  # NOTE(review): neither slot below is listed by any class in this module,
  # and `doc_status` has no slot_uri or tei_attribute annotation - confirm
  # whether they are meant to be attached to TEIDocument/Text.
  doc_status:
    description: >-
      Status of document (draft, complete, etc.).
    range: string

  # NOTE(review): the slot is named `rendition` but maps to the TEI `rend`
  # attribute. TEI also defines a separate `rendition` attribute (a pointer
  # to <rendition> elements); confirm `rend` is the intended target.
  rendition:
    description: >-
      Rendering/display style.
    range: string
    slot_uri: tei:rend
    annotations:
      tei_attribute: rend
# =============================================================================
# CLASSES
# =============================================================================
classes:
  # Class definitions for the TEI text-structure elements.
  #
  # Nesting convention (must be expressed through indentation):
  #   classes -> <ClassName> -> attributes -> <attribute> -> properties
  # `class_uri` and the trailing `annotations` belong to the class, not to
  # the last attribute listed before them.
  #
  # Containment, as declared by attribute ranges in this module:
  #   TEIDocument.text -> Text
  #   Text.front/body/back/group -> Front / Body / Back / Group
  #   Group.texts -> Text (so grouped texts can nest)
  #   Body.divisions, Front.divisions, Back.divisions -> Division
  #   Division.subdivisions -> Division; Division.opener/closer -> Opener/Closer
  #   Front.title_page -> TitlePage -> DocTitle (-> TitlePart), DocAuthor,
  #     DocImprint, Epigraph
  #   Opener.dateline, Closer.dateline -> Dateline
  #   FloatingText.body -> Body
  # Byline, Argument, Trailer, Postscript and FloatingText are defined here
  # but are not the range of any attribute in this module.
  #
  # Only TEIDocument.tei_id and Division.div_id are marked `identifier: true`;
  # the other `*_id` attributes are ordinary optional strings.

  # ---------------------------------------------------------------------------
  # BASE CLASSES
  # ---------------------------------------------------------------------------
  TextStructureElement:
    description: >-
      Abstract base class for text structure elements.
    abstract: true
    # NOTE(review): every other class_uri in this module names a TEI element;
    # `TextStructureElement` is a modelling construct, not a TEI element, so
    # this URI may belong in the `glam` namespace instead. Left unchanged to
    # keep generated URIs stable - confirm.
    class_uri: tei:TextStructureElement
    annotations:
      tei_module: textstructure

  # ---------------------------------------------------------------------------
  # TOP-LEVEL DOCUMENT STRUCTURE
  # ---------------------------------------------------------------------------
  TEIDocument:
    description: >-
      Root element for TEI document - contains header and text.
      TEI <TEI> element.
    is_a: TextStructureElement
    attributes:
      tei_id:
        description: Unique identifier for document.
        range: string
        identifier: true
      version:
        description: TEI version.
        range: string
      header:
        description: TEI header metadata.
        range: string
        # Attribute-level annotation (distinct from the class-level
        # annotations below): the header is a plain string in this module.
        annotations:
          notes: "Links to header module classes"
      text:
        description: Document text content.
        range: Text
        inlined: true
    class_uri: tei:TEI
    annotations:
      tei_element: TEI
      tei_module: textstructure
      glam_hypernym: DOC
      schema_mapping: schema:CreativeWork

  Text:
    description: >-
      Text container - contains front, body, back matter.
      TEI <text> element.
    is_a: TextStructureElement
    attributes:
      text_id:
        description: Unique identifier.
        range: string
      front:
        description: Front matter.
        range: Front
        inlined: true
      body:
        description: Main body text.
        range: Body
        inlined: true
      back:
        description: Back matter.
        range: Back
        inlined: true
      group:
        description: Group of texts (for composite works).
        range: Group
        inlined: true
    class_uri: tei:text
    annotations:
      tei_element: text
      tei_module: textstructure
      glam_hypernym: DOC.TXT
      crm_mapping: crm:E33_Linguistic_Object

  Body:
    description: >-
      Body of text - main content container.
      TEI <body> element.
    is_a: TextStructureElement
    attributes:
      body_id:
        description: Unique identifier.
        range: string
      divisions:
        description: Text divisions.
        range: Division
        multivalued: true
        inlined: true
        inlined_as_list: true
    class_uri: tei:body
    annotations:
      tei_element: body
      tei_module: textstructure
      glam_hypernym: DOC.TXT.BDY

  Group:
    description: >-
      Group of texts - for composite/collected works.
      TEI <group> element.
    is_a: TextStructureElement
    attributes:
      group_id:
        description: Unique identifier.
        range: string
      # Range is Text, and Text.group ranges over Group, so the two classes
      # are mutually recursive.
      texts:
        description: Grouped texts.
        range: Text
        multivalued: true
        inlined: true
        inlined_as_list: true
      head:
        description: Heading for group.
        range: string
    class_uri: tei:group
    annotations:
      tei_element: group
      tei_module: textstructure
      glam_hypernym: DOC.TXT.GRP

  # ---------------------------------------------------------------------------
  # DIVISIONS
  # ---------------------------------------------------------------------------
  Division:
    description: >-
      Text division - numbered or unnumbered section.
      TEI <div> element. Generic division container.
    is_a: TextStructureElement
    # Reuses the schema-level slots; everything else is class-local.
    slots:
      - div_type
      - div_n
    attributes:
      div_id:
        description: Unique identifier.
        range: string
        identifier: true
      head:
        description: Division heading.
        range: string
      opener:
        description: Opening formula.
        range: Opener
        inlined: true
      closer:
        description: Closing formula.
        range: Closer
        inlined: true
      # Self-referential range: divisions nest to arbitrary depth.
      subdivisions:
        description: Nested divisions.
        range: Division
        multivalued: true
        inlined: true
        inlined_as_list: true
    class_uri: tei:div
    annotations:
      tei_element: div
      tei_module: textstructure
      glam_hypernym: DOC.TXT.DIV
      bibo_mapping: bibo:DocumentPart

  # ---------------------------------------------------------------------------
  # FRONT MATTER
  # ---------------------------------------------------------------------------
  Front:
    description: >-
      Front matter container - preliminary material.
      TEI <front> element.
    is_a: TextStructureElement
    attributes:
      front_id:
        description: Unique identifier.
        range: string
      title_page:
        description: Title page.
        range: TitlePage
        inlined: true
      divisions:
        description: Front matter divisions (dedication, preface, etc.).
        range: Division
        multivalued: true
        inlined: true
        inlined_as_list: true
    class_uri: tei:front
    annotations:
      tei_element: front
      tei_module: textstructure
      glam_hypernym: DOC.TXT.FRT

  TitlePage:
    description: >-
      Title page - contains document title information.
      TEI <titlePage> element.
    is_a: TextStructureElement
    attributes:
      titlepage_id:
        description: Unique identifier.
        range: string
      doc_title:
        description: Document title.
        range: DocTitle
        inlined: true
      doc_author:
        description: Document author(s).
        range: DocAuthor
        multivalued: true
        inlined: true
        inlined_as_list: true
      doc_imprint:
        description: Publication imprint.
        range: DocImprint
        inlined: true
      doc_edition:
        description: Edition statement.
        range: string
      doc_date:
        description: Publication date.
        range: string
      epigraphs:
        description: Epigraph(s) on title page.
        range: Epigraph
        multivalued: true
        inlined: true
        inlined_as_list: true
      imprimatur:
        description: Imprimatur statement.
        range: string
      graphic:
        description: Title page illustration.
        range: string
    class_uri: tei:titlePage
    annotations:
      tei_element: titlePage
      tei_module: textstructure
      glam_hypernym: DOC.TXT.TTP

  DocTitle:
    description: >-
      Document title - title as given on title page.
      TEI <docTitle> element.
    is_a: TextStructureElement
    attributes:
      title_parts:
        description: Title parts (main, sub, etc.).
        range: TitlePart
        multivalued: true
        inlined: true
        inlined_as_list: true
    class_uri: tei:docTitle
    annotations:
      tei_element: docTitle
      tei_module: textstructure
      glam_hypernym: DOC.TXT.DTL
      dcterms_mapping: dcterms:title

  TitlePart:
    description: >-
      Part of document title (main, subtitle, etc.).
      TEI <titlePart> element.
    is_a: TextStructureElement
    attributes:
      title_type:
        description: Type of title part.
        range: TitleTypeEnum
      title_text:
        description: Title text content.
        range: string
        required: true
    class_uri: tei:titlePart
    annotations:
      tei_element: titlePart
      tei_module: textstructure
      glam_hypernym: DOC.TXT.TPR

  DocAuthor:
    description: >-
      Document author as given on title page.
      TEI <docAuthor> element.
    is_a: TextStructureElement
    attributes:
      author_name:
        description: Author name.
        range: string
        required: true
      author_ref:
        description: Authority URI for author.
        range: uri
    class_uri: tei:docAuthor
    annotations:
      tei_element: docAuthor
      tei_module: textstructure
      glam_hypernym: AGT.PER.AUT
      dcterms_mapping: dcterms:creator
      foaf_mapping: foaf:Person

  DocImprint:
    description: >-
      Document imprint - publication info on title page.
      TEI <docImprint> element.
    is_a: TextStructureElement
    attributes:
      publisher:
        description: Publisher name.
        range: string
      pub_place:
        description: Place of publication.
        range: string
      pub_date:
        description: Publication date.
        range: string
      printer:
        description: Printer name.
        range: string
    class_uri: tei:docImprint
    annotations:
      tei_element: docImprint
      tei_module: textstructure
      glam_hypernym: DOC.TXT.IMP
      dcterms_mapping: dcterms:publisher

  Byline:
    description: >-
      Byline - authorship statement.
      TEI <byline> element.
    is_a: TextStructureElement
    attributes:
      byline_text:
        description: Byline text content.
        range: string
        required: true
      doc_authors:
        description: Authors mentioned.
        range: DocAuthor
        multivalued: true
        inlined: true
        inlined_as_list: true
    class_uri: tei:byline
    annotations:
      tei_element: byline
      tei_module: textstructure
      glam_hypernym: DOC.TXT.BYL

  Epigraph:
    description: >-
      Epigraph - quotation at start of section.
      TEI <epigraph> element.
    is_a: TextStructureElement
    attributes:
      quote:
        description: Quoted text.
        range: string
      cit:
        description: Citation/attribution.
        range: string
      bibl:
        description: Bibliographic reference.
        range: string
    class_uri: tei:epigraph
    annotations:
      tei_element: epigraph
      tei_module: textstructure
      glam_hypernym: DOC.TXT.EPG

  Argument:
    description: >-
      Argument - summary of section contents.
      TEI <argument> element.
    is_a: TextStructureElement
    attributes:
      argument_text:
        description: Argument content.
        range: string
    class_uri: tei:argument
    annotations:
      tei_element: argument
      tei_module: textstructure
      glam_hypernym: DOC.TXT.ARG
      dcterms_mapping: dcterms:abstract

  # ---------------------------------------------------------------------------
  # BACK MATTER
  # ---------------------------------------------------------------------------
  Back:
    description: >-
      Back matter container - appendices, indexes, etc.
      TEI <back> element.
    is_a: TextStructureElement
    attributes:
      back_id:
        description: Unique identifier.
        range: string
      divisions:
        description: Back matter divisions.
        range: Division
        multivalued: true
        inlined: true
        inlined_as_list: true
    class_uri: tei:back
    annotations:
      tei_element: back
      tei_module: textstructure
      glam_hypernym: DOC.TXT.BCK

  Trailer:
    description: >-
      Trailer - closing title or statement.
      TEI <trailer> element.
    is_a: TextStructureElement
    attributes:
      trailer_text:
        description: Trailer content.
        range: string
    class_uri: tei:trailer
    annotations:
      tei_element: trailer
      tei_module: textstructure
      glam_hypernym: DOC.TXT.TRL

  # ---------------------------------------------------------------------------
  # OPENER/CLOSER COMPONENTS
  # ---------------------------------------------------------------------------
  Opener:
    description: >-
      Opening formula - salutation, dateline at start.
      TEI <opener> element.
    is_a: TextStructureElement
    attributes:
      salute:
        description: Salutation.
        range: string
      dateline:
        description: Date and place line.
        range: Dateline
        inlined: true
    class_uri: tei:opener
    annotations:
      tei_element: opener
      tei_module: textstructure
      glam_hypernym: DOC.TXT.OPN

  # Same shape as Opener plus a `signed` attribute.
  Closer:
    description: >-
      Closing formula - signature, dateline at end.
      TEI <closer> element.
    is_a: TextStructureElement
    attributes:
      salute:
        description: Closing salutation.
        range: string
      signed:
        description: Signature.
        range: string
      dateline:
        description: Date and place line.
        range: Dateline
        inlined: true
    class_uri: tei:closer
    annotations:
      tei_element: closer
      tei_module: textstructure
      glam_hypernym: DOC.TXT.CLS

  Dateline:
    description: >-
      Dateline - date and place of composition.
      TEI <dateline> element.
    is_a: TextStructureElement
    attributes:
      place_name:
        description: Place of composition.
        range: string
      # Declared as a free-form string, not a typed date.
      date:
        description: Date of composition.
        range: string
      dateline_text:
        description: Full dateline text.
        range: string
    class_uri: tei:dateline
    annotations:
      tei_element: dateline
      tei_module: textstructure
      glam_hypernym: TMP.DAT
      dcterms_mapping: dcterms:date

  # ---------------------------------------------------------------------------
  # POSTSCRIPT
  # ---------------------------------------------------------------------------
  Postscript:
    description: >-
      Postscript - text added after main content.
      TEI <postscript> element.
    is_a: TextStructureElement
    attributes:
      label:
        description: Postscript label (e.g., "P.S.").
        range: string
      content:
        description: Postscript content.
        range: string
    class_uri: tei:postscript
    annotations:
      tei_element: postscript
      tei_module: textstructure
      glam_hypernym: DOC.TXT.PSC

  # ---------------------------------------------------------------------------
  # FLOATING TEXT
  # ---------------------------------------------------------------------------
  FloatingText:
    description: >-
      Floating text - embedded text that interrupts main flow.
      TEI <floatingText> element.
    is_a: TextStructureElement
    # Shares the division type vocabulary (DivisionTypeEnum) via div_type.
    slots:
      - div_type
    attributes:
      floating_id:
        description: Unique identifier.
        range: string
      body:
        description: Content body.
        range: Body
        inlined: true
    class_uri: tei:floatingText
    annotations:
      tei_element: floatingText
      tei_module: textstructure
      glam_hypernym: DOC.TXT.FLT