# =============================================================================
# GLAM-NER: TEI Text Structure Module (LinkML)
# =============================================================================
# Module: modules/advanced/tei/textstructure.yaml
# TEI Chapter: 4 - Default Text Structure
# TEI Module: textstructure
# Version: 1.0.0
# =============================================================================
#
# This module defines LinkML classes for TEI P5 Chapter 4 elements used for
# the default structure of texts including divisions, front/back matter,
# title pages, and document organization.
#
# Key Element Groups:
#   - Document Structure: TEI, text, body, front, back, group
#   - Divisions: div, div1-div7
#   - Front Matter: titlePage, docTitle, docAuthor, dedication, preface
#   - Back Matter: appendix, trailer, colophon
#   - Components: head, opener, closer, salute, signed, dateline
#
# GLAM-NER Integration:
#   - Document structure for NER scope
#   - Front matter metadata extraction
#   - Author/editor attribution
#   - Publication information
#
# Ontology Alignments:
#   - Schema.org: Book, Chapter, Article
#   - Dublin Core: title, creator, publisher
#   - BIBO: Document structure
#   - CIDOC-CRM: E33_Linguistic_Object
#
# =============================================================================

# Schema identity and versioning metadata.
id: https://w3id.org/glam/ner/tei/textstructure
name: glam-ner-tei-textstructure
title: "TEI Text Structure Module for GLAM-NER"
version: "1.0.0"

license: https://creativecommons.org/licenses/by/4.0/
see_also:
  - https://tei-c.org/release/doc/tei-p5-doc/en/html/DS.html

# CURIE prefixes used by the class_uri / slot_uri / meaning values and the
# *_mapping annotations in this schema.
prefixes:
  linkml: https://w3id.org/linkml/
  tei: http://www.tei-c.org/ns/1.0/
  glam: https://w3id.org/glam/ner/
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/
  bibo: http://purl.org/ontology/bibo/
  crm: http://www.cidoc-crm.org/cidoc-crm/
  foaf: http://xmlns.com/foaf/0.1/

# Elements without an explicit URI resolve under `glam`; slots and attributes
# without an explicit range default to string.
default_prefix: glam
default_range: string

imports:
  - linkml:types

# =============================================================================
# ENUMERATIONS
# =============================================================================

enums:

# ---------------------------------------------------------------------------
# Division Type Enum
# ---------------------------------------------------------------------------
# Value set used as the range of the `div_type` slot. Only some values carry
# a BIBO `meaning`; the rest are plain labels.
  DivisionTypeEnum:
    description: >-
      Types of text divisions.
    permissible_values:
      book:
        description: Book or major division
        meaning: bibo:Book
      chapter:
        description: Chapter
        meaning: bibo:Chapter
      part:
        description: Part of a work
        meaning: bibo:DocumentPart
      section:
        description: Section
      subsection:
        description: Subsection
      article:
        description: Article
        meaning: bibo:Article
      entry:
        description: Dictionary/encyclopedia entry
      act:
        description: Act of a play
      scene:
        description: Scene of a play
      canto:
        description: Canto of a poem
      letter:
        description: Letter/epistle
        meaning: bibo:Letter
      dedication:
        description: Dedication
      preface:
        description: Preface
      introduction:
        description: Introduction
      conclusion:
        description: Conclusion
      appendix:
        description: Appendix
      index:
        description: Index
      bibliography:
        description: Bibliography
      glossary:
        description: Glossary
      notes:
        description: Notes section

# ---------------------------------------------------------------------------
# Title Type Enum
# ---------------------------------------------------------------------------
# Value set used as the range of TitlePart.title_type.
  TitleTypeEnum:
    description: >-
      Types of titles on title page.
    permissible_values:
      main:
        description: Main title
      sub:
        description: Subtitle
      alt:
        description: Alternative title
      short:
        description: Short/abbreviated title
      desc:
        description: Descriptive title

# ---------------------------------------------------------------------------
# Front Matter Type Enum
# ---------------------------------------------------------------------------
# NOTE(review): no slot or attribute visible in this module uses this enum as
# its range; confirm whether it is consumed elsewhere.
  FrontMatterTypeEnum:
    description: >-
      Types of front matter elements.
    permissible_values:
      titlePage:
        description: Title page
      dedication:
        description: Dedication
      preface:
        description: Preface/foreword
      acknowledgements:
        description: Acknowledgements
      abstract:
        description: Abstract/summary
      contents:
        description: Table of contents
      listFigures:
        description: List of figures
      listTables:
        description: List of tables

# ---------------------------------------------------------------------------
# Back Matter Type Enum
# ---------------------------------------------------------------------------
# NOTE(review): no slot or attribute visible in this module uses this enum as
# its range; confirm whether it is consumed elsewhere.
  BackMatterTypeEnum:
    description: >-
      Types of back matter elements.
    permissible_values:
      appendix:
        description: Appendix
      notes:
        description: Notes/endnotes
      bibliography:
        description: Bibliography
      index:
        description: Index
      glossary:
        description: Glossary
      colophon:
        description: Colophon
      errata:
        description: Errata list

# =============================================================================
# SLOTS (Attributes)
# =============================================================================

slots:

# ---------------------------------------------------------------------------
# Division Attributes
# ---------------------------------------------------------------------------
# Shared slot (listed under `slots:` by Division and FloatingText) constrained
# to DivisionTypeEnum.
  div_type:
    description: >-
      Type of division.
    range: DivisionTypeEnum
    slot_uri: tei:type
    annotations:
      tei_attribute: type

# Free-text number/label slot, listed under `slots:` by Division.
  div_n:
    description: >-
      Number or label for division.
    range: string
    slot_uri: tei:n
    annotations:
      tei_attribute: n

# ---------------------------------------------------------------------------
# Document Attributes
# ---------------------------------------------------------------------------
# Free-text status; no slot_uri or TEI attribute mapping is declared for it.
  doc_status:
    description: >-
      Status of document (draft, complete, etc.).
    range: string

# NOTE(review): the slot is named `rendition` but is mapped to the TEI `rend`
# attribute (slot_uri and tei_attribute below). TEI also defines a separate
# `rendition` attribute - confirm that `rend` is the intended target.
  rendition:
    description: >-
      Rendering/display style.
    range: string
    slot_uri: tei:rend
    annotations:
      tei_attribute: rend

# =============================================================================
# CLASSES
# =============================================================================

classes:

# ---------------------------------------------------------------------------
# BASE CLASSES
# ---------------------------------------------------------------------------
# Abstract parent named by `is_a` in the other classes visible in this module.
  TextStructureElement:
    description: >-
      Abstract base class for text structure elements.
    abstract: true
    class_uri: tei:TextStructureElement
    annotations:
      tei_module: textstructure

# ---------------------------------------------------------------------------
# TOP-LEVEL DOCUMENT STRUCTURE
# ---------------------------------------------------------------------------
# Root document class. `tei_id` is the identifier; `header` is typed as a
# plain string here (its annotation says it links to header module classes);
# `text` embeds a Text object inline.
  TEIDocument:
    description: >-
      Root element for TEI document - contains header and text.
      TEI <TEI> element.
    is_a: TextStructureElement
    attributes:
      tei_id:
        description: Unique identifier for document.
        range: string
        identifier: true
      version:
        description: TEI version.
        range: string
      header:
        description: TEI header metadata.
        range: string
        annotations:
          notes: "Links to header module classes"
      text:
        description: Document text content.
        range: Text
        inlined: true
    class_uri: tei:TEI
    annotations:
      tei_element: TEI
      tei_module: textstructure
      glam_hypernym: DOC
      schema_mapping: schema:CreativeWork

# Container for one text: optional inlined Front, Body, Back and Group parts.
# None of the attributes is marked required.
  Text:
    description: >-
      Text container - contains front, body, back matter.
      TEI <text> element.
    is_a: TextStructureElement
    attributes:
      text_id:
        description: Unique identifier.
        range: string
      front:
        description: Front matter.
        range: Front
        inlined: true
      body:
        description: Main body text.
        range: Body
        inlined: true
      back:
        description: Back matter.
        range: Back
        inlined: true
      group:
        description: Group of texts (for composite works).
        range: Group
        inlined: true
    class_uri: tei:text
    annotations:
      tei_element: text
      tei_module: textstructure
      glam_hypernym: DOC.TXT
      crm_mapping: crm:E33_Linguistic_Object

# Main content container: an inlined list of Division objects.
  Body:
    description: >-
      Body of text - main content container.
      TEI <body> element.
    is_a: TextStructureElement
    attributes:
      body_id:
        description: Unique identifier.
        range: string
      divisions:
        description: Text divisions.
        range: Division
        multivalued: true
        inlined: true
        inlined_as_list: true
    class_uri: tei:body
    annotations:
      tei_element: body
      tei_module: textstructure
      glam_hypernym: DOC.TXT.BDY

# Composite work: an inlined list of Text objects plus an optional heading.
# Text.group in turn ranges over Group, so the two classes nest recursively.
  Group:
    description: >-
      Group of texts - for composite/collected works.
      TEI <group> element.
    is_a: TextStructureElement
    attributes:
      group_id:
        description: Unique identifier.
        range: string
      texts:
        description: Grouped texts.
        range: Text
        multivalued: true
        inlined: true
        inlined_as_list: true
      head:
        description: Heading for group.
        range: string
    class_uri: tei:group
    annotations:
      tei_element: group
      tei_module: textstructure
      glam_hypernym: DOC.TXT.GRP

# ---------------------------------------------------------------------------
# DIVISIONS
# ---------------------------------------------------------------------------
# Generic division. Takes the shared `div_type` and `div_n` slots, uses
# `div_id` as its identifier, and nests recursively through `subdivisions`.
  Division:
    description: >-
      Text division - numbered or unnumbered section.
      TEI <div> element. Generic division container.
    is_a: TextStructureElement
    slots:
      - div_type
      - div_n
    attributes:
      div_id:
        description: Unique identifier.
        range: string
        identifier: true
      head:
        description: Division heading.
        range: string
      opener:
        description: Opening formula.
        range: Opener
        inlined: true
      closer:
        description: Closing formula.
        range: Closer
        inlined: true
      subdivisions:
        description: Nested divisions.
        range: Division
        multivalued: true
        inlined: true
        inlined_as_list: true
    class_uri: tei:div
    annotations:
      tei_element: div
      tei_module: textstructure
      glam_hypernym: DOC.TXT.DIV
      bibo_mapping: bibo:DocumentPart

# ---------------------------------------------------------------------------
# FRONT MATTER
# ---------------------------------------------------------------------------
# Front matter container: one inlined TitlePage plus an inlined list of
# Division objects.
  Front:
    description: >-
      Front matter container - preliminary material.
      TEI <front> element.
    is_a: TextStructureElement
    attributes:
      front_id:
        description: Unique identifier.
        range: string
      title_page:
        description: Title page.
        range: TitlePage
        inlined: true
      divisions:
        description: Front matter divisions (dedication, preface, etc.).
        range: Division
        multivalued: true
        inlined: true
        inlined_as_list: true
    class_uri: tei:front
    annotations:
      tei_element: front
      tei_module: textstructure
      glam_hypernym: DOC.TXT.FRT

# Title page. Structured children (DocTitle, DocAuthor, DocImprint, Epigraph)
# are inlined; edition, date, imprimatur and graphic are plain strings.
  TitlePage:
    description: >-
      Title page - contains document title information.
      TEI <titlePage> element.
    is_a: TextStructureElement
    attributes:
      titlepage_id:
        description: Unique identifier.
        range: string
      doc_title:
        description: Document title.
        range: DocTitle
        inlined: true
      doc_author:
        description: Document author(s).
        range: DocAuthor
        multivalued: true
        inlined: true
        inlined_as_list: true
      doc_imprint:
        description: Publication imprint.
        range: DocImprint
        inlined: true
      doc_edition:
        description: Edition statement.
        range: string
      doc_date:
        description: Publication date.
        range: string
      epigraphs:
        description: Epigraph(s) on title page.
        range: Epigraph
        multivalued: true
        inlined: true
        inlined_as_list: true
      imprimatur:
        description: Imprimatur statement.
        range: string
      graphic:
        description: Title page illustration.
        range: string
    class_uri: tei:titlePage
    annotations:
      tei_element: titlePage
      tei_module: textstructure
      glam_hypernym: DOC.TXT.TTP

# Title as printed on the title page, modelled as an inlined list of
# TitlePart objects.
  DocTitle:
    description: >-
      Document title - title as given on title page.
      TEI <docTitle> element.
    is_a: TextStructureElement
    attributes:
      title_parts:
        description: Title parts (main, sub, etc.).
        range: TitlePart
        multivalued: true
        inlined: true
        inlined_as_list: true
    class_uri: tei:docTitle
    annotations:
      tei_element: docTitle
      tei_module: textstructure
      glam_hypernym: DOC.TXT.DTL
      dcterms_mapping: dcterms:title

# One segment of a title. `title_text` is required; `title_type` is optional
# and constrained to TitleTypeEnum.
  TitlePart:
    description: >-
      Part of document title (main, subtitle, etc.).
      TEI <titlePart> element.
    is_a: TextStructureElement
    attributes:
      title_type:
        description: Type of title part.
        range: TitleTypeEnum
      title_text:
        description: Title text content.
        range: string
        required: true
    class_uri: tei:titlePart
    annotations:
      tei_element: titlePart
      tei_module: textstructure
      glam_hypernym: DOC.TXT.TPR

# Author as named on the title page. `author_name` is required; `author_ref`
# is an optional value of type `uri`.
  DocAuthor:
    description: >-
      Document author as given on title page.
      TEI <docAuthor> element.
    is_a: TextStructureElement
    attributes:
      author_name:
        description: Author name.
        range: string
        required: true
      author_ref:
        description: Authority URI for author.
        range: uri
    class_uri: tei:docAuthor
    annotations:
      tei_element: docAuthor
      tei_module: textstructure
      glam_hypernym: AGT.PER.AUT
      dcterms_mapping: dcterms:creator
      foaf_mapping: foaf:Person

# Imprint details from the title page; all four attributes are optional
# strings.
  DocImprint:
    description: >-
      Document imprint - publication info on title page.
      TEI <docImprint> element.
    is_a: TextStructureElement
    attributes:
      publisher:
        description: Publisher name.
        range: string
      pub_place:
        description: Place of publication.
        range: string
      pub_date:
        description: Publication date.
        range: string
      printer:
        description: Printer name.
        range: string
    class_uri: tei:docImprint
    annotations:
      tei_element: docImprint
      tei_module: textstructure
      glam_hypernym: DOC.TXT.IMP
      dcterms_mapping: dcterms:publisher

# Authorship statement: required raw text plus an optional inlined list of
# the DocAuthor objects it mentions.
  Byline:
    description: >-
      Byline - authorship statement.
      TEI <byline> element.
    is_a: TextStructureElement
    attributes:
      byline_text:
        description: Byline text content.
        range: string
        required: true
      doc_authors:
        description: Authors mentioned.
        range: DocAuthor
        multivalued: true
        inlined: true
        inlined_as_list: true
    class_uri: tei:byline
    annotations:
      tei_element: byline
      tei_module: textstructure
      glam_hypernym: DOC.TXT.BYL

# Epigraph (used as the range of TitlePage.epigraphs); quote, citation and
# bibliographic reference are all optional plain strings.
  Epigraph:
    description: >-
      Epigraph - quotation at start of section.
      TEI <epigraph> element.
    is_a: TextStructureElement
    attributes:
      quote:
        description: Quoted text.
        range: string
      cit:
        description: Citation/attribution.
        range: string
      bibl:
        description: Bibliographic reference.
        range: string
    class_uri: tei:epigraph
    annotations:
      tei_element: epigraph
      tei_module: textstructure
      glam_hypernym: DOC.TXT.EPG

# Section summary held as a single optional string; annotated as mapping to
# dcterms:abstract.
  Argument:
    description: >-
      Argument - summary of section contents.
      TEI <argument> element.
    is_a: TextStructureElement
    attributes:
      argument_text:
        description: Argument content.
        range: string
    class_uri: tei:argument
    annotations:
      tei_element: argument
      tei_module: textstructure
      glam_hypernym: DOC.TXT.ARG
      dcterms_mapping: dcterms:abstract

# ---------------------------------------------------------------------------
# BACK MATTER
# ---------------------------------------------------------------------------
# Back matter container: an inlined list of Division objects.
  Back:
    description: >-
      Back matter container - appendices, indexes, etc.
      TEI <back> element.
    is_a: TextStructureElement
    attributes:
      back_id:
        description: Unique identifier.
        range: string
      divisions:
        description: Back matter divisions.
        range: Division
        multivalued: true
        inlined: true
        inlined_as_list: true
    class_uri: tei:back
    annotations:
      tei_element: back
      tei_module: textstructure
      glam_hypernym: DOC.TXT.BCK

# Closing title/statement held as a single optional string.
  Trailer:
    description: >-
      Trailer - closing title or statement.
      TEI <trailer> element.
    is_a: TextStructureElement
    attributes:
      trailer_text:
        description: Trailer content.
        range: string
    class_uri: tei:trailer
    annotations:
      tei_element: trailer
      tei_module: textstructure
      glam_hypernym: DOC.TXT.TRL

# ---------------------------------------------------------------------------
# OPENER/CLOSER COMPONENTS
# ---------------------------------------------------------------------------
# Opening formula (range of Division.opener): a salutation string and an
# inlined Dateline.
  Opener:
    description: >-
      Opening formula - salutation, dateline at start.
      TEI <opener> element.
    is_a: TextStructureElement
    attributes:
      salute:
        description: Salutation.
        range: string
      dateline:
        description: Date and place line.
        range: Dateline
        inlined: true
    class_uri: tei:opener
    annotations:
      tei_element: opener
      tei_module: textstructure
      glam_hypernym: DOC.TXT.OPN

# Closing formula (range of Division.closer): salutation and signature
# strings plus an inlined Dateline.
  Closer:
    description: >-
      Closing formula - signature, dateline at end.
      TEI <closer> element.
    is_a: TextStructureElement
    attributes:
      salute:
        description: Closing salutation.
        range: string
      signed:
        description: Signature.
        range: string
      dateline:
        description: Date and place line.
        range: Dateline
        inlined: true
    class_uri: tei:closer
    annotations:
      tei_element: closer
      tei_module: textstructure
      glam_hypernym: DOC.TXT.CLS

# Dateline (range of Opener.dateline and Closer.dateline). Place, date and
# the full line are all optional plain strings; `date` is not a typed date.
  Dateline:
    description: >-
      Dateline - date and place of composition.
      TEI <dateline> element.
    is_a: TextStructureElement
    attributes:
      place_name:
        description: Place of composition.
        range: string
      date:
        description: Date of composition.
        range: string
      dateline_text:
        description: Full dateline text.
        range: string
    class_uri: tei:dateline
    annotations:
      tei_element: dateline
      tei_module: textstructure
      glam_hypernym: TMP.DAT
      dcterms_mapping: dcterms:date

# ---------------------------------------------------------------------------
# POSTSCRIPT
# ---------------------------------------------------------------------------
# Postscript: an optional label and optional content, both plain strings.
  Postscript:
    description: >-
      Postscript - text added after main content.
      TEI <postscript> element.
    is_a: TextStructureElement
    attributes:
      label:
        # Single-quoted because the text contains double quotes.
        description: 'Postscript label (e.g., "P.S.").'
        range: string
      content:
        description: Postscript content.
        range: string
    class_uri: tei:postscript
    annotations:
      tei_element: postscript
      tei_module: textstructure
      glam_hypernym: DOC.TXT.PSC

# ---------------------------------------------------------------------------
# FLOATING TEXT
# ---------------------------------------------------------------------------
# Embedded text: takes the shared `div_type` slot and wraps an inlined Body.
  FloatingText:
    description: >-
      Floating text - embedded text that interrupts main flow.
      TEI <floatingText> element.
    is_a: TextStructureElement
    slots:
      - div_type
    attributes:
      floating_id:
        description: Unique identifier.
        range: string
      body:
        description: Content body.
        range: Body
        inlined: true
    class_uri: tei:floatingText
    annotations:
      tei_element: floatingText
      tei_module: textstructure
      glam_hypernym: DOC.TXT.FLT