---
# =============================================================================
# GLAM-NER: TEI P5 LINKML MODULES INDEX
# =============================================================================
# Module: modules/advanced/tei/index.yaml
# Purpose: Index of TEI P5 chapter modules converted to LinkML
# Source: TEI P5 4.10.2 (September 2025)
# =============================================================================
#
# This directory contains LinkML schema representations of TEI P5 element
# definitions, enabling interoperability between TEI XML annotations and
# GLAM-NER entity extraction pipelines.
#
# Each module corresponds to a TEI P5 chapter and provides:
# - LinkML class definitions for TEI elements
# - Mappings to CIDOC-CRM, Schema.org, FOAF, and other ontologies
# - GLAM-NER hypernym annotations
# - Authority file integration (VIAF, Wikidata, GeoNames, etc.)
#
# =============================================================================
|
|
|
|
# Identity of this index: identifier URI, machine name, display title, and
# the index's own version (kept as a string so it is never read as a number).
id: 'https://w3id.org/glam/ner/tei'
name: 'glam-ner-tei-modules'
title: 'TEI P5 LinkML Modules for GLAM-NER'
version: '1.16.0'
|
|
|
|
# Upstream TEI P5 release that the chapter modules were converted from.
# Version, date, and commit are quoted so they stay strings.
tei_source:
  specification: 'TEI P5 Guidelines'
  version: '4.10.2'
  release_date: '2025-09-04'
  url: 'https://tei-c.org/release/doc/tei-p5-doc/en/html/'
  commit: 'bcfa98f42'
  namespace: 'http://www.tei-c.org/ns/1.0'
  total_elements: 588
|
|
|
|
# =============================================================================
|
|
# AVAILABLE MODULES
|
|
# =============================================================================
|
|
|
|
modules:
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Chapter 2: TEI Header
|
|
# ---------------------------------------------------------------------------
|
|
header:
  path: 'header.yaml'
  tei_chapter: 2
  tei_module_name: 'header'
  title: 'TEI Header'
  description: |
    Metadata elements for describing TEI documents including file description,
    encoding description, profile description, and revision history. Essential
    for document provenance, rights management, and NER pipeline metadata.
  element_count: 85
  line_count: 3678
  status: 'complete'

  # Element name -> one-line gloss.
  key_elements:
    - teiHeader: 'Header container (required)'
    - fileDesc: 'File description (required)'
    - titleStmt: 'Title statement with authors/editors'
    - publicationStmt: 'Publication/distribution info'
    - sourceDesc: 'Source description (provenance)'
    - encodingDesc: 'Encoding practices'
    - profileDesc: 'Non-bibliographic aspects'
    - revisionDesc: 'Revision history'
    - correspDesc: 'Correspondence description'
    - availability: 'Access/licence information'

  # Hypernym code -> names classified under it.
  glam_hypernym_mappings:
    DOC.MET:
      - 'teiHeader'
      - 'fileDesc'
      - 'encodingDesc'
      - 'profileDesc'
      - 'revisionDesc'
    AGT.PER: ['author', 'editor', 'principal', 'person']
    GRP.ORG:
      - 'publisher'
      - 'distributor'
      - 'authority'
      - 'sponsor'
      - 'funder'
      - 'orgName'
    GEO: ['pubPlace', 'place', 'setting']
    GEO.ADR: ['address', 'addrLine']
    TMP.DAT: ['date']
    TMP.EVT: ['change', 'correspAction', 'creation']
    WRK: ['title', 'bibl', 'biblStruct', 'biblFull']
    REL: ['relatedItem', 'correspContext', 'ref', 'ptr']
    QTY: ['measure', 'extent', 'unitDef']
    APP.LBL: ['idno']
    ROL: ['resp']

  # Vocabulary prefix -> list of {name: term} pairs.
  ontology_mappings:
    dcterms:
      - TEIHeader: 'dcterms:BibliographicResource'
      - TitleStmt: 'dcterms:title'
      - Author: 'dcterms:creator'
      - Publisher: 'dcterms:publisher'
      - SourceDesc: 'dcterms:source'
      - Availability: 'dcterms:accessRights'
      - Licence: 'dcterms:license'
      - Abstract: 'dcterms:abstract'
    schema:
      - TEIHeader: 'schema:CreativeWork'
      - Author: 'schema:author'
      - Publisher: 'schema:publisher'
      - Keywords: 'schema:keywords'
      - Application: 'schema:SoftwareApplication'
    prov:
      - Change: 'prov:Activity'
      - RevisionDesc: 'prov:Activity'
      - CorrespAction: 'prov:Activity'
      - Creation: 'prov:wasGeneratedBy'
      - RespStmt: 'prov:wasAttributedTo'
      - Application: 'prov:SoftwareAgent'
    bibo:
      - Editor: 'bibo:editor'
      - Edition: 'bibo:edition'
      - BiblStruct: 'bibo:Document'
      - Series: 'bibo:Series'
    foaf:
      - Person: 'foaf:Person'
      - PersonGrp: 'foaf:Group'
      - OrgName: 'foaf:Organization'
    skos:
      - Taxonomy: 'skos:ConceptScheme'
      - Category: 'skos:Concept'
      - Term: 'skos:Concept'
    premis:
      - EncodingDesc: 'premis:PreservationMetadata'
      - Change: 'premis:Event'
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Chapter 3: Core Elements
|
|
# ---------------------------------------------------------------------------
|
|
core:
  path: 'core.yaml'
  tei_chapter: 3
  tei_module_name: 'core'
  title: 'Core Elements'
  description: |
    Elements available in all TEI documents regardless of customization.
    Includes date, time, name, title, measure, and bibliographic elements.
  element_count: 45
  line_count: 1575
  status: 'complete'

  # Element name -> one-line gloss.
  key_elements:
    - date: 'Date expressions (TimeML aligned)'
    - time: 'Time expressions'
    - name: 'Generic proper noun'
    - title: 'Titles of works'
    - measure: 'Quantities with units (QUDT aligned)'
    - num: 'Numeric values'
    - address: 'Postal addresses'
    - rs: 'Referencing strings'
    - bibl: 'Bibliographic citations'
    - quote: 'Quotations'
    - ptr: 'Pointers'
    - ref: 'References'

  # Hypernym code -> names classified under it.
  glam_hypernym_mappings:
    TMP.DAT: ['date']
    TMP.TIM: ['time']
    APP: ['name', 'rs']
    WRK: ['title', 'bibl']
    QTY: ['measure', 'num']
    GEO.ADR: ['address']
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Chapter 14: Names, Dates, People, Places
|
|
# ---------------------------------------------------------------------------
|
|
namesdates:
  path: 'namesdates.yaml'
  tei_chapter: 14
  tei_module_name: 'namesdates'
  title: 'Names, Dates, People, Places'
  description: |
    TEI elements for encoding names and information about named entities
    including persons, places, organizations, and events. This is the
    primary module for Named Entity Recognition interoperability.
  element_count: 58
  line_count: 1962
  status: 'complete'

  # Element name -> one-line gloss.
  key_elements:
    - persName: 'Personal names (with forename, surname, addName components)'
    - placeName: 'Place names (with settlement, region, country, geogName)'
    - orgName: 'Organization names'
    - objectName: 'Named objects (artworks, artifacts)'
    - eventName: 'Named events'
    - person: 'Person descriptions (standOff)'
    - place: 'Place descriptions (standOff)'
    - org: 'Organization descriptions (standOff)'
    - event: 'Event descriptions (standOff)'
    - relation: 'Relationships between entities'

  # Hypernym code -> names classified under it.
  glam_hypernym_mappings:
    AGT.PER: ['persName', 'person', 'persona']
    GRP.ORG: ['orgName', 'org']
    GRP: ['personGrp']
    GEO: ['placeName', 'place', 'location']
    GEO.SET: ['settlement']
    GEO.REG: ['region']
    GEO.CTY: ['country']
    GEO.BLC: ['bloc']
    GEO.DST: ['district']
    GEO.FEA: ['geogName', 'geogFeat']
    TMP.EVT: ['event', 'eventName', 'birth', 'death']
    TMP.PER: ['floruit']
    THG.OBJ: ['objectName', 'object']
    ROL: ['roleName']
    ROL.OCC: ['occupation']
    ROL.AFF: ['affiliation']
    ROL.EDU: ['education']
    APP.REL: ['faith']
    REL: ['relation', 'listRelation']
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Chapter 11: Manuscript Description
|
|
# ---------------------------------------------------------------------------
|
|
msdescription:
  path: 'msdescription.yaml'
  tei_chapter: 11
  tei_module_name: 'msdescription'
  title: 'Manuscript Description'
  description: |
    Elements for describing manuscripts and similar primary sources including
    physical description, history, and intellectual content. Essential for
    heritage institution cataloging and archival finding aids.
  element_count: 58
  line_count: 1923
  status: 'complete'

  # Element name -> one-line gloss.
  key_elements:
    - msDesc: 'Manuscript description container'
    - msIdentifier: 'Identifier (repository, shelfmark, settlement)'
    - physDesc: 'Physical description (support, extent, dimensions)'
    - objectDesc: 'Object form and layout'
    - handDesc: 'Handwriting description'
    - decoDesc: 'Decoration description'
    - bindingDesc: 'Binding description'
    - history: 'Origin, provenance, acquisition'
    - origin: 'Place and date of origin'
    - provenance: 'Provenance events'
    - msContents: 'Intellectual content'
    - msItem: 'Individual texts/works'
    - msPart: 'Composite manuscript parts'

  # Hypernym code -> names classified under it. "provenance" is listed under
  # several codes, with a parenthesised qualifier inside the string itself.
  glam_hypernym_mappings:
    THG.OBJ: ['msDesc', 'msPart', 'seal', 'decoNote']
    AGT.PER: ['handNote', 'provenance (personal)', 'author']
    GRP.ORG: ['repository', 'institution', 'provenance (institutional)']
    GEO: ['origPlace', 'settlement', 'provenance (location)']
    TMP.EVT: ['provenance', 'acquisition', 'custEvent']
    TMP.PER: ['origDate']
    WRK: ['msItem', 'title']
    APP.LBL: ['msIdentifier', 'shelfmark']

  # Vocabulary prefix -> list of {name: term} pairs.
  ontology_mappings:
    cidoc_crm:
      - msDesc: 'crm:E22_Human-Made_Object'
      - provenance: 'crm:E10_Transfer_of_Custody'
      - acquisition: 'crm:E8_Acquisition'
      - origin: 'crm:E12_Production'
    rico:
      - msDesc: 'rico:RecordResource'
      - repository: 'rico:Agent'
    schema:
      - msDesc: 'schema:ArchiveComponent'
      - msItem: 'schema:CreativeWork'
    frbroo:
      - msDesc: 'frbroo:F4_Manifestation_Singleton'
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Chapter 17: Linking, Segmentation, Alignment
|
|
# ---------------------------------------------------------------------------
|
|
linking:
  path: "linking.yaml"
  tei_chapter: 17
  tei_module_name: "linking"
  title: "Linking, Segmentation, Alignment"
  description: |
    Elements for creating links, segments, and alignments within and
    between documents. Essential for standoff annotation and NER pipelines.
    Aligned with W3C Web Annotation Data Model.
  element_count: 20
  line_count: 1393
  status: "complete"

  # Element name -> one-line gloss.
  key_elements:
    - standOff: "Standoff annotation container"
    - annotation: "W3C Web Annotation compatible annotation"
    - listAnnotation: "Annotation collection"
    - annotationBlock: "Grouped annotations"
    - link: "Hyperlink between elements"
    - linkGrp: "Link group"
    - seg: "Arbitrary text segment"
    - anchor: "Anchor point for linking"
    - join: "Fragment aggregation"
    - alt: "Alternative readings"
    - timeline: "Temporal alignment"
    - when: "Time point"
    - certainty: "Certainty annotation"

  # Linking module provides infrastructure, not entities.
  # Entity types assigned via contained annotations.
  # Written as an explicit empty mapping: a bare `key:` parses as null, which
  # has a different type from the mappings every other module provides here.
  glam_hypernym_mappings: {}

  # Vocabulary prefix -> list of {name: term} pairs.
  ontology_mappings:
    web_annotation:
      - annotation: "oa:Annotation"
      - listAnnotation: "oa:AnnotationCollection"
      - selector: "oa:Selector"
      - motivation: "oa:Motivation"
    nif:
      - seg: "nif:String"
      - EntityMention: "nif:String"
      - begin_index: "nif:beginIndex"
      - end_index: "nif:endIndex"
    cidoc_crm:
      - annotation: "crm:E13_Attribute_Assignment"

  # Free-text guidance for NER pipelines; both values are literal block
  # scalars, so the dash-prefixed lines are text, not YAML sequences.
  ner_integration:
    description: |
      The linking module provides core infrastructure for NER annotation:
      - TEIAnnotation: Primary class for NER output
      - NERAnnotation: Specialized subclass with confidence scores
      - EntityMention: NIF-aligned entity span class
      - Selector: Target identification (text quote, offset, XPath)
      - TEISeg: Inline entity span markup
    output_format: |
      NER pipelines should output to <standOff><listAnnotation> with:
      - @motivation="identifying" (oa:identifying)
      - Selector with exact text match and offsets
      - Entity type in body (glam_hypernym)
      - Authority URI in @ref (VIAF, Wikidata, etc.)
|
|
|
|
# =============================================================================
|
|
# AVAILABLE MODULES (continued)
|
|
# =============================================================================
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Chapter 8: Transcriptions of Speech
|
|
# ---------------------------------------------------------------------------
|
|
spoken:
  path: 'spoken.yaml'
  tei_chapter: 8
  tei_module_name: 'spoken'
  title: 'Transcriptions of Speech'
  description: |
    Elements for transcribing spoken language including utterances, pauses,
    vocal sounds, kinesic events, paralinguistic features, and temporal
    alignment. Essential for oral history, sociolinguistics, discourse
    analysis, and heritage institution audio/video collections.
  element_count: 18
  line_count: 1153
  status: 'complete'

  # Element name -> one-line gloss.
  key_elements:
    - u: 'Utterance (primary speech unit)'
    - pause: 'Pause in speech'
    - vocal: 'Vocal sounds (laughter, cough, etc.)'
    - incident: 'Non-vocal events'
    - kinesic: 'Body language and gestures'
    - shift: 'Paralinguistic feature changes'
    - writing: 'Written text in spoken context'
    - unclear: 'Inaudible/unclear speech'
    - timeline: 'Temporal alignment structure'
    - when: 'Time synchronization point'
    - recording: 'Recording metadata'
    - broadcast: 'Broadcast source information'

  # Hypernym code -> names classified under it.
  glam_hypernym_mappings:
    TXT.SPK: ['u', 'seg', 'unclear']
    TXT.SPK.VOC: ['vocal']
    TXT.SPK.KIN: ['kinesic']
    TXT.SPK.PAR: ['shift']
    TXT.WRT: ['writing']
    TMP.DUR: ['pause']
    TMP.EVT: ['incident', 'broadcast']
    TMP.TLN: ['timeline']
    TMP.PNT: ['when']
    THG.REC: ['recording']
    THG.EQP: ['equipment']
    DOC.MET: ['recordingStmt', 'transcriptionDesc']
    WRK: ['scriptStmt']

  # Vocabulary prefix -> list of {name: term} pairs.
  ontology_mappings:
    cidoc_crm:
      - Utterance: 'crm:E33_Linguistic_Object'
      - Vocal: 'crm:E7_Activity'
      - Kinesic: 'crm:E7_Activity'
      - Incident: 'crm:E5_Event'
      - Pause: 'crm:E52_Time-Span'
      - Timeline: 'crm:E52_Time-Span'
      - When: 'crm:E61_Time_Primitive'
      - Shift: 'crm:E13_Attribute_Assignment'
      - Recording: 'crm:E65_Creation'
    schema:
      - Utterance: 'schema:SpeakAction'
      - Recording: 'schema:AudioObject'
      - Broadcast: 'schema:BroadcastEvent'
    time:
      - When: 'time:Instant'
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Chapter 12: Representation of Primary Sources
|
|
# ---------------------------------------------------------------------------
|
|
transcr:
  path: 'transcr.yaml'
  tei_chapter: 12
  tei_module_name: 'transcr'
  title: 'Representation of Primary Sources'
  description: |
    Elements for transcribing primary source materials including facsimiles,
    surfaces, zones, damage, additions, deletions, substitutions, abbreviations,
    and editorial interventions. Essential for manuscript studies, diplomatic
    editions, genetic criticism, and archival work.
  element_count: 38
  line_count: 1746
  status: 'complete'

  # Element name -> one-line gloss.
  key_elements:
    - facsimile: 'Digital facsimile container (images)'
    - sourceDoc: 'Source document with embedded transcription'
    - surface: 'Written surface (recto/verso/patch)'
    - zone: 'Area of interest on surface'
    - add: 'Text addition'
    - del: 'Text deletion'
    - subst: 'Substitution (deletion + addition)'
    - damage: 'Damaged text area'
    - gap: 'Gap/omission in transcription'
    - unclear: 'Illegible/uncertain text'
    - supplied: 'Editorially supplied text'
    - abbr: 'Abbreviation'
    - expan: 'Expansion of abbreviation'
    - metamark: 'Authorial/scribal markup'
    - handShift: 'Change of scribal hand'

  # Hypernym code -> names classified under it.
  glam_hypernym_mappings:
    DOC.FAC: ['facsimile']
    DOC.SRC: ['sourceDoc']
    DOC.DMG: ['damage', 'damageSpan']
    DOC.MET: ['handNotes']
    THG.SRF: ['surface', 'surfaceGrp']
    THG.ZON: ['zone']
    THG.IMG: ['graphic']
    THG.PTH: ['path']
    TXT.LIN: ['line']
    TXT.ADD: ['add', 'addSpan']
    TXT.DEL: ['del', 'delSpan']
    TXT.SUB: ['subst', 'substJoin']
    TXT.GAP: ['gap']
    TXT.UNC: ['unclear']
    TXT.SUP: ['supplied']
    TXT.SUR: ['surplus']
    TXT.SEC: ['secl']
    TXT.ABR: ['abbr', 'am']
    TXT.EXP: ['expan', 'ex']
    TXT.MOD: ['mod']
    TXT.MRK: ['metamark']
    TXT.HND: ['handShift']
    TXT.RST: ['restore']
    TXT.RTR: ['retrace']
    TXT.UND: ['undo']
    TXT.RDO: ['redo']
    TXT.TRN: ['listTranspose', 'transpose']
    TXT.FW: ['fw']
    TXT.SPC: ['space']

  # Vocabulary prefix -> list of {name: term} pairs.
  ontology_mappings:
    cidoc_crm:
      - SourceDoc: 'crm:E22_Human-Made_Object'
      - Surface: 'crm:E25_Human-Made_Feature'
      - Add: 'crm:E13_Attribute_Assignment'
      - Del: 'crm:E79_Part_Removal'
      - Damage: 'crm:E14_Condition_Assessment'
      - Metamark: 'crm:E37_Mark'
      - Line: 'crm:E33_Linguistic_Object'
    iiif:
      - Facsimile: 'iiif:Manifest'
      - Surface: 'iiif:Canvas'
      - Graphic: 'iiif:Image'
    schema:
      - Facsimile: 'schema:ImageObject'
      - SourceDoc: 'schema:ArchiveComponent'
      - Graphic: 'schema:ImageObject'
    web_annotation:
      - Zone: 'oa:FragmentSelector'
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Chapter 13: Critical Apparatus
|
|
# ---------------------------------------------------------------------------
|
|
textcrit:
  path: 'textcrit.yaml'
  tei_chapter: 13
  tei_module_name: 'textcrit'
  title: 'Critical Apparatus'
  description: |
    Elements for encoding textual variants and critical apparatus.
    Essential for scholarly editions, manuscript collation, and
    philological research. Supports both inline and external apparatus.
  element_count: 16
  line_count: 720
  status: 'complete'

  # Element name -> one-line gloss.
  key_elements:
    - app: 'Apparatus entry (container for variants)'
    - lem: 'Lemma (base text reading)'
    - rdg: 'Reading (variant reading)'
    - rdgGrp: 'Reading group (related variants)'
    - witness: 'Witness description'
    - listWit: 'List of witnesses'
    - witDetail: 'Witness details'
    - witStart: 'Witness start marker'
    - witEnd: 'Witness end marker'
    - lacunaStart: 'Lacuna start marker'
    - lacunaEnd: 'Lacuna end marker'
    - listApp: 'List of apparatus entries'

  # Hypernym code -> names classified under it.
  glam_hypernym_mappings:
    TXT.VAR: ['app', 'lem', 'rdg', 'rdgGrp']
    THG.OBJ: ['witness', 'listWit']
    DOC.MET:
      - 'witDetail'
      - 'witStart'
      - 'witEnd'
      - 'lacunaStart'
      - 'lacunaEnd'
      - 'listApp'
    WRK: ['bibl']

  # Vocabulary prefix -> list of {name: term} pairs.
  ontology_mappings:
    cidoc_crm:
      - App: 'crm:E33_Linguistic_Object'
      - Lem: 'crm:E33_Linguistic_Object'
      - Rdg: 'crm:E33_Linguistic_Object'
      - Witness: 'crm:E22_Human-Made_Object'
    frbroo:
      - Witness: 'frbroo:F4_Manifestation_Singleton'
    dcterms:
      - Bibl: 'dcterms:bibliographicCitation'
    web_annotation:
      - Note: 'oa:Annotation'
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Chapter 6: Verse
|
|
# ---------------------------------------------------------------------------
|
|
verse:
  path: 'verse.yaml'
  tei_chapter: 6
  tei_module_name: 'verse'
  title: 'Verse'
  description: |
    Elements for encoding verse texts including verse lines, line groups
    (stanzas), rhyme, meter, caesura, and metrical analysis. Essential
    for poetry collections, literary archives, manuscript transcription,
    and text analysis.
  element_count: 7
  line_count: 689
  status: 'complete'

  # Element name -> one-line gloss.
  key_elements:
    - l: 'Verse line'
    - lg: 'Line group (stanza)'
    - seg: 'Verse segment (metrical unit)'
    - rhyme: 'Rhyming portion'
    - caesura: 'Break point in verse line'
    - metDecl: 'Metrical notation declaration'
    - metSym: 'Metrical symbol definition'

  # Hypernym code -> names classified under it.
  glam_hypernym_mappings:
    TXT.VRS.LIN: ['l']
    TXT.VRS.STZ: ['lg']
    TXT.VRS.SEG: ['seg']
    TXT.VRS.RHY: ['rhyme']
    TXT.VRS.CES: ['caesura']
    DOC.MET: ['metDecl']
    DOC.MET.SYM: ['metSym']

  # Vocabulary prefix -> list of {name: term} pairs.
  ontology_mappings:
    cidoc_crm:
      - VerseLine: 'crm:E33_Linguistic_Object'
      - LineGroup: 'crm:E33_Linguistic_Object'
      - VerseSegment: 'crm:E33_Linguistic_Object'
      - Rhyme: 'crm:E33_Linguistic_Object'
    schema:
      - VerseLine: 'schema:CreativeWork'
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Chapter 7: Performance Texts
|
|
# ---------------------------------------------------------------------------
|
|
drama:
  path: 'drama.yaml'
  tei_chapter: 7
  tei_module_name: 'drama'
  title: 'Performance Texts'
  description: |
    Elements for encoding dramatic texts and performance scripts including
    cast lists, speeches, speakers, stage directions, and performance
    metadata. Essential for theater archives, opera collections, film
    scripts, and performance studies.
  element_count: 15
  line_count: 781
  status: 'complete'

  # Element name -> one-line gloss.
  key_elements:
    - castList: 'Cast list container'
    - castGroup: 'Grouped cast members'
    - castItem: 'Individual cast entry'
    - role: 'Character/role name'
    - roleDesc: 'Role description'
    - actor: 'Actor/performer name'
    - sp: 'Speech container'
    - speaker: 'Speaker identification'
    - spGrp: 'Speech group'
    - stage: 'Stage direction'
    - move: 'Character movement'
    - set: 'Set/scenery description'
    - prologue: 'Prologue section'
    - epilogue: 'Epilogue section'
    - performance: 'Performance metadata'

  # Hypernym code -> names classified under it. castList and performance
  # each appear under two codes (also under WRK.DRM).
  glam_hypernym_mappings:
    TXT.DRM.CST: ['castList', 'castGroup', 'castItem']
    TXT.DRM.ROL: ['role', 'roleDesc']
    TXT.DRM.SPK: ['sp', 'speaker', 'spGrp']
    TXT.DRM.STG: ['stage', 'move', 'set']
    TXT.DRM.FRM: ['prologue', 'epilogue']
    AGT.PER: ['actor']
    TMP.EVT: ['performance']
    WRK.DRM: ['castList', 'performance']

  # Vocabulary prefix -> list of {name: term} pairs.
  ontology_mappings:
    cidoc_crm:
      - CastList: 'crm:E33_Linguistic_Object'
      - Role: 'crm:E33_Linguistic_Object'
      - Speech: 'crm:E33_Linguistic_Object'
      - StageDirection: 'crm:E33_Linguistic_Object'
      - Performance: 'crm:E7_Activity'
    schema:
      - CastList: 'schema:CreativeWork'
      - Actor: 'schema:Person'
      - Performance: 'schema:TheaterEvent'
    frbroo:
      - Performance: 'frbroo:F31_Performance'
      - Role: 'frbroo:F38_Character'
    foaf:
      - Actor: 'foaf:Person'
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Chapter 10: Dictionaries
|
|
# ---------------------------------------------------------------------------
|
|
dictionaries:
  path: 'dictionaries.yaml'
  tei_chapter: 10
  tei_module_name: 'dictionaries'
  title: 'Dictionaries'
  description: |
    Elements for encoding dictionary entries including headwords, forms,
    pronunciation, grammatical information, senses, definitions, etymology,
    usage labels, and cross-references. Essential for lexicography, historical
    linguistics, terminology management, and heritage language collections.
  element_count: 35
  line_count: 1740
  status: 'complete'

  # Element name -> one-line gloss.
  key_elements:
    - entry: 'Dictionary entry container'
    - form: 'Form information (orthography, pronunciation)'
    - orth: 'Orthographic (written) form'
    - pron: 'Pronunciation'
    - gramGrp: 'Grammatical information group'
    - pos: 'Part of speech'
    - sense: 'Sense/meaning definition'
    - def: 'Definition text'
    - cit: 'Citation/example'
    - etym: 'Etymology'
    - usg: 'Usage information'
    - xr: 'Cross-reference'
    - re: 'Related entry'

  # Hypernym code -> names classified under it.
  glam_hypernym_mappings:
    TXT.LEX.ENT: ['entry', 'entryFree', 'superEntry']
    TXT.LEX.FRM: ['form']
    TXT.LEX.ORT: ['orth']
    TXT.LEX.PRN: ['pron']
    TXT.LEX.GRM: ['gramGrp']
    TXT.LEX.POS: ['pos']
    TXT.LEX.SNS: ['sense']
    TXT.LEX.DEF: ['def']
    TXT.LEX.CIT: ['cit']
    TXT.LEX.ETY: ['etym']
    TXT.LEX.USG: ['usg']
    TXT.LEX.XRF: ['xr']
    TXT.LEX.REL: ['re']
    APP.LNG: ['lang']

  # Vocabulary prefix -> list of {name: term} pairs.
  ontology_mappings:
    ontolex:
      - DictEntry: 'ontolex:LexicalEntry'
      - DictForm: 'ontolex:Form'
      - DictSense: 'ontolex:LexicalSense'
      - DictOrth: 'ontolex:writtenRep'
      - DictPron: 'ontolex:phoneticRep'
    lexinfo:
      - DictGramGrp: 'lexinfo:MorphosyntacticProperty'
      - DictPOS: 'lexinfo:partOfSpeech'
      - DictGen: 'lexinfo:gender'
      - DictNumber: 'lexinfo:number'
      - DictCase: 'lexinfo:case'
      - DictEtym: 'lexinfo:etymology'
      - DictUsg: 'lexinfo:usageNote'
    skos:
      - DictDef: 'skos:definition'
      - DictXRef: 'skos:related'
    cidoc_crm:
      - DictMentioned: 'crm:E33_Linguistic_Object'
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Chapter 5: Characters, Glyphs, and Writing Modes
|
|
# ---------------------------------------------------------------------------
|
|
gaiji:
  path: 'gaiji.yaml'
  tei_chapter: 5
  tei_module_name: 'gaiji'
  title: 'Characters, Glyphs, and Writing Modes'
  description: |
    Elements for documenting non-standard characters, glyph variants, and
    writing modes. Essential for medieval manuscripts, CJK texts, historical
    documents, and texts using Unicode Private Use Area characters.
  element_count: 18
  line_count: 948
  status: 'complete'

  # Element name -> one-line gloss.
  key_elements:
    - charDecl: 'Character declarations container'
    - char: 'Character definition'
    - glyph: 'Glyph variant definition'
    - g: 'Inline gaiji reference'
    - charName: 'Character name'
    - charProp: 'Character property'
    - unicodeName: 'Unicode property name'
    - localProp: 'Local property'
    - mapping: 'Character mapping'
    - figure: 'Glyph image container'
    - graphic: 'Glyph image'

  # Hypernym code -> names classified under it.
  glam_hypernym_mappings:
    DOC.MET.CHR: ['charDecl']
    TXT.CHR: ['char']
    TXT.GLY: ['glyph']
    TXT.CHR.REF: ['g']
    TXT.CHR.NAM: ['charName']
    TXT.GLY.NAM: ['glyphName']
    TXT.CHR.PRP: ['charProp']
    TXT.CHR.UNI: ['unicodeName']
    TXT.CHR.LCL: ['localProp']
    TXT.CHR.MAP: ['mapping']
    THG.IMG: ['graphic', 'figure']

  # Vocabulary prefix -> list of {name: term} pairs.
  ontology_mappings:
    cidoc_crm:
      - CharDecl: 'crm:E90_Symbolic_Object'
      - CharDef: 'crm:E90_Symbolic_Object'
      - GlyphDef: 'crm:E90_Symbolic_Object'
      - GaijiRef: 'crm:E90_Symbolic_Object'
      - CharProp: 'crm:E55_Type'
    skos:
      - CharName: 'skos:prefLabel'
      - GlyphName: 'skos:prefLabel'
      - CharMapping: 'skos:exactMatch'
    schema:
      - CharFigure: 'schema:ImageObject'
      - CharGraphic: 'schema:ImageObject'
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Chapter 22: Certainty, Precision, and Responsibility
|
|
# ---------------------------------------------------------------------------
|
|
certainty:
  path: 'certainty.yaml'
  tei_chapter: 22
  tei_module_name: 'certainty'
  title: 'Certainty, Precision, and Responsibility'
  description: |
    Elements for encoding certainty, precision, and responsibility for
    annotations and assertions. Essential for NER confidence scoring,
    annotation provenance, and scholarly attribution of interpretations.
  element_count: 10
  line_count: 662
  status: 'complete'

  # Name -> one-line gloss.
  key_elements:
    - certainty: 'Certainty annotation'
    - precision: 'Precision of values'
    - respons: 'Responsibility for content'
    - NERConfidenceScore: 'NER confidence metadata'
    - AnnotationProvenance: 'Annotation attribution'
    - ModelAssertionSet: 'ML model assertion bundle'

  # Hypernym code -> names classified under it.
  glam_hypernym_mappings:
    DOC.MET.CRT: ['certainty']
    DOC.MET.PRC: ['precision']
    DOC.MET.RSP: ['respons']
    DOC.MET.CNF: ['NERConfidenceScore']
    DOC.MET.PRV: ['AnnotationProvenance']
    DOC.MET.ASR: ['ModelAssertionSet']

  # Vocabulary prefix -> list of {name: term} pairs.
  ontology_mappings:
    prov:
      - Respons: 'prov:wasAttributedTo'
      - AnnotationProvenance: 'prov:Activity'
      - AnnotationAgent: 'prov:Agent'
    web_annotation:
      - Certainty: 'oa:Annotation'
      - Precision: 'oa:Annotation'
    cidoc_crm:
      - Certainty: 'crm:E13_Attribute_Assignment'
    mls:
      - NERConfidenceScore: 'mls:hasConfidence'
      - ModelAssertionSet: 'mls:Model'
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Chapter 18: Simple Analytic Mechanisms
|
|
# ---------------------------------------------------------------------------
|
|
analysis:
  path: 'analysis.yaml'
  tei_chapter: 18
  tei_module_name: 'analysis'
  title: 'Simple Analytic Mechanisms'
  description: |
    Elements for linguistic annotation including segmentation, POS tagging,
    lemmatization, morphological analysis, syntactic parsing, and interpretive
    annotation. Essential for NLP preprocessing and corpus linguistics.
  element_count: 22
  line_count: 976
  status: 'complete'

  # Element name -> one-line gloss.
  key_elements:
    - s: 'Sentence (s-unit)'
    - cl: 'Clause'
    - phr: 'Phrase (NP, VP, etc.)'
    - w: 'Word token'
    - m: 'Morpheme'
    - c: 'Character'
    - pc: 'Punctuation'
    - span: 'Span annotation'
    - spanGrp: 'Span group'
    - interp: 'Interpretation'
    - interpGrp: 'Interpretation group'

  # Hypernym code -> names classified under it.
  glam_hypernym_mappings:
    TXT.ANA.SEN: ['s']
    TXT.ANA.CLS: ['cl']
    TXT.ANA.PHR: ['phr']
    TXT.ANA.WRD: ['w']
    TXT.ANA.MOR: ['m']
    TXT.ANA.CHR: ['c']
    TXT.ANA.PNC: ['pc']
    TXT.ANA.SPN: ['span']
    TXT.ANA.SPG: ['spanGrp']
    TXT.ANA.INT: ['interp']
    TXT.ANA.IGP: ['interpGrp']

  # Vocabulary prefix -> list of {name: term} pairs.
  ontology_mappings:
    nif:
      - Sentence: 'nif:Sentence'
      - Word: 'nif:Word'
      - Character: 'nif:Character'
      - TokenizedText: 'nif:Context'
      - DependencyParse: 'nif:DependencyTree'
    olia:
      - Clause: 'olia:Clause'
      - Phrase: 'olia:Phrase'
      - Morpheme: 'olia:Morpheme'
      - Punctuation: 'olia:Punctuation'
      - POSTaggedToken: 'olia:Token'
    ontolex:
      - Word: 'ontolex:Form'
    cidoc_crm:
      - Interp: 'crm:E13_Attribute_Assignment'
    web_annotation:
      - Span: 'oa:Annotation'
    skos:
      - InterpGroup: 'skos:ConceptScheme'
      - AnnotationScheme: 'skos:ConceptScheme'
      - TagDefinition: 'skos:Concept'
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Chapter 15: Tables, Formulae, Graphics, and Notated Music
|
|
# ---------------------------------------------------------------------------
|
|
figures:
  path: 'figures.yaml'
  tei_chapter: 15
  tei_module_name: 'figures'
  title: 'Tables, Formulae, Graphics, and Notated Music'
  description: |
    Elements for encoding tables, mathematical formulae, graphic images,
    and notated music. Essential for scientific publications, illustrated
    manuscripts, heritage image collections, and digital humanities projects.
  element_count: 18
  line_count: 743
  status: 'complete'

  # Element name -> one-line gloss.
  key_elements:
    - figure: 'Figure container'
    - graphic: 'Graphic image reference'
    - figDesc: 'Figure description (accessibility)'
    - table: 'Table container'
    - row: 'Table row'
    - cell: 'Table cell'
    - formula: 'Mathematical/chemical formula'
    - notatedMusic: 'Music notation'
    - media: 'Audio/video media'
    - binaryObject: 'Embedded binary data'

  # Hypernym code -> names classified under it.
  glam_hypernym_mappings:
    THG.IMG.FIG: ['figure']
    THG.IMG.GRA: ['graphic']
    THG.IMG.DES: ['figDesc']
    THG.TBL: ['table']
    THG.TBL.ROW: ['row']
    THG.TBL.CEL: ['cell']
    THG.FRM: ['formula']
    THG.MUS: ['notatedMusic']
    THG.MED: ['media']
    THG.BIN: ['binaryObject']

  # Vocabulary prefix -> list of {name: term} pairs.
  ontology_mappings:
    schema:
      - Figure: 'schema:ImageObject'
      - Table: 'schema:Table'
      - Cell: 'schema:TableCell'
      - Media: 'schema:MediaObject'
      - NotatedMusic: 'schema:MusicComposition'
    iiif:
      - Graphic: 'iiif:Image'
      - IIIFManifest: 'iiif:Manifest'
      - IIIFImageService: 'iiif:ImageService'
    cidoc_crm:
      - HeritageImageMetadata: 'crm:E38_Image'
    mathml:
      - Formula: 'mathml:math'
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Chapter 4: Default Text Structure
|
|
# ---------------------------------------------------------------------------
|
|
textstructure:
  # Where the module lives and which TEI chapter/module it represents.
  path: "textstructure.yaml"
  tei_chapter: 4
  tei_module_name: "textstructure"
  title: "Default Text Structure"
  description: |
    Elements for default document structure including divisions, front/back
    matter, title pages, and document organization. Essential for document
    parsing, structure navigation, and metadata extraction.
  # Size bookkeeping for textstructure.yaml (matches the 1.13.0 history entry).
  element_count: 24
  line_count: 767
  status: "complete"

  # TEI element name -> one-line gloss.
  key_elements:
    - TEI: "Root document element"
    - text: "Text container"
    - body: "Main body content"
    - front: "Front matter"
    - back: "Back matter"
    - div: "Text division"
    - group: "Text group (composite works)"
    - titlePage: "Title page"
    - docTitle: "Document title"
    - docAuthor: "Document author"
    - docImprint: "Publication imprint"
    - opener: "Opening formula"
    - closer: "Closing formula"

  # GLAM-NER hypernym code -> TEI element names filed under that code.
  # Note: docAuthor is filed under an agent code (AGT.*), not a DOC.* code.
  glam_hypernym_mappings:
    DOC: ["TEI"]
    DOC.TXT: ["text"]
    DOC.TXT.BDY: ["body"]
    DOC.TXT.FRT: ["front"]
    DOC.TXT.BCK: ["back"]
    DOC.TXT.DIV: ["div"]
    DOC.TXT.GRP: ["group"]
    DOC.TXT.TTP: ["titlePage"]
    DOC.TXT.DTL: ["docTitle"]
    AGT.PER.AUT: ["docAuthor"]
    DOC.TXT.IMP: ["docImprint"]

  # Grouped by ontology; each entry is <class name>: <ontology term>.
  ontology_mappings:
    schema:
      - TEIDocument: "schema:CreativeWork"
    bibo:
      - Division: "bibo:DocumentPart"
    dcterms:
      - DocTitle: "dcterms:title"
      - DocAuthor: "dcterms:creator"
      - DocImprint: "dcterms:publisher"
      - Dateline: "dcterms:date"
    cidoc_crm:
      - Text: "crm:E33_Linguistic_Object"
    foaf:
      - DocAuthor: "foaf:Person"
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Chapter 16: Language Corpora
|
|
# ---------------------------------------------------------------------------
|
|
corpus:
  # Where the module lives and which TEI chapter/module it represents.
  path: "corpus.yaml"
  tei_chapter: 16
  tei_module_name: "corpus"
  title: "Language Corpora"
  description: |
    Elements for representing language corpora including corpus structure,
    text descriptions, participant information, and sampling methodology.
    Essential for NER training data management, corpus linguistics,
    and heritage document collections.
  # Size bookkeeping for corpus.yaml (matches the 1.14.0 history entry).
  element_count: 15
  line_count: 1050
  status: "complete"

  # TEI element name -> one-line gloss.
  key_elements:
    - teiCorpus: "Corpus container (collection of TEI documents)"
    - textDesc: "Text description (genre, channel, purpose)"
    - channel: "Communication channel (spoken, written, mixed)"
    - constitution: "Text completeness information"
    - derivation: "Original vs. derivative text status"
    - domain: "Subject domain classification"
    - factuality: "Factual vs. fictional status"
    - interaction: "Interaction type (none, partial, active)"
    - preparedness: "Degree of text preparation"
    - purpose: "Communicative purpose"
    - particDesc: "Participant description"
    - person: "Participant person"
    - personGrp: "Participant group"
    - settingDesc: "Setting/context description"
    - setting: "Individual setting"

  # GLAM-NER hypernym code -> TEI element names filed under that code.
  # Participant elements share the bare AGT code; setting elements share GEO.
  glam_hypernym_mappings:
    DOC.CRP: ["teiCorpus"]
    DOC.MET.TXT: ["textDesc"]
    DOC.MET.CHN: ["channel"]
    DOC.MET.CON: ["constitution"]
    DOC.MET.DRV: ["derivation"]
    DOC.MET.DOM: ["domain"]
    DOC.MET.FCT: ["factuality"]
    DOC.MET.INT: ["interaction"]
    DOC.MET.PRE: ["preparedness"]
    DOC.MET.PUR: ["purpose"]
    AGT: ["particDesc", "person", "personGrp"]
    GEO: ["settingDesc", "setting"]

  # Classes added on top of the TEI elements for GLAM-NER use.
  glam_ner_extensions:
    - NERTrainingCorpus: "Corpus for NER model training"
    - EntityTypeCount: "Entity type statistics"
    - CorpusSplit: "Train/dev/test splits"
    - HeritageDocumentCollection: "Heritage document corpus"

  # Grouped by ontology; each entry is <class name>: <ontology term>.
  ontology_mappings:
    void:
      - TEICorpus: "void:Dataset"
      - NERTrainingCorpus: "void:Dataset"
    nif:
      - TEICorpus: "nif:Context"
    oa:
      - CorpusAnnotation: "oa:Annotation"
    dcat:
      - TEICorpus: "dcat:Dataset"
      - CorpusSplit: "dcat:Distribution"
    prov:
      - TextDesc: "prov:Entity"
      - SamplingDeclaration: "prov:Activity"
    foaf:
      - Participant: "foaf:Person"
      - ParticipantGroup: "foaf:Group"
    skos:
      - Domain: "skos:Concept"
      - Purpose: "skos:Concept"
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Chapter 20: Graphs, Networks, and Trees
|
|
# ---------------------------------------------------------------------------
|
|
nets:
  # Where the module lives and which TEI chapter/module it represents.
  path: "nets.yaml"
  tei_chapter: 20
  tei_module_name: "nets"
  title: "Graphs, Networks, and Trees"
  description: |
    Elements for encoding graphs, networks, and tree structures including
    nodes, arcs, trees, forests, and stemmatology. Essential for entity
    relationship graphs, coreference chains, dependency parsing, and
    manuscript stemma visualization.
  # Size bookkeeping for nets.yaml (matches the 1.14.0 history entry).
  element_count: 14
  line_count: 980
  status: "complete"

  # TEI element name -> one-line gloss.
  key_elements:
    - graph: "Graph container (directed, undirected)"
    - node: "Graph node"
    - arc: "Graph edge/arc"
    - tree: "Tree structure"
    - root: "Tree root node"
    - iNode: "Internal tree node"
    - leaf: "Tree leaf node"
    - label: "Node/arc label"
    - forest: "Collection of trees"
    - eTree: "Embedded tree"
    - triangle: "Collapsed subtree representation"
    - eLeaf: "Embedded leaf"

  # GLAM-NER hypernym code -> TEI element names filed under that code.
  # All node-like elements (node, root, iNode, leaf, eLeaf) share one code.
  glam_hypernym_mappings:
    DOC.GRF: ["graph", "forest"]
    DOC.GRF.NOD: ["node", "root", "iNode", "leaf", "eLeaf"]
    DOC.GRF.ARC: ["arc"]
    DOC.GRF.TRE: ["tree", "eTree"]
    DOC.GRF.LBL: ["label"]
    DOC.GRF.TRI: ["triangle"]

  # Classes added on top of the TEI elements for NER/NLP graph output.
  glam_ner_extensions:
    - EntityRelationGraph: "Entity relationship network"
    - EntityNode: "Entity as graph node"
    - RelationArc: "Typed relation between entities"
    - CoreferenceChain: "Coreference resolution chain"
    - EntityMention: "Entity mention in coreference"
    - DependencyTree: "Syntactic dependency tree"
    - TokenNode: "Token in dependency tree"
    - DependencyArc: "Dependency relation"

  # Classes added for manuscript-tradition (stemma) modelling.
  stemmatology_extensions:
    - Stemma: "Manuscript stemma (witness relationships)"
    - Witness: "Manuscript witness"
    - TransmissionRelation: "Transmission relationship type"

  # Grouped by ontology; each entry is <class name>: <ontology term>.
  # RelationArc appears under both `oa` and `owl`.
  ontology_mappings:
    cidoc_crm:
      - Graph: "crm:E89_Propositional_Object"
      - Node: "crm:E1_CRM_Entity"
      - Arc: "crm:E13_Attribute_Assignment"
      - EntityRelationGraph: "crm:E89_Propositional_Object"
      - Stemma: "crm:E89_Propositional_Object"
      - Witness: "crm:E22_Human-Made_Object"
    nif:
      - DependencyTree: "nif:DependencyTree"
      - CoreferenceChain: "nif:String"
    oa:
      - EntityNode: "oa:Annotation"
      - RelationArc: "oa:Annotation"
    skos:
      - Label: "skos:prefLabel"
    owl:
      - RelationArc: "owl:ObjectProperty"
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Chapter 19: Feature Structures (ISO-FS)
|
|
# ---------------------------------------------------------------------------
|
|
iso-fs:
  # Where the module lives and which TEI chapter/module it represents.
  path: "iso-fs.yaml"
  tei_chapter: 19
  tei_module_name: "iso-fs"
  title: "Feature Structures"
  description: |
    Elements for encoding feature structures based on ISO/IEC 24610 (Feature
    Structures) standard. Essential for morphological analysis, syntactic
    feature unification, lexical semantics, and NLP feature representation.
  # Size bookkeeping for iso-fs.yaml (matches the 1.14.0 history entry).
  element_count: 22
  line_count: 863
  status: "complete"

  # TEI element name -> one-line gloss.
  key_elements:
    - fs: "Feature structure container"
    - f: "Feature (name-value pair)"
    - binary: "Binary value (+/-)"
    - symbol: "Symbolic value"
    - numeric: "Numeric value"
    - string: "String value"
    - vColl: "Collection of values"
    - vAlt: "Alternative values (disjunction)"
    - vNot: "Negated value"
    - vMerge: "Merged values"
    - default: "Default value"
    - if: "Conditional feature"
    - then: "Conditional consequent"
    - fLib: "Feature structure library"
    - vLib: "Value library"
    - fvLib: "Feature-value library"
    - fsdDecl: "Feature system declaration"
    - fsDecl: "Feature structure type declaration"
    - fDecl: "Feature declaration"
    - vRange: "Valid value range"

  # GLAM-NER hypernym code -> TEI element names filed under that code.
  # Value/collection elements sit under TXT.ANA.*; libraries and
  # declarations sit under DOC.MET.*.
  glam_hypernym_mappings:
    TXT.ANA.FS: ["fs"]
    TXT.ANA.FEA: ["f"]
    TXT.ANA.VAL: ["binary", "symbol", "numeric", "string"]
    TXT.ANA.COL: ["vColl", "vAlt", "vNot", "vMerge"]
    TXT.ANA.DEF: ["default"]
    TXT.ANA.CND: ["if", "then"]
    DOC.MET.LIB: ["fLib", "vLib", "fvLib"]
    DOC.MET.DCL: ["fsdDecl", "fsDecl", "fDecl", "vRange"]

  # Classes added on top of the TEI elements for GLAM-NER use.
  glam_ner_extensions:
    - EntityFeatureStructure: "NER entity features"
    - MorphologicalFS: "Morphological analysis features"
    - SemanticRoleFS: "Semantic role features"

  # Grouped by ontology; each entry is <class name>: <ontology term>.
  # MorphologicalFS appears under both `olia` and `lexinfo`.
  ontology_mappings:
    gold:
      - FeatureStructure: "gold:FeatureStructure"
      - Feature: "gold:Feature"
      - FeatureValue: "gold:FeatureValue"
    olia:
      - MorphologicalFS: "olia:MorphologicalCategory"
      - POS: "olia:PartOfSpeech"
      - Case: "olia:Case"
      - Gender: "olia:Gender"
      - Number: "olia:Number"
      - Tense: "olia:Tense"
    lexinfo:
      - MorphologicalFS: "lexinfo:MorphosyntacticProperty"
    cidoc_crm:
      - FeatureStructure: "crm:E55_Type"
    skos:
      - SymbolValue: "skos:Concept"
|
|
|
|
# ---------------------------------------------------------------------------
# Chapter 23: Documentation Elements (TEI ODD)
# ---------------------------------------------------------------------------
tagdocs:
  # Where the module lives and which TEI chapter/module it represents.
  path: "tagdocs.yaml"
  # Documentation Elements is chapter 23 in the TEI P5 4.x numbering this
  # index uses elsewhere (dictionaries=10, figures=15, iso-fs=19, nets=20);
  # chapter 9 in that numbering is Computer-mediated Communication.
  tei_chapter: 23
  tei_module_name: "tagdocs"
  title: "Documentation Elements (TEI ODD)"
  description: |
    Elements for TEI ODD (One Document Does it all) customization including
    schema specifications, element definitions, attribute declarations,
    content models, and constraint rules. Essential for defining custom
    annotation schemas and entity type taxonomies for GLAM-NER pipelines.
  # Size bookkeeping for tagdocs.yaml (matches the 1.15.0 history entry).
  element_count: 35
  line_count: 1542
  status: "complete"

  # TEI element name -> one-line gloss.
  key_elements:
    - schemaSpec: "Schema specification container"
    - moduleRef: "Reference to TEI module"
    - moduleSpec: "Module definition"
    - elementSpec: "Element documentation"
    - attDef: "Attribute definition"
    - attList: "Attribute list"
    - classSpec: "Class specification (model/atts)"
    - memberOf: "Class membership declaration"
    - content: "Content model specification"
    - sequence: "Ordered content sequence"
    - alternate: "Content alternation (choice)"
    - elementRef: "Element reference in content model"
    - classRef: "Class reference in content model"
    - macroSpec: "Reusable pattern/macro"
    - constraintSpec: "Constraint specification (Schematron, etc.)"
    - constraint: "Individual constraint rule"
    - datatype: "Data type specification"
    - valList: "Valid values list"
    - valItem: "Valid value item"
    - exemplum: "Usage example"
    - remarks: "Additional documentation"

  # GLAM-NER hypernym code -> TEI element names filed under that code.
  glam_hypernym_mappings:
    DOC.SCH: ["schemaSpec"]
    DOC.SCH.MOD: ["moduleRef", "moduleSpec"]
    DOC.SCH.ELM: ["elementSpec"]
    DOC.SCH.ATT: ["attDef"]
    DOC.SCH.ATL: ["attList"]
    DOC.SCH.CLS: ["classSpec"]
    DOC.SCH.MEM: ["memberOf"]
    DOC.SCH.CNT: ["content"]
    DOC.SCH.SEQ: ["sequence"]
    DOC.SCH.ALT: ["alternate"]
    DOC.SCH.CSP: ["constraintSpec"]
    DOC.SCH.CON: ["constraint"]
    DOC.SCH.DTP: ["datatype"]
    DOC.SCH.VLL: ["valList"]
    DOC.SCH.VLI: ["valItem"]
    DOC.SCH.EXM: ["exemplum"]
    DOC.SCH.REM: ["remarks"]

  # Classes added on top of the TEI elements for GLAM-NER schema definition.
  glam_ner_extensions:
    - EntityTypeSpec: "Entity type definition for GLAM-NER"
    - AnnotationSchemaSpec: "Complete annotation schema"
    - RelationTypeSpec: "Relation type definition"
    - OntologyMapping: "Mapping to ontology classes"

  # Grouped by ontology; each entry is <class name>: <ontology term>.
  ontology_mappings:
    owl:
      - SchemaSpec: "owl:Ontology"
      - ModuleRef: "owl:imports"
    rdfs:
      - ElementSpec: "rdfs:Class"
      # The property class lives in the RDF namespace (rdf:Property);
      # RDF Schema defines no rdfs:Property term.
      - AttDef: "rdf:Property"
      - ClassSpec: "rdfs:Class"
      - MemberOf: "rdfs:subClassOf"
      - Datatype: "rdfs:Datatype"
    skos:
      - ValList: "skos:ConceptScheme"
      - ValItem: "skos:Concept"
      - Exemplum: "skos:example"
      - Remarks: "skos:note"
    shacl:
      - ConstraintSpec: "sh:NodeShape"
      - Constraint: "sh:PropertyShape"
    dcterms:
      - ListRef: "dcterms:references"
|
|
|
|
# ---------------------------------------------------------------------------
# Chapter 9: Computer-mediated Communication (CMC)
# ---------------------------------------------------------------------------
cmc:
  # Where the module lives and which TEI chapter/module it represents.
  path: "cmc.yaml"
  # Integer chapter number, like every other module entry in this index
  # (previously the string "CMC"). Computer-mediated Communication is
  # chapter 9 in the TEI P5 4.x numbering used by this index.
  tei_chapter: 9
  tei_module_name: "cmc"
  title: "Computer-mediated Communication"
  description: |
    Elements for encoding computer-mediated communication including social
    media posts, chat messages, forum threads, wiki discussions, and other
    digital discourse. Essential for social media NER, online discourse
    analysis, and digital heritage collections. Covers the TEI <post> element
    and associated attributes for modality, threading, and content generation.
  # Size bookkeeping for cmc.yaml (matches the 1.16.0 history entry).
  element_count: 17
  line_count: 1478
  status: "complete"

  # Name -> one-line gloss. `post` is the TEI element; the CMC* names are
  # classes of this module.
  key_elements:
    - post: "CMC post/message (primary element)"
    - CMCThread: "Thread of related posts"
    - CMCConversation: "Conversation context"
    - CMCParticipant: "User account/identity"
    - CMCEmoji: "Emoji encoding"
    - CMCHashtag: "Hashtag encoding"
    - CMCMention: "@-mention encoding"
    - CMCEmbeddedMedia: "Multimodal content"
    - CMCReaction: "Reactions/engagement"
    - CMCCorpus: "CMC corpus structure"
    - CMCEntityMention: "NER entity from CMC"

  # GLAM-NER hypernym code -> element/class names filed under that code.
  glam_hypernym_mappings:
    TXT.CMC: ["post"]
    TXT.CMC.PST: ["CMCPost"]
    TXT.CMC.THR: ["CMCThread"]
    TXT.CMC.CNV: ["CMCConversation"]
    TXT.CMC.EMJ: ["CMCEmoji"]
    TXT.CMC.EMO: ["CMCEmoticon"]
    TXT.CMC.RXN: ["CMCReaction"]
    AGT.CMC.USR: ["CMCParticipant"]
    GRP.CMC: ["CMCParticipantGroup"]
    APP.CMC.HTG: ["CMCHashtag"]
    APP.CMC.MEN: ["CMCMention"]
    THG.CMC.MED: ["CMCEmbeddedMedia"]
    DOC.MET.CMC: ["CMCPlatformMetadata", "CMCPostMetadata"]
    DOC.CRP.CMC: ["CMCCorpus"]
    NER.CMC.ENT: ["CMCEntityMention"]
    NER.CMC.NRM: ["CMCEntityNormalization"]

  # Classes added for NER over CMC text.
  glam_ner_extensions:
    - CMCEntityMention: "Entity mention from CMC text"
    - CMCEntityNormalization: "Informal entity normalization"
    - CMCCorpus: "CMC corpus for NER training"

  # Grouped by ontology; each entry is <class name>: <ontology term>.
  # CMCPost and CMCParticipant are mapped in several ontologies at once.
  ontology_mappings:
    sioc:
      - CMCPost: "sioc:Post"
      - CMCThread: "sioc:Thread"
      - CMCConversation: "sioc:Forum"
      - CMCParticipant: "sioc:UserAccount"
      - CMCParticipantGroup: "sioc:Usergroup"
    activitystreams:
      - CMCPost: "as:Note"
      - CMCReaction: "as:Like"
      - CMCParticipant: "as:Person"
    schema:
      - CMCPost: "schema:SocialMediaPosting"
      - CMCThread: "schema:DiscussionForumPosting"
      - CMCParticipant: "schema:Person"
      - CMCEmbeddedMedia: "schema:MediaObject"
    foaf:
      - CMCParticipant: "foaf:OnlineAccount"
      - CMCParticipantGroup: "foaf:Group"
    cidoc_crm:
      - CMCPost: "crm:E33_Linguistic_Object"
      - CMCParticipant: "crm:E39_Actor"
    prov:
      - CMCPlatformMetadata: "prov:Activity"
      - CMCCorpus: "prov:Collection"
    nif:
      - CMCEntityMention: "nif:String"
|
|
|
|
# =============================================================================
|
|
# PLANNED MODULES
|
|
# =============================================================================
|
|
|
|
# No modules are currently planned. An explicit empty mapping is used
# instead of a bare key, which would load as null and break consumers that
# iterate over this section.
# NOTE(review): assumes this section is keyed by module name like `modules`;
# switch to `[]` if consumers expect a list.
planned_modules: {}
|
|
|
|
# =============================================================================
|
|
# INTEGRATION NOTES
|
|
# =============================================================================
|
|
|
|
integration:

  # How to import the TEI modules into another LinkML schema
  # (by w3id URI or by repository-relative path).
  linkml_usage: |
    These modules can be imported into LinkML schemas using:

    ```yaml
    imports:
      - https://w3id.org/glam/ner/tei/header
      - https://w3id.org/glam/ner/tei/namesdates
      - https://w3id.org/glam/ner/tei/msdescription
      - https://w3id.org/glam/ner/tei/linking
    ```

    Or locally:

    ```yaml
    imports:
      - modules/advanced/tei/header
      - modules/advanced/tei/namesdates
      - modules/advanced/tei/msdescription
      - modules/advanced/tei/linking
    ```

  # Correspondence between TEI XML constructs and LinkML constructs.
  tei_xml_conversion: |
    TEI XML documents can be converted to LinkML-compliant YAML/JSON using
    XSLT or Python transformations. The key mappings are:

    - TEI element → LinkML class
    - TEI @xml:id → LinkML identifier
    - TEI @ref/@sameAs → LinkML TEIPointer (URI reference)
    - TEI att.datable → TEIDatableAttributes mixin
    - TEI att.global → TEIGlobalAttributes mixin
    - TEI <standOff> → LinkML TEIStandOff class
    - TEI <annotation> → LinkML TEIAnnotation class

  # Step-by-step recipe plus a worked example annotation.
  # In the example, end_position is start_position plus the length of
  # exact_match ("William Shakespeare" = 19 characters, so 45 + 19 = 64),
  # i.e. an exclusive end offset.
  ner_pipeline_integration: |
    NER pipelines can output annotations using these LinkML classes:

    1. Extract entity mentions from text
    2. Create TEIAnnotation or NERAnnotation instances
    3. Use Selector (TextQuoteSelector) for target identification
    4. Set motivation to "identifying" for entity recognition
    5. Populate body with:
       - entity_type (GLAM-NER hypernym)
       - entity_ref (authority URI: VIAF, Wikidata, GeoNames)
       - entity_label (human-readable name)
    6. Add confidence_score and ner_method
    7. Serialize to JSON-LD, YAML, or TEI XML

    Example output structure:
    ```yaml
    - class: NERAnnotation
      xml_id: ann-001
      motivation: identifying
      glam_hypernym: AGT.PER
      annotation_target:
        source_uri: "document.xml"
        selector:
          selector_type: text_quote
          exact_match: "William Shakespeare"
          prefix_context: "the playwright "
          suffix_context: " was born"
          start_position: 45
          end_position: 64
      annotation_body:
        entity_type: AGT.PER
        entity_ref: https://viaf.org/viaf/96994048
        entity_label: "William Shakespeare"
      confidence_score: 0.95
      ner_method: "spacy-en-core-web-trf"
    ```

  # Recipe for describing a manuscript with the msdescription module.
  manuscript_cataloging: |
    For manuscript cataloging, use msdescription module:

    1. Create MsDesc instance as container
    2. Populate MsIdentifier with:
       - repository_name, institution_name
       - settlement_name, country_name
       - shelfmark (required)
    3. Add MsPhysDesc for physical attributes:
       - object_form (codex, scroll, etc.)
       - support_material (parchment, paper, etc.)
       - dimensions, extent
       - hand_desc, deco_desc, binding_desc
    4. Add MsHistory for provenance:
       - origin (place and date)
       - provenance_events (ownership chain)
       - acquisition_info
    5. Add MsContents for intellectual content:
       - content_items (MsItem instances)
       - text_language

    Ontology mappings ensure interoperability with:
    - CIDOC-CRM (museum/heritage sector)
    - RiC-O (archives)
    - Schema.org (web discovery)
    - FRBRoo (bibliographic)
|
|
|
|
# =============================================================================
|
|
# STATISTICS
|
|
# =============================================================================
|
|
|
|
statistics:
  # Headline totals. total_elements_covered equals the sum of the covered
  # counts listed under element_coverage (590), and completed_modules equals
  # the number of entries there (21).
  completed_modules: 21
  total_elements_covered: 590
  total_line_count: 27367

  # Per-module coverage as "<covered>/<total> (<percent>)".
  element_coverage:
    header: "85/85 (100%)"
    core: "45/90 (50%)"
    namesdates: "58/58 (100%)"
    msdescription: "58/58 (100%)"
    linking: "20/20 (100%)"
    textcrit: "16/16 (100%)"
    spoken: "18/18 (100%)"
    transcr: "38/38 (100%)"
    verse: "7/7 (100%)"
    drama: "15/15 (100%)"
    dictionaries: "35/35 (100%)"
    gaiji: "18/18 (100%)"
    certainty: "10/10 (100%)"
    analysis: "22/22 (100%)"
    figures: "18/18 (100%)"
    textstructure: "24/24 (100%)"
    corpus: "15/15 (100%)"
    nets: "14/14 (100%)"
    iso-fs: "22/22 (100%)"
    tagdocs: "35/35 (100%)"
    cmc: "17/17 (100%)"

  # Free-text note per ontology on how far, and in which modules, it is used.
  # The foaf, skos and bibo notes list every module that this index shows
  # mapping to them (module `ontology_mappings` and `version_history`).
  ontology_coverage:
    dcterms: "Full alignment for header metadata"
    cidoc_crm: "Full alignment for heritage classes"
    schema_org: "Full alignment for web discovery"
    web_annotation: "Full alignment for linking module"
    prov_o: "Full alignment for header provenance"
    nif: "Full alignment for analysis module (NLP)"
    rico: "Partial alignment for archival classes"
    frbroo: "Partial alignment for bibliographic classes"
    foaf: "Used in namesdates, header, drama, textstructure, corpus, cmc modules"
    qudt: "Used in core and header modules (measures)"
    timeml: "Used in core module (temporal)"
    bibo: "Used in header, textstructure modules (bibliography)"
    skos: "Used in header, dictionaries, gaiji, analysis, corpus, nets, iso-fs, tagdocs modules (taxonomies)"
    premis: "Used in header module (preservation)"
    olia: "Full alignment for analysis, iso-fs modules (linguistic annotation)"
    iiif: "Full alignment for figures module (image delivery)"
    mathml: "Partial alignment for figures module (formulae)"
    void: "Used in corpus module (datasets)"
    dcat: "Used in corpus module (data catalogs)"
    gold: "Used in iso-fs module (feature structures)"
    lexinfo: "Used in dictionaries, iso-fs modules (lexical info)"
    ontolex: "Used in dictionaries module (lexical entries)"
    owl: "Used in nets, tagdocs modules (relations, ontology definitions)"
    rdfs: "Used in tagdocs module (class/property definitions)"
    shacl: "Used in tagdocs module (constraint shapes)"
    sioc: "Full alignment for cmc module (online communities)"
    activitystreams: "Used in cmc module (social activities)"
|
|
|
|
# =============================================================================
|
|
# VERSION HISTORY
|
|
# =============================================================================
|
|
|
|
# Release log of this index, newest release first. Each entry records the
# index version, its date, and free-text change notes; most entries end with
# the running totals that were current at that release.
version_history:
  # --- 1.16.0: cmc module -------------------------------------------------
  - version: "1.16.0"
    date: "2025-12-04"
    changes:
      - "Added cmc.yaml (Computer-mediated Communication) - 1478 lines, 17 classes, 8 enums"
      - "TEI CMC module for social media, chat, forums, wiki discussions"
      - "Core post element with modality, generatedBy, replyTo, indentLevel attributes"
      - "Threading structures: CMCThread, CMCConversation"
      - "Participant metadata: CMCParticipant, CMCParticipantGroup with anonymization"
      - "Emoji/emoticon encoding: CMCEmoji, CMCEmoticon with Unicode and sentiment"
      - "Hashtag and mention encoding: CMCHashtag, CMCMention for entity extraction"
      - "Multimodal content: CMCEmbeddedMedia for images, video, audio, GIFs"
      - "Engagement tracking: CMCReaction, CMCReactionSet"
      - "Platform metadata: CMCPlatformMetadata, CMCPostMetadata"
      - "Corpus support: CMCCorpus for NER training data management"
      - "NER extensions: CMCEntityMention, CMCEntityNormalization for informal text"
      - "Ontology mappings: SIOC (online communities), Activity Streams, Schema.org"
      - "Privacy support: anonymization levels for GDPR compliance"
      - "Bot detection: generatedBy attribute for human/bot/system content"
      - "Updated total line count to 27367"
      - "Updated total elements covered to 590"
      - "Updated total modules to 21"

  # --- 1.15.0: tagdocs module ---------------------------------------------
  - version: "1.15.0"
    date: "2025-12-04"
    changes:
      - "Added tagdocs.yaml (Chapter 9) - 1542 lines, 35 classes, 8 enums"
      - "TEI ODD documentation elements for schema specification"
      - "Schema elements: schemaSpec, moduleRef, moduleSpec"
      - "Element definition: elementSpec, attDef, attList, classSpec, memberOf"
      - "Content models: content, sequence, alternate, elementRef, classRef"
      - "Constraints: constraintSpec, constraint (Schematron/RelaxNG)"
      - "Data types and values: datatype, valList, valItem"
      - "Documentation: exemplum, remarks, listRef"
      - "GLAM-NER extensions: EntityTypeSpec, AnnotationSchemaSpec, RelationTypeSpec"
      - "Ontology mappings: OWL (ontology), RDFS (class definitions), SHACL (constraints)"
      - "Updated total line count to 25889"
      - "Updated total elements covered to 573"
      - "Updated total modules to 20"

  # --- 1.14.0: corpus, nets and iso-fs modules ----------------------------
  - version: "1.14.0"
    date: "2025-12-03"
    changes:
      - "Added corpus.yaml (Chapter 16) - 1050 lines, 15 classes, 9 enums"
      - "Added nets.yaml (Chapter 20) - 980 lines, 14 classes, 6 enums"
      - "Added iso-fs.yaml (Chapter 19) - 863 lines, 22 classes, 7 enums"
      - "Corpus module: TEI corpus structure, text descriptions, participant info"
      - "GLAM-NER corpus extensions: NERTrainingCorpus, CorpusSplit, EntityTypeCount"
      - "Heritage document collection support for corpus linguistics"
      - "Nets module: graphs, trees, forests for relationship modeling"
      - "Entity relationship graphs and coreference chains for NER"
      - "Stemmatology extensions for manuscript tradition visualization"
      - "Dependency tree support for syntactic parsing output"
      - "ISO-FS module: feature structures per ISO/IEC 24610"
      - "Morphological feature structures for linguistic analysis"
      - "Feature libraries and declarations for reusable feature sets"
      - "Ontology mappings: void, dcat, nif, gold, OLiA, LexInfo"
      - "Updated total line count to 24347"
      - "Updated total elements covered to 538"
      - "Updated total modules to 19"

  # --- 1.13.0: textstructure module ---------------------------------------
  - version: "1.13.0"
    date: "2025-12-03"
    changes:
      - "Added textstructure.yaml (Chapter 4) - 767 lines, 24 classes, 4 enums"
      - "Moved textstructure from planned to completed modules"
      - "Document structure: TEI, text, body, front, back, group"
      - "Divisions: div with type enumeration"
      - "Front matter: titlePage, docTitle, docAuthor, docImprint, byline, epigraph"
      - "Back matter: trailer, closer, opener, dateline, postscript"
      - "Floating text support for embedded texts"
      - "Ontology mappings: Schema.org, BIBO, Dublin Core, CIDOC-CRM, FOAF"
      - "Updated total line count to 21456"
      - "Updated total elements covered to 487"

  # --- 1.12.0: figures module ---------------------------------------------
  - version: "1.12.0"
    date: "2025-12-03"
    changes:
      - "Added figures.yaml (Chapter 15) - 743 lines, 18 classes, 6 enums"
      - "Moved figures from planned to completed modules"
      - "Figure elements: figure, graphic, figDesc"
      - "Table elements: table, row, cell"
      - "Formula support: formula with MathML, TeX, image options"
      - "Notated music: notatedMusic with MEI, MusicXML support"
      - "Media elements: media, binaryObject"
      - "IIIF integration: IIIFManifest, IIIFImageService classes"
      - "Heritage image metadata with CIDOC-CRM alignment"
      - "Ontology mappings: Schema.org, IIIF, CIDOC-CRM, MathML"
      - "Updated total line count to 20689"
      - "Updated total elements covered to 463"

  # --- 1.11.0: analysis module --------------------------------------------
  - version: "1.11.0"
    date: "2025-12-03"
    changes:
      - "Added analysis.yaml (Chapter 18) - 976 lines, 22 classes, 6 enums"
      - "Moved analysis from planned to completed modules"
      - "Linguistic segments: s (sentence), cl (clause), phr (phrase)"
      - "Token elements: w (word), m (morpheme), c (character), pc (punctuation)"
      - "Span annotations: span, spanGrp for standoff annotation"
      - "Interpretations: interp, interpGrp for coding schemes"
      - "NLP output classes: TokenizedText, POSTaggedToken, DependencyParse"
      - "Universal Dependencies POS tagset integration"
      - "Morphological feature support (CONLL-U compatible)"
      - "Ontology mappings: NIF, OLiA, OntoLex, Web Annotation"
      - "Updated total line count to 19946"
      - "Updated total elements covered to 445"

  # --- 1.10.0: certainty module -------------------------------------------
  - version: "1.10.0"
    date: "2025-12-03"
    changes:
      - "Added certainty.yaml (Chapter 22) - 662 lines, 10 classes, 7 enums"
      - "Moved certainty from planned to completed modules"
      - "TEI certainty elements: certainty, precision, respons"
      - "NER confidence scoring: NERConfidenceScore, ConfidenceMetrics"
      - "Annotation provenance: AnnotationProvenance, AnnotationAgent"
      - "ML model support: ModelAssertionSet for batch predictions"
      - "Ontology mappings: PROV-O, Web Annotation, CIDOC-CRM, ML Schema"
      - "Updated total line count to 18970"
      - "Updated total elements covered to 423"

  # --- 1.9.0: gaiji module ------------------------------------------------
  - version: "1.9.0"
    date: "2025-12-03"
    changes:
      - "Added gaiji.yaml (Chapter 5) - 948 lines, 18 classes, 6 enums"
      - "Moved gaiji from planned to completed modules"
      - "Character declarations: charDecl, char, glyph"
      - "Character properties: charName, charProp, unicodeName, localProp"
      - "Character mappings: mapping with type support"
      - "Glyph graphics: figure, graphic"
      - "Inline reference: g (gaiji)"
      - "Writing modes: direction, writing-mode, text-orientation"
      - "CJK extensions: radical, strokes, readings"
      - "Medieval manuscript extensions: letterforms, abbreviations"
      - "Ontology mappings: CIDOC-CRM, SKOS, Schema.org"
      - "Updated total line count to 18308"
      - "Updated total elements covered to 413"

  # --- 1.8.0: dictionaries module -----------------------------------------
  - version: "1.8.0"
    date: "2025-12-03"
    changes:
      - "Added dictionaries.yaml (Chapter 10) - 1740 lines, 35 classes, 13 enums"
      - "Moved dictionaries from planned to completed modules"
      - "Entry structure: entry, entryFree, superEntry, hom"
      - "Form elements: form, orth, pron, hyph, syll, stress"
      - "Grammar elements: gramGrp, pos, gen, number, case, tns, mood, per, iType, subc"
      - "Sense elements: sense, def, cit, quote, trans"
      - "Etymology elements: etym, lang, mentioned, gloss"
      - "Usage and cross-references: usg, lbl, xr, re"
      - "Ontology mappings: OntoLex-Lemon, LexInfo, SKOS, CIDOC-CRM"
      - "Updated total line count to 17360"
      - "Updated total elements covered to 395"

  # --- 1.7.0: drama module ------------------------------------------------
  - version: "1.7.0"
    date: "2025-12-03"
    changes:
      - "Added drama.yaml (Chapter 7) - 781 lines, 15 classes, 4 enums"
      - "Moved drama from planned to completed modules"
      - "Cast list elements: castList, castGroup, castItem, role, roleDesc, actor"
      - "Speech elements: sp (speech), speaker, spGrp (speech group)"
      - "Stage directions: stage, move, set"
      - "Framing elements: prologue, epilogue"
      - "Performance metadata tracking"
      - "Ontology mappings: CIDOC-CRM, Schema.org, FRBRoo, FOAF"
      - "Updated total line count to 15620"
      - "Updated total elements covered to 360"

  # --- 1.6.0: verse module ------------------------------------------------
  - version: "1.6.0"
    date: "2025-12-03"
    changes:
      - "Added verse.yaml (Chapter 6) - 689 lines, 7 classes, 6 enums"
      - "Moved verse from planned to completed modules"
      - "Core verse elements: l (line), lg (stanza), seg, rhyme, caesura"
      - "Metrical declaration: metDecl, metSym"
      - "Support for meter patterns, rhyme schemes, enjambment"
      - "Ontology mappings: CIDOC-CRM, Schema.org"
      - "Updated total line count to 14839"
      - "Updated total elements covered to 345"

  # --- 1.5.0: transcr module ----------------------------------------------
  - version: "1.5.0"
    date: "2025-12-03"
    changes:
      - "Added transcr.yaml (Chapter 12) - 1746 lines, 38 classes, 11 enums"
      - "Moved transcr from planned to completed modules"
      - "Full facsimile/surface support for digital editions (IIIF aligned)"
      - "Editorial interventions: add, del, subst, restore, retrace"
      - "Damage/illegibility: damage, gap, unclear, supplied, surplus, secl"
      - "Abbreviation handling: abbr, expan, am, ex"
      - "Metamark and transposition support"
      - "Hand tracking: handNotes, handShift"
      - "Ontology mappings: CIDOC-CRM, Schema.org, IIIF, Web Annotation"
      - "Updated total line count to 14150"
      - "Updated total elements covered to 338"

  # --- 1.4.0: spoken module -----------------------------------------------
  - version: "1.4.0"
    date: "2025-12-03"
    changes:
      - "Added spoken.yaml (Chapter 8) - 1153 lines, 18 classes, 10 enums"
      - "Moved spoken from planned to completed modules"
      - "Full utterance, pause, vocal, kinesic, incident support"
      - "Temporal alignment (timeline, when) for audio/video sync"
      - "Recording metadata (recordingStmt, recording, equipment, broadcast)"
      - "Paralinguistic feature tracking (shift)"
      - "Ontology mappings: CIDOC-CRM, Schema.org, W3C Time"
      - "Updated total line count to 12404"
      - "Updated total elements covered to 300"

  # --- 1.3.0: textcrit module ---------------------------------------------
  - version: "1.3.0"
    date: "2025-12-03"
    changes:
      - "Added textcrit.yaml (Chapter 13) - 720 lines, 16 classes"
      - "Moved textcrit from planned to completed modules"
      - "Updated total line count to 11251"
      - "Updated total elements covered to 282"

  # --- 1.2.0: header module -----------------------------------------------
  - version: "1.2.0"
    date: "2025-12-03"
    changes:
      - "Added header.yaml (Chapter 2) - 3678 lines, 85 classes"
      - "Comprehensive TEI Header metadata support"
      - "Full correspondence description (correspDesc, correspAction)"
      - "Complete encoding description coverage"
      - "Revision history tracking (revisionDesc, change, listChange)"
      - "Rich ontology mappings: Dublin Core, Schema.org, PROV-O, BIBO, FOAF, SKOS, PREMIS"
      - "Updated total line count to 10531"
      - "Updated total elements covered to 266"

  # --- 1.1.0: msdescription and linking modules; core completed -----------
  - version: "1.1.0"
    date: "2025-12-03"
    changes:
      - "Added msdescription.yaml (Chapter 11) - 1923 lines"
      - "Added linking.yaml (Chapter 17) - 1393 lines"
      - "Marked core.yaml as complete - 1575 lines"
      - "Updated statistics and element coverage"
      - "Added detailed NER pipeline integration notes"
      - "Added manuscript cataloging integration notes"

  # --- 1.0.0: initial release ---------------------------------------------
  - version: "1.0.0"
    date: "2025-12-02"
    changes:
      - "Initial release with namesdates.yaml (Chapter 14)"
      - "Created core.yaml structure (partial)"
      - "Established module architecture and index"
|