# =============================================================================
# GLAM-NER: TEI P5 GAIJI MODULE (LINKML)
# =============================================================================
# Module: modules/advanced/tei/gaiji.yaml
# TEI Chapter: 5 - Characters, Glyphs, and Writing Modes
# TEI Module: gaiji
# Version: 1.0.0
# =============================================================================
#
# This module provides LinkML class definitions for TEI P5 character and
# glyph documentation elements. Essential for manuscript studies, East Asian
# texts, historical documents, and any text using non-standard characters.
#
# TEI Source: https://tei-c.org/release/doc/tei-p5-doc/en/html/WD.html
#
# Key Element Groups:
#   - Character declarations: charDecl, char, glyph
#   - Character reference: g (gaiji)
#   - Character properties: charName, charProp, unicodeName, localProp
#   - Character mapping: mapping, figure, graphic
#   - Writing modes: style attributes for directionality
#
# Use Cases:
#   - Medieval manuscript transcription (special letter forms)
#   - CJK (Chinese, Japanese, Korean) character variants
#   - Unicode Private Use Area documentation
#   - Historical script representation
#   - Rare or obsolete character forms
#
# =============================================================================

id: https://w3id.org/glam/ner/tei/gaiji
name: tei-gaiji
title: TEI Gaiji Module for GLAM-NER
version: "1.0.0"

prefixes:
  linkml: https://w3id.org/linkml/
  tei: http://www.tei-c.org/ns/1.0/
  glam: https://w3id.org/glam/ner/
  crm: http://www.cidoc-crm.org/cidoc-crm/
  schema: http://schema.org/
  # NOTE(review): this namespace looks like the W3C URW3 incubator group
  # rather than a Unicode vocabulary - confirm before relying on
  # unicode:Property (used as class_uri of UnicodeName below).
  unicode: http://www.w3.org/2005/Incubator/urw3/
  dcterms: http://purl.org/dc/terms/
  skos: http://www.w3.org/2004/02/skos/core#

default_prefix: glam
default_range: string

imports:
  - linkml:types

# =============================================================================
# ENUMERATIONS
# =============================================================================

enums:

  # ---------------------------------------------------------------------------
  # Mapping Type
  # ---------------------------------------------------------------------------
  CharMappingTypeEnum:
    description: >-
      Type of character mapping. From TEI @type on mapping element.
    permissible_values:
      standard:
        description: Standard Unicode character mapping
      PUA:
        description: Unicode Private Use Area codepoint
      variant:
        description: Variant character form
      normalized:
        description: Normalized form for searching/indexing
      simplified:
        description: Simplified form (e.g., simplified Chinese)
      traditional:
        description: Traditional form (e.g., traditional Chinese)
      historical:
        description: Historical character form

  # ---------------------------------------------------------------------------
  # Unicode Category
  # ---------------------------------------------------------------------------
  UnicodeCategoryEnum:
    description: >-
      Unicode General Category values.
    permissible_values:
      Lu:
        description: Letter, Uppercase
      Ll:
        description: Letter, Lowercase
      Lt:
        description: Letter, Titlecase
      Lm:
        description: Letter, Modifier
      Lo:
        description: Letter, Other
      Mn:
        description: Mark, Nonspacing
      Mc:
        description: Mark, Spacing Combining
      Me:
        description: Mark, Enclosing
      Nd:
        description: Number, Decimal Digit
      Nl:
        description: Number, Letter
      # Must stay quoted: YAML 1.1 loaders resolve a bare No to boolean
      # false, which would silently replace this category code.
      "No":
        description: Number, Other
      Pc:
        description: Punctuation, Connector
      Pd:
        description: Punctuation, Dash
      Ps:
        description: Punctuation, Open
      Pe:
        description: Punctuation, Close
      Pi:
        description: Punctuation, Initial quote
      Pf:
        description: Punctuation, Final quote
      Po:
        description: Punctuation, Other
      Sm:
        description: Symbol, Math
      Sc:
        description: Symbol, Currency
      Sk:
        description: Symbol, Modifier
      So:
        description: Symbol, Other
      Zs:
        description: Separator, Space
      Zl:
        description: Separator, Line
      Zp:
        description: Separator, Paragraph
      Cc:
        description: Other, Control
      Cf:
        description: Other, Format
      Cs:
        description: Other, Surrogate
      Co:
        description: Other, Private Use
      Cn:
        description: Other, Not Assigned

  # ---------------------------------------------------------------------------
  # Writing Direction
  # ---------------------------------------------------------------------------
  WritingDirectionEnum:
    description: >-
      Writing direction for text blocks. Based on CSS Writing Modes.
    permissible_values:
      ltr:
        description: Left to right (English, Latin scripts)
      rtl:
        description: Right to left (Arabic, Hebrew)
      ttb:
        description: Top to bottom (vertical East Asian)
      btt:
        description: Bottom to top (rare, e.g., some Ogham)

  # ---------------------------------------------------------------------------
  # Writing Mode
  # ---------------------------------------------------------------------------
  WritingModeEnum:
    description: >-
      CSS Writing Mode values for text orientation.
    permissible_values:
      horizontal-tb:
        description: Horizontal, top to bottom line progression
      vertical-rl:
        description: Vertical, right to left line progression
      vertical-lr:
        description: Vertical, left to right line progression
      sideways-rl:
        description: Sideways right to left
      sideways-lr:
        description: Sideways left to right

  # ---------------------------------------------------------------------------
  # Text Orientation
  # ---------------------------------------------------------------------------
  TextOrientationEnum:
    description: >-
      CSS text-orientation values for glyph orientation in vertical text.
    permissible_values:
      mixed:
        description: Mixed orientation (CJK upright, others rotated)
      upright:
        description: All glyphs upright
      sideways:
        description: All glyphs rotated 90 degrees

  # ---------------------------------------------------------------------------
  # Character Property Type
  # ---------------------------------------------------------------------------
  CharPropertyTypeEnum:
    description: >-
      Types of character properties that can be documented.
    permissible_values:
      unicode_name:
        description: Official Unicode character name
      unicode_category:
        description: Unicode general category
      unicode_block:
        description: Unicode block name
      unicode_script:
        description: Unicode script property
      radical:
        description: CJK radical number
      stroke_count:
        description: Number of strokes (CJK)
      reading:
        description: Pronunciation/reading
      meaning:
        description: Semantic meaning
      decomposition:
        description: Character decomposition
      variant_of:
        description: Base character this is a variant of

# =============================================================================
# SLOTS (ATTRIBUTES)
# =============================================================================

slots:

  # ---------------------------------------------------------------------------
  # Character identification
  # ---------------------------------------------------------------------------
  unicode_codepoint:
    description: Unicode code point (e.g., U+4E2D)
    range: string
    # "U+" followed by 4 to 6 hexadecimal digits, either case.
    pattern: "^U\\+[0-9A-Fa-f]{4,6}$"

  unicode_name:
    description: Official Unicode character name
    range: string

  unicode_category:
    description: Unicode general category
    range: UnicodeCategoryEnum

  unicode_block:
    description: Unicode block name
    range: string

  # ---------------------------------------------------------------------------
  # Character representation
  # ---------------------------------------------------------------------------
  glyph_ref:
    description: Reference to glyph definition
    range: string

  character_string:
    description: The actual character as a string
    range: string

  # ---------------------------------------------------------------------------
  # Mapping attributes
  # ---------------------------------------------------------------------------
  mapping_type:
    description: Type of character mapping
    range: CharMappingTypeEnum

  mapping_target:
    description: Target character or codepoint of mapping
    range: string

  # ---------------------------------------------------------------------------
  # Property attributes
  # ---------------------------------------------------------------------------
  property_name:
    description: Name of the character property
    range: string

  property_value:
    description: Value of the character property
    range: string

  property_type:
    description: Type of character property
    range: CharPropertyTypeEnum

  # ---------------------------------------------------------------------------
  # Writing mode attributes
  # ---------------------------------------------------------------------------
  writing_direction:
    description: Direction of text flow
    range: WritingDirectionEnum

  writing_mode:
    description: CSS writing mode
    range: WritingModeEnum

  text_orientation:
    description: Glyph orientation in vertical text
    range: TextOrientationEnum

# =============================================================================
# CLASSES
# =============================================================================

classes:

  # ===========================================================================
  # DECLARATION CLASSES
  # ===========================================================================

  # ---------------------------------------------------------------------------
  # CharDecl - charDecl
  # ---------------------------------------------------------------------------
  CharDecl:
    description: >-
      Container for character and glyph declarations used in a document.
      Corresponds to TEI <charDecl> element. Typically placed in encodingDesc.
    class_uri: crm:E90_Symbolic_Object
    annotations:
      tei_element: charDecl
      tei_module: gaiji
      glam_hypernym: DOC.MET.CHR
    attributes:
      xml_id:
        description: Unique identifier
        range: string
      desc:
        description: Description of the character declaration set
        range: string
      characters:
        description: Character definitions
        range: CharDef
        multivalued: true
      glyphs:
        description: Glyph definitions
        range: GlyphDef
        multivalued: true

  # ---------------------------------------------------------------------------
  # CharDef - char
  # ---------------------------------------------------------------------------
  CharDef:
    description: >-
      Definition of a non-standard character used in the document.
      Corresponds to TEI <char> element. Used for characters not in Unicode
      or requiring special documentation.
    class_uri: crm:E90_Symbolic_Object
    annotations:
      tei_element: char
      tei_module: gaiji
      glam_hypernym: TXT.CHR
    slots:
      - unicode_codepoint
    attributes:
      xml_id:
        description: Unique identifier for the character
        range: string
        required: true
      char_name:
        description: Name of the character
        range: CharName
      char_props:
        description: Unicode and other properties
        range: CharProp
        multivalued: true
      mappings:
        description: Mappings to other characters
        range: CharMapping
        multivalued: true
      figure:
        description: Graphical representation
        range: CharFigure
      desc:
        description: Description of the character
        range: string
      note:
        description: Editorial notes
        range: string
        multivalued: true

  # ---------------------------------------------------------------------------
  # GlyphDef - glyph
  # ---------------------------------------------------------------------------
  GlyphDef:
    description: >-
      Definition of a specific glyph (visual form) of a character.
      Corresponds to TEI <glyph> element. Used when multiple visual forms
      exist for a single character.
    class_uri: crm:E90_Symbolic_Object
    annotations:
      tei_element: glyph
      tei_module: gaiji
      glam_hypernym: TXT.GLY
    attributes:
      xml_id:
        description: Unique identifier for the glyph
        range: string
        required: true
      glyph_name:
        description: Name of the glyph
        range: GlyphName
      mappings:
        description: Mappings to characters
        range: CharMapping
        multivalued: true
      figure:
        description: Graphical representation
        range: CharFigure
      desc:
        description: Description of the glyph
        range: string
      note:
        description: Editorial notes
        range: string
        multivalued: true

  # ===========================================================================
  # NAME AND PROPERTY CLASSES
  # ===========================================================================

  # ---------------------------------------------------------------------------
  # CharName - charName
  # ---------------------------------------------------------------------------
  CharName:
    description: >-
      Name of a non-standard character. Corresponds to TEI <charName> element.
    # NOTE(review): skos:prefLabel is a SKOS property, not a class. The value
    # is kept unchanged so emitted RDF does not change - confirm the intended
    # mapping.
    class_uri: skos:prefLabel
    annotations:
      tei_element: charName
      tei_module: gaiji
      glam_hypernym: TXT.CHR.NAM
    attributes:
      value:
        description: Character name text
        range: string
        required: true

  # ---------------------------------------------------------------------------
  # GlyphName - glyphName
  # ---------------------------------------------------------------------------
  GlyphName:
    description: >-
      Name of a glyph. Corresponds to TEI <glyphName> element.
    # NOTE(review): skos:prefLabel is a SKOS property, not a class. The value
    # is kept unchanged so emitted RDF does not change - confirm the intended
    # mapping.
    class_uri: skos:prefLabel
    annotations:
      tei_element: glyphName
      tei_module: gaiji
      glam_hypernym: TXT.GLY.NAM
    attributes:
      value:
        description: Glyph name text
        range: string
        required: true

  # ---------------------------------------------------------------------------
  # CharProp - charProp
  # ---------------------------------------------------------------------------
  CharProp:
    description: >-
      A property of a character (Unicode name, category, etc.).
      Corresponds to TEI <charProp> element.
    class_uri: crm:E55_Type
    annotations:
      tei_element: charProp
      tei_module: gaiji
      glam_hypernym: TXT.CHR.PRP
    slots:
      - property_type
    attributes:
      local_name:
        description: Local property name element
        range: LocalProp
      # Class-local attribute: shares its name with the global unicode_name
      # slot (range string) but holds a UnicodeName object here.
      unicode_name:
        description: Unicode property name element
        range: UnicodeName
      value:
        description: Property value element
        range: PropValue

  # ---------------------------------------------------------------------------
  # LocalProp - localProp
  # ---------------------------------------------------------------------------
  LocalProp:
    description: >-
      Local (project-specific) property name for a character.
      Corresponds to TEI <localProp> element.
    annotations:
      tei_element: localProp
      tei_module: gaiji
      glam_hypernym: TXT.CHR.LCL
    slots:
      - property_name
    attributes:
      name:
        description: Property name
        range: string
        required: true
      value:
        description: Property value
        range: string
        required: true

  # ---------------------------------------------------------------------------
  # UnicodeName - unicodeName
  # ---------------------------------------------------------------------------
  UnicodeName:
    description: >-
      Official Unicode property name. Corresponds to TEI <unicodeName>
      element.
    # NOTE(review): depends on the unicode: prefix declared above, whose
    # namespace needs confirming.
    class_uri: unicode:Property
    annotations:
      tei_element: unicodeName
      tei_module: gaiji
      glam_hypernym: TXT.CHR.UNI
    slots:
      - unicode_name
    attributes:
      value:
        description: Unicode property name
        range: string
        required: true

  # ---------------------------------------------------------------------------
  # PropValue - value
  # ---------------------------------------------------------------------------
  PropValue:
    description: >-
      Value element for character properties. Corresponds to TEI <value>
      element within charProp.
    annotations:
      tei_element: value
      tei_module: gaiji
      glam_hypernym: TXT.CHR.VAL
    slots:
      - property_value
    attributes:
      value:
        description: Property value content
        range: string
        required: true

  # ===========================================================================
  # MAPPING CLASSES
  # ===========================================================================

  # ---------------------------------------------------------------------------
  # CharMapping - mapping
  # ---------------------------------------------------------------------------
  CharMapping:
    description: >-
      Maps a character to another character or codepoint.
      Corresponds to TEI <mapping> element. Used for normalization, variant
      relationships, and PUA mappings.
    # NOTE(review): skos:exactMatch is a SKOS property, not a class. The value
    # is kept unchanged so emitted RDF does not change - confirm the intended
    # mapping.
    class_uri: skos:exactMatch
    annotations:
      tei_element: mapping
      tei_module: gaiji
      glam_hypernym: TXT.CHR.MAP
    slots:
      - mapping_type
      - mapping_target
    attributes:
      type:
        description: Type of mapping (standard, PUA, variant, normalized)
        range: CharMappingTypeEnum
      target_char:
        description: Target character string
        range: string
      target_codepoint:
        description: Target Unicode codepoint
        range: string
      subtype:
        description: More specific mapping type
        range: string

  # ===========================================================================
  # FIGURE/GRAPHIC CLASSES
  # ===========================================================================

  # ---------------------------------------------------------------------------
  # CharFigure - figure
  # ---------------------------------------------------------------------------
  CharFigure:
    description: >-
      Graphical representation of a character or glyph.
      Corresponds to TEI <figure> element within char/glyph context.
      Contains graphic elements pointing to glyph images.
    class_uri: schema:ImageObject
    annotations:
      tei_element: figure
      tei_module: gaiji
      glam_hypernym: THG.IMG.CHR
    attributes:
      xml_id:
        description: Unique identifier
        range: string
      graphics:
        description: Graphic representations
        range: CharGraphic
        multivalued: true
      desc:
        description: Description of the figure
        range: string

  # ---------------------------------------------------------------------------
  # CharGraphic - graphic
  # ---------------------------------------------------------------------------
  CharGraphic:
    description: >-
      Graphic image representing a character glyph.
      Corresponds to TEI <graphic> element within figure.
    class_uri: schema:ImageObject
    annotations:
      tei_element: graphic
      tei_module: gaiji
      glam_hypernym: THG.IMG
    attributes:
      url:
        description: URL to the glyph image
        range: uri
        required: true
      width:
        description: Image width
        range: string
      height:
        description: Image height
        range: string
      mime_type:
        description: MIME type of the image
        range: string
      desc:
        description: Description
        range: string

  # ===========================================================================
  # REFERENCE CLASS
  # ===========================================================================

  # ---------------------------------------------------------------------------
  # GaijiRef - g
  # ---------------------------------------------------------------------------
  GaijiRef:
    description: >-
      Inline reference to a defined character or glyph.
      Corresponds to TEI <g> element. Used in text to mark non-standard
      characters that are defined in charDecl.
    class_uri: crm:E90_Symbolic_Object
    annotations:
      tei_element: g
      tei_module: gaiji
      glam_hypernym: TXT.CHR.REF
    slots:
      - glyph_ref
      - character_string
    attributes:
      ref:
        description: Reference to char or glyph definition (URI fragment)
        range: string
        required: true
      # Quoted defensively: the YAML 1.1 type repository lists bare n among
      # its boolean spellings.
      "n":
        description: Number or identifier
        range: string
      content:
        description: Fallback character content
        range: string

  # ===========================================================================
  # WRITING MODE CLASSES
  # ===========================================================================

  # ---------------------------------------------------------------------------
  # WritingModeSpec - (style attribute values)
  # ---------------------------------------------------------------------------
  WritingModeSpec:
    description: >-
      Specification of writing mode properties for a text block.
      Captures CSS Writing Modes properties as used in TEI @style attribute.
      Not a TEI element but a value structure.
    # No tei_element annotation: this class is a value structure, not a TEI
    # element, and an annotation cannot carry an empty (null) value.
    annotations:
      tei_module: gaiji
      glam_hypernym: TXT.WRT.MOD
    slots:
      - writing_direction
      - writing_mode
      - text_orientation
    # writing_mode and text_orientation come from the global slots listed
    # above; slot_usage only refines their descriptions for this class, so
    # each field is defined exactly once.
    slot_usage:
      writing_mode:
        description: CSS writing-mode property
      text_orientation:
        description: CSS text-orientation property
    attributes:
      direction:
        description: Text direction (ltr, rtl)
        range: WritingDirectionEnum
      transform:
        description: CSS transform for rotation
        range: string

  # ---------------------------------------------------------------------------
  # BiDiOverride - (for explicit directionality)
  # ---------------------------------------------------------------------------
  BiDiOverride:
    description: >-
      Specification for bidirectional text override.
      Captures Unicode Bidi Algorithm override values.
    # No tei_element annotation: this class has no corresponding TEI element.
    annotations:
      tei_module: gaiji
      glam_hypernym: TXT.WRT.BDI
    attributes:
      unicode_bidi:
        description: Unicode-bidi property (normal, embed, override, isolate)
        range: string
      direction:
        description: Direction property (ltr, rtl)
        range: WritingDirectionEnum

  # ===========================================================================
  # CJK-SPECIFIC CLASSES
  # ===========================================================================

  # ---------------------------------------------------------------------------
  # CJKCharProperties - (extension for CJK characters)
  # ---------------------------------------------------------------------------
  CJKCharProperties:
    description: >-
      Extended properties for CJK (Chinese, Japanese, Korean) characters.
      Captures radical, stroke count, readings, and variant relationships.
    # No tei_element annotation: this class has no corresponding TEI element.
    annotations:
      tei_module: gaiji
      glam_hypernym: TXT.CHR.CJK
    attributes:
      radical_number:
        description: Kangxi radical number (1-214)
        range: integer
        minimum_value: 1
        maximum_value: 214
      additional_strokes:
        description: Strokes beyond the radical
        range: integer
      total_strokes:
        description: Total stroke count
        range: integer
      pinyin:
        description: Mandarin pinyin reading(s)
        range: string
        multivalued: true
      on_reading:
        description: Japanese on'yomi reading(s)
        range: string
        multivalued: true
      kun_reading:
        description: Japanese kun'yomi reading(s)
        range: string
        multivalued: true
      hangul:
        description: Korean hangul reading(s)
        range: string
        multivalued: true
      vietnamese:
        description: Vietnamese reading(s)
        range: string
        multivalued: true
      simplified_variant:
        description: Simplified Chinese variant
        range: string
      traditional_variant:
        description: Traditional Chinese variant
        range: string
      japanese_variant:
        description: Japanese shinjitai variant
        range: string
      semantic_variant:
        description: Semantic variant characters
        range: string
        multivalued: true
      z_variant:
        description: Z-variant (graphical variant) characters
        range: string
        multivalued: true

  # ===========================================================================
  # MEDIEVAL MANUSCRIPT CLASSES
  # ===========================================================================

  # ---------------------------------------------------------------------------
  # MedievalCharProperties - (extension for medieval scripts)
  # ---------------------------------------------------------------------------
  MedievalCharProperties:
    description: >-
      Extended properties for medieval manuscript characters.
      Captures letterforms, abbreviation marks, and scribal conventions.
    # No tei_element annotation: this class has no corresponding TEI element.
    annotations:
      tei_module: gaiji
      glam_hypernym: TXT.CHR.MED
    attributes:
      letterform:
        description: Type of letterform (long s, round r, etc.)
        range: string
      abbreviation_mark:
        description: Type of abbreviation mark if applicable
        range: string
      expansion:
        description: Expanded form if abbreviation
        range: string
      script_type:
        description: Script type (caroline, gothic, etc.)
        range: string
      period:
        description: Typical period of use
        range: string
      region:
        description: Geographic region of use
        range: string

# =============================================================================
# ONTOLOGY MAPPINGS SUMMARY
# =============================================================================
#
# CIDOC-CRM:
#   - CharDecl: crm:E90_Symbolic_Object
#   - CharDef: crm:E90_Symbolic_Object
#   - GlyphDef: crm:E90_Symbolic_Object
#   - CharProp: crm:E55_Type
#   - GaijiRef: crm:E90_Symbolic_Object
#
# SKOS:
#   - CharName: skos:prefLabel
#   - GlyphName: skos:prefLabel
#   - CharMapping: skos:exactMatch
#
# Schema.org:
#   - CharFigure: schema:ImageObject
#   - CharGraphic: schema:ImageObject
#
# Unicode (W3C):
#   - UnicodeName: unicode:Property
#
# =============================================================================