# =============================================================================
# CH-Annotator Entity Annotation Convention - Modular Schema Index
# =============================================================================
# Convention ID: ch_annotator-v1_7_0
# Full Name: CH-Annotator (Cultural Heritage Annotator)
# Version: 1.7.0
# Date: 2025-12-02
# Renamed: 2025-12-06 (formerly GLAM-NER)
#
# This is the main entry point for the modular entity annotation convention.
# All modules are organized by category and can be imported individually
# or as a complete set.
#
# BREAKING CHANGES in v1.7.0:
#   - BEING → AGENT (AGT)
#   - PLACE → TOPONYM (TOP) + GEOMETRY (GEO)
#   - ORGANISATION → GROUP (GRP)
#   - TEMPORAL restructured with TimeML/TIMEX3
#   - TEXTUAL_REFERENCE → WORK (WRK) with FRBR model
#   - Added ROLE (ROL) hypernym
# =============================================================================
---
# Identity and release metadata for the convention itself.
schema:
  id: ch_annotator
  name: "CH-Annotator Entity Annotation Convention"
  # Quoted so loaders keep these as strings instead of inferring a float/date.
  version: "1.7.0"
  version_date: "2025-12-02"
  status: "stable"
  formerly_known_as: "GLAM-NER"
  description: |
    A comprehensive convention for annotating named entities in heritage,
    archival, library, and museum (GLAM) contexts. This convention prioritizes
    Digital Humanities standards (TEI, CIDOC-CRM, TimeML, FRBR, GeoSPARQL)
    over web-centric NER systems.

    The convention defines 10 hypernym categories with domain-agnostic
    subcategories suitable for:
    - Heritage institutions and collections
    - Web content and digital platforms
    - Publishing and scholarly communication
    - Archives and records management
    - Legal and governmental documents
    - Academic and research contexts

# =============================================================================
# MODULE IMPORTS
# =============================================================================
# Registry of module files, grouped by category. Every entry names its file
# with `path`.
# NOTE(review): paths look relative to this index file's directory - confirm
# against whatever loader consumes this index.
modules:
  # ---------------------------------------------------------------------------
  # CORE MODULES - Convention metadata and namespaces
  # ---------------------------------------------------------------------------
  core:
    - path: "core/convention.yaml"
      description: "Convention metadata, version, scope, DH authorities"
    - path: "core/namespaces.yaml"
      description: "All ontology namespace prefixes with categories"

  # ---------------------------------------------------------------------------
  # HYPERNYM MODULES - Entity type definitions
  # ---------------------------------------------------------------------------
  # Each entry pairs a three-letter `id` with its full `name` and a
  # `primary_class` written as a prefixed ontology term.
  # NOTE(review): the prefixes (crm:, geo:, org:, frbroo:) are presumably
  # declared in core/namespaces.yaml - confirm.
  hypernyms:
    # Agents and Persons
    - path: "hypernyms/agt.yaml"
      id: "AGT"
      name: "AGENT"
      description: "Humans, AI agents, animals, fictional beings"
      primary_class: "crm:E39_Actor"

    # Collectives and Organizations
    - path: "hypernyms/grp.yaml"
      id: "GRP"
      name: "GROUP"
      description: "Formal and informal collectives of agents"
      primary_class: "crm:E74_Group"

    # Place Names
    - path: "hypernyms/top.yaml"
      id: "TOP"
      name: "TOPONYM"
      description: "Place names as nominal references"
      primary_class: "crm:E53_Place"

    # Spatial Geometry
    - path: "hypernyms/geo.yaml"
      id: "GEO"
      name: "GEOMETRY"
      description: "Coordinates, polygons, spatial primitives"
      primary_class: "geo:Geometry"

    # Temporal Expressions
    - path: "hypernyms/tmp.yaml"
      id: "TMP"
      name: "TEMPORAL"
      description: "TimeML/TIMEX3 temporal expressions"
      primary_class: "crm:E52_Time-Span"

    # Names and Titles
    - path: "hypernyms/app.yaml"
      id: "APP"
      name: "APPELLATION"
      description: "Titles, collection names, awards, structured names"
      primary_class: "crm:E41_Appellation"

    # Social Positions
    - path: "hypernyms/rol.yaml"
      id: "ROL"
      name: "ROLE"
      description: "Occupations, honorifics, positions"
      primary_class: "org:Role"

    # Intellectual Works
    - path: "hypernyms/wrk.yaml"
      id: "WRK"
      name: "WORK"
      description: "FRBR Work/Expression/Manifestation/Item"
      primary_class: "frbroo:F1_Work"

    # Quantities
    - path: "hypernyms/qty.yaml"
      id: "QTY"
      name: "QUANTITY"
      description: "Counts, measurements, currency, ranges"
      primary_class: "crm:E54_Dimension"

    # Objects and Concepts
    - path: "hypernyms/thg.yaml"
      id: "THG"
      name: "THING"
      description: "Artworks, artifacts, events, concepts"
      primary_class: "crm:E70_Thing"

  # ---------------------------------------------------------------------------
  # PROCESSING MODULES - Extraction and annotation rules
  # ---------------------------------------------------------------------------
  processing:
    - path: "processing/exclusions.yaml"
      description: "Universal exclusion rules for all entity types"
    - path: "processing/double_tagging.yaml"
      description: "Permitted and prohibited double-tagging patterns"
    - path: "processing/relationships.yaml"
      description: "Ontology relationship patterns between entities"

  # ---------------------------------------------------------------------------
  # INTEGRATION MODULES - External system mappings
  # ---------------------------------------------------------------------------
  integrations:
    - path: "integrations/pico.yaml"
      description: "PiCo ontology integration for person observations"
    - path: "integrations/nif_nerd.yaml"
      description: "NIF/NERD/Open Annotation compatibility layer with GLAM-NER mappings"

  # ---------------------------------------------------------------------------
  # RELATIONSHIP MODULES - Family and social relationship patterns
  # ---------------------------------------------------------------------------
  relationships:
    - path: "relationships/family.yaml"
      description: "Family relationship properties and historical source patterns (34 relationship types, 13 languages)"
      # NOTE(review): presumably the number of lines in the referenced file;
      # it is a hard-coded value, so re-check it when the module changes.
      line_count: 1503
      # The 13 languages counted in the description above.
      languages:
        - "Dutch"
        - "Latin"
        - "German"
        - "Arabic"
        - "French"
        - "Ottoman Turkish"
        - "Hebrew"
        - "Persian/Farsi"
        - "Spanish"
        - "Portuguese"
        - "Italian"
        - "Greek"
        - "Russian"

  # ---------------------------------------------------------------------------
  # ADVANCED MODULES - Complex annotation patterns
  # ---------------------------------------------------------------------------
  advanced:
    - path: "advanced/document_structure.yaml"
      description: "DOC hypernym for layout semantic regions (30+ document region types)"
    - path: "advanced/relationship_annotations.yaml"
      description: "11 relationship hypernyms (REL.CRE, REL.TMP, REL.SPA, REL.SOC, etc.)"
    - path: "advanced/coreference.yaml"
      description: "Coreference resolution, mention types, entity linking, cross-document"
    - path: "advanced/uncertainty.yaml"
      description: "Confidence scoring, epistemic/linguistic uncertainty, calibration"

  # ---------------------------------------------------------------------------
  # TEI P5 MODULES - Text Encoding Initiative element schemas
  # ---------------------------------------------------------------------------
  # Unlike the other categories this one is a mapping: it has its own index
  # file and version, and lists its module files under a nested `modules` key.
  tei:
    index: "advanced/tei/index.yaml"
    # NOTE(review): presumably the TEI P5 Guidelines release these schemas
    # follow - confirm. Quoted so it stays a string.
    version: "4.10.2"
    modules:
      - path: "advanced/tei/core.yaml"
        description: "TEI P5 Chapter 3 - Core Elements (date, time, measure, bibl)"
        tei_module: "core"
        line_count: 1575
        status: "complete"
      - path: "advanced/tei/namesdates.yaml"
        description: "TEI P5 Chapter 14 - Names, Dates, People, Places (58 elements)"
        tei_module: "namesdates"
        line_count: 1962
        status: "complete"
      - path: "advanced/tei/msdescription.yaml"
        description: "TEI P5 Chapter 11 - Manuscript Description (58 elements)"
        tei_module: "msdescription"
        line_count: 1923
        status: "complete"
      - path: "advanced/tei/linking.yaml"
        description: "TEI P5 Chapter 17 - Linking, Segmentation, Alignment (20 elements)"
        tei_module: "linking"
        line_count: 1393
        status: "complete"

# =============================================================================
# HYPERNYM SUMMARY
# =============================================================================
# Quick-reference map from hypernym code to a one-line summary. The keys match
# the `id` values listed under modules.hypernyms.
hypernym_codes:
  AGT: "AGENT - Actors with agency (humans, AI, animals, fictional)"
  GRP: "GROUP - Collectives of agents (organizations, movements, families)"
  TOP: "TOPONYM - Place names as nominal references"
  GEO: "GEOMETRY - Spatial coordinates and geometric primitives"
  TMP: "TEMPORAL - Time expressions (TimeML/TIMEX3)"
  APP: "APPELLATION - Names and titles as linguistic constructs"
  ROL: "ROLE - Social positions and occupations"
  WRK: "WORK - Intellectual works (FRBR model)"
  QTY: "QUANTITY - Numeric values and measurements"
  THG: "THING - Objects, concepts, events"

# =============================================================================
# DIGITAL HUMANITIES AUTHORITIES
# =============================================================================
# External standards the convention refers to, grouped into three tiers:
# primary, secondary and deprecated.
authorities:
  primary:
    TEI_P5:
      name: "Text Encoding Initiative P5 Guidelines"
      url: "https://tei-c.org/guidelines/p5/"
      usage: "Document structure, person/place/org names, temporal expressions"
    CIDOC_CRM:
      name: "CIDOC Conceptual Reference Model"
      version: "7.1.3"
      url: "https://cidoc-crm.org/"
      usage: "Cultural heritage entity modeling, events, temporal entities"
    TimeML:
      name: "TimeML / TIMEX3"
      url: "https://www.timeml.org/"
      usage: "Temporal expression annotation (DATE, TIME, DURATION, SET)"
    FRBR_LRM:
      name: "IFLA Library Reference Model (FRBR successor)"
      url: "https://www.ifla.org/publications/ifla-library-reference-model/"
      usage: "Work/Expression/Manifestation/Item for bibliographic entities"
    GeoSPARQL:
      name: "OGC GeoSPARQL Standard"
      url: "https://www.ogc.org/standards/geosparql"
      usage: "Spatial geometry representation in RDF"
    Pleiades:
      name: "Pleiades Gazetteer of Ancient Places"
      url: "https://pleiades.stoa.org/"
      usage: "Historical and ancient world toponyms"

  secondary:
    W3C_Org:
      name: "W3C Organization Ontology"
      url: "https://www.w3.org/TR/vocab-org/"
      usage: "Organizational structure, roles, memberships"
    RiC_O:
      name: "Records in Contexts Ontology"
      url: "https://www.ica.org/standards/RiC/ontology"
      usage: "Archival description and record relationships"
    PNV:
      name: "Person Name Vocabulary"
      url: "https://w3id.org/pnv"
      usage: "Structured person name components"
    PiCo:
      name: "Person in Context Ontology"
      url: "https://w3id.org/pico"
      usage: "Person observations in historical sources"

  deprecated:
    NERD:
      name: "Named Entity Recognition and Disambiguation"
      status: "DEPRECATED for DH use"
      note: |
        NERD classes are retained ONLY for NLP pipeline interchange.
        Use DH authorities (TEI, CIDOC-CRM, TimeML, FRBR) for semantic precision.