# glam/data/entity_annotation/modules/advanced/tei/certainty.yaml
# 2025-12-05 15:30:23 +01:00

# 662 lines
# 20 KiB
# YAML

# =============================================================================
# GLAM-NER: TEI P5 CERTAINTY MODULE (LINKML)
# =============================================================================
# Module: modules/advanced/tei/certainty.yaml
# TEI Chapter: 22 - Certainty, Precision, and Responsibility
# TEI Module: certainty
# Version: 1.0.0
# =============================================================================
#
# This module provides LinkML class definitions for TEI P5 certainty,
# precision, and responsibility elements. Essential for NER confidence
# scoring, scholarly annotation, and editorial attribution.
#
# TEI Source: https://tei-c.org/release/doc/tei-p5-doc/en/html/CE.html
#
# Key Element Groups:
# - Certainty: certainty (degree of confidence in markup)
# - Precision: precision (numerical precision)
# - Responsibility: respons (attribution of interpretations)
#
# NER Integration:
# - Map NER confidence scores to TEI certainty/@degree
# - Track annotation provenance with respons
# - Express precision of extracted values
#
# =============================================================================
# Schema identity. `id` is the schema IRI; `name` is its short identifier.
id: https://w3id.org/glam/ner/tei/certainty
name: tei-certainty
title: TEI Certainty Module for GLAM-NER
version: "1.0.0"

# CURIE prefix -> namespace IRI map. The entries must be nested under
# `prefixes`; at column 0 they would parse as unrelated top-level keys and
# leave `prefixes` null. `oa`, `crm` and `prov` are referenced by `class_uri`
# values in this file; `linkml` is referenced by `imports`.
prefixes:
  linkml: https://w3id.org/linkml/
  tei: http://www.tei-c.org/ns/1.0/
  glam: https://w3id.org/glam/ner/
  crm: http://www.cidoc-crm.org/cidoc-crm/
  prov: http://www.w3.org/ns/prov#
  oa: http://www.w3.org/ns/oa#
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/

# Elements without an explicit URI are minted under `glam`; slots and
# attributes without an explicit `range` default to `string`.
default_prefix: glam
default_range: string

imports:
  - linkml:types
# =============================================================================
# ENUMERATIONS
# =============================================================================
# Controlled vocabularies referenced by the `range` of slots and attributes
# in this schema. Each enum maps value name -> { description }.
enums:
  # ---------------------------------------------------------------------------
  # Certainty Locus
  # ---------------------------------------------------------------------------
  # Used as the range of `certainty_locus`, `resp_locus` and the `locus`
  # attribute of Certainty and Respons.
  # NOTE(review): TEI P5 appears to list only name, start, end, location and
  # value for @locus; `gi` looks like a local addition - confirm.
  CertaintyLocusEnum:
    description: >-
      Aspect of the markup to which certainty applies. From TEI @locus.
    permissible_values:
      name:
        description: Certainty about element/attribute name (e.g., persName vs placeName)
      value:
        description: Certainty about attribute value
      location:
        description: Certainty about element location/boundaries
      start:
        description: Certainty about start position
      end:
        description: Certainty about end position
      gi:
        description: Certainty about generic identifier (element type)

  # ---------------------------------------------------------------------------
  # Certainty Degree Categories
  # ---------------------------------------------------------------------------
  # The numeric bands exist only in the description text below; nothing in
  # this schema derives the category from a numeric degree.
  CertaintyDegreeEnum:
    description: >-
      Categorical certainty levels (alternative to numeric degree).
    permissible_values:
      high:
        description: High confidence (0.9-1.0)
      medium:
        description: Medium confidence (0.6-0.89)
      low:
        description: Low confidence (0.3-0.59)
      unknown:
        description: Unknown confidence (0.0-0.29)

  # ---------------------------------------------------------------------------
  # Precision Standard Reference
  # ---------------------------------------------------------------------------
  PrecisionStdRefEnum:
    description: >-
      Standard reference for precision values.
    permissible_values:
      iso8601:
        description: ISO 8601 date/time precision
      si:
        description: SI unit precision
      custom:
        description: Custom precision scheme

  # ---------------------------------------------------------------------------
  # Responsibility Type
  # ---------------------------------------------------------------------------
  ResponsibilityTypeEnum:
    description: >-
      Type of responsibility being attributed.
    permissible_values:
      transcription:
        description: Responsibility for transcription
      encoding:
        description: Responsibility for encoding/markup
      interpretation:
        description: Responsibility for interpretation
      annotation:
        description: Responsibility for annotation
      verification:
        description: Responsibility for verification
      ner:
        description: Responsibility for named entity recognition
      correction:
        description: Responsibility for correction

  # ---------------------------------------------------------------------------
  # Date Precision Type
  # ---------------------------------------------------------------------------
  # Range of DatePrecision.precision_type.
  DatePrecisionTypeEnum:
    description: >-
      Common date precision types.
    permissible_values:
      exact:
        description: Exact date known
      year:
        description: Year precision only
      decade:
        description: Decade precision
      century:
        description: Century precision
      quarter:
        description: Quarter of year/century
      half:
        description: Half of year/century
      early:
        description: Early part of period
      mid:
        description: Middle of period
      late:
        description: Late part of period
      circa:
        description: Approximate date
      before:
        description: Before a given date
      after:
        description: After a given date
      between:
        description: Between two dates

  # ---------------------------------------------------------------------------
  # Annotation Method
  # ---------------------------------------------------------------------------
  # Range of AnnotationProvenance.method.
  AnnotationMethodEnum:
    description: >-
      Method used to create an annotation.
    permissible_values:
      manual:
        description: Manual human annotation
      ner_ml:
        description: Machine learning NER
      ner_rule:
        description: Rule-based NER
      ner_hybrid:
        description: Hybrid NER (ML + rules)
      dictionary:
        description: Dictionary/gazetteer lookup
      regex:
        description: Regular expression matching
      crowdsourced:
        description: Crowdsourced annotation
      imported:
        description: Imported from external source
      inferred:
        description: Inferred from other annotations
# =============================================================================
# SLOTS (ATTRIBUTES)
# =============================================================================
# Schema-level slots, shared by name. Classes opt in by listing them under
# their own `slots:` key (Certainty, Precision and Respons do so in this file).
slots:
  # ---------------------------------------------------------------------------
  # Certainty attributes
  # ---------------------------------------------------------------------------
  certainty_target:
    description: Element(s) to which certainty applies (URI reference)
    range: string

  certainty_locus:
    description: Aspect of markup to which certainty applies
    range: CertaintyLocusEnum

  # Bounded to the closed interval [0.0, 1.0] by the min/max constraints.
  certainty_degree:
    description: Numeric degree of certainty (0.0 to 1.0)
    range: float
    minimum_value: 0.0
    maximum_value: 1.0

  certainty_degree_cat:
    description: Categorical degree of certainty
    range: CertaintyDegreeEnum

  asserted_value:
    description: Alternative value being asserted with given certainty
    range: string

  # The only multivalued slot in this section.
  given_condition:
    description: Condition(s) under which certainty applies (URI refs)
    range: string
    multivalued: true

  # ---------------------------------------------------------------------------
  # Precision attributes
  # ---------------------------------------------------------------------------
  # Unlike certainty_degree, no minimum/maximum is declared here.
  precision_degree:
    description: Numeric precision value
    range: float

  precision_stdref:
    description: Standard reference for precision
    range: PrecisionStdRefEnum

  # ---------------------------------------------------------------------------
  # Responsibility attributes
  # ---------------------------------------------------------------------------
  resp_target:
    description: Element(s) for which responsibility is attributed
    range: string

  # Reuses the certainty locus vocabulary rather than defining its own.
  resp_locus:
    description: Aspect of markup for which responsibility is attributed
    range: CertaintyLocusEnum

  resp_type:
    description: Type of responsibility
    range: ResponsibilityTypeEnum
# =============================================================================
# CLASSES
# =============================================================================
# Class definitions. Each class carries:
#   - `annotations`: the TEI element/module it corresponds to and a GLAM-NER
#     hypernym code,
#   - optionally `slots`: shared slots defined at schema level,
#   - `attributes`: slots defined inline and local to the class.
classes:
  # ===========================================================================
  # CERTAINTY CLASSES
  # ===========================================================================
  # ---------------------------------------------------------------------------
  # Certainty - certainty
  # ---------------------------------------------------------------------------
  # `target` and `locus` are the only required attributes.
  # NOTE(review): `oa:Certainty` does not appear to be a class of the W3C Web
  # Annotation Vocabulary - confirm the intended class IRI.
  # NOTE(review): the shared slots (certainty_target, certainty_locus,
  # certainty_degree, ...) and the TEI-named attributes (target, locus,
  # degree, ...) describe overlapping information under different names -
  # confirm both sets are intended.
  Certainty:
    description: >-
      Indicates the degree of certainty associated with some aspect of
      text markup. Corresponds to TEI <certainty> element. Essential for
      NER confidence scores and scholarly annotation.
    class_uri: oa:Certainty
    annotations:
      tei_element: certainty
      tei_module: certainty
      glam_hypernym: ANN.CRT
    slots:
      - certainty_target
      - certainty_locus
      - certainty_degree
      - certainty_degree_cat
      - asserted_value
      - given_condition
    attributes:
      xml_id:
        description: Unique identifier
        range: string
      target:
        description: Target element(s) - space-separated URI references
        range: string
        required: true
      locus:
        description: Aspect of markup to which certainty applies
        range: CertaintyLocusEnum
        required: true
      degree:
        description: Degree of certainty (0.0 = uncertain, 1.0 = certain)
        range: float
        minimum_value: 0.0
        maximum_value: 1.0
      assertedValue:
        description: Alternative value if certainty is about value
        range: string
      given:
        description: Conditional certainty - depends on other certainty elements
        range: string
      match:
        description: XPath expression for dynamic targets
        range: string
      desc:
        description: Description of the certainty assertion
        range: string
      resp:
        description: Person/entity responsible for certainty assertion
        range: string
      cert:
        description: Meta-certainty about this certainty assertion
        range: CertaintyDegreeEnum

  # ---------------------------------------------------------------------------
  # CertaintyGroup - (for grouping related certainty assertions)
  # ---------------------------------------------------------------------------
  # `tei_element: null` is a YAML null (the class has no TEI counterpart, per
  # its description), not the string "null".
  # NOTE(review): confirm the LinkML toolchain in use accepts a null
  # annotation value.
  CertaintyGroup:
    description: >-
      Groups related certainty assertions. Not a direct TEI element but
      useful for organizing multiple certainty statements about the same
      markup decision.
    annotations:
      tei_element: null
      tei_module: certainty
      glam_hypernym: ANN.CRT.GRP
    attributes:
      xml_id:
        description: Unique identifier
        range: string
      target:
        description: Common target for all certainty assertions
        range: string
      # At least one member assertion is required.
      certainties:
        description: Individual certainty assertions
        range: Certainty
        multivalued: true
        required: true
      desc:
        description: Description of the decision being evaluated
        range: string

  # ===========================================================================
  # PRECISION CLASSES
  # ===========================================================================
  # ---------------------------------------------------------------------------
  # Precision - precision
  # ---------------------------------------------------------------------------
  # No attribute is required on this class.
  # NOTE(review): the shared slot `precision_stdref` ranges over
  # PrecisionStdRefEnum while the `stdRef` attribute is a free string -
  # confirm the difference is intended.
  Precision:
    description: >-
      Indicates the numerical precision associated with a value.
      Corresponds to TEI <precision> element. Useful for dates,
      measurements, and other numeric values.
    class_uri: crm:E54_Dimension
    annotations:
      tei_element: precision
      tei_module: certainty
      glam_hypernym: ANN.PRC
    slots:
      - precision_degree
      - precision_stdref
    attributes:
      xml_id:
        description: Unique identifier
        range: string
      target:
        description: Target element(s) with imprecise value
        range: string
      match:
        description: XPath expression for dynamic targets
        range: string
      degree:
        description: Degree of precision (interpretation varies by context)
        range: float
      stdRef:
        description: Reference to precision standard
        range: string
      atLeast:
        description: Lower bound of imprecise value
        range: string
      atMost:
        description: Upper bound of imprecise value
        range: string
      desc:
        description: Description of the precision
        range: string
      resp:
        description: Person/entity responsible for precision assertion
        range: string

  # ---------------------------------------------------------------------------
  # DatePrecision - (specialized for dates)
  # ---------------------------------------------------------------------------
  # Subclass of Precision; adds date-specific attributes on top of the
  # inherited ones.
  DatePrecision:
    description: >-
      Specialized precision for date values. Captures common date
      precision patterns like "circa", "early", "late", decade precision.
    is_a: Precision
    annotations:
      tei_element: precision
      tei_module: certainty
      glam_hypernym: ANN.PRC.DAT
    attributes:
      precision_type:
        description: Type of date precision
        range: DatePrecisionTypeEnum
      circa:
        description: Whether date is approximate ("circa")
        range: boolean
      notBefore:
        description: Earliest possible date
        range: date
      notAfter:
        description: Latest possible date
        range: date

  # ===========================================================================
  # RESPONSIBILITY CLASSES
  # ===========================================================================
  # ---------------------------------------------------------------------------
  # Respons - respons
  # ---------------------------------------------------------------------------
  # `target`, `locus` and `resp` are all required.
  # NOTE(review): the shared slots (resp_target, resp_locus) overlap with the
  # `target` and `locus` attributes - confirm both sets are intended.
  Respons:
    description: >-
      Indicates the person or entity responsible for some aspect of
      the text encoding or interpretation. Corresponds to TEI <respons>.
      Essential for annotation provenance and editorial attribution.
    class_uri: prov:Attribution
    annotations:
      tei_element: respons
      tei_module: certainty
      glam_hypernym: ANN.RSP
    slots:
      - resp_target
      - resp_locus
      - resp_type
    attributes:
      xml_id:
        description: Unique identifier
        range: string
      target:
        description: Target element(s) for responsibility
        range: string
        required: true
      locus:
        description: Aspect of markup for which responsible
        range: CertaintyLocusEnum
        required: true
      resp:
        description: Reference to responsible person/entity
        range: string
        required: true
      match:
        description: XPath expression for dynamic targets
        range: string
      desc:
        description: Description of the responsibility
        range: string

  # ===========================================================================
  # NER-SPECIFIC CLASSES
  # ===========================================================================
  # ---------------------------------------------------------------------------
  # NERConfidence - (NER confidence annotation)
  # ---------------------------------------------------------------------------
  # Subclass of Certainty, so it inherits that class's required `target` and
  # `locus`. It repeats the parent's `class_uri`.
  # NOTE(review): `oa:Certainty` does not appear to be a class of the W3C Web
  # Annotation Vocabulary - confirm the intended class IRI.
  NERConfidence:
    description: >-
      Confidence score for a named entity recognition annotation.
      Maps NER pipeline output to TEI certainty patterns.
    is_a: Certainty
    class_uri: oa:Certainty
    annotations:
      tei_element: certainty
      tei_module: certainty
      glam_hypernym: ANN.CRT.NER
    attributes:
      ner_method:
        description: NER method/model used
        range: string
      model_version:
        description: Version of NER model
        range: string
      entity_type:
        description: Entity type detected
        range: string
      alternative_types:
        description: Alternative entity types with their scores
        range: NERAlternative
        multivalued: true
      context_window:
        description: Context used for prediction
        range: string

  # ---------------------------------------------------------------------------
  # NERAlternative - (alternative NER predictions)
  # ---------------------------------------------------------------------------
  # One (entity_type, score) pair; both are required. `score` is bounded to
  # [0.0, 1.0]. The `glam_hypernym` attribute is data on each instance and is
  # distinct from the class-level `glam_hypernym` annotation.
  NERAlternative:
    description: >-
      An alternative entity type prediction from NER with its
      confidence score.
    annotations:
      tei_element: null
      tei_module: certainty
      glam_hypernym: ANN.CRT.ALT
    attributes:
      entity_type:
        description: Alternative entity type
        range: string
        required: true
      score:
        description: Confidence score for this alternative
        range: float
        minimum_value: 0.0
        maximum_value: 1.0
        required: true
      glam_hypernym:
        description: GLAM-NER hypernym for this type
        range: string

  # ===========================================================================
  # ANNOTATION PROVENANCE CLASSES
  # ===========================================================================
  # ---------------------------------------------------------------------------
  # AnnotationProvenance - (comprehensive provenance)
  # ---------------------------------------------------------------------------
  # Only `created_by` is required. `certainty` holds a single Certainty;
  # `notes` is the only multivalued attribute.
  AnnotationProvenance:
    description: >-
      Complete provenance information for an annotation including
      creator, method, date, and certainty. Combines TEI respons
      with W3C PROV patterns.
    class_uri: prov:Activity
    annotations:
      tei_element: null
      tei_module: certainty
      glam_hypernym: ANN.PRV
    attributes:
      xml_id:
        description: Unique identifier
        range: string
      created_by:
        description: Agent who created the annotation
        range: string
        required: true
      created_at:
        description: When annotation was created
        range: datetime
      method:
        description: Method used (manual, NER, rule-based, etc.)
        range: AnnotationMethodEnum
      method_detail:
        description: Detailed method description
        range: string
      software:
        description: Software/tool used
        range: string
      software_version:
        description: Version of software
        range: string
      reviewed_by:
        description: Agent who reviewed the annotation
        range: string
      reviewed_at:
        description: When annotation was reviewed
        range: datetime
      certainty:
        description: Certainty of the annotation
        range: Certainty
      notes:
        description: Additional notes
        range: string
        multivalued: true
# =============================================================================
# ONTOLOGY MAPPINGS SUMMARY
# =============================================================================
#
# W3C Web Annotation:
# - Certainty: oa:Certainty
# - NERConfidence: oa:Certainty
#
# W3C PROV-O:
# - Respons: prov:Attribution
# - AnnotationProvenance: prov:Activity
#
# CIDOC-CRM:
# - Precision: crm:E54_Dimension
#
# =============================================================================