# =============================================================================
# GLAM-NER: TEI P5 CERTAINTY MODULE (LINKML)
# =============================================================================
# Module: modules/advanced/tei/certainty.yaml
# TEI Chapter: 22 - Certainty, Precision, and Responsibility
# TEI Module: certainty
# Version: 1.0.0
# =============================================================================
#
# This module provides LinkML class definitions for TEI P5 certainty,
# precision, and responsibility elements. Essential for NER confidence
# scoring, scholarly annotation, and editorial attribution.
#
# TEI Source: https://tei-c.org/release/doc/tei-p5-doc/en/html/CE.html
#
# Key Element Groups:
#   - Certainty: certainty (degree of confidence in markup)
#   - Precision: precision (numerical precision)
#   - Responsibility: respons (attribution of interpretations)
#
# NER Integration:
#   - Map NER confidence scores to TEI certainty/@degree
#   - Track annotation provenance with respons
#   - Express precision of extracted values
#
# =============================================================================

# Schema identity: canonical URI, short name, human-readable title, version.
id: https://w3id.org/glam/ner/tei/certainty
name: tei-certainty
title: TEI Certainty Module for GLAM-NER
# Quoted so the version is always read as a string.
version: "1.0.0"

# CURIE prefix map. These prefixes are used by `class_uri` values and by the
# `linkml:types` import in this schema.
prefixes:
  linkml: https://w3id.org/linkml/
  tei: http://www.tei-c.org/ns/1.0/
  glam: https://w3id.org/glam/ner/
  crm: http://www.cidoc-crm.org/cidoc-crm/
  prov: http://www.w3.org/ns/prov#
  oa: http://www.w3.org/ns/oa#
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/

# Elements without an explicit URI are minted under the `glam` prefix;
# slots/attributes without an explicit `range` default to `string`.
default_prefix: glam
default_range: string

# Built-in LinkML types (float, boolean, date, datetime are used as ranges
# elsewhere in this schema).
imports:
  - linkml:types

# =============================================================================
# ENUMERATIONS
# =============================================================================
# Controlled vocabularies referenced as `range` by the slots and class
# attributes of this schema.

enums:

  # ---------------------------------------------------------------------------
  # Certainty Locus
  # ---------------------------------------------------------------------------
  # Used as the range of `certainty_locus`, `resp_locus`, and the `locus`
  # attributes of Certainty and Respons.
  CertaintyLocusEnum:
    description: >-
      Aspect of the markup to which certainty applies. From TEI @locus.
    permissible_values:
      name:
        description: Certainty about element/attribute name (e.g., persName vs placeName)
      value:
        description: Certainty about attribute value
      location:
        description: Certainty about element location/boundaries
      start:
        description: Certainty about start position
      end:
        description: Certainty about end position
      gi:
        description: Certainty about generic identifier (element type)

  # ---------------------------------------------------------------------------
  # Certainty Degree Categories
  # ---------------------------------------------------------------------------
  # NOTE(review): the numeric bands quoted in the descriptions below leave
  # small gaps (e.g. 0.895 falls between "medium" and "high") - confirm the
  # intended boundaries before using them to bucket numeric scores.
  CertaintyDegreeEnum:
    description: >-
      Categorical certainty levels (alternative to numeric degree).
    permissible_values:
      high:
        description: High confidence (0.9-1.0)
      medium:
        description: Medium confidence (0.6-0.89)
      low:
        description: Low confidence (0.3-0.59)
      unknown:
        description: Unknown confidence (0.0-0.29)

  # ---------------------------------------------------------------------------
  # Precision Standard Reference
  # ---------------------------------------------------------------------------
  PrecisionStdRefEnum:
    description: >-
      Standard reference for precision values.
    permissible_values:
      iso8601:
        description: ISO 8601 date/time precision
      si:
        description: SI unit precision
      custom:
        description: Custom precision scheme

  # ---------------------------------------------------------------------------
  # Responsibility Type
  # ---------------------------------------------------------------------------
  ResponsibilityTypeEnum:
    description: >-
      Type of responsibility being attributed.
    permissible_values:
      transcription:
        description: Responsibility for transcription
      encoding:
        description: Responsibility for encoding/markup
      interpretation:
        description: Responsibility for interpretation
      annotation:
        description: Responsibility for annotation
      verification:
        description: Responsibility for verification
      ner:
        description: Responsibility for named entity recognition
      correction:
        description: Responsibility for correction

  # ---------------------------------------------------------------------------
  # Date Precision Type
  # ---------------------------------------------------------------------------
  DatePrecisionTypeEnum:
    description: >-
      Common date precision types.
    permissible_values:
      exact:
        description: Exact date known
      year:
        description: Year precision only
      decade:
        description: Decade precision
      century:
        description: Century precision
      quarter:
        description: Quarter of year/century
      half:
        description: Half of year/century
      early:
        description: Early part of period
      mid:
        description: Middle of period
      late:
        description: Late part of period
      circa:
        description: Approximate date
      before:
        description: Before a given date
      after:
        description: After a given date
      between:
        description: Between two dates

  # ---------------------------------------------------------------------------
  # Annotation Method
  # ---------------------------------------------------------------------------
  AnnotationMethodEnum:
    description: >-
      Method used to create an annotation.
    permissible_values:
      manual:
        description: Manual human annotation
      ner_ml:
        description: Machine learning NER
      ner_rule:
        description: Rule-based NER
      ner_hybrid:
        description: Hybrid NER (ML + rules)
      dictionary:
        description: Dictionary/gazetteer lookup
      regex:
        description: Regular expression matching
      crowdsourced:
        description: Crowdsourced annotation
      imported:
        description: Imported from external source
      inferred:
        description: Inferred from other annotations


# =============================================================================
# SLOTS (ATTRIBUTES)
# =============================================================================
# Schema-level slots; classes opt in to them through their `slots:` lists.

slots:

  # ---------------------------------------------------------------------------
  # Certainty attributes
  # ---------------------------------------------------------------------------
  certainty_target:
    description: Element(s) to which certainty applies (URI reference)
    range: string

  certainty_locus:
    description: Aspect of markup to which certainty applies
    range: CertaintyLocusEnum

  # Bounded to the closed interval [0.0, 1.0].
  certainty_degree:
    description: Numeric degree of certainty (0.0 to 1.0)
    range: float
    minimum_value: 0.0
    maximum_value: 1.0

  certainty_degree_cat:
    description: Categorical degree of certainty
    range: CertaintyDegreeEnum

  asserted_value:
    description: Alternative value being asserted with given certainty
    range: string

  given_condition:
    description: Condition(s) under which certainty applies (URI refs)
    range: string
    multivalued: true

  # ---------------------------------------------------------------------------
  # Precision attributes
  # ---------------------------------------------------------------------------
  # Unlike certainty_degree, no minimum/maximum bound is declared here.
  precision_degree:
    description: Numeric precision value
    range: float

  precision_stdref:
    description: Standard reference for precision
    range: PrecisionStdRefEnum

  # ---------------------------------------------------------------------------
  # Responsibility attributes
  # ---------------------------------------------------------------------------
  resp_target:
    description: Element(s) for which responsibility is attributed
    range: string

  # Shares the locus vocabulary with certainty_locus.
  resp_locus:
    description: Aspect of markup for which responsibility is attributed
    range: CertaintyLocusEnum

  resp_type:
    description: Type of responsibility
    range: ResponsibilityTypeEnum


# =============================================================================
# CLASSES
# =============================================================================
# Each class carries `annotations` with three tags: `tei_element` (null when
# the class has no direct TEI counterpart), `tei_module`, and `glam_hypernym`.

classes:

  # ===========================================================================
  # CERTAINTY CLASSES
  # ===========================================================================

  # ---------------------------------------------------------------------------
  # Certainty - certainty
  # ---------------------------------------------------------------------------
  # NOTE(review): this class lists the schema-level certainty_* slots AND
  # declares class-local attributes (target, locus, degree, assertedValue,
  # given) that describe the same TEI attributes - confirm both are intended.
  # NOTE(review): `oa:Certainty` does not appear to be a term of the W3C Web
  # Annotation Vocabulary - confirm the intended class_uri.
  Certainty:
    description: >-
      Indicates the degree of certainty associated with some aspect of
      text markup. Corresponds to TEI <certainty> element. Essential for
      NER confidence scores and scholarly annotation.
    class_uri: oa:Certainty

    annotations:
      tei_element: certainty
      tei_module: certainty
      glam_hypernym: ANN.CRT

    slots:
      - certainty_target
      - certainty_locus
      - certainty_degree
      - certainty_degree_cat
      - asserted_value
      - given_condition

    attributes:
      xml_id:
        description: Unique identifier
        range: string

      target:
        description: Target element(s) - space-separated URI references
        range: string
        required: true

      locus:
        description: Aspect of markup to which certainty applies
        range: CertaintyLocusEnum
        required: true

      degree:
        description: Degree of certainty (0.0 = uncertain, 1.0 = certain)
        range: float
        minimum_value: 0.0
        maximum_value: 1.0

      assertedValue:
        description: Alternative value if certainty is about value
        range: string

      given:
        description: Conditional certainty - depends on other certainty elements
        range: string

      match:
        description: XPath expression for dynamic targets
        range: string

      desc:
        description: Description of the certainty assertion
        range: string

      resp:
        description: Person/entity responsible for certainty assertion
        range: string

      cert:
        description: Meta-certainty about this certainty assertion
        range: CertaintyDegreeEnum

  # ---------------------------------------------------------------------------
  # CertaintyGroup - (for grouping related certainty assertions)
  # ---------------------------------------------------------------------------
  # Container class: at least one Certainty is required via `certainties`.
  CertaintyGroup:
    description: >-
      Groups related certainty assertions. Not a direct TEI element but
      useful for organizing multiple certainty statements about the same
      markup decision.

    annotations:
      tei_element: null
      tei_module: certainty
      glam_hypernym: ANN.CRT.GRP

    attributes:
      xml_id:
        description: Unique identifier
        range: string

      target:
        description: Common target for all certainty assertions
        range: string

      certainties:
        description: Individual certainty assertions
        range: Certainty
        multivalued: true
        required: true

      desc:
        description: Description of the decision being evaluated
        range: string


  # ===========================================================================
  # PRECISION CLASSES
  # ===========================================================================

  # ---------------------------------------------------------------------------
  # Precision - precision
  # ---------------------------------------------------------------------------
  # NOTE(review): the `stdRef` attribute is a free string while the
  # `precision_stdref` slot listed above it is constrained to
  # PrecisionStdRefEnum - confirm which form consumers should populate.
  Precision:
    description: >-
      Indicates the numerical precision associated with a value.
      Corresponds to TEI <precision> element. Useful for dates,
      measurements, and other numeric values.
    class_uri: crm:E54_Dimension

    annotations:
      tei_element: precision
      tei_module: certainty
      glam_hypernym: ANN.PRC

    slots:
      - precision_degree
      - precision_stdref

    attributes:
      xml_id:
        description: Unique identifier
        range: string

      target:
        description: Target element(s) with imprecise value
        range: string

      match:
        description: XPath expression for dynamic targets
        range: string

      degree:
        description: Degree of precision (interpretation varies by context)
        range: float

      stdRef:
        description: Reference to precision standard
        range: string

      atLeast:
        description: Lower bound of imprecise value
        range: string

      atMost:
        description: Upper bound of imprecise value
        range: string

      desc:
        description: Description of the precision
        range: string

      resp:
        description: Person/entity responsible for precision assertion
        range: string

  # ---------------------------------------------------------------------------
  # DatePrecision - (specialized for dates)
  # ---------------------------------------------------------------------------
  # Subclass of Precision; adds date-specific attributes on top of the
  # inherited ones.
  DatePrecision:
    description: >-
      Specialized precision for date values. Captures common date
      precision patterns like "circa", "early", "late", decade precision.
    is_a: Precision

    annotations:
      tei_element: precision
      tei_module: certainty
      glam_hypernym: ANN.PRC.DAT

    attributes:
      precision_type:
        description: Type of date precision
        range: DatePrecisionTypeEnum

      circa:
        description: Whether date is approximate ("circa")
        range: boolean

      notBefore:
        description: Earliest possible date
        range: date

      notAfter:
        description: Latest possible date
        range: date


  # ===========================================================================
  # RESPONSIBILITY CLASSES
  # ===========================================================================

  # ---------------------------------------------------------------------------
  # Respons - respons
  # ---------------------------------------------------------------------------
  # `target`, `locus`, and `resp` are all required on this class.
  Respons:
    description: >-
      Indicates the person or entity responsible for some aspect of
      the text encoding or interpretation. Corresponds to TEI <respons>.
      Essential for annotation provenance and editorial attribution.
    class_uri: prov:Attribution

    annotations:
      tei_element: respons
      tei_module: certainty
      glam_hypernym: ANN.RSP

    slots:
      - resp_target
      - resp_locus
      - resp_type

    attributes:
      xml_id:
        description: Unique identifier
        range: string

      target:
        description: Target element(s) for responsibility
        range: string
        required: true

      locus:
        description: Aspect of markup for which responsible
        range: CertaintyLocusEnum
        required: true

      resp:
        description: Reference to responsible person/entity
        range: string
        required: true

      match:
        description: XPath expression for dynamic targets
        range: string

      desc:
        description: Description of the responsibility
        range: string


  # ===========================================================================
  # NER-SPECIFIC CLASSES
  # ===========================================================================

  # ---------------------------------------------------------------------------
  # NERConfidence - (NER confidence annotation)
  # ---------------------------------------------------------------------------
  # Subclass of Certainty that restates the parent's class_uri.
  # NOTE(review): `oa:Certainty` does not appear to be a term of the W3C Web
  # Annotation Vocabulary - confirm the intended class_uri.
  NERConfidence:
    description: >-
      Confidence score for a named entity recognition annotation.
      Maps NER pipeline output to TEI certainty patterns.
    is_a: Certainty
    class_uri: oa:Certainty

    annotations:
      tei_element: certainty
      tei_module: certainty
      glam_hypernym: ANN.CRT.NER

    attributes:
      ner_method:
        description: NER method/model used
        range: string

      model_version:
        description: Version of NER model
        range: string

      entity_type:
        description: Entity type detected
        range: string

      alternative_types:
        description: Alternative entity types with their scores
        range: NERAlternative
        multivalued: true

      context_window:
        description: Context used for prediction
        range: string

  # ---------------------------------------------------------------------------
  # NERAlternative - (alternative NER predictions)
  # ---------------------------------------------------------------------------
  # Value object referenced by NERConfidence.alternative_types.
  NERAlternative:
    description: >-
      An alternative entity type prediction from NER with its
      confidence score.

    annotations:
      tei_element: null
      tei_module: certainty
      glam_hypernym: ANN.CRT.ALT

    attributes:
      entity_type:
        description: Alternative entity type
        range: string
        required: true

      score:
        description: Confidence score for this alternative
        range: float
        minimum_value: 0.0
        maximum_value: 1.0
        required: true

      glam_hypernym:
        description: GLAM-NER hypernym for this type
        range: string


  # ===========================================================================
  # ANNOTATION PROVENANCE CLASSES
  # ===========================================================================

  # ---------------------------------------------------------------------------
  # AnnotationProvenance - (comprehensive provenance)
  # ---------------------------------------------------------------------------
  # Only `created_by` is required; every other attribute is optional.
  AnnotationProvenance:
    description: >-
      Complete provenance information for an annotation including
      creator, method, date, and certainty. Combines TEI respons
      with W3C PROV patterns.
    class_uri: prov:Activity

    annotations:
      tei_element: null
      tei_module: certainty
      glam_hypernym: ANN.PRV

    attributes:
      xml_id:
        description: Unique identifier
        range: string

      created_by:
        description: Agent who created the annotation
        range: string
        required: true

      created_at:
        description: When annotation was created
        range: datetime

      method:
        description: Method used (manual, NER, rule-based, etc.)
        range: AnnotationMethodEnum

      method_detail:
        description: Detailed method description
        range: string

      software:
        description: Software/tool used
        range: string

      software_version:
        description: Version of software
        range: string

      reviewed_by:
        description: Agent who reviewed the annotation
        range: string

      reviewed_at:
        description: When annotation was reviewed
        range: datetime

      certainty:
        description: Certainty of the annotation
        range: Certainty

      notes:
        description: Additional notes
        range: string
        multivalued: true


# =============================================================================
# ONTOLOGY MAPPINGS SUMMARY
# =============================================================================
#
# W3C Web Annotation:
#   - Certainty: oa:Certainty
#   - NERConfidence: oa:Certainty
#
# W3C PROV-O:
#   - Respons: prov:Attribution
#   - AnnotationProvenance: prov:Activity
#
# CIDOC-CRM:
#   - Precision: crm:E54_Dimension
#
# =============================================================================