# =============================================================================
# GLAM-NER Entity Annotation Convention v1.7.0
# Module: integrations/nif_nerd.yaml
# =============================================================================
# NLP Interchange Format (NIF), Named Entity Recognition and Disambiguation
# (NERD), and W3C Web Annotation (OA) patterns for cross-tool interoperability.
#
# Standards covered:
# - NIF 2.0: String/offset addressing for NLP tool interchange
# - NERD: Cross-system entity type mappings (10 core classes)
# - W3C OA: Web Annotation Data Model for annotation provenance
# - ITS 2.0 (itsrdf): Entity linking predicates
#
# References:
# - NIF: https://persistence.uni-leipzig.org/nlp2rdf/
# - NERD: http://nerd.eurecom.fr/
# - W3C OA: https://www.w3.org/TR/annotation-model/
# - ITS 2.0: https://www.w3.org/TR/its20/
# =============================================================================

nif_nerd_integration:

  # Module summary: lists the interchange standards this module covers and
  # states that NERD classes accompany GLAM-NER types rather than replace them.
  description: |
    This module defines how GLAM-NER annotations integrate with:
    - NIF 2.0: Standard format for NLP tool interchange (string/offset addressing)
    - NERD: Cross-system entity type mappings (10 core classes)
    - W3C OA: Web Annotation Data Model for annotation provenance
    - itsrdf: ITS 2.0 entity linking predicates

    These standards enable GLAM-NER annotations to be consumed by external
    NLP pipelines, linked data systems, and annotation aggregators.

    IMPORTANT: NERD mappings are for INTEROPERABILITY only. GLAM-NER types
    provide richer semantics than NERD's 10 classes. Always preserve GLAM-NER
    types alongside NERD mappings.

# ---------------------------------------------------------------------------
# NIF Core Patterns
# ---------------------------------------------------------------------------

  # Declares the two NIF classes used for annotations (document context and
  # entity phrase), the URI schemes for addressing spans, and the NIF
  # properties attached to each phrase.
  nif_core_patterns:
    description: |
      NIF (NLP Interchange Format) provides URI-based addressing for text
      spans. Every annotation creates a nif:Phrase linked to its context.

    context_class: "nif:Context"
    context_uri: "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#Context"
    context_note: |
      nif:Context represents the full text document. All annotations reference
      this context via nif:referenceContext.

    phrase_class: "nif:Phrase"
    phrase_uri: "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#Phrase"
    phrase_note: |
      nif:Phrase represents extracted entity mentions. Each GLAM-NER entity
      becomes a nif:Phrase with offset-based URI addressing.

    # -------------------------------------------------------------------------
    # URI Schemes
    # -------------------------------------------------------------------------

    # Three fragment-identifier schemes; only offset_based is marked preferred.
    uri_schemes:
      offset_based:
        pattern: "{source_url}#offset_{begin}_{end}"
        example: "https://example.org/page#offset_42_58"
        note: "Default scheme. Begin/end are character offsets (0-based)."
        preferred: true

      rfc5147:
        pattern: "{source_url}#char={begin},{end}"
        example: "https://example.org/page#char=42,58"
        note: "RFC 5147 fragment identifiers for text/plain."
        preferred: false

      context_hash:
        pattern: "{source_url}#hash_{context_length}_{hash}_{begin}_{end}"
        example: "https://example.org/page#hash_1024_a1b2c3_42_58"
        note: "Hash-based URIs for content-addressing. More stable across edits."
        preferred: false
        use_case: "Long-term preservation where source may change"

    # -------------------------------------------------------------------------
    # Core Properties
    # -------------------------------------------------------------------------

    # Each entry gives the prefixed name, full URI, value range, and whether
    # the property is required. Offsets are 0-based with an exclusive end.
    core_properties:
      - property: "nif:beginIndex"
        uri: "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#beginIndex"
        range: "xsd:nonNegativeInteger"
        description: "Character offset where entity span begins (0-based)"
        required: true

      - property: "nif:endIndex"
        uri: "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#endIndex"
        range: "xsd:nonNegativeInteger"
        description: "Character offset where entity span ends (exclusive)"
        required: true

      - property: "nif:anchorOf"
        uri: "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#anchorOf"
        range: "xsd:string"
        description: "The exact text string of the entity mention"
        required: true

      - property: "nif:referenceContext"
        uri: "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#referenceContext"
        range: "nif:Context"
        description: "Link to the document context containing this phrase"
        required: true

      - property: "nif:sourceUrl"
        uri: "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#sourceUrl"
        range: "xsd:anyURI"
        description: "Original URL of the source document"
        required: false

# ---------------------------------------------------------------------------
# NERD Class Mappings
# ---------------------------------------------------------------------------

  # One entry per NERD core class, listing the GLAM-NER type codes that map
  # to it and, where given, NERD subclasses of that class.
  # NOTE(review): the glam_ner_types lists below omit several codes that
  # glam_ner_to_nerd_mapping assigns to the same class (AGT, AGT.MYT,
  # AGT.AI, GRP.INF, TOP.HIS, GEO.PNT, GEO.BOX, THG.COL, THG.CON, APP,
  # APP.NAM, APP.TIT) - confirm whether these lists are meant to be exhaustive.
  nerd_class_mappings:
    description: |
      NERD (Named Entity Recognition and Disambiguation) defines 10 core
      entity classes that map across multiple NER systems (DBpedia Spotlight,
      AlchemyAPI, OpenCalais, Zemanta, etc.). GLAM-NER types map to NERD for
      cross-system interoperability.

      NOTE: NERD classes are intentionally broad. GLAM-NER types provide
      finer granularity. Always output BOTH for maximum utility.

    core_classes:
      - nerd_class: "nerd:Thing"
        nerd_uri: "http://nerd.eurecom.fr/ontology#Thing"
        description: "Generic entity (base class)"
        glam_ner_types:
          - "THG"
        note: "Fallback for entities not matching other NERD classes"

      - nerd_class: "nerd:Person"
        nerd_uri: "http://nerd.eurecom.fr/ontology#Person"
        description: "Human beings"
        glam_ner_types:
          - "AGT.PER"
          - "AGT.STF"
        subclasses:
          - "nerd:Astronaut"
          - "nerd:Politician"
          - "nerd:Artist"
          - "nerd:Athlete"
          - "nerd:Actor"

      - nerd_class: "nerd:Organization"
        nerd_uri: "http://nerd.eurecom.fr/ontology#Organization"
        description: "Organizations, companies, institutions"
        glam_ner_types:
          - "GRP"
          - "GRP.HER"
          - "GRP.COM"
          - "GRP.GOV"
          - "GRP.EDU"
          - "GRP.REL"
          - "GRP.UNT"
        subclasses:
          - "nerd:Company"
          - "nerd:SportsTeam"
          - "nerd:Band"
          - "nerd:University"
          - "nerd:Museum"

      - nerd_class: "nerd:Location"
        nerd_uri: "http://nerd.eurecom.fr/ontology#Location"
        description: "Geographic places and features"
        glam_ner_types:
          - "TOP"
          - "TOP.ADM"
          - "TOP.PPL"
          - "TOP.BLD"
          - "TOP.FAC"
          - "TOP.NAT"
          - "GEO"
        subclasses:
          - "nerd:City"
          - "nerd:Country"
          - "nerd:Continent"
          - "nerd:Region"
          - "nerd:Facility"

      - nerd_class: "nerd:Event"
        nerd_uri: "http://nerd.eurecom.fr/ontology#Event"
        description: "Named events"
        glam_ner_types:
          - "THG.EVT"
        subclasses:
          - "nerd:SportEvent"
          - "nerd:MusicFestival"
          - "nerd:Election"

      - nerd_class: "nerd:Time"
        nerd_uri: "http://nerd.eurecom.fr/ontology#Time"
        description: "Temporal expressions"
        glam_ner_types:
          - "TMP"
          - "TMP.DAT"
          - "TMP.TIM"
          - "TMP.DUR"
          - "TMP.SET"

      - nerd_class: "nerd:Amount"
        nerd_uri: "http://nerd.eurecom.fr/ontology#Amount"
        description: "Quantities and measurements"
        glam_ner_types:
          - "QTY"
          - "QTY.CNT"
          - "QTY.MSR"
          - "QTY.MON"
          - "QTY.PCT"
          - "QTY.ORD"

      - nerd_class: "nerd:Product"
        nerd_uri: "http://nerd.eurecom.fr/ontology#Product"
        description: "Products and creative works"
        glam_ner_types:
          - "THG.OBJ"
          - "WRK"
          - "WRK.WRK"
          - "WRK.EXP"
          - "WRK.MAN"
          - "WRK.ITM"
        subclasses:
          - "nerd:Album"
          - "nerd:Book"
          - "nerd:Movie"
          - "nerd:Software"

      - nerd_class: "nerd:Animal"
        nerd_uri: "http://nerd.eurecom.fr/ontology#Animal"
        description: "Animals"
        glam_ner_types:
          - "AGT.ANI"

      - nerd_class: "nerd:Function"
        nerd_uri: "http://nerd.eurecom.fr/ontology#Function"
        description: "Roles, titles, occupations"
        glam_ner_types:
          - "ROL"
          - "ROL.OCC"
          - "ROL.TIT"
          - "ROL.HON"
        note: "GLAM-NER treats roles as separate entities; NERD uses as attribute"

# ---------------------------------------------------------------------------
# W3C Web Annotation Patterns
# ---------------------------------------------------------------------------

  # Describes the oa:Annotation wrapper (body, target, motivation,
  # provenance) and the selector classes used to point at a text span or
  # DOM node.
  web_annotation_patterns:
    description: |
      W3C Web Annotation Data Model provides standard annotation structure
      with target selectors for precise text span identification.

    annotation_class: "oa:Annotation"
    annotation_uri: "http://www.w3.org/ns/oa#Annotation"

    structure:
      - property: "oa:hasBody"
        uri: "http://www.w3.org/ns/oa#hasBody"
        description: "The annotation content (entity type, confidence)"
        example: "The GLAM-NER entity classification"

      - property: "oa:hasTarget"
        uri: "http://www.w3.org/ns/oa#hasTarget"
        description: "What is being annotated (text span)"
        example: "TextPositionSelector pointing to entity mention"

      - property: "oa:motivatedBy"
        uri: "http://www.w3.org/ns/oa#motivatedBy"
        description: "Why the annotation was created"
        value: "oa:classifying"
        note: "NER annotations are classification activities"

      # NOTE(review): oa:annotatedBy and oa:annotatedAt originate from the
      # 2013 Open Annotation community draft. The W3C Web Annotation
      # Vocabulary referenced by this module expresses the same provenance
      # with dcterms:creator and dcterms:created - confirm which vocabulary
      # downstream consumers expect before emitting these predicates.
      - property: "oa:annotatedBy"
        uri: "http://www.w3.org/ns/oa#annotatedBy"
        description: "Agent that created the annotation"
        example: "Human curator, ML model, or pipeline"

      - property: "oa:annotatedAt"
        uri: "http://www.w3.org/ns/oa#annotatedAt"
        description: "When the annotation was created"
        range: "xsd:dateTime"

    # Selector kinds keyed by a short name; each lists its OA class, full
    # URI, and the properties that carry the selector's value.
    target_selectors:
      text_position:
        class: "oa:TextPositionSelector"
        uri: "http://www.w3.org/ns/oa#TextPositionSelector"
        properties:
          - property: "oa:start"
            description: "Start offset (0-based)"
          - property: "oa:end"
            description: "End offset (exclusive)"
        note: "Equivalent to NIF offset-based addressing"

      text_quote:
        class: "oa:TextQuoteSelector"
        uri: "http://www.w3.org/ns/oa#TextQuoteSelector"
        properties:
          - property: "oa:exact"
            description: "The exact matched text"
          - property: "oa:prefix"
            description: "Context before (for disambiguation)"
          - property: "oa:suffix"
            description: "Context after (for disambiguation)"
        note: "Provides context for robust text matching"

      xpath:
        class: "oa:XPathSelector"
        uri: "http://www.w3.org/ns/oa#XPathSelector"
        properties:
          - property: "rdf:value"
            description: "XPath expression to DOM node"
        note: "For HTML/XML sources with DOM structure"

      css:
        class: "oa:CssSelector"
        uri: "http://www.w3.org/ns/oa#CssSelector"
        properties:
          - property: "rdf:value"
            description: "CSS selector to DOM node"
        note: "Alternative to XPath for HTML sources"

# ---------------------------------------------------------------------------
# ITS 2.0 Entity Linking
# ---------------------------------------------------------------------------

  # Lists the itsrdf predicates used when a mention is linked to a knowledge
  # base entry: the entity URI, the source name, the linking confidence, and
  # the entity's class in the target ontology.
  itsrdf_entity_linking:
    description: |
      ITS 2.0 (Internationalization Tag Set) provides entity linking predicates
      for connecting mentions to knowledge bases.

    properties:
      - property: "itsrdf:taIdentRef"
        uri: "http://www.w3.org/2005/11/its/rdf#taIdentRef"
        description: "URI reference to entity in knowledge base"
        example: "http://www.wikidata.org/entity/Q190804"
        note: "Primary entity linking predicate"
        required_for_linking: true

      - property: "itsrdf:taSource"
        uri: "http://www.w3.org/2005/11/its/rdf#taSource"
        description: "Knowledge base source identifier"
        examples:
          - "Wikidata"
          - "DBpedia"
          - "GeoNames"
          - "VIAF"
          - "Getty AAT"
          - "Getty ULAN"
          - "Getty TGN"
        note: "Human-readable source name"

      - property: "itsrdf:taConfidence"
        uri: "http://www.w3.org/2005/11/its/rdf#taConfidence"
        description: "Linking confidence score (0.0-1.0)"
        range: "xsd:double"
        note: "Different from entity detection confidence"

      - property: "itsrdf:taClassRef"
        uri: "http://www.w3.org/2005/11/its/rdf#taClassRef"
        description: "URI of entity type in target ontology"
        example: "http://dbpedia.org/ontology/Museum"
        note: "Type in linked KB, may differ from GLAM-NER type"

# ---------------------------------------------------------------------------
# Complete GLAM-NER to NERD Mapping Table
# ---------------------------------------------------------------------------

  # Flat lookup table: one entry per GLAM-NER type code, giving the NERD core
  # class it maps to and, optionally, candidate NERD subclasses and a note.
  # Entries are grouped by GLAM-NER hypernym; glam_type and glam_code carry
  # the same value in every entry.
  glam_ner_to_nerd_mapping:
    description: "Complete mapping table from GLAM-NER v1.7.0 types to NERD classes"

    mappings:
      # AGENT hypernym mappings
      - glam_type: "AGT"
        glam_code: "AGT"
        nerd_class: "nerd:Person"
        note: "Generic agent defaults to Person"

      - glam_type: "AGT.PER"
        glam_code: "AGT.PER"
        nerd_class: "nerd:Person"
        nerd_subclasses: ["nerd:Artist", "nerd:Politician", "nerd:Astronaut", "nerd:Athlete"]

      - glam_type: "AGT.STF"
        glam_code: "AGT.STF"
        nerd_class: "nerd:Person"
        note: "Staff roles map to nerd:Function as secondary annotation"

      - glam_type: "AGT.ANI"
        glam_code: "AGT.ANI"
        nerd_class: "nerd:Animal"

      - glam_type: "AGT.MYT"
        glam_code: "AGT.MYT"
        nerd_class: "nerd:Person"
        note: "Mythological/fictional figures treated as Person in NERD"

      - glam_type: "AGT.AI"
        glam_code: "AGT.AI"
        nerd_class: "nerd:Thing"
        note: "AI agents have no NERD equivalent - map to Thing"

      # GROUP hypernym mappings
      - glam_type: "GRP"
        glam_code: "GRP"
        nerd_class: "nerd:Organization"

      - glam_type: "GRP.HER"
        glam_code: "GRP.HER"
        nerd_class: "nerd:Organization"
        nerd_subclasses: ["nerd:Museum"]

      - glam_type: "GRP.COM"
        glam_code: "GRP.COM"
        nerd_class: "nerd:Organization"
        nerd_subclasses: ["nerd:Company"]

      - glam_type: "GRP.GOV"
        glam_code: "GRP.GOV"
        nerd_class: "nerd:Organization"

      - glam_type: "GRP.EDU"
        glam_code: "GRP.EDU"
        nerd_class: "nerd:Organization"
        nerd_subclasses: ["nerd:University"]

      - glam_type: "GRP.REL"
        glam_code: "GRP.REL"
        nerd_class: "nerd:Organization"

      - glam_type: "GRP.UNT"
        glam_code: "GRP.UNT"
        nerd_class: "nerd:Organization"
        note: "Organizational units are Organizations in NERD"

      - glam_type: "GRP.INF"
        glam_code: "GRP.INF"
        nerd_class: "nerd:Organization"
        note: "Informal groups still map to Organization"

      # TOPONYM hypernym mappings
      - glam_type: "TOP"
        glam_code: "TOP"
        nerd_class: "nerd:Location"

      - glam_type: "TOP.ADM"
        glam_code: "TOP.ADM"
        nerd_class: "nerd:Location"
        nerd_subclasses: ["nerd:Country", "nerd:Region"]

      - glam_type: "TOP.PPL"
        glam_code: "TOP.PPL"
        nerd_class: "nerd:Location"
        nerd_subclasses: ["nerd:City"]

      - glam_type: "TOP.BLD"
        glam_code: "TOP.BLD"
        nerd_class: "nerd:Location"
        nerd_subclasses: ["nerd:Facility"]

      - glam_type: "TOP.FAC"
        glam_code: "TOP.FAC"
        nerd_class: "nerd:Location"
        nerd_subclasses: ["nerd:Facility"]

      - glam_type: "TOP.NAT"
        glam_code: "TOP.NAT"
        nerd_class: "nerd:Location"

      - glam_type: "TOP.HIS"
        glam_code: "TOP.HIS"
        nerd_class: "nerd:Location"
        note: "Historical place names"

      # GEOMETRY hypernym mappings
      - glam_type: "GEO"
        glam_code: "GEO"
        nerd_class: "nerd:Location"
        note: "Coordinates map to Location"

      - glam_type: "GEO.PNT"
        glam_code: "GEO.PNT"
        nerd_class: "nerd:Location"

      - glam_type: "GEO.BOX"
        glam_code: "GEO.BOX"
        nerd_class: "nerd:Location"

      # TEMPORAL hypernym mappings
      - glam_type: "TMP"
        glam_code: "TMP"
        nerd_class: "nerd:Time"

      - glam_type: "TMP.DAT"
        glam_code: "TMP.DAT"
        nerd_class: "nerd:Time"

      - glam_type: "TMP.TIM"
        glam_code: "TMP.TIM"
        nerd_class: "nerd:Time"

      - glam_type: "TMP.DUR"
        glam_code: "TMP.DUR"
        nerd_class: "nerd:Time"

      - glam_type: "TMP.SET"
        glam_code: "TMP.SET"
        nerd_class: "nerd:Time"

      # QUANTITY hypernym mappings
      - glam_type: "QTY"
        glam_code: "QTY"
        nerd_class: "nerd:Amount"

      - glam_type: "QTY.CNT"
        glam_code: "QTY.CNT"
        nerd_class: "nerd:Amount"

      - glam_type: "QTY.MSR"
        glam_code: "QTY.MSR"
        nerd_class: "nerd:Amount"

      - glam_type: "QTY.MON"
        glam_code: "QTY.MON"
        nerd_class: "nerd:Amount"

      - glam_type: "QTY.PCT"
        glam_code: "QTY.PCT"
        nerd_class: "nerd:Amount"

      - glam_type: "QTY.ORD"
        glam_code: "QTY.ORD"
        nerd_class: "nerd:Amount"

      # WORK hypernym mappings (FRBR)
      - glam_type: "WRK"
        glam_code: "WRK"
        nerd_class: "nerd:Product"

      - glam_type: "WRK.WRK"
        glam_code: "WRK.WRK"
        nerd_class: "nerd:Product"
        note: "FRBR Work level"

      - glam_type: "WRK.EXP"
        glam_code: "WRK.EXP"
        nerd_class: "nerd:Product"
        note: "FRBR Expression level"

      - glam_type: "WRK.MAN"
        glam_code: "WRK.MAN"
        nerd_class: "nerd:Product"
        nerd_subclasses: ["nerd:Book", "nerd:Album", "nerd:Movie"]
        note: "FRBR Manifestation level"

      - glam_type: "WRK.ITM"
        glam_code: "WRK.ITM"
        nerd_class: "nerd:Product"
        note: "FRBR Item level"

      # THING hypernym mappings
      - glam_type: "THG"
        glam_code: "THG"
        nerd_class: "nerd:Thing"

      - glam_type: "THG.OBJ"
        glam_code: "THG.OBJ"
        nerd_class: "nerd:Product"

      - glam_type: "THG.COL"
        glam_code: "THG.COL"
        nerd_class: "nerd:Thing"
        note: "Collections map to generic Thing (no NERD equivalent)"

      - glam_type: "THG.EVT"
        glam_code: "THG.EVT"
        nerd_class: "nerd:Event"
        nerd_subclasses: ["nerd:SportEvent", "nerd:MusicFestival"]

      - glam_type: "THG.CON"
        glam_code: "THG.CON"
        nerd_class: "nerd:Thing"
        note: "Abstract concepts"

      # APPELLATION hypernym mappings
      - glam_type: "APP"
        glam_code: "APP"
        nerd_class: "nerd:Thing"
        note: "Appellations have no direct NERD mapping"

      - glam_type: "APP.NAM"
        glam_code: "APP.NAM"
        nerd_class: "nerd:Thing"

      - glam_type: "APP.TIT"
        glam_code: "APP.TIT"
        nerd_class: "nerd:Product"
        note: "Work titles map to Product"

      # ROLE hypernym mappings
      - glam_type: "ROL"
        glam_code: "ROL"
        nerd_class: "nerd:Function"

      - glam_type: "ROL.OCC"
        glam_code: "ROL.OCC"
        nerd_class: "nerd:Function"

      - glam_type: "ROL.TIT"
        glam_code: "ROL.TIT"
        nerd_class: "nerd:Function"

      - glam_type: "ROL.HON"
        glam_code: "ROL.HON"
        nerd_class: "nerd:Function"

# ---------------------------------------------------------------------------
# Example NIF Annotation
# ---------------------------------------------------------------------------

  # Worked example: one 60-character sentence annotated with three entities,
  # serialized as Turtle. Offsets are 0-based with an exclusive end, so for
  # every phrase source_text[beginIndex:endIndex] must equal nif:anchorOf and
  # the offsets must match the "#offset_{begin}_{end}" fragment of its URI.
  example_nif_annotation:
    description: "Complete example of GLAM-NER annotation in NIF/OA format"
    source_text: "The Rijksmuseum in Amsterdam holds over one million objects."

    turtle_example: |
      @prefix nif: <http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#> .
      @prefix nerd: <http://nerd.eurecom.fr/ontology#> .
      @prefix oa: <http://www.w3.org/ns/oa#> .
      @prefix itsrdf: <http://www.w3.org/2005/11/its/rdf#> .
      @prefix glam: <https://w3id.org/glam-ner/> .
      @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

      # Document context
      <https://example.org/doc1#offset_0_60> a nif:Context, nif:OffsetBasedString ;
          nif:isString "The Rijksmuseum in Amsterdam holds over one million objects." ;
          nif:sourceUrl <https://example.org/doc1> ;
          nif:beginIndex "0"^^xsd:nonNegativeInteger ;
          nif:endIndex "60"^^xsd:nonNegativeInteger .

      # Entity 1: Rijksmuseum (GRP.HER)
      <https://example.org/doc1#offset_4_15> a nif:Phrase, nif:OffsetBasedString ;
          nif:anchorOf "Rijksmuseum" ;
          nif:beginIndex "4"^^xsd:nonNegativeInteger ;
          nif:endIndex "15"^^xsd:nonNegativeInteger ;
          nif:referenceContext <https://example.org/doc1#offset_0_60> ;
          a nerd:Organization, nerd:Museum ;
          glam:entityType "GRP.HER" ;
          glam:entityLabel "GROUP.HERITAGE" ;
          glam:confidence "0.95"^^xsd:double ;
          itsrdf:taIdentRef <http://www.wikidata.org/entity/Q190804> ;
          itsrdf:taSource "Wikidata" ;
          itsrdf:taConfidence "0.92"^^xsd:double .

      # Entity 2: Amsterdam (TOP.PPL)
      <https://example.org/doc1#offset_19_28> a nif:Phrase, nif:OffsetBasedString ;
          nif:anchorOf "Amsterdam" ;
          nif:beginIndex "19"^^xsd:nonNegativeInteger ;
          nif:endIndex "28"^^xsd:nonNegativeInteger ;
          nif:referenceContext <https://example.org/doc1#offset_0_60> ;
          a nerd:Location, nerd:City ;
          glam:entityType "TOP.PPL" ;
          glam:entityLabel "TOPONYM.POPULATED_PLACE" ;
          glam:confidence "0.98"^^xsd:double ;
          itsrdf:taIdentRef <http://www.wikidata.org/entity/Q727> ;
          itsrdf:taSource "Wikidata" .

      # Entity 3: one million (QTY.CNT)
      <https://example.org/doc1#offset_40_51> a nif:Phrase, nif:OffsetBasedString ;
          nif:anchorOf "one million" ;
          nif:beginIndex "40"^^xsd:nonNegativeInteger ;
          nif:endIndex "51"^^xsd:nonNegativeInteger ;
          nif:referenceContext <https://example.org/doc1#offset_0_60> ;
          a nerd:Amount ;
          glam:entityType "QTY.CNT" ;
          glam:entityLabel "QUANTITY.COUNT" ;
          glam:normalizedValue "1000000"^^xsd:integer ;
          glam:confidence "0.90"^^xsd:double .

# ---------------------------------------------------------------------------
# Output Format Guidelines
# ---------------------------------------------------------------------------

  # Checklist for serializers: fields every annotation carries, extra fields
  # for linked entities, and the supported RDF serializations (Turtle and
  # JSON-LD are both marked preferred).
  output_guidelines:
    description: "How to serialize GLAM-NER annotations for interoperability"

    always_include:
      - "GLAM-NER type code (glam:entityType)"
      - "GLAM-NER type label (glam:entityLabel)"
      - "NERD class mapping (rdf:type nerd:*)"
      - "NIF offsets (nif:beginIndex, nif:endIndex)"
      - "Anchor text (nif:anchorOf)"
      - "Confidence score (glam:confidence)"

    when_linked:
      - "itsrdf:taIdentRef - Entity URI"
      - "itsrdf:taSource - Knowledge base name"
      - "itsrdf:taConfidence - Linking confidence"

    serialization_formats:
      - format: "Turtle"
        extension: ".ttl"
        mime_type: "text/turtle"
        preferred: true

      - format: "JSON-LD"
        extension: ".jsonld"
        mime_type: "application/ld+json"
        preferred: true
        note: "Use @context for namespace prefixes"

      - format: "N-Triples"
        extension: ".nt"
        mime_type: "application/n-triples"
        preferred: false
        note: "For streaming/bulk processing"

# =============================================================================
# END OF MODULE
# =============================================================================