# =============================================================================
# GLAM-NER Entity Annotation Convention v1.7.0
# Module: integrations/nif_nerd.yaml
# =============================================================================
# NLP Interchange Format (NIF), Named Entity Recognition and Disambiguation
# (NERD), and W3C Web Annotation (OA) patterns for cross-tool interoperability.
#
# Standards covered:
#   - NIF 2.0: String/offset addressing for NLP tool interchange
#   - NERD: Cross-system entity type mappings (10 core classes)
#   - W3C OA: Web Annotation Data Model for annotation provenance
#   - ITS 2.0 (itsrdf): Entity linking predicates
#
# References:
#   - NIF: https://persistence.uni-leipzig.org/nlp2rdf/
#   - NERD: http://nerd.eurecom.fr/
#   - W3C OA: https://www.w3.org/TR/annotation-model/
#   - ITS 2.0: https://www.w3.org/TR/its20/
#
# Layout: a single root mapping (nif_nerd_integration) holding one sub-mapping
# per standard, the full GLAM-NER -> NERD lookup table, a worked Turtle
# example, and serialization guidelines.
# =============================================================================

nif_nerd_integration:
  description: |
    This module defines how GLAM-NER annotations integrate with:

    - NIF 2.0: Standard format for NLP tool interchange (string/offset addressing)
    - NERD: Cross-system entity type mappings (10 core classes)
    - W3C OA: Web Annotation Data Model for annotation provenance
    - itsrdf: ITS 2.0 entity linking predicates

    These standards enable GLAM-NER annotations to be consumed by external
    NLP pipelines, linked data systems, and annotation aggregators.

    IMPORTANT: NERD mappings are for INTEROPERABILITY only. GLAM-NER types
    provide richer semantics than NERD's 10 classes. Always preserve GLAM-NER
    types alongside NERD mappings.

  # ---------------------------------------------------------------------------
  # NIF Core Patterns
  # ---------------------------------------------------------------------------
  nif_core_patterns:
    description: |
      NIF (NLP Interchange Format) provides URI-based addressing for text spans.
      Every annotation creates a nif:Phrase linked to its context.

    context_class: "nif:Context"
    context_uri: "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#Context"
    context_note: |
      nif:Context represents the full text document. All annotations
      reference this context via nif:referenceContext.

    phrase_class: "nif:Phrase"
    phrase_uri: "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#Phrase"
    phrase_note: |
      nif:Phrase represents extracted entity mentions. Each GLAM-NER entity
      becomes a nif:Phrase with offset-based URI addressing.

    # -------------------------------------------------------------------------
    # URI Schemes
    # -------------------------------------------------------------------------
    # Exactly one scheme is marked preferred; the others are alternatives.
    uri_schemes:
      offset_based:
        pattern: "{source_url}#offset_{begin}_{end}"
        example: "https://example.org/page#offset_42_58"
        note: "Default scheme. Begin/end are character offsets (0-based)."
        preferred: true

      rfc5147:
        pattern: "{source_url}#char={begin},{end}"
        example: "https://example.org/page#char=42,58"
        note: "RFC 5147 fragment identifiers for text/plain."
        preferred: false

      context_hash:
        pattern: "{source_url}#hash_{context_length}_{hash}_{begin}_{end}"
        example: "https://example.org/page#hash_1024_a1b2c3_42_58"
        note: "Hash-based URIs for content-addressing. More stable across edits."
        preferred: false
        use_case: "Long-term preservation where source may change"

    # -------------------------------------------------------------------------
    # Core Properties
    # -------------------------------------------------------------------------
    # Offsets are 0-based with an exclusive end, so
    # endIndex - beginIndex == length of nif:anchorOf.
    core_properties:
      - property: "nif:beginIndex"
        uri: "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#beginIndex"
        range: "xsd:nonNegativeInteger"
        description: "Character offset where entity span begins (0-based)"
        required: true

      - property: "nif:endIndex"
        uri: "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#endIndex"
        range: "xsd:nonNegativeInteger"
        description: "Character offset where entity span ends (exclusive)"
        required: true

      - property: "nif:anchorOf"
        uri: "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#anchorOf"
        range: "xsd:string"
        description: "The exact text string of the entity mention"
        required: true

      - property: "nif:referenceContext"
        uri: "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#referenceContext"
        range: "nif:Context"
        description: "Link to the document context containing this phrase"
        required: true

      - property: "nif:sourceUrl"
        uri: "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#sourceUrl"
        range: "xsd:anyURI"
        description: "Original URL of the source document"
        required: false

  # ---------------------------------------------------------------------------
  # NERD Class Mappings
  # ---------------------------------------------------------------------------
  nerd_class_mappings:
    description: |
      NERD (Named Entity Recognition and Disambiguation) defines 10 core
      entity classes that map across multiple NER systems (DBpedia Spotlight,
      AlchemyAPI, OpenCalais, Zemanta, etc.).

      GLAM-NER types map to NERD for cross-system interoperability.

      NOTE: NERD classes are intentionally broad. GLAM-NER types provide
      finer granularity. Always output BOTH for maximum utility.

    # NOTE(review): the glam_ner_types lists below omit several codes that
    # glam_ner_to_nerd_mapping assigns to the same NERD class (AGT, AGT.MYT,
    # AGT.AI, GRP.INF, TOP.HIS, GEO.PNT, GEO.BOX, THG.COL, THG.CON, APP,
    # APP.NAM, APP.TIT). Confirm whether these lists are meant to be exhaustive.
    core_classes:
      - nerd_class: "nerd:Thing"
        nerd_uri: "http://nerd.eurecom.fr/ontology#Thing"
        description: "Generic entity (base class)"
        glam_ner_types:
          - "THG"
        note: "Fallback for entities not matching other NERD classes"

      - nerd_class: "nerd:Person"
        nerd_uri: "http://nerd.eurecom.fr/ontology#Person"
        description: "Human beings"
        glam_ner_types:
          - "AGT.PER"
          - "AGT.STF"
        subclasses:
          - "nerd:Astronaut"
          - "nerd:Politician"
          - "nerd:Artist"
          - "nerd:Athlete"
          - "nerd:Actor"

      - nerd_class: "nerd:Organization"
        nerd_uri: "http://nerd.eurecom.fr/ontology#Organization"
        description: "Organizations, companies, institutions"
        glam_ner_types:
          - "GRP"
          - "GRP.HER"
          - "GRP.COM"
          - "GRP.GOV"
          - "GRP.EDU"
          - "GRP.REL"
          - "GRP.UNT"
        subclasses:
          - "nerd:Company"
          - "nerd:SportsTeam"
          - "nerd:Band"
          - "nerd:University"
          - "nerd:Museum"

      - nerd_class: "nerd:Location"
        nerd_uri: "http://nerd.eurecom.fr/ontology#Location"
        description: "Geographic places and features"
        glam_ner_types:
          - "TOP"
          - "TOP.ADM"
          - "TOP.PPL"
          - "TOP.BLD"
          - "TOP.FAC"
          - "TOP.NAT"
          - "GEO"
        subclasses:
          - "nerd:City"
          - "nerd:Country"
          - "nerd:Continent"
          - "nerd:Region"
          - "nerd:Facility"

      - nerd_class: "nerd:Event"
        nerd_uri: "http://nerd.eurecom.fr/ontology#Event"
        description: "Named events"
        glam_ner_types:
          - "THG.EVT"
        subclasses:
          - "nerd:SportEvent"
          - "nerd:MusicFestival"
          - "nerd:Election"

      - nerd_class: "nerd:Time"
        nerd_uri: "http://nerd.eurecom.fr/ontology#Time"
        description: "Temporal expressions"
        glam_ner_types:
          - "TMP"
          - "TMP.DAT"
          - "TMP.TIM"
          - "TMP.DUR"
          - "TMP.SET"

      - nerd_class: "nerd:Amount"
        nerd_uri: "http://nerd.eurecom.fr/ontology#Amount"
        description: "Quantities and measurements"
        glam_ner_types:
          - "QTY"
          - "QTY.CNT"
          - "QTY.MSR"
          - "QTY.MON"
          - "QTY.PCT"
          - "QTY.ORD"

      - nerd_class: "nerd:Product"
        nerd_uri: "http://nerd.eurecom.fr/ontology#Product"
        description: "Products and creative works"
        glam_ner_types:
          - "THG.OBJ"
          - "WRK"
          - "WRK.WRK"
          - "WRK.EXP"
          - "WRK.MAN"
          - "WRK.ITM"
        subclasses:
          - "nerd:Album"
          - "nerd:Book"
          - "nerd:Movie"
          - "nerd:Software"

      - nerd_class: "nerd:Animal"
        nerd_uri: "http://nerd.eurecom.fr/ontology#Animal"
        description: "Animals"
        glam_ner_types:
          - "AGT.ANI"

      - nerd_class: "nerd:Function"
        nerd_uri: "http://nerd.eurecom.fr/ontology#Function"
        description: "Roles, titles, occupations"
        glam_ner_types:
          - "ROL"
          - "ROL.OCC"
          - "ROL.TIT"
          - "ROL.HON"
        note: "GLAM-NER treats roles as separate entities; NERD uses as attribute"

  # ---------------------------------------------------------------------------
  # W3C Web Annotation Patterns
  # ---------------------------------------------------------------------------
  web_annotation_patterns:
    description: |
      W3C Web Annotation Data Model provides standard annotation structure
      with target selectors for precise text span identification.

    annotation_class: "oa:Annotation"
    annotation_uri: "http://www.w3.org/ns/oa#Annotation"

    structure:
      - property: "oa:hasBody"
        uri: "http://www.w3.org/ns/oa#hasBody"
        description: "The annotation content (entity type, confidence)"
        example: "The GLAM-NER entity classification"

      - property: "oa:hasTarget"
        uri: "http://www.w3.org/ns/oa#hasTarget"
        description: "What is being annotated (text span)"
        example: "TextPositionSelector pointing to entity mention"

      - property: "oa:motivatedBy"
        uri: "http://www.w3.org/ns/oa#motivatedBy"
        description: "Why the annotation was created"
        value: "oa:classifying"
        note: "NER annotations are classification activities"

      # NOTE(review): oa:annotatedBy and oa:annotatedAt are not terms of the
      # W3C Web Annotation vocabulary referenced in the header, which uses
      # dcterms:creator and dcterms:created instead. Confirm which terms
      # downstream consumers expect before changing these entries.
      - property: "oa:annotatedBy"
        uri: "http://www.w3.org/ns/oa#annotatedBy"
        description: "Agent that created the annotation"
        example: "Human curator, ML model, or pipeline"

      - property: "oa:annotatedAt"
        uri: "http://www.w3.org/ns/oa#annotatedAt"
        description: "When the annotation was created"
        range: "xsd:dateTime"

    target_selectors:
      text_position:
        class: "oa:TextPositionSelector"
        uri: "http://www.w3.org/ns/oa#TextPositionSelector"
        properties:
          - property: "oa:start"
            description: "Start offset (0-based)"
          - property: "oa:end"
            description: "End offset (exclusive)"
        note: "Equivalent to NIF offset-based addressing"

      text_quote:
        class: "oa:TextQuoteSelector"
        uri: "http://www.w3.org/ns/oa#TextQuoteSelector"
        properties:
          - property: "oa:exact"
            description: "The exact matched text"
          - property: "oa:prefix"
            description: "Context before (for disambiguation)"
          - property: "oa:suffix"
            description: "Context after (for disambiguation)"
        note: "Provides context for robust text matching"

      xpath:
        class: "oa:XPathSelector"
        uri: "http://www.w3.org/ns/oa#XPathSelector"
        properties:
          - property: "rdf:value"
            description: "XPath expression to DOM node"
        note: "For HTML/XML sources with DOM structure"

      css:
        class: "oa:CssSelector"
        uri: "http://www.w3.org/ns/oa#CssSelector"
        properties:
          - property: "rdf:value"
            description: "CSS selector to DOM node"
        note: "Alternative to XPath for HTML sources"

  # ---------------------------------------------------------------------------
  # ITS 2.0 Entity Linking
  # ---------------------------------------------------------------------------
  itsrdf_entity_linking:
    description: |
      ITS 2.0 (Internationalization Tag Set) provides entity linking predicates
      for connecting mentions to knowledge bases.

    properties:
      - property: "itsrdf:taIdentRef"
        uri: "http://www.w3.org/2005/11/its/rdf#taIdentRef"
        description: "URI reference to entity in knowledge base"
        example: "http://www.wikidata.org/entity/Q190804"
        note: "Primary entity linking predicate"
        required_for_linking: true

      - property: "itsrdf:taSource"
        uri: "http://www.w3.org/2005/11/its/rdf#taSource"
        description: "Knowledge base source identifier"
        examples:
          - "Wikidata"
          - "DBpedia"
          - "GeoNames"
          - "VIAF"
          - "Getty AAT"
          - "Getty ULAN"
          - "Getty TGN"
        note: "Human-readable source name"

      - property: "itsrdf:taConfidence"
        uri: "http://www.w3.org/2005/11/its/rdf#taConfidence"
        description: "Linking confidence score (0.0-1.0)"
        range: "xsd:double"
        note: "Different from entity detection confidence"

      - property: "itsrdf:taClassRef"
        uri: "http://www.w3.org/2005/11/its/rdf#taClassRef"
        description: "URI of entity type in target ontology"
        example: "http://dbpedia.org/ontology/Museum"
        note: "Type in linked KB, may differ from GLAM-NER type"

  # ---------------------------------------------------------------------------
  # Complete GLAM-NER to NERD Mapping Table
  # ---------------------------------------------------------------------------
  # One entry per GLAM-NER type code. nerd_class is the single primary NERD
  # class; nerd_subclasses (optional) lists narrower NERD classes that may
  # additionally apply.
  glam_ner_to_nerd_mapping:
    description: "Complete mapping table from GLAM-NER v1.7.0 types to NERD classes"

    mappings:
      # AGENT hypernym mappings
      - glam_type: "AGT"
        glam_code: "AGT"
        nerd_class: "nerd:Person"
        note: "Generic agent defaults to Person"

      - glam_type: "AGT.PER"
        glam_code: "AGT.PER"
        nerd_class: "nerd:Person"
        nerd_subclasses: ["nerd:Artist", "nerd:Politician", "nerd:Astronaut", "nerd:Athlete"]

      - glam_type: "AGT.STF"
        glam_code: "AGT.STF"
        nerd_class: "nerd:Person"
        note: "Staff roles map to nerd:Function as secondary annotation"

      - glam_type: "AGT.ANI"
        glam_code: "AGT.ANI"
        nerd_class: "nerd:Animal"

      - glam_type: "AGT.MYT"
        glam_code: "AGT.MYT"
        nerd_class: "nerd:Person"
        note: "Mythological/fictional figures treated as Person in NERD"

      - glam_type: "AGT.AI"
        glam_code: "AGT.AI"
        nerd_class: "nerd:Thing"
        note: "AI agents have no NERD equivalent - map to Thing"

      # GROUP hypernym mappings
      - glam_type: "GRP"
        glam_code: "GRP"
        nerd_class: "nerd:Organization"

      - glam_type: "GRP.HER"
        glam_code: "GRP.HER"
        nerd_class: "nerd:Organization"
        nerd_subclasses: ["nerd:Museum"]

      - glam_type: "GRP.COM"
        glam_code: "GRP.COM"
        nerd_class: "nerd:Organization"
        nerd_subclasses: ["nerd:Company"]

      - glam_type: "GRP.GOV"
        glam_code: "GRP.GOV"
        nerd_class: "nerd:Organization"

      - glam_type: "GRP.EDU"
        glam_code: "GRP.EDU"
        nerd_class: "nerd:Organization"
        nerd_subclasses: ["nerd:University"]

      - glam_type: "GRP.REL"
        glam_code: "GRP.REL"
        nerd_class: "nerd:Organization"

      - glam_type: "GRP.UNT"
        glam_code: "GRP.UNT"
        nerd_class: "nerd:Organization"
        note: "Organizational units are Organizations in NERD"

      - glam_type: "GRP.INF"
        glam_code: "GRP.INF"
        nerd_class: "nerd:Organization"
        note: "Informal groups still map to Organization"

      # TOPONYM hypernym mappings
      - glam_type: "TOP"
        glam_code: "TOP"
        nerd_class: "nerd:Location"

      - glam_type: "TOP.ADM"
        glam_code: "TOP.ADM"
        nerd_class: "nerd:Location"
        nerd_subclasses: ["nerd:Country", "nerd:Region"]

      - glam_type: "TOP.PPL"
        glam_code: "TOP.PPL"
        nerd_class: "nerd:Location"
        nerd_subclasses: ["nerd:City"]

      - glam_type: "TOP.BLD"
        glam_code: "TOP.BLD"
        nerd_class: "nerd:Location"
        nerd_subclasses: ["nerd:Facility"]

      - glam_type: "TOP.FAC"
        glam_code: "TOP.FAC"
        nerd_class: "nerd:Location"
        nerd_subclasses: ["nerd:Facility"]

      - glam_type: "TOP.NAT"
        glam_code: "TOP.NAT"
        nerd_class: "nerd:Location"

      - glam_type: "TOP.HIS"
        glam_code: "TOP.HIS"
        nerd_class: "nerd:Location"
        note: "Historical place names"

      # GEOMETRY hypernym mappings
      - glam_type: "GEO"
        glam_code: "GEO"
        nerd_class: "nerd:Location"
        note: "Coordinates map to Location"

      - glam_type: "GEO.PNT"
        glam_code: "GEO.PNT"
        nerd_class: "nerd:Location"

      - glam_type: "GEO.BOX"
        glam_code: "GEO.BOX"
        nerd_class: "nerd:Location"

      # TEMPORAL hypernym mappings
      - glam_type: "TMP"
        glam_code: "TMP"
        nerd_class: "nerd:Time"

      - glam_type: "TMP.DAT"
        glam_code: "TMP.DAT"
        nerd_class: "nerd:Time"

      - glam_type: "TMP.TIM"
        glam_code: "TMP.TIM"
        nerd_class: "nerd:Time"

      - glam_type: "TMP.DUR"
        glam_code: "TMP.DUR"
        nerd_class: "nerd:Time"

      - glam_type: "TMP.SET"
        glam_code: "TMP.SET"
        nerd_class: "nerd:Time"

      # QUANTITY hypernym mappings
      - glam_type: "QTY"
        glam_code: "QTY"
        nerd_class: "nerd:Amount"

      - glam_type: "QTY.CNT"
        glam_code: "QTY.CNT"
        nerd_class: "nerd:Amount"

      - glam_type: "QTY.MSR"
        glam_code: "QTY.MSR"
        nerd_class: "nerd:Amount"

      - glam_type: "QTY.MON"
        glam_code: "QTY.MON"
        nerd_class: "nerd:Amount"

      - glam_type: "QTY.PCT"
        glam_code: "QTY.PCT"
        nerd_class: "nerd:Amount"

      - glam_type: "QTY.ORD"
        glam_code: "QTY.ORD"
        nerd_class: "nerd:Amount"

      # WORK hypernym mappings (FRBR)
      - glam_type: "WRK"
        glam_code: "WRK"
        nerd_class: "nerd:Product"

      - glam_type: "WRK.WRK"
        glam_code: "WRK.WRK"
        nerd_class: "nerd:Product"
        note: "FRBR Work level"

      - glam_type: "WRK.EXP"
        glam_code: "WRK.EXP"
        nerd_class: "nerd:Product"
        note: "FRBR Expression level"

      - glam_type: "WRK.MAN"
        glam_code: "WRK.MAN"
        nerd_class: "nerd:Product"
        nerd_subclasses: ["nerd:Book", "nerd:Album", "nerd:Movie"]
        note: "FRBR Manifestation level"

      - glam_type: "WRK.ITM"
        glam_code: "WRK.ITM"
        nerd_class: "nerd:Product"
        note: "FRBR Item level"

      # THING hypernym mappings
      - glam_type: "THG"
        glam_code: "THG"
        nerd_class: "nerd:Thing"

      - glam_type: "THG.OBJ"
        glam_code: "THG.OBJ"
        nerd_class: "nerd:Product"

      - glam_type: "THG.COL"
        glam_code: "THG.COL"
        nerd_class: "nerd:Thing"
        note: "Collections map to generic Thing (no NERD equivalent)"

      - glam_type: "THG.EVT"
        glam_code: "THG.EVT"
        nerd_class: "nerd:Event"
        nerd_subclasses: ["nerd:SportEvent", "nerd:MusicFestival"]

      - glam_type: "THG.CON"
        glam_code: "THG.CON"
        nerd_class: "nerd:Thing"
        note: "Abstract concepts"

      # APPELLATION hypernym mappings
      - glam_type: "APP"
        glam_code: "APP"
        nerd_class: "nerd:Thing"
        note: "Appellations have no direct NERD mapping"

      - glam_type: "APP.NAM"
        glam_code: "APP.NAM"
        nerd_class: "nerd:Thing"

      - glam_type: "APP.TIT"
        glam_code: "APP.TIT"
        nerd_class: "nerd:Product"
        note: "Work titles map to Product"

      # ROLE hypernym mappings
      - glam_type: "ROL"
        glam_code: "ROL"
        nerd_class: "nerd:Function"

      - glam_type: "ROL.OCC"
        glam_code: "ROL.OCC"
        nerd_class: "nerd:Function"

      - glam_type: "ROL.TIT"
        glam_code: "ROL.TIT"
        nerd_class: "nerd:Function"

      - glam_type: "ROL.HON"
        glam_code: "ROL.HON"
        nerd_class: "nerd:Function"

  # ---------------------------------------------------------------------------
  # Example NIF Annotation
  # ---------------------------------------------------------------------------
  example_nif_annotation:
    description: "Complete example of GLAM-NER annotation in NIF/OA format"

    source_text: "The Rijksmuseum in Amsterdam holds over one million objects."

    # Offsets below index into source_text (0-based, exclusive end; the text
    # is 60 characters long): "Rijksmuseum" = 4-15, "Amsterdam" = 19-28,
    # "one million" = 40-51.
    #
    # Subject IRIs follow the preferred offset_based scheme
    # ({source_url}#offset_{begin}_{end}) with https://example.org/page as an
    # illustrative source URL.
    #
    # NOTE(review): the glam: namespace IRI is a placeholder; replace it with
    # the project's published GLAM-NER vocabulary namespace.
    turtle_example: |
      @prefix nif: <http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#> .
      @prefix nerd: <http://nerd.eurecom.fr/ontology#> .
      @prefix oa: <http://www.w3.org/ns/oa#> .
      @prefix itsrdf: <http://www.w3.org/2005/11/its/rdf#> .
      @prefix glam: <https://example.org/glam-ner/ns#> .
      @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

      # Document context
      <https://example.org/page#offset_0_60>
          a nif:Context, nif:OffsetBasedString ;
          nif:isString "The Rijksmuseum in Amsterdam holds over one million objects." ;
          nif:sourceUrl <https://example.org/page> ;
          nif:beginIndex "0"^^xsd:nonNegativeInteger ;
          nif:endIndex "60"^^xsd:nonNegativeInteger .

      # Entity 1: Rijksmuseum (GRP.HER)
      <https://example.org/page#offset_4_15>
          a nif:Phrase, nif:OffsetBasedString ;
          nif:anchorOf "Rijksmuseum" ;
          nif:beginIndex "4"^^xsd:nonNegativeInteger ;
          nif:endIndex "15"^^xsd:nonNegativeInteger ;
          nif:referenceContext <https://example.org/page#offset_0_60> ;
          a nerd:Organization, nerd:Museum ;
          glam:entityType "GRP.HER" ;
          glam:entityLabel "GROUP.HERITAGE" ;
          glam:confidence "0.95"^^xsd:double ;
          itsrdf:taIdentRef <http://www.wikidata.org/entity/Q190804> ;
          itsrdf:taSource "Wikidata" ;
          itsrdf:taConfidence "0.92"^^xsd:double .

      # Entity 2: Amsterdam (TOP.PPL)
      <https://example.org/page#offset_19_28>
          a nif:Phrase, nif:OffsetBasedString ;
          nif:anchorOf "Amsterdam" ;
          nif:beginIndex "19"^^xsd:nonNegativeInteger ;
          nif:endIndex "28"^^xsd:nonNegativeInteger ;
          nif:referenceContext <https://example.org/page#offset_0_60> ;
          a nerd:Location, nerd:City ;
          glam:entityType "TOP.PPL" ;
          glam:entityLabel "TOPONYM.POPULATED_PLACE" ;
          glam:confidence "0.98"^^xsd:double ;
          itsrdf:taIdentRef <http://www.wikidata.org/entity/Q727> ;
          itsrdf:taSource "Wikidata" .

      # Entity 3: one million (QTY.CNT)
      <https://example.org/page#offset_40_51>
          a nif:Phrase, nif:OffsetBasedString ;
          nif:anchorOf "one million" ;
          nif:beginIndex "40"^^xsd:nonNegativeInteger ;
          nif:endIndex "51"^^xsd:nonNegativeInteger ;
          nif:referenceContext <https://example.org/page#offset_0_60> ;
          a nerd:Amount ;
          glam:entityType "QTY.CNT" ;
          glam:entityLabel "QUANTITY.COUNT" ;
          glam:normalizedValue "1000000"^^xsd:integer ;
          glam:confidence "0.90"^^xsd:double .

  # ---------------------------------------------------------------------------
  # Output Format Guidelines
  # ---------------------------------------------------------------------------
  output_guidelines:
    description: "How to serialize GLAM-NER annotations for interoperability"

    # Fields every serialized entity must carry.
    always_include:
      - "GLAM-NER type code (glam:entityType)"
      - "GLAM-NER type label (glam:entityLabel)"
      - "NERD class mapping (rdf:type nerd:*)"
      - "NIF offsets (nif:beginIndex, nif:endIndex)"
      - "Anchor text (nif:anchorOf)"
      - "Confidence score (glam:confidence)"

    # Additional fields for entities linked to a knowledge base.
    when_linked:
      - "itsrdf:taIdentRef - Entity URI"
      - "itsrdf:taSource - Knowledge base name"
      - "itsrdf:taConfidence - Linking confidence"

    serialization_formats:
      - format: "Turtle"
        extension: ".ttl"
        mime_type: "text/turtle"
        preferred: true

      - format: "JSON-LD"
        extension: ".jsonld"
        mime_type: "application/ld+json"
        preferred: true
        note: "Use @context for namespace prefixes"

      - format: "N-Triples"
        extension: ".nt"
        mime_type: "application/n-triples"
        preferred: false
        note: "For streaming/bulk processing"

# =============================================================================
# END OF MODULE
# =============================================================================