# =============================================================================
# GLAM-NER Entity Annotation Convention v1.7.0
# Module: integrations/pico.yaml
# =============================================================================
# PiCO (Person in Context Ontology) integration for person observation modeling.
# Enables tracking provenance of person mentions and linking to formal records.
#
# Key concepts:
#   - PersonObservation: A textual mention of a person (source-bound)
#   - PersonName (PNV): Structured name components
#   - Person (CIDOC-CRM E21): Reconstructed person entity
#
# References:
#   - PiCo Ontology: https://w3id.org/pico
#   - Person Name Vocabulary (PNV): https://w3id.org/pnv
#   - CIDOC-CRM: https://www.cidoc-crm.org/
#
# Layout of this module (all sections are children of `pico_integration`):
#   description          - prose overview of the observation/reconstruction split
#   observation_pattern  - the PersonObservation class and its properties
#   pnv_name_structure   - PNV name components with example values
#   dutch_name_patterns  - tussenvoegsel list plus sorting/capitalization rules
#   hypernym_mapping     - PiCo/PNV concepts -> GLAM-NER hypernym codes
#   examples             - worked annotations (observation + NER spans)
#   provenance_model     - PROV-based chain from observation to agent
# =============================================================================

pico_integration:

  description: |
    PiCO (Person in Context Ontology) models textual observations of persons
    as distinct from reconstructed person entities. This enables:
    - Tracking provenance of person mentions
    - Handling name variations across sources
    - Linking observations to formal person records

    The observation/reconstruction pattern separates:
    1. What was OBSERVED in text (PersonObservation) - source-bound, exact
    2. What was RECONSTRUCTED as entity (E21_Person) - inferred, normalized

    This is critical for heritage data where the same person may appear with
    different name forms, titles, or spellings across sources.

  # ---------------------------------------------------------------------------
  # Core Observation Pattern
  # ---------------------------------------------------------------------------
  # Each entry under `properties` describes one property of the observation:
  # its CURIE, expected range, cardinality (kept as a quoted string so "1" is
  # not parsed as an integer), and a usage note.

  observation_pattern:
    description: "Every person mention creates a PersonObservation"
    class: "picom:PersonObservation"
    # NOTE(review): confirm this URI against the published PiCo model namespace.
    class_uri: "https://w3id.org/pico/PersonObservation"

    properties:
      - property: "picom:hasObservedName"
        description: "The name string as it appears in text"
        range: "pnv:PersonName"
        cardinality: "1"
        note: "Exact transcription of name from source"

      - property: "picom:isObservationOf"
        description: "Links to reconstructed Person entity"
        range: "crm:E21_Person"
        cardinality: "0..1"
        note: "May be null if person not yet identified"

      - property: "prov:hadPrimarySource"
        description: "The source document/webpage"
        range: "prov:Entity"
        cardinality: "1"
        note: "Required for provenance tracking"

      - property: "picom:observedAt"
        description: "When the observation was made"
        range: "xsd:dateTime"
        cardinality: "1"
        note: "Extraction timestamp, not document date"

      - property: "picom:observedInContext"
        description: "Surrounding text context"
        range: "xsd:string"
        cardinality: "0..1"
        note: "For disambiguation when reviewing"

      - property: "picom:hasRole"
        description: "Role/position observed with the person"
        range: "xsd:string"
        cardinality: "0..*"
        note: "Links to ROLE hypernym when extracted"

  # ---------------------------------------------------------------------------
  # Person Name Vocabulary (PNV)
  # ---------------------------------------------------------------------------
  # One entry per name component; `examples` are illustrative values only.

  pnv_name_structure:
    description: |
      Person Name Vocabulary (PNV) provides structured name components.
      This enables proper parsing of complex name structures across cultures.

    class: "pnv:PersonName"
    # NOTE(review): PNV appears to publish a hash namespace
    # (https://w3id.org/pnv#) - confirm whether this slash form is intended.
    class_uri: "https://w3id.org/pnv/PersonName"

    components:
      - property: "pnv:literalName"
        description: "Full name as single string"
        examples:
          - "Dr. Maria van den Berg"
          - "Rembrandt Harmenszoon van Rijn"
          - "Queen Elizabeth II"
        note: "Original string before parsing"

      - property: "pnv:givenName"
        description: "First/given name"
        examples:
          - "Rembrandt"
          - "Maria"
          - "Jan"
          - "Elizabeth"
        note: "Personal name, not surname"

      - property: "pnv:patronym"
        description: "Patronymic name component"
        examples:
          - "Harmenszoon"
          - "Janszoon"
          - "Pietersdochter"
        note: "Common in Dutch, Scandinavian, Slavic names"

      - property: "pnv:surnamePrefix"
        description: "Prefix to surname (tussenvoegsel)"
        examples:
          - "van"
          - "de"
          - "van den"
          - "van der"
          - "op de"
          - "'t"
          - "von"
          - "di"
        note: "Language-specific, affects sorting"

      - property: "pnv:baseSurname"
        description: "Core surname without prefix"
        examples:
          - "Rijn"
          - "Berg"
          - "Velde"
          - "Gogh"
        note: "Primary sorting component in Dutch"

      # NOTE(review): the PNV spec appears to name this property `pnv:prefix`;
      # verify that `pnv:honorificPrefix` is a deliberate project alias.
      - property: "pnv:honorificPrefix"
        description: "Title or honorific before name"
        examples:
          - "Dr."
          - "Prof."
          - "Prof. dr."
          - "Sir"
          - "Queen"
          - "Mr."
          - "Drs."
          - "Ir."
        note: "May indicate role - link to ROL"

      - property: "pnv:honorificSuffix"
        description: "Title or honorific after name"
        examples:
          - "PhD"
          - "Jr."
          - "III"
          - "MD"
          - "RA"
          - "MSc"
        note: "Credentials and generational markers"

      - property: "pnv:infixTitle"
        description: "Title within name structure"
        examples:
          - "graaf van"
          - "baron de"
          - "duke of"
        note: "Nobility titles embedded in name"

  # ---------------------------------------------------------------------------
  # Dutch Name Conventions (Project-Specific)
  # ---------------------------------------------------------------------------

  dutch_name_patterns:
    description: |
      Special handling for Dutch names with tussenvoegsels (surname prefixes).
      Dutch sorting rules differ from other languages.

    # Entries starting with an apostrophe ("'t") must stay double-quoted.
    tussenvoegsel_list:
      - "van"
      - "van de"
      - "van den"
      - "van der"
      - "de"
      - "den"
      - "het"
      - "'t"
      - "ter"
      - "ten"
      - "op de"
      - "op den"
      - "in 't"
      - "in de"

    sorting_rule: |
      In Dutch, surnames sort by baseSurname, ignoring tussenvoegsel.
      "Vincent van Gogh" sorts under "G" not "V".
      "Maria van den Berg" sorts under "B" not "V".

    capitalization_rule: |
      Tussenvoegsel lowercase when preceded by given name:
      - "Vincent van Gogh" (not "Vincent Van Gogh")
      - "Van Gogh" (surname alone, capitalized)
      - "de heer Van Gogh" (formal, capitalized)

  # ---------------------------------------------------------------------------
  # Integration with GLAM-NER Hypernyms
  # ---------------------------------------------------------------------------
  # `picom:PersonObservation` appears twice on purpose: the second mapping is
  # the conditional one (see its `condition` key).

  hypernym_mapping:
    description: "How PiCo concepts map to GLAM-NER v1.7.0 hypernyms"

    mappings:
      - pico_class: "picom:PersonObservation"
        glam_hypernym: "AGT.PER"
        glam_code: "AGT.PER"
        note: "Person observations create AGT.PER entities"

      - pico_class: "picom:PersonObservation"
        glam_hypernym: "AGT.STF"
        glam_code: "AGT.STF"
        condition: "When observed with organizational role"
        note: "Staff members with role context"

      - pico_class: "pnv:PersonName"
        glam_hypernym: "APP.NAM"
        glam_code: "APP.NAM"
        note: "Name strings as appellations"

      - pico_class: "picom:hasRole"
        glam_hypernym: "ROL"
        glam_code: "ROL"
        note: "Extracted roles link to ROL hypernym"

  # ---------------------------------------------------------------------------
  # Example Annotations
  # ---------------------------------------------------------------------------
  # Each example pairs the source `text` with the resulting `observation`
  # and the GLAM-NER spans in `glam_ner_annotations`. Timestamps are quoted so
  # they stay strings instead of being parsed as YAML timestamps.

  examples:
    - description: "Staff member with title and role"
      text: "Dr. Maria van den Berg, Director"

      observation:
        type: "picom:PersonObservation"
        id: "_:obs1"

        hasObservedName:
          type: "pnv:PersonName"
          literalName: "Dr. Maria van den Berg"
          honorificPrefix: "Dr."
          givenName: "Maria"
          surnamePrefix: "van den"
          baseSurname: "Berg"

        hasRole: "Director"
        hadPrimarySource: "https://example.org/staff-page"
        observedAt: "2025-12-02T10:30:00Z"

      glam_ner_annotations:
        - span: "Dr. Maria van den Berg"
          type: "AGT.STF"
          code: "AGT.STF"
          confidence: 0.95

        - span: "Director"
          type: "ROL.TIT"
          code: "ROL.TIT"
          confidence: 0.98

    - description: "Historical artist"
      text: "Rembrandt van Rijn painted this in 1642"

      observation:
        type: "picom:PersonObservation"
        id: "_:obs2"

        hasObservedName:
          type: "pnv:PersonName"
          literalName: "Rembrandt van Rijn"
          givenName: "Rembrandt"
          surnamePrefix: "van"
          baseSurname: "Rijn"

        isObservationOf: "wd:Q5598"  # Wikidata Rembrandt
        hadPrimarySource: "https://example.org/artwork-page"
        observedAt: "2025-12-02T10:35:00Z"

      glam_ner_annotations:
        - span: "Rembrandt van Rijn"
          type: "AGT.PER"
          code: "AGT.PER"
          confidence: 0.99
          # Identifiers are quoted so the all-digit VIAF id stays a string.
          linking:
            wikidata: "Q5598"
            viaf: "64013650"

    - description: "Nobility title"
      text: "Count Willem van Loon"

      observation:
        type: "picom:PersonObservation"
        id: "_:obs3"

        hasObservedName:
          type: "pnv:PersonName"
          literalName: "Count Willem van Loon"
          honorificPrefix: "Count"
          givenName: "Willem"
          surnamePrefix: "van"
          baseSurname: "Loon"

        hadPrimarySource: "https://example.org/archive-doc"
        observedAt: "2025-12-02T10:40:00Z"

      glam_ner_annotations:
        - span: "Count Willem van Loon"
          type: "AGT.PER"
          code: "AGT.PER"
          confidence: 0.95

        # NOTE(review): no `confidence` here, unlike every other annotation in
        # this module - confirm whether the field is optional.
        - span: "Count"
          type: "ROL.HON"
          code: "ROL.HON"
          note: "Nobility title - honorific role"

  # ---------------------------------------------------------------------------
  # Provenance Chain
  # ---------------------------------------------------------------------------
  # `chain_structure` lists, per link in the chain, the class used and the
  # properties that point to the next link.

  provenance_model:
    description: |
      PiCo observations maintain full provenance chain:

      Observation → Source Document → Extraction Activity → Agent

      This enables:
      - Tracking where each name form was found
      - Attributing extractions to human/ML agents
      - Maintaining audit trail for corrections

    chain_structure:
      observation:
        class: "picom:PersonObservation"
        properties:
          - "prov:hadPrimarySource"  # → Source document
          - "prov:wasGeneratedBy"  # → Extraction activity

      source:
        class: "prov:Entity"
        properties:
          - "prov:wasAttributedTo"  # → Publisher/author
          - "dct:created"  # → Document date

      activity:
        class: "prov:Activity"
        properties:
          - "prov:wasAssociatedWith"  # → Extraction agent
          - "prov:used"  # → ML model or rules
          - "prov:startedAtTime"  # → Extraction timestamp

      agent:
        class: "prov:Agent"
        examples:
          - "Human curator"
          - "spaCy NER model"
          - "GLAM-NER extraction pipeline"

# =============================================================================
# END OF MODULE
# =============================================================================