# =============================================================================
# GLAM-NER Entity Annotation Convention v1.7.0
# Module: integrations/pico.yaml
# =============================================================================
# PiCO (Person in Context Ontology) integration for person observation modeling.
# Enables tracking provenance of person mentions and linking to formal records.
#
# Key concepts:
#   - PersonObservation: A textual mention of a person (source-bound)
#   - PersonName (PNV): Structured name components
#   - Person (CIDOC-CRM E21): Reconstructed person entity
#
# References:
#   - PiCo Ontology: https://w3id.org/pico
#   - Person Name Vocabulary (PNV): https://w3id.org/pnv
#   - CIDOC-CRM: https://www.cidoc-crm.org/
# =============================================================================
---

# Single top-level key; every section below is nested one level under it.
pico_integration:
  description: |
    PiCO (Person in Context Ontology) models textual observations of persons
    as distinct from reconstructed person entities. This enables:

    - Tracking provenance of person mentions
    - Handling name variations across sources
    - Linking observations to formal person records

    The observation/reconstruction pattern separates:
    1. What was OBSERVED in text (PersonObservation) - source-bound, exact
    2. What was RECONSTRUCTED as entity (E21_Person) - inferred, normalized

    This is critical for heritage data where the same person may appear
    with different name forms, titles, or spellings across sources.

  # ---------------------------------------------------------------------------
  # Core Observation Pattern
  # ---------------------------------------------------------------------------
  # Each entry under `properties` gives a property, its range and its
  # cardinality ("1" = required, "0..1" = optional, "0..*" = repeatable).

  observation_pattern:
    description: "Every person mention creates a PersonObservation"
    class: "picom:PersonObservation"
    class_uri: "https://w3id.org/pico/PersonObservation"

    properties:
      - property: "picom:hasObservedName"
        description: "The name string as it appears in text"
        range: "pnv:PersonName"
        cardinality: "1"
        note: "Exact transcription of name from source"

      - property: "picom:isObservationOf"
        description: "Links to reconstructed Person entity"
        range: "crm:E21_Person"
        cardinality: "0..1"
        note: "May be null if person not yet identified"

      - property: "prov:hadPrimarySource"
        description: "The source document/webpage"
        range: "prov:Entity"
        cardinality: "1"
        note: "Required for provenance tracking"

      - property: "picom:observedAt"
        description: "When the observation was made"
        range: "xsd:dateTime"
        cardinality: "1"
        note: "Extraction timestamp, not document date"

      - property: "picom:observedInContext"
        description: "Surrounding text context"
        range: "xsd:string"
        cardinality: "0..1"
        note: "For disambiguation when reviewing"

      - property: "picom:hasRole"
        description: "Role/position observed with the person"
        range: "xsd:string"
        cardinality: "0..*"
        note: "Links to ROLE hypernym when extracted"

  # ---------------------------------------------------------------------------
  # Person Name Vocabulary (PNV)
  # ---------------------------------------------------------------------------

  pnv_name_structure:
    description: |
      Person Name Vocabulary (PNV) provides structured name components.
      This enables proper parsing of complex name structures across cultures.

    class: "pnv:PersonName"
    class_uri: "https://w3id.org/pnv/PersonName"

    components:
      - property: "pnv:literalName"
        description: "Full name as single string"
        examples:
          - "Dr. Maria van den Berg"
          - "Rembrandt Harmenszoon van Rijn"
          - "Queen Elizabeth II"
        note: "Original string before parsing"

      - property: "pnv:givenName"
        description: "First/given name"
        examples:
          - "Rembrandt"
          - "Maria"
          - "Jan"
          - "Elizabeth"
        note: "Personal name, not surname"

      - property: "pnv:patronym"
        description: "Patronymic name component"
        examples:
          - "Harmenszoon"
          - "Janszoon"
          - "Pietersdochter"
        note: "Common in Dutch, Scandinavian, Slavic names"

      - property: "pnv:surnamePrefix"
        description: "Prefix to surname (tussenvoegsel)"
        examples:
          - "van"
          - "de"
          - "van den"
          - "van der"
          - "op de"
          - "'t"
          - "von"
          - "di"
        note: "Language-specific, affects sorting"

      - property: "pnv:baseSurname"
        description: "Core surname without prefix"
        examples:
          - "Rijn"
          - "Berg"
          - "Velde"
          - "Gogh"
        note: "Primary sorting component in Dutch"

      - property: "pnv:honorificPrefix"
        description: "Title or honorific before name"
        examples:
          - "Dr."
          - "Prof."
          - "Prof. dr."
          - "Sir"
          - "Queen"
          - "Mr."
          - "Drs."
          - "Ir."
        note: "May indicate role - link to ROL"

      - property: "pnv:honorificSuffix"
        description: "Title or honorific after name"
        examples:
          - "PhD"
          - "Jr."
          - "III"
          - "MD"
          - "RA"
          - "MSc"
        note: "Credentials and generational markers"

      - property: "pnv:infixTitle"
        description: "Title within name structure"
        examples:
          - "graaf van"
          - "baron de"
          - "duke of"
        note: "Nobility titles embedded in name"

  # ---------------------------------------------------------------------------
  # Dutch Name Conventions (Project-Specific)
  # ---------------------------------------------------------------------------

  dutch_name_patterns:
    description: |
      Special handling for Dutch names with tussenvoegsels (surname prefixes).
      Dutch sorting rules differ from other languages.

    # NOTE(review): the list holds both single-word prefixes ("van") and
    # multi-word prefixes that start with them ("van den"); presumably
    # consumers must try the longest match first - confirm.
    tussenvoegsel_list:
      - "van"
      - "van de"
      - "van den"
      - "van der"
      - "de"
      - "den"
      - "het"
      - "'t"
      - "ter"
      - "ten"
      - "op de"
      - "op den"
      - "in 't"
      - "in de"

    sorting_rule: |
      In Dutch, surnames sort by baseSurname, ignoring tussenvoegsel.
      "Vincent van Gogh" sorts under "G" not "V".
      "Maria van den Berg" sorts under "B" not "V".

    capitalization_rule: |
      Tussenvoegsel lowercase when preceded by given name:
      - "Vincent van Gogh" (not "Vincent Van Gogh")
      - "Van Gogh" (surname alone, capitalized)
      - "de heer Van Gogh" (formal, capitalized)

  # ---------------------------------------------------------------------------
  # Integration with GLAM-NER Hypernyms
  # ---------------------------------------------------------------------------
  # `glam_hypernym` and `glam_code` carry the same value in every mapping
  # below; both keys are kept as they appear in the original.

  hypernym_mapping:
    description: "How PiCo concepts map to GLAM-NER v1.7.0 hypernyms"

    mappings:
      - pico_class: "picom:PersonObservation"
        glam_hypernym: "AGT.PER"
        glam_code: "AGT.PER"
        note: "Person observations create AGT.PER entities"

      - pico_class: "picom:PersonObservation"
        glam_hypernym: "AGT.STF"
        glam_code: "AGT.STF"
        condition: "When observed with organizational role"
        note: "Staff members with role context"

      - pico_class: "pnv:PersonName"
        glam_hypernym: "APP.NAM"
        glam_code: "APP.NAM"
        note: "Name strings as appellations"

      - pico_class: "picom:hasRole"
        glam_hypernym: "ROL"
        glam_code: "ROL"
        note: "Extracted roles link to ROL hypernym"

  # ---------------------------------------------------------------------------
  # Example Annotations
  # ---------------------------------------------------------------------------
  # Each example pairs the source `text` with the PiCo `observation` built
  # from it and the resulting `glam_ner_annotations` spans.

  examples:
    - description: "Staff member with title and role"
      text: "Dr. Maria van den Berg, Director"
      observation:
        type: "picom:PersonObservation"
        id: "_:obs1"
        hasObservedName:
          type: "pnv:PersonName"
          literalName: "Dr. Maria van den Berg"
          honorificPrefix: "Dr."
          givenName: "Maria"
          surnamePrefix: "van den"
          baseSurname: "Berg"
        hasRole: "Director"
        hadPrimarySource: "https://example.org/staff-page"
        observedAt: "2025-12-02T10:30:00Z"
      glam_ner_annotations:
        - span: "Dr. Maria van den Berg"
          type: "AGT.STF"
          code: "AGT.STF"
          confidence: 0.95
        - span: "Director"
          type: "ROL.TIT"
          code: "ROL.TIT"
          confidence: 0.98

    - description: "Historical artist"
      text: "Rembrandt van Rijn painted this in 1642"
      observation:
        type: "picom:PersonObservation"
        id: "_:obs2"
        hasObservedName:
          type: "pnv:PersonName"
          literalName: "Rembrandt van Rijn"
          givenName: "Rembrandt"
          surnamePrefix: "van"
          baseSurname: "Rijn"
        isObservationOf: "wd:Q5598"  # Wikidata Rembrandt
        hadPrimarySource: "https://example.org/artwork-page"
        observedAt: "2025-12-02T10:35:00Z"
      glam_ner_annotations:
        - span: "Rembrandt van Rijn"
          type: "AGT.PER"
          code: "AGT.PER"
          confidence: 0.99
          # Authority identifiers kept as quoted strings, not numbers.
          linking:
            wikidata: "Q5598"
            viaf: "64013650"

    - description: "Nobility title"
      text: "Count Willem van Loon"
      observation:
        type: "picom:PersonObservation"
        id: "_:obs3"
        hasObservedName:
          type: "pnv:PersonName"
          literalName: "Count Willem van Loon"
          honorificPrefix: "Count"
          givenName: "Willem"
          surnamePrefix: "van"
          baseSurname: "Loon"
        hadPrimarySource: "https://example.org/archive-doc"
        observedAt: "2025-12-02T10:40:00Z"
      glam_ner_annotations:
        - span: "Count Willem van Loon"
          type: "AGT.PER"
          code: "AGT.PER"
          confidence: 0.95
        # NOTE(review): this span has no `confidence`, unlike every other
        # annotation in this file - confirm whether that is intentional.
        - span: "Count"
          type: "ROL.HON"
          code: "ROL.HON"
          note: "Nobility title - honorific role"

  # ---------------------------------------------------------------------------
  # Provenance Chain
  # ---------------------------------------------------------------------------

  provenance_model:
    description: |
      PiCo observations maintain full provenance chain:

      Observation → Source Document → Extraction Activity → Agent

      This enables:
      - Tracking where each name form was found
      - Attributing extractions to human/ML agents
      - Maintaining audit trail for corrections

    # One entry per link of the chain, in chain order.
    # NOTE(review): prov:wasGeneratedBy is used here but is not listed under
    # observation_pattern.properties - confirm whether it should be.
    chain_structure:
      observation:
        class: "picom:PersonObservation"
        properties:
          - "prov:hadPrimarySource"  # → Source document
          - "prov:wasGeneratedBy"  # → Extraction activity

      source:
        class: "prov:Entity"
        properties:
          - "prov:wasAttributedTo"  # → Publisher/author
          - "dct:created"  # → Document date

      activity:
        class: "prov:Activity"
        properties:
          - "prov:wasAssociatedWith"  # → Extraction agent
          - "prov:used"  # → ML model or rules
          - "prov:startedAtTime"  # → Extraction timestamp

      agent:
        class: "prov:Agent"
        examples:
          - "Human curator"
          - "spaCy NER model"
          - "GLAM-NER extraction pipeline"

# =============================================================================
# END OF MODULE
# =============================================================================