# glam/data/entity_annotation/modules/integrations/pico.yaml
# 2025-12-12 12:51:10 +01:00
#
# 2244 lines
# 77 KiB
# YAML

# =============================================================================
# GLAM-NER Entity Annotation Convention v1.7.0
# Module: integrations/pico.yaml
# =============================================================================
# PiCO (Person in Context Ontology) integration for person observation modeling.
# Enables tracking provenance of person mentions and linking to formal records.
#
# Key concepts:
# - PersonObservation: A textual mention of a person (source-bound)
# - PersonName (PNV): Structured name components
# - Person (CIDOC-CRM E21): Reconstructed person entity
#
# References:
# - PiCo Ontology: https://w3id.org/pico
# - Person Name Vocabulary (PNV): https://w3id.org/pnv
# - CIDOC-CRM: https://www.cidoc-crm.org/
# =============================================================================
# PiCo integration root: the observation model, the PNV name structure, Dutch
# name handling, hypernym mappings, worked examples and the provenance chain.
# NOTE(review): the nesting below is reconstructed from the section banners,
# key names and list markers (the flattened text carried no indentation) —
# confirm against the canonical file.
pico_integration:
  description: |
    PiCO (Person in Context Ontology) models textual observations of persons
    as distinct from reconstructed person entities. This enables:
    - Tracking provenance of person mentions
    - Handling name variations across sources
    - Linking observations to formal person records
    The observation/reconstruction pattern separates:
    1. What was OBSERVED in text (PersonObservation) - source-bound, exact
    2. What was RECONSTRUCTED as entity (E21_Person) - inferred, normalized
    This is critical for heritage data where the same person may appear with
    different name forms, titles, or spellings across sources.
  # ---------------------------------------------------------------------------
  # Core Observation Pattern
  # ---------------------------------------------------------------------------
  observation_pattern:
    description: "Every person mention creates a PersonObservation"
    class: "picom:PersonObservation"
    # NOTE(review): later sections of this file use the prefix "pico:" with
    # URIs under https://personsincontext.org/model# — confirm whether "picom:"
    # and this w3id URI are intentionally a different namespace.
    class_uri: "https://w3id.org/pico/PersonObservation"
    properties:
      - property: "picom:hasObservedName"
        description: "The name string as it appears in text"
        range: "pnv:PersonName"
        cardinality: "1"
        note: "Exact transcription of name from source"
      - property: "picom:isObservationOf"
        description: "Links to reconstructed Person entity"
        range: "crm:E21_Person"
        cardinality: "0..1"
        note: "May be null if person not yet identified"
      - property: "prov:hadPrimarySource"
        description: "The source document/webpage"
        range: "prov:Entity"
        cardinality: "1"
        note: "Required for provenance tracking"
      - property: "picom:observedAt"
        description: "When the observation was made"
        range: "xsd:dateTime"
        cardinality: "1"
        note: "Extraction timestamp, not document date"
      - property: "picom:observedInContext"
        description: "Surrounding text context"
        range: "xsd:string"
        cardinality: "0..1"
        note: "For disambiguation when reviewing"
      # NOTE(review): source_type_patterns.universal_properties declares the
      # range of picom:hasRole as "org:Role", not "xsd:string" — reconcile.
      - property: "picom:hasRole"
        description: "Role/position observed with the person"
        range: "xsd:string"
        cardinality: "0..*"
        note: "Links to ROLE hypernym when extracted"
  # ---------------------------------------------------------------------------
  # Person Name Vocabulary (PNV)
  # ---------------------------------------------------------------------------
  pnv_name_structure:
    description: |
      Person Name Vocabulary (PNV) provides structured name components.
      This enables proper parsing of complex name structures across cultures.
    class: "pnv:PersonName"
    # PNV terms live in the hash namespace https://w3id.org/pnv# (the same form
    # this file uses for pnv:initials), so the class URI uses "#", not "/".
    class_uri: "https://w3id.org/pnv#PersonName"
    components:
      - property: "pnv:literalName"
        description: "Full name as single string"
        examples:
          - "Dr. Maria van den Berg"
          - "Rembrandt Harmenszoon van Rijn"
          - "Queen Elizabeth II"
        note: "Original string before parsing"
      - property: "pnv:givenName"
        description: "First/given name"
        examples:
          - "Rembrandt"
          - "Maria"
          - "Jan"
          - "Elizabeth"
        note: "Personal name, not surname"
      - property: "pnv:patronym"
        description: "Patronymic name component"
        examples:
          - "Harmenszoon"
          - "Janszoon"
          - "Pietersdochter"
        note: "Common in Dutch, Scandinavian, Slavic names"
      - property: "pnv:surnamePrefix"
        description: "Prefix to surname (tussenvoegsel)"
        examples:
          - "van"
          - "de"
          - "van den"
          - "van der"
          - "op de"
          - "'t"
          - "von"
          - "di"
        note: "Language-specific, affects sorting"
      - property: "pnv:baseSurname"
        description: "Core surname without prefix"
        examples:
          - "Rijn"
          - "Berg"
          - "Velde"
          - "Gogh"
        note: "Primary sorting component in Dutch"
      - property: "pnv:honorificPrefix"
        description: "Title or honorific before name"
        examples:
          - "Dr."
          - "Prof."
          - "Prof. dr."
          - "Sir"
          - "Queen"
          - "Mr."
          - "Drs."
          - "Ir."
        note: "May indicate role - link to ROL"
      - property: "pnv:honorificSuffix"
        description: "Title or honorific after name"
        examples:
          - "PhD"
          - "Jr."
          - "III"
          - "MD"
          - "RA"
          - "MSc"
        note: "Credentials and generational markers"
      - property: "pnv:infixTitle"
        description: "Title within name structure"
        examples:
          - "graaf van"
          - "baron de"
          - "duke of"
        note: "Nobility titles embedded in name"
  # ---------------------------------------------------------------------------
  # Dutch Name Conventions (Project-Specific)
  # ---------------------------------------------------------------------------
  dutch_name_patterns:
    description: |
      Special handling for Dutch names with tussenvoegsels (surname prefixes).
      Dutch sorting rules differ from other languages.
    tussenvoegsel_list:
      - "van"
      - "van de"
      - "van den"
      - "van der"
      - "de"
      - "den"
      - "het"
      - "'t"
      - "ter"
      - "ten"
      - "op de"
      - "op den"
      - "in 't"
      - "in de"
    sorting_rule: |
      In Dutch, surnames sort by baseSurname, ignoring tussenvoegsel.
      "Vincent van Gogh" sorts under "G" not "V".
      "Maria van den Berg" sorts under "B" not "V".
    capitalization_rule: |
      Tussenvoegsel lowercase when preceded by given name:
      - "Vincent van Gogh" (not "Vincent Van Gogh")
      - "Van Gogh" (surname alone, capitalized)
      - "de heer Van Gogh" (formal, capitalized)
  # ---------------------------------------------------------------------------
  # Integration with GLAM-NER Hypernyms
  # ---------------------------------------------------------------------------
  hypernym_mapping:
    description: "How PiCo concepts map to GLAM-NER v1.7.0 hypernyms"
    mappings:
      - pico_class: "picom:PersonObservation"
        glam_hypernym: "AGT.PER"
        glam_code: "AGT.PER"
        note: "Person observations create AGT.PER entities"
      - pico_class: "picom:PersonObservation"
        glam_hypernym: "AGT.STF"
        glam_code: "AGT.STF"
        condition: "When observed with organizational role"
        note: "Staff members with role context"
      - pico_class: "pnv:PersonName"
        glam_hypernym: "APP.NAM"
        glam_code: "APP.NAM"
        note: "Name strings as appellations"
      - pico_class: "picom:hasRole"
        glam_hypernym: "ROL"
        glam_code: "ROL"
        note: "Extracted roles link to ROL hypernym"
  # ---------------------------------------------------------------------------
  # Example Annotations
  # ---------------------------------------------------------------------------
  # Each example pairs a source text with the observation derived from it and
  # the GLAM-NER span annotations over that text.
  examples:
    - description: "Staff member with title and role"
      text: "Dr. Maria van den Berg, Director"
      observation:
        type: "picom:PersonObservation"
        id: "_:obs1"
        hasObservedName:
          type: "pnv:PersonName"
          literalName: "Dr. Maria van den Berg"
          honorificPrefix: "Dr."
          givenName: "Maria"
          surnamePrefix: "van den"
          baseSurname: "Berg"
        hasRole: "Director"
        hadPrimarySource: "https://example.org/staff-page"
        observedAt: "2025-12-02T10:30:00Z"
      glam_ner_annotations:
        - span: "Dr. Maria van den Berg"
          type: "AGT.STF"
          code: "AGT.STF"
          confidence: 0.95
        - span: "Director"
          type: "ROL.TIT"
          code: "ROL.TIT"
          confidence: 0.98
    - description: "Historical artist"
      text: "Rembrandt van Rijn painted this in 1642"
      observation:
        type: "picom:PersonObservation"
        id: "_:obs2"
        hasObservedName:
          type: "pnv:PersonName"
          literalName: "Rembrandt van Rijn"
          givenName: "Rembrandt"
          surnamePrefix: "van"
          baseSurname: "Rijn"
        isObservationOf: "wd:Q5598"  # Wikidata Rembrandt
        hadPrimarySource: "https://example.org/artwork-page"
        observedAt: "2025-12-02T10:35:00Z"
      glam_ner_annotations:
        - span: "Rembrandt van Rijn"
          type: "AGT.PER"
          code: "AGT.PER"
          confidence: 0.99
          linking:
            wikidata: "Q5598"
            viaf: "64013650"
    - description: "Nobility title"
      text: "Count Willem van Loon"
      observation:
        type: "picom:PersonObservation"
        id: "_:obs3"
        hasObservedName:
          type: "pnv:PersonName"
          literalName: "Count Willem van Loon"
          honorificPrefix: "Count"
          givenName: "Willem"
          surnamePrefix: "van"
          baseSurname: "Loon"
        hadPrimarySource: "https://example.org/archive-doc"
        observedAt: "2025-12-02T10:40:00Z"
      glam_ner_annotations:
        - span: "Count Willem van Loon"
          type: "AGT.PER"
          code: "AGT.PER"
          confidence: 0.95
        - span: "Count"
          type: "ROL.HON"
          code: "ROL.HON"
          note: "Nobility title - honorific role"
  # ---------------------------------------------------------------------------
  # Provenance Chain
  # ---------------------------------------------------------------------------
  provenance_model:
    description: |
      PiCo observations maintain full provenance chain:
      Observation → Source Document → Extraction Activity → Agent
      This enables:
      - Tracking where each name form was found
      - Attributing extractions to human/ML agents
      - Maintaining audit trail for corrections
    # One entry per link of the chain, in the order given in the description.
    chain_structure:
      observation:
        class: "picom:PersonObservation"
        properties:
          - "prov:hadPrimarySource"  # → Source document
          - "prov:wasGeneratedBy"  # → Extraction activity
      source:
        class: "prov:Entity"
        properties:
          - "prov:wasAttributedTo"  # → Publisher/author
          - "dct:created"  # → Document date
      activity:
        class: "prov:Activity"
        properties:
          - "prov:wasAssociatedWith"  # → Extraction agent
          - "prov:used"  # → ML model or rules
          - "prov:startedAtTime"  # → Extraction timestamp
      agent:
        class: "prov:Agent"
        examples:
          - "Human curator"
          - "spaCy NER model"
          - "GLAM-NER extraction pipeline"
# =============================================================================
# SOURCE TYPE EXTENSIONS
# =============================================================================
#
# PiCo PersonObservation can be extracted from many source types.
# Each source type may have specific extraction patterns, but the core
# PiCo model (observation → name → roles → provenance) remains the same.
#
# Source-specific extraction logic belongs in APPLICATION LAYER scripts,
# not in this convention. This section defines the ABSTRACT patterns.
# =============================================================================
# Abstract source categories, the observation properties shared by every
# source type, and the heritage-relevance type-code vocabulary.
# NOTE(review): nesting is reconstructed from the section banners and key
# names (the flattened text carried no indentation) — confirm against the
# canonical file.
source_type_patterns:
  description: |
    PersonObservation sources fall into categories with different extraction
    patterns. The CH-Annotator handles all source types using the same
    core PiCo model, with source-specific field mappings at extraction time.
  # ---------------------------------------------------------------------------
  # Source Categories
  # ---------------------------------------------------------------------------
  categories:
    modern_digital:
      description: "Contemporary digital sources with structured data"
      examples:
        - "LinkedIn profiles"
        - "Institutional staff directories"
        - "Academic profile pages"
        - "ORCID records"
      characteristics:
        - "Semi-structured HTML/JSON"
        - "Current/living persons"
        - "Self-reported information"
        - "Timestamped updates"
      typical_properties:
        - "sdo:name"
        - "sdo:jobTitle"
        - "sdo:hasOccupation"
        - "sdo:alumniOf"
        - "sdo:knowsAbout"
    historical_indices:
      description: "Early modern and historical name indices"
      examples:
        - "Notarial protocol indices"
        - "Church register indices"
        - "Census indices"
        - "Guild membership lists"
        - "Property transfer records"
      characteristics:
        - "Abbreviated names"
        - "Patronymics common"
        - "Latin/vernacular mixing"
        - "Occupation as identifier"
        - "Relational identification ('wife of', 'son of')"
      typical_properties:
        - "pnv:literalName"
        - "pnv:patronym"
        - "picom:hasRole"
        - "crm:P107_has_current_or_former_member"
        - "sdo:spouse"
        - "sdo:parent"
    archival_descriptions:
      description: "Finding aids, inventories, and archival descriptions"
      examples:
        - "EAD finding aids"
        - "ISAD(G) descriptions"
        - "Collection inventories"
        - "RiC-O records"
      characteristics:
        - "Hierarchical context"
        - "Provenance-focused"
        - "Creator/contributor roles"
        - "Temporal spans"
      typical_properties:
        - "rico:hasCreator"
        - "rico:hasOrHadHolder"
        - "crm:P14_carried_out_by"
        - "crm:P11_had_participant"
    biographical_dictionaries:
      description: "Structured biographical reference works"
      examples:
        - "Dictionary of National Biography"
        - "KNAW DWDD"
        - "Allgemeines Künstlerlexikon"
        - "Thieme-Becker"
      characteristics:
        - "Standardized entries"
        - "Birth/death dates"
        - "Career summaries"
        - "Cross-references"
      typical_properties:
        - "sdo:birthDate"
        - "sdo:deathDate"
        - "sdo:birthPlace"
        - "sdo:deathPlace"
        - "crm:P98_brought_into_life"
        - "crm:P100_was_death_of"
  # ---------------------------------------------------------------------------
  # Universal Observation Properties (All Source Types)
  # ---------------------------------------------------------------------------
  universal_properties:
    description: |
      These properties apply to PersonObservation regardless of source type.
      They form the core of the PiCo extraction model.
    required:
      - property: "picom:hasObservedName"
        description: "The name string as it appears in source"
        range: "pnv:PersonName"
      - property: "prov:hadPrimarySource"
        description: "The source document/webpage/record"
        range: "prov:Entity"
      - property: "picom:observedAt"
        description: "When the observation was extracted"
        range: "xsd:dateTime"
    optional:
      - property: "picom:isObservationOf"
        description: "Links to reconstructed Person entity (if identified)"
        range: "crm:E21_Person"
      # NOTE(review): pico_integration.observation_pattern declares the range
      # of picom:hasRole as "xsd:string", not "org:Role" — reconcile.
      - property: "picom:hasRole"
        description: "Role/position observed with the person"
        range: "org:Role"
      - property: "picom:observedInContext"
        description: "Surrounding text for disambiguation"
        range: "xsd:string"
      - property: "picom:confidence"
        description: "Confidence score for extraction"
        range: "xsd:decimal"
  # ---------------------------------------------------------------------------
  # Heritage Relevance Detection (Universal)
  # ---------------------------------------------------------------------------
  heritage_relevance:
    description: |
      Person observations can be tagged for heritage sector relevance using
      GLAMORCUBESFIXPHDNT type codes. This applies to all source types.
    # Keys are quoted so every loader reads them as strings: a bare N is a
    # boolean in the YAML 1.1 type repository and would turn the NGO code into
    # the key `false` under loaders that implement it.
    type_codes:
      "G": "Gallery"
      "L": "Library"
      "A": "Archive"
      "M": "Museum"
      "O": "Official institution"
      "R": "Research center"
      "C": "Corporation"
      "U": "Unknown"
      "B": "Botanical garden / Zoo"
      "E": "Education provider"
      "S": "Collecting society"
      "F": "Feature / Monument"
      "I": "Intangible heritage"
      "X": "Mixed types"
      "P": "Personal collection"
      "H": "Holy site"
      "D": "Digital platform"
      "N": "NGO"
      "T": "Taste/smell heritage"
    detection_approach: |
      Heritage relevance detection is SOURCE-SPECIFIC and belongs in the
      application layer, not the convention. The convention defines:
      1. The type code vocabulary (GLAMORCUBESFIXPHDNT)
      2. The property for tagging (picom:heritageRelevance)
      3. The expected format (single-letter code + confidence)
      Application scripts implement source-specific keyword detection,
      organization matching, or ML classification to populate this field.
# =============================================================================
# GLM-4.6 CH-ANNOTATOR INTEGRATION
# =============================================================================
#
# The CH-Annotator can be invoked via GLM-4.6 API for automated extraction.
# The system prompt is SOURCE-AGNOSTIC and works with any text input.
# =============================================================================
# Model, endpoint and sampling settings for the automated annotator, plus the
# source-agnostic system prompt sent with every extraction request.
# NOTE(review): nesting is reconstructed (the flattened text carried no
# indentation). The prompt text is reproduced line-for-line as it appears here;
# any blank lines or inner indentation (e.g. of the JSON template) that the
# canonical file may contain could not be recovered — compare before merging.
glm_annotator_config:
  model: "glm-4.6"
  api_endpoint: "https://api.z.ai/api/coding/paas/v4/chat/completions"
  temperature: 0.1
  max_tokens: 4000
  # ---------------------------------------------------------------------------
  # Core System Prompt (Source-Agnostic)
  # ---------------------------------------------------------------------------
  system_prompt: |
    You are a CH-Annotator (Cultural Heritage Annotator) v1.7.0 extraction agent
    with PiCo (Person in Context) ontology integration.
    ## Your Task
    Extract structured person observation data from the provided source text.
    The source may be a modern digital profile, historical index, archival
    description, or any other document containing person references.
    ## Core PiCo Pattern
    Every person mention creates a PersonObservation that is:
    - SOURCE-BOUND: Exact transcription from source, no normalization
    - PROVENANCE-TRACKED: Linked to source document and extraction timestamp
    - RECONSTRUCTION-READY: Can be linked to formal Person entity later
    ## Person Name Vocabulary (PNV)
    Parse names into components (use null for missing parts):
    - literalName: Full name exactly as written in source
    - givenName: First/given name
    - patronym: Patronymic (Janszoon, -dochter, bin, ibn, mac)
    - surnamePrefix: Tussenvoegsel/particle (van, de, von, di, du)
    - baseSurname: Core surname without prefix
    - honorificPrefix: Title before name (Dr., Prof., Heer, Meester)
    - honorificSuffix: Credentials after name (PhD, Jr., III)
    - initials: Initials with periods (e.g., "P.R.", "C.Joh.")
    ## Language-Specific Name Rules
    ### Dutch
    - Tussenvoegsel lowercase after given name: "Jan van Gogh"
    - Capitalized when standalone: "Van Gogh painted..."
    - Common: van, de, van de, van den, van der, 't, 's, op de
    ### Historical/Latin
    - Patronymics: -zoon/-zn, -dochter/-dr, -s (Janszoon, Pietersdochter)
    - Latinized forms: -us, -ius endings (Erasmus Roterodamus)
    - Occupational surnames may be literal (de bakker = the baker)
    ## Role Extraction
    Extract roles/occupations with temporal bounds when available:
    - Role title exactly as stated
    - Associated organization (link to GRP hypernym if institution)
    - Start/end dates or period
    - Heritage relevance code if applicable (GLAMORCUBESFIXPHDNT)
    - Role in source context (from picot_roles thesaurus):
    * child, parent, spouse, witness, declarant, bride, groom, godparent, etc.
    ## Biographical Properties
    Extract when present in source (use null if not stated):
    - birth_date / death_date: ISO format (YYYY, YYYY-MM, or YYYY-MM-DD)
    - birth_place / death_place: Place name as written
    - gender: "Male" or "Female" (only if explicitly stated or inferable)
    - age: Age as stated (e.g., "30", "4 months", "about 25")
    - religion: Religious affiliation if mentioned
    - deceased: true only if death indicated but date unknown
    - address: Physical address as recorded in source
    - floruit: Active period if birth/death unknown
    ## Family Relationship Extraction
    CRITICAL: For PersonObservations, family relationships MUST refer to OTHER
    persons mentioned in the SAME source document. Cross-source relationships
    belong to PersonReconstructions.
    ### Core Family Relationships
    - parent: A parent of the person (use sdo:parent)
    - children: Children of the person (use sdo:children)
    - spouse: Current spouse (use sdo:spouse)
    - sibling: Brother or sister (use sdo:sibling)
    ### Extended Family
    - grandparent / grandchild
    - uncle_aunt / nephew_niece
    - cousin (symmetric)
    ### Step/Half Relations
    - stepparent / stepchild
    - stepsibling
    - half_sibling (one shared parent)
    ### Ritual/Legal Kinship (common in historical records)
    - godparent / godchild: Baptismal sponsors
    - foster_parent / foster_child
    - legitimized_child: Child recognized through marriage/legal act
    ### In-Law Relations
    - parent_in_law / child_in_law
    - sibling_in_law
    ### Former Partners
    - widow_of: Surviving spouse of deceased (subject is the survivor)
    - previous_partner: Former spouse/partner
    ### Historical Source Patterns
    Common relationship indicators in historical documents:
    - "huijsvrou van" / "wife of" → spouse
    - "zoon van" / "son of" → parent (person is child)
    - "weduwe van" / "widow of" → widow_of
    - "met attestatie van" → from location indicator
    - "getuige" / "witness" → role in event, not kinship
    - "peter" / "meter" / "godfather" / "godmother" → godparent
    ## Source Types (for source_type field)
    Use appropriate category:
    - modern_digital: LinkedIn, staff directories, ORCID
    - historical_indices: Notarial protocols, guild lists
    - civil_registration: Birth/marriage/death certificates
    - church_records: Baptism, marriage, burial registers
    - archival_descriptions: Finding aids, inventories
    - biographical_dictionaries: DNB, AKL, reference works
    - census: Population census records
    ## Output Format
    Return ONLY valid JSON (no markdown, no explanation):
    {
    "pico_observation": {
    "observation_id": "<source-derived-id>",
    "observed_at": "<extraction-timestamp>",
    "source_type": "<source_category>",
    "source_reference": "<source-identifier>"
    },
    "persons": [
    {
    "person_index": 0,
    "pnv_name": {
    "literalName": "Name as written",
    "givenName": null,
    "patronym": null,
    "surnamePrefix": null,
    "baseSurname": null,
    "honorificPrefix": null,
    "honorificSuffix": null,
    "initials": null
    },
    "roles": [
    {
    "role_title": "Role as stated",
    "role_in_source": "child|declarant|witness|bride|groom|null",
    "organization": "Org name if mentioned",
    "period": "Temporal info if available",
    "heritage_relevant": false,
    "heritage_type": null
    }
    ],
    "biographical": {
    "birth_date": null,
    "death_date": null,
    "birth_place": null,
    "death_place": null,
    "gender": null,
    "age": null,
    "religion": null,
    "deceased": null,
    "address": null,
    "floruit": null
    },
    "family_relationships": {
    "parent": [],
    "children": [],
    "spouse": [],
    "sibling": [],
    "grandparent": [],
    "grandchild": [],
    "uncle_aunt": [],
    "nephew_niece": [],
    "cousin": [],
    "stepparent": [],
    "stepchild": [],
    "stepsibling": [],
    "half_sibling": [],
    "foster_parent": [],
    "foster_child": [],
    "godparent": [],
    "godchild": [],
    "parent_in_law": [],
    "child_in_law": [],
    "sibling_in_law": [],
    "previous_partner": [],
    "widow_of": null
    },
    "context": "Surrounding text for disambiguation"
    }
    ],
    "organizations_mentioned": [
    {
    "name": "Organization name",
    "type": "Heritage type code or null",
    "role_in_source": "employer|creator|publisher|etc"
    }
    ],
    "temporal_references": [
    {
    "expression": "Date/period as written",
    "normalized": "ISO date if parseable",
    "type": "DATE|DURATION|SET"
    }
    ],
    "locations_mentioned": [
    {
    "name": "Place name as written",
    "type": "city|region|country|address"
    }
    ]
    }
    ## Relationship Reference Format
    Family relationship arrays contain references to other persons in same source:
    - Use person_index (integer) to reference persons array position
    - Include target_name for readability
    Example for a marriage record:
    ```json
    {
    "person_index": 0,
    "pnv_name": {"literalName": "Jan Pietersz"},
    "family_relationships": {
    "spouse": [{"person_index": 1, "target_name": "Maria Jansdr"}],
    "parent": [{"person_index": 2, "target_name": "Pieter Jansz"}]
    }
    }
    ```
    ## Critical Rules
    1. ONLY extract data that EXISTS in the source. NEVER fabricate.
    2. Use null for missing fields, [] for empty arrays.
    3. Preserve original spelling/language from source.
    4. heritage_type must be single-letter GLAMORCUBESFIXPHDNT code.
    5. For historical sources, preserve archaic spellings exactly.
    6. Extract ALL persons mentioned, not just the primary subject.
    7. Family relationships MUST reference persons in SAME source only.
    8. Use person_index for relationship references (0-based array index).
    9. Gender: only "Male"/"Female"/null - never infer without evidence.
    10. Age: preserve as stated, include qualifier ("about 25", "4 months").
    11. For role_in_source, use picot_roles terms when applicable.
# =============================================================================
# PERSON RECONSTRUCTION PATTERN
# =============================================================================
#
# PersonReconstruction is a reconstructed person entity derived from one or
# more PersonObservations. It represents the scholarly consensus about a
# historical person based on available evidence.
# =============================================================================
# Definition of pico:PersonReconstruction: its class identity, the provenance
# links every reconstruction must carry, optional descriptive properties and a
# Turtle example.
# NOTE(review): nesting is reconstructed from key names and list markers (the
# flattened text carried no indentation) — confirm against the canonical file.
person_reconstruction_pattern:
  description: |
    A PersonReconstruction is created by linking one or more PersonObservations
    to form a unified person entity. This is the scholarly interpretation layer
    that connects source-bound observations to a conceptual person.
    Key distinction:
    - PersonObservation: What is OBSERVED in a specific source (exact transcription)
    - PersonReconstruction: What is INFERRED about the person (normalized, linked)
    A single PersonReconstruction may derive from observations across:
    - Multiple sources (birth record + marriage record + death record)
    - Different time periods (mentions across decades)
    - Various name forms ("Jan Jansz" + "Johannes Jansen" + "J. Jansen")
  class: "pico:PersonReconstruction"
  class_uri: "https://personsincontext.org/model#PersonReconstruction"
  superclass: "pico:Person"
  required_properties:
    # NOTE(review): the observation class is written "picom:PersonObservation"
    # in pico_integration.observation_pattern — confirm which prefix is meant.
    - property: "prov:wasDerivedFrom"
      description: "Links to source PersonObservation(s)"
      range: "pico:PersonObservation"
      cardinality: "1..*"
      note: "Every reconstruction MUST link to at least one observation"
    - property: "prov:wasGeneratedBy"
      description: "Links to the reconstruction Activity"
      range: "prov:Activity"
      cardinality: "1"
      note: "Documents how/when/by whom reconstruction was created"
  optional_properties:
    - property: "prov:wasRevisionOf"
      description: "Links to previous version of this reconstruction"
      range: "pico:PersonReconstruction"
      cardinality: "0..1"
      note: "For tracking updates to reconstructions over time"
    - property: "sdo:name"
      description: "Normalized/preferred name form"
      range: "xsd:string"
      note: "The canonical name for this person"
    - property: "sdo:additionalName"
      description: "Structured name following PNV"
      range: "pnv:PersonName"
      note: "Full name breakdown using Person Name Vocabulary"
    - property: "sdo:givenName"
      description: "Given/first name"
      range: "xsd:string"
    - property: "sdo:familyName"
      description: "Family/surname"
      range: "xsd:string"
    - property: "sdo:gender"
      description: "Gender of the person"
      range: "sdo:GenderType"
      values: ["sdo:Male", "sdo:Female"]
    # NOTE(review): the range says xsd:date, yet the note allows YYYY and
    # YYYY-MM, and the Turtle example below types a year-only death date as
    # xsd:gYear — decide whether the ranges should list the partial-date types.
    - property: "sdo:birthDate"
      description: "Birth date (ISO 8601)"
      range: "xsd:date"
      note: "May be incomplete: YYYY, YYYY-MM, or YYYY-MM-DD"
    - property: "sdo:birthPlace"
      description: "Place of birth"
      range: "xsd:string or xsd:anyURI"
      note: "Prefer linking to GeoNames or Wikidata"
    - property: "sdo:deathDate"
      description: "Death date (ISO 8601)"
      range: "xsd:date"
    - property: "sdo:deathPlace"
      description: "Place of death"
      range: "xsd:string or xsd:anyURI"
  example:
    description: "PersonReconstruction derived from multiple observations"
    turtle: |
      cbg:person_reconstruction_anna_koppen
      a pico:PersonReconstruction ;
      sdo:name "Anna Maria Koppen" ;
      sdo:familyName "Koppen" ;
      sdo:givenName "Anna Maria" ;
      sdo:gender sdo:Female ;
      sdo:birthPlace "Haarlem" ;
      sdo:birthDate "1860-03-31"^^xsd:date ;
      sdo:deathPlace "Detroit, USA" ;
      sdo:deathDate "1926"^^xsd:gYear ;
      prov:wasDerivedFrom nha:marriage_1885_po_1 ,
      cbg:emigration_1887_po_1 ,
      us:death_1926_po_1 ;
      prov:wasGeneratedBy cbg:reconstruction_activity_01 .
# =============================================================================
# SOURCE AND SCAN CLASSES
# =============================================================================
#
# Sources (sdo:ArchiveComponent) and Scans (sdo:ImageObject) document where
# PersonObservations were extracted from. Essential for provenance.
# =============================================================================
# Minimal descriptions of the source document (sdo:ArchiveComponent) and its
# scans (sdo:ImageObject) that observations point to for provenance.
# NOTE(review): nesting is reconstructed from key names and list markers (the
# flattened text carried no indentation) — confirm against the canonical file.
source_classes:
  archive_component:
    description: |
      A Source document from which PersonObservations are extracted.
      PiCo does not aim to fully describe archival sources (use RiC-O or DC for that),
      but requires minimal identification for provenance tracking.
    class: "sdo:ArchiveComponent"
    class_uri: "https://schema.org/ArchiveComponent"
    superclass: "sdo:CreativeWork"
    properties:
      - property: "sdo:name"
        description: "Identifying name for the source"
        range: "xsd:string"
        cardinality: "1"
        note: "Combine title, date, archive location for identification"
        example: "BS Marriage Haarlem, November 11, 1885, certificate number 321"
      - property: "sdo:additionalType"
        description: "Type of source document"
        range: "picot_sourcetypes:Concept"
        note: "Use PiCo SourceType thesaurus"
      - property: "sdo:dateCreated"
        description: "Date the source was created"
        range: "xsd:date"
      - property: "sdo:holdingArchive"
        description: "Institution holding the source"
        range: "xsd:anyURI"
        note: "Link to heritage custodian (GHCID or Wikidata)"
      - property: "sdo:url"
        description: "Permalink to the source"
        range: "sdo:URL"
        note: "Preferably a persistent identifier"
      - property: "sdo:contentLocation"
        description: "Geographic coverage of the source"
        range: "xsd:string or xsd:anyURI"
      - property: "sdo:associatedMedia"
        description: "Link to scan(s) of the source"
        range: "sdo:ImageObject"
        cardinality: "0..*"
  image_object:
    description: |
      A Scan of a source document. Links to the digital image at the holding archive.
    class: "sdo:ImageObject"
    class_uri: "https://schema.org/ImageObject"
    superclass: "sdo:CreativeWork"
    properties:
      - property: "sdo:url"
        description: "URL to the full scan"
        range: "sdo:URL"
        note: "Preferably IIIF manifest"
      - property: "sdo:thumbnail"
        description: "URL to thumbnail image"
        range: "sdo:ImageObject"
      - property: "sdo:embedUrl"
        description: "URL to image viewer"
        range: "sdo:URL"
      - property: "sdo:position"
        description: "Position in sequence of scans"
        range: "xsd:int"
        note: "For multi-page sources"
# =============================================================================
# BIOGRAPHICAL PROPERTIES
# =============================================================================
#
# Properties for capturing biographical details about persons in observations.
# These appear in the source and are transcribed to the observation.
# =============================================================================
# Per-property definitions for biographical details (age, religion, deceased
# flag, gender, address, initials): property name, URI, range, domain, usage
# notes and examples.
# NOTE(review): nesting is reconstructed from key names and list markers (the
# flattened text carried no indentation) — confirm against the canonical file.
biographical_properties:
  description: |
    Biographical properties capture personal details as they appear in sources.
    These are used for both PersonObservation (source-bound) and
    PersonReconstruction (normalized).
  age:
    property: "pico:hasAge"
    property_uri: "https://personsincontext.org/model#hasAge"
    description: "Age of person as stated in source"
    range: "xsd:string"
    domain: "pico:PersonObservation"
    note: |
      Used when birth date unknown but age is recorded.
      Age assumed in years unless specified ("4" = 4 years, "4 months" = 4 months).
      Numerical preferred over text ("4" not "four").
    examples:
      - "30"
      - "4 months"
      - "about 25"
  religion:
    property: "pico:hasReligion"
    property_uri: "https://personsincontext.org/model#hasReligion"
    description: "Religious affiliation as stated in source"
    range: "xsd:string or xsd:anyURI"
    domain: "pico:Person"
    note: "Can link to SKOS thesaurus for religions"
    examples:
      - "Catholic"
      - "Reformed"
      - "Jewish"
  deceased:
    property: "pico:deceased"
    property_uri: "https://personsincontext.org/model#deceased"
    description: "Indication that person is deceased (when death date unknown)"
    range: "xsd:boolean"
    domain: "pico:PersonObservation"
    note: |
      Only used when deathDate is unknown but death is indicated.
      A person without deathDate and without deceased:true is assumed alive.
      Important for privacy considerations in publishing person records.
  gender:
    property: "sdo:gender"
    property_uri: "https://schema.org/gender"
    description: "Gender of the person"
    range: "sdo:GenderType"
    domain: "pico:Person"
    values:
      - uri: "sdo:Male"
        label: "Male"
      - uri: "sdo:Female"
        label: "Female"
  address:
    property: "sdo:address"
    property_uri: "https://schema.org/address"
    description: "Physical address as mentioned in source"
    range: "xsd:string"
    domain: "pico:PersonObservation"
    note: "Address exactly as recorded in source"
  initials:
    property: "pnv:initials"
    property_uri: "https://w3id.org/pnv#initials"
    description: "Initials of given name(s)"
    range: "xsd:string"
    domain: "pnv:PersonName"
    note: "Each initial followed by period (e.g., 'P.R.', 'H.A.F.M.O.')"
    examples:
      - "P.R."
      - "C.Joh."
      - "H.A.F.M.O."
# =============================================================================
# FAMILY RELATIONSHIP PROPERTIES
# =============================================================================
#
# PiCo defines extensive family relationship properties for genealogical data.
# These enable modeling complex family structures from historical records.
# =============================================================================
# Container for the family relationship property groups that follow; the
# description states the scoping rules and the property characteristics used.
family_relationships:
description: |
Family relationship properties link persons within and across sources.
Rules:
- For PersonObservations: relationships refer to OTHER observations on SAME source
- For PersonReconstructions: relationships refer to other reconstructions
Property characteristics:
- Symmetric: If A hasRelation B, then B hasRelation A (spouses, siblings, cousins)
- Transitive: hasAncestor/hasDescendant chain through generations
- Inverse pairs: parent/children, grandparent/grandchild, etc.
# ---------------------------------------------------------------------------
# Core Family (Schema.org)
# ---------------------------------------------------------------------------
# Core family properties taken from Schema.org.
# `subPropertyOf` is always a list, even for a single super-property, so a
# consumer can iterate over it without checking for a bare string first.
core_relationships:
# parent and children are each other's inverse and feed the transitive
# ancestor/descendant properties.
- property: "sdo:parent"
property_uri: "https://schema.org/parent"
description: "A parent of the person"
inverse: "sdo:children"
subPropertyOf: ["sdo:relatedTo", "pico:hasAncestor"]
note: "Biological or legal parent"
- property: "sdo:children"
property_uri: "https://schema.org/children"
description: "A child of the person"
inverse: "sdo:parent"
subPropertyOf: ["sdo:relatedTo", "pico:hasDescendant"]
# spouse and sibling are symmetric, so they declare no inverse.
- property: "sdo:spouse"
property_uri: "https://schema.org/spouse"
description: "The person's spouse"
symmetric: true
subPropertyOf: ["sdo:relatedTo"]
- property: "sdo:sibling"
property_uri: "https://schema.org/sibling"
description: "A brother or sister"
symmetric: true
subPropertyOf: ["sdo:relatedTo"]
# ---------------------------------------------------------------------------
# Transitive Ancestry (PiCo)
# ---------------------------------------------------------------------------
# Transitive ancestor/descendant properties; each names the other as inverse.
ancestry_relationships:
- property: "pico:hasAncestor"
property_uri: "https://personsincontext.org/model#hasAncestor"
description: "Any ancestor (parent, grandparent, etc.)"
type: "owl:TransitiveProperty"
inverse: "pico:hasDescendant"
note: "Not used directly; parent→parent chains automatically create ancestors"
# No usage note is given for hasDescendant.
- property: "pico:hasDescendant"
property_uri: "https://personsincontext.org/model#hasDescendant"
description: "Any descendant (child, grandchild, etc.)"
type: "owl:TransitiveProperty"
inverse: "pico:hasAncestor"
# ---------------------------------------------------------------------------
# Grandparents/Grandchildren
# ---------------------------------------------------------------------------
# Grandparent and great-grandparent properties, declared as two inverse pairs.
grandparent_relationships:
- property: "pico:hasGrandparent"
property_uri: "https://personsincontext.org/model#hasGrandparent"
inverse: "pico:hasGrandchild"
- property: "pico:hasGrandchild"
property_uri: "https://personsincontext.org/model#hasGrandchild"
inverse: "pico:hasGrandparent"
# Great- variants keep the hyphen in both the prefixed name and the URI.
- property: "pico:hasGreat-grandparent"
property_uri: "https://personsincontext.org/model#hasGreat-grandparent"
inverse: "pico:hasGreat-grandchild"
- property: "pico:hasGreat-grandchild"
property_uri: "https://personsincontext.org/model#hasGreat-grandchild"
inverse: "pico:hasGreat-grandparent"
# ---------------------------------------------------------------------------
# Aunts/Uncles and Nieces/Nephews
# ---------------------------------------------------------------------------
# Uncle/aunt and nephew/niece form an inverse pair; cousin is symmetric.
# Each property covers both genders in a single name (Uncle_Aunt, Nephew_Niece).
extended_family:
- property: "pico:hasUncle_Aunt"
property_uri: "https://personsincontext.org/model#hasUncle_Aunt"
description: "An uncle or aunt (sibling of parent)"
inverse: "pico:hasNephew_Niece"
- property: "pico:hasNephew_Niece"
property_uri: "https://personsincontext.org/model#hasNephew_Niece"
description: "A nephew or niece (child of sibling)"
inverse: "pico:hasUncle_Aunt"
- property: "pico:hasCousin"
property_uri: "https://personsincontext.org/model#hasCousin"
description: "A cousin (child of parent's sibling)"
symmetric: true
# ---------------------------------------------------------------------------
# Step-family
# ---------------------------------------------------------------------------
# Step-family properties plus half-sibling.
# Stepparent/stepchild are inverses; the two sibling forms are symmetric.
step_relationships:
- property: "pico:hasStepparent"
property_uri: "https://personsincontext.org/model#hasStepparent"
description: "A stepparent (spouse of biological parent)"
inverse: "pico:hasStepchild"
- property: "pico:hasStepchild"
property_uri: "https://personsincontext.org/model#hasStepchild"
inverse: "pico:hasStepparent"
- property: "pico:hasStepsibling"
property_uri: "https://personsincontext.org/model#hasStepsibling"
description: "A stepbrother or stepsister"
symmetric: true
# Half-siblings share one parent, unlike stepsiblings.
- property: "pico:hasHalf-sibling"
property_uri: "https://personsincontext.org/model#hasHalf-sibling"
description: "A half-brother or half-sister (one shared parent)"
symmetric: true
# ---------------------------------------------------------------------------
# Foster/Godparent
# ---------------------------------------------------------------------------
# Foster, godparent and legitimization properties, each an inverse pair.
non_biological_relationships:
- property: "pico:hasFosterParent"
property_uri: "https://personsincontext.org/model#hasFosterParent"
inverse: "pico:hasFosterChild"
- property: "pico:hasFosterChild"
property_uri: "https://personsincontext.org/model#hasFosterChild"
inverse: "pico:hasFosterParent"
- property: "pico:hasGodparent"
property_uri: "https://personsincontext.org/model#hasGodparent"
description: "A godparent (witness at baptism)"
inverse: "pico:hasGodchild"
- property: "pico:hasGodchild"
property_uri: "https://personsincontext.org/model#hasGodchild"
inverse: "pico:hasGodparent"
# NOTE(review): "isLegitimitezedChildOf" looks like a misspelling of
# "isLegitimizedChildOf". The same spelling is used in the property URI, so
# confirm against the published PiCo model before changing either.
- property: "pico:hasLegitimizedChild"
property_uri: "https://personsincontext.org/model#hasLegitimizedChild"
description: "A child legitimized by marriage or legal recognition"
inverse: "pico:isLegitimitezedChildOf"
- property: "pico:isLegitimitezedChildOf"
property_uri: "https://personsincontext.org/model#isLegitimitezedChildOf"
inverse: "pico:hasLegitimizedChild"
# ---------------------------------------------------------------------------
# In-Laws
# ---------------------------------------------------------------------------
# In-law variants of the family properties. All are inverse pairs except
# sibling-in-law and cousin-in-law, which are symmetric.
in_law_relationships:
- property: "pico:hasParent-in-law"
property_uri: "https://personsincontext.org/model#hasParent-in-law"
inverse: "pico:hasChild-in-law"
- property: "pico:hasChild-in-law"
property_uri: "https://personsincontext.org/model#hasChild-in-law"
inverse: "pico:hasParent-in-law"
- property: "pico:hasSibling-in-law"
property_uri: "https://personsincontext.org/model#hasSibling-in-law"
description: "Brother/sister-in-law"
symmetric: true
- property: "pico:hasGrandparent-in-law"
property_uri: "https://personsincontext.org/model#hasGrandparent-in-law"
inverse: "pico:hasGrandchild-in-law"
- property: "pico:hasGrandchild-in-law"
property_uri: "https://personsincontext.org/model#hasGrandchild-in-law"
inverse: "pico:hasGrandparent-in-law"
- property: "pico:hasUncle_Aunt-in-law"
property_uri: "https://personsincontext.org/model#hasUncle_Aunt-in-law"
inverse: "pico:hasNephew_Niece-in-law"
- property: "pico:hasNephew_Niece-in-law"
property_uri: "https://personsincontext.org/model#hasNephew_Niece-in-law"
inverse: "pico:hasUncle_Aunt-in-law"
- property: "pico:hasCousin-in-law"
property_uri: "https://personsincontext.org/model#hasCousin-in-law"
symmetric: true
- property: "pico:hasStepparent-in-law"
property_uri: "https://personsincontext.org/model#hasStepparent-in-law"
inverse: "pico:hasStepchild-in-law"
- property: "pico:hasStepchild-in-law"
property_uri: "https://personsincontext.org/model#hasStepchild-in-law"
inverse: "pico:hasStepparent-in-law"
# ---------------------------------------------------------------------------
# Former Partners
# ---------------------------------------------------------------------------
# Properties for partnerships that have ended, by death or otherwise.
former_partner_relationships:
# isWidOf is directional (survivor -> deceased) and declares neither an
# inverse nor symmetry.
- property: "pico:isWidOf"
property_uri: "https://personsincontext.org/model#isWidOf"
description: "Is widow/widower of deceased spouse"
note: "The subject is the surviving partner"
- property: "pico:hasPreviousPartner"
property_uri: "https://personsincontext.org/model#hasPreviousPartner"
description: "A former spouse or partner"
symmetric: true
# =============================================================================
# PROVENANCE MODEL (PROV-O INTEGRATION)
# =============================================================================
#
# Enhanced provenance model for tracking observation extraction and
# reconstruction creation activities.
# =============================================================================
# PROV-O provenance for observations and reconstructions: the activity that
# produced a reconstruction, the agent behind it, and the derivation links.
enhanced_provenance_model:
description: |
PiCo uses W3C PROV-O for provenance tracking at two levels:
1. OBSERVATION LEVEL: Where did this observation come from?
- prov:hadPrimarySource → Source document
- prov:wasGeneratedBy → Extraction activity (optional)
2. RECONSTRUCTION LEVEL: How was this person entity created?
- prov:wasDerivedFrom → Source observation(s)
- prov:wasGeneratedBy → Reconstruction activity
- prov:wasRevisionOf → Previous reconstruction version
activity_class:
class: "prov:Activity"
class_uri: "http://www.w3.org/ns/prov#Activity"
description: "The activity that generated a PersonReconstruction"
properties:
- property: "prov:wasAssociatedWith"
description: "Agent responsible for the activity"
range: "prov:Agent"
- property: "prov:startedAtTime"
description: "When the activity started"
range: "xsd:dateTime"
- property: "prov:endedAtTime"
description: "When the activity completed"
range: "xsd:dateTime"
- property: "prov:used"
description: "Resources/tools used in the activity"
range: "prov:Entity"
note: "E.g., ML model, matching algorithm, rule set"
# Two kinds of reconstruction activity and the metadata each should carry.
types:
human_reconstruction:
description: "Manual reconstruction by researcher"
note: "Provide: time, place, knowledge sources, researcher name"
algorithmic_reconstruction:
description: "Automated reconstruction by software"
note: "Provide: algorithm name, version, configuration, parameters"
# NOTE(review): the description names persons and organizations only, yet the
# examples below include an agent of type "software" — confirm the wording.
agent_class:
class: "prov:Agent"
class_uri: "http://www.w3.org/ns/prov#Agent"
description: "Person or organization responsible for reconstruction"
properties:
- property: "sdo:name"
description: "Name of the agent"
range: "xsd:string"
- property: "sdo:url"
description: "URL identifying the agent"
range: "sdo:URL"
# The software agent has no URL, so url is an explicit null.
examples:
- name: "CBG|Center for Family History"
url: "https://cbg.nl"
type: "organization"
- name: "GLM-4.6 Person Extractor v1.0"
url: null
type: "software"
# wasDerivedFrom is mandatory (1..*); wasRevisionOf is optional (0..1).
derivation_properties:
- property: "prov:wasDerivedFrom"
property_uri: "http://www.w3.org/ns/prov#wasDerivedFrom"
description: "Links PersonReconstruction to source PersonObservation(s)"
domain: "pico:PersonReconstruction"
range: "pico:PersonObservation"
cardinality: "1..*"
note: "REQUIRED for all PersonReconstructions"
- property: "prov:wasRevisionOf"
property_uri: "http://www.w3.org/ns/prov#wasRevisionOf"
description: "Links to previous version of reconstruction"
domain: "pico:PersonReconstruction"
range: "pico:PersonReconstruction"
cardinality: "0..1"
note: "For tracking reconstruction updates over time"
# =============================================================================
# PICO VOCABULARIES/THESAURI
# =============================================================================
#
# PiCo provides controlled vocabularies for roles, source types, and events.
# =============================================================================
# The three PiCo SKOS concept schemes (roles, source types, event types),
# each with its base URI and a handful of example concepts.
pico_vocabularies:
description: |
PiCo defines three SKOS concept schemes for controlled terminology:
- Roles: The role a person plays in a source (child, declarant, witness, etc.)
- SourceTypes: Types of historical sources (birth certificate, census, etc.)
- EventTypes: Types of life events (birth, marriage, death, etc.)
roles_thesaurus:
id: "picot_roles"
uri: "https://terms.personsincontext.org/roles/"
type: "skos:ConceptScheme"
label: "Persons in Context role thesaurus"
description: "Roles that persons can have in historical sources"
usage: |
Use pico:hasRole property with a term from this thesaurus.
Example: picot_roles:575 (child), picot_roles:489 (declarant)
example_concepts:
- id: "575"
label: "child"
description: "Person appearing as child in a record"
- id: "489"
label: "declarant"
description: "Person declaring/reporting an event"
# NOTE(review): the ids below are words, unlike the numeric ids "575" and
# "489" above — presumably placeholders; confirm the real concept ids.
- id: "witness"
label: "witness"
description: "Person witnessing an event or signing a document"
- id: "bride"
label: "bride"
description: "Female partner in a marriage"
- id: "groom"
label: "groom"
description: "Male partner in a marriage"
# Source-type scheme; its terms are attached to the source document, not the
# person.
sourcetypes_thesaurus:
id: "picot_sourcetypes"
uri: "https://terms.personsincontext.org/sourcetypes/"
type: "skos:ConceptScheme"
label: "Persons in Context sourceType thesaurus"
description: "Types of historical sources containing person observations"
usage: |
Use sdo:additionalType property on sdo:ArchiveComponent.
Example: picot_sourcetypes:551 (civil registry: birth)
example_concepts:
- id: "551"
label: "civil registry: birth"
description: "Birth certificate from civil registration"
# NOTE(review): word ids again from here on, unlike "551" — presumably
# placeholders; confirm the real concept ids.
- id: "marriage"
label: "civil registry: marriage"
description: "Marriage certificate"
- id: "death"
label: "civil registry: death"
description: "Death certificate"
- id: "census"
label: "census"
description: "Population census record"
- id: "church_baptism"
label: "church record: baptism"
description: "Baptismal record from church register"
- id: "notarial"
label: "notarial record"
description: "Notarial act or protocol"
# Event-type scheme. Unlike the other two thesauri it has no `usage` entry
# and its example concepts carry no description.
eventtypes_thesaurus:
id: "picot_eventtypes"
uri: "https://terms.personsincontext.org/eventtypes/"
type: "skos:ConceptScheme"
label: "Persons in Context eventType thesaurus"
description: "Types of life events documented in sources"
# NOTE(review): all ids here are words — confirm against the thesaurus.
example_concepts:
- id: "birth"
label: "birth"
- id: "baptism"
label: "baptism"
- id: "marriage"
label: "marriage"
- id: "death"
label: "death"
- id: "burial"
label: "burial"
- id: "emigration"
label: "emigration"
- id: "immigration"
label: "immigration"
# =============================================================================
# GLM ANNOTATOR OUTPUT SCHEMA UPDATE
# =============================================================================
#
# Extended output schema for GLM-4.6 annotator to include family relationships
# and biographical properties.
# =============================================================================
# Shape of the extended per-person JSON object the annotator should emit.
# Values are type descriptions ("string|null"), not data.
glm_extended_output_schema:
description: |
Extended JSON output schema that includes all PiCo properties.
This supplements the core system_prompt output format.
persons_extended:
description: "Extended person object with all PiCo properties"
schema:
# Name components; literalName is the only one that is not nullable.
pnv_name:
literalName: "string"
givenName: "string|null"
patronym: "string|null"
surnamePrefix: "string|null"
baseSurname: "string|null"
honorificPrefix: "string|null"
honorificSuffix: "string|null"
initials: "string|null"
# Every biographical field is nullable.
biographical:
birth_date: "ISO date|null"
death_date: "ISO date|null"
birth_place: "string|null"
death_place: "string|null"
gender: "Male|Female|null"
age: "string|null"
religion: "string|null"
deceased: "boolean|null"
address: "string|null"
floruit: "string|null"
roles: "array of role objects"
# One key per relationship, each holding an array of person references.
# NOTE(review): there are no keys for great-grandparent/great-grandchild, the
# legitimized-child pair, or the grandparent, uncle/aunt, nephew/niece, cousin
# and step in-law properties — confirm whether that omission is intended.
family_relationships:
parent: "array of person references"
children: "array of person references"
spouse: "array of person references"
sibling: "array of person references"
grandparent: "array of person references"
grandchild: "array of person references"
uncle_aunt: "array of person references"
nephew_niece: "array of person references"
cousin: "array of person references"
stepparent: "array of person references"
stepchild: "array of person references"
stepsibling: "array of person references"
half_sibling: "array of person references"
foster_parent: "array of person references"
foster_child: "array of person references"
godparent: "array of person references"
godchild: "array of person references"
parent_in_law: "array of person references"
child_in_law: "array of person references"
sibling_in_law: "array of person references"
previous_partner: "array of person references"
# Unlike the arrays above, widow_of is a single reference or null.
widow_of: "person reference|null"
context: "string|null"
# =============================================================================
# CH-ANNOTATOR HYPERNYM INTEGRATION UPDATE
# =============================================================================
#
# Updated hypernym mappings to include reconstruction pattern.
# =============================================================================
# Maps PiCo/PNV/Schema.org/PROV terms to CH-Annotator hypernym codes.
# `pico_class` holds properties as well as classes (pico:hasRole, sdo:parent).
extended_hypernym_mapping:
description: |
Extended mappings between PiCo classes and CH-Annotator hypernyms,
including the reconstruction pattern.
mappings:
# Observation level
- pico_class: "pico:PersonObservation"
ch_hypernym: "AGT.PER"
ch_code: "AGT.PER"
note: "Source-bound person mention"
- pico_class: "pico:PersonObservation"
ch_hypernym: "AGT.STF"
ch_code: "AGT.STF"
condition: "When person has organizational role"
note: "Staff member observation"
# Reconstruction level: the only entry flagged for external entity linking
- pico_class: "pico:PersonReconstruction"
ch_hypernym: "AGT.PER"
ch_code: "AGT.PER"
note: "Reconstructed person entity"
linking: true
linking_sources: ["Wikidata", "VIAF", "ISNI"]
# Name components
- pico_class: "pnv:PersonName"
ch_hypernym: "APP.NAM"
ch_code: "APP.NAM"
note: "Structured name"
# Roles
- pico_class: "pico:hasRole"
ch_hypernym: "ROL"
ch_code: "ROL"
note: "Role in source"
# Family relationships
# NOTE(review): these three entries carry relationship_type but no ch_code,
# although ch_hypernym is set — confirm that the omission is intended.
- pico_class: "sdo:parent"
ch_hypernym: "AGT.PER"
relationship_type: "family"
note: "Parent relationship"
- pico_class: "sdo:spouse"
ch_hypernym: "AGT.PER"
relationship_type: "family"
note: "Spouse relationship"
- pico_class: "pico:hasGodparent"
ch_hypernym: "AGT.PER"
relationship_type: "ritual_kinship"
note: "Godparent relationship (common in historical records)"
# Sources
- pico_class: "sdo:ArchiveComponent"
ch_hypernym: "WRK.DOC"
ch_code: "WRK.DOC"
note: "Source document"
# Provenance: activities have no hypernym (null) and therefore no ch_code
- pico_class: "prov:Activity"
ch_hypernym: null
note: "Not directly annotated; tracked in provenance metadata"
- pico_class: "prov:Agent"
ch_hypernym: "AGT"
ch_code: "AGT"
note: "Extraction/reconstruction agent"
# =============================================================================
# HISTORICAL SOURCE EXTRACTION EXAMPLES
# =============================================================================
#
# Comprehensive examples showing extraction from different historical source types.
# These demonstrate the full PiCo model including family relationships.
# =============================================================================
# Worked examples: each pairs a source_text with the expected_output that
# should be extracted from it.
historical_extraction_examples:
description: |
These examples demonstrate extraction from common historical source types,
showing how to capture family relationships, biographical data, and roles
according to the PiCo model.
# ---------------------------------------------------------------------------
# Example 1: Dutch Marriage Certificate (Burgerlijke Stand)
# ---------------------------------------------------------------------------
# Dutch civil marriage certificate (Haarlem, 11 November 1885): groom, bride,
# both sets of parents, and two witnesses who are siblings of the couple.
# Persons marked "wijlen" (late) get deceased: true. Their surviving partner
# carries both the symmetric `spouse` link and a `widow_of` link, so the two
# families are modelled the same way.
marriage_certificate_example:
source_type: "civil_registration"
source_text: |
Heden den elfden November achttien honderd vijf en tachtig, zijn voor ons
Ambtenaar van den Burgerlijken Stand der gemeente Haarlem, verschenen:
Cornelis Johannes Koppen, oud dertig jaren, schilder, geboren te Haarlem,
wonende alhier, meerderjarige zoon van wijlen Pieter Koppen en van
Anna Maria Brouwer, zonder beroep, wonende alhier;
en Anna Maria Visser, oud zeven en twintig jaren, zonder beroep, geboren
te Amsterdam, wonende alhier, meerderjarige dochter van Jan Visser,
koopman, en van wijlen Cornelia de Vries.
Als getuigen waren tegenwoordig: Hendrik Koppen, oud vijf en dertig jaren,
schilder, broeder van den bruidegom; en Willem Visser, oud twee en dertig
jaren, timmerman, broeder van de bruid.
expected_output:
pico_observation:
observation_id: "bs_haarlem_1885_marriage_321"
observed_at: "2025-12-12T10:00:00Z"
source_type: "civil_registration"
source_reference: "BS Marriage Haarlem, November 11, 1885, certificate 321"
persons:
# Index 0: groom. "wonende alhier" is resolved to Haarlem for the address.
- person_index: 0
pnv_name:
literalName: "Cornelis Johannes Koppen"
givenName: "Cornelis Johannes"
baseSurname: "Koppen"
roles:
- role_title: "schilder"
role_in_source: "groom"
biographical:
age: "30"
birth_place: "Haarlem"
address: "Haarlem"
family_relationships:
parent:
- person_index: 2
target_name: "Pieter Koppen"
- person_index: 3
target_name: "Anna Maria Brouwer"
spouse:
- person_index: 1
target_name: "Anna Maria Visser"
sibling:
- person_index: 6
target_name: "Hendrik Koppen"
# Index 1: bride.
- person_index: 1
pnv_name:
literalName: "Anna Maria Visser"
givenName: "Anna Maria"
baseSurname: "Visser"
roles:
- role_in_source: "bride"
biographical:
age: "27"
birth_place: "Amsterdam"
address: "Haarlem"
family_relationships:
parent:
- person_index: 4
target_name: "Jan Visser"
- person_index: 5
target_name: "Cornelia de Vries"
spouse:
- person_index: 0
target_name: "Cornelis Johannes Koppen"
sibling:
- person_index: 7
target_name: "Willem Visser"
# Index 2: groom's father, "wijlen" in the source.
- person_index: 2
pnv_name:
literalName: "Pieter Koppen"
givenName: "Pieter"
baseSurname: "Koppen"
biographical:
deceased: true
family_relationships:
children:
- person_index: 0
target_name: "Cornelis Johannes Koppen"
- person_index: 6
target_name: "Hendrik Koppen"
spouse:
- person_index: 3
target_name: "Anna Maria Brouwer"
# Index 3: groom's mother, surviving partner of Pieter Koppen.
- person_index: 3
pnv_name:
literalName: "Anna Maria Brouwer"
givenName: "Anna Maria"
baseSurname: "Brouwer"
roles:
- role_title: "zonder beroep"
biographical:
address: "Haarlem"
family_relationships:
children:
- person_index: 0
target_name: "Cornelis Johannes Koppen"
- person_index: 6
target_name: "Hendrik Koppen"
# Spouse is symmetric: Pieter Koppen (index 2) lists her, so she lists him.
spouse:
- person_index: 2
target_name: "Pieter Koppen"
widow_of:
person_index: 2
target_name: "Pieter Koppen"
# Index 4: bride's father, surviving partner of Cornelia de Vries.
- person_index: 4
pnv_name:
literalName: "Jan Visser"
givenName: "Jan"
baseSurname: "Visser"
roles:
- role_title: "koopman"
family_relationships:
children:
- person_index: 1
target_name: "Anna Maria Visser"
- person_index: 7
target_name: "Willem Visser"
spouse:
- person_index: 5
target_name: "Cornelia de Vries"
# "wijlen Cornelia de Vries": Jan Visser is the surviving partner, so he
# gets the same widow_of link as Anna Maria Brouwer (index 3).
widow_of:
person_index: 5
target_name: "Cornelia de Vries"
# Index 5: bride's mother, "wijlen" in the source.
- person_index: 5
pnv_name:
literalName: "Cornelia de Vries"
givenName: "Cornelia"
surnamePrefix: "de"
baseSurname: "Vries"
biographical:
deceased: true
family_relationships:
children:
- person_index: 1
target_name: "Anna Maria Visser"
- person_index: 7
target_name: "Willem Visser"
spouse:
- person_index: 4
target_name: "Jan Visser"
# Index 6: witness, "broeder van den bruidegom"; parents follow from that.
- person_index: 6
pnv_name:
literalName: "Hendrik Koppen"
givenName: "Hendrik"
baseSurname: "Koppen"
roles:
- role_title: "schilder"
role_in_source: "witness"
biographical:
age: "35"
family_relationships:
sibling:
- person_index: 0
target_name: "Cornelis Johannes Koppen"
parent:
- person_index: 2
target_name: "Pieter Koppen"
- person_index: 3
target_name: "Anna Maria Brouwer"
# Index 7: witness, "broeder van de bruid"; parents follow from that.
- person_index: 7
pnv_name:
literalName: "Willem Visser"
givenName: "Willem"
baseSurname: "Visser"
roles:
- role_title: "timmerman"
role_in_source: "witness"
biographical:
age: "32"
family_relationships:
sibling:
- person_index: 1
target_name: "Anna Maria Visser"
parent:
- person_index: 4
target_name: "Jan Visser"
- person_index: 5
target_name: "Cornelia de Vries"
# The written-out Dutch date is normalized to ISO form.
temporal_references:
- expression: "den elfden November achttien honderd vijf en tachtig"
normalized: "1885-11-11"
type: "DATE"
locations_mentioned:
- name: "Haarlem"
type: "city"
- name: "Amsterdam"
type: "city"
# ---------------------------------------------------------------------------
# Example 2: Early Modern Notarial Protocol Index Entry
# ---------------------------------------------------------------------------
# Notarial index entry (Amsterdam, 30 January 1680): a married couple appears
# for themselves and as guardians of two minor children of deceased parents;
# the notary and two witnesses are also recorded as persons.
notarial_index_example:
source_type: "historical_indices"
source_text: |
Notarial Archive Amsterdam, inv. 5075/1234
30 January 1680
Before notary Pieter van der Meer appeared:
Jacob Janszoon van der Hoeven, merchant of this city,
with his wife Maritgen Claes, for themselves and as
guardians (voogden) of the minor children of the late
Claes Jacobsz and Aeltgen Pieters, namely:
- Jan Claeszoon, aged about 16 years
- Trijntgen Claesdr, aged about 12 years
Witnesses: Hendrick Jansz, baker, and Cornelis Pietersz,
schoolmaster, both of this city.
expected_output:
pico_observation:
observation_id: "na_amsterdam_5075_1234"
observed_at: "2025-12-12T10:00:00Z"
source_type: "historical_indices"
source_reference: "Notarial Archive Amsterdam, inv. 5075/1234, 30 January 1680"
persons:
# Declarant. "of this city" is resolved to Amsterdam for the address.
# NOTE(review): the guardianship over persons 4 and 5 is recorded only as the
# "voogd" role title; no relationship links guardians and wards — confirm.
- person_index: 0
pnv_name:
literalName: "Jacob Janszoon van der Hoeven"
givenName: "Jacob"
patronym: "Janszoon"
surnamePrefix: "van der"
baseSurname: "Hoeven"
roles:
- role_title: "merchant"
role_in_source: "declarant"
- role_title: "voogd"
role_in_source: null
biographical:
address: "Amsterdam"
family_relationships:
spouse:
- person_index: 1
target_name: "Maritgen Claes"
# Wife of person 0; "Claes" is treated as a patronym, not a surname.
- person_index: 1
pnv_name:
literalName: "Maritgen Claes"
givenName: "Maritgen"
patronym: "Claes"
roles:
- role_in_source: "declarant"
- role_title: "voogd"
family_relationships:
spouse:
- person_index: 0
target_name: "Jacob Janszoon van der Hoeven"
# Deceased father of the two minors ("the late" in the source).
- person_index: 2
pnv_name:
literalName: "Claes Jacobsz"
givenName: "Claes"
patronym: "Jacobsz"
biographical:
deceased: true
family_relationships:
spouse:
- person_index: 3
target_name: "Aeltgen Pieters"
children:
- person_index: 4
target_name: "Jan Claeszoon"
- person_index: 5
target_name: "Trijntgen Claesdr"
# Deceased mother of the two minors.
- person_index: 3
pnv_name:
literalName: "Aeltgen Pieters"
givenName: "Aeltgen"
patronym: "Pieters"
biographical:
deceased: true
family_relationships:
spouse:
- person_index: 2
target_name: "Claes Jacobsz"
children:
- person_index: 4
target_name: "Jan Claeszoon"
- person_index: 5
target_name: "Trijntgen Claesdr"
# Minor child; the approximate age keeps the source's "about".
- person_index: 4
pnv_name:
literalName: "Jan Claeszoon"
givenName: "Jan"
patronym: "Claeszoon"
roles:
- role_in_source: "child"
biographical:
age: "about 16"
family_relationships:
parent:
- person_index: 2
target_name: "Claes Jacobsz"
- person_index: 3
target_name: "Aeltgen Pieters"
sibling:
- person_index: 5
target_name: "Trijntgen Claesdr"
- person_index: 5
pnv_name:
literalName: "Trijntgen Claesdr"
givenName: "Trijntgen"
patronym: "Claesdr"
roles:
- role_in_source: "child"
biographical:
age: "about 12"
# NOTE(review): this is the only person in the example with a gender value —
# confirm whether the others should carry one too.
gender: "Female"
family_relationships:
parent:
- person_index: 2
target_name: "Claes Jacobsz"
- person_index: 3
target_name: "Aeltgen Pieters"
sibling:
- person_index: 4
target_name: "Jan Claeszoon"
# The notary has a role title but no role_in_source.
- person_index: 6
pnv_name:
literalName: "Pieter van der Meer"
givenName: "Pieter"
surnamePrefix: "van der"
baseSurname: "Meer"
roles:
- role_title: "notary"
# Witnesses: "both of this city" gives each the Amsterdam address.
- person_index: 7
pnv_name:
literalName: "Hendrick Jansz"
givenName: "Hendrick"
patronym: "Jansz"
roles:
- role_title: "baker"
role_in_source: "witness"
biographical:
address: "Amsterdam"
- person_index: 8
pnv_name:
literalName: "Cornelis Pietersz"
givenName: "Cornelis"
patronym: "Pietersz"
roles:
- role_title: "schoolmaster"
role_in_source: "witness"
biographical:
address: "Amsterdam"
temporal_references:
- expression: "30 January 1680"
normalized: "1680-01-30"
type: "DATE"
locations_mentioned:
- name: "Amsterdam"
type: "city"
# ---------------------------------------------------------------------------
# Example 3: Church Baptismal Record with Godparents
# ---------------------------------------------------------------------------
# Church baptism entry dated 15 May 1702: the child, both parents, two
# witnesses modelled as godparents, and the husband of one witness.
baptism_record_example:
source_type: "church_records"
source_text: |
Den 15en Meij 1702 is gedoopt
Johanna, dochter van Willem Hendriksen en Geertruijd Jans,
getuijgen waren de E. Heer Jan Willem van Beverwijck
ende Juffrou Maria van Loon, huijsvrouw van de heer
Pieter Anthonisz Verschoor.
expected_output:
pico_observation:
# NOTE(review): the id and reference name Amsterdam, which the source_text
# itself does not mention — presumably known from the register; confirm.
observation_id: "dtb_amsterdam_1702_baptism_johanna"
observed_at: "2025-12-12T10:00:00Z"
source_type: "church_records"
source_reference: "DTB Amsterdam, 15 May 1702"
persons:
# The baptized child is named by given name only.
- person_index: 0
pnv_name:
literalName: "Johanna"
givenName: "Johanna"
roles:
- role_in_source: "child"
biographical:
gender: "Female"
family_relationships:
parent:
- person_index: 1
target_name: "Willem Hendriksen"
- person_index: 2
target_name: "Geertruijd Jans"
# The two "getuijgen" (witnesses) are linked as godparents; their own
# entries keep role_in_source "witness".
godparent:
- person_index: 3
target_name: "Jan Willem van Beverwijck"
- person_index: 4
target_name: "Maria van Loon"
# Parents carry no role entry; they are linked to each other as spouses.
- person_index: 1
pnv_name:
literalName: "Willem Hendriksen"
givenName: "Willem"
patronym: "Hendriksen"
biographical:
gender: "Male"
family_relationships:
children:
- person_index: 0
target_name: "Johanna"
spouse:
- person_index: 2
target_name: "Geertruijd Jans"
- person_index: 2
pnv_name:
literalName: "Geertruijd Jans"
givenName: "Geertruijd"
patronym: "Jans"
biographical:
gender: "Female"
family_relationships:
children:
- person_index: 0
target_name: "Johanna"
spouse:
- person_index: 1
target_name: "Willem Hendriksen"
# Honorifics are kept out of literalName and stored in honorificPrefix.
- person_index: 3
pnv_name:
literalName: "Jan Willem van Beverwijck"
givenName: "Jan Willem"
surnamePrefix: "van"
baseSurname: "Beverwijck"
honorificPrefix: "de E. Heer"
roles:
- role_in_source: "witness"
biographical:
gender: "Male"
family_relationships:
godchild:
- person_index: 0
target_name: "Johanna"
- person_index: 4
pnv_name:
literalName: "Maria van Loon"
givenName: "Maria"
surnamePrefix: "van"
baseSurname: "Loon"
honorificPrefix: "Juffrou"
roles:
- role_in_source: "witness"
biographical:
gender: "Female"
family_relationships:
godchild:
- person_index: 0
target_name: "Johanna"
spouse:
- person_index: 5
target_name: "Pieter Anthonisz Verschoor"
# Mentioned only as Maria van Loon's husband ("huijsvrouw van"), so he has
# no roles entry of his own.
- person_index: 5
pnv_name:
literalName: "Pieter Anthonisz Verschoor"
givenName: "Pieter"
patronym: "Anthonisz"
baseSurname: "Verschoor"
honorificPrefix: "de heer"
biographical:
gender: "Male"
family_relationships:
spouse:
- person_index: 4
target_name: "Maria van Loon"
temporal_references:
- expression: "Den 15en Meij 1702"
normalized: "1702-05-15"
type: "DATE"
# ---------------------------------------------------------------------------
# Example 4: Modern LinkedIn Staff Profile
# ---------------------------------------------------------------------------
# Modern staff profile: a single person with three museum roles, no family
# relationships, and the organizations and locations named in the text.
linkedin_profile_example:
source_type: "modern_digital"
source_text: |
Dr. Maria van den Berg
Director of Collections | Rijksmuseum
Amsterdam, Netherlands
About:
Leading the collections management team at the Rijksmuseum since 2018.
Previously Head Curator at the Van Gogh Museum (2012-2018).
PhD in Art History, University of Amsterdam.
Experience:
- Director of Collections, Rijksmuseum (2018-present)
- Head Curator, Van Gogh Museum (2012-2018)
- Assistant Curator, Stedelijk Museum (2008-2012)
Education:
- PhD Art History, University of Amsterdam (2008)
- MA Museum Studies, University of Amsterdam (2003)
expected_output:
pico_observation:
observation_id: "linkedin_maria_van_den_berg_2025"
observed_at: "2025-12-12T10:00:00Z"
source_type: "modern_digital"
source_reference: "https://linkedin.com/in/mariavandenberg"
persons:
# Here the title stays in literalName and is repeated in honorificPrefix.
- person_index: 0
pnv_name:
literalName: "Dr. Maria van den Berg"
givenName: "Maria"
surnamePrefix: "van den"
baseSurname: "Berg"
honorificPrefix: "Dr."
# One role per "Experience" line, newest first. heritage_type "M" matches the
# type given to the same museums under organizations_mentioned.
roles:
- role_title: "Director of Collections"
organization: "Rijksmuseum"
period: "2018-present"
heritage_relevant: true
heritage_type: "M"
- role_title: "Head Curator"
organization: "Van Gogh Museum"
period: "2012-2018"
heritage_relevant: true
heritage_type: "M"
- role_title: "Assistant Curator"
organization: "Stedelijk Museum"
period: "2008-2012"
heritage_relevant: true
heritage_type: "M"
biographical:
address: "Amsterdam, Netherlands"
# Explicit empty mapping: the profile states no family relationships.
family_relationships: {}
context: "Heritage sector professional with museum career"
# NOTE(review): type codes "M" and "E" are not defined in this section —
# presumably museum and education institution; confirm against the type list.
organizations_mentioned:
- name: "Rijksmuseum"
type: "M"
role_in_source: "employer"
- name: "Van Gogh Museum"
type: "M"
role_in_source: "employer"
- name: "Stedelijk Museum"
type: "M"
role_in_source: "employer"
- name: "University of Amsterdam"
type: "E"
role_in_source: "education"
# The address line "Amsterdam, Netherlands" is split into city and country.
locations_mentioned:
- name: "Amsterdam"
type: "city"
- name: "Netherlands"
type: "country"
# =============================================================================
# END OF MODULE
# =============================================================================