228 lines
8.4 KiB
YAML
228 lines
8.4 KiB
YAML
# =============================================================================
# PiCo Integration Module - Index
# =============================================================================
# Part of: GLAM-NER Entity Annotation Convention v1.7.0
# Module: integrations/pico/
#
# Description:
#   PiCo (Person in Context Ontology) integration for person observation modeling.
#   Enables tracking provenance of person mentions and linking to formal records.
#
# Key concepts:
#   - PersonObservation: A textual mention of a person (source-bound)
#   - PersonName (PNV): Structured name components
#   - Person (CIDOC-CRM E21): Reconstructed person entity
#
# References:
#   - PiCo Ontology: https://w3id.org/pico
#   - Person Name Vocabulary (PNV): https://w3id.org/pnv
#   - CIDOC-CRM: https://www.cidoc-crm.org/
#
# Module Structure:
#   pico/
#   ├── _index.yaml                     # This file - module manifest
#   ├── schema/
#   │   ├── observation.yaml            # Core PiCo observation pattern
#   │   ├── pnv_components.yaml         # Person Name Vocabulary
#   │   ├── relationships.yaml          # Family and social relationships
#   │   ├── temporal.yaml               # Date and calendar systems
#   │   └── locations.yaml              # Location type definitions
#   ├── examples/
#   │   ├── _examples_index.yaml        # Examples overview
#   │   ├── 01_dutch_marriage.yaml      # Example 1: Dutch civil registration
#   │   ├── 02_notarial_protocol.yaml
#   │   ├── 03_church_baptismal.yaml
#   │   ├── 04_linkedin_profile.yaml
#   │   ├── 05_arabic_waqf.yaml
#   │   ├── 06_hebrew_ketubah.yaml      # REAL DATA: Yale Mashhad 1896
#   │   ├── 07_spanish_colonial.yaml
#   │   ├── 08_italian_notarial.yaml
#   │   ├── 09_greek_orthodox.yaml
#   │   ├── 10_russian_metrical.yaml    # REAL DATA: BYU Osiek 1894
#   │   └── 11_ottoman_sijill.yaml
#   └── naming_conventions/
#       ├── dutch.yaml                  # Dutch naming rules
#       ├── arabic.yaml                 # Arabic naming rules
#       ├── hebrew.yaml                 # Hebrew naming rules
#       └── ...                         # Other language conventions
#
# Last Updated: 2025-01-13
# Version: 1.7.0
# =============================================================================

# Module identity: stable id, display name, version, and the parent convention
# this module belongs to. All values are quoted so they always load as strings
# (e.g. the version is never coerced to a number).
module:
  id: "pico_integration"
  name: "PiCo Integration Module"
  version: "1.7.0"
  parent: "ch_annotator-v1_7_0"
  # Literal block scalar: line breaks inside the description are preserved.
  description: |
    PiCO (Person in Context Ontology) models textual observations of persons
    as distinct from reconstructed person entities. This enables:
    - Tracking provenance of person mentions
    - Handling name variations across sources
    - Linking observations to formal person records

    The observation/reconstruction pattern separates:
    1. What was OBSERVED in text (PersonObservation) - source-bound, exact
    2. What was RECONSTRUCTED as entity (E21_Person) - inferred, normalized

    This is critical for heritage data where the same person may appear with
    different name forms, titles, or spellings across sources.

# -----------------------------------------------------------------------------
# Module Components
# -----------------------------------------------------------------------------

# Manifest of the files that make up the module, grouped into three sections:
# schema definitions, worked examples, and per-language naming conventions.
# Every `path` / `index_file` value is written relative to the module directory.
components:
  schema:
    description: "Core schema definitions for PiCo model"
    files:
      - path: "schema/observation.yaml"
        description: "PersonObservation class and properties"
        classes:
          - "picom:PersonObservation"

      - path: "schema/pnv_components.yaml"
        description: "Person Name Vocabulary (PNV) components"
        classes:
          - "pnv:PersonName"

      - path: "schema/relationships.yaml"
        description: "Family and social relationship types"
        # NOTE(review): "godparent" and "witness" carry no namespace prefix,
        # unlike the sdo:-prefixed entries - confirm this is intentional.
        properties:
          - "sdo:parent"
          - "sdo:children"
          - "sdo:spouse"
          - "sdo:sibling"
          - "godparent"
          - "witness"

      - path: "schema/temporal.yaml"
        description: "Date formats, calendar systems, temporal modeling"

      - path: "schema/locations.yaml"
        description: "Location types for biographical data"

  examples:
    description: "Complete extraction examples demonstrating PiCo patterns"
    index_file: "examples/_examples_index.yaml"
    # Examples flagged REAL_HISTORICAL_DATA, each with its holding institution
    # and shelf mark / archival reference.
    real_data_examples:
      - id: "06_hebrew_ketubah"
        data_status: "REAL_HISTORICAL_DATA"
        source: "Yale University Beinecke Library"
        call_number: "Hebrew MSS suppl 194"

      - id: "10_russian_metrical"
        data_status: "REAL_HISTORICAL_DATA"
        source: "Archiwum Panstwowe w Poznaniu Oddzial w Koninie"
        reference: "54/792/0/6.1/140"

    # Remaining example ids (everything except 06 and 10), listed by id only.
    synthetic_examples:
      - "01_dutch_marriage"
      - "02_notarial_protocol"
      - "03_church_baptismal"
      - "04_linkedin_profile"
      - "05_arabic_waqf"
      - "07_spanish_colonial"
      - "08_italian_notarial"
      - "09_greek_orthodox"
      - "11_ottoman_sijill"

  naming_conventions:
    description: "Language-specific naming rules and patterns"
    # One entry per language file: a language code plus the topics it covers.
    files:
      - path: "naming_conventions/dutch.yaml"
        language: "nl"
        covers: ["tussenvoegsels", "patronymics", "sorting rules"]

      - path: "naming_conventions/arabic.yaml"
        language: "ar"
        covers: ["nasab", "nisba", "kunya", "laqab"]

      - path: "naming_conventions/hebrew.yaml"
        language: "he"
        covers: ["ben/bat patronymics", "ketubah conventions"]

      - path: "naming_conventions/spanish.yaml"
        language: "es"
        covers: ["double surnames", "colonial titles"]

      - path: "naming_conventions/italian.yaml"
        language: "it"
        covers: ["notarial conventions", "nobility particles"]

      - path: "naming_conventions/greek.yaml"
        language: "el"
        covers: ["Orthodox naming", "genitive forms"]

      - path: "naming_conventions/russian.yaml"
        language: "ru"
        covers: ["patronymics", "metrical book conventions"]

      - path: "naming_conventions/ottoman.yaml"
        language: "ota"
        covers: ["Ottoman Turkish", "Arabic-Ottoman blend"]

# -----------------------------------------------------------------------------
# GLM-4.7 Annotator Configuration
# -----------------------------------------------------------------------------

# Settings for the GLM annotator: model id, chat-completions endpoint, and
# sampling parameters. `temperature` and `max_tokens` are deliberately left
# unquoted so they load as numbers rather than strings.
glm_annotator_config:
  model: "glm-4.7"
  api_endpoint: "https://api.z.ai/api/coding/paas/v4/chat/completions"
  temperature: 0.1
  max_tokens: 4000
  system_prompt_file: "schema/observation.yaml"  # Contains extraction instructions

# -----------------------------------------------------------------------------
# Hypernym Mapping (GLAM-NER v1.7.0)
# -----------------------------------------------------------------------------

# Maps PiCo / PNV terms onto GLAM-NER hypernym codes. Each entry carries a
# `note`; an optional `condition` narrows when the mapping applies.
hypernym_mapping:
  description: "How PiCo concepts map to GLAM-NER v1.7.0 hypernyms"

  mappings:
    # picom:PersonObservation appears twice: once with no condition, and once
    # with a condition for the organizational-role case.
    - pico_class: "picom:PersonObservation"
      glam_hypernym: "AGT.PER"
      note: "Person observations create AGT.PER entities"

    - pico_class: "picom:PersonObservation"
      glam_hypernym: "AGT.STF"
      condition: "When observed with organizational role"
      note: "Staff members with role context"

    - pico_class: "pnv:PersonName"
      glam_hypernym: "APP.NAM"
      note: "Name strings as appellations"

    # NOTE(review): "picom:hasRole" reads like a property name, yet it sits
    # under the `pico_class` key - confirm consumers expect this.
    - pico_class: "picom:hasRole"
      glam_hypernym: "ROL"
      note: "Extracted roles link to ROL hypernym"

# -----------------------------------------------------------------------------
# Usage Notes
# -----------------------------------------------------------------------------

# Human-readable guidance stored as literal block scalars, so the embedded
# Markdown code fences and the Python indentation are preserved verbatim.
usage:
  loading: |
    Since YAML does not have native imports, applications should load
    module files individually or use a custom loader. Example:

    ```python
    import yaml
    from pathlib import Path

    def load_pico_module(base_path: Path) -> dict:
        module = {}
        module['index'] = yaml.safe_load((base_path / '_index.yaml').read_text())
        module['observation'] = yaml.safe_load((base_path / 'schema/observation.yaml').read_text())
        module['pnv'] = yaml.safe_load((base_path / 'schema/pnv_components.yaml').read_text())
        # ... load other components as needed
        return module
    ```

  validation: |
    Each YAML file is valid standalone. Validate with:
    ```bash
    python3 -c "import yaml; yaml.safe_load(open('path/to/file.yaml'))"
    ```