# glam/data/entity_annotation/modules/integrations/pico/_index.yaml

# =============================================================================
# PiCo Integration Module - Index
# =============================================================================
# Part of: GLAM-NER Entity Annotation Convention v1.7.0
# Module: integrations/pico/
#
# Description:
#   PiCo (Persons in Context ontology) integration for person observation modeling.
#   Enables tracking provenance of person mentions and linking to formal records.
#
# Key concepts:
#   - PersonObservation: A textual mention of a person (source-bound)
#   - PersonName (PNV): Structured name components
#   - Person (CIDOC-CRM E21): Reconstructed person entity
#
# References:
#   - PiCo Ontology: https://w3id.org/pico
#   - Person Name Vocabulary (PNV): https://w3id.org/pnv
#   - CIDOC-CRM: https://www.cidoc-crm.org/
#
# Module Structure:
#   pico/
#   ├── _index.yaml                  # This file - module manifest
#   ├── schema/
#   │   ├── observation.yaml         # Core PiCo observation pattern
#   │   ├── pnv_components.yaml      # Person Name Vocabulary
#   │   ├── relationships.yaml       # Family and social relationships
#   │   ├── temporal.yaml            # Date and calendar systems
#   │   └── locations.yaml           # Location type definitions
#   ├── examples/
#   │   ├── _examples_index.yaml     # Examples overview
#   │   ├── 01_dutch_marriage.yaml   # Example 1: Dutch civil registration
#   │   ├── 02_notarial_protocol.yaml
#   │   ├── 03_church_baptismal.yaml
#   │   ├── 04_linkedin_profile.yaml
#   │   ├── 05_arabic_waqf.yaml
#   │   ├── 06_hebrew_ketubah.yaml   # REAL DATA: Yale Mashhad 1896
#   │   ├── 07_spanish_colonial.yaml
#   │   ├── 08_italian_notarial.yaml
#   │   ├── 09_greek_orthodox.yaml
#   │   ├── 10_russian_metrical.yaml # REAL DATA: BYU Osiek 1894
#   │   └── 11_ottoman_sijill.yaml
#   └── naming_conventions/
#       ├── dutch.yaml               # Dutch naming rules
#       ├── arabic.yaml              # Arabic naming rules
#       ├── hebrew.yaml              # Hebrew naming rules
#       └── ...                      # Other language conventions
#
# Last Updated: 2025-01-13
# Version: 1.7.0
# =============================================================================

# Module manifest: identity, version and a prose summary of the module.
module:
  # Identifier of this module.
  id: "pico_integration"
  name: "PiCo Integration Module"
  # Quoted so the version is always read as a string.
  version: "1.7.0"
  # NOTE(review): presumably the identifier of the parent annotation
  # convention at v1.7.0 — confirm against the parent's own manifest.
  parent: "ch_annotator-v1_7_0"
  # Literal block scalar: line breaks and list layout below are preserved.
  description: |
    PiCo (Persons in Context ontology) models textual observations of persons
    as distinct from reconstructed person entities. This enables:
    - Tracking provenance of person mentions
    - Handling name variations across sources
    - Linking observations to formal person records

    The observation/reconstruction pattern separates:
    1. What was OBSERVED in text (PersonObservation) - source-bound, exact
    2. What was RECONSTRUCTED as entity (E21_Person) - inferred, normalized

    This is critical for heritage data where the same person may appear with
    different name forms, titles, or spellings across sources.

# -----------------------------------------------------------------------------
# Module Components
# -----------------------------------------------------------------------------

# Inventory of the files that make up this module, grouped by role.
# Paths follow the pico/ layout sketched in the header comment.
components:
  # Schema definition files and the classes/properties each one declares.
  schema:
    description: "Core schema definitions for PiCo model"
    files:
      - path: "schema/observation.yaml"
        description: "PersonObservation class and properties"
        classes:
          - "picom:PersonObservation"

      - path: "schema/pnv_components.yaml"
        description: "Person Name Vocabulary (PNV) components"
        classes:
          - "pnv:PersonName"

      - path: "schema/relationships.yaml"
        description: "Family and social relationship types"
        # NOTE(review): the last two entries carry no namespace prefix,
        # unlike the sdo: ones — confirm this is intentional.
        properties:
          - "sdo:parent"
          - "sdo:children"
          - "sdo:spouse"
          - "sdo:sibling"
          - "godparent"
          - "witness"

      - path: "schema/temporal.yaml"
        description: "Date formats, calendar systems, temporal modeling"

      - path: "schema/locations.yaml"
        description: "Location types for biographical data"

  # Worked examples. Ids match the example file names (without .yaml)
  # listed in the header comment.
  examples:
    description: "Complete extraction examples demonstrating PiCo patterns"
    index_file: "examples/_examples_index.yaml"
    # Examples flagged REAL_HISTORICAL_DATA; each names its source and a
    # locator (call_number for one, reference for the other).
    real_data_examples:
      - id: "06_hebrew_ketubah"
        data_status: "REAL_HISTORICAL_DATA"
        source: "Yale University Beinecke Library"
        call_number: "Hebrew MSS suppl 194"

      - id: "10_russian_metrical"
        data_status: "REAL_HISTORICAL_DATA"
        source: "Archiwum Panstwowe w Poznaniu Oddzial w Koninie"
        reference: "54/792/0/6.1/140"

    # The remaining example ids, listed by id only.
    synthetic_examples:
      - "01_dutch_marriage"
      - "02_notarial_protocol"
      - "03_church_baptismal"
      - "04_linkedin_profile"
      - "05_arabic_waqf"
      - "07_spanish_colonial"
      - "08_italian_notarial"
      - "09_greek_orthodox"
      - "11_ottoman_sijill"

  # One file per language: its language code and the topics it covers.
  naming_conventions:
    description: "Language-specific naming rules and patterns"
    files:
      - path: "naming_conventions/dutch.yaml"
        language: "nl"
        covers:
          - "tussenvoegsels"
          - "patronymics"
          - "sorting rules"

      - path: "naming_conventions/arabic.yaml"
        language: "ar"
        covers:
          - "nasab"
          - "nisba"
          - "kunya"
          - "laqab"

      - path: "naming_conventions/hebrew.yaml"
        language: "he"
        covers:
          - "ben/bat patronymics"
          - "ketubah conventions"

      - path: "naming_conventions/spanish.yaml"
        language: "es"
        covers:
          - "double surnames"
          - "colonial titles"

      - path: "naming_conventions/italian.yaml"
        language: "it"
        covers:
          - "notarial conventions"
          - "nobility particles"

      - path: "naming_conventions/greek.yaml"
        language: "el"
        covers:
          - "Orthodox naming"
          - "genitive forms"

      - path: "naming_conventions/russian.yaml"
        language: "ru"
        covers:
          - "patronymics"
          - "metrical book conventions"

      - path: "naming_conventions/ottoman.yaml"
        language: "ota"
        covers:
          - "Ottoman Turkish"
          - "Arabic-Ottoman blend"

# -----------------------------------------------------------------------------
# GLM-4.6 Annotator Configuration
# -----------------------------------------------------------------------------

# Settings for the GLM-4.6 annotator. No credentials are stored in this block.
glm_annotator_config:
  # Model identifier; quoted so it is always read as a string.
  model: "glm-4.6"
  # Hard-coded chat-completions URL.
  api_endpoint: "https://api.z.ai/api/coding/paas/v4/chat/completions"
  # NOTE(review): presumably the sampling temperature and the response token
  # cap passed to the API — confirm against the annotator client.
  temperature: 0.1
  max_tokens: 4000
  # Same path that components.schema registers as the PersonObservation
  # schema file.
  system_prompt_file: "schema/observation.yaml"  # Contains extraction instructions

# -----------------------------------------------------------------------------
# Hypernym Mapping (GLAM-NER v1.7.0)
# -----------------------------------------------------------------------------

# Table linking PiCo/PNV terms to GLAM-NER hypernym codes.
hypernym_mapping:
  description: "How PiCo concepts map to GLAM-NER v1.7.0 hypernyms"

  # Each entry pairs a term (pico_class) with a hypernym code (glam_hypernym);
  # "condition" is present only on the AGT.STF entry.
  mappings:
    # NOTE(review): the first two entries share the same pico_class. The first
    # has no condition, so it presumably acts as the default — confirm how the
    # consumer resolves the overlap.
    - pico_class: "picom:PersonObservation"
      glam_hypernym: "AGT.PER"
      note: "Person observations create AGT.PER entities"

    - pico_class: "picom:PersonObservation"
      glam_hypernym: "AGT.STF"
      condition: "When observed with organizational role"
      note: "Staff members with role context"

    - pico_class: "pnv:PersonName"
      glam_hypernym: "APP.NAM"
      note: "Name strings as appellations"

    # NOTE(review): "hasRole" reads like a property name rather than a class,
    # although it sits under the pico_class key — confirm this is intended.
    - pico_class: "picom:hasRole"
      glam_hypernym: "ROL"
      note: "Extracted roles link to ROL hypernym"

# -----------------------------------------------------------------------------
# Usage Notes
# -----------------------------------------------------------------------------

# Human-readable usage notes. Both values are literal block scalars holding
# Markdown with fenced code samples; they are text, not executed by YAML.
usage:
  # How to load the module's files from Python.
  loading: |
    Since YAML does not have native imports, applications should load
    module files individually or use a custom loader. Example:

    ```python
    import yaml
    from pathlib import Path

    def _load_yaml(path: Path):
        # Read as UTF-8 explicitly rather than relying on the platform
        # default encoding.
        return yaml.safe_load(path.read_text(encoding='utf-8'))

    def load_pico_module(base_path: Path) -> dict:
        module = {}
        module['index'] = _load_yaml(base_path / '_index.yaml')
        module['observation'] = _load_yaml(base_path / 'schema/observation.yaml')
        module['pnv'] = _load_yaml(base_path / 'schema/pnv_components.yaml')
        # ... load other components as needed
        return module
    ```

  # One-liner that checks a single file parses with the safe loader.
  validation: |
    Each YAML file is valid standalone. Validate with:
    ```bash
    python3 -c "import yaml, pathlib; yaml.safe_load(pathlib.Path('path/to/file.yaml').read_text(encoding='utf-8'))"
    ```