diff --git a/docs/plan/person_pid/08_implementation_guidelines.md b/docs/plan/person_pid/08_implementation_guidelines.md
new file mode 100644
index 0000000000..d2d681716a
--- /dev/null
+++ b/docs/plan/person_pid/08_implementation_guidelines.md
@@ -0,0 +1,2447 @@
+# Implementation Guidelines
+
+**Version**: 0.1.0
+**Last Updated**: 2025-01-09
+**Related**: [Identifier Structure](./05_identifier_structure_design.md) | [Claims and Provenance](./07_claims_and_provenance.md)
+
+---
+
+## 1. Overview
+
+This document provides comprehensive technical specifications for implementing the PPID system:
+
+- Database architecture (PostgreSQL + RDF triple store)
+- API design (REST + GraphQL)
+- Data ingestion pipeline
+- GHCID integration patterns
+- Security and access control
+- Performance requirements
+- Technology stack recommendations
+- Deployment architecture
+- Monitoring and observability
+
+---
+
+## 2. Architecture Overview
+
+### 2.1 System Components
+
+```
+┌─────────────────────────────────────────────────────────────────────────────┐
+│ PPID System │
+├─────────────────────────────────────────────────────────────────────────────┤
+│ │
+│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
+│ │ Ingestion │────▶│ Processing │────▶│ Storage │ │
+│ │ Layer │ │ Layer │ │ Layer │ │
+│ └──────────────┘ └──────────────┘ └──────────────┘ │
+│ │ │ │ │
+│ ▼ ▼ ▼ │
+│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
+│ │ Web Scrapers │ │ Entity Res. │ │ PostgreSQL │ │
+│ │ API Clients │ │ NER/NLP │ │ (Relational) │ │
+│ │ File Import │ │ Validation │ ├──────────────┤ │
+│ └──────────────┘ └──────────────┘ │ Apache Jena │ │
+│ │ (RDF/SPARQL) │ │
+│ ├──────────────┤ │
+│ │ Redis │ │
+│ │ (Cache) │ │
+│ └──────────────┘ │
+│ │ │
+│ ┌─────────────────────┴─────────────────────┐ │
+│ ▼ ▼ │
+│ ┌──────────────┐ ┌──────────┐│
+│ │ REST API │ │ GraphQL ││
+│ │ (FastAPI) │ │ (Ariadne││
+│ └──────────────┘ └──────────┘│
+│ │
+└─────────────────────────────────────────────────────────────────────────────┘
+```
+
+### 2.2 Technology Stack
+
+| Layer | Technology | Purpose |
+|-------|------------|---------|
+| **API Framework** | FastAPI (Python 3.11+) | REST API, async support |
+| **GraphQL** | Ariadne | GraphQL endpoint |
+| **Relational DB** | PostgreSQL 16 | Primary data store |
+| **Triple Store** | Apache Jena Fuseki | RDF/SPARQL queries |
+| **Cache** | Redis 7 | Session, rate limiting, caching |
+| **Queue** | Apache Kafka | Async processing pipeline |
+| **Search** | Elasticsearch 8 | Full-text search |
+| **Object Storage** | MinIO / S3 | HTML archives, files |
+| **Container** | Docker + Kubernetes | Deployment |
+| **Monitoring** | Prometheus + Grafana | Metrics and alerting |
+
+---
+
+## 3. Database Schema
+
+### 3.1 PostgreSQL Schema
+
+```sql
+-- Enable required extensions
+CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
+CREATE EXTENSION IF NOT EXISTS "pg_trgm"; -- For fuzzy matching
+
+-- Enum types
+CREATE TYPE ppid_type AS ENUM ('POID', 'PRID');
+CREATE TYPE claim_status AS ENUM ('active', 'superseded', 'retracted');
+CREATE TYPE source_type AS ENUM (
+ 'official_registry',
+ 'institutional_website',
+ 'professional_network',
+ 'social_media',
+ 'news_article',
+ 'academic_publication',
+ 'user_submitted',
+ 'inferred'
+);
+
+-- Person Observations (POID)
+CREATE TABLE person_observations (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ poid VARCHAR(24) UNIQUE NOT NULL, -- POID-xxxx-xxxx-xxxx-xxxx
+
+ -- Source metadata
+ source_url TEXT NOT NULL,
+ source_type source_type NOT NULL,
+ retrieved_at TIMESTAMPTZ NOT NULL,
+ content_hash VARCHAR(64) NOT NULL, -- SHA-256
+ html_archive_path TEXT,
+
+ -- Extracted name components (PNV-compatible)
+ literal_name TEXT,
+ given_name TEXT,
+ surname TEXT,
+ surname_prefix TEXT, -- van, de, etc.
+ patronymic TEXT,
+ generation_suffix TEXT, -- Jr., III, etc.
+
+ -- Metadata
+ extraction_agent VARCHAR(100),
+ extraction_confidence DECIMAL(3,2),
+ created_at TIMESTAMPTZ DEFAULT NOW(),
+ updated_at TIMESTAMPTZ DEFAULT NOW(),
+
+ -- Indexes
+ CONSTRAINT valid_poid CHECK (poid ~ '^POID-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{3}[0-9a-fxX]$')
+);
+
+-- Person Reconstructions (PRID)
+CREATE TABLE person_reconstructions (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ prid VARCHAR(24) UNIQUE NOT NULL, -- PRID-xxxx-xxxx-xxxx-xxxx
+
+ -- Canonical name (resolved from observations)
+ canonical_name TEXT NOT NULL,
+ given_name TEXT,
+ surname TEXT,
+ surname_prefix TEXT,
+
+ -- Curation metadata
+ curator_id UUID REFERENCES users(id),
+ curation_method VARCHAR(50), -- 'manual', 'algorithmic', 'hybrid'
+ confidence_score DECIMAL(3,2),
+
+ -- Versioning
+ version INTEGER DEFAULT 1,
+ previous_version_id UUID REFERENCES person_reconstructions(id),
+
+ -- Timestamps
+ created_at TIMESTAMPTZ DEFAULT NOW(),
+ updated_at TIMESTAMPTZ DEFAULT NOW(),
+
+ CONSTRAINT valid_prid CHECK (prid ~ '^PRID-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{3}[0-9a-fxX]$')
+);
+
+-- Link table: PRID derives from POIDs
+CREATE TABLE reconstruction_observations (
+ prid_id UUID REFERENCES person_reconstructions(id) ON DELETE CASCADE,
+ poid_id UUID REFERENCES person_observations(id) ON DELETE CASCADE,
+ linked_at TIMESTAMPTZ DEFAULT NOW(),
+ linked_by UUID REFERENCES users(id),
+ link_confidence DECIMAL(3,2),
+ PRIMARY KEY (prid_id, poid_id)
+);
+
+-- Claims (assertions with provenance)
+CREATE TABLE claims (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ claim_id VARCHAR(50) UNIQUE NOT NULL,
+
+ -- Subject (what this claim is about)
+ poid_id UUID REFERENCES person_observations(id),
+
+ -- Claim content
+ claim_type VARCHAR(50) NOT NULL, -- 'job_title', 'employer', 'email', etc.
+ claim_value TEXT NOT NULL,
+
+ -- Provenance (MANDATORY per Rule 6)
+ source_url TEXT NOT NULL,
+ retrieved_on TIMESTAMPTZ NOT NULL,
+ xpath TEXT NOT NULL,
+ html_file TEXT NOT NULL,
+ xpath_match_score DECIMAL(3,2) NOT NULL,
+ content_hash VARCHAR(64),
+
+ -- Quality
+ confidence DECIMAL(3,2),
+ extraction_agent VARCHAR(100),
+ status claim_status DEFAULT 'active',
+
+ -- Relationships
+ supersedes_id UUID REFERENCES claims(id),
+
+ -- Timestamps
+ created_at TIMESTAMPTZ DEFAULT NOW(),
+ verified_at TIMESTAMPTZ,
+
+ -- Index for claim lookups
+ CONSTRAINT valid_xpath_score CHECK (xpath_match_score >= 0 AND xpath_match_score <= 1)
+);
+
+-- Claim relationships (supports, conflicts)
+CREATE TABLE claim_relationships (
+ claim_a_id UUID REFERENCES claims(id) ON DELETE CASCADE,
+ claim_b_id UUID REFERENCES claims(id) ON DELETE CASCADE,
+ relationship_type VARCHAR(20) NOT NULL, -- 'supports', 'conflicts_with'
+ notes TEXT,
+ created_at TIMESTAMPTZ DEFAULT NOW(),
+ PRIMARY KEY (claim_a_id, claim_b_id, relationship_type)
+);
+
+-- External identifiers (ORCID, ISNI, VIAF, Wikidata)
+CREATE TABLE external_identifiers (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ prid_id UUID REFERENCES person_reconstructions(id) ON DELETE CASCADE,
+ identifier_scheme VARCHAR(20) NOT NULL, -- 'orcid', 'isni', 'viaf', 'wikidata'
+ identifier_value VARCHAR(100) NOT NULL,
+ verified BOOLEAN DEFAULT FALSE,
+ verified_at TIMESTAMPTZ,
+ source_url TEXT,
+ created_at TIMESTAMPTZ DEFAULT NOW(),
+ UNIQUE (prid_id, identifier_scheme, identifier_value)
+);
+
+-- GHCID links (person to heritage institution)
+CREATE TABLE ghcid_affiliations (
+ id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+ prid_id UUID REFERENCES person_reconstructions(id) ON DELETE CASCADE,
+ ghcid VARCHAR(50) NOT NULL, -- e.g., NL-NH-HAA-A-NHA
+
+ -- Affiliation details
+ role_title TEXT,
+ department TEXT,
+ affiliation_start DATE,
+ affiliation_end DATE,
+ is_current BOOLEAN DEFAULT TRUE,
+
+ -- Provenance
+ source_poid_id UUID REFERENCES person_observations(id),
+ confidence DECIMAL(3,2),
+
+ created_at TIMESTAMPTZ DEFAULT NOW(),
+ updated_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+-- Indexes for performance
+CREATE INDEX idx_observations_source_url ON person_observations(source_url);
+CREATE INDEX idx_observations_content_hash ON person_observations(content_hash);
+CREATE INDEX idx_observations_name_trgm ON person_observations
+ USING gin (literal_name gin_trgm_ops);
+
+CREATE INDEX idx_reconstructions_name_trgm ON person_reconstructions
+ USING gin (canonical_name gin_trgm_ops);
+
+CREATE INDEX idx_claims_type ON claims(claim_type);
+CREATE INDEX idx_claims_poid ON claims(poid_id);
+CREATE INDEX idx_claims_status ON claims(status);
+
+CREATE INDEX idx_ghcid_affiliations_ghcid ON ghcid_affiliations(ghcid);
+CREATE INDEX idx_ghcid_affiliations_current ON ghcid_affiliations(prid_id)
+ WHERE is_current = TRUE;
+
+-- Full-text search
+CREATE INDEX idx_observations_fts ON person_observations
+ USING gin (to_tsvector('english', literal_name));
+CREATE INDEX idx_reconstructions_fts ON person_reconstructions
+ USING gin (to_tsvector('english', canonical_name));
+```
+
+### 3.2 RDF Triple Store Schema
+
+```turtle
+@prefix ppid: <https://ppid.org/id/> .
+@prefix ppidv: <https://ppid.org/vocab/> .
+@prefix ppidt: <https://ppid.org/type/> .
+@prefix picom: <https://personsincontext.org/model#> .
+@prefix pnv: <https://w3id.org/pnv#> .
+@prefix prov: <http://www.w3.org/ns/prov#> .
+@prefix schema: <https://schema.org/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+# Example Person Observation
+ppid:POID-7a3b-c4d5-e6f7-890X a ppidt:PersonObservation, picom:PersonObservation ;
+ ppidv:poid "POID-7a3b-c4d5-e6f7-890X" ;
+
+ # Name (PNV structured)
+ pnv:hasName [
+ a pnv:PersonName ;
+ pnv:literalName "Jan van den Berg" ;
+ pnv:givenName "Jan" ;
+ pnv:surnamePrefix "van den" ;
+ pnv:baseSurname "Berg"
+ ] ;
+
+ # Provenance
+    prov:wasDerivedFrom <https://www.example-archief.nl/medewerkers/jan-van-den-berg> ;
+ prov:wasGeneratedBy ppid:extraction-activity-001 ;
+ prov:generatedAtTime "2025-01-09T14:30:00Z"^^xsd:dateTime ;
+
+ # Claims
+ ppidv:hasClaim ppid:claim-001, ppid:claim-002, ppid:claim-003 .
+
+# Example Person Reconstruction
+ppid:PRID-1234-5678-90ab-cde5 a ppidt:PersonReconstruction, picom:PersonReconstruction ;
+ ppidv:prid "PRID-1234-5678-90ab-cde5" ;
+
+ # Canonical name
+ schema:name "Jan van den Berg" ;
+ pnv:hasName [
+ a pnv:PersonName ;
+ pnv:literalName "Jan van den Berg" ;
+ pnv:givenName "Jan" ;
+ pnv:surnamePrefix "van den" ;
+ pnv:baseSurname "Berg"
+ ] ;
+
+ # Derived from observations
+ prov:wasDerivedFrom ppid:POID-7a3b-c4d5-e6f7-890X,
+        ppid:POID-8c4d-e5f6-a7b8-9012 ;
+
+ # GHCID affiliation
+    ppidv:affiliatedWith <https://ghcid.org/id/NL-NH-HAA-A-NHA> ;
+
+ # External identifiers
+ ppidv:orcid "0000-0002-1234-5678" ;
+    owl:sameAs <http://www.wikidata.org/entity/Q12345678> .
+```
+
+---
+
+## 4. API Design
+
+### 4.1 REST API Endpoints
+
+```yaml
+openapi: 3.1.0
+info:
+ title: PPID API
+ version: 1.0.0
+ description: Person Persistent Identifier API
+
+servers:
+ - url: https://api.ppid.org/v1
+
+paths:
+ # Person Observations
+ /observations:
+ post:
+ summary: Create new person observation
+ operationId: createObservation
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/CreateObservationRequest'
+ responses:
+ '201':
+ description: Observation created
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/PersonObservation'
+
+ get:
+ summary: Search observations
+ operationId: searchObservations
+ parameters:
+ - name: name
+ in: query
+ schema:
+ type: string
+ - name: source_url
+ in: query
+ schema:
+ type: string
+ - name: limit
+ in: query
+ schema:
+ type: integer
+ default: 20
+ - name: offset
+ in: query
+ schema:
+ type: integer
+ default: 0
+ responses:
+ '200':
+ description: List of observations
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/ObservationList'
+
+ /observations/{poid}:
+ get:
+ summary: Get observation by POID
+ operationId: getObservation
+ parameters:
+ - name: poid
+ in: path
+ required: true
+ schema:
+ type: string
+ pattern: '^POID-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{3}[0-9a-fxX]$'
+ responses:
+ '200':
+ description: Person observation
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/PersonObservation'
+ text/turtle:
+ schema:
+ type: string
+ application/ld+json:
+ schema:
+ type: object
+
+ /observations/{poid}/claims:
+ get:
+ summary: Get claims for observation
+ operationId: getObservationClaims
+ parameters:
+ - name: poid
+ in: path
+ required: true
+ schema:
+ type: string
+ responses:
+ '200':
+ description: List of claims
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/ClaimList'
+
+ # Person Reconstructions
+ /reconstructions:
+ post:
+ summary: Create person reconstruction from observations
+ operationId: createReconstruction
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/CreateReconstructionRequest'
+ responses:
+ '201':
+ description: Reconstruction created
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/PersonReconstruction'
+
+ /reconstructions/{prid}:
+ get:
+ summary: Get reconstruction by PRID
+ operationId: getReconstruction
+ parameters:
+ - name: prid
+ in: path
+ required: true
+ schema:
+ type: string
+ responses:
+ '200':
+ description: Person reconstruction
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/PersonReconstruction'
+
+ /reconstructions/{prid}/observations:
+ get:
+ summary: Get observations linked to reconstruction
+ operationId: getReconstructionObservations
+ responses:
+ '200':
+ description: Linked observations
+
+ /reconstructions/{prid}/history:
+ get:
+ summary: Get version history
+ operationId: getReconstructionHistory
+ responses:
+ '200':
+ description: Version history
+
+ # Validation
+ /validate/{ppid}:
+ get:
+ summary: Validate PPID format and checksum
+ operationId: validatePpid
+ responses:
+ '200':
+ description: Validation result
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ valid:
+ type: boolean
+ type:
+ type: string
+ enum: [POID, PRID]
+ exists:
+ type: boolean
+
+ # Search
+ /search:
+ get:
+ summary: Full-text search across all records
+ operationId: search
+ parameters:
+ - name: q
+ in: query
+ required: true
+ schema:
+ type: string
+ - name: type
+ in: query
+ schema:
+ type: string
+ enum: [observation, reconstruction, all]
+ default: all
+ responses:
+ '200':
+ description: Search results
+
+ # Entity Resolution
+ /resolve:
+ post:
+ summary: Find matching records for input data
+ operationId: resolveEntity
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/EntityResolutionRequest'
+ responses:
+ '200':
+ description: Resolution candidates
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/ResolutionCandidates'
+
+components:
+ schemas:
+ CreateObservationRequest:
+ type: object
+ required:
+ - source_url
+ - retrieved_at
+ - claims
+ properties:
+ source_url:
+ type: string
+ format: uri
+ source_type:
+ type: string
+ enum: [official_registry, institutional_website, professional_network, social_media]
+ retrieved_at:
+ type: string
+ format: date-time
+ content_hash:
+ type: string
+ html_archive_path:
+ type: string
+ extraction_agent:
+ type: string
+ claims:
+ type: array
+ items:
+ $ref: '#/components/schemas/ClaimInput'
+
+ ClaimInput:
+ type: object
+ required:
+ - claim_type
+ - claim_value
+ - xpath
+ - xpath_match_score
+ properties:
+ claim_type:
+ type: string
+ claim_value:
+ type: string
+ xpath:
+ type: string
+ xpath_match_score:
+ type: number
+ minimum: 0
+ maximum: 1
+ confidence:
+ type: number
+
+ PersonObservation:
+ type: object
+ properties:
+ poid:
+ type: string
+ source_url:
+ type: string
+ literal_name:
+ type: string
+ claims:
+ type: array
+ items:
+ $ref: '#/components/schemas/Claim'
+ created_at:
+ type: string
+ format: date-time
+
+ CreateReconstructionRequest:
+ type: object
+ required:
+ - observation_ids
+ properties:
+ observation_ids:
+ type: array
+ items:
+ type: string
+ minItems: 1
+ canonical_name:
+ type: string
+ external_identifiers:
+ type: object
+
+ PersonReconstruction:
+ type: object
+ properties:
+ prid:
+ type: string
+ canonical_name:
+ type: string
+ observations:
+ type: array
+ items:
+ type: string
+ external_identifiers:
+ type: object
+ ghcid_affiliations:
+ type: array
+ items:
+ $ref: '#/components/schemas/GhcidAffiliation'
+
+ GhcidAffiliation:
+ type: object
+ properties:
+ ghcid:
+ type: string
+ role_title:
+ type: string
+ is_current:
+ type: boolean
+
+ securitySchemes:
+ bearerAuth:
+ type: http
+ scheme: bearer
+ bearerFormat: JWT
+ apiKey:
+ type: apiKey
+ in: header
+ name: X-API-Key
+
+security:
+ - bearerAuth: []
+ - apiKey: []
+```
+
+### 4.2 GraphQL Schema
+
+```graphql
+type Query {
+ # Observations
+ observation(poid: ID!): PersonObservation
+ observations(
+ name: String
+ sourceUrl: String
+ limit: Int = 20
+ offset: Int = 0
+ ): ObservationConnection!
+
+ # Reconstructions
+ reconstruction(prid: ID!): PersonReconstruction
+ reconstructions(
+ name: String
+ ghcid: String
+ limit: Int = 20
+ offset: Int = 0
+ ): ReconstructionConnection!
+
+ # Search
+ search(query: String!, type: SearchType = ALL): SearchResults!
+
+ # Validation
+ validatePpid(ppid: String!): ValidationResult!
+
+ # Entity Resolution
+ resolveEntity(input: EntityResolutionInput!): [ResolutionCandidate!]!
+}
+
+type Mutation {
+ # Create observation
+ createObservation(input: CreateObservationInput!): PersonObservation!
+
+ # Create reconstruction
+ createReconstruction(input: CreateReconstructionInput!): PersonReconstruction!
+
+ # Link observation to reconstruction
+ linkObservation(prid: ID!, poid: ID!): PersonReconstruction!
+
+ # Update reconstruction
+ updateReconstruction(prid: ID!, input: UpdateReconstructionInput!): PersonReconstruction!
+
+ # Add external identifier
+ addExternalIdentifier(
+ prid: ID!
+ scheme: IdentifierScheme!
+ value: String!
+ ): PersonReconstruction!
+
+ # Add GHCID affiliation
+ addGhcidAffiliation(
+ prid: ID!
+ ghcid: String!
+ roleTitle: String
+ isCurrent: Boolean = true
+ ): PersonReconstruction!
+}
+
+type PersonObservation {
+ poid: ID!
+ sourceUrl: String!
+ sourceType: SourceType!
+ retrievedAt: DateTime!
+ contentHash: String
+ htmlArchivePath: String
+
+ # Name components
+ literalName: String
+ givenName: String
+ surname: String
+ surnamePrefix: String
+
+ # Related data
+ claims: [Claim!]!
+ linkedReconstructions: [PersonReconstruction!]!
+
+ # Metadata
+ extractionAgent: String
+ extractionConfidence: Float
+ createdAt: DateTime!
+}
+
+type PersonReconstruction {
+ prid: ID!
+ canonicalName: String!
+ givenName: String
+ surname: String
+ surnamePrefix: String
+
+ # Linked observations
+ observations: [PersonObservation!]!
+
+ # External identifiers
+ orcid: String
+ isni: String
+ viaf: String
+ wikidata: String
+ externalIdentifiers: [ExternalIdentifier!]!
+
+ # GHCID affiliations
+ ghcidAffiliations: [GhcidAffiliation!]!
+ currentAffiliations: [GhcidAffiliation!]!
+
+ # Versioning
+ version: Int!
+ previousVersion: PersonReconstruction
+ history: [PersonReconstruction!]!
+
+ # Curation
+ curator: User
+ curationMethod: CurationMethod
+ confidenceScore: Float
+
+ createdAt: DateTime!
+ updatedAt: DateTime!
+}
+
+type Claim {
+ id: ID!
+ claimType: ClaimType!
+ claimValue: String!
+
+ # Provenance (MANDATORY)
+ sourceUrl: String!
+ retrievedOn: DateTime!
+ xpath: String!
+ htmlFile: String!
+ xpathMatchScore: Float!
+
+ # Quality
+ confidence: Float
+ extractionAgent: String
+ status: ClaimStatus!
+
+ # Relationships
+ supports: [Claim!]!
+ conflictsWith: [Claim!]!
+ supersedes: Claim
+
+ createdAt: DateTime!
+}
+
+type GhcidAffiliation {
+ ghcid: String!
+ institution: HeritageCustodian # Resolved from GHCID
+ roleTitle: String
+ department: String
+ startDate: Date
+ endDate: Date
+ isCurrent: Boolean!
+ confidence: Float
+}
+
+type HeritageCustodian {
+ ghcid: String!
+ name: String!
+ institutionType: String!
+ city: String
+ country: String
+}
+
+type ExternalIdentifier {
+ scheme: IdentifierScheme!
+ value: String!
+ verified: Boolean!
+ verifiedAt: DateTime
+}
+
+enum SourceType {
+ OFFICIAL_REGISTRY
+ INSTITUTIONAL_WEBSITE
+ PROFESSIONAL_NETWORK
+ SOCIAL_MEDIA
+ NEWS_ARTICLE
+ ACADEMIC_PUBLICATION
+ USER_SUBMITTED
+ INFERRED
+}
+
+enum ClaimType {
+ FULL_NAME
+ GIVEN_NAME
+ FAMILY_NAME
+ JOB_TITLE
+ EMPLOYER
+ EMPLOYER_GHCID
+ EMAIL
+ LINKEDIN_URL
+ ORCID
+ BIRTH_DATE
+ EDUCATION
+}
+
+enum ClaimStatus {
+ ACTIVE
+ SUPERSEDED
+ RETRACTED
+}
+
+enum IdentifierScheme {
+ ORCID
+ ISNI
+ VIAF
+ WIKIDATA
+ LOC_NAF
+}
+
+enum CurationMethod {
+ MANUAL
+ ALGORITHMIC
+ HYBRID
+}
+
+enum SearchType {
+ OBSERVATION
+ RECONSTRUCTION
+ ALL
+}
+
+input CreateObservationInput {
+ sourceUrl: String!
+ sourceType: SourceType!
+ retrievedAt: DateTime!
+ contentHash: String
+ htmlArchivePath: String
+ extractionAgent: String
+ claims: [ClaimInput!]!
+}
+
+input ClaimInput {
+ claimType: ClaimType!
+ claimValue: String!
+ xpath: String!
+ xpathMatchScore: Float!
+ confidence: Float
+}
+
+input CreateReconstructionInput {
+ observationIds: [ID!]!
+ canonicalName: String
+ externalIdentifiers: ExternalIdentifiersInput
+}
+
+input EntityResolutionInput {
+ name: String!
+ employer: String
+ jobTitle: String
+ email: String
+ linkedinUrl: String
+}
+
+type ResolutionCandidate {
+ reconstruction: PersonReconstruction
+ observation: PersonObservation
+ matchScore: Float!
+ matchFactors: [MatchFactor!]!
+}
+
+type MatchFactor {
+ field: String!
+ score: Float!
+ method: String!
+}
+```
+
+### 4.3 FastAPI Implementation
+
+```python
+from fastapi import FastAPI, HTTPException, Depends, Query
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel, Field
+from typing import Optional
+from datetime import datetime
+import uuid
+
+app = FastAPI(
+ title="PPID API",
+ description="Person Persistent Identifier API",
+ version="1.0.0"
+)
+
+app.add_middleware(
+ CORSMiddleware,
+ allow_origins=["*"],
+ allow_credentials=True,
+ allow_methods=["*"],
+ allow_headers=["*"],
+)
+
+
+# --- Pydantic Models ---
+
+class ClaimInput(BaseModel):
+ claim_type: str
+ claim_value: str
+ xpath: str
+ xpath_match_score: float = Field(ge=0, le=1)
+ confidence: Optional[float] = Field(None, ge=0, le=1)
+
+
+class CreateObservationRequest(BaseModel):
+ source_url: str
+ source_type: str = "institutional_website"
+ retrieved_at: datetime
+ content_hash: Optional[str] = None
+ html_archive_path: Optional[str] = None
+ extraction_agent: Optional[str] = None
+ claims: list[ClaimInput]
+
+
+class PersonObservationResponse(BaseModel):
+ poid: str
+ source_url: str
+ source_type: str
+ retrieved_at: datetime
+ literal_name: Optional[str] = None
+ claims: list[dict]
+ created_at: datetime
+
+
+class CreateReconstructionRequest(BaseModel):
+ observation_ids: list[str]
+ canonical_name: Optional[str] = None
+ external_identifiers: Optional[dict] = None
+
+
+class ValidationResult(BaseModel):
+ valid: bool
+ ppid_type: Optional[str] = None
+ exists: Optional[bool] = None
+ error: Optional[str] = None
+
+
+# --- Dependencies ---
+
+async def get_db():
+ """Database connection dependency."""
+ # In production, use connection pool
+ pass
+
+
+async def get_current_user(api_key: str = Depends(oauth2_scheme)):
+ """Authenticate user from API key or JWT."""
+ pass
+
+
+# --- Endpoints ---
+
+@app.post("/api/v1/observations", response_model=PersonObservationResponse)
+async def create_observation(
+ request: CreateObservationRequest,
+ db = Depends(get_db)
+):
+ """
+ Create a new Person Observation from extracted data.
+
+ The POID is generated deterministically from source metadata.
+ """
+ from ppid.identifiers import generate_poid
+
+ # Generate deterministic POID
+ poid = generate_poid(
+ source_url=request.source_url,
+ retrieval_timestamp=request.retrieved_at.isoformat(),
+ content_hash=request.content_hash or ""
+ )
+
+ # Check for existing observation with same POID
+ existing = await db.get_observation(poid)
+ if existing:
+ return existing
+
+ # Extract name from claims
+ literal_name = None
+ for claim in request.claims:
+ if claim.claim_type == "full_name":
+ literal_name = claim.claim_value
+ break
+
+ # Create observation record
+ observation = await db.create_observation(
+ poid=poid,
+ source_url=request.source_url,
+ source_type=request.source_type,
+ retrieved_at=request.retrieved_at,
+ content_hash=request.content_hash,
+ html_archive_path=request.html_archive_path,
+ literal_name=literal_name,
+ extraction_agent=request.extraction_agent,
+ claims=[c.dict() for c in request.claims]
+ )
+
+ return observation
+
+
+@app.get("/api/v1/observations/{poid}", response_model=PersonObservationResponse)
+async def get_observation(poid: str, db = Depends(get_db)):
+ """Get Person Observation by POID."""
+ from ppid.identifiers import validate_ppid
+
+ is_valid, error = validate_ppid(poid)
+ if not is_valid:
+ raise HTTPException(status_code=400, detail=f"Invalid POID: {error}")
+
+ observation = await db.get_observation(poid)
+ if not observation:
+ raise HTTPException(status_code=404, detail="Observation not found")
+
+ return observation
+
+
+@app.get("/api/v1/validate/{ppid}", response_model=ValidationResult)
+async def validate_ppid_endpoint(ppid: str, db = Depends(get_db)):
+ """Validate PPID format, checksum, and existence."""
+ from ppid.identifiers import validate_ppid_full
+
+ is_valid, error = validate_ppid_full(ppid)
+
+ if not is_valid:
+ return ValidationResult(valid=False, error=error)
+
+ ppid_type = "POID" if ppid.startswith("POID") else "PRID"
+
+ # Check existence
+ if ppid_type == "POID":
+ exists = await db.observation_exists(ppid)
+ else:
+ exists = await db.reconstruction_exists(ppid)
+
+ return ValidationResult(
+ valid=True,
+ ppid_type=ppid_type,
+ exists=exists
+ )
+
+
+@app.post("/api/v1/reconstructions")
+async def create_reconstruction(
+ request: CreateReconstructionRequest,
+ db = Depends(get_db),
+ user = Depends(get_current_user)
+):
+ """Create Person Reconstruction from linked observations."""
+ from ppid.identifiers import generate_prid
+
+ # Validate all POIDs exist
+ for poid in request.observation_ids:
+ if not await db.observation_exists(poid):
+ raise HTTPException(
+ status_code=400,
+ detail=f"Observation not found: {poid}"
+ )
+
+ # Generate deterministic PRID
+ prid = generate_prid(
+ observation_ids=request.observation_ids,
+ curator_id=str(user.id),
+ timestamp=datetime.utcnow().isoformat()
+ )
+
+ # Determine canonical name
+ if request.canonical_name:
+ canonical_name = request.canonical_name
+ else:
+ # Use name from highest-confidence observation
+ observations = await db.get_observations(request.observation_ids)
+ canonical_name = max(
+ observations,
+ key=lambda o: o.extraction_confidence or 0
+ ).literal_name
+
+ # Create reconstruction
+ reconstruction = await db.create_reconstruction(
+ prid=prid,
+ canonical_name=canonical_name,
+ observation_ids=request.observation_ids,
+ curator_id=user.id,
+ external_identifiers=request.external_identifiers
+ )
+
+ return reconstruction
+
+
+@app.get("/api/v1/search")
+async def search(
+ q: str = Query(..., min_length=2),
+    type: str = Query("all", pattern="^(observation|reconstruction|all)$"),
+ limit: int = Query(20, ge=1, le=100),
+ offset: int = Query(0, ge=0),
+ db = Depends(get_db)
+):
+ """Full-text search across observations and reconstructions."""
+ results = await db.search(
+ query=q,
+ search_type=type,
+ limit=limit,
+ offset=offset
+ )
+ return results
+
+
+@app.post("/api/v1/resolve")
+async def resolve_entity(
+ name: str,
+ employer: Optional[str] = None,
+ job_title: Optional[str] = None,
+ email: Optional[str] = None,
+ db = Depends(get_db)
+):
+ """
+ Entity resolution: find matching records for input data.
+
+ Returns ranked candidates with match scores.
+ """
+ from ppid.entity_resolution import find_candidates
+
+ candidates = await find_candidates(
+ db=db,
+ name=name,
+ employer=employer,
+ job_title=job_title,
+ email=email
+ )
+
+ return {
+ "candidates": candidates,
+ "query": {
+ "name": name,
+ "employer": employer,
+ "job_title": job_title,
+ "email": email
+ }
+ }
+```
+
+---
+
+## 5. Data Ingestion Pipeline
+
+### 5.1 Pipeline Architecture
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│ DATA INGESTION PIPELINE │
+├─────────────────────────────────────────────────────────────────┤
+│ │
+│ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │
+│ │ Source │───▶│ Extract │───▶│ Transform│───▶│ Load │ │
+│ │ Fetch │ │ (NER) │ │ (Validate) │ (Store) │ │
+│ └─────────┘ └─────────┘ └─────────┘ └─────────┘ │
+│ │ │ │ │ │
+│ ▼ ▼ ▼ ▼ │
+│ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │
+│ │ Archive │ │ Claims │ │ POID │ │ Postgres│ │
+│ │ HTML │ │ XPath │ │ Generate│ │ + RDF │ │
+│ └─────────┘ └─────────┘ └─────────┘ └─────────┘ │
+│ │
+└─────────────────────────────────────────────────────────────────┘
+```
+
+### 5.2 Pipeline Implementation
+
+```python
+from dataclasses import dataclass
+from datetime import datetime
+from typing import AsyncIterator
+import hashlib
+import asyncio
+from kafka import KafkaProducer, KafkaConsumer
+
+@dataclass
+class SourceDocument:
+ url: str
+ html_content: str
+ retrieved_at: datetime
+ content_hash: str
+ archive_path: str
+
+
+@dataclass
+class ExtractedClaim:
+ claim_type: str
+ claim_value: str
+ xpath: str
+ xpath_match_score: float
+ confidence: float
+
+
+@dataclass
+class PersonObservationData:
+ source: SourceDocument
+ claims: list[ExtractedClaim]
+ poid: str
+
+
+class IngestionPipeline:
+ """
+ Main data ingestion pipeline for PPID.
+
+ Stages:
+ 1. Fetch: Retrieve web pages, archive HTML
+ 2. Extract: NER/NLP to identify person data, generate claims with XPath
+ 3. Transform: Validate, generate POID, structure data
+ 4. Load: Store in PostgreSQL and RDF triple store
+ """
+
+ def __init__(
+ self,
+ db_pool,
+ rdf_store,
+ kafka_producer: KafkaProducer,
+ archive_storage,
+ llm_extractor
+ ):
+ self.db = db_pool
+ self.rdf = rdf_store
+ self.kafka = kafka_producer
+ self.archive = archive_storage
+ self.extractor = llm_extractor
+
+ async def process_url(self, url: str) -> list[PersonObservationData]:
+ """
+ Full pipeline for a single URL.
+
+ Returns list of PersonObservations extracted from the page.
+ """
+ # Stage 1: Fetch and archive
+ source = await self._fetch_and_archive(url)
+
+ # Stage 2: Extract claims with XPath
+ observations = await self._extract_observations(source)
+
+ # Stage 3: Transform and validate
+        validated = await self._transform_and_validate(observations, source)
+
+ # Stage 4: Load to databases
+ await self._load_observations(validated)
+
+ return validated
+
+ async def _fetch_and_archive(self, url: str) -> SourceDocument:
+ """Fetch URL and archive HTML."""
+ from playwright.async_api import async_playwright
+
+ async with async_playwright() as p:
+ browser = await p.chromium.launch()
+ page = await browser.new_page()
+
+ await page.goto(url, wait_until='networkidle')
+ html_content = await page.content()
+
+ await browser.close()
+
+ # Calculate content hash
+ content_hash = hashlib.sha256(html_content.encode()).hexdigest()
+
+ # Archive HTML
+ retrieved_at = datetime.utcnow()
+ archive_path = await self.archive.store(
+ url=url,
+ content=html_content,
+ timestamp=retrieved_at
+ )
+
+ return SourceDocument(
+ url=url,
+ html_content=html_content,
+ retrieved_at=retrieved_at,
+ content_hash=content_hash,
+ archive_path=archive_path
+ )
+
+ async def _extract_observations(
+ self,
+ source: SourceDocument
+    ) -> list[tuple[list[ExtractedClaim], str | None]]:
+ """
+ Extract person observations with XPath provenance.
+
+ Uses LLM for extraction, then validates XPath.
+ """
+ from lxml import html
+
+ # Parse HTML
+ tree = html.fromstring(source.html_content)
+
+ # Use LLM to extract person data with XPath
+ extraction_result = await self.extractor.extract_persons(
+ html_content=source.html_content,
+ source_url=source.url
+ )
+
+ observations = []
+
+ for person in extraction_result.persons:
+ validated_claims = []
+
+ for claim in person.claims:
+ # Verify XPath points to expected value
+ try:
+ elements = tree.xpath(claim.xpath)
+ if elements:
+ actual_value = elements[0].text_content().strip()
+
+ # Calculate match score
+ if actual_value == claim.claim_value:
+ match_score = 1.0
+ else:
+ from difflib import SequenceMatcher
+ match_score = SequenceMatcher(
+ None, actual_value, claim.claim_value
+ ).ratio()
+
+ if match_score >= 0.8: # Accept if 80%+ match
+ validated_claims.append(ExtractedClaim(
+ claim_type=claim.claim_type,
+ claim_value=claim.claim_value,
+ xpath=claim.xpath,
+ xpath_match_score=match_score,
+ confidence=claim.confidence * match_score
+ ))
+ except Exception:
+ # Skip claims with invalid XPath
+ continue
+
+ if validated_claims:
+ # Get literal name from claims
+ literal_name = next(
+ (c.claim_value for c in validated_claims if c.claim_type == 'full_name'),
+ None
+ )
+ observations.append((validated_claims, literal_name))
+
+ return observations
+
+ async def _transform_and_validate(
+ self,
+ observations: list[tuple[list[ExtractedClaim], str | None]],
+ source: SourceDocument
+ ) -> list[PersonObservationData]:
+ """Transform extracted data and generate POIDs."""
+ from ppid.identifiers import generate_poid
+
+ results = []
+
+ for claims, literal_name in observations:
+ # Generate deterministic POID
+ claims_hash = hashlib.sha256(
+ str(sorted([c.claim_value for c in claims])).encode()
+ ).hexdigest()
+
+ poid = generate_poid(
+ source_url=source.url,
+ retrieval_timestamp=source.retrieved_at.isoformat(),
+ content_hash=f"{source.content_hash}:{claims_hash}"
+ )
+
+ results.append(PersonObservationData(
+ source=source,
+ claims=claims,
+ poid=poid
+ ))
+
+ return results
+
+ async def _load_observations(
+ self,
+ observations: list[PersonObservationData]
+ ) -> None:
+ """Load observations to PostgreSQL and RDF store."""
+ for obs in observations:
+ # Check if already exists (idempotent)
+ existing = await self.db.get_observation(obs.poid)
+ if existing:
+ continue
+
+ # Insert to PostgreSQL
+ await self.db.create_observation(
+ poid=obs.poid,
+ source_url=obs.source.url,
+ source_type='institutional_website',
+ retrieved_at=obs.source.retrieved_at,
+ content_hash=obs.source.content_hash,
+ html_archive_path=obs.source.archive_path,
+ literal_name=next(
+ (c.claim_value for c in obs.claims if c.claim_type == 'full_name'),
+ None
+ ),
+ claims=[{
+ 'claim_type': c.claim_type,
+ 'claim_value': c.claim_value,
+ 'xpath': c.xpath,
+ 'xpath_match_score': c.xpath_match_score,
+ 'confidence': c.confidence
+ } for c in obs.claims]
+ )
+
+ # Insert to RDF triple store
+ await self._insert_rdf(obs)
+
+ # Publish to Kafka for downstream processing
+ self.kafka.send('ppid.observations.created', {
+ 'poid': obs.poid,
+ 'source_url': obs.source.url,
+ 'timestamp': obs.source.retrieved_at.isoformat()
+ })
+
+ async def _insert_rdf(self, obs: PersonObservationData) -> None:
+ """Insert observation as RDF triples."""
+ from rdflib import Graph, Namespace, Literal, URIRef
+ from rdflib.namespace import RDF, XSD
+
+ PPID = Namespace("https://ppid.org/")
+ PPIDV = Namespace("https://ppid.org/vocab#")
+ PROV = Namespace("http://www.w3.org/ns/prov#")
+
+ g = Graph()
+
+ obs_uri = PPID[obs.poid]
+
+ g.add((obs_uri, RDF.type, PPIDV.PersonObservation))
+ g.add((obs_uri, PPIDV.poid, Literal(obs.poid)))
+ g.add((obs_uri, PROV.wasDerivedFrom, URIRef(obs.source.url)))
+ g.add((obs_uri, PROV.generatedAtTime, Literal(
+ obs.source.retrieved_at.isoformat(),
+ datatype=XSD.dateTime
+ )))
+
+ # Add claims
+ for i, claim in enumerate(obs.claims):
+ claim_uri = PPID[f"{obs.poid}/claim/{i}"]
+ g.add((obs_uri, PPIDV.hasClaim, claim_uri))
+ g.add((claim_uri, PPIDV.claimType, Literal(claim.claim_type)))
+ g.add((claim_uri, PPIDV.claimValue, Literal(claim.claim_value)))
+ g.add((claim_uri, PPIDV.xpath, Literal(claim.xpath)))
+ g.add((claim_uri, PPIDV.xpathMatchScore, Literal(
+ claim.xpath_match_score, datatype=XSD.decimal
+ )))
+
+ # Insert to triple store
+ await self.rdf.insert(g)
+```
+
+---
+
+## 6. GHCID Integration
+
+### 6.1 Linking Persons to Institutions
+
+```python
+from dataclasses import dataclass
+from typing import Optional
+from datetime import date
+
+@dataclass
+class GhcidAffiliation:
+ """
+ Link between a person (PRID) and a heritage institution (GHCID).
+ """
+ ghcid: str # e.g., "NL-NH-HAA-A-NHA"
+ role_title: Optional[str] = None
+ department: Optional[str] = None
+ start_date: Optional[date] = None
+ end_date: Optional[date] = None
+ is_current: bool = True
+ source_poid: Optional[str] = None
+ confidence: float = 0.9
+
+
+async def link_person_to_institution(
+ db,
+ prid: str,
+ ghcid: str,
+ role_title: str = None,
+ source_poid: str = None
+) -> GhcidAffiliation:
+ """
+ Create link between person and heritage institution.
+
+ Args:
+ prid: Person Reconstruction ID
+ ghcid: Global Heritage Custodian ID
+ role_title: Job title at institution
+ source_poid: Observation where affiliation was extracted
+
+ Returns:
+ Created affiliation record
+ """
+ # Validate PRID exists
+ reconstruction = await db.get_reconstruction(prid)
+ if not reconstruction:
+ raise ValueError(f"Reconstruction not found: {prid}")
+
+ # Validate GHCID format
+ if not validate_ghcid_format(ghcid):
+ raise ValueError(f"Invalid GHCID format: {ghcid}")
+
+ # Create affiliation
+ affiliation = await db.create_ghcid_affiliation(
+ prid_id=reconstruction.id,
+ ghcid=ghcid,
+ role_title=role_title,
+ source_poid_id=source_poid,
+ is_current=True
+ )
+
+ return affiliation
+
+
+def validate_ghcid_format(ghcid: str) -> bool:
+ """Validate GHCID format."""
+ import re
+ # Pattern: CC-RR-SSS-T-ABBREV
+ pattern = r'^[A-Z]{2}-[A-Z]{2}-[A-Z]{3}-[A-Z]-[A-Z0-9]+$'
+ return bool(re.match(pattern, ghcid))
+```
+
+### 6.2 RDF Integration
+
+```turtle
+@prefix ppid: <https://ppid.org/> .
+@prefix ppidv: <https://ppid.org/vocab#> .
+@prefix ghcid: <https://ghcid.org/> .
+@prefix org: <http://www.w3.org/ns/org#> .
+@prefix schema: <https://schema.org/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+# Person with GHCID affiliation
+ppid:PRID-1234-5678-90ab-cde5
+ a ppidv:PersonReconstruction ;
+ schema:name "Jan van den Berg" ;
+
+ # Current employment
+ org:memberOf [
+ a org:Membership ;
+ org:organization ghcid:NL-NH-HAA-A-NHA ;
+ org:role [
+ a org:Role ;
+ schema:name "Senior Archivist"
+ ] ;
+ ppidv:isCurrent true ;
+ schema:startDate "2015"^^xsd:gYear
+ ] ;
+
+ # Direct link for simple queries
+ ppidv:affiliatedWith ghcid:NL-NH-HAA-A-NHA .
+
+# The heritage institution (from GHCID system)
+ghcid:NL-NH-HAA-A-NHA
+ a ppidv:HeritageCustodian ;
+ schema:name "Noord-Hollands Archief" ;
+ ppidv:ghcid "NL-NH-HAA-A-NHA" ;
+ schema:address [
+ schema:addressLocality "Haarlem" ;
+ schema:addressCountry "NL"
+ ] .
+```
+
+### 6.3 SPARQL Queries
+
+```sparql
+# Find all persons affiliated with a specific institution
+PREFIX ppid: <https://ppid.org/>
+PREFIX ppidv: <https://ppid.org/vocab#>
+PREFIX ghcid: <https://ghcid.org/>
+PREFIX schema: <https://schema.org/>
+PREFIX org: <http://www.w3.org/ns/org#>
+
+SELECT ?prid ?name ?role ?isCurrent
+WHERE {
+ ?prid a ppidv:PersonReconstruction ;
+ schema:name ?name ;
+ org:memberOf ?membership .
+
+ ?membership org:organization ghcid:NL-NH-HAA-A-NHA ;
+ ppidv:isCurrent ?isCurrent .
+
+ OPTIONAL {
+ ?membership org:role ?roleNode .
+ ?roleNode schema:name ?role .
+ }
+}
+ORDER BY DESC(?isCurrent) ?name
+
+
+# Find all institutions a person has worked at
+SELECT ?ghcid ?institutionName ?role ?startDate ?endDate ?isCurrent
+WHERE {
+ ppid:PRID-1234-5678-90ab-cde5 org:memberOf ?membership .
+
+ ?membership org:organization ?institution ;
+ ppidv:isCurrent ?isCurrent .
+
+ ?institution ppidv:ghcid ?ghcid ;
+ schema:name ?institutionName .
+
+ OPTIONAL { ?membership schema:startDate ?startDate }
+ OPTIONAL { ?membership schema:endDate ?endDate }
+ OPTIONAL {
+ ?membership org:role ?roleNode .
+ ?roleNode schema:name ?role .
+ }
+}
+ORDER BY DESC(?isCurrent) DESC(?startDate)
+
+
+# Find archivists across all Dutch archives
+SELECT ?prid ?name ?institution ?institutionName
+WHERE {
+ ?prid a ppidv:PersonReconstruction ;
+ schema:name ?name ;
+ org:memberOf ?membership .
+
+ ?membership org:organization ?institution ;
+ ppidv:isCurrent true ;
+ org:role ?roleNode .
+
+ ?roleNode schema:name ?role .
+ FILTER(CONTAINS(LCASE(?role), "archivist"))
+
+ ?institution ppidv:ghcid ?ghcid ;
+ schema:name ?institutionName .
+ FILTER(STRSTARTS(?ghcid, "NL-"))
+}
+ORDER BY ?institutionName ?name
+```
+
+---
+
+## 7. Security and Access Control
+
+### 7.1 Authentication
+
+```python
+from fastapi import Depends, HTTPException, status
+from fastapi.security import OAuth2PasswordBearer, APIKeyHeader
+from jose import JWTError, jwt
+from passlib.context import CryptContext
+from datetime import datetime, timedelta
+from typing import Optional
+
+# Configuration
+SECRET_KEY = "your-secret-key" # Use env variable
+ALGORITHM = "HS256"
+ACCESS_TOKEN_EXPIRE_MINUTES = 30
+
+pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
+oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token", auto_error=False)
+api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False)
+
+
+class User:
+ def __init__(self, id: str, email: str, roles: list[str]):
+ self.id = id
+ self.email = email
+ self.roles = roles
+
+
+def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
+ """Create JWT access token."""
+ to_encode = data.copy()
+ expire = datetime.utcnow() + (expires_delta or timedelta(minutes=15))
+ to_encode.update({"exp": expire})
+ return jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
+
+
+async def get_current_user(
+ token: Optional[str] = Depends(oauth2_scheme),
+ api_key: Optional[str] = Depends(api_key_header),
+ db = Depends(get_db)
+) -> User:
+ """
+ Authenticate user via JWT token or API key.
+ """
+ credentials_exception = HTTPException(
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ detail="Could not validate credentials",
+ headers={"WWW-Authenticate": "Bearer"},
+ )
+
+ # Try JWT token first
+ if token:
+ try:
+ payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
+ user_id: str = payload.get("sub")
+ if user_id is None:
+ raise credentials_exception
+
+ user = await db.get_user(user_id)
+ if user is None:
+ raise credentials_exception
+
+ return user
+ except JWTError:
+ pass
+
+ # Try API key
+ if api_key:
+ user = await db.get_user_by_api_key(api_key)
+ if user:
+ return user
+
+ raise credentials_exception
+
+
+def require_role(required_roles: list[str]):
+ """Dependency to require specific roles."""
+ async def role_checker(user: User = Depends(get_current_user)):
+ if not any(role in user.roles for role in required_roles):
+ raise HTTPException(
+ status_code=status.HTTP_403_FORBIDDEN,
+ detail="Insufficient permissions"
+ )
+ return user
+ return role_checker
+```
+
+### 7.2 Authorization Roles
+
+| Role | Permissions |
+|------|-------------|
+| `reader` | Read observations, reconstructions, claims |
+| `contributor` | Create observations, add claims |
+| `curator` | Create reconstructions, link observations, resolve conflicts |
+| `admin` | Manage users, API keys, system configuration |
+| `api_client` | Programmatic access via API key |
+
+### 7.3 Rate Limiting
+
+```python
+from fastapi import Request
+import redis
+from datetime import datetime
+
+class RateLimiter:
+ """
+ Sliding-window rate limiter using a Redis sorted set.
+ """
+
+ def __init__(self, redis_client: redis.Redis):
+ self.redis = redis_client
+
+ async def is_allowed(
+ self,
+ key: str,
+ max_requests: int = 100,
+ window_seconds: int = 60
+ ) -> tuple[bool, dict]:
+ """
+ Check if request is allowed under rate limit.
+
+ Returns:
+ Tuple of (is_allowed, rate_limit_info)
+ """
+ now = datetime.utcnow().timestamp()
+ window_start = now - window_seconds
+
+ pipe = self.redis.pipeline()
+
+ # Remove old requests
+ pipe.zremrangebyscore(key, 0, window_start)
+
+ # Count requests in window
+ pipe.zcard(key)
+
+ # Add current request
+ pipe.zadd(key, {str(now): now})
+
+ # Set expiry
+ pipe.expire(key, window_seconds)
+
+ results = pipe.execute()
+ request_count = results[1]
+
+ is_allowed = request_count < max_requests
+
+ return is_allowed, {
+ "limit": max_requests,
+ "remaining": max(0, max_requests - request_count - 1),
+ "reset": int(now + window_seconds)
+ }
+
+
+# Rate limit tiers
+RATE_LIMITS = {
+ "anonymous": {"requests": 60, "window": 60},
+ "reader": {"requests": 100, "window": 60},
+ "contributor": {"requests": 500, "window": 60},
+ "curator": {"requests": 1000, "window": 60},
+ "api_client": {"requests": 5000, "window": 60},
+}
+```
+
+---
+
+## 8. Performance Requirements
+
+### 8.1 SLOs (Service Level Objectives)
+
+| Metric | Target | Measurement |
+|--------|--------|-------------|
+| **Availability** | 99.9% | Monthly uptime |
+| **API Latency (p50)** | < 50ms | Response time |
+| **API Latency (p99)** | < 500ms | Response time |
+| **Search Latency** | < 200ms | Full-text search |
+| **SPARQL Query** | < 1s | Simple queries |
+| **Throughput** | 1000 req/s | Sustained load |
+
+### 8.2 Scaling Strategy
+
+```yaml
+# Kubernetes HPA (Horizontal Pod Autoscaler)
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+ name: ppid-api
+spec:
+ scaleTargetRef:
+ apiVersion: apps/v1
+ kind: Deployment
+ name: ppid-api
+ minReplicas: 3
+ maxReplicas: 20
+ metrics:
+ - type: Resource
+ resource:
+ name: cpu
+ target:
+ type: Utilization
+ averageUtilization: 70
+ - type: Resource
+ resource:
+ name: memory
+ target:
+ type: Utilization
+ averageUtilization: 80
+ - type: Pods
+ pods:
+ metric:
+ name: http_requests_per_second
+ target:
+ type: AverageValue
+ averageValue: 100
+```
+
+### 8.3 Caching Strategy
+
+```python
+import redis
+from functools import wraps
+import json
+import hashlib
+
+class CacheManager:
+ """
+ Multi-tier caching for PPID.
+
+ Tiers:
+ 1. L1: In-memory (per-instance)
+ 2. L2: Redis (shared)
+ """
+
+ def __init__(self, redis_client: redis.Redis):
+ self.redis = redis_client
+ self.local_cache = {}
+
+ def cache_observation(self, ttl: int = 3600):
+ """Cache observation lookups."""
+ def decorator(func):
+ @wraps(func)
+ async def wrapper(poid: str, *args, **kwargs):
+ cache_key = f"observation:{poid}"
+
+ # Check L1 cache
+ if cache_key in self.local_cache:
+ return self.local_cache[cache_key]
+
+ # Check L2 cache
+ cached = self.redis.get(cache_key)
+ if cached:
+ data = json.loads(cached)
+ self.local_cache[cache_key] = data
+ return data
+
+ # Fetch from database
+ result = await func(poid, *args, **kwargs)
+
+ if result:
+ # Store in both caches
+ self.redis.setex(cache_key, ttl, json.dumps(result))
+ self.local_cache[cache_key] = result
+
+ return result
+ return wrapper
+ return decorator
+
+ def cache_search(self, ttl: int = 300):
+ """Cache search results (shorter TTL)."""
+ def decorator(func):
+ @wraps(func)
+ async def wrapper(query: str, *args, **kwargs):
+ # Create deterministic cache key from query params
+ key_data = {"query": query, "args": args, "kwargs": kwargs}
+ cache_key = f"search:{hashlib.md5(json.dumps(key_data, sort_keys=True).encode()).hexdigest()}"
+
+ cached = self.redis.get(cache_key)
+ if cached:
+ return json.loads(cached)
+
+ result = await func(query, *args, **kwargs)
+
+ self.redis.setex(cache_key, ttl, json.dumps(result))
+
+ return result
+ return wrapper
+ return decorator
+
+ def invalidate_observation(self, poid: str):
+ """Invalidate cache when observation is updated."""
+ cache_key = f"observation:{poid}"
+ self.redis.delete(cache_key)
+ self.local_cache.pop(cache_key, None)
+```
+
+---
+
+## 9. Deployment Architecture
+
+### 9.1 Kubernetes Deployment
+
+```yaml
+# API Deployment
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: ppid-api
+ labels:
+ app: ppid
+ component: api
+spec:
+ replicas: 3
+ selector:
+ matchLabels:
+ app: ppid
+ component: api
+ template:
+ metadata:
+ labels:
+ app: ppid
+ component: api
+ spec:
+ containers:
+ - name: api
+ image: ppid/api:latest
+ ports:
+ - containerPort: 8000
+ env:
+ - name: DATABASE_URL
+ valueFrom:
+ secretKeyRef:
+ name: ppid-secrets
+ key: database-url
+ - name: REDIS_URL
+ valueFrom:
+ secretKeyRef:
+ name: ppid-secrets
+ key: redis-url
+ - name: JWT_SECRET
+ valueFrom:
+ secretKeyRef:
+ name: ppid-secrets
+ key: jwt-secret
+ resources:
+ requests:
+ cpu: "250m"
+ memory: "512Mi"
+ limits:
+ cpu: "1000m"
+ memory: "2Gi"
+ livenessProbe:
+ httpGet:
+ path: /health
+ port: 8000
+ initialDelaySeconds: 10
+ periodSeconds: 10
+ readinessProbe:
+ httpGet:
+ path: /ready
+ port: 8000
+ initialDelaySeconds: 5
+ periodSeconds: 5
+
+---
+# Service
+apiVersion: v1
+kind: Service
+metadata:
+ name: ppid-api
+spec:
+ selector:
+ app: ppid
+ component: api
+ ports:
+ - port: 80
+ targetPort: 8000
+ type: ClusterIP
+
+---
+# Ingress
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+ name: ppid-ingress
+ annotations:
+ kubernetes.io/ingress.class: nginx
+ cert-manager.io/cluster-issuer: letsencrypt-prod
+spec:
+ tls:
+ - hosts:
+ - api.ppid.org
+ secretName: ppid-tls
+ rules:
+ - host: api.ppid.org
+ http:
+ paths:
+ - path: /
+ pathType: Prefix
+ backend:
+ service:
+ name: ppid-api
+ port:
+ number: 80
+```
+
+### 9.2 Docker Compose (Development)
+
+```yaml
+version: '3.8'
+
+services:
+ api:
+ build: .
+ ports:
+ - "8000:8000"
+ environment:
+ - DATABASE_URL=postgresql://ppid:ppid@postgres:5432/ppid
+ - REDIS_URL=redis://redis:6379
+ - FUSEKI_URL=http://fuseki:3030
+ depends_on:
+ - postgres
+ - redis
+ - fuseki
+ volumes:
+ - ./src:/app/src
+ - ./archives:/app/archives
+
+ postgres:
+ image: postgres:16
+ environment:
+ POSTGRES_USER: ppid
+ POSTGRES_PASSWORD: ppid
+ POSTGRES_DB: ppid
+ volumes:
+ - postgres_data:/var/lib/postgresql/data
+ ports:
+ - "5432:5432"
+
+ redis:
+ image: redis:7-alpine
+ ports:
+ - "6379:6379"
+
+ fuseki:
+ image: stain/jena-fuseki
+ environment:
+ ADMIN_PASSWORD: admin
+ FUSEKI_DATASET_1: ppid
+ volumes:
+ - fuseki_data:/fuseki
+ ports:
+ - "3030:3030"
+
+ elasticsearch:
+ image: elasticsearch:8.11.0
+ environment:
+ - discovery.type=single-node
+ - xpack.security.enabled=false
+ volumes:
+ - es_data:/usr/share/elasticsearch/data
+ ports:
+ - "9200:9200"
+
+ kafka:
+ image: confluentinc/cp-kafka:7.5.0
+ environment:
+ KAFKA_BROKER_ID: 1
+ KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
+ KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092
+ depends_on:
+ - zookeeper
+ ports:
+ - "9092:9092"
+
+ zookeeper:
+ image: confluentinc/cp-zookeeper:7.5.0
+ environment:
+ ZOOKEEPER_CLIENT_PORT: 2181
+
+volumes:
+ postgres_data:
+ fuseki_data:
+ es_data:
+```
+
+---
+
+## 10. Monitoring and Observability
+
+### 10.1 Prometheus Metrics
+
+```python
+from prometheus_client import Counter, Histogram, Gauge
+import time
+
+# Metrics
+REQUEST_COUNT = Counter(
+ 'ppid_requests_total',
+ 'Total HTTP requests',
+ ['method', 'endpoint', 'status']
+)
+
+REQUEST_LATENCY = Histogram(
+ 'ppid_request_latency_seconds',
+ 'Request latency in seconds',
+ ['method', 'endpoint'],
+ buckets=[.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10]
+)
+
+OBSERVATIONS_CREATED = Counter(
+ 'ppid_observations_created_total',
+ 'Total observations created'
+)
+
+RECONSTRUCTIONS_CREATED = Counter(
+ 'ppid_reconstructions_created_total',
+ 'Total reconstructions created'
+)
+
+ENTITY_RESOLUTION_LATENCY = Histogram(
+ 'ppid_entity_resolution_seconds',
+ 'Entity resolution latency',
+ buckets=[.1, .25, .5, 1, 2.5, 5, 10, 30]
+)
+
+CACHE_HITS = Counter(
+ 'ppid_cache_hits_total',
+ 'Cache hits',
+ ['cache_type']
+)
+
+CACHE_MISSES = Counter(
+ 'ppid_cache_misses_total',
+ 'Cache misses',
+ ['cache_type']
+)
+
+DB_CONNECTIONS = Gauge(
+ 'ppid_db_connections',
+ 'Active database connections'
+)
+
+
+# Middleware for request metrics
+@app.middleware("http")
+async def metrics_middleware(request: Request, call_next):
+ start_time = time.time()
+
+ response = await call_next(request)
+
+ latency = time.time() - start_time
+
+ REQUEST_COUNT.labels(
+ method=request.method,
+ endpoint=request.url.path,
+ status=response.status_code
+ ).inc()
+
+ REQUEST_LATENCY.labels(
+ method=request.method,
+ endpoint=request.url.path
+ ).observe(latency)
+
+ return response
+```
+
+### 10.2 Logging
+
+```python
+import logging
+import json
+from datetime import datetime
+
+class JSONFormatter(logging.Formatter):
+ """Structured JSON logging for observability."""
+
+ def format(self, record):
+ log_record = {
+ "timestamp": datetime.utcnow().isoformat(),
+ "level": record.levelname,
+ "logger": record.name,
+ "message": record.getMessage(),
+ }
+
+ # Add extra fields
+ if hasattr(record, 'poid'):
+ log_record['poid'] = record.poid
+ if hasattr(record, 'prid'):
+ log_record['prid'] = record.prid
+ if hasattr(record, 'request_id'):
+ log_record['request_id'] = record.request_id
+ if hasattr(record, 'user_id'):
+ log_record['user_id'] = record.user_id
+ if hasattr(record, 'duration_ms'):
+ log_record['duration_ms'] = record.duration_ms
+
+ if record.exc_info:
+ log_record['exception'] = self.formatException(record.exc_info)
+
+ return json.dumps(log_record)
+
+
+# Configure logging
+def setup_logging():
+ handler = logging.StreamHandler()
+ handler.setFormatter(JSONFormatter())
+
+ root_logger = logging.getLogger()
+ root_logger.addHandler(handler)
+ root_logger.setLevel(logging.INFO)
+
+ # Reduce noise from libraries
+ logging.getLogger("uvicorn.access").setLevel(logging.WARNING)
+ logging.getLogger("httpx").setLevel(logging.WARNING)
+```
+
+### 10.3 Grafana Dashboard (JSON)
+
+```json
+{
+ "dashboard": {
+ "title": "PPID System Overview",
+ "panels": [
+ {
+ "title": "Request Rate",
+ "type": "graph",
+ "targets": [
+ {
+ "expr": "sum(rate(ppid_requests_total[5m])) by (endpoint)"
+ }
+ ]
+ },
+ {
+ "title": "Request Latency (p99)",
+ "type": "graph",
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, rate(ppid_request_latency_seconds_bucket[5m]))"
+ }
+ ]
+ },
+ {
+ "title": "Observations Created",
+ "type": "stat",
+ "targets": [
+ {
+ "expr": "sum(ppid_observations_created_total)"
+ }
+ ]
+ },
+ {
+ "title": "Cache Hit Rate",
+ "type": "gauge",
+ "targets": [
+ {
+ "expr": "sum(rate(ppid_cache_hits_total[5m])) / (sum(rate(ppid_cache_hits_total[5m])) + sum(rate(ppid_cache_misses_total[5m])))"
+ }
+ ]
+ },
+ {
+ "title": "Error Rate",
+ "type": "graph",
+ "targets": [
+ {
+ "expr": "sum(rate(ppid_requests_total{status=~\"5..\"}[5m])) / sum(rate(ppid_requests_total[5m]))"
+ }
+ ]
+ }
+ ]
+ }
+}
+```
+
+---
+
+## 11. Implementation Checklist
+
+### 11.1 Phase 1: Core Infrastructure
+
+- [ ] Set up PostgreSQL database with schema
+- [ ] Set up Apache Jena Fuseki for RDF
+- [ ] Set up Redis for caching
+- [ ] Implement POID/PRID generation
+- [ ] Implement checksum validation
+- [ ] Create basic REST API endpoints
+
+### 11.2 Phase 2: Data Ingestion
+
+- [ ] Build web scraping infrastructure
+- [ ] Implement HTML archival
+- [ ] Integrate LLM for extraction
+- [ ] Implement XPath validation
+- [ ] Set up Kafka for async processing
+- [ ] Create ingestion pipeline
+
+### 11.3 Phase 3: Entity Resolution
+
+- [ ] Implement blocking strategies
+- [ ] Implement similarity metrics
+- [ ] Build clustering algorithm
+- [ ] Create human-in-loop review UI
+- [ ] Integrate with reconstruction creation
+
+### 11.4 Phase 4: GHCID Integration
+
+- [ ] Implement affiliation linking
+- [ ] Add SPARQL queries for institution lookups
+- [ ] Create bidirectional navigation
+- [ ] Sync with GHCID registry updates
+
+### 11.5 Phase 5: Production Readiness
+
+- [ ] Implement authentication/authorization
+- [ ] Set up rate limiting
+- [ ] Configure monitoring and alerting
+- [ ] Create backup and recovery procedures
+- [ ] Performance testing and optimization
+- [ ] Security audit
+
+---
+
+## 12. References
+
+### Standards
+- OAuth 2.0: https://oauth.net/2/
+- OpenAPI 3.1: https://spec.openapis.org/oas/latest.html
+- GraphQL: https://graphql.org/learn/
+- SPARQL 1.1: https://www.w3.org/TR/sparql11-query/
+
+### Related PPID Documents
+- [Identifier Structure Design](./05_identifier_structure_design.md)
+- [Entity Resolution Patterns](./06_entity_resolution_patterns.md)
+- [Claims and Provenance](./07_claims_and_provenance.md)
+
+### Technologies
+- FastAPI: https://fastapi.tiangolo.com/
+- Apache Jena: https://jena.apache.org/
+- PostgreSQL: https://www.postgresql.org/
+- Kubernetes: https://kubernetes.io/
diff --git a/docs/plan/person_pid/09_governance_and_sustainability.md b/docs/plan/person_pid/09_governance_and_sustainability.md
new file mode 100644
index 0000000000..287e8cbd96
--- /dev/null
+++ b/docs/plan/person_pid/09_governance_and_sustainability.md
@@ -0,0 +1,1009 @@
+# Governance and Sustainability
+
+**Version**: 0.1.0
+**Last Updated**: 2025-01-09
+**Related**: [Executive Summary](./01_executive_summary.md) | [Implementation Guidelines](./08_implementation_guidelines.md)
+
+---
+
+## 1. Overview
+
+This document defines the governance framework and sustainability model for PPID:
+
+- Organizational structure
+- Identifier assignment policies
+- Data stewardship and quality assurance
+- Community governance
+- Funding models
+- Deprecation and tombstoning
+- International coordination
+- Dispute resolution
+
+Long-term sustainability requires clear governance, community trust, and diversified funding.
+
+---
+
+## 2. Governance Principles
+
+### 2.1 Core Principles
+
+| Principle | Description |
+|-----------|-------------|
+| **Transparency** | All policies, decisions, and algorithms are public |
+| **Neutrality** | PPID serves the heritage community without commercial bias |
+| **Persistence** | Identifiers are permanent; "Cool URIs don't change" |
+| **Interoperability** | Open standards, no vendor lock-in |
+| **Privacy** | Minimal data collection, GDPR compliance |
+| **Inclusivity** | Global representation in governance bodies |
+
+### 2.2 Governance Comparison
+
+| Aspect | ORCID | ISNI | VIAF | PPID (Proposed) |
+|--------|-------|------|------|-----------------|
+| **Legal Status** | Non-profit | Non-profit | Consortium | Non-profit foundation |
+| **Membership** | Institutional | Agency-based | Library consortium | Heritage institutions |
+| **Funding** | Membership fees | Registration fees | In-kind (libraries) | Hybrid (grants + membership) |
+| **Decision Making** | Board + community | ISNI-IA board | OCLC-led | Steering committee + working groups |
+
+---
+
+## 3. Organizational Structure
+
+### 3.1 Governance Bodies
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│ PPID GOVERNANCE STRUCTURE │
+├─────────────────────────────────────────────────────────────────┤
+│ │
+│ ┌───────────────────────────────────────────────────────────┐ │
+│ │ STEERING COMMITTEE │ │
+│ │ Strategic direction, policy approval, partnerships │ │
+│ │ Members: 9-15 (heritage institutions, researchers) │ │
+│ │ Meets: Quarterly │ │
+│ └───────────────────────────────────────────────────────────┘ │
+│ │ │
+│ ┌───────────────────┼───────────────────┐ │
+│ ▼ ▼ ▼ │
+│ ┌───────────────┐ ┌───────────────┐ ┌───────────────┐ │
+│ │ TECHNICAL │ │ POLICY │ │ COMMUNITY │ │
+│ │ COMMITTEE │ │ COMMITTEE │ │ COUNCIL │ │
+│ └───────────────┘ └───────────────┘ └───────────────┘ │
+│ │ │ │ │
+│ ▼ ▼ ▼ │
+│ ┌───────────────┐ ┌───────────────┐ ┌───────────────┐ │
+│ │ Working Groups│ │ Working Groups│ │ Regional │ │
+│ │ - API Design │ │ - Privacy │ │ Chapters │ │
+│ │ - ER Algorithms │ - Assignment │ │ - Europe │ │
+│ │ - Interop │ │ - Deprecation │ │ - Americas │ │
+│ └───────────────┘ └───────────────┘ │ - Asia-Pacific│ │
+│ └───────────────┘ │
+│ │
+│ ┌───────────────────────────────────────────────────────────┐ │
+│ │ OPERATIONAL TEAM │ │
+│ │ Day-to-day operations, infrastructure, support │ │
+│ │ Staff: Executive Director + 5-10 FTE │ │
+│ └───────────────────────────────────────────────────────────┘ │
+│ │
+└─────────────────────────────────────────────────────────────────┘
+```
+
+### 3.2 Steering Committee
+
+**Composition** (13 seats):
+- 3 seats: Archives (national, regional, specialized)
+- 3 seats: Libraries (national, academic, public)
+- 3 seats: Museums (art, history, science)
+- 2 seats: Research/academia
+- 2 seats: Technology/infrastructure providers
+
+**Responsibilities**:
+- Approve strategic direction
+- Approve annual budget
+- Approve major policy changes
+- Approve partnerships and MOUs
+- Appoint Executive Director
+
+**Terms**: 3 years, staggered, maximum 2 consecutive terms
+
+**Voting**: Supermajority (2/3) for policy changes; simple majority for operations
+
+### 3.3 Technical Committee
+
+**Composition**: 7-9 members with technical expertise
+
+**Responsibilities**:
+- Maintain technical specifications
+- Review and approve API changes
+- Oversee entity resolution algorithms
+- Ensure interoperability with ORCID, ISNI, VIAF
+- Conduct security reviews
+
+**Meetings**: Monthly + ad-hoc for urgent issues
+
+### 3.4 Policy Committee
+
+**Composition**: 7-9 members (legal, ethics, domain experts)
+
+**Responsibilities**:
+- Develop identifier assignment policies
+- Manage privacy and data protection
+- Handle dispute resolution
+- Define deprecation procedures
+- Ensure compliance with regulations
+
+### 3.5 Community Council
+
+**Composition**: Open to all registered PPID users
+
+**Responsibilities**:
+- Provide feedback on policies and features
+- Elect community representatives
+- Participate in annual summit
+- Propose new features and improvements
+
+---
+
+## 4. Identifier Assignment Policies
+
+### 4.1 Who Can Create Identifiers?
+
+| Identifier | Creator | Approval Required | Automation |
+|------------|---------|-------------------|------------|
+| **POID** | Any registered user | No | Fully automated |
+| **PRID** | Curators only | For disputed cases | Semi-automated |
+
+### 4.2 POID Creation Policy
+
+**Eligibility**: Any user with verified account can create POIDs.
+
+**Requirements**:
+1. Valid source URL (must be accessible or archived)
+2. Minimum one claim with XPath provenance
+3. HTML archive stored in PPID infrastructure
+4. Content hash for verification
+
+**Rate Limits**:
+- Free tier: 100 POIDs/day
+- Institutional: 10,000 POIDs/day
+- API partners: Negotiated limits
+
+**Prohibited Sources**:
+- Sites with robots.txt exclusion (unless permission obtained)
+- Paywalled content (without license)
+- Social media (privacy concerns)
+- Fake or fabricated pages
+
+### 4.3 PRID Creation Policy
+
+**Eligibility**: Curators with verified heritage institution affiliation.
+
+**Requirements**:
+1. Link at least one POID
+2. Provide canonical name
+3. Document curation decision (manual/algorithmic/hybrid)
+4. Accept responsibility for accuracy
+
+**Curator Certification**:
+```python
+CURATOR_REQUIREMENTS = {
+ "heritage_affiliation": True, # Must work at GHCID institution
+ "training_completed": True, # Online certification course
+ "probation_period": 30, # Days before full privileges
+ "initial_review": True, # First 10 PRIDs reviewed by senior curator
+}
+```
+
+**Conflict of Interest**:
+- Curators should not create PRIDs for themselves
+- Curators should disclose relationships with subjects
+- Institutional bias should be documented
+
+### 4.4 External Identifier Linking
+
+| External ID | Verification Required | Auto-Link Allowed |
+|-------------|----------------------|-------------------|
+| ORCID | Yes (via API) | Yes, if verified |
+| ISNI | Yes (via lookup) | Yes, if match > 95% |
+| VIAF | Yes (via API) | Yes, if verified |
+| Wikidata | Manual review | No |
+| LinkedIn | URL match only | No (privacy) |
+
+---
+
+## 5. Data Stewardship
+
+### 5.1 Data Quality Framework
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│ DATA QUALITY LIFECYCLE │
+├─────────────────────────────────────────────────────────────────┤
+│ │
+│ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │
+│ │ Ingest │───▶│ Validate│───▶│ Curate │───▶│ Publish │ │
+│ └─────────┘ └─────────┘ └─────────┘ └─────────┘ │
+│ │ │ │ │ │
+│ ▼ ▼ ▼ ▼ │
+│ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │
+│ │ Format │ │ Syntax │ │ Entity │ │ Version │ │
+│ │ Check │ │ + Schema│ │ Resolut.│ │ Control │ │
+│ └─────────┘ └─────────┘ └─────────┘ └─────────┘ │
+│ │
+│ ┌───────────────────────────────────────────────────────────┐ │
+│ │ QUALITY METRICS │ │
+│ │ Completeness | Accuracy | Consistency | Timeliness │ │
+│ └───────────────────────────────────────────────────────────┘ │
+│ │
+└─────────────────────────────────────────────────────────────────┘
+```
+
+### 5.2 Quality Metrics
+
+| Metric | Definition | Target | Measurement |
+|--------|------------|--------|-------------|
+| **Completeness** | Required fields populated | > 95% | Automated check |
+| **Accuracy** | Claims match source | > 90% | Sampling audit |
+| **Consistency** | No conflicting claims | > 85% | Automated + review |
+| **Timeliness** | Data freshness | < 1 year | Re-verification cycle |
+| **Provenance** | XPath verifiable | 100% | Automated check |
+
+### 5.3 Data Retention Policy
+
+| Data Type | Retention Period | Rationale |
+|-----------|-----------------|-----------|
+| POIDs | Permanent | Persistent identifiers |
+| PRIDs | Permanent | Persistent identifiers |
+| Claims | Permanent (versioned) | Audit trail |
+| HTML Archives | 10 years | Storage costs |
+| API Logs | 2 years | Compliance |
+| User Data | Account lifetime + 1 year | GDPR |
+
+### 5.4 Quality Assurance Process
+
+```python
+from dataclasses import dataclass
+from enum import Enum
+from typing import Optional
+from datetime import datetime
+
+class QualityLevel(Enum):
+ GOLD = "gold" # Verified by multiple sources
+ SILVER = "silver" # Single authoritative source
+ BRONZE = "bronze" # Automated extraction, unverified
+ FLAGGED = "flagged" # Quality issues detected
+
+@dataclass
+class QualityAssessment:
+ observation_poid: str
+ quality_level: QualityLevel
+ completeness_score: float
+ accuracy_score: float
+ xpath_verification: bool
+ issues: list[str]
+ assessed_at: datetime
+ assessed_by: str # Curator ID or "automated"
+
+async def assess_observation_quality(poid: str, db) -> QualityAssessment:
+ """
+ Assess quality of a Person Observation.
+ """
+ observation = await db.get_observation(poid)
+ claims = await db.get_claims(poid)
+
+ issues = []
+
+ # Check completeness
+ required_fields = ['full_name']
+ optional_fields = ['job_title', 'employer', 'email']
+
+ found_required = sum(1 for c in claims if c.claim_type in required_fields)
+ found_optional = sum(1 for c in claims if c.claim_type in optional_fields)
+
+ completeness = (found_required / len(required_fields)) * 0.6 + \
+ (found_optional / len(optional_fields)) * 0.4
+
+ if completeness < 0.6:
+ issues.append("Low completeness: missing required fields")
+
+ # Check XPath verification
+ xpath_verified = all(c.xpath_match_score >= 0.9 for c in claims)
+ if not xpath_verified:
+ issues.append("Some claims have low XPath match scores")
+
+ # Check source quality
+ source_quality_map = {
+ 'official_registry': 1.0,
+ 'institutional_website': 0.9,
+ 'professional_network': 0.7,
+ 'social_media': 0.5,
+ }
+ source_quality = source_quality_map.get(observation.source_type, 0.5)
+
+ # Calculate accuracy (based on source + XPath)
+ accuracy = source_quality * (0.7 if xpath_verified else 0.4)
+
+ # Determine quality level
+ if accuracy >= 0.85 and completeness >= 0.9 and xpath_verified:
+ quality_level = QualityLevel.GOLD
+ elif accuracy >= 0.7 and completeness >= 0.7:
+ quality_level = QualityLevel.SILVER
+ elif issues:
+ quality_level = QualityLevel.FLAGGED
+ else:
+ quality_level = QualityLevel.BRONZE
+
+ return QualityAssessment(
+ observation_poid=poid,
+ quality_level=quality_level,
+ completeness_score=completeness,
+ accuracy_score=accuracy,
+ xpath_verification=xpath_verified,
+ issues=issues,
+ assessed_at=datetime.utcnow(),
+ assessed_by="automated"
+ )
+```
+
+---
+
+## 6. Community Governance
+
+### 6.1 Membership Tiers
+
+| Tier | Annual Fee | Benefits |
+|------|------------|----------|
+| **Individual** | Free | Read access, create POIDs (limited) |
+| **Contributor** | Free | Create POIDs (unlimited), suggest edits |
+| **Curator** | Free | Create PRIDs, resolve conflicts |
+| **Institutional Member** | EUR 500-5,000 | API access, priority support, governance vote |
+| **Sustaining Member** | EUR 10,000+ | Board nomination, strategic input |
+
+### 6.2 Curator Certification
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│ CURATOR CERTIFICATION PATH │
+├─────────────────────────────────────────────────────────────────┤
+│ │
+│ Step 1: Application │
+│ ├─ Submit heritage institution affiliation (GHCID) │
+│ ├─ Provide professional references │
+│ └─ Accept code of conduct │
+│ │
+│ Step 2: Training (Online, ~4 hours) │
+│ ├─ Module 1: PPID fundamentals │
+│ ├─ Module 2: Entity resolution principles │
+│ ├─ Module 3: Claims and provenance │
+│ ├─ Module 4: Cultural naming conventions │
+│ └─ Module 5: Ethics and privacy │
+│ │
+│ Step 3: Practical Assessment │
+│ ├─ Create 5 POIDs from assigned sources │
+│ ├─ Create 3 PRIDs with entity resolution │
+│ └─ Resolve 2 simulated conflicts │
+│ │
+│ Step 4: Probation (30 days) │
+│ ├─ First 10 PRIDs reviewed by mentor │
+│ └─ Feedback and correction cycle │
+│ │
+│ Step 5: Full Certification │
+│ ├─ Certificate issued │
+│ ├─ Full PRID creation privileges │
+│ └─ Annual recertification required │
+│ │
+└─────────────────────────────────────────────────────────────────┘
+```
+
+### 6.3 Code of Conduct
+
+**Core Commitments**:
+
+1. **Accuracy**: Only create claims that are verifiable
+2. **Neutrality**: No personal bias in curation decisions
+3. **Transparency**: Document all curation decisions
+4. **Privacy**: Respect data subject rights
+5. **Collaboration**: Work constructively with community
+6. **Integrity**: No fabrication or manipulation of data
+
+**Violations and Consequences**:
+
+| Severity | Examples | Consequence |
+|----------|----------|-------------|
+| Minor | Incomplete documentation, slow response | Warning |
+| Moderate | Pattern of low-quality contributions | Suspension (30 days) |
+| Serious | Fabricated claims, privacy violations | Revocation + ban |
+| Critical | Malicious data corruption, harassment | Permanent ban + legal action |
+
+### 6.4 Decision-Making Process
+
+```python
+from enum import Enum
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+
+class DecisionType(Enum):
+ OPERATIONAL = "operational" # Staff decides
+ TECHNICAL = "technical" # Technical Committee votes
+ POLICY = "policy" # Policy Committee + Steering
+ STRATEGIC = "strategic" # Steering Committee only
+
+class VotingMethod(Enum):
+ SIMPLE_MAJORITY = "simple" # > 50%
+ SUPERMAJORITY = "super" # > 66.7%
+ CONSENSUS = "consensus" # No objections
+ LAZY_CONSENSUS = "lazy" # No objections in N days
+
+@dataclass
+class Proposal:
+ id: str
+ title: str
+ description: str
+ decision_type: DecisionType
+ voting_method: VotingMethod
+ proposed_by: str
+ proposed_at: datetime
+ discussion_period: timedelta
+ voting_period: timedelta
+ status: str # draft, discussion, voting, approved, rejected
+
+DECISION_MATRIX = {
+ DecisionType.OPERATIONAL: {
+ "authority": "Executive Director",
+ "voting": None,
+ "timeline": "Immediate"
+ },
+ DecisionType.TECHNICAL: {
+ "authority": "Technical Committee",
+ "voting": VotingMethod.SIMPLE_MAJORITY,
+ "timeline": "2 weeks discussion + 1 week voting"
+ },
+ DecisionType.POLICY: {
+ "authority": "Policy Committee + Steering",
+ "voting": VotingMethod.SUPERMAJORITY,
+ "timeline": "4 weeks discussion + 2 weeks voting"
+ },
+ DecisionType.STRATEGIC: {
+ "authority": "Steering Committee",
+ "voting": VotingMethod.SUPERMAJORITY,
+ "timeline": "8 weeks discussion + 2 weeks voting"
+ },
+}
+```
+
+---
+
+## 7. Funding Models
+
+### 7.1 Revenue Streams
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│ PPID FUNDING MODEL │
+├─────────────────────────────────────────────────────────────────┤
+│ │
+│ ┌─────────────────────────────────────────────────────────┐ │
+│ │ DIVERSIFIED FUNDING (Target: 5-year sustainability) │ │
+│ └─────────────────────────────────────────────────────────┘ │
+│ │
+│ 40% ┌─────────────────────────────────────────────────────┐ │
+│ │ GRANTS & FOUNDATIONS │ │
+│ │ - Andrew W. Mellon Foundation │ │
+│ │ - Horizon Europe (heritage digitization) │ │
+│ │ - IMLS (US heritage institutions) │ │
+│ │ - Dutch Digital Heritage Network │ │
+│ └─────────────────────────────────────────────────────┘ │
+│ │
+│ 35% ┌─────────────────────────────────────────────────────┐ │
+│ │ INSTITUTIONAL MEMBERSHIP │ │
+│ │ - Tiered fees based on size │ │
+│ │ - Consortium discounts │ │
+│ │ - In-kind contributions (staff time, hosting) │ │
+│ └─────────────────────────────────────────────────────┘ │
+│ │
+│ 15% ┌─────────────────────────────────────────────────────┐ │
+│ │ API & SERVICES │ │
+│ │ - Commercial API access (high volume) │ │
+│ │ - Data enrichment services │ │
+│ │ - Custom integration support │ │
+│ └─────────────────────────────────────────────────────┘ │
+│ │
+│ 10% ┌─────────────────────────────────────────────────────┐ │
+│ │ TRAINING & EVENTS │ │
+│ │ - Curator certification fees │ │
+│ │ - Annual conference │ │
+│ │ - Workshops and webinars │ │
+│ └─────────────────────────────────────────────────────┘ │
+│ │
+└─────────────────────────────────────────────────────────────────┘
+```
+
+### 7.2 Institutional Membership Fees
+
+| Institution Size | Staff | Annual Fee (EUR) | API Calls/Month |
+|------------------|-------|------------------|-----------------|
+| Small | < 10 FTE | 500 | 50,000 |
+| Medium | 10-50 FTE | 1,500 | 200,000 |
+| Large | 50-200 FTE | 3,000 | 500,000 |
+| Very Large | > 200 FTE | 5,000 | 1,000,000 |
+| Consortium | Multiple | Negotiated | Pooled |
+
+**Discounts**:
+- Multi-year commitment: 10% discount
+- Consortium (5+ institutions): 25% discount
+- Developing nations: 50-75% discount
+
+### 7.3 Commercial API Tiers
+
+| Tier | Monthly Fee (EUR) | API Calls | SLA | Support |
+|------|-------------------|-----------|-----|---------|
+| Starter | 0 | 1,000 | None | Community |
+| Professional | 99 | 50,000 | 99% | Email |
+| Business | 499 | 500,000 | 99.5% | Priority |
+| Enterprise | Custom | Unlimited | 99.9% | Dedicated |
+
+### 7.4 Financial Sustainability Model
+
+```python
+from dataclasses import dataclass
+from decimal import Decimal
+
+@dataclass
+class AnnualBudget:
+ # Revenue
+ grants: Decimal
+ membership_fees: Decimal
+ api_revenue: Decimal
+ training_events: Decimal
+
+ # Expenses
+ staff_salaries: Decimal # ~60% of budget
+ infrastructure: Decimal # ~20% of budget
+ operations: Decimal # ~10% of budget
+ reserves: Decimal # ~10% of budget
+
+ @property
+ def total_revenue(self) -> Decimal:
+ return (self.grants + self.membership_fees +
+ self.api_revenue + self.training_events)
+
+ @property
+ def total_expenses(self) -> Decimal:
+ return (self.staff_salaries + self.infrastructure +
+ self.operations + self.reserves)
+
+ @property
+ def is_sustainable(self) -> bool:
+ return self.total_revenue >= self.total_expenses
+
+# Year 3 target budget (EUR)
+YEAR_3_BUDGET = AnnualBudget(
+ # Revenue
+ grants=Decimal("400000"),
+ membership_fees=Decimal("350000"),
+ api_revenue=Decimal("150000"),
+ training_events=Decimal("100000"),
+
+ # Expenses
+ staff_salaries=Decimal("600000"), # 6 FTE average
+ infrastructure=Decimal("200000"),
+ operations=Decimal("100000"),
+ reserves=Decimal("100000"),
+)
+
+assert YEAR_3_BUDGET.is_sustainable
+```
+
+---
+
+## 8. Deprecation and Tombstoning
+
+### 8.1 Identifier Lifecycle
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│ IDENTIFIER LIFECYCLE STATES │
+├─────────────────────────────────────────────────────────────────┤
+│ │
+│ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │
+│ │ DRAFT │───▶│ ACTIVE │───▶│DEPRECATED│───▶│TOMBSTONE│ │
+│ └─────────┘ └─────────┘ └─────────┘ └─────────┘ │
+│ │ │ │ │ │
+│ │ │ │ │ │
+│ ▼ ▼ ▼ ▼ │
+│ Not Fully Still Permanent │
+│ published operational resolvable redirect │
+│ (with warning) to replacement │
+│ │
+│ ── NEVER DELETED ───────────────────────────────────────────── │
+│ Identifiers are PERMANENT. Tombstones persist forever. │
+│ │
+└─────────────────────────────────────────────────────────────────┘
+```
+
+### 8.2 Deprecation Reasons
+
+| Reason | Description | Action |
+|--------|-------------|--------|
+| **Duplicate** | Same person has two PRIDs | Merge → tombstone loser |
+| **Error** | Data fabrication detected | Tombstone with explanation |
+| **GDPR Request** | Data subject requests removal | Tombstone, redact PII |
+| **Merge** | PRIDs combined due to new evidence | Tombstone merged ID |
+| **Split** | PRID contained multiple persons | Create new PRIDs, tombstone original |
+
+### 8.3 Tombstone Format
+
+```turtle
+@prefix ppid: <https://ppid.org/> .
+@prefix ppidv: <https://ppid.org/vocab#> .
+@prefix schema: <http://schema.org/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+# Tombstoned identifier
+ppid:PRID-1234-5678-90ab-cde5
+ a ppidv:TombstonedReconstruction ;
+ ppidv:tombstoneReason "duplicate" ;
+ ppidv:tombstoneDate "2025-06-15"^^xsd:date ;
+ ppidv:tombstoneBy ppid:curator-001 ;
+ ppidv:replacedBy ppid:PRID-9876-5432-10fe-dcb0 ;
+ ppidv:tombstoneNote "Merged with PRID-9876... after evidence showed same person" ;
+ schema:name "[TOMBSTONED]" . # Original name redacted
+
+# HTTP response for a tombstoned URI returns 410 Gone; the JSON body points to the replacement
+# Content-Type: application/json
+# {
+# "status": "tombstoned",
+# "reason": "duplicate",
+# "replaced_by": "https://ppid.org/PRID-9876-5432-10fe-dcb0",
+# "tombstoned_on": "2025-06-15",
+# "note": "Merged with PRID-9876... after evidence showed same person"
+# }
+```
+
+### 8.4 GDPR Right to Erasure
+
+```python
+from datetime import datetime
+from typing import Optional
+
+async def handle_gdpr_erasure_request(
+ prid: str,
+ requestor_email: str,
+ identity_verification: dict,
+ db
+) -> dict:
+ """
+ Handle GDPR Article 17 (Right to Erasure) request.
+
+ PPID balances erasure rights with archival/research exemptions.
+ """
+ # 1. Verify identity
+ if not verify_identity(identity_verification):
+ return {"status": "rejected", "reason": "Identity verification failed"}
+
+ # 2. Check exemptions (GDPR Art. 17(3))
+ exemptions = check_exemptions(prid, db)
+
+ if exemptions.get("archival_public_interest"):
+ # Heritage/archival exemption may apply
+ return {
+ "status": "partial",
+ "reason": "Archival exemption applies",
+ "action": "Data minimized but identifier retained for historical record"
+ }
+
+ # 3. Tombstone the PRID (don't delete)
+ await db.tombstone_reconstruction(
+ prid=prid,
+ reason="gdpr_erasure",
+ tombstoned_by="gdpr_system",
+ redact_pii=True # Remove name, claims, but keep tombstone
+ )
+
+ # 4. Redact linked observations
+ observations = await db.get_observations_for_prid(prid)
+ for poid in observations:
+ await db.redact_observation(
+ poid=poid,
+ redact_fields=["literal_name", "claims"],
+ retain_provenance=True # Keep source URL for audit
+ )
+
+ # 5. Log for compliance
+ await db.log_gdpr_action(
+ action="erasure",
+ prid=prid,
+ requestor=requestor_email,
+ completed_at=datetime.utcnow()
+ )
+
+ return {
+ "status": "completed",
+ "prid": prid,
+ "action": "Tombstoned with PII redacted",
+ "completion_date": datetime.utcnow().isoformat()
+ }
+```
+
+---
+
+## 9. International Coordination
+
+### 9.1 Alignment with Existing Systems
+
+| System | Coordination Level | Integration |
+|--------|-------------------|-------------|
+| **ORCID** | Strategic partnership | owl:sameAs linking, API federation |
+| **ISNI** | Technical integration | Registration agent status (goal) |
+| **VIAF** | Data exchange | Cluster matching, link sharing |
+| **Wikidata** | Community alignment | Bidirectional linking |
+| **GND** | Regional partnership | German heritage focus |
+| **SNAC** | Domain partnership | Archival persons |
+
+### 9.2 Regional Chapters
+
+| Region | Focus | Lead Institution (Proposed) |
+|--------|-------|----------------------------|
+| **Europe** | EU heritage policy, GDPR | Europeana Foundation |
+| **North America** | SNAC integration, DPLA | Library of Congress |
+| **Asia-Pacific** | CJK naming conventions | National Diet Library (Japan) |
+| **Latin America** | Spanish/Portuguese names | Biblioteca Nacional (Brazil) |
+| **Africa** | Oral heritage, naming diversity | African Library & Info Assoc. |
+
+### 9.3 Standards Bodies Engagement
+
+| Body | Engagement | PPID Contribution |
+|------|------------|-------------------|
+| **ISO TC 46/SC 9** | Observer → Participant | Identifier standards input |
+| **W3C** | Community Group | RDF/linked data best practices |
+| **IETF** | Monitor | URI standards compliance |
+| **Dublin Core** | Contributor | Metadata alignment |
+| **CIDOC-CRM** | Liaison | Cultural heritage modeling |
+
+---
+
+## 10. Dispute Resolution
+
+### 10.1 Types of Disputes
+
+| Dispute Type | Description | Resolution Path |
+|--------------|-------------|-----------------|
+| **Merge Conflict** | Curators disagree on whether POIDs refer to same person | Curator panel review |
+| **Data Accuracy** | Claim disputed by subject or third party | Evidence review |
+| **Attribution** | Who should be credited for curation | Activity log review |
+| **Privacy** | Subject objects to data inclusion | GDPR process |
+| **Ownership** | Multiple claims to manage a PRID | Institution hierarchy |
+
+### 10.2 Resolution Process
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│ DISPUTE RESOLUTION PROCESS │
+├─────────────────────────────────────────────────────────────────┤
+│ │
+│ Stage 1: Informal Resolution (0-7 days) │
+│ ├─ Parties communicate directly │
+│ ├─ Use discussion threads on disputed record │
+│ └─ Most disputes resolve here │
+│ │
+│ Stage 2: Mediation (7-21 days) │
+│ ├─ Neutral mediator assigned │
+│ ├─ Evidence review │
+│ └─ Mediated agreement │
+│ │
+│ Stage 3: Panel Review (21-42 days) │
+│ ├─ 3-person panel (1 Technical, 1 Policy, 1 Community) │
+│ ├─ Written submissions │
+│ └─ Binding decision │
+│ │
+│ Stage 4: Appeal (if applicable) │
+│ ├─ Steering Committee review │
+│ ├─ New evidence only │
+│ └─ Final decision │
+│ │
+└─────────────────────────────────────────────────────────────────┘
+```
+
+### 10.3 Dispute Resolution Implementation
+
+```python
+from enum import Enum
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+from typing import Optional
+
+class DisputeStatus(Enum):
+ OPEN = "open"
+ INFORMAL = "informal"
+ MEDIATION = "mediation"
+ PANEL = "panel"
+ APPEAL = "appeal"
+ RESOLVED = "resolved"
+ CLOSED = "closed"
+
+class DisputeType(Enum):
+ MERGE_CONFLICT = "merge_conflict"
+ DATA_ACCURACY = "data_accuracy"
+ ATTRIBUTION = "attribution"
+ PRIVACY = "privacy"
+ OWNERSHIP = "ownership"
+
+@dataclass
+class Dispute:
+ id: str
+ dispute_type: DisputeType
+ status: DisputeStatus
+ subject_prid: str
+ complainant_id: str
+ respondent_id: Optional[str]
+ description: str
+ evidence: list[str]
+ created_at: datetime
+ updated_at: datetime
+ deadline: datetime
+ resolution: Optional[str] = None
+ mediator_id: Optional[str] = None
+ panel_members: Optional[list[str]] = None
+
+async def escalate_dispute(dispute_id: str, db) -> Dispute:
+ """
+ Escalate dispute to next stage if deadline passed without resolution.
+ """
+ dispute = await db.get_dispute(dispute_id)
+
+ if dispute.status == DisputeStatus.INFORMAL:
+ # Escalate to mediation
+ mediator = await assign_mediator(dispute.dispute_type)
+ dispute.status = DisputeStatus.MEDIATION
+ dispute.mediator_id = mediator.id
+ dispute.deadline = datetime.utcnow() + timedelta(days=14)
+
+ elif dispute.status == DisputeStatus.MEDIATION:
+ # Escalate to panel
+ panel = await assemble_panel()
+ dispute.status = DisputeStatus.PANEL
+ dispute.panel_members = [p.id for p in panel]
+ dispute.deadline = datetime.utcnow() + timedelta(days=21)
+
+ elif dispute.status == DisputeStatus.PANEL:
+ # Force panel decision
+ await notify_panel_deadline(dispute)
+
+ dispute.updated_at = datetime.utcnow()
+ await db.update_dispute(dispute)
+
+ return dispute
+```
+
+---
+
+## 11. Risk Management
+
+### 11.1 Risk Matrix
+
+| Risk | Likelihood | Impact | Mitigation |
+|------|------------|--------|------------|
+| **Funding shortfall** | Medium | High | Diversified funding, reserves |
+| **Key person dependency** | Medium | Medium | Documentation, succession planning |
+| **Data breach** | Low | High | Security audits, encryption |
+| **Scope creep** | Medium | Medium | Clear charter, governance |
+| **Low adoption** | Medium | High | Partnerships, value demonstration |
+| **Technical debt** | Medium | Medium | Regular refactoring, documentation |
+| **Competitor emergence** | Low | Medium | Differentiation, partnerships |
+
+### 11.2 Contingency Plans
+
+**Funding Failure Contingency**:
+```
+IF annual_funding < 60% of target:
+ 1. Activate 6-month reserve fund
+ 2. Reduce staff to essential operations (3 FTE)
+ 3. Suspend new feature development
+ 4. Seek emergency grants
+ 5. Negotiate hosting cost reduction
+    6. IF shortfall persists for 12 months: Initiate graceful shutdown procedure
+```
+
+**Graceful Shutdown Procedure** (last resort):
+1. Announce 12-month wind-down timeline
+2. Export all data to open formats (RDF dumps)
+3. Transfer infrastructure to accepting institution
+4. Archive code on GitHub
+5. Ensure identifier resolution continues (static hosting)
+6. Document lessons learned
+
+---
+
+## 12. Implementation Roadmap
+
+### 12.1 Phase 1: Foundation (Year 1)
+
+| Quarter | Milestone |
+|---------|-----------|
+| Q1 | Legal entity established (non-profit foundation) |
+| Q1 | Steering Committee formed (interim) |
+| Q2 | Seed funding secured (EUR 500K+) |
+| Q2 | Executive Director hired |
+| Q3 | Technical infrastructure launched (beta) |
+| Q3 | First 10 institutional members |
+| Q4 | Curator certification program launched |
+| Q4 | 10,000 POIDs / 1,000 PRIDs |
+
+### 12.2 Phase 2: Growth (Year 2)
+
+| Quarter | Milestone |
+|---------|-----------|
+| Q1 | ORCID partnership formalized |
+| Q1 | 50 institutional members |
+| Q2 | API v1.0 stable release |
+| Q2 | First regional chapter (Europe) |
+| Q3 | 100,000 POIDs / 10,000 PRIDs |
+| Q3 | ISNI registration agent application |
+| Q4 | Annual conference (first) |
+| Q4 | Financial sustainability achieved (70%) |
+
+### 12.3 Phase 3: Maturity (Year 3+)
+
+| Quarter | Milestone |
+|---------|-----------|
+| Q1 | 200+ institutional members |
+| Q2 | ISO standardization process initiated |
+| Q3 | 1M POIDs / 100K PRIDs |
+| Q4 | Full financial sustainability (100%) |
+| Ongoing | International expansion |
+| Ongoing | Feature enhancements based on community input |
+
+---
+
+## 13. Success Metrics
+
+### 13.1 Adoption Metrics
+
+| Metric | Year 1 Target | Year 3 Target |
+|--------|---------------|---------------|
+| POIDs created | 10,000 | 1,000,000 |
+| PRIDs created | 1,000 | 100,000 |
+| Institutional members | 10 | 200 |
+| Certified curators | 50 | 500 |
+| Countries represented | 10 | 50 |
+| GHCID institutions linked | 100 | 5,000 |
+
+### 13.2 Quality Metrics
+
+| Metric | Target |
+|--------|--------|
+| Claim accuracy (audited) | > 90% |
+| XPath verification rate | 100% |
+| Duplicate detection rate | > 95% |
+| Dispute resolution time | < 30 days |
+| API uptime | > 99.9% |
+
+### 13.3 Sustainability Metrics
+
+| Metric | Year 1 | Year 3 |
+|--------|--------|--------|
+| Revenue diversity (sources) | 2 | 4 |
+| Membership revenue % | 20% | 35% |
+| Operating reserve (months) | 3 | 12 |
+| Grant dependency | 80% | 40% |
+
+---
+
+## 14. References
+
+### Governance Models
+- ORCID Governance: https://orcid.org/about/governance
+- ISNI International Agency: https://isni.org/page/governance/
+- W3C Process Document: https://www.w3.org/Consortium/Process/
+
+### Legal and Compliance
+- GDPR: https://gdpr.eu/
+- Dutch Erfgoedwet (Heritage Act 2016): https://wetten.overheid.nl/
+- Non-profit foundation (stichting): https://www.kvk.nl/
+
+### Sustainability
+- NDSA Levels of Preservation: https://ndsa.org/publications/levels-of-digital-preservation/
+- COAR Sustainability Principles: https://www.coar-repositories.org/
+
+### Related PPID Documents
+- [Executive Summary](./01_executive_summary.md)
+- [Implementation Guidelines](./08_implementation_guidelines.md)