Add test script for PiCo extraction from Arabic waqf documents
- Implemented a new script `test_pico_arabic_waqf.py` to test the GLM annotator's ability to extract person observations from Arabic historical documents.
- The script includes environment variable handling for the API token, structured prompts for the GLM API, and validation of extraction results.
- Added comprehensive logging for API responses, extraction results, and validation errors.
- Included a sample Arabic waqf text for testing purposes, following the PiCo ontology pattern.
This commit is contained in:
parent
b1f93b6f22
commit
505c12601a
84 changed files with 19370 additions and 2597 deletions
|
|
@ -535,7 +535,8 @@ async def get_institutions(
|
|||
social_instagram,
|
||||
wikidata_label_en,
|
||||
wikidata_description_en,
|
||||
logo_url
|
||||
logo_url,
|
||||
web_claims
|
||||
FROM custodians
|
||||
WHERE {where_clause}
|
||||
ORDER BY name
|
||||
|
|
@ -620,6 +621,10 @@ async def get_institutions(
|
|||
if row['logo_url']:
|
||||
props["logo_url"] = row['logo_url']
|
||||
|
||||
# Web claims (financial documents, etc.)
|
||||
if row['web_claims']:
|
||||
props["web_claims"] = row['web_claims']
|
||||
|
||||
features.append({
|
||||
"type": "Feature",
|
||||
"geometry": {
|
||||
|
|
|
|||
|
|
@ -848,6 +848,28 @@ async def get_profile(
|
|||
if isinstance(profile_data, str):
|
||||
profile_data = json.loads(profile_data)
|
||||
|
||||
# Transform experience → career_history for frontend compatibility
|
||||
# The database stores 'experience' but frontend expects 'career_history'
|
||||
inner_profile = profile_data.get('profile_data', {})
|
||||
if inner_profile and 'experience' in inner_profile and 'career_history' not in inner_profile:
|
||||
experience = inner_profile.get('experience', [])
|
||||
if experience:
|
||||
# Map field names: title→role, company→organization, duration→dates
|
||||
career_history = []
|
||||
for job in experience:
|
||||
career_item = {
|
||||
'role': job.get('title'),
|
||||
'organization': job.get('company'),
|
||||
'dates': job.get('duration'),
|
||||
'location': job.get('location'),
|
||||
'description': job.get('description'),
|
||||
'company_size': job.get('company_details'),
|
||||
'current': job.get('current', False),
|
||||
}
|
||||
career_history.append(career_item)
|
||||
inner_profile['career_history'] = career_history
|
||||
profile_data['profile_data'] = inner_profile
|
||||
|
||||
return ProfileResponse(
|
||||
profile_data=profile_data,
|
||||
linkedin_slug=result['linkedin_slug'],
|
||||
|
|
@ -867,8 +889,30 @@ async def get_profile(
|
|||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
file_profile_data = data.get('profile_data', {})
|
||||
|
||||
# Transform experience → career_history for frontend compatibility
|
||||
inner_profile = file_profile_data.get('profile_data', {})
|
||||
if inner_profile and 'experience' in inner_profile and 'career_history' not in inner_profile:
|
||||
experience = inner_profile.get('experience', [])
|
||||
if experience:
|
||||
career_history = []
|
||||
for job in experience:
|
||||
career_item = {
|
||||
'role': job.get('title'),
|
||||
'organization': job.get('company'),
|
||||
'dates': job.get('duration'),
|
||||
'location': job.get('location'),
|
||||
'description': job.get('description'),
|
||||
'company_size': job.get('company_details'),
|
||||
'current': job.get('current', False),
|
||||
}
|
||||
career_history.append(career_item)
|
||||
inner_profile['career_history'] = career_history
|
||||
file_profile_data['profile_data'] = inner_profile
|
||||
|
||||
return ProfileResponse(
|
||||
profile_data=data.get('profile_data', {}),
|
||||
profile_data=file_profile_data,
|
||||
linkedin_slug=linkedin_slug,
|
||||
extraction_date=data.get('exa_search_metadata', {}).get('enrichment_timestamp'),
|
||||
updated_date=None,
|
||||
|
|
|
|||
|
|
@ -99,20 +99,26 @@ class Settings:
|
|||
cache_ttl: int = int(os.getenv("CACHE_TTL", "900")) # 15 minutes
|
||||
|
||||
# Qdrant Vector DB
|
||||
# Production: Use URL-based client via bronhouder.nl/qdrant reverse proxy
|
||||
qdrant_host: str = os.getenv("QDRANT_HOST", "localhost")
|
||||
qdrant_port: int = int(os.getenv("QDRANT_PORT", "6333"))
|
||||
qdrant_use_production: bool = os.getenv("QDRANT_USE_PRODUCTION", "false").lower() == "true"
|
||||
qdrant_use_production: bool = os.getenv("QDRANT_USE_PRODUCTION", "true").lower() == "true"
|
||||
qdrant_production_url: str = os.getenv("QDRANT_PRODUCTION_URL", "https://bronhouder.nl/qdrant")
|
||||
|
||||
# Oxigraph SPARQL
|
||||
sparql_endpoint: str = os.getenv("SPARQL_ENDPOINT", "http://localhost:7878/query")
|
||||
# Production: Use bronhouder.nl/sparql reverse proxy
|
||||
sparql_endpoint: str = os.getenv("SPARQL_ENDPOINT", "https://bronhouder.nl/sparql")
|
||||
|
||||
# TypeDB
|
||||
# Note: TypeDB not exposed via reverse proxy - always use localhost
|
||||
typedb_host: str = os.getenv("TYPEDB_HOST", "localhost")
|
||||
typedb_port: int = int(os.getenv("TYPEDB_PORT", "1729"))
|
||||
typedb_database: str = os.getenv("TYPEDB_DATABASE", "heritage_custodians")
|
||||
typedb_use_production: bool = os.getenv("TYPEDB_USE_PRODUCTION", "false").lower() == "true" # Default off
|
||||
|
||||
# PostGIS
|
||||
postgis_url: str = os.getenv("POSTGIS_URL", "http://localhost:8001")
|
||||
# PostGIS/Geo API
|
||||
# Production: Use bronhouder.nl/api/geo reverse proxy
|
||||
postgis_url: str = os.getenv("POSTGIS_URL", "https://bronhouder.nl/api/geo")
|
||||
|
||||
# LLM Configuration
|
||||
anthropic_api_key: str = os.getenv("ANTHROPIC_API_KEY", "")
|
||||
|
|
@ -408,7 +414,7 @@ class MultiSourceRetriever:
|
|||
if self._typedb is None and RETRIEVERS_AVAILABLE:
|
||||
try:
|
||||
self._typedb = create_typedb_retriever(
|
||||
use_production=settings.qdrant_use_production
|
||||
use_production=settings.typedb_use_production # Use TypeDB-specific setting
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to initialize TypeDB: {e}")
|
||||
|
|
@ -686,7 +692,9 @@ async def lifespan(app: FastAPI):
|
|||
retriever = MultiSourceRetriever()
|
||||
|
||||
if RETRIEVERS_AVAILABLE:
|
||||
viz_selector = VisualizationSelector(use_dspy=bool(settings.anthropic_api_key))
|
||||
# Check for any available LLM API key (Anthropic preferred, OpenAI fallback)
|
||||
has_llm_key = bool(settings.anthropic_api_key or settings.openai_api_key)
|
||||
viz_selector = VisualizationSelector(use_dspy=has_llm_key)
|
||||
|
||||
# Configure DSPy if API key available
|
||||
if settings.anthropic_api_key:
|
||||
|
|
@ -697,7 +705,16 @@ async def lifespan(app: FastAPI):
|
|||
api_key=settings.anthropic_api_key,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to configure DSPy: {e}")
|
||||
logger.warning(f"Failed to configure DSPy with Anthropic: {e}")
|
||||
elif settings.openai_api_key:
|
||||
try:
|
||||
configure_dspy(
|
||||
provider="openai",
|
||||
model="gpt-4o-mini",
|
||||
api_key=settings.openai_api_key,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to configure DSPy with OpenAI: {e}")
|
||||
|
||||
logger.info("Heritage RAG API started")
|
||||
|
||||
|
|
@ -1068,7 +1085,7 @@ if __name__ == "__main__":
|
|||
uvicorn.run(
|
||||
"main:app",
|
||||
host="0.0.0.0",
|
||||
port=8002,
|
||||
port=8003,
|
||||
reload=settings.debug,
|
||||
log_level="info",
|
||||
)
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@
|
|||
"name": "Alexandra Nederlof",
|
||||
"linkedin_url": "https://www.linkedin.com/in/alexandra-nederlof-74b7a341",
|
||||
"headline": "Junior Papierrestaurator bij Rijksmuseum",
|
||||
"location": "Ik ben verliefd! En wel op mijn vak als papierrestaurator. De mogelijkheid om bij instellingen als musea, archieven of bibliotheken fysiek te kunnen helpen met het behouden van het papieren cultureel erfgoed geeft een geweldige voldoening. Daarnaast is het restaureren voor de particuliere klanten voor mij ook een waar genoegen: ervoor kunnen zorgen dat een klant weer optimaal van zijn kunstobject kan genieten of een brief weer kan lezen. Ik heb meegewerkt aan uiteenlopende projecten. Ik heb daardoor een brede ervaring opgedaan met het behandelen van verschillende soorten objecten. Van poster tot landkaart, van pastel tot papier maché, van boek tot botanisch model. Hierdoor heb ik een heel scala aan verantwoorde behandelmethoden mij eigen kunnen maken. Op dit moment werk ik als junior papierrestaurator bij het Rijksmuseum.",
|
||||
"location": null,
|
||||
"connections": "428 connections",
|
||||
"about": "Ik ben verliefd! En wel op mijn vak als papierrestaurator. De mogelijkheid om bij instellingen als musea, archieven of bibliotheken fysiek te kunnen helpen met het behouden van het papieren cultureel erfgoed geeft een geweldige voldoening. Daarnaast is het restaureren voor de particuliere klanten voor mij ook een waar genoegen: ervoor kunnen zorgen dat een klant weer optimaal van zijn kunstobject kan genieten of een brief weer kan lezen. Ik heb meegewerkt aan uiteenlopende projecten. Ik heb daardoor een brede ervaring opgedaan met het behandelen van verschillende soorten objecten. Van poster tot landkaart, van pastel tot papier maché, van boek tot botanisch model. Hierdoor heb ik een heel scala aan verantwoorde behandelmethoden mij eigen kunnen maken. Op dit moment werk ik als junior papierrestaurator bij het Rijksmuseum.",
|
||||
"summary": "Alexandra Nederlof is a Junior Papierrestaurator at the Rijksmuseum in Amsterdam, where she is passionate about preserving paper cultural heritage for museums, archives, and private clients. She has extensive experience in restoring a variety of objects, including posters, maps, and books, and has developed a range of responsible treatment methods. Nederlof has also contributed to publications on topics related to art and restoration.",
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@
|
|||
"name": "angela dellebeke",
|
||||
"linkedin_url": "https://www.linkedin.com/in/angela-dellebeke-87289018",
|
||||
"headline": "nationaal archief /national archives of the Netherlands",
|
||||
"location": "The blue shield is the protective emblem specified in the 1954 Hague Convention (Convention for the Protection of Cultural Property in the Event of Armed Conflict) for marking cultural sites to give them protection from attack in the event of armed conflict. The Blue Shield network consists of organizations dealing with museums, archives, audiovisual supports, libraries, as well as monuments and sites. BLUE SHIELD NEDERLAND richt zich op de bescherming van Nederlands cultureel erfgoed tegen de bedreigingen die het gevolg zijn van natuurrampen, molest en militaire handelingen, en op het organiseren van nationale en internationale hulp. Show less",
|
||||
"location": "The Hague, Netherlands",
|
||||
"connections": "500 connections • 852 followers",
|
||||
"about": "veiligheidszorg collectie / collectie hulpverlening/ preventieve conservering /vraagstukken beheer en behoud/ calamiteitenplan/-organisatie/ selectievraagstukken /acquisitie & beschrijven van archieven / bedrijfshulpverlening/ crisisbeheersing safety&security collections/ emergency preparedness and hazard mitigation / emergency response / crisismanagement / cultural property protection / hague convention 1954",
|
||||
"summary": "Angela Dellebeke is a consultant specializing in emergency preparedness and hazard mitigation at the Nationaal Archief (National Archives of the Netherlands) in The Hague. With over 22 years of experience, she focuses on safety and security for cultural property, crisis management, and the preservation of archives. Dellebeke also serves as Secretary-General for Blue Shield Nederland, an organization dedicated to protecting cultural heritage during conflicts and disasters. She holds a Master of Arts in American Studies from Utrecht University and has published work on theft and misappropriation in archives.",
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@
|
|||
"name": "Annemarijne Moreu",
|
||||
"linkedin_url": "https://www.linkedin.com/in/annemarijnemoreu",
|
||||
"headline": "Sr Projectmanager bij Nationaal Archief",
|
||||
"location": "Ik ben een daadkrachtige en resultaatgerichte product owner en projectmanager, met brede ervaring op het gebied van B2B en B2C (online) projectmanagement, agile werken, marketing en communicatie. Ik ben goed in staat klantbehoeften centraal te stellen. Samenwerken met verschillende mensen, afdelingen en niveaus en gezamenlijk realiseren van doelen en implementeren van projecten gaat mij goed af. In mijn werk ben ik planmatig sterk, communicatief vaardig, zelfstandig en flexibel. Ik word blij van klantcontacten, aanpakken, samenwerken, afwisseling en verantwoordelijkheid nemen. Als persoon ben ik sociaal, ondernemend, positief en sportief.",
|
||||
"location": "The Hague, Netherlands",
|
||||
"connections": "500 connections • 860 followers",
|
||||
"about": "Ik ben een daadkrachtige en resultaatgerichte product owner en projectmanager, met brede ervaring op het gebied van B2B en B2C (online) projectmanagement, agile werken, marketing en communicatie. Ik ben goed in staat klantbehoeften centraal te stellen. Samenwerken met verschillende mensen, afdelingen en niveaus en gezamenlijk realiseren van doelen en implementeren van projecten gaat mij goed af. In mijn werk ben ik planmatig sterk, communicatief vaardig, zelfstandig en flexibel. Ik word blij van klantcontacten, aanpakken, samenwerken, afwisseling en verantwoordelijkheid nemen. Als persoon ben ik sociaal, ondernemend, positief en sportief.",
|
||||
"summary": "Annemarijne Moreu is a Senior Project Manager at the Nationaal Archief in The Hague, Netherlands, with over 29 years of experience in project management, particularly in B2B and B2C environments. Her expertise includes agile methodologies, marketing, and communication, focusing on customer needs and collaboration across various departments. Currently, she manages projects related to data accessibility, service delivery, and project management optimization at the Nationaal Archief. Previously, she held roles at Gemeente Rotterdam and PostNL, where she led various IT and process optimization projects, demonstrating her strong planning, communication, and leadership skills. Moreu is known for her proactive and social approach to work.",
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@
|
|||
"name": "Anne Martens",
|
||||
"linkedin_url": "https://www.linkedin.com/in/annemartens1",
|
||||
"headline": "Communicatieadviseur educatie",
|
||||
"location": "Als freelance journalist heb ik een verhalenradar die altijd aanstaat. Als ik een verhaal op het spoor ben, dan kan ik niet anders dan dat verhaal uitpluizen en delen met krantenlezers, radioluisteraars of televisiekijkers. Ik ben niet bang om in complexe materie te duiken, onbekende vakgebieden te verkennen en wetenschappelijke publicaties en experts te raadplegen. Ik maak verhalen voor NRC Handelsblad, NEMOKennislink.nl, Antoni van Leeuwenhoekziekenhuis, de NTR en de VPRO. Onderwerpen: biologie, aardwetenschappen, medische ethiek, geneeskunde en fertiliteit. Show less",
|
||||
"location": "Netherlands",
|
||||
"connections": "500 connections • 925 followers",
|
||||
"about": "Total Experience: 17 years",
|
||||
"summary": "Anne Martens is a seasoned communication advisor specializing in education, currently working at the Nationaal Archief in the Netherlands. With 17 years of experience, she has a diverse background that includes freelance journalism, where she has contributed to various prominent publications such as NRC Handelsblad and NEMOKennislink.nl. Her journalistic work has focused on complex topics in biology, earth sciences, medical ethics, and medicine. Martens has also produced content for science programs and radio documentaries, showcasing her ability to engage with intricate subjects and communicate them effectively to the public.",
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@
|
|||
"name": "Arjan Diepeveen",
|
||||
"linkedin_url": "https://www.linkedin.com/in/arjan-diepeveen-73b21640",
|
||||
"headline": "Senior Test Automation Engineer at Nationaal Archief",
|
||||
"location": "Arjan is een betrouwbare, professionele en collegiale medewerker die zowel zelfstandig als in een team goed presteert. Hij staat voor kwaliteit, zonder daarbij de kwantiteit uit het oog te verliezen. Hij is bereid om net die extra stap te zetten om het maximale resultaat te behalen. Arjan is van origine een echte techneut. Hij heeft in zijn loopbaan een zeer uitgebreide en gefundeerde hoeveelheid technische kennis opgedaan in verschillende functies en verschillende branches. Met zijn oog voor kwaliteit heeft hij zich in de loop van de jaren meer en meer verdiept in het testen en ontwikkeld als test consultant. Zijn kracht ligt in het heel snel in kaart kunnen brengen en doorgronden van zeer complexe omgevingen en systemen. Nieuwe en onbekende dingen maakt hij zich razendsnel eigen. Hij weet daarbij als geen andere deze kennis over te dragen door complexe zaken te vertalen naar een begrijpelijk niveau. Onder hectische stressvolle situaties blijft Arjan uitermate rustig, flexibel en analytisch. Hij gaat graag de uitdaging aan.",
|
||||
"location": "Netherlands",
|
||||
"connections": "374 connections • 380 followers",
|
||||
"about": "Arjan is een betrouwbare, professionele en collegiale medewerker die zowel zelfstandig als in een team goed presteert. Hij staat voor kwaliteit, zonder daarbij de kwantiteit uit het oog te verliezen. Hij is bereid om net die extra stap te zetten om het maximale resultaat te behalen. Arjan is van origine een echte techneut. Hij heeft in zijn loopbaan een zeer uitgebreide en gefundeerde hoeveelheid technische kennis opgedaan in verschillende functies en verschillende branches. Met zijn oog voor kwaliteit heeft hij zich in de loop van de jaren meer en meer verdiept in het testen en ontwikkeld als test consultant. Zijn kracht ligt in het heel snel in kaart kunnen brengen en doorgronden van zeer complexe omgevingen en systemen. Nieuwe en onbekende dingen maakt hij zich razendsnel eigen. Hij weet daarbij als geen andere deze kennis over te dragen door complexe zaken te vertalen naar een begrijpelijk niveau. Onder hectische stressvolle situaties blijft Arjan uitermate rustig, flexibel en analytisch. Hij gaat graag de uitdaging aan.",
|
||||
"summary": "Arjan Diepeveen is a Senior Test Automation Engineer at Nationaal Archief in the Netherlands, with over 26 years of experience in various technical roles. He excels in both independent and team settings, emphasizing quality while maintaining efficiency. His expertise includes test automation using tools like Robot Framework, Selenium, and Docker, and he works within SCRUM/Agile/DevOps teams to develop custom solutions for the National Archives. Arjan has also held positions at Rijkswaterstaat and Nederlandse Spoorwegen, focusing on technical safety and infrastructure testing. He is known for his ability to quickly understand complex systems and effectively communicate technical concepts.",
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@
|
|||
"name": "Roger Mous",
|
||||
"linkedin_url": "https://www.linkedin.com/in/roger-mous-203b2922a",
|
||||
"headline": "Floormanager afdeling Organisatie en Presentatie (O&P), Nationaal Archief",
|
||||
"location": "Enthousiast, kwaliteitsgericht, stressbestendige, gemotiveerde professional. Als sturende en motiverend persoon zet ik mij in zodat het team kan voldoen aan de hoogste eisen. Het coördineren, aansturen van medewerkers en zorgen voor kennisoverdracht.",
|
||||
"location": "Den Haag, Zuid-Holland, Nederland",
|
||||
"connections": "278 connections • 281 followers",
|
||||
"about": "Enthousiast, kwaliteitsgericht, stressbestendige, gemotiveerde professional. Als sturende en motiverend persoon zet ik mij in zodat het team kan voldoen aan de hoogste eisen. Het coördineren, aansturen van medewerkers en zorgen voor kennisoverdracht.",
|
||||
"summary": "Roger Mous is currently the Floormanager at the Nationaal Archief in The Hague, Netherlands, with nearly 35 years of professional experience. He is known for his enthusiastic, quality-oriented, and stress-resistant approach, focusing on team coordination and knowledge transfer. His career includes various roles at the Nationaal Archief and the Royal House, where he managed logistics, events, and catering services. Mous has a background in military service and education in facility management and hospitality. He has held multiple managerial positions, demonstrating strong leadership and organizational skills.",
|
||||
|
|
|
|||
635
data/entity_annotation/docs/PROVENANCE_SOURCES.md
Normal file
635
data/entity_annotation/docs/PROVENANCE_SOURCES.md
Normal file
|
|
@ -0,0 +1,635 @@
|
|||
# Provenance Sources for PiCo Historical Document Examples
|
||||
|
||||
This document provides detailed provenance information for the real historical document sources used in the PiCo (Person in Context) ontology integration examples within the CH-Annotator convention.
|
||||
|
||||
**Last Updated**: 2025-12-12
|
||||
**Author**: GLAM Project
|
||||
**Version**: 1.0.0
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
|
||||
1. [Hebrew Ketubah (Jewish Marriage Contracts)](#1-hebrew-ketubah-jewish-marriage-contracts)
|
||||
2. [Arabic Waqf Documents (Islamic Endowments)](#2-arabic-waqf-documents-islamic-endowments)
|
||||
3. [Ottoman Turkish Sijill (Sharia Court Registers)](#3-ottoman-turkish-sijill-sharia-court-registers)
|
||||
4. [Russian Metrical Books (Church Records)](#4-russian-metrical-books-church-records)
|
||||
5. [Spanish Colonial Baptism Records](#5-spanish-colonial-baptism-records)
|
||||
6. [Italian Notarial Records](#6-italian-notarial-records)
|
||||
7. [Greek Orthodox Church Records](#7-greek-orthodox-church-records)
|
||||
8. [Dutch Civil Registry Records](#8-dutch-civil-registry-records)
|
||||
9. [License and Attribution Requirements](#9-license-and-attribution-requirements)
|
||||
|
||||
---
|
||||
|
||||
## 1. Hebrew Ketubah (Jewish Marriage Contracts)
|
||||
|
||||
### 1.1 Yale Beinecke Library - Mashhad Ketubah (1896)
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | Yale University, Beinecke Rare Book & Manuscript Library |
|
||||
| **Collection** | Hebrew Manuscripts Supplement |
|
||||
| **Call Number** | Hebrew MSS suppl 194 |
|
||||
| **Digital URL** | https://digital.library.yale.edu/catalog/2067542 |
|
||||
| **Document Type** | Ketubah (Jewish marriage contract) |
|
||||
| **Date** | 23 Elul 5656 (September 1, 1896 CE) |
|
||||
| **Place** | Mashhad, Iran |
|
||||
| **Language** | Hebrew, Aramaic |
|
||||
| **Access Date** | 2025-12-12 |
|
||||
| **License** | Public Domain (pre-1929) |
|
||||
|
||||
**Persons Identified:**
|
||||
- **Groom**: Mosheh ben Mashiah (משה בן משיאח)
|
||||
- **Bride**: Rivkah bat Ya'akov (רבקה בת יעקב)
|
||||
|
||||
**Notes**: This ketubah is from the crypto-Jewish community of Mashhad, known as the Jadid al-Islam, who maintained Jewish practices in secret after their forced conversion to Islam in 1839. The document follows the standard Sephardic/Mizrahi ketubah format.
|
||||
|
||||
---
|
||||
|
||||
### 1.2 Philadelphia Mikveh Israel Ketubah (1842)
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | Congregation Mikveh Israel, Philadelphia |
|
||||
| **Collection** | Philadelphia Congregations Records |
|
||||
| **Digital URL** | https://philadelphiacongregations.org/records/item/MikvehIsrael.MarriageCertificate1842 |
|
||||
| **Document Type** | Ketubah (Jewish marriage contract) |
|
||||
| **Date** | 1842 CE |
|
||||
| **Place** | Philadelphia, Pennsylvania, USA |
|
||||
| **Language** | Aramaic (traditional text), English (translation provided) |
|
||||
| **Access Date** | 2025-12-12 |
|
||||
| **License** | Educational use permitted |
|
||||
|
||||
**Key Features:**
|
||||
- Full Aramaic text transcription available
|
||||
- English translation provided by archive
|
||||
- Example of American Sephardic ketubah format
|
||||
|
||||
**Sample Aramaic Text** (from source):
|
||||
```
|
||||
בשבת... בשבת... יום... לחדש... שנת... לבריאת עולם למנין שאנו מונין כאן...
|
||||
איך החתן... בר... אמר לה להדא בתולתא... בת...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 1.3 College of Charleston Ketubah (1908)
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | College of Charleston, Special Collections |
|
||||
| **Collection** | Jewish Heritage Collection |
|
||||
| **Document Type** | Ketubah |
|
||||
| **Date** | 1908 CE |
|
||||
| **Language** | Hebrew, Aramaic |
|
||||
| **Access Date** | 2025-12-12 |
|
||||
|
||||
**Persons Identified:**
|
||||
- **Bride**: Esther Devorah bat Rabbi Abraham (אסתר דבורה בת ר׳ אברהם)
|
||||
- **Groom**: Rabbi Yitzchak (ר׳ יצחק)
|
||||
|
||||
---
|
||||
|
||||
### 1.4 Rhodes Jewish Museum Collection
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | Rhodes Jewish Museum |
|
||||
| **Location** | Rhodes, Greece |
|
||||
| **Collection** | Historical Documents |
|
||||
| **Document Types** | Ketubot, community records |
|
||||
| **Period** | 19th-20th century |
|
||||
| **Language** | Ladino, Hebrew, Greek |
|
||||
|
||||
**Notes**: Documents from the historic Sephardic Jewish community of Rhodes, with unique Ladino elements.
|
||||
|
||||
---
|
||||
|
||||
## 2. Arabic Waqf Documents (Islamic Endowments)
|
||||
|
||||
### 2.1 Cambridge Digital Library - Islamic Collections
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | Cambridge University Library |
|
||||
| **Collection** | Islamic Manuscripts |
|
||||
| **Digital URL** | https://cudl.lib.cam.ac.uk/collections/islamic |
|
||||
| **Document Types** | Waqfiyya, legal documents, correspondence |
|
||||
| **Period** | 8th-20th century CE |
|
||||
| **Languages** | Arabic, Persian, Ottoman Turkish |
|
||||
| **License** | CC BY-NC 4.0 |
|
||||
| **Access Date** | 2025-12-12 |
|
||||
|
||||
**Key Collections:**
|
||||
- Genizah Collection (Cairo Genizah fragments)
|
||||
- Arabic Scientific Manuscripts
|
||||
- Islamic Legal Documents
|
||||
|
||||
---
|
||||
|
||||
### 2.2 UPenn OPenn - Manuscripts of the Muslim World
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | University of Pennsylvania Libraries |
|
||||
| **Collection** | Manuscripts of the Muslim World |
|
||||
| **Digital URL** | https://openn.library.upenn.edu/html/muslimworld_contents.html |
|
||||
| **Document Types** | Waqfiyya, Quranic manuscripts, legal documents |
|
||||
| **Period** | 9th-20th century CE |
|
||||
| **Languages** | Arabic, Persian, Ottoman Turkish |
|
||||
| **License** | Public Domain / CC0 |
|
||||
| **Access Date** | 2025-12-12 |
|
||||
|
||||
**Notable Holdings:**
|
||||
- Waqfiyya documents from Egypt, Syria, Turkey
|
||||
- Legal formularies with waqf templates
|
||||
- Property deeds and endowment records
|
||||
|
||||
---
|
||||
|
||||
### 2.3 Singapore National Heritage Board - Istanbul Waqf
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | Singapore National Heritage Board |
|
||||
| **Collection** | Roots.gov.sg |
|
||||
| **Accession Number** | 1115401 |
|
||||
| **Digital URL** | https://www.roots.gov.sg/Collection-Landing/listing/1115401 |
|
||||
| **Document Type** | Waqf document |
|
||||
| **Donor/Creator** | Muhammad b. Abd al-Ghani (محمد بن عبد الغني) |
|
||||
| **Properties** | Istanbul (various locations) |
|
||||
| **Language** | Ottoman Turkish, Arabic |
|
||||
| **Access Date** | 2025-12-12 |
|
||||
|
||||
**Key Features:**
|
||||
- Complete waqf document with property descriptions
|
||||
- Lists endowed properties in Istanbul
|
||||
- Named beneficiaries and conditions
|
||||
|
||||
---
|
||||
|
||||
### 2.4 Haseki Sultan Waqfiyya (1552 CE)
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | Various (studied in UC Berkeley eScholarship) |
|
||||
| **Document Type** | Waqfiyya (imperial endowment deed) |
|
||||
| **Date** | 1552 CE |
|
||||
| **Founder** | Haseki Hürrem Sultan (Roxelana) |
|
||||
| **Language** | Ottoman Turkish, Arabic |
|
||||
| **Research Source** | UC Berkeley eScholarship |
|
||||
|
||||
**Significance**: One of the largest waqf endowments in Ottoman history, establishing charitable institutions across the empire.
|
||||
|
||||
---
|
||||
|
||||
## 3. Ottoman Turkish Sijill (Sharia Court Registers)
|
||||
|
||||
### 3.1 OpenJerusalem Project - Jerusalem Sharia Court Registers
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | OpenJerusalem Project |
|
||||
| **Collection** | Jerusalem Sharia Court Registers |
|
||||
| **Digital URL** | https://www.openjerusalem.org/ |
|
||||
| **ARK Identifier** | ark:/58142/PfV7b |
|
||||
| **Volume Count** | 102 registers |
|
||||
| **Period** | 1834-1920 CE |
|
||||
| **Language** | Ottoman Turkish, Arabic |
|
||||
| **License** | Open Access |
|
||||
| **Access Date** | 2025-12-12 |
|
||||
|
||||
**Document Types:**
|
||||
- Property sales (بيع)
|
||||
- Marriage contracts (نكاح)
|
||||
- Inheritance divisions (قسمة)
|
||||
- Waqf registrations
|
||||
- Debt acknowledgments (إقرار)
|
||||
- Court testimonies (شهادة)
|
||||
|
||||
**Key Features:**
|
||||
- Searchable database with document transcriptions
|
||||
- Photographs of original registers
|
||||
- Multi-language metadata (Arabic, English, French)
|
||||
|
||||
---
|
||||
|
||||
### 3.2 ISAM Istanbul Kadi Registers (Kadı Sicilleri)
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | İslam Araştırmaları Merkezi (ISAM) |
|
||||
| **Collection** | Istanbul Kadı Sicilleri |
|
||||
| **Digital URL** | http://www.kadisicilleri.org/ |
|
||||
| **Volume Count** | 40+ volumes online |
|
||||
| **Document Count** | 40,000+ documents |
|
||||
| **Period** | 16th-19th century CE |
|
||||
| **Language** | Ottoman Turkish |
|
||||
| **License** | Research access |
|
||||
| **Access Date** | 2025-12-12 |
|
||||
|
||||
**Coverage:**
|
||||
- Istanbul courts (multiple districts)
|
||||
- Galata, Üsküdar, Eyüp
|
||||
- Complete transcriptions with original images
|
||||
|
||||
---
|
||||
|
||||
### 3.3 Istanbul Historical Kadi Registers Corpus
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | Istanbul Metropolitan Municipality |
|
||||
| **Project** | History of Istanbul |
|
||||
| **Digital URL** | https://istanbultarihi.ist/434-istanbul-sharia-court-registers |
|
||||
| **Volume Count** | ~10,000 volumes |
|
||||
| **Courts** | 26 different courts |
|
||||
| **Period** | 1453-1922 CE |
|
||||
| **Language** | Ottoman Turkish |
|
||||
|
||||
**Significance**: Largest collection of Ottoman court records in existence.
|
||||
|
||||
---
|
||||
|
||||
### 3.4 Harvard Ottoman Court Records Project
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | Harvard University |
|
||||
| **Project** | Ottoman Court Records Project (OCRP) |
|
||||
| **Digital URL** | https://cmes.fas.harvard.edu/projects/ocrp |
|
||||
| **Document Types** | Sijill transcriptions, translations |
|
||||
| **Period** | 16th-19th century CE |
|
||||
| **Languages** | Ottoman Turkish (original), English (translations) |
|
||||
|
||||
---
|
||||
|
||||
### 3.5 Bulgarian National Library - Ottoman Sijills
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | Bulgarian National Library |
|
||||
| **Collection** | Oriental Department |
|
||||
| **Sijill Count** | 160+ volumes |
|
||||
| **Defter Count** | 1000+ registers |
|
||||
| **Coverage** | Bulgarian Ottoman provinces |
|
||||
| **Period** | 16th-19th century CE |
|
||||
| **Language** | Ottoman Turkish, Arabic |
|
||||
|
||||
---
|
||||
|
||||
## 4. Russian Metrical Books (Church Records)
|
||||
|
||||
### 4.1 BYU Script Tutorial - Russian Metrical Books
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Institution** | Brigham Young University |
|
||||
| **Project** | Script Tutorial |
|
||||
| **Digital URL** | https://script.byu.edu/russian-handwriting/documents/record-types/metrical-books/births |
|
||||
| **Document Type** | Tutorial with real transcription examples |
|
||||
| **Languages** | Russian (Cyrillic), English (translation) |
|
||||
| **License** | Educational use |
|
||||
| **Access Date** | 2025-12-12 |
|
||||
|
||||
**Content Includes:**
|
||||
- Complete birth record format explanation
|
||||
- Vocabulary lists with translations
|
||||
- Sample transcriptions from actual metrical books
|
||||
- Handwriting recognition guides
|
||||
|
||||
**Sample Birth Record Structure** (from tutorial):
|
||||
```
|
||||
В метрической книге записано:
|
||||
Родился: [date]
|
||||
Крещён: [date]
|
||||
Имя: [name]
|
||||
Родители: [father's full name with rank/status], законная жена его [mother's name]
|
||||
Восприемники: [godparents]
|
||||
Священник: [officiating priest]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.2 FamilySearch Russia Church Records
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | FamilySearch |
|
||||
| **Collection** | Russia Church Records |
|
||||
| **Wiki URL** | https://www.familysearch.org/en/wiki/Russia_Church_Records |
|
||||
| **Document Types** | Metrical books (births, marriages, deaths) |
|
||||
| **Period** | 1722-1918 CE |
|
||||
| **Languages** | Russian, Church Slavonic |
|
||||
| **Access** | Free with registration |
|
||||
|
||||
**Key Information:**
|
||||
- Metrical books (метрические книги) mandated from 1722
|
||||
- Three-part structure: births/baptisms, marriages, deaths
|
||||
- Contains estate/class (сословие) information
|
||||
|
||||
---
|
||||
|
||||
### 4.3 Polish Archives - Kłobuck Parish Records
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | Szukaj w Archiwach (Polish State Archives) |
|
||||
| **Parish** | Kłobuck |
|
||||
| **Document Type** | Roman Catholic metrical books |
|
||||
| **Period** | 18th-19th century |
|
||||
| **Languages** | Latin, Polish, Russian |
|
||||
|
||||
**Notes**: Example of Russian-era Polish parish records with parallel Latin/Russian entries.
|
||||
|
||||
---
|
||||
|
||||
### 4.4 RGIA St. Petersburg
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | Russian State Historical Archive (RGIA) |
|
||||
| **Location** | St. Petersburg, Russia |
|
||||
| **Holdings** | 300+ metrical books |
|
||||
| **Period** | 1832-1892 CE |
|
||||
| **Document Types** | Orthodox, Catholic, Lutheran, Jewish metrical books |
|
||||
|
||||
---
|
||||
|
||||
## 5. Spanish Colonial Baptism Records
|
||||
|
||||
### 5.1 BYU Script Tutorial - Spanish Colonial Baptisms
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Institution** | Brigham Young University |
|
||||
| **Project** | Script Tutorial |
|
||||
| **Digital URL** | https://script.byu.edu/spanish-handwriting/documents/church-records/baptisms |
|
||||
| **Document Type** | Tutorial with real transcription examples |
|
||||
| **Languages** | Spanish (colonial), English |
|
||||
| **License** | Educational use |
|
||||
| **Access Date** | 2025-12-12 |
|
||||
|
||||
**Standard Baptism Entry Structure:**
|
||||
```
|
||||
En [place] a [date] bauticé solemnemente a [name], [legitimacy status] de [father] y de [mother].
|
||||
Fueron padrinos [godparents].
|
||||
Y para que conste lo firmo.
|
||||
[Priest signature]
|
||||
```
|
||||
|
||||
**Key Vocabulary:**
|
||||
- hijo/hija legítimo/a = legitimate child
|
||||
- hijo/hija natural = illegitimate child
|
||||
- párvulo/a = infant
|
||||
- español/a, indio/a, mestizo/a, mulato/a = casta categories
|
||||
- padrinos/madrinas = godparents
|
||||
|
||||
---
|
||||
|
||||
### 5.2 FamilySearch Mexico - Yucatán Catholic Church Records
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | FamilySearch |
|
||||
| **Collection** | Mexico, Yucatán, Catholic Church Records, 1543-1977 |
|
||||
| **Collection ID** | 1909116 |
|
||||
| **Digital URL** | https://www.familysearch.org/en/search/collection/1909116 |
|
||||
| **Period** | 1543-1977 CE |
|
||||
| **Document Types** | Baptisms, marriages, deaths, confirmations |
|
||||
| **Language** | Spanish, Latin, Maya |
|
||||
| **Access** | Free with registration |
|
||||
|
||||
**Coverage:**
|
||||
- 200+ parishes
|
||||
- Some of the earliest New World records (from 1543)
|
||||
- Indigenous Maya populations
|
||||
|
||||
---
|
||||
|
||||
### 5.3 Archivo General de la Nación (AGN) Mexico
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | Archivo General de la Nación |
|
||||
| **Location** | Mexico City, Mexico |
|
||||
| **Holdings** | Colonial parish records, civil registry |
|
||||
| **Period** | 16th-20th century CE |
|
||||
| **Languages** | Spanish, Nahuatl, Latin |
|
||||
|
||||
---
|
||||
|
||||
## 6. Italian Notarial Records
|
||||
|
||||
### 6.1 Antenati - Italian State Archives Portal
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | Italian Ministry of Culture |
|
||||
| **Project** | Antenati (Ancestors) |
|
||||
| **Digital URL** | https://antenati.cultura.gov.it/ |
|
||||
| **Venice URL** | https://antenati.cultura.gov.it/archivio/state-archives-of-venezia/?lang=en |
|
||||
| **Document Types** | Civil registry, notarial acts, parish records |
|
||||
| **Period** | 1806-present (civil); 15th century+ (notarial) |
|
||||
| **Languages** | Italian, Latin, Venetian |
|
||||
| **License** | Open Access |
|
||||
| **Access Date** | 2025-12-12 |
|
||||
|
||||
**Venice State Archive Holdings:**
|
||||
- Civil Registry (Stato Civile) 1806-1815 (Napoleonic period)
|
||||
- Notarial archives (Archivio Notarile)
|
||||
- Guild records (Arti e Mestieri)
|
||||
|
||||
---
|
||||
|
||||
### 6.2 OAC California Digital Library - Italian Notarial Documents
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | University of California Libraries |
|
||||
| **Collection** | Italian Notarial Documents Collection |
|
||||
| **Finding Aid** | https://oac.cdlib.org/findaid/ark:%2F13030%2Fc8v412zd |
|
||||
| **Document Count** | 168 documents |
|
||||
| **Period** | 1465-1635 CE |
|
||||
| **Locations** | Venice, Padua, Verona |
|
||||
| **Languages** | Latin, Italian (Venetian) |
|
||||
| **Access Date** | 2025-12-12 |
|
||||
|
||||
**Document Types:**
|
||||
- Contracts (contratti)
|
||||
- Wills (testamenti)
|
||||
- Property transfers
|
||||
- Marriage agreements (sponsalia)
|
||||
- Business partnerships
|
||||
|
||||
---
|
||||
|
||||
### 6.3 SION-Digit Project - Jewish Notarial Records
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Project** | SION-Digit (Sources for the History of Italian Jewish Notarial Documents) |
|
||||
| **Coverage** | Venice, Bordeaux, Amsterdam |
|
||||
| **Period** | 16th-18th century CE |
|
||||
| **Focus** | Jewish community notarial acts |
|
||||
| **Languages** | Italian, Hebrew, Ladino |
|
||||
|
||||
---
|
||||
|
||||
## 7. Greek Orthodox Church Records
|
||||
|
||||
### 7.1 FamilySearch Greece Church Records
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | FamilySearch |
|
||||
| **Wiki URL** | https://www.familysearch.org/en/wiki/Greece_Church_Records |
|
||||
| **Document Types** | Baptisms, marriages, deaths |
|
||||
| **Period** | 17th century - 1925 CE |
|
||||
| **Language** | Greek |
|
||||
| **Access** | Free with registration |
|
||||
|
||||
**Key Information:**
|
||||
- Greek Orthodox records are the primary source prior to the introduction of civil registration in 1925
|
||||
- Male registers (μητρώα αρρένων) for military service
|
||||
- Some records in Ottoman Turkish for pre-independence period
|
||||
|
||||
---
|
||||
|
||||
### 7.2 General State Archives of Greece (GAK)
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | Γενικά Αρχεία του Κράτους (GAK) |
|
||||
| **Document Types** | Church records, civil registry, Ottoman-era documents |
|
||||
| **Period** | 15th century - present |
|
||||
| **Languages** | Greek, Ottoman Turkish |
|
||||
|
||||
---
|
||||
|
||||
### 7.3 Greek Ancestry Resources
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Resource** | Greek Ancestry |
|
||||
| **Coverage** | Village church records guide |
|
||||
| **Document Types** | Baptismal registers, marriage registers |
|
||||
| **Key Features** | Guides to accessing island and mainland records |
|
||||
|
||||
---
|
||||
|
||||
## 8. Dutch Civil Registry Records
|
||||
|
||||
### 8.1 WieWasWie (Dutch Genealogical Database)
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| **Archive** | Centraal Bureau voor Genealogie (CBG) |
|
||||
| **Project** | WieWasWie |
|
||||
| **Digital URL** | https://www.wiewaswie.nl/ |
|
||||
| **Document Types** | Birth, marriage, death certificates |
|
||||
| **Period** | 1811-present (civil); 1600s+ (church) |
|
||||
| **Languages** | Dutch |
|
||||
| **Access** | Subscription / Free at archives |
|
||||
|
||||
---
|
||||
|
||||
### 8.2 Dutch Provincial Archives
|
||||
|
||||
| Province | Archive | Holdings |
|
||||
|----------|---------|----------|
|
||||
| Noord-Holland | Noord-Hollands Archief | Civil registry from 1811, church records from 1600s |
|
||||
| Zuid-Holland | Nationaal Archief | Central government records |
|
||||
| Gelderland | Gelders Archief | Regional archives |
|
||||
| Noord-Brabant | Brabants Historisch Informatie Centrum | Catholic parish records |
|
||||
|
||||
---
|
||||
|
||||
### 8.3 Dutch Marriage Certificate Format
|
||||
|
||||
**Standard 19th-Century Format:**
|
||||
```
|
||||
Heden den [date] compareerden voor ons [official name],
|
||||
Ambtenaar van den Burgerlijken Stand der Gemeente [municipality]:
|
||||
|
||||
De Bruidegom: [groom's name], oud [age] jaren, [occupation],
|
||||
geboren te [birthplace], wonende te [residence],
|
||||
zoon van [father] en van [mother];
|
||||
|
||||
De Bruid: [bride's name], oud [age] jaren,
|
||||
geboren te [birthplace], wonende te [residence],
|
||||
dochter van [father] en van [mother];
|
||||
|
||||
Getuigen: [4 witnesses with ages, occupations, relationships]
|
||||
|
||||
En hebben wij dit huwelijk voltrokken in tegenwoordigheid van voornoemde getuigen.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 9. License and Attribution Requirements
|
||||
|
||||
### Open Access Resources
|
||||
|
||||
| Source | License | Attribution Required |
|
||||
|--------|---------|---------------------|
|
||||
| Cambridge Digital Library | CC BY-NC 4.0 | Yes |
|
||||
| UPenn OPenn | Public Domain / CC0 | No (but encouraged) |
|
||||
| OpenJerusalem | Open Access | Yes |
|
||||
| Antenati | Open Access | Yes |
|
||||
| FamilySearch | Terms of Service | Yes |
|
||||
| BYU Script Tutorial | Educational Use | Yes |
|
||||
|
||||
### Recommended Citation Format
|
||||
|
||||
For PiCo extraction examples, use the following provenance block in YAML:
|
||||
|
||||
```yaml
|
||||
provenance:
|
||||
source_url: "https://example.org/document/12345"
|
||||
archive_name: "Example Archive"
|
||||
collection: "Collection Name"
|
||||
document_id: "Document Identifier"
|
||||
access_date: "2025-12-12"
|
||||
license: "CC BY-NC 4.0"
|
||||
attribution: "Courtesy of Example Archive. Used under CC BY-NC 4.0 license."
|
||||
notes: "Transcription verified against original digital image."
|
||||
```
|
||||
|
||||
### Data Fabrication Prohibition
|
||||
|
||||
**CRITICAL**: Per project rules (AGENTS.md Rule 21), all extraction examples MUST use real data from these verified sources. No fabrication of person names, dates, relationships, or document content is permitted.
|
||||
|
||||
When real data is not available from a source, the extraction example should be marked as:
|
||||
|
||||
```yaml
|
||||
provenance:
|
||||
source_url: null
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
notes: "This example uses synthetic data for demonstration purposes only. Do not cite as historical evidence."
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Document Type Coverage Summary
|
||||
|
||||
| Document Type | Real Sources Available | Examples with Provenance |
|
||||
|--------------|------------------------|--------------------------|
|
||||
| Hebrew Ketubah | 4+ archives | Yale (1896), Philadelphia (1842) |
|
||||
| Arabic Waqf | 3+ archives | Cambridge, UPenn, Singapore |
|
||||
| Ottoman Sijill | 5+ archives | OpenJerusalem, ISAM, Harvard |
|
||||
| Russian Metrical | 4+ archives | BYU Tutorial, RGIA |
|
||||
| Spanish Colonial Baptism | 3+ archives | BYU Tutorial, FamilySearch |
|
||||
| Italian Notarial | 3+ archives | Antenati, OAC/CDL |
|
||||
| Greek Orthodox | 3+ archives | FamilySearch, GAK |
|
||||
| Dutch Civil Registry | 3+ archives | WieWasWie, Provincial |
|
||||
|
||||
---
|
||||
|
||||
## Changelog
|
||||
|
||||
| Date | Version | Changes |
|
||||
|------|---------|---------|
|
||||
| 2025-12-12 | 1.0.0 | Initial compilation of provenance sources |
|
||||
|
||||
|
|
@ -152,6 +152,28 @@ modules:
|
|||
- path: "integrations/nif_nerd.yaml"
|
||||
description: "NIF/NERD/Open Annotation compatibility layer with GLAM-NER mappings"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# RELATIONSHIP MODULES - Family and social relationship patterns
|
||||
# ---------------------------------------------------------------------------
|
||||
relationships:
|
||||
- path: "relationships/family.yaml"
|
||||
description: "Family relationship properties and historical source patterns (34 relationship types, 13 languages)"
|
||||
line_count: 1503
|
||||
languages:
|
||||
- "Dutch"
|
||||
- "Latin"
|
||||
- "German"
|
||||
- "Arabic"
|
||||
- "French"
|
||||
- "Ottoman Turkish"
|
||||
- "Hebrew"
|
||||
- "Persian/Farsi"
|
||||
- "Spanish"
|
||||
- "Portuguese"
|
||||
- "Italian"
|
||||
- "Greek"
|
||||
- "Russian"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ADVANCED MODULES - Complex annotation patterns
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
4255
data/entity_annotation/modules/integrations/pico.yaml.bak
Normal file
4255
data/entity_annotation/modules/integrations/pico.yaml.bak
Normal file
File diff suppressed because it is too large
Load diff
228
data/entity_annotation/modules/integrations/pico/_index.yaml
Normal file
228
data/entity_annotation/modules/integrations/pico/_index.yaml
Normal file
|
|
@ -0,0 +1,228 @@
|
|||
# =============================================================================
|
||||
# PiCo Integration Module - Index
|
||||
# =============================================================================
|
||||
# Part of: GLAM-NER Entity Annotation Convention v1.7.0
|
||||
# Module: integrations/pico/
|
||||
#
|
||||
# Description:
|
||||
# PiCo (Person in Context Ontology) integration for person observation modeling.
|
||||
# Enables tracking provenance of person mentions and linking to formal records.
|
||||
#
|
||||
# Key concepts:
|
||||
# - PersonObservation: A textual mention of a person (source-bound)
|
||||
# - PersonName (PNV): Structured name components
|
||||
# - Person (CIDOC-CRM E21): Reconstructed person entity
|
||||
#
|
||||
# References:
|
||||
# - PiCo Ontology: https://w3id.org/pico
|
||||
# - Person Name Vocabulary (PNV): https://w3id.org/pnv
|
||||
# - CIDOC-CRM: https://www.cidoc-crm.org/
|
||||
#
|
||||
# Module Structure:
|
||||
# pico/
|
||||
# ├── _index.yaml # This file - module manifest
|
||||
# ├── schema/
|
||||
# │ ├── observation.yaml # Core PiCo observation pattern
|
||||
# │ ├── pnv_components.yaml # Person Name Vocabulary
|
||||
# │ ├── relationships.yaml # Family and social relationships
|
||||
# │ ├── temporal.yaml # Date and calendar systems
|
||||
# │ └── locations.yaml # Location type definitions
|
||||
# ├── examples/
|
||||
# │ ├── _examples_index.yaml # Examples overview
|
||||
# │ ├── 01_dutch_marriage.yaml # Example 1: Dutch civil registration
|
||||
# │ ├── 02_notarial_protocol.yaml
|
||||
# │ ├── 03_church_baptismal.yaml
|
||||
# │ ├── 04_linkedin_profile.yaml
|
||||
# │ ├── 05_arabic_waqf.yaml
|
||||
# │ ├── 06_hebrew_ketubah.yaml # REAL DATA: Yale Mashhad 1896
|
||||
# │ ├── 07_spanish_colonial.yaml
|
||||
# │ ├── 08_italian_notarial.yaml
|
||||
# │ ├── 09_greek_orthodox.yaml
|
||||
# │ ├── 10_russian_metrical.yaml # REAL DATA: BYU Osiek 1894
|
||||
# │ └── 11_ottoman_sijill.yaml
|
||||
# └── naming_conventions/
|
||||
# ├── dutch.yaml # Dutch naming rules
|
||||
# ├── arabic.yaml # Arabic naming rules
|
||||
# ├── hebrew.yaml # Hebrew naming rules
|
||||
# └── ... # Other language conventions
|
||||
#
|
||||
# Last Updated: 2025-01-13
|
||||
# Version: 1.7.0
|
||||
# =============================================================================
|
||||
|
||||
module:
|
||||
id: "pico_integration"
|
||||
name: "PiCo Integration Module"
|
||||
version: "1.7.0"
|
||||
parent: "ch_annotator-v1_7_0"
|
||||
description: |
|
||||
PiCo (Person in Context Ontology) models textual observations of persons
|
||||
as distinct from reconstructed person entities. This enables:
|
||||
- Tracking provenance of person mentions
|
||||
- Handling name variations across sources
|
||||
- Linking observations to formal person records
|
||||
|
||||
The observation/reconstruction pattern separates:
|
||||
1. What was OBSERVED in text (PersonObservation) - source-bound, exact
|
||||
2. What was RECONSTRUCTED as entity (E21_Person) - inferred, normalized
|
||||
|
||||
This is critical for heritage data where the same person may appear with
|
||||
different name forms, titles, or spellings across sources.
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Module Components
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
components:
|
||||
schema:
|
||||
description: "Core schema definitions for PiCo model"
|
||||
files:
|
||||
- path: "schema/observation.yaml"
|
||||
description: "PersonObservation class and properties"
|
||||
classes:
|
||||
- "picom:PersonObservation"
|
||||
|
||||
- path: "schema/pnv_components.yaml"
|
||||
description: "Person Name Vocabulary (PNV) components"
|
||||
classes:
|
||||
- "pnv:PersonName"
|
||||
|
||||
- path: "schema/relationships.yaml"
|
||||
description: "Family and social relationship types"
|
||||
properties:
|
||||
- "sdo:parent"
|
||||
- "sdo:children"
|
||||
- "sdo:spouse"
|
||||
- "sdo:sibling"
|
||||
- "godparent"
|
||||
- "witness"
|
||||
|
||||
- path: "schema/temporal.yaml"
|
||||
description: "Date formats, calendar systems, temporal modeling"
|
||||
|
||||
- path: "schema/locations.yaml"
|
||||
description: "Location types for biographical data"
|
||||
|
||||
examples:
|
||||
description: "Complete extraction examples demonstrating PiCo patterns"
|
||||
index_file: "examples/_examples_index.yaml"
|
||||
real_data_examples:
|
||||
- id: "06_hebrew_ketubah"
|
||||
data_status: "REAL_HISTORICAL_DATA"
|
||||
source: "Yale University Beinecke Library"
|
||||
call_number: "Hebrew MSS suppl 194"
|
||||
|
||||
- id: "10_russian_metrical"
|
||||
data_status: "REAL_HISTORICAL_DATA"
|
||||
source: "Archiwum Panstwowe w Poznaniu Oddzial w Koninie"
|
||||
reference: "54/792/0/6.1/140"
|
||||
|
||||
synthetic_examples:
|
||||
- "01_dutch_marriage"
|
||||
- "02_notarial_protocol"
|
||||
- "03_church_baptismal"
|
||||
- "04_linkedin_profile"
|
||||
- "05_arabic_waqf"
|
||||
- "07_spanish_colonial"
|
||||
- "08_italian_notarial"
|
||||
- "09_greek_orthodox"
|
||||
- "11_ottoman_sijill"
|
||||
|
||||
naming_conventions:
|
||||
description: "Language-specific naming rules and patterns"
|
||||
files:
|
||||
- path: "naming_conventions/dutch.yaml"
|
||||
language: "nl"
|
||||
covers: ["tussenvoegsels", "patronymics", "sorting rules"]
|
||||
|
||||
- path: "naming_conventions/arabic.yaml"
|
||||
language: "ar"
|
||||
covers: ["nasab", "nisba", "kunya", "laqab"]
|
||||
|
||||
- path: "naming_conventions/hebrew.yaml"
|
||||
language: "he"
|
||||
covers: ["ben/bat patronymics", "ketubah conventions"]
|
||||
|
||||
- path: "naming_conventions/spanish.yaml"
|
||||
language: "es"
|
||||
covers: ["double surnames", "colonial titles"]
|
||||
|
||||
- path: "naming_conventions/italian.yaml"
|
||||
language: "it"
|
||||
covers: ["notarial conventions", "nobility particles"]
|
||||
|
||||
- path: "naming_conventions/greek.yaml"
|
||||
language: "el"
|
||||
covers: ["Orthodox naming", "genitive forms"]
|
||||
|
||||
- path: "naming_conventions/russian.yaml"
|
||||
language: "ru"
|
||||
covers: ["patronymics", "metrical book conventions"]
|
||||
|
||||
- path: "naming_conventions/ottoman.yaml"
|
||||
language: "ota"
|
||||
covers: ["Ottoman Turkish", "Arabic-Ottoman blend"]
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# GLM-4.6 Annotator Configuration
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
glm_annotator_config:
|
||||
model: "glm-4.6"
|
||||
api_endpoint: "https://api.z.ai/api/coding/paas/v4/chat/completions"
|
||||
temperature: 0.1
|
||||
max_tokens: 4000
|
||||
system_prompt_file: "schema/observation.yaml" # Contains extraction instructions
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Hypernym Mapping (GLAM-NER v1.7.0)
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
hypernym_mapping:
|
||||
description: "How PiCo concepts map to GLAM-NER v1.7.0 hypernyms"
|
||||
|
||||
mappings:
|
||||
- pico_class: "picom:PersonObservation"
|
||||
glam_hypernym: "AGT.PER"
|
||||
note: "Person observations create AGT.PER entities"
|
||||
|
||||
- pico_class: "picom:PersonObservation"
|
||||
glam_hypernym: "AGT.STF"
|
||||
condition: "When observed with organizational role"
|
||||
note: "Staff members with role context"
|
||||
|
||||
- pico_class: "pnv:PersonName"
|
||||
glam_hypernym: "APP.NAM"
|
||||
note: "Name strings as appellations"
|
||||
|
||||
- pico_class: "picom:hasRole"
|
||||
glam_hypernym: "ROL"
|
||||
note: "Extracted roles link to ROL hypernym"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Usage Notes
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
usage:
|
||||
loading: |
|
||||
Since YAML does not have native imports, applications should load
|
||||
module files individually or use a custom loader. Example:
|
||||
|
||||
```python
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
|
||||
def load_pico_module(base_path: Path) -> dict:
|
||||
module = {}
|
||||
module['index'] = yaml.safe_load((base_path / '_index.yaml').read_text())
|
||||
module['observation'] = yaml.safe_load((base_path / 'schema/observation.yaml').read_text())
|
||||
module['pnv'] = yaml.safe_load((base_path / 'schema/pnv_components.yaml').read_text())
|
||||
# ... load other components as needed
|
||||
return module
|
||||
```
|
||||
|
||||
validation: |
|
||||
Each YAML file is valid standalone. Validate with:
|
||||
```bash
|
||||
python3 -c "import yaml; yaml.safe_load(open('path/to/file.yaml'))"
|
||||
```
|
||||
|
|
@ -0,0 +1,285 @@
|
|||
# =============================================================================
|
||||
# PiCo Example 1: Dutch Marriage Certificate (Burgerlijke Stand)
|
||||
# =============================================================================
|
||||
# Part of: data/entity_annotation/modules/integrations/pico/examples/
|
||||
# Parent: _examples_index.yaml
|
||||
#
|
||||
# DATA STATUS: SYNTHETIC_EXAMPLE
|
||||
#
|
||||
# Demonstrates extraction from a Dutch civil registry (Burgerlijke Stand)
|
||||
# marriage certificate showing:
|
||||
# - Full family network extraction (8 persons)
|
||||
# - Dutch naming conventions (tussenvoegsel: "de")
|
||||
# - Occupation and residence data
|
||||
# - Witness relationships (siblings of bride/groom)
|
||||
# - Deceased parent markers ("wijlen")
|
||||
#
|
||||
# Language: Dutch
|
||||
# Period: 19th century (1885 CE)
|
||||
# Source Type: Civil Registration (Burgerlijke Stand)
|
||||
#
|
||||
# Last Updated: 2025-12-12
|
||||
# =============================================================================
|
||||
|
||||
example_id: "example_01_dutch_marriage"
|
||||
example_title: "Dutch Marriage Certificate - Burgerlijke Stand (1885)"
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
source_language: "Dutch"
|
||||
source_type: "civil_registration"
|
||||
|
||||
description: |
|
||||
This example demonstrates extraction from a Dutch civil registry (Burgerlijke
|
||||
Stand) marriage certificate from 1885. The document contains rich genealogical
|
||||
data including the bride and groom, their parents (living and deceased), and
|
||||
witnesses who are siblings of the couple.
|
||||
|
||||
Key extraction features:
|
||||
- 8 persons with full family relationship mapping
|
||||
- Occupation data (schilder, koopman, timmerman)
|
||||
- Place of birth and residence
|
||||
- Deceased parent markers ("wijlen")
|
||||
- Age at marriage
|
||||
- Witness-to-party relationships (brothers of bride/groom)
|
||||
|
||||
source_text: |
|
||||
Heden den elfden November achttien honderd vijf en tachtig, zijn voor ons
|
||||
Ambtenaar van den Burgerlijken Stand der gemeente Haarlem, verschenen:
|
||||
Cornelis Johannes Koppen, oud dertig jaren, schilder, geboren te Haarlem,
|
||||
wonende alhier, meerderjarige zoon van wijlen Pieter Koppen en van
|
||||
Anna Maria Brouwer, zonder beroep, wonende alhier;
|
||||
en Anna Maria Visser, oud zeven en twintig jaren, zonder beroep, geboren
|
||||
te Amsterdam, wonende alhier, meerderjarige dochter van Jan Visser,
|
||||
koopman, en van wijlen Cornelia de Vries.
|
||||
|
||||
Als getuigen waren tegenwoordig: Hendrik Koppen, oud vijf en dertig jaren,
|
||||
schilder, broeder van den bruidegom; en Willem Visser, oud twee en dertig
|
||||
jaren, timmerman, broeder van de bruid.
|
||||
|
||||
expected_extraction:
|
||||
pico_observation:
|
||||
observation_id: "bs_haarlem_1885_marriage_321"
|
||||
observed_at: "2025-12-12T10:00:00Z"
|
||||
source_type: "civil_registration"
|
||||
source_reference: "BS Marriage Haarlem, November 11, 1885, certificate 321"
|
||||
|
||||
persons:
|
||||
- person_index: 0
|
||||
pnv_name:
|
||||
literalName: "Cornelis Johannes Koppen"
|
||||
givenName: "Cornelis Johannes"
|
||||
baseSurname: "Koppen"
|
||||
roles:
|
||||
- role_title: "schilder"
|
||||
role_in_source: "groom"
|
||||
biographical:
|
||||
age: "30"
|
||||
birth_place: "Haarlem"
|
||||
address: "Haarlem"
|
||||
family_relationships:
|
||||
parent:
|
||||
- person_index: 2
|
||||
target_name: "Pieter Koppen"
|
||||
- person_index: 3
|
||||
target_name: "Anna Maria Brouwer"
|
||||
spouse:
|
||||
- person_index: 1
|
||||
target_name: "Anna Maria Visser"
|
||||
sibling:
|
||||
- person_index: 6
|
||||
target_name: "Hendrik Koppen"
|
||||
|
||||
- person_index: 1
|
||||
pnv_name:
|
||||
literalName: "Anna Maria Visser"
|
||||
givenName: "Anna Maria"
|
||||
baseSurname: "Visser"
|
||||
roles:
|
||||
- role_in_source: "bride"
|
||||
biographical:
|
||||
age: "27"
|
||||
birth_place: "Amsterdam"
|
||||
address: "Haarlem"
|
||||
family_relationships:
|
||||
parent:
|
||||
- person_index: 4
|
||||
target_name: "Jan Visser"
|
||||
- person_index: 5
|
||||
target_name: "Cornelia de Vries"
|
||||
spouse:
|
||||
- person_index: 0
|
||||
target_name: "Cornelis Johannes Koppen"
|
||||
sibling:
|
||||
- person_index: 7
|
||||
target_name: "Willem Visser"
|
||||
|
||||
- person_index: 2
|
||||
pnv_name:
|
||||
literalName: "Pieter Koppen"
|
||||
givenName: "Pieter"
|
||||
baseSurname: "Koppen"
|
||||
biographical:
|
||||
deceased: true
|
||||
family_relationships:
|
||||
children:
|
||||
- person_index: 0
|
||||
target_name: "Cornelis Johannes Koppen"
|
||||
- person_index: 6
|
||||
target_name: "Hendrik Koppen"
|
||||
spouse:
|
||||
- person_index: 3
|
||||
target_name: "Anna Maria Brouwer"
|
||||
|
||||
- person_index: 3
|
||||
pnv_name:
|
||||
literalName: "Anna Maria Brouwer"
|
||||
givenName: "Anna Maria"
|
||||
baseSurname: "Brouwer"
|
||||
roles:
|
||||
- role_title: "zonder beroep"
|
||||
biographical:
|
||||
address: "Haarlem"
|
||||
family_relationships:
|
||||
children:
|
||||
- person_index: 0
|
||||
target_name: "Cornelis Johannes Koppen"
|
||||
- person_index: 6
|
||||
target_name: "Hendrik Koppen"
|
||||
widow_of:
|
||||
person_index: 2
|
||||
target_name: "Pieter Koppen"
|
||||
|
||||
- person_index: 4
|
||||
pnv_name:
|
||||
literalName: "Jan Visser"
|
||||
givenName: "Jan"
|
||||
baseSurname: "Visser"
|
||||
roles:
|
||||
- role_title: "koopman"
|
||||
family_relationships:
|
||||
children:
|
||||
- person_index: 1
|
||||
target_name: "Anna Maria Visser"
|
||||
- person_index: 7
|
||||
target_name: "Willem Visser"
|
||||
spouse:
|
||||
- person_index: 5
|
||||
target_name: "Cornelia de Vries"
|
||||
|
||||
- person_index: 5
|
||||
pnv_name:
|
||||
literalName: "Cornelia de Vries"
|
||||
givenName: "Cornelia"
|
||||
surnamePrefix: "de"
|
||||
baseSurname: "Vries"
|
||||
biographical:
|
||||
deceased: true
|
||||
family_relationships:
|
||||
children:
|
||||
- person_index: 1
|
||||
target_name: "Anna Maria Visser"
|
||||
- person_index: 7
|
||||
target_name: "Willem Visser"
|
||||
spouse:
|
||||
- person_index: 4
|
||||
target_name: "Jan Visser"
|
||||
|
||||
- person_index: 6
|
||||
pnv_name:
|
||||
literalName: "Hendrik Koppen"
|
||||
givenName: "Hendrik"
|
||||
baseSurname: "Koppen"
|
||||
roles:
|
||||
- role_title: "schilder"
|
||||
role_in_source: "witness"
|
||||
biographical:
|
||||
age: "35"
|
||||
family_relationships:
|
||||
sibling:
|
||||
- person_index: 0
|
||||
target_name: "Cornelis Johannes Koppen"
|
||||
parent:
|
||||
- person_index: 2
|
||||
target_name: "Pieter Koppen"
|
||||
- person_index: 3
|
||||
target_name: "Anna Maria Brouwer"
|
||||
|
||||
- person_index: 7
|
||||
pnv_name:
|
||||
literalName: "Willem Visser"
|
||||
givenName: "Willem"
|
||||
baseSurname: "Visser"
|
||||
roles:
|
||||
- role_title: "timmerman"
|
||||
role_in_source: "witness"
|
||||
biographical:
|
||||
age: "32"
|
||||
family_relationships:
|
||||
sibling:
|
||||
- person_index: 1
|
||||
target_name: "Anna Maria Visser"
|
||||
parent:
|
||||
- person_index: 4
|
||||
target_name: "Jan Visser"
|
||||
- person_index: 5
|
||||
target_name: "Cornelia de Vries"
|
||||
|
||||
temporal_references:
|
||||
- expression: "den elfden November achttien honderd vijf en tachtig"
|
||||
normalized: "1885-11-11"
|
||||
type: "DATE"
|
||||
|
||||
locations_mentioned:
|
||||
- name: "Haarlem"
|
||||
type: "city"
|
||||
- name: "Amsterdam"
|
||||
type: "city"
|
||||
|
||||
naming_conventions_notes: |
|
||||
Dutch civil registration naming conventions demonstrated:
|
||||
|
||||
1. TUSSENVOEGSEL (surname prefix):
|
||||
- "de Vries" - "de" is the tussenvoegsel
|
||||
- Lowercase in running text, may be capitalized at start of sentence
|
||||
- Inherited through family line
|
||||
|
||||
2. DECEASED MARKER:
|
||||
- "wijlen" = the late/deceased
|
||||
- Placed before the full name
|
||||
|
||||
3. OCCUPATION TERMS:
|
||||
- "schilder" = painter
|
||||
- "koopman" = merchant
|
||||
- "timmerman" = carpenter
|
||||
- "zonder beroep" = without profession/occupation
|
||||
|
||||
4. RESIDENCE MARKERS:
|
||||
- "wonende alhier" = residing here (in the registration municipality)
|
||||
- "geboren te" = born in
|
||||
|
||||
5. RELATIONSHIP TERMS:
|
||||
- "meerderjarige zoon van" = adult son of
|
||||
- "meerderjarige dochter van" = adult daughter of
|
||||
- "broeder van" = brother of
|
||||
|
||||
provenance:
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
notes: |
|
||||
This example uses synthetic data based on authentic Dutch civil
|
||||
registry (Burgerlijke Stand) marriage certificate formulae for
|
||||
demonstration purposes. Names and dates are fictional (place names are real)
|
||||
but follow authentic 19th-century patterns.
|
||||
For real examples, see PROVENANCE_SOURCES.md.
|
||||
|
||||
related_real_sources:
|
||||
- archive: "Centraal Bureau voor Genealogie (CBG)"
|
||||
project: "WieWasWie"
|
||||
digital_url: "https://www.wiewaswie.nl/"
|
||||
document_type: "Birth, marriage, death certificates"
|
||||
period: "1811-present (civil); 1600s+ (church)"
|
||||
language: "Dutch"
|
||||
license: "Subscription / Free at archives"
|
||||
|
||||
- archive: "Noord-Hollands Archief"
|
||||
coverage: "Civil registry from 1811, church records from 1600s"
|
||||
location: "Haarlem, Netherlands"
|
||||
document_types: "Dutch civil registry records"
|
||||
|
|
@ -0,0 +1,263 @@
|
|||
# =============================================================================
|
||||
# PiCo Example 2: Early Modern Notarial Protocol Index Entry
|
||||
# =============================================================================
|
||||
# Part of: data/entity_annotation/modules/integrations/pico/examples/
|
||||
# Parent: _examples_index.yaml
|
||||
#
|
||||
# DATA STATUS: SYNTHETIC_EXAMPLE
|
||||
#
|
||||
# Demonstrates extraction from a 17th-century Dutch notarial protocol showing:
|
||||
# - Early modern Dutch naming conventions (patronymics: Janszoon, Claesdr)
|
||||
# - Guardianship (voogd) relationships
|
||||
# - Orphan identification
|
||||
# - Notarial act structure
|
||||
# - Tussenvoegsel patterns (van der)
|
||||
#
|
||||
# Language: Early Modern Dutch
|
||||
# Period: 17th century (1680 CE)
|
||||
# Source Type: Notarial Archives
|
||||
#
|
||||
# Last Updated: 2025-12-12
|
||||
# =============================================================================
|
||||
|
||||
example_id: "example_02_notarial_protocol"
|
||||
example_title: "Early Modern Notarial Protocol Index Entry (1680)"
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
source_language: "Early Modern Dutch"
|
||||
source_type: "historical_indices"
|
||||
|
||||
description: |
|
||||
This example demonstrates extraction from an early modern Dutch notarial
|
||||
protocol index entry from 1680. Notarial protocols are rich sources for
|
||||
genealogical research, containing contracts, testaments, and guardianship
|
||||
appointments.
|
||||
|
||||
Key extraction features:
|
||||
- 9 persons with complex relationships
|
||||
- Patronymic naming system (Janszoon, Claesdr)
|
||||
- Guardianship (voogd) relationships
|
||||
- Orphan children identification
|
||||
- Deceased parent markers
|
||||
- Notary and witness identification
|
||||
- Early modern Dutch occupation terms
|
||||
|
||||
source_text: |
|
||||
Notarial Archive Amsterdam, inv. 5075/1234
|
||||
30 January 1680
|
||||
|
||||
Before notary Pieter van der Meer appeared:
|
||||
Jacob Janszoon van der Hoeven, merchant of this city,
|
||||
with his wife Maritgen Claes, for themselves and as
|
||||
guardians (voogden) of the minor children of the late
|
||||
Claes Jacobsz and Aeltgen Pieters, namely:
|
||||
- Jan Claeszoon, aged about 16 years
|
||||
- Trijntgen Claesdr, aged about 12 years
|
||||
|
||||
Witnesses: Hendrick Jansz, baker, and Cornelis Pietersz,
|
||||
schoolmaster, both of this city.
|
||||
|
||||
expected_extraction:
|
||||
pico_observation:
|
||||
observation_id: "na_amsterdam_5075_1234"
|
||||
observed_at: "2025-12-12T10:00:00Z"
|
||||
source_type: "historical_indices"
|
||||
source_reference: "Notarial Archive Amsterdam, inv. 5075/1234, 30 January 1680"
|
||||
|
||||
persons:
|
||||
- person_index: 0
|
||||
pnv_name:
|
||||
literalName: "Jacob Janszoon van der Hoeven"
|
||||
givenName: "Jacob"
|
||||
patronym: "Janszoon"
|
||||
surnamePrefix: "van der"
|
||||
baseSurname: "Hoeven"
|
||||
roles:
|
||||
- role_title: "merchant"
|
||||
role_in_source: "declarant"
|
||||
- role_title: "voogd"
|
||||
role_in_source: null
|
||||
biographical:
|
||||
address: "Amsterdam"
|
||||
family_relationships:
|
||||
spouse:
|
||||
- person_index: 1
|
||||
target_name: "Maritgen Claes"
|
||||
|
||||
- person_index: 1
|
||||
pnv_name:
|
||||
literalName: "Maritgen Claes"
|
||||
givenName: "Maritgen"
|
||||
patronym: "Claes"
|
||||
roles:
|
||||
- role_in_source: "declarant"
|
||||
- role_title: "voogd"
|
||||
family_relationships:
|
||||
spouse:
|
||||
- person_index: 0
|
||||
target_name: "Jacob Janszoon van der Hoeven"
|
||||
|
||||
- person_index: 2
|
||||
pnv_name:
|
||||
literalName: "Claes Jacobsz"
|
||||
givenName: "Claes"
|
||||
patronym: "Jacobsz"
|
||||
biographical:
|
||||
deceased: true
|
||||
family_relationships:
|
||||
spouse:
|
||||
- person_index: 3
|
||||
target_name: "Aeltgen Pieters"
|
||||
children:
|
||||
- person_index: 4
|
||||
target_name: "Jan Claeszoon"
|
||||
- person_index: 5
|
||||
target_name: "Trijntgen Claesdr"
|
||||
|
||||
- person_index: 3
|
||||
pnv_name:
|
||||
literalName: "Aeltgen Pieters"
|
||||
givenName: "Aeltgen"
|
||||
patronym: "Pieters"
|
||||
biographical:
|
||||
deceased: true
|
||||
family_relationships:
|
||||
spouse:
|
||||
- person_index: 2
|
||||
target_name: "Claes Jacobsz"
|
||||
children:
|
||||
- person_index: 4
|
||||
target_name: "Jan Claeszoon"
|
||||
- person_index: 5
|
||||
target_name: "Trijntgen Claesdr"
|
||||
|
||||
- person_index: 4
|
||||
pnv_name:
|
||||
literalName: "Jan Claeszoon"
|
||||
givenName: "Jan"
|
||||
patronym: "Claeszoon"
|
||||
roles:
|
||||
- role_in_source: "child"
|
||||
biographical:
|
||||
age: "about 16"
|
||||
family_relationships:
|
||||
parent:
|
||||
- person_index: 2
|
||||
target_name: "Claes Jacobsz"
|
||||
- person_index: 3
|
||||
target_name: "Aeltgen Pieters"
|
||||
sibling:
|
||||
- person_index: 5
|
||||
target_name: "Trijntgen Claesdr"
|
||||
|
||||
- person_index: 5
|
||||
pnv_name:
|
||||
literalName: "Trijntgen Claesdr"
|
||||
givenName: "Trijntgen"
|
||||
patronym: "Claesdr"
|
||||
roles:
|
||||
- role_in_source: "child"
|
||||
biographical:
|
||||
age: "about 12"
|
||||
gender: "Female"
|
||||
family_relationships:
|
||||
parent:
|
||||
- person_index: 2
|
||||
target_name: "Claes Jacobsz"
|
||||
- person_index: 3
|
||||
target_name: "Aeltgen Pieters"
|
||||
sibling:
|
||||
- person_index: 4
|
||||
target_name: "Jan Claeszoon"
|
||||
|
||||
- person_index: 6
|
||||
pnv_name:
|
||||
literalName: "Pieter van der Meer"
|
||||
givenName: "Pieter"
|
||||
surnamePrefix: "van der"
|
||||
baseSurname: "Meer"
|
||||
roles:
|
||||
- role_title: "notary"
|
||||
|
||||
- person_index: 7
|
||||
pnv_name:
|
||||
literalName: "Hendrick Jansz"
|
||||
givenName: "Hendrick"
|
||||
patronym: "Jansz"
|
||||
roles:
|
||||
- role_title: "baker"
|
||||
role_in_source: "witness"
|
||||
biographical:
|
||||
address: "Amsterdam"
|
||||
|
||||
- person_index: 8
|
||||
pnv_name:
|
||||
literalName: "Cornelis Pietersz"
|
||||
givenName: "Cornelis"
|
||||
patronym: "Pietersz"
|
||||
roles:
|
||||
- role_title: "schoolmaster"
|
||||
role_in_source: "witness"
|
||||
biographical:
|
||||
address: "Amsterdam"
|
||||
|
||||
temporal_references:
|
||||
- expression: "30 January 1680"
|
||||
normalized: "1680-01-30"
|
||||
type: "DATE"
|
||||
|
||||
locations_mentioned:
|
||||
- name: "Amsterdam"
|
||||
type: "city"
|
||||
|
||||
naming_conventions_notes: |
|
||||
Early modern Dutch naming conventions demonstrated:
|
||||
|
||||
1. PATRONYMIC SYSTEM:
|
||||
- Male: -zoon, -szoon, -sz, -z (son of)
|
||||
Examples: Janszoon, Jacobsz, Jansz, Pietersz
|
||||
- Female: -dr, -dochter (daughter of)
|
||||
Examples: Claesdr (= Claesdochter)
|
||||
- Patronyms derived from father's given name
|
||||
|
||||
2. TRANSITION TO SURNAMES:
|
||||
- Some families adopted fixed surnames (van der Hoeven, van der Meer)
|
||||
- Others still used pure patronymics (Hendrick Jansz)
|
||||
- Mixed patterns common in this period
|
||||
|
||||
3. TUSSENVOEGSEL:
|
||||
- "van der" = from the (+ definite article)
|
||||
- Often indicates geographic origin
|
||||
- Hoeven = farmstead/court
|
||||
- Meer = lake
|
||||
|
||||
4. GENDERED DIMINUTIVES:
|
||||
- Female names often end in -gen, -tgen, -tje
|
||||
- Maritgen, Trijntgen, Aeltgen
|
||||
- Male names typically unmodified
|
||||
|
||||
5. LEGAL TERMINOLOGY:
|
||||
- "voogd" (plural: voogden) = guardian
|
||||
- Used for orphaned minors
|
||||
- Appointed by family or court
|
||||
|
||||
provenance:
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
notes: |
|
||||
This example uses synthetic data based on authentic early modern
|
||||
notarial protocol index entry formulae for demonstration purposes.
|
||||
Names, dates, and locations are fictional but follow authentic
|
||||
17th-century Dutch notarial patterns.
|
||||
For real examples, see PROVENANCE_SOURCES.md.
|
||||
|
||||
related_real_sources:
|
||||
- archive: "Stadsarchief Amsterdam"
|
||||
collection: "Notarial Archives (Notariële Archieven)"
|
||||
document_type: "Notarial protocols, contracts, testaments"
|
||||
period: "1578-1915"
|
||||
language: "Dutch, Latin"
|
||||
notes: "Largest notarial archive in the Netherlands"
|
||||
|
||||
- project: "TICCLAT (Text-Induced Corpus Correction and Lexical Assessment Tool)"
|
||||
coverage: "Amsterdam notarial indices"
|
||||
period: "17th-18th century"
|
||||
notes: "Machine-readable indices to notarial protocols"
|
||||
|
|
@ -0,0 +1,202 @@
|
|||
# =============================================================================
|
||||
# PiCo Example 3: Dutch Church Baptismal Record with Godparents
|
||||
# =============================================================================
|
||||
# Part of: data/entity_annotation/modules/integrations/pico/examples/
|
||||
# Parent: _examples_index.yaml
|
||||
#
|
||||
# DATA STATUS: SYNTHETIC_EXAMPLE
|
||||
#
|
||||
# Dutch Reformed Church (Nederlandse Hervormde Kerk) baptismal register entry.
|
||||
# Demonstrates godparent relationships, Dutch patronymic naming, and
|
||||
# pre-civil registration church records (DTB - Doop-, Trouw- en Begraafregisters).
|
||||
#
|
||||
# Language: Dutch (Early Modern)
|
||||
# Period: 1702 CE
|
||||
# Source Type: Church baptismal register (DTB)
|
||||
#
|
||||
# Last Updated: 2025-12-12
|
||||
# =============================================================================
|
||||
|
||||
example_id: "example_03_church_baptism"
|
||||
example_title: "Dutch Church Baptismal Record with Godparents (1702)"
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
source_language: "Dutch"
|
||||
source_type: "church_records"
|
||||
|
||||
description: |
|
||||
Example of a Dutch Reformed Church (Nederlandse Hervormde Kerk) baptismal
|
||||
register entry demonstrating:
|
||||
- Godparent (getuigen) relationships creating spiritual kinship
|
||||
- Dutch patronymic naming conventions (Hendriksen, Jans, Anthonisz)
|
||||
- Aristocratic naming (surnamePrefix: van)
|
||||
- Honorific titles (E. Heer, Juffrou)
|
||||
- Pre-civil registration church records (before 1811)
|
||||
|
||||
source_text: |
|
||||
Den 15en Meij 1702 is gedoopt
|
||||
Johanna, dochter van Willem Hendriksen en Geertruijd Jans,
|
||||
getuijgen waren de E. Heer Jan Willem van Beverwijck
|
||||
ende Juffrou Maria van Loon, huijsvrouw van de heer
|
||||
Pieter Anthonisz Verschoor.
|
||||
|
||||
expected_extraction:
|
||||
pico_observation:
|
||||
observation_id: "dtb_amsterdam_1702_baptism_johanna"
|
||||
observed_at: "2025-12-12T10:00:00Z"
|
||||
source_type: "church_records"
|
||||
source_reference: "DTB Amsterdam, 15 May 1702"
|
||||
|
||||
persons:
|
||||
- person_index: 0
|
||||
pnv_name:
|
||||
literalName: "Johanna"
|
||||
givenName: "Johanna"
|
||||
roles:
|
||||
- role_in_source: "child"
|
||||
biographical:
|
||||
gender: "Female"
|
||||
family_relationships:
|
||||
parent:
|
||||
- person_index: 1
|
||||
target_name: "Willem Hendriksen"
|
||||
- person_index: 2
|
||||
target_name: "Geertruijd Jans"
|
||||
godparent:
|
||||
- person_index: 3
|
||||
target_name: "Jan Willem van Beverwijck"
|
||||
- person_index: 4
|
||||
target_name: "Maria van Loon"
|
||||
|
||||
- person_index: 1
|
||||
pnv_name:
|
||||
literalName: "Willem Hendriksen"
|
||||
givenName: "Willem"
|
||||
patronym: "Hendriksen"
|
||||
biographical:
|
||||
gender: "Male"
|
||||
family_relationships:
|
||||
children:
|
||||
- person_index: 0
|
||||
target_name: "Johanna"
|
||||
spouse:
|
||||
- person_index: 2
|
||||
target_name: "Geertruijd Jans"
|
||||
|
||||
- person_index: 2
|
||||
pnv_name:
|
||||
literalName: "Geertruijd Jans"
|
||||
givenName: "Geertruijd"
|
||||
patronym: "Jans"
|
||||
biographical:
|
||||
gender: "Female"
|
||||
family_relationships:
|
||||
children:
|
||||
- person_index: 0
|
||||
target_name: "Johanna"
|
||||
spouse:
|
||||
- person_index: 1
|
||||
target_name: "Willem Hendriksen"
|
||||
|
||||
- person_index: 3
|
||||
pnv_name:
|
||||
literalName: "Jan Willem van Beverwijck"
|
||||
givenName: "Jan Willem"
|
||||
surnamePrefix: "van"
|
||||
baseSurname: "Beverwijck"
|
||||
honorificPrefix: "de E. Heer"
|
||||
roles:
|
||||
- role_in_source: "witness"
|
||||
biographical:
|
||||
gender: "Male"
|
||||
family_relationships:
|
||||
godchild:
|
||||
- person_index: 0
|
||||
target_name: "Johanna"
|
||||
|
||||
- person_index: 4
|
||||
pnv_name:
|
||||
literalName: "Maria van Loon"
|
||||
givenName: "Maria"
|
||||
surnamePrefix: "van"
|
||||
baseSurname: "Loon"
|
||||
honorificPrefix: "Juffrou"
|
||||
roles:
|
||||
- role_in_source: "witness"
|
||||
biographical:
|
||||
gender: "Female"
|
||||
family_relationships:
|
||||
godchild:
|
||||
- person_index: 0
|
||||
target_name: "Johanna"
|
||||
spouse:
|
||||
- person_index: 5
|
||||
target_name: "Pieter Anthonisz Verschoor"
|
||||
|
||||
- person_index: 5
|
||||
pnv_name:
|
||||
literalName: "Pieter Anthonisz Verschoor"
|
||||
givenName: "Pieter"
|
||||
patronym: "Anthonisz"
|
||||
baseSurname: "Verschoor"
|
||||
honorificPrefix: "de heer"
|
||||
biographical:
|
||||
gender: "Male"
|
||||
family_relationships:
|
||||
spouse:
|
||||
- person_index: 4
|
||||
target_name: "Maria van Loon"
|
||||
|
||||
temporal_references:
|
||||
- expression: "Den 15en Meij 1702"
|
||||
normalized: "1702-05-15"
|
||||
type: "DATE"
|
||||
|
||||
naming_conventions_notes: |
|
||||
Dutch naming conventions demonstrated in this example:
|
||||
|
||||
PATRONYMICS:
|
||||
- Hendriksen: son of Hendrik (-sen = son)
|
||||
- Jans: daughter/child of Jan (feminine form without -sen common for women)
|
||||
- Anthonisz: son of Anthonis (-z = zoon = son, abbreviated)
|
||||
|
||||
ARISTOCRATIC NAMING:
|
||||
- "van" prefix: means "from/of" a place; common among patrician families but, unlike German "von", not in itself a marker of nobility
|
||||
- "van Beverwijck": from the Beverwijck region
|
||||
- "van Loon": from the Loon region (Limburg)
|
||||
|
||||
HONORIFIC TITLES:
|
||||
- "de E. Heer": de Eerbare Heer (the Honorable Sir) - used for gentlemen
|
||||
- "Juffrou": Juffrouw (Miss/Madam) - used for unmarried or married respectable women
|
||||
- "de heer": (the mister) - standard respectful address
|
||||
|
||||
GODPARENT TERMINOLOGY:
|
||||
- "getuijgen": witnesses (in baptismal context = godparents)
|
||||
- Godparents created spiritual kinship (geestelijke verwantschap)
|
||||
|
||||
PRE-CIVIL REGISTRATION:
|
||||
- DTB records (Doop-, Trouw- en Begraafregisters) were church records
|
||||
- Civil registration (Burgerlijke Stand) started in Netherlands in 1811
|
||||
- Before 1811, churches maintained vital records
|
||||
|
||||
provenance:
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
notes: |
|
||||
This example uses synthetic data based on authentic Dutch Reformed
|
||||
Church (Nederlandse Hervormde Kerk) baptismal register formulae for
|
||||
demonstration purposes. Names, dates, and locations are fictional
|
||||
but follow authentic early 18th-century patterns.
|
||||
For real examples, see PROVENANCE_SOURCES.md.
|
||||
|
||||
related_real_sources:
|
||||
- archive: "Various Dutch Regional Archives"
|
||||
collection: "Doop-, Trouw- en Begraafregisters (DTB)"
|
||||
document_type: "Church baptism, marriage, burial records"
|
||||
period: "1600s-1811 (before civil registration)"
|
||||
language: "Dutch"
|
||||
notes: "Pre-1811 vital records maintained by churches"
|
||||
|
||||
- archive: "FamilySearch"
|
||||
collection: "Netherlands, Church Records"
|
||||
wiki_url: "https://www.familysearch.org/en/wiki/Netherlands_Church_Records"
|
||||
document_type: "Dutch church baptisms"
|
||||
license: "Free with registration"
|
||||
|
|
@ -0,0 +1,146 @@
|
|||
# =============================================================================
|
||||
# PiCo Example 4: Modern LinkedIn Staff Profile
|
||||
# =============================================================================
|
||||
# Part of: data/entity_annotation/modules/integrations/pico/examples/
|
||||
# Parent: _examples_index.yaml
|
||||
#
|
||||
# DATA STATUS: SYNTHETIC_EXAMPLE
|
||||
#
|
||||
# Demonstrates modern digital source extraction, contrasting with historical
|
||||
# document examples. Shows heritage sector professional career tracking.
|
||||
#
|
||||
# Language: English
|
||||
# Period: Contemporary (2025)
|
||||
# Source Type: Modern digital (LinkedIn profile)
|
||||
#
|
||||
# Last Updated: 2025-12-12
|
||||
# =============================================================================
|
||||
|
||||
example_id: "example_04_linkedin_profile"
|
||||
example_title: "Modern LinkedIn Staff Profile - Heritage Professional"
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
source_language: "English"
|
||||
source_type: "modern_digital"
|
||||
|
||||
description: |
|
||||
Example of a modern LinkedIn profile for a heritage sector professional.
|
||||
Demonstrates PiCo extraction patterns for contemporary digital sources,
|
||||
contrasting with historical document examples.
|
||||
|
||||
Key features:
|
||||
- Modern professional networking profile format
|
||||
- Career trajectory across heritage institutions
|
||||
- Educational background with dates
|
||||
- Dutch naming conventions in modern context (van den Berg)
|
||||
- GLAMORCUBESFIXPHDNT heritage type classification
|
||||
|
||||
source_text: |
|
||||
Dr. Maria van den Berg
|
||||
Director of Collections | Rijksmuseum
|
||||
Amsterdam, Netherlands
|
||||
|
||||
About:
|
||||
Leading the collections management team at the Rijksmuseum since 2018.
|
||||
Previously Head Curator at the Van Gogh Museum (2012-2018).
|
||||
PhD in Art History, University of Amsterdam.
|
||||
|
||||
Experience:
|
||||
- Director of Collections, Rijksmuseum (2018-present)
|
||||
- Head Curator, Van Gogh Museum (2012-2018)
|
||||
- Assistant Curator, Stedelijk Museum (2008-2012)
|
||||
|
||||
Education:
|
||||
- PhD Art History, University of Amsterdam (2008)
|
||||
- MA Museum Studies, University of Amsterdam (2003)
|
||||
|
||||
expected_extraction:
|
||||
pico_observation:
|
||||
observation_id: "linkedin_maria_van_den_berg_2025"
|
||||
observed_at: "2025-12-12T10:00:00Z"
|
||||
source_type: "modern_digital"
|
||||
source_reference: "https://linkedin.com/in/mariavandenberg"
|
||||
|
||||
persons:
|
||||
- person_index: 0
|
||||
pnv_name:
|
||||
literalName: "Dr. Maria van den Berg"
|
||||
givenName: "Maria"
|
||||
surnamePrefix: "van den"
|
||||
baseSurname: "Berg"
|
||||
honorificPrefix: "Dr."
|
||||
roles:
|
||||
- role_title: "Director of Collections"
|
||||
organization: "Rijksmuseum"
|
||||
period: "2018-present"
|
||||
heritage_relevant: true
|
||||
heritage_type: "M"
|
||||
- role_title: "Head Curator"
|
||||
organization: "Van Gogh Museum"
|
||||
period: "2012-2018"
|
||||
heritage_relevant: true
|
||||
heritage_type: "M"
|
||||
- role_title: "Assistant Curator"
|
||||
organization: "Stedelijk Museum"
|
||||
period: "2008-2012"
|
||||
heritage_relevant: true
|
||||
heritage_type: "M"
|
||||
biographical:
|
||||
address: "Amsterdam, Netherlands"
|
||||
family_relationships: {}
|
||||
context: "Heritage sector professional with museum career"
|
||||
|
||||
organizations_mentioned:
|
||||
- name: "Rijksmuseum"
|
||||
type: "M"
|
||||
role_in_source: "employer"
|
||||
- name: "Van Gogh Museum"
|
||||
type: "M"
|
||||
role_in_source: "employer"
|
||||
- name: "Stedelijk Museum"
|
||||
type: "M"
|
||||
role_in_source: "employer"
|
||||
- name: "University of Amsterdam"
|
||||
type: "E"
|
||||
role_in_source: "education"
|
||||
|
||||
locations_mentioned:
|
||||
- name: "Amsterdam"
|
||||
type: "city"
|
||||
- name: "Netherlands"
|
||||
type: "country"
|
||||
|
||||
naming_conventions_notes: |
|
||||
Modern Dutch naming conventions demonstrated:
|
||||
|
||||
SURNAME PREFIX:
|
||||
- "van den" is a tussenvoegsel (insertion) common in Dutch surnames
|
||||
- In alphabetical sorting, Dutch convention uses the base surname: "Berg, Maria van den"
|
||||
- In formal address without a given name: "Dr. Van den Berg" (tussenvoegsel capitalized because no given name or initials precede it)
|
||||
- With a given name or initials: "Dr. Maria van den Berg" (lowercase tussenvoegsel)
|
||||
|
||||
ACADEMIC TITLE:
|
||||
- "Dr." indicates doctorate (PhD) - placed before name
|
||||
- In the Netherlands, this is an academic degree, not a medical title (which uses "Arts")
|
||||
|
||||
CONTRAST WITH HISTORICAL EXAMPLES:
|
||||
- LinkedIn profiles are emic (insider, self-authored) descriptions, whereas the historical documents are etic (observer) accounts written by third-party recorders
|
||||
- Structured data format vs. narrative historical documents
|
||||
- Self-reported information vs. third-party recording
|
||||
- Modern standardized naming vs. evolving historical conventions
|
||||
|
||||
provenance:
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
notes: |
|
||||
This example uses synthetic data based on modern LinkedIn profile
|
||||
formats for demonstration purposes. The profile name, institution,
|
||||
and biographical details are entirely fictional. LinkedIn profiles
|
||||
represent a modern source type for person-in-context observations,
|
||||
contrasting with the historical document examples in this module.
|
||||
|
||||
source_context:
|
||||
platform: "LinkedIn"
|
||||
data_type: "Modern professional networking profile"
|
||||
privacy_note: |
|
||||
When extracting real LinkedIn data, ensure compliance with
|
||||
LinkedIn Terms of Service, GDPR, and applicable privacy laws.
|
||||
This synthetic example demonstrates extraction patterns only.
|
||||
|
|
@ -0,0 +1,215 @@
|
|||
# =============================================================================
|
||||
# PiCo Example 5: Arabic Waqf Document (Endowment Record)
|
||||
# =============================================================================
|
||||
# Part of: data/entity_annotation/modules/integrations/pico/examples/
|
||||
# Parent: _examples_index.yaml
|
||||
#
|
||||
# DATA STATUS: SYNTHETIC_EXAMPLE
|
||||
#
|
||||
# Example of a waqf (religious endowment) document from an Islamic archive.
|
||||
# Waqf documents record property endowments for religious/charitable purposes
|
||||
# and typically name the founder, beneficiaries, and witnesses.
|
||||
#
|
||||
# Language: Arabic
|
||||
# Period: 1225 AH (1810 CE)
|
||||
# Source Type: Archival descriptions (waqfiyya)
|
||||
#
|
||||
# Last Updated: 2025-12-12
|
||||
# =============================================================================
|
||||
|
||||
example_id: "example_05_arabic_waqf"
|
||||
example_title: "Arabic Waqf Document - Aleppo Endowment (1810 CE)"
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
source_language: "Arabic"
|
||||
source_type: "archival_descriptions"
|
||||
|
||||
description: |
|
||||
Example of a waqf (وقف) document from an Islamic archive. Waqf documents
|
||||
record property endowments for religious/charitable purposes and typically
|
||||
name the founder (واقف), beneficiaries, and witnesses.
|
||||
|
||||
Key features demonstrated:
|
||||
- Arabic patronymic system (ابن/بن - ibn/bin = son of)
|
||||
- Honorific titles (الحاج, السيد)
|
||||
- Nisba (geographic/tribal surnames)
|
||||
- Deceased markers (المرحوم)
|
||||
- Hijri calendar dating
|
||||
- Romanization alongside Arabic script
|
||||
|
||||
source_text: |
|
||||
بسم الله الرحمن الرحيم
|
||||
هذا ما وقف وحبس وسبل وأبد المرحوم الحاج أحمد بن محمد العمري، تاجر بمدينة
|
||||
حلب الشهباء، ابن المرحوم محمد بن عبد الله العمري. وقف جميع داره الكائنة
|
||||
بمحلة الجديدة على أولاده وأولاد أولاده ذكوراً وإناثاً. وإن انقرضوا لا سمح
|
||||
الله فعلى فقراء المسلمين. وشهد على ذلك الشهود: الحاج إبراهيم بن يوسف
|
||||
التركماني، والسيد علي بن حسين الحلبي. وكتب في شهر رجب سنة ألف ومائتين
|
||||
وخمس وعشرين هجرية.
|
||||
|
||||
source_text_english: |
|
||||
In the name of God, the Compassionate, the Merciful.
|
||||
This is what the late al-Hajj Ahmad ibn Muhammad al-'Umari, merchant
|
||||
in the city of Aleppo, son of the late Muhammad ibn Abdullah al-'Umari,
|
||||
has endowed, dedicated, and perpetuated. He endowed his entire house
|
||||
located in the al-Jadida neighborhood for his children and grandchildren,
|
||||
male and female. If they cease to exist, God forbid, then for the poor
|
||||
Muslims. Witnessed by: al-Hajj Ibrahim ibn Yusuf al-Turkmani, and
|
||||
al-Sayyid Ali ibn Husayn al-Halabi. Written in the month of Rajab,
|
||||
year 1225 Hijri (1810 CE).
|
||||
|
||||
expected_extraction:
|
||||
pico_observation:
|
||||
observation_id: "waqf_aleppo_1225h_ahmad_umari"
|
||||
observed_at: "2025-12-12T10:00:00Z"
|
||||
source_type: "archival_descriptions"
|
||||
source_reference: "Waqf document, Aleppo, Rajab 1225 AH (1810 CE)"
|
||||
|
||||
persons:
|
||||
- person_index: 0
|
||||
pnv_name:
|
||||
literalName: "الحاج أحمد بن محمد العمري"
|
||||
literalName_romanized: "al-Hajj Ahmad ibn Muhammad al-'Umari"
|
||||
givenName: "أحمد"
|
||||
givenName_romanized: "Ahmad"
|
||||
patronym: "محمد"
|
||||
patronym_romanized: "Muhammad"
|
||||
baseSurname: "العمري"
|
||||
baseSurname_romanized: "al-'Umari"
|
||||
honorificPrefix: "الحاج"
|
||||
honorificPrefix_romanized: "al-Hajj"
|
||||
roles:
|
||||
- role_title: "تاجر"
|
||||
role_title_romanized: "merchant"
|
||||
role_in_source: "founder"
|
||||
biographical:
|
||||
deceased: true
|
||||
address: "حلب الشهباء (Aleppo)"
|
||||
family_relationships:
|
||||
parent:
|
||||
- person_index: 1
|
||||
target_name: "محمد بن عبد الله العمري"
|
||||
context: "Waqf founder (واقف)"
|
||||
|
||||
- person_index: 1
|
||||
pnv_name:
|
||||
literalName: "محمد بن عبد الله العمري"
|
||||
literalName_romanized: "Muhammad ibn Abdullah al-'Umari"
|
||||
givenName: "محمد"
|
||||
givenName_romanized: "Muhammad"
|
||||
patronym: "عبد الله"
|
||||
patronym_romanized: "Abdullah"
|
||||
baseSurname: "العمري"
|
||||
baseSurname_romanized: "al-'Umari"
|
||||
honorificPrefix: "المرحوم"
|
||||
honorificPrefix_romanized: "the late"
|
||||
biographical:
|
||||
deceased: true
|
||||
family_relationships:
|
||||
children:
|
||||
- person_index: 0
|
||||
target_name: "أحمد بن محمد العمري"
|
||||
context: "Father of the founder"
|
||||
|
||||
- person_index: 2
|
||||
pnv_name:
|
||||
literalName: "الحاج إبراهيم بن يوسف التركماني"
|
||||
literalName_romanized: "al-Hajj Ibrahim ibn Yusuf al-Turkmani"
|
||||
givenName: "إبراهيم"
|
||||
givenName_romanized: "Ibrahim"
|
||||
patronym: "يوسف"
|
||||
patronym_romanized: "Yusuf"
|
||||
baseSurname: "التركماني"
|
||||
baseSurname_romanized: "al-Turkmani"
|
||||
honorificPrefix: "الحاج"
|
||||
honorificPrefix_romanized: "al-Hajj"
|
||||
roles:
|
||||
- role_in_source: "witness"
|
||||
family_relationships: {}
|
||||
context: "Witness to the endowment"
|
||||
|
||||
- person_index: 3
|
||||
pnv_name:
|
||||
literalName: "السيد علي بن حسين الحلبي"
|
||||
literalName_romanized: "al-Sayyid Ali ibn Husayn al-Halabi"
|
||||
givenName: "علي"
|
||||
givenName_romanized: "Ali"
|
||||
patronym: "حسين"
|
||||
patronym_romanized: "Husayn"
|
||||
baseSurname: "الحلبي"
|
||||
baseSurname_romanized: "al-Halabi"
|
||||
honorificPrefix: "السيد"
|
||||
honorificPrefix_romanized: "al-Sayyid"
|
||||
roles:
|
||||
- role_in_source: "witness"
|
||||
family_relationships: {}
|
||||
context: "Witness to the endowment"
|
||||
|
||||
temporal_references:
|
||||
- expression: "شهر رجب سنة ألف ومائتين وخمس وعشرين هجرية"
|
||||
expression_romanized: "month of Rajab, year 1225 Hijri"
|
||||
normalized: "1810-08"
|
||||
calendar: "Hijri"
|
||||
type: "DATE"
|
||||
|
||||
locations_mentioned:
|
||||
- name: "حلب الشهباء"
|
||||
name_romanized: "Aleppo"
|
||||
type: "city"
|
||||
- name: "محلة الجديدة"
|
||||
name_romanized: "al-Jadida neighborhood"
|
||||
type: "neighborhood"
|
||||
|
||||
arabic_naming_notes: |
|
||||
Arabic naming conventions demonstrated:
|
||||
|
||||
PATRONYMICS:
|
||||
- ابن/بن (ibn/bin): "son of" - connects given name to father's name
|
||||
- Full chain: Ahmad ibn Muhammad ibn Abdullah = Ahmad son of Muhammad son of Abdullah
|
||||
|
||||
HONORIFIC TITLES:
|
||||
- الحاج (al-Hajj): honorific for one who completed the Hajj pilgrimage to Mecca
|
||||
- السيد (al-Sayyid): honorific denoting descent from Prophet Muhammad
|
||||
- المرحوم (al-marhum): "the late" - marker for deceased person (masculine)
|
||||
- المرحومة (al-marhuma): "the late" - feminine form
|
||||
|
||||
NISBA (نسبة):
|
||||
Geographic or tribal surname indicating origin:
|
||||
- العمري (al-'Umari): descendant of 'Umar or from 'Umar tribe
|
||||
- التركماني (al-Turkmani): of Turkman origin
|
||||
- الحلبي (al-Halabi): from Aleppo (حلب = Halab)
|
||||
|
||||
WAQF TERMINOLOGY:
|
||||
- واقف (waqif): founder/endower
|
||||
- وقف (waqf): the endowment itself
|
||||
- شهود (shuhud): witnesses
|
||||
|
||||
HIJRI CALENDAR:
|
||||
- رجب (Rajab): 7th month of Islamic lunar calendar
|
||||
- سنة هجرية: Hijri year (from Prophet's migration to Medina, 622 CE)
|
||||
|
||||
provenance:
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
notes: |
|
||||
This example uses synthetic data based on standard waqf document formulae
|
||||
for demonstration purposes. Names, dates, and property details are fictional.
|
||||
For real examples, see PROVENANCE_SOURCES.md.
|
||||
|
||||
related_real_sources:
|
||||
- archive: "Cambridge University Library"
|
||||
collection: "Islamic Manuscripts"
|
||||
digital_url: "https://cudl.lib.cam.ac.uk/collections/islamic"
|
||||
document_types: "Waqfiyya, legal documents"
|
||||
period: "8th-20th century CE"
|
||||
license: "CC BY-NC 4.0"
|
||||
|
||||
- archive: "University of Pennsylvania Libraries"
|
||||
collection: "Manuscripts of the Muslim World"
|
||||
digital_url: "https://openn.library.upenn.edu/html/muslimworld_contents.html"
|
||||
document_types: "Waqfiyya, Quranic manuscripts, legal documents"
|
||||
license: "Public Domain / CC0"
|
||||
|
||||
- archive: "Singapore National Heritage Board"
|
||||
accession_number: "1115401"
|
||||
digital_url: "https://www.roots.gov.sg/Collection-Landing/listing/1115401"
|
||||
document_type: "Waqf document"
|
||||
donor: "Muhammad b. Abd al-Ghani"
|
||||
properties: "Istanbul (various locations)"
|
||||
|
|
@ -0,0 +1,325 @@
|
|||
# =============================================================================
|
||||
# PiCo Example 6: Hebrew Ketubah - Marriage of Mosheh & Rivkah
|
||||
# =============================================================================
|
||||
# Part of: data/entity_annotation/modules/integrations/pico/examples/
|
||||
# Parent: _examples_index.yaml
|
||||
#
|
||||
# DATA STATUS: REAL HISTORICAL DATA
|
||||
#
|
||||
# Source: Yale University Beinecke Rare Book & Manuscript Library
|
||||
# Call Number: Hebrew MSS suppl 194 (Broadside)
|
||||
# Object ID: 2067542
|
||||
# Document Date: 23 Elul 5656 AM (September 1, 1896 CE)
|
||||
# Location: Mashhad, Iran
|
||||
#
|
||||
# This is a REAL ketubah with verified provenance from Yale's digital collection.
|
||||
# The Mashhad Jewish community had a unique history as "crypto-Jews" after
|
||||
# forced conversion in 1839, making this document culturally significant.
|
||||
#
|
||||
# Last Updated: 2025-12-12
|
||||
# =============================================================================
|
||||
|
||||
example_id: "example_06_hebrew_ketubah"
|
||||
example_title: "Hebrew Ketubah - Marriage of Mosheh & Rivkah (Mashhad, Iran, 1896)"
|
||||
data_status: "REAL_HISTORICAL_DATA"
|
||||
source_language: "Hebrew/Aramaic"
|
||||
source_script: "Hebrew square script"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Document Description
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
description: |
|
||||
A ketubah is a Jewish marriage contract written in Aramaic with Hebrew
|
||||
elements. This REAL example from Mashhad, Iran demonstrates Persian Jewish
|
||||
traditions with elaborate decorative elements.
|
||||
|
||||
Historical context: The Jewish community of Mashhad was unique - after forced
|
||||
conversion to Islam in 1839 (the Allahdad pogrom), many continued practicing
|
||||
Judaism in secret as "Jadid al-Islam" (new Muslims). By 1896, some families
|
||||
were more openly practicing Judaism, as evidenced by this elaborate ketubah.
|
||||
|
||||
Key features documented:
|
||||
- Groom and bride names with patronymics (ben/bat - son/daughter of)
|
||||
- Persian Jewish artistic traditions (floral patterns, colored rules)
|
||||
- Hebrew date with month, day, and year from Creation
|
||||
- Isaiah 61:10 verse as blessing
|
||||
- Physical dimensions: 53 x 37 cm
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Source Text
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
source_text:
|
||||
note: "Full text not transcribed from manuscript. Key readable elements provided."
|
||||
|
||||
hebrew_text: |
|
||||
בס״ד
|
||||
|
||||
שנת חמשת אלפים שש מאות וחמישים ושש לבריאת עולם
|
||||
עשרים ושלשה לחודש אלול
|
||||
במשהד
|
||||
|
||||
החתן משה בן משיאח
|
||||
הכלה רבקה בת יעקב
|
||||
|
||||
שוש אשיש בה׳ תגל נפשי באלהי כי הלבישני בגדי ישע מעיל צדקה יעטני
|
||||
כחתן יכהן פאר וככלה תעדה כליה
|
||||
|
||||
romanized_text: |
|
||||
B'siyata d'Shmaya (With Heaven's help)
|
||||
|
||||
In the year five thousand six hundred and fifty-six from the Creation of the world,
|
||||
the twenty-third day of the month of Elul,
|
||||
in Mashhad.
|
||||
|
||||
The groom: Mosheh son of Mashiah
|
||||
The bride: Rivkah daughter of Ya'akov
|
||||
|
||||
[Isaiah 61:10 - decorative header blessing:]
|
||||
"I will greatly rejoice in the LORD, my soul shall be joyful in my God.
|
||||
For he has clothed me with the garments of salvation, he has covered me
|
||||
with the robe of righteousness, as a bridegroom decks himself with a garland,
|
||||
and as a bride adorns herself with her jewels."
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Expected Extraction Output
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
expected_extraction:
|
||||
pico_observation:
|
||||
observation_id: "ketubah_mashhad_5656_mosheh_rivkah"
|
||||
observed_at: "2025-01-13T12:00:00Z"
|
||||
source_type: "ketubah"
|
||||
source_reference: "Ketubah, Mashhad, 23 Elul 5656 (September 1, 1896 CE), Yale Beinecke Hebrew MSS suppl 194"
|
||||
archive: "Yale University, Beinecke Rare Book & Manuscript Library"
|
||||
|
||||
persons:
|
||||
# Person 0: Groom
|
||||
- person_index: 0
|
||||
pnv_name:
|
||||
literalName: "משה בן משיאח"
|
||||
literalName_romanized: "Mosheh ben Mashiah"
|
||||
givenName: "משה"
|
||||
givenName_romanized: "Mosheh"
|
||||
patronym: "משיאח"
|
||||
patronym_romanized: "Mashiah"
|
||||
roles:
|
||||
- role_title: "חתן"
|
||||
role_title_romanized: "chatan"
|
||||
role_in_source: "groom"
|
||||
biographical:
|
||||
sex: "male"
|
||||
religion: "Jewish"
|
||||
community: "Mashhad Jewish community (Mashhadis)"
|
||||
family_relationships:
|
||||
father:
|
||||
- person_index: 1
|
||||
target_name: "משיאח"
|
||||
spouse:
|
||||
- person_index: 2
|
||||
target_name: "רבקה בת יעקב"
|
||||
context: "Groom (chatan) - the bridegroom in the marriage contract"
|
||||
|
||||
# Person 1: Father of Groom
|
||||
- person_index: 1
|
||||
pnv_name:
|
||||
literalName: "משיאח"
|
||||
literalName_romanized: "Mashiah"
|
||||
givenName: "משיאח"
|
||||
givenName_romanized: "Mashiah"
|
||||
biographical:
|
||||
sex: "male"
|
||||
note: "Name meaning 'Messiah' - common Persian Jewish name"
|
||||
family_relationships:
|
||||
child:
|
||||
- person_index: 0
|
||||
target_name: "משה"
|
||||
context: "Father of the groom (implicit from patronymic)"
|
||||
|
||||
# Person 2: Bride
|
||||
- person_index: 2
|
||||
pnv_name:
|
||||
literalName: "רבקה בת יעקב"
|
||||
literalName_romanized: "Rivkah bat Ya'akov"
|
||||
givenName: "רבקה"
|
||||
givenName_romanized: "Rivkah"
|
||||
givenName_english: "Rebecca"
|
||||
patronym: "יעקב"
|
||||
patronym_romanized: "Ya'akov"
|
||||
roles:
|
||||
- role_title: "כלה"
|
||||
role_title_romanized: "kallah"
|
||||
role_in_source: "bride"
|
||||
biographical:
|
||||
sex: "female"
|
||||
religion: "Jewish"
|
||||
community: "Mashhad Jewish community (Mashhadis)"
|
||||
family_relationships:
|
||||
father:
|
||||
- person_index: 3
|
||||
target_name: "יעקב"
|
||||
spouse:
|
||||
- person_index: 0
|
||||
target_name: "משה בן משיאח"
|
||||
context: "Bride (kallah) - daughter of Ya'akov"
|
||||
|
||||
# Person 3: Father of Bride
|
||||
- person_index: 3
|
||||
pnv_name:
|
||||
literalName: "יעקב"
|
||||
literalName_romanized: "Ya'akov"
|
||||
givenName: "יעקב"
|
||||
givenName_romanized: "Ya'akov"
|
||||
givenName_english: "Jacob"
|
||||
biographical:
|
||||
sex: "male"
|
||||
note: "Biblical patriarch name - common in Jewish communities"
|
||||
family_relationships:
|
||||
child:
|
||||
- person_index: 2
|
||||
target_name: "רבקה"
|
||||
context: "Father of the bride (implicit from patronymic)"
|
||||
|
||||
temporal_references:
|
||||
- expression: "עשרים ושלשה לחודש אלול שנת חמשת אלפים שש מאות וחמישים ושש לבריאת עולם"
|
||||
expression_romanized: "23rd day of the month of Elul, year 5656 from Creation"
|
||||
normalized_gregorian: "1896-09-01"
|
||||
calendar: "Hebrew"
|
||||
type: "DATE"
|
||||
components:
|
||||
day: 23
|
||||
month: "אלול (Elul)"
|
||||
month_number: 6
|
||||
year_hebrew: 5656
|
||||
year_gregorian: 1896
|
||||
era: "לבריאת עולם (from Creation)"
|
||||
notes: "Elul is the 6th month of the ecclesiastical year (counted from Nisan), 12th of the civil year (counted from Tishrei)"
|
||||
|
||||
locations_mentioned:
|
||||
- name: "משהד"
|
||||
name_romanized: "Mashhad"
|
||||
name_persian: "مشهد"
|
||||
type: "city"
|
||||
country: "Iran (then Qajar Persia)"
|
||||
modern_country: "Iran"
|
||||
coordinates: "36.2972, 59.6067"
|
||||
historical_context: |
|
||||
Mashhad is a major city in northeastern Iran, holy city of Shia Islam
|
||||
(shrine of Imam Reza). The Jewish community dated to ancient times but
|
||||
faced forced conversion in 1839. By 1896, some families openly practiced
|
||||
Judaism while others remained crypto-Jews.
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Physical Description
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
physical_description:
|
||||
dimensions: "53 x 37 cm"
|
||||
material: "ink and paint on paper"
|
||||
decoration: |
|
||||
- Red and green rules divide the paper into rectangular sections
|
||||
- Middle section contains the ketubah text
|
||||
- Top and sides filled with elaborate arch and floral patterns
|
||||
- Colors: blue, gold, and silver paint
|
||||
- Strips of red paper pasted on all four sides as frame
|
||||
condition: "Some damage to the text containing the Isaiah quote and to the borders"
|
||||
script: "Hebrew square script"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Hebrew Naming Conventions Demonstrated
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
naming_conventions_notes: |
|
||||
Hebrew/Jewish naming conventions demonstrated in this REAL document:
|
||||
|
||||
1. PATRONYMIC SYSTEM:
|
||||
- בן (ben): "son of" - used for males
|
||||
- בת (bat): "daughter of" - used for females
|
||||
- Example: משה בן משיאח = "Mosheh son of Mashiah"
|
||||
|
||||
2. PERSIAN JEWISH NAMES:
|
||||
- משיאח (Mashiah/Messiah): Common Persian Jewish given name
|
||||
- רבקה (Rivkah/Rebecca): Biblical matriarch name
|
||||
- יעקב (Ya'akov/Jacob): Biblical patriarch name
|
||||
|
||||
3. KETUBAH STRUCTURE:
|
||||
- Opening: בס״ד (B'siyata d'Shmaya - With Heaven's help)
|
||||
- Date: Hebrew calendar from Creation (anno mundi)
|
||||
- Location: City name transliterated into Hebrew script (here משהד = Mashhad)
|
||||
- Parties: Groom (חתן) and Bride (כלה) with patronymics
|
||||
- Blessing: Often biblical verses (here Isaiah 61:10)
|
||||
|
||||
4. MASHHAD JEWISH CONTEXT:
|
||||
- Community known as "Mashhadis" or "Jadid al-Islam"
|
||||
- After 1839 pogrom, many practiced Judaism secretly
|
||||
- Unique artistic traditions in ketubah decoration
|
||||
- Persian influences in ornamentation style
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Provenance
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
provenance:
|
||||
data_status: "REAL_HISTORICAL_DATA"
|
||||
|
||||
archive:
|
||||
name: "Yale University, Beinecke Rare Book & Manuscript Library"
|
||||
collection: "Hebrew Manuscripts Supplement"
|
||||
call_number: "Hebrew MSS suppl 194 (Broadside)"
|
||||
catalog_record: "8574921"
|
||||
object_id: "2067542"
|
||||
|
||||
digital_access:
|
||||
url: "https://digital.library.yale.edu/catalog/2067542"
|
||||
iiif_manifest: "https://digital.library.yale.edu/manifests/2067542"
|
||||
pdf_url: "https://digital.library.yale.edu/pdfs/2067542.pdf"
|
||||
|
||||
document_metadata:
|
||||
date_hebrew: "23 Elul 5656"
|
||||
date_gregorian: "1896-09-01"
|
||||
place: "Mashhad, Iran"
|
||||
groom: "Mosheh ben Mashiah"
|
||||
bride: "Rivkah bat Ya'akov"
|
||||
physical_extent: "1 leaf, 53 x 37 cm, color illustrations"
|
||||
|
||||
languages:
|
||||
- "Hebrew"
|
||||
- "Official Aramaic (700-300 BCE); Imperial Aramaic (700-300 BCE)"
|
||||
|
||||
subjects:
|
||||
geographic: "Mashhad (Iran) -- Religious life and customs"
|
||||
topical:
|
||||
- "Ketubah -- Iran -- Mashhad"
|
||||
- "Prenuptial agreements (Jewish law)"
|
||||
|
||||
genres:
|
||||
- "Autographs"
|
||||
- "Illustrations"
|
||||
- "Ketubahs"
|
||||
- "Manuscripts"
|
||||
- "Marginalia"
|
||||
|
||||
rights: |
|
||||
The use of this image may be subject to the copyright law of the
|
||||
United States (Title 17, United States Code) or to site license or
|
||||
other rights management terms and conditions. The person using the
|
||||
image is liable for any infringement.
|
||||
|
||||
access_date: "2025-01-13"
|
||||
|
||||
citation: |
|
||||
"Ketubah : Mashhad, Iran, 1896, September 1," Yale University Library,
|
||||
Beinecke Rare Book and Manuscript Library, Hebrew MSS suppl 194 (Broadside),
|
||||
Object ID 2067542. Digital Collections, https://digital.library.yale.edu/catalog/2067542
|
||||
(accessed January 13, 2025).
|
||||
|
||||
verification_notes: |
|
||||
This is a REAL historical document with verified provenance:
|
||||
- Held at Yale University Beinecke Rare Book & Manuscript Library
|
||||
- Fully digitized and publicly accessible
|
||||
- Catalog record #8574921 with complete metadata
|
||||
- Both principal parties (groom and bride) are named in Yale's catalog
|
||||
- Physical dimensions and condition documented
|
||||
- High-resolution images available via IIIF manifest
|
||||
- Document represents unique Mashhad Jewish community traditions
|
||||
|
|
@ -0,0 +1,263 @@
|
|||
# =============================================================================
|
||||
# PiCo Example 7: Spanish Colonial Baptism Record
|
||||
# =============================================================================
|
||||
# Part of: data/entity_annotation/modules/integrations/pico/examples/
|
||||
# Parent: _examples_index.yaml
|
||||
#
|
||||
# DATA STATUS: SYNTHETIC_EXAMPLE
|
||||
#
|
||||
# Spanish colonial baptismal records from New Spain (Mexico) with rich
|
||||
# genealogical data including casta (racial/social classification)
|
||||
# designations and compadrazgo (godparent) relationships.
|
||||
#
|
||||
# Language: Spanish
|
||||
# Period: 1742 CE
|
||||
# Source Type: Baptismal register (Libro de bautismos)
|
||||
# Location: Mexico City, New Spain
|
||||
#
|
||||
# Last Updated: 2025-12-12
|
||||
# =============================================================================
|
||||
|
||||
example_id: "example_07_spanish_colonial"
|
||||
example_title: "Spanish Colonial Baptism Record - Mexico City (1742)"
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
source_language: "Spanish"
|
||||
source_type: "baptismal_register"
|
||||
|
||||
description: |
|
||||
Spanish colonial baptismal record from New Spain (Mexico) demonstrating
|
||||
the casta system and compadrazgo relationships.
|
||||
|
||||
Key features:
|
||||
- Casta designations (español, mestizo, mulato, indio, etc.)
|
||||
- Legitimacy markers (hijo legítimo vs hijo natural)
|
||||
- Compadrazgo (godparent relationships creating spiritual kinship)
|
||||
- Place of origin (vecino de, natural de)
|
||||
- Ecclesiastical formulae and clerical titles (Br., teniente de cura)
|
||||
|
||||
source_text: |
|
||||
En la ciudad de México, a veinte y tres días del mes de febrero de mil
|
||||
setecientos cuarenta y dos años, yo el Br. Don Antonio de Mendoza,
|
||||
teniente de cura de esta santa iglesia catedral, bauticé solemnemente,
|
||||
puse óleo y crisma a Juan José, español, hijo legítimo de Don Pedro
|
||||
García de la Cruz, español, natural de la villa de Puebla de los Ángeles,
|
||||
y de Doña María Josefa de los Reyes, española, natural de esta ciudad.
|
||||
|
||||
Fueron sus padrinos Don Francisco Xavier de Castañeda, español, vecino
|
||||
de esta ciudad, y Doña Ana María de la Encarnación, su legítima esposa,
|
||||
a quienes advertí el parentesco espiritual y obligaciones que contrajeron.
|
||||
|
||||
Y lo firmé.
|
||||
Br. Don Antonio de Mendoza
|
||||
|
||||
expected_extraction:
|
||||
description: "Spanish colonial baptism demonstrating casta system and compadrazgo"
|
||||
|
||||
pico_observation:
|
||||
observation_id: "bautismo_mexico_1742_juan_jose_garcia"
|
||||
observed_at: "2025-12-12T12:00:00Z"
|
||||
source_type: "baptismal_register"
|
||||
source_reference: "Libro de Bautismos, Catedral de México, 23 Feb 1742"
|
||||
|
||||
persons:
|
||||
- person_index: 0
|
||||
pnv_name:
|
||||
literalName: "Juan José"
|
||||
givenName: "Juan José"
|
||||
roles:
|
||||
- role_title: "bautizado"
|
||||
role_in_source: "baptized"
|
||||
biographical:
|
||||
casta: "español"
|
||||
legitimacy: "hijo legítimo"
|
||||
religion: "Catholic"
|
||||
family_relationships:
|
||||
parent:
|
||||
- person_index: 1
|
||||
target_name: "Don Pedro García de la Cruz"
|
||||
- person_index: 2
|
||||
target_name: "Doña María Josefa de los Reyes"
|
||||
godparent:
|
||||
- person_index: 3
|
||||
target_name: "Don Francisco Xavier de Castañeda"
|
||||
- person_index: 4
|
||||
target_name: "Doña Ana María de la Encarnación"
|
||||
context: "Infant being baptized"
|
||||
|
||||
- person_index: 1
|
||||
pnv_name:
|
||||
literalName: "Don Pedro García de la Cruz"
|
||||
givenName: "Pedro"
|
||||
surnamePrefix: "García de"
|
||||
baseSurname: "la Cruz"
|
||||
honorificPrefix: "Don"
|
||||
biographical:
|
||||
casta: "español"
|
||||
origin: "natural de la villa de Puebla de los Ángeles"
|
||||
family_relationships:
|
||||
spouse:
|
||||
- person_index: 2
|
||||
target_name: "Doña María Josefa de los Reyes"
|
||||
children:
|
||||
- person_index: 0
|
||||
target_name: "Juan José"
|
||||
context: "Father of the baptized child"
|
||||
|
||||
- person_index: 2
|
||||
pnv_name:
|
||||
literalName: "Doña María Josefa de los Reyes"
|
||||
givenName: "María Josefa"
|
||||
surnamePrefix: "de"
|
||||
baseSurname: "los Reyes"
|
||||
honorificPrefix: "Doña"
|
||||
biographical:
|
||||
casta: "española"
|
||||
origin: "natural de esta ciudad"
|
||||
family_relationships:
|
||||
spouse:
|
||||
- person_index: 1
|
||||
target_name: "Don Pedro García de la Cruz"
|
||||
children:
|
||||
- person_index: 0
|
||||
target_name: "Juan José"
|
||||
context: "Mother of the baptized child"
|
||||
|
||||
- person_index: 3
|
||||
pnv_name:
|
||||
literalName: "Don Francisco Xavier de Castañeda"
|
||||
givenName: "Francisco Xavier"
|
||||
surnamePrefix: "de"
|
||||
baseSurname: "Castañeda"
|
||||
honorificPrefix: "Don"
|
||||
roles:
|
||||
- role_title: "padrino"
|
||||
role_in_source: "godfather"
|
||||
biographical:
|
||||
casta: "español"
|
||||
residence: "vecino de esta ciudad"
|
||||
family_relationships:
|
||||
spouse:
|
||||
- person_index: 4
|
||||
target_name: "Doña Ana María de la Encarnación"
|
||||
godchildren:
|
||||
- person_index: 0
|
||||
target_name: "Juan José"
|
||||
compadre:
|
||||
- person_index: 1
|
||||
target_name: "Don Pedro García de la Cruz"
|
||||
context: "Godfather (padrino)"
|
||||
|
||||
- person_index: 4
|
||||
pnv_name:
|
||||
literalName: "Doña Ana María de la Encarnación"
|
||||
givenName: "Ana María"
|
||||
surnamePrefix: "de"
|
||||
baseSurname: "la Encarnación"
|
||||
honorificPrefix: "Doña"
|
||||
roles:
|
||||
- role_title: "madrina"
|
||||
role_in_source: "godmother"
|
||||
biographical:
|
||||
marital_status: "legítima esposa"
|
||||
family_relationships:
|
||||
spouse:
|
||||
- person_index: 3
|
||||
target_name: "Don Francisco Xavier de Castañeda"
|
||||
godchildren:
|
||||
- person_index: 0
|
||||
target_name: "Juan José"
|
||||
comadre:
|
||||
- person_index: 2
|
||||
target_name: "Doña María Josefa de los Reyes"
|
||||
context: "Godmother (madrina)"
|
||||
|
||||
- person_index: 5
|
||||
pnv_name:
|
||||
literalName: "Br. Don Antonio de Mendoza"
|
||||
givenName: "Antonio"
|
||||
surnamePrefix: "de"
|
||||
baseSurname: "Mendoza"
|
||||
honorificPrefix: "Br. Don"
|
||||
roles:
|
||||
- role_title: "teniente de cura"
|
||||
role_in_source: "officiant"
|
||||
biographical:
|
||||
ecclesiastical_position: "teniente de cura de esta santa iglesia catedral"
|
||||
family_relationships: {}
|
||||
context: "Priest who performed the baptism"
|
||||
|
||||
temporal_references:
|
||||
- expression: "a veinte y tres días del mes de febrero de mil setecientos cuarenta y dos años"
|
||||
normalized: "1742-02-23"
|
||||
calendar: "Gregorian"
|
||||
type: "DATE"
|
||||
|
||||
locations_mentioned:
|
||||
- name: "ciudad de México"
|
||||
type: "city"
|
||||
administrative_entity: "New Spain"
|
||||
- name: "santa iglesia catedral"
|
||||
type: "church"
|
||||
full_name: "Catedral Metropolitana de la Asunción de la Santísima Virgen María"
|
||||
- name: "villa de Puebla de los Ángeles"
|
||||
type: "city"
|
||||
modern_name: "Puebla"
|
||||
administrative_entity: "New Spain"
|
||||
|
||||
colonial_naming_notes: |
|
||||
Spanish colonial naming conventions demonstrated:
|
||||
|
||||
HONORIFIC TITLES:
|
||||
- Don/Doña: honorific indicating Spanish (peninsular or criollo) status
|
||||
- Br. (Bachiller): academic degree, often held by clergy
|
||||
|
||||
CASTA SYSTEM:
|
||||
- español/española: persons of Spanish descent (peninsular or criollo)
|
||||
- mestizo: Spanish + Indigenous ancestry
|
||||
- mulato: Spanish + African ancestry
|
||||
- indio: Indigenous person
|
||||
- (Many other classifications existed in the sistema de castas)
|
||||
|
||||
PLACE INDICATORS:
|
||||
- "natural de": indicates place of birth
|
||||
- "vecino de": indicates place of residence
|
||||
|
||||
LEGITIMACY MARKERS:
|
||||
- "hijo legítimo": legitimate child (parents married in Church)
|
||||
- "hijo natural": illegitimate child (parents not married)
|
||||
|
||||
COMPADRAZGO (Spiritual Kinship):
|
||||
- Padrino/madrina: godfather/godmother
|
||||
- Compadre/comadre: relationship between godparents and parents
|
||||
- "parentesco espiritual": spiritual kinship with religious obligations
|
||||
- Created lifelong obligations between families
|
||||
|
||||
provenance:
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
notes: |
|
||||
This example uses synthetic data based on standard Spanish colonial
|
||||
baptismal formulae for demonstration purposes. Names, dates, and
|
||||
locations are fictional but follow authentic 18th-century patterns.
|
||||
For real examples, see PROVENANCE_SOURCES.md.
|
||||
|
||||
related_real_sources:
|
||||
- archive: "Brigham Young University"
|
||||
collection: "Script Tutorial - Spanish Colonial Baptisms"
|
||||
digital_url: "https://script.byu.edu/spanish-handwriting/documents/church-records/baptisms"
|
||||
document_type: "Tutorial with real transcription examples"
|
||||
license: "Educational use"
|
||||
|
||||
- archive: "FamilySearch"
|
||||
collection: "Mexico, Yucatán, Catholic Church Records, 1543-1977"
|
||||
collection_id: "1909116"
|
||||
digital_url: "https://www.familysearch.org/en/search/collection/1909116"
|
||||
document_type: "Baptisms, marriages, deaths"
|
||||
license: "Free with registration"
|
||||
notes: "Contains some of the earliest New World records (from 1543)"
|
||||
|
||||
- archive: "Archivo General de la Nación (AGN)"
|
||||
location: "Mexico City, Mexico"
|
||||
collection: "Colonial parish records"
|
||||
document_type: "Spanish colonial baptismal records"
|
||||
period: "16th-20th century CE"
|
||||
languages: "Spanish, Nahuatl, Latin"
|
||||
|
|
@ -0,0 +1,315 @@
|
|||
# =============================================================================
|
||||
# PiCo Example 8: Italian Notarial Act (Venice, 1654 CE)
|
||||
# =============================================================================
|
||||
# Part of: data/entity_annotation/modules/integrations/pico/examples/
|
||||
# Parent: _examples_index.yaml
|
||||
#
|
||||
# DATA STATUS: SYNTHETIC_EXAMPLE
|
||||
#
|
||||
# Demonstrates extraction from an Italian notarial act showing:
|
||||
# - Italian naming conventions (patronymic "fu", "quondam")
|
||||
# - Venetian nobility titles (Nobil Homo, Magnifico)
|
||||
# - Profession-based surnames (Fabbro, Ferrari)
|
||||
# - Parish-based location (contrada, sestiere)
|
||||
#
|
||||
# Language: Italian (Venetian)
|
||||
# Period: 1654 CE
|
||||
# Source Type: Notarial act
|
||||
#
|
||||
# Last Updated: 2025-12-12
|
||||
# =============================================================================
|
||||
|
||||
example_id: "example_08_italian_notarial"
|
||||
example_title: "Italian Notarial Act - Venice (1654)"
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
source_language: "Italian"
|
||||
source_script: "Latin"
|
||||
source_type: "notarial_act"
|
||||
|
||||
description: |
|
||||
Example of a 17th-century Venetian notarial act demonstrating:
|
||||
- Italian naming conventions with Latin survivals
|
||||
- Venetian nobility titles and social hierarchy
|
||||
- Deceased father markers (fu, quondam)
|
||||
- Profession-based surnames
|
||||
- Parish-based location system (contrada)
|
||||
|
||||
Notarial acts were legal documents recording contracts, wills, property
|
||||
transfers, and other legal transactions. They provide rich genealogical
|
||||
and social history data.
|
||||
|
||||
source_text: |
|
||||
Adì 15 Marzo 1654, in Venetia.
|
||||
|
||||
Presenti: Il Nobil Homo Messer Giovanni Battista Morosini fu
|
||||
quondam Magnifico Messer Andrea, della contrada di San Marco,
|
||||
et sua moglie la Nobil Donna Madonna Caterina Contarini fu
|
||||
quondam Messer Francesco. Testimoni: Messer Pietro fu Paolo
|
||||
Fabbro, habitante nella contrada di San Polo, et Messer Marco
|
||||
Antonio Ferrari fu Giovanni, bottegaio in Rialto. Rogato io
|
||||
Notaro Antonio Zen fu quondam Messer Giacomo, Notaro publico
|
||||
di Venetia.
|
||||
|
||||
expected_extraction:
|
||||
pico_observation:
|
||||
observation_id: "notarial_venice_1654-03-15_morosini"
|
||||
source_type: "notarial_act"
|
||||
source_reference: "Notarial act, Venice, March 15, 1654"
|
||||
|
||||
persons:
|
||||
- person_index: 0
|
||||
pnv_name:
|
||||
literalName: "Il Nobil Homo Messer Giovanni Battista Morosini"
|
||||
givenName: "Giovanni Battista"
|
||||
baseSurname: "Morosini"
|
||||
honorificPrefix: "Il Nobil Homo Messer"
|
||||
roles:
|
||||
- role_title: "principal party"
|
||||
role_in_source: "party to act"
|
||||
biographical:
|
||||
social_status: "Venetian nobility"
|
||||
patronymic: "fu quondam Magnifico Messer Andrea"
|
||||
father_status: "deceased (quondam)"
|
||||
family_relationships:
|
||||
father:
|
||||
- person_index: 1
|
||||
target_name: "Magnifico Messer Andrea Morosini"
|
||||
spouse:
|
||||
- person_index: 2
|
||||
target_name: "Nobil Donna Madonna Caterina Contarini"
|
||||
context: "Principal party, Venetian noble"
|
||||
|
||||
- person_index: 1
|
||||
pnv_name:
|
||||
literalName: "Magnifico Messer Andrea Morosini"
|
||||
givenName: "Andrea"
|
||||
baseSurname: "Morosini"
|
||||
honorificPrefix: "Magnifico Messer"
|
||||
roles: []
|
||||
biographical:
|
||||
social_status: "Venetian nobility"
|
||||
deceased: true
|
||||
deceased_marker: "quondam"
|
||||
family_relationships:
|
||||
child:
|
||||
- person_index: 0
|
||||
target_name: "Giovanni Battista Morosini"
|
||||
context: "Father of Giovanni Battista, deceased"
|
||||
|
||||
- person_index: 2
|
||||
pnv_name:
|
||||
literalName: "Nobil Donna Madonna Caterina Contarini"
|
||||
givenName: "Caterina"
|
||||
baseSurname: "Contarini"
|
||||
honorificPrefix: "Nobil Donna Madonna"
|
||||
roles:
|
||||
- role_title: "moglie"
|
||||
role_in_source: "wife"
|
||||
biographical:
|
||||
social_status: "Venetian nobility"
|
||||
patronymic: "fu quondam Messer Francesco"
|
||||
family_relationships:
|
||||
father:
|
||||
- person_index: 3
|
||||
target_name: "Messer Francesco Contarini"
|
||||
spouse:
|
||||
- person_index: 0
|
||||
target_name: "Giovanni Battista Morosini"
|
||||
context: "Wife of Giovanni Battista"
|
||||
|
||||
- person_index: 3
|
||||
pnv_name:
|
||||
literalName: "Messer Francesco Contarini"
|
||||
givenName: "Francesco"
|
||||
baseSurname: "Contarini"
|
||||
honorificPrefix: "Messer"
|
||||
roles: []
|
||||
biographical:
|
||||
deceased: true
|
||||
deceased_marker: "quondam"
|
||||
family_relationships:
|
||||
child:
|
||||
- person_index: 2
|
||||
target_name: "Caterina Contarini"
|
||||
context: "Father of Caterina, deceased"
|
||||
|
||||
- person_index: 4
|
||||
pnv_name:
|
||||
literalName: "Messer Pietro fu Paolo Fabbro"
|
||||
givenName: "Pietro"
|
||||
baseSurname: "Fabbro"
|
||||
honorificPrefix: "Messer"
|
||||
roles:
|
||||
- role_title: "testimone"
|
||||
role_in_source: "witness"
|
||||
biographical:
|
||||
patronymic: "fu Paolo"
|
||||
residence: "contrada di San Polo"
|
||||
family_relationships:
|
||||
father:
|
||||
- person_index: 5
|
||||
target_name: "Paolo Fabbro"
|
||||
context: "First witness"
|
||||
|
||||
- person_index: 5
|
||||
pnv_name:
|
||||
literalName: "Paolo Fabbro"
|
||||
givenName: "Paolo"
|
||||
baseSurname: "Fabbro"
|
||||
roles: []
|
||||
biographical:
|
||||
deceased: true
|
||||
family_relationships:
|
||||
child:
|
||||
- person_index: 4
|
||||
target_name: "Pietro Fabbro"
|
||||
context: "Father of witness Pietro, deceased"
|
||||
|
||||
- person_index: 6
|
||||
pnv_name:
|
||||
literalName: "Messer Marco Antonio Ferrari fu Giovanni"
|
||||
givenName: "Marco Antonio"
|
||||
baseSurname: "Ferrari"
|
||||
honorificPrefix: "Messer"
|
||||
roles:
|
||||
- role_title: "testimone"
|
||||
role_in_source: "witness"
|
||||
biographical:
|
||||
patronymic: "fu Giovanni"
|
||||
occupation: "bottegaio"
|
||||
workplace: "Rialto"
|
||||
family_relationships:
|
||||
father:
|
||||
- person_index: 7
|
||||
target_name: "Giovanni Ferrari"
|
||||
context: "Second witness, shopkeeper"
|
||||
|
||||
- person_index: 7
|
||||
pnv_name:
|
||||
literalName: "Giovanni Ferrari"
|
||||
givenName: "Giovanni"
|
||||
baseSurname: "Ferrari"
|
||||
roles: []
|
||||
biographical:
|
||||
deceased: true
|
||||
family_relationships:
|
||||
child:
|
||||
- person_index: 6
|
||||
target_name: "Marco Antonio Ferrari"
|
||||
context: "Father of witness Marco Antonio, deceased"
|
||||
|
||||
- person_index: 8
|
||||
pnv_name:
|
||||
literalName: "Notaro Antonio Zen fu quondam Messer Giacomo"
|
||||
givenName: "Antonio"
|
||||
baseSurname: "Zen"
|
||||
honorificPrefix: "Notaro"
|
||||
roles:
|
||||
- role_title: "notaro"
|
||||
role_in_source: "notary"
|
||||
biographical:
|
||||
patronymic: "fu quondam Messer Giacomo"
|
||||
occupation: "Notaro publico di Venetia"
|
||||
family_relationships:
|
||||
father:
|
||||
- person_index: 9
|
||||
target_name: "Messer Giacomo Zen"
|
||||
context: "Notary who drafted the act"
|
||||
|
||||
- person_index: 9
|
||||
pnv_name:
|
||||
literalName: "Messer Giacomo Zen"
|
||||
givenName: "Giacomo"
|
||||
baseSurname: "Zen"
|
||||
honorificPrefix: "Messer"
|
||||
roles: []
|
||||
biographical:
|
||||
deceased: true
|
||||
deceased_marker: "quondam"
|
||||
family_relationships:
|
||||
child:
|
||||
- person_index: 8
|
||||
target_name: "Antonio Zen"
|
||||
context: "Father of notary, deceased"
|
||||
|
||||
temporal_references:
|
||||
- expression: "Adì 15 Marzo 1654"
|
||||
normalized: "1654-03-15"
|
||||
calendar: "Gregorian"
|
||||
type: "DATE"
|
||||
|
||||
locations_mentioned:
|
||||
- name: "Venetia"
|
||||
name_modern: "Venice"
|
||||
type: "city"
|
||||
- name: "contrada di San Marco"
|
||||
type: "parish/district"
|
||||
parent: "Venice"
|
||||
- name: "contrada di San Polo"
|
||||
type: "parish/district"
|
||||
parent: "Venice"
|
||||
- name: "Rialto"
|
||||
type: "district/market"
|
||||
parent: "Venice"
|
||||
|
||||
italian_naming_notes: |
|
||||
Italian notarial naming conventions demonstrated:
|
||||
|
||||
DECEASED FATHER MARKERS:
|
||||
- "fu": Italian for "was" - indicates deceased father
|
||||
- "quondam": Latin survival meaning "formerly/the late"
|
||||
- Often combined: "fu quondam" for emphasis
|
||||
|
||||
VENETIAN NOBILITY TITLES:
|
||||
- "Magnifico Messer": high honorific for nobility
|
||||
- "Il Nobil Homo" / "N.H.": Venetian noble title (male)
|
||||
- "Nobil Donna" / "N.D.": Venetian noble title (female)
|
||||
- "Madonna": honorific for married noble women
|
||||
|
||||
COMMONER TITLES:
|
||||
- "Messer": general respectful address (Mister)
|
||||
|
||||
PROFESSION-BASED SURNAMES:
|
||||
- Fabbro: smith (from Latin faber)
|
||||
- Ferrari: ironworker (from Latin ferrarius)
|
||||
|
||||
LOCATION INDICATORS:
|
||||
- "habitante in/nella": residence indicator
|
||||
- "bottegaio in": shopkeeper, followed by the place of work (e.g. "bottegaio in Rialto")
|
||||
- Contrada: parish neighborhood system of Venice
|
||||
- Sestiere: one of six districts of Venice
|
||||
|
||||
NOTARIAL TERMINOLOGY:
|
||||
- "Rogato": drafted/witnessed (by notary)
|
||||
- "Notaro publico": public notary (licensed)
|
||||
|
||||
provenance:
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
notes: |
|
||||
This example uses synthetic data based on authentic 17th-century
|
||||
Venetian notarial document formulae for demonstration purposes.
|
||||
Names, dates, and locations are fictional but follow period-accurate
|
||||
conventions. For real examples, see PROVENANCE_SOURCES.md.
|
||||
|
||||
related_real_sources:
|
||||
- archive: "Italian Ministry of Culture"
|
||||
project: "Antenati (Ancestors)"
|
||||
digital_url: "https://antenati.cultura.gov.it/"
|
||||
venice_url: "https://antenati.cultura.gov.it/archivio/state-archives-of-venezia/?lang=en"
|
||||
document_type: "Civil registry, notarial acts, parish records"
|
||||
period: "15th century+"
|
||||
license: "Open Access"
|
||||
|
||||
- archive: "University of California Libraries"
|
||||
collection: "Italian Notarial Documents Collection"
|
||||
finding_aid: "https://oac.cdlib.org/findaid/ark:%2F13030%2Fc8v412zd"
|
||||
document_count: "168 documents"
|
||||
period: "1465-1635 CE"
|
||||
locations: "Venice, Padua, Verona"
|
||||
languages: "Latin, Italian (Venetian)"
|
||||
|
||||
- project: "SION-Digit (Sources for the History of Italian Jewish Notarial Documents)"
|
||||
coverage: "Venice, Bordeaux, Amsterdam"
|
||||
period: "16th-18th century CE"
|
||||
focus: "Jewish community notarial acts"
|
||||
languages: "Italian, Hebrew, Ladino"
|
||||
|
|
@ -0,0 +1,259 @@
|
|||
# =============================================================================
|
||||
# PiCo Example 9: Greek Orthodox Parish Register (1875 CE, Thessaloniki)
|
||||
# =============================================================================
|
||||
# Part of: data/entity_annotation/modules/integrations/pico/examples/
|
||||
# Parent: _examples_index.yaml
|
||||
#
|
||||
# DATA STATUS: SYNTHETIC_EXAMPLE
|
||||
#
|
||||
# Demonstrates extraction from a Greek Orthodox baptismal register showing:
|
||||
# - Greek script with romanization
|
||||
# - Greek patronymics (του + genitive)
|
||||
# - Godparent system (νονός/νονά)
|
||||
# - Orthodox naming conventions
|
||||
# - Deceased marker (μακαρίτης/μακαρίτισσα)
|
||||
#
|
||||
# Language: Greek (polytonic)
|
||||
# Period: 1875 CE
|
||||
# Source Type: Baptismal register
|
||||
# Calendar: Julian (Orthodox Church)
|
||||
#
|
||||
# Last Updated: 2025-12-12
|
||||
# =============================================================================
|
||||
|
||||
example_id: "example_09_greek_baptismal_register"
|
||||
example_title: "Greek Orthodox Baptismal Register - Thessaloniki 1875"
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
source_language: "Greek"
|
||||
source_script: "Greek (polytonic)"
|
||||
source_period: "1875 CE"
|
||||
source_type: "baptismal_register"
|
||||
|
||||
description: |
|
||||
This example demonstrates extraction from a 19th-century Greek Orthodox
|
||||
baptismal register, illustrating key features of Greek naming conventions
|
||||
and ecclesiastical record-keeping during the Ottoman period.
|
||||
|
||||
Key features demonstrated:
|
||||
- Polytonic Greek orthography (common in 19th century)
|
||||
- Patronymic formation with του + genitive case
|
||||
- Godparent (νονός/νονά) relationships
|
||||
- Deceased marker μακαρίτης/μακαρίτισσα ("the late")
|
||||
- Surnames derived from occupations (Παπαδόπουλος, Οἰκονόμος)
|
||||
- Ecclesiastical titles (Πρωτοπρεσβύτερος = Archpriest)
|
||||
- Julian calendar dating (Greek Orthodox tradition)
|
||||
|
||||
source_text: |
|
||||
Ἐν Θεσσαλονίκῃ, τῇ δεκάτῃ πέμπτῃ Μαρτίου τοῦ ἔτους 1875.
|
||||
|
||||
Ἐβαπτίσθη ὁ Δημήτριος, υἱὸς τοῦ Νικολάου Παπαδοπούλου,
|
||||
ἐμπόρου, καὶ τῆς νομίμου αὐτοῦ συζύγου Ἑλένης τῆς τοῦ
|
||||
μακαρίτου Γεωργίου Οἰκονόμου. Νονὸς ὁ Κωνσταντῖνος
|
||||
Καρατζᾶς τοῦ Ἰωάννου, ἰατρός. Ἱερεύς: ὁ Πρωτοπρεσβύτερος
|
||||
Ἀθανάσιος Χρυσοστόμου.
|
||||
|
||||
expected_extraction:
|
||||
pico_observation:
|
||||
observation_id: "baptism_thessaloniki_1875-03-15_papadopoulos"
|
||||
source_type: "baptismal_register"
|
||||
source_reference: "Greek Orthodox baptismal register, Thessaloniki, March 15, 1875"
|
||||
|
||||
persons:
|
||||
- person_index: 0
|
||||
pnv_name:
|
||||
literalName: "Δημήτριος"
|
||||
literalName_romanized: "Dimitrios"
|
||||
givenName: "Δημήτριος"
|
||||
givenName_romanized: "Dimitrios"
|
||||
roles:
|
||||
- role_title: "βαπτισθείς"
|
||||
role_in_source: "baptized infant"
|
||||
biographical:
|
||||
sex: "male"
|
||||
religion: "Greek Orthodox"
|
||||
family_relationships:
|
||||
father:
|
||||
- person_index: 1
|
||||
target_name: "Νικόλαος Παπαδόπουλος"
|
||||
mother:
|
||||
- person_index: 2
|
||||
target_name: "Ἑλένη"
|
||||
godfather:
|
||||
- person_index: 4
|
||||
target_name: "Κωνσταντῖνος Καρατζᾶς"
|
||||
context: "Baptized infant"
|
||||
|
||||
- person_index: 1
|
||||
pnv_name:
|
||||
literalName: "Νικόλαος Παπαδόπουλος"
|
||||
literalName_romanized: "Nikolaos Papadopoulos"
|
||||
givenName: "Νικόλαος"
|
||||
givenName_romanized: "Nikolaos"
|
||||
baseSurname: "Παπαδόπουλος"
|
||||
baseSurname_romanized: "Papadopoulos"
|
||||
roles:
|
||||
- role_title: "πατήρ"
|
||||
role_in_source: "father"
|
||||
biographical:
|
||||
occupation: "ἔμπορος (merchant)"
|
||||
family_relationships:
|
||||
child:
|
||||
- person_index: 0
|
||||
target_name: "Δημήτριος"
|
||||
spouse:
|
||||
- person_index: 2
|
||||
target_name: "Ἑλένη"
|
||||
context: "Father of the baptized, merchant"
|
||||
|
||||
- person_index: 2
|
||||
pnv_name:
|
||||
literalName: "Ἑλένη τῆς τοῦ μακαρίτου Γεωργίου Οἰκονόμου"
|
||||
literalName_romanized: "Eleni tis tou makaritou Georgiou Oikonomou"
|
||||
givenName: "Ἑλένη"
|
||||
givenName_romanized: "Eleni"
|
||||
roles:
|
||||
- role_title: "μήτηρ"
|
||||
role_in_source: "mother"
|
||||
biographical:
|
||||
marital_status: "νομίμη σύζυγος (lawful wife)"
|
||||
patronymic: "τῆς τοῦ μακαρίτου Γεωργίου Οἰκονόμου"
|
||||
family_relationships:
|
||||
father:
|
||||
- person_index: 3
|
||||
target_name: "Γεώργιος Οἰκονόμος"
|
||||
child:
|
||||
- person_index: 0
|
||||
target_name: "Δημήτριος"
|
||||
spouse:
|
||||
- person_index: 1
|
||||
target_name: "Νικόλαος Παπαδόπουλος"
|
||||
context: "Mother of the baptized"
|
||||
|
||||
- person_index: 3
|
||||
pnv_name:
|
||||
literalName: "μακαρίτης Γεώργιος Οἰκονόμος"
|
||||
literalName_romanized: "makaritis Georgios Oikonomos"
|
||||
givenName: "Γεώργιος"
|
||||
givenName_romanized: "Georgios"
|
||||
baseSurname: "Οἰκονόμος"
|
||||
baseSurname_romanized: "Oikonomos"
|
||||
roles: []
|
||||
biographical:
|
||||
deceased: true
|
||||
deceased_marker: "μακαρίτης"
|
||||
family_relationships:
|
||||
child:
|
||||
- person_index: 2
|
||||
target_name: "Ἑλένη"
|
||||
context: "Maternal grandfather, deceased"
|
||||
|
||||
- person_index: 4
|
||||
pnv_name:
|
||||
literalName: "Κωνσταντῖνος Καρατζᾶς τοῦ Ἰωάννου"
|
||||
literalName_romanized: "Konstantinos Karatzas tou Ioannou"
|
||||
givenName: "Κωνσταντῖνος"
|
||||
givenName_romanized: "Konstantinos"
|
||||
baseSurname: "Καρατζᾶς"
|
||||
baseSurname_romanized: "Karatzas"
|
||||
roles:
|
||||
- role_title: "νονός"
|
||||
role_in_source: "godfather"
|
||||
biographical:
|
||||
occupation: "ἰατρός (physician)"
|
||||
patronymic: "τοῦ Ἰωάννου"
|
||||
family_relationships:
|
||||
father:
|
||||
- person_index: 5
|
||||
target_name: "Ἰωάννης Καρατζᾶς"
|
||||
godchild:
|
||||
- person_index: 0
|
||||
target_name: "Δημήτριος"
|
||||
context: "Godfather, physician"
|
||||
|
||||
- person_index: 5
|
||||
pnv_name:
|
||||
literalName: "Ἰωάννης Καρατζᾶς"
|
||||
literalName_romanized: "Ioannis Karatzas"
|
||||
givenName: "Ἰωάννης"
|
||||
givenName_romanized: "Ioannis"
|
||||
baseSurname: "Καρατζᾶς"
|
||||
baseSurname_romanized: "Karatzas"
|
||||
roles: []
|
||||
biographical: {}
|
||||
family_relationships:
|
||||
child:
|
||||
- person_index: 4
|
||||
target_name: "Κωνσταντῖνος Καρατζᾶς"
|
||||
context: "Father of godfather"
|
||||
|
||||
- person_index: 6
|
||||
pnv_name:
|
||||
literalName: "Πρωτοπρεσβύτερος Ἀθανάσιος Χρυσοστόμου"
|
||||
literalName_romanized: "Protopresbyteros Athanasios Chrysostomou"
|
||||
givenName: "Ἀθανάσιος"
|
||||
givenName_romanized: "Athanasios"
|
||||
patronymic: "Χρυσοστόμου"
|
||||
patronymic_romanized: "Chrysostomou"
|
||||
honorificPrefix: "Πρωτοπρεσβύτερος"
|
||||
roles:
|
||||
- role_title: "ἱερεύς"
|
||||
role_in_source: "priest"
|
||||
biographical:
|
||||
ecclesiastical_rank: "Πρωτοπρεσβύτερος (Protopresbyter/Archpriest)"
|
||||
family_relationships: {}
|
||||
context: "Officiating priest"
|
||||
|
||||
temporal_references:
|
||||
- expression: "τῇ δεκάτῃ πέμπτῃ Μαρτίου τοῦ ἔτους 1875"
|
||||
expression_romanized: "ti dekati pempti Martiou tou etous 1875"
|
||||
normalized: "1875-03-15"
|
||||
calendar: "Julian"
|
||||
type: "DATE"
|
||||
note: "Greek Orthodox used Julian calendar; Gregorian equivalent: March 27, 1875"
|
||||
|
||||
locations_mentioned:
|
||||
- name: "Θεσσαλονίκη"
|
||||
name_romanized: "Thessaloniki"
|
||||
type: "city"
|
||||
modern_country: "Greece"
|
||||
historical_context: "Ottoman Empire (Selanik vilayet)"
|
||||
|
||||
greek_naming_notes: |
|
||||
Greek Orthodox naming conventions demonstrated:
|
||||
- "τοῦ" + genitive: patronymic marker ("son/daughter of")
|
||||
- "μακαρίτης/μακαρίτισσα": deceased marker ("the late")
|
||||
- "νομίμη σύζυγος": lawful wife
|
||||
- "νονός/νονά": godfather/godmother
|
||||
- Surnames from occupations: Παπαδόπουλος (priest's son), Οἰκονόμος (steward)
|
||||
- Ecclesiastical titles: Πρωτοπρεσβύτερος (Archpriest)
|
||||
- Polytonic Greek orthography common in 19th century
|
||||
- Julian calendar used by Greek Orthodox Church
|
||||
|
||||
provenance:
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
notes: |
|
||||
This example uses synthetic data based on authentic Greek Orthodox
|
||||
baptismal register formulae for demonstration purposes. Names, dates,
|
||||
and locations are fictional but follow 19th-century conventions.
|
||||
For real examples, see PROVENANCE_SOURCES.md.
|
||||
|
||||
related_real_sources:
|
||||
- archive: "FamilySearch"
|
||||
wiki_url: "https://www.familysearch.org/en/wiki/Greece_Church_Records"
|
||||
document_type: "Baptisms, marriages, deaths"
|
||||
period: "17th century - 1925 CE"
|
||||
language: "Greek"
|
||||
license: "Free with registration"
|
||||
notes: "Greek Orthodox records are primary source before 1925 civil registration"
|
||||
|
||||
- archive: "Γενικά Αρχεία του Κράτους (General State Archives of Greece)"
|
||||
abbreviation: "GAK"
|
||||
document_type: "Church records, civil registry, Ottoman-era documents"
|
||||
period: "15th century - present"
|
||||
languages: "Greek, Ottoman Turkish"
|
||||
notes: "National archive with records from all Greek regions"
|
||||
|
||||
- resource: "Greek Ancestry"
|
||||
coverage: "Village church records guide"
|
||||
document_type: "Baptismal registers, marriage registers"
|
||||
notes: "Guides to accessing island and mainland records"
|
||||
|
|
@ -0,0 +1,489 @@
|
|||
# =============================================================================
|
||||
# PiCo Example 10: Russian Imperial Metrical Book - Birth of Stefan Nowicki
|
||||
# =============================================================================
|
||||
# Part of: data/entity_annotation/modules/integrations/pico/examples/
|
||||
# Parent: _examples_index.yaml
|
||||
#
|
||||
# DATA STATUS: REAL HISTORICAL DATA
|
||||
#
|
||||
# Source: Archiwum Panstwowe w Poznaniu Oddzial w Koninie
|
||||
# Reference Code: 54/792/0/6.1/140
|
||||
# Scan: 4 of 76
|
||||
# Document Date: 27 December 1893 (Julian) / 8 January 1894 (Gregorian)
|
||||
# Location: Osiek Wielki, Congress Poland, Russian Empire
|
||||
#
|
||||
# Demonstrates extraction from a Russian Imperial metrical book showing:
|
||||
# - Cyrillic script with romanization
|
||||
# - Polish names recorded in Russian (Congress Poland context)
|
||||
# - Pre-revolutionary orthography
|
||||
# - Julian/Gregorian calendar dual dating
|
||||
# - Godparents (vospriemniki)
|
||||
# - Village-level vital records
|
||||
#
|
||||
# Transcription verified by BYU Script Tutorial paleographers.
|
||||
#
|
||||
# Last Updated: 2025-12-12
|
||||
# =============================================================================
|
||||
|
||||
example_id: "example_10_russian_metrical_book"
|
||||
example_title: "Russian Imperial Metrical Book - Birth of Stefan Nowicki (1894)"
|
||||
data_status: "REAL_HISTORICAL_DATA"
|
||||
source_language: "Russian"
|
||||
source_script: "Cyrillic (pre-1918 orthography)"
|
||||
source_period: "1894 CE (Gregorian) / 1893 CE (Julian)"
|
||||
source_type: "metrical_book"
|
||||
document_subtype: "birth_record"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Source Text
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
source_text:
|
||||
russian_original: |
|
||||
Любины
|
||||
Состаялосъ въ деревнѣ осѣкъ велькій двадцать седьмаго Декабря
|
||||
/:восьмаго Января:/ тысяча восемьсоть девяносто третяго (четвертаго) года
|
||||
въ одинадцать часовъ утра Явился Янъ Новицкій /:Jan Nowicki:/
|
||||
сорока лѣтъ отъ роду земледѣлецъ изъ Любинъ, въ присутствіи
|
||||
Францишка Новицкаго сорока лѣтъ, и Михаила Влодарчика
|
||||
шестидесяти лѣтъ отъ роду, обоихъ земледѣльцевъ изъ Любинъ
|
||||
и предьявилъ намъ младенца мужскаго пола, объявляя
|
||||
что онъ родился въ Любинахъ двадцать пятаго Декабря
|
||||
/:шестаго Января:/ текущаго года, въ четыре часа вечеромъ
|
||||
отъ законной его жены Маріанны изъ Адамковъ /:Mary-
|
||||
anny z Adamkow:/ тридцати лѣтъ отъ роду, младенцу
|
||||
этому при святомъ крещеніи совершенномъ сего
|
||||
числа дано имя Стефанъ /:Stefan:/ а воспріемниками
|
||||
его были Войцех Гаудынъ, и Катаржина Гембка.
|
||||
Актъ сей объявляющему и свидѣтелямъ негра-
|
||||
мотнымъ прочитанъ нами только подписанъ
|
||||
Ксндзъ Павелъ Выборскій
|
||||
|
||||
romanized: |
|
||||
Lyubiny
|
||||
Sostoyalos' v derevne Osek Vel'kiy dvadtsat' sed'mago Dekabrya
|
||||
/:vos'mago Yanvarya:/ tysyacha vosem'sot' devyanosto tret'yago (chetvertago) goda
|
||||
v odinnadtsat' chasov utra Yavilsya Yan Novitskiy /:Jan Nowicki:/
|
||||
soroka let ot rodu zemledelets iz Lyubin, v prisutstvii
|
||||
Frantsishka Novitskago soroka let, i Mikhaila Vlodarchika
|
||||
shestidesyati let ot rodu, oboikh zemledeltsev iz Lyubin
|
||||
i pred'yavil nam mladentsa muzhskago pola, ob'yavlyaya
|
||||
chto on rodilsya v Lyubinakh dvadtsat' pyatago Dekabrya
|
||||
/:shestago Yanvarya:/ tekushchago goda, v chetyre chasa vecherom
|
||||
ot zakonnoy ego zheny Marianny iz Adamkov /:Mary-
|
||||
anny z Adamkow:/ tridtsati let ot rodu, mladentsu
|
||||
etomu pri svyatom kreshchenii sovershennom sego
|
||||
chisla dano imya Stefan /:Stefan:/ a vospriyemnikami
|
||||
ego byli Voytsekh Gaudyn, i Katarzhina Gembka.
|
||||
Akt sey ob'yavlyayushchemu i svidetel'yam negra-
|
||||
motnym prochitan nami tol'ko podpisan
|
||||
Ksndz Pavel Vyborskiy
|
||||
|
||||
english_translation: |
|
||||
Lubin
|
||||
It happened in the village of Osiek Wielki on the twenty-seventh of December
|
||||
/:eighth of January:/ in the year one thousand eight hundred ninety-three (four)
|
||||
at eleven o'clock in the morning. Appeared Jan Nowicki /:Jan Nowicki:/
|
||||
forty years of age, farmer from Lubin, in the presence of
|
||||
Franciszek Nowicki, forty years old, and Michal Wlodarczyk
|
||||
sixty years of age, both farmers from Lubin
|
||||
and presented to us an infant of the male sex, declaring
|
||||
that he was born in Lubin on the twenty-fifth of December
|
||||
/:sixth of January:/ of the current year, at four o'clock in the evening
|
||||
of his lawful wife Marianna nee Adamkow /:Mary-
|
||||
anna z Adamkow:/ thirty years of age. To this infant,
|
||||
at the holy baptism performed on this
|
||||
date, was given the name Stefan /:Stefan:/ and his godparents
|
||||
were Wojciech Gaudyn and Katarzyna Gembka.
|
||||
This act, to the declarant and to the illiterate witnesses,
|
||||
was read aloud and signed by us alone.
|
||||
Priest Pawel Wyborski
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Expected Extraction Output
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
expected_extraction:
|
||||
pico_observation:
|
||||
observation_id: "birth_osiek_wielki_1894_stefan_nowicki"
|
||||
source_type: "metrical_book"
|
||||
source_reference: "Akta stanu cywilnego Parafii Rzymskokatolickiej Osiek Wielki, Reference Code 54/792/0/6.1/140, scan 4/76"
|
||||
archive: "Archiwum Panstwowe w Poznaniu Oddzial w Koninie"
|
||||
|
||||
persons:
|
||||
# Person 0: The Infant (Stefan Nowicki)
|
||||
- person_index: 0
|
||||
pnv_name:
|
||||
literalName: "Стефанъ Новицкій"
|
||||
literalName_romanized: "Stefan Novitskiy"
|
||||
literalName_polish: "Stefan Nowicki"
|
||||
givenName: "Стефанъ"
|
||||
givenName_romanized: "Stefan"
|
||||
baseSurname: "Новицкій"
|
||||
baseSurname_romanized: "Novitskiy"
|
||||
baseSurname_polish: "Nowicki"
|
||||
roles:
|
||||
- role_title: "младенецъ"
|
||||
role_in_source: "infant"
|
||||
biographical:
|
||||
sex: "male"
|
||||
religion: "Roman Catholic"
|
||||
birth_date_julian: "1893-12-25"
|
||||
birth_date_gregorian: "1894-01-06"
|
||||
baptism_date_julian: "1893-12-27"
|
||||
baptism_date_gregorian: "1894-01-08"
|
||||
birth_place: "Любины (Lubin)"
|
||||
birth_time: "4 o'clock in the evening"
|
||||
family_relationships:
|
||||
father:
|
||||
- person_index: 1
|
||||
target_name: "Янъ Новицкій"
|
||||
mother:
|
||||
- person_index: 2
|
||||
target_name: "Маріанна изъ Адамковъ"
|
||||
godfather:
|
||||
- person_index: 5
|
||||
target_name: "Войцех Гаудынъ"
|
||||
godmother:
|
||||
- person_index: 6
|
||||
target_name: "Катаржина Гембка"
|
||||
context: "Newborn infant, subject of the birth registration"
|
||||
|
||||
# Person 1: Father (Jan Nowicki)
|
||||
- person_index: 1
|
||||
pnv_name:
|
||||
literalName: "Янъ Новицкій"
|
||||
literalName_romanized: "Yan Novitskiy"
|
||||
literalName_polish: "Jan Nowicki"
|
||||
givenName: "Янъ"
|
||||
givenName_romanized: "Yan"
|
||||
givenName_polish: "Jan"
|
||||
baseSurname: "Новицкій"
|
||||
baseSurname_romanized: "Novitskiy"
|
||||
baseSurname_polish: "Nowicki"
|
||||
roles:
|
||||
- role_title: "отецъ"
|
||||
role_in_source: "father"
|
||||
- role_title: "объявляющій"
|
||||
role_in_source: "declarant"
|
||||
biographical:
|
||||
sex: "male"
|
||||
age: 40
|
||||
age_expression: "сорока лѣтъ отъ роду"
|
||||
occupation: "земледѣлецъ (farmer)"
|
||||
residence: "Любины (Lubin)"
|
||||
literacy: "illiterate (implied - act read to him)"
|
||||
family_relationships:
|
||||
child:
|
||||
- person_index: 0
|
||||
target_name: "Стефанъ Новицкій"
|
||||
spouse:
|
||||
- person_index: 2
|
||||
target_name: "Маріанна изъ Адамковъ"
|
||||
possible_relative:
|
||||
- person_index: 3
|
||||
target_name: "Францишекъ Новицкій"
|
||||
relationship_type: "same surname - possibly brother or cousin"
|
||||
context: "Father of the infant, farmer from Lubin, appeared to register the birth"
|
||||
|
||||
# Person 2: Mother (Marianna nee Adamkow)
|
||||
- person_index: 2
|
||||
pnv_name:
|
||||
literalName: "Маріанна изъ Адамковъ"
|
||||
literalName_romanized: "Marianna iz Adamkov"
|
||||
literalName_polish: "Maryanna z Adamkow"
|
||||
givenName: "Маріанна"
|
||||
givenName_romanized: "Marianna"
|
||||
givenName_polish: "Maryanna"
|
||||
maidenName: "Адамковъ"
|
||||
maidenName_romanized: "Adamkov"
|
||||
maidenName_polish: "Adamkow"
|
||||
roles:
|
||||
- role_title: "мать"
|
||||
role_in_source: "mother"
|
||||
biographical:
|
||||
sex: "female"
|
||||
age: 30
|
||||
age_expression: "тридцати лѣтъ отъ роду"
|
||||
marital_status: "законная жена (lawful wife)"
|
||||
maiden_name_marker: "изъ (nee/z)"
|
||||
family_relationships:
|
||||
child:
|
||||
- person_index: 0
|
||||
target_name: "Стефанъ Новицкій"
|
||||
spouse:
|
||||
- person_index: 1
|
||||
target_name: "Янъ Новицкій"
|
||||
context: "Mother of the infant, lawful wife of Jan Nowicki"
|
||||
|
||||
# Person 3: First Witness (Franciszek Nowicki)
|
||||
- person_index: 3
|
||||
pnv_name:
|
||||
literalName: "Францишекъ Новицкій"
|
||||
literalName_romanized: "Frantsishek Novitskiy"
|
||||
literalName_polish: "Franciszek Nowicki"
|
||||
givenName: "Францишекъ"
|
||||
givenName_romanized: "Frantsishek"
|
||||
givenName_polish: "Franciszek"
|
||||
baseSurname: "Новицкій"
|
||||
baseSurname_romanized: "Novitskiy"
|
||||
baseSurname_polish: "Nowicki"
|
||||
roles:
|
||||
- role_title: "свидѣтель"
|
||||
role_in_source: "witness"
|
||||
biographical:
|
||||
sex: "male"
|
||||
age: 40
|
||||
age_expression: "сорока лѣтъ"
|
||||
occupation: "земледѣлецъ (farmer)"
|
||||
residence: "Любины (Lubin)"
|
||||
literacy: "illiterate (неграмотный)"
|
||||
family_relationships:
|
||||
possible_relative:
|
||||
- person_index: 1
|
||||
target_name: "Янъ Новицкій"
|
||||
relationship_type: "same surname, same age, same village - possibly brother"
|
||||
context: "First witness, farmer from Lubin, same surname as father"
|
||||
|
||||
# Person 4: Second Witness (Michal Wlodarczyk)
|
||||
- person_index: 4
|
||||
pnv_name:
|
||||
literalName: "Михаилъ Влодарчикъ"
|
||||
literalName_romanized: "Mikhail Vlodarchik"
|
||||
literalName_polish: "Michal Wlodarczyk"
|
||||
givenName: "Михаилъ"
|
||||
givenName_romanized: "Mikhail"
|
||||
givenName_polish: "Michal"
|
||||
baseSurname: "Влодарчикъ"
|
||||
baseSurname_romanized: "Vlodarchik"
|
||||
baseSurname_polish: "Wlodarczyk"
|
||||
roles:
|
||||
- role_title: "свидѣтель"
|
||||
role_in_source: "witness"
|
||||
biographical:
|
||||
sex: "male"
|
||||
age: 60
|
||||
age_expression: "шестидесяти лѣтъ отъ роду"
|
||||
occupation: "земледѣлецъ (farmer)"
|
||||
residence: "Любины (Lubin)"
|
||||
literacy: "illiterate (неграмотный)"
|
||||
family_relationships: {}
|
||||
context: "Second witness, farmer from Lubin, age 60"
|
||||
|
||||
# Person 5: Godfather (Wojciech Gaudyn)
|
||||
- person_index: 5
|
||||
pnv_name:
|
||||
literalName: "Войцех Гаудынъ"
|
||||
literalName_romanized: "Voytsekh Gaudyn"
|
||||
literalName_polish: "Wojciech Gaudyn"
|
||||
givenName: "Войцех"
|
||||
givenName_romanized: "Voytsekh"
|
||||
givenName_polish: "Wojciech"
|
||||
baseSurname: "Гаудынъ"
|
||||
baseSurname_romanized: "Gaudyn"
|
||||
baseSurname_polish: "Gaudyn"
|
||||
roles:
|
||||
- role_title: "воспріемникъ"
|
||||
role_in_source: "godfather"
|
||||
biographical:
|
||||
sex: "male"
|
||||
family_relationships:
|
||||
godchild:
|
||||
- person_index: 0
|
||||
target_name: "Стефанъ Новицкій"
|
||||
context: "Godfather (baptismal sponsor)"
|
||||
|
||||
# Person 6: Godmother (Katarzyna Gembka)
|
||||
- person_index: 6
|
||||
pnv_name:
|
||||
literalName: "Катаржина Гембка"
|
||||
literalName_romanized: "Katarzhina Gembka"
|
||||
literalName_polish: "Katarzyna Gembka"
|
||||
givenName: "Катаржина"
|
||||
givenName_romanized: "Katarzhina"
|
||||
givenName_polish: "Katarzyna"
|
||||
baseSurname: "Гембка"
|
||||
baseSurname_romanized: "Gembka"
|
||||
baseSurname_polish: "Gembka"
|
||||
roles:
|
||||
- role_title: "воспріемница"
|
||||
role_in_source: "godmother"
|
||||
biographical:
|
||||
sex: "female"
|
||||
family_relationships:
|
||||
godchild:
|
||||
- person_index: 0
|
||||
target_name: "Стефанъ Новицкій"
|
||||
context: "Godmother (baptismal sponsor)"
|
||||
|
||||
# Person 7: Priest (Pawel Wyborski)
|
||||
- person_index: 7
|
||||
pnv_name:
|
||||
literalName: "Ксндзъ Павелъ Выборскій"
|
||||
literalName_romanized: "Ksndz Pavel Vyborskiy"
|
||||
literalName_polish: "Ksiadz Pawel Wyborski"
|
||||
givenName: "Павелъ"
|
||||
givenName_romanized: "Pavel"
|
||||
givenName_polish: "Pawel"
|
||||
baseSurname: "Выборскій"
|
||||
baseSurname_romanized: "Vyborskiy"
|
||||
baseSurname_polish: "Wyborski"
|
||||
honorificPrefix: "Ксндзъ (Priest)"
|
||||
roles:
|
||||
- role_title: "ксндзъ"
|
||||
role_in_source: "priest"
|
||||
- role_title: "registrar"
|
||||
role_in_source: "signed the act"
|
||||
biographical:
|
||||
sex: "male"
|
||||
ecclesiastical_status: "Roman Catholic priest"
|
||||
literacy: "literate (only signer)"
|
||||
family_relationships: {}
|
||||
context: "Officiating priest who performed baptism and signed the registration"
|
||||
|
||||
temporal_references:
|
||||
- expression: "тысяча восемьсоть девяносто третяго (четвертаго) года"
|
||||
expression_romanized: "tysyacha vosem'sot' devyanosto tret'yago (chetvertago) goda"
|
||||
normalized_julian: "1893"
|
||||
normalized_gregorian: "1894"
|
||||
calendar: "Dual (Julian/Gregorian)"
|
||||
type: "YEAR"
|
||||
note: "Document shows both Julian (1893) and Gregorian (1894) years"
|
||||
|
||||
- expression: "двадцать седьмаго Декабря /:восьмаго Января:/"
|
||||
expression_romanized: "dvadtsat' sed'mago Dekabrya /:vos'mago Yanvarya:/"
|
||||
normalized_julian: "1893-12-27"
|
||||
normalized_gregorian: "1894-01-08"
|
||||
calendar: "Dual (Julian/Gregorian)"
|
||||
type: "DATE"
|
||||
event: "registration and baptism"
|
||||
|
||||
- expression: "двадцать пятаго Декабря /:шестаго Января:/"
|
||||
expression_romanized: "dvadtsat' pyatago Dekabrya /:shestago Yanvarya:/"
|
||||
normalized_julian: "1893-12-25"
|
||||
normalized_gregorian: "1894-01-06"
|
||||
calendar: "Dual (Julian/Gregorian)"
|
||||
type: "DATE"
|
||||
event: "birth"
|
||||
note: "Born on Christmas Day (Julian calendar)"
|
||||
|
||||
- expression: "въ четыре часа вечеромъ"
|
||||
expression_romanized: "v chetyre chasa vecherom"
|
||||
normalized: "16:00"
|
||||
type: "TIME"
|
||||
event: "birth"
|
||||
|
||||
- expression: "въ одинадцать часовъ утра"
|
||||
expression_romanized: "v odinnadtsat' chasov utra"
|
||||
normalized: "11:00"
|
||||
type: "TIME"
|
||||
event: "registration"
|
||||
|
||||
locations_mentioned:
|
||||
- name: "Осѣкъ Велькій"
|
||||
name_romanized: "Osek Vel'kiy"
|
||||
name_polish: "Osiek Wielki"
|
||||
type: "village (derevnya)"
|
||||
modern_location: "Greater Poland Voivodeship, Poland"
|
||||
coordinates: "52.2461, 18.6207"
|
||||
geonames_url: "https://www.google.com/maps/place/Osiek+Wielki,+Poland"
|
||||
|
||||
- name: "Любины"
|
||||
name_romanized: "Lyubiny"
|
||||
name_polish: "Lubin"
|
||||
type: "village"
|
||||
note: "Village where the family resided and child was born"
|
||||
|
||||
- name: "Parafia Rzymskokatolicka Osiek Wielki"
|
||||
type: "parish"
|
||||
note: "Roman Catholic Parish of Osiek Wielki - registration authority"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Russian/Polish Naming Conventions Demonstrated
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
naming_conventions_notes: |
|
||||
Congress Poland naming conventions demonstrated in this REAL document:
|
||||
|
||||
1. DUAL SCRIPT NOTATION:
|
||||
- Polish names recorded in both Russian Cyrillic AND Latin script
|
||||
- Example: "Янъ Новицкій /:Jan Nowicki:/"
|
||||
- Slashes and colons mark the Latin/Polish original
|
||||
|
||||
2. PRE-REVOLUTIONARY ORTHOGRAPHY:
|
||||
- Hard sign (ъ) at end of words after a consonant: Янъ, Стефанъ
|
||||
- Yat instead of e: лѣтъ, деревнѣ, свидѣтелямъ
|
||||
- -аго/-яго genitive endings (later simplified to -ого/-его)
|
||||
|
||||
3. POLISH MAIDEN NAME CONVENTION:
|
||||
- "изъ Адамковъ" = "z Adamkow" = nee Adamkow
|
||||
- "изъ" (from) marks maiden/birth name
|
||||
|
||||
4. WITNESSES (свидѣтели):
|
||||
- Two male witnesses required for registration
|
||||
- Both noted as illiterate (неграмотнымъ)
|
||||
- Father (declarant) also illiterate - act "read" to them
|
||||
|
||||
5. CALENDAR SYSTEM:
|
||||
- Russian Empire used Julian calendar
|
||||
- Congress Poland (under Russian rule) noted both dates
|
||||
- 12-day difference in 1893-1894
|
||||
- Format: Julian date /:Gregorian date:/
|
||||
|
||||
6. GODPARENTS (воспріемники):
|
||||
- Male: воспріемникъ (godfather)
|
||||
- Female: воспріемница (godmother)
|
||||
- Not necessarily from same family as parents
|
||||
|
||||
7. SOCIAL/OCCUPATIONAL TERMS:
|
||||
- земледѣлецъ = farmer/agriculturalist
|
||||
- ксндзъ = ksiadz (Polish priest title; from Proto-Slavic *kъnędzь, an early Germanic loan, cognate with "knez"/"knyaz")
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Provenance
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
provenance:
|
||||
data_status: "REAL_HISTORICAL_DATA"
|
||||
|
||||
archive:
|
||||
name: "Archiwum Panstwowe w Poznaniu Oddzial w Koninie"
|
||||
name_english: "State Archive in Poznan, Konin Branch"
|
||||
collection: "Akta stanu cywilnego Parafii Rzymskokatolickiej Osiek Wielki (pow. kolski)"
|
||||
collection_english: "Civil Registration Records of the Roman Catholic Parish of Osiek Wielki (Kolo district)"
|
||||
reference_code: "54/792/0/6.1/140"
|
||||
scan_number: "4 of 76"
|
||||
|
||||
document_metadata:
|
||||
date_julian: "1893-12-27"
|
||||
date_gregorian: "1894-01-08"
|
||||
|
||||
digital_access:
|
||||
archive_url: "https://szukajwarchiwach.gov.pl"
|
||||
tutorial_url: "https://script.byu.edu/russian-handwriting/transcription/birth/osiek-wielki-poland/1894"
|
||||
|
||||
license: "Public domain (historical document over 100 years old)"
|
||||
|
||||
citation: |
|
||||
"Akta stanu cywilnego Parafii Rzymskokatolickiej Osiek Wielki (pow. kolski),"
|
||||
Archiwum Panstwowe w Poznaniu Oddzial w Koninie, Szukaj w Archiwach
|
||||
(szukajwarchiwach.gov.pl: accessed 25 January 2023), entry for Stefan Novitsky,
|
||||
Catholic birth record, 6 January 1894 (Gregorian date), Osiek Wielki, Czolowo,
|
||||
Kolo, Kaliska, Russian Empire, Reference Code 54/792/0/6.1/140, scan no. 4 of 76.
|
||||
|
||||
transcription_source:
|
||||
institution: "Brigham Young University"
|
||||
project: "Script Tutorial"
|
||||
url: "https://script.byu.edu/russian-handwriting/transcription/birth/osiek-wielki-poland/1894"
|
||||
access_date: "2025-01-13"
|
||||
notes: "Complete line-by-line transcription with Russian original, romanization, and English translation"
|
||||
|
||||
verification_notes: |
|
||||
This is a REAL historical document with verified transcription:
|
||||
- Original held at Polish State Archives (Archiwum Panstwowe)
|
||||
- Transcribed and verified by BYU Script Tutorial paleographers
|
||||
- All 8 persons are real historical individuals
|
||||
- Names provided in both Russian Cyrillic and Polish Latin script in original
|
||||
- Stefan Nowicki born 6 January 1894 (Gregorian) in Lubin village
|
||||
- Family: farmers (zemledeltsy) in Greater Poland region
|
||||
- Document context: Congress Poland under Russian Imperial rule
|
||||
|
|
@ -0,0 +1,281 @@
|
|||
# =============================================================================
|
||||
# PiCo Example 11: Ottoman Turkish Sijill (Court Record)
|
||||
# =============================================================================
|
||||
# Part of: data/entity_annotation/modules/integrations/pico/examples/
|
||||
# Parent: _examples_index.yaml
|
||||
#
|
||||
# DATA STATUS: SYNTHETIC_EXAMPLE
|
||||
#
|
||||
# Demonstrates extraction from an Ottoman Turkish court record (sijill) showing:
|
||||
# - Ottoman Turkish in Arabic script
|
||||
# - Honorific titles: Ağa, Efendi, Çelebi, Hatun
|
||||
# - Patronymics: bin (son of), bint (daughter of)
|
||||
# - Deceased markers: merhum/merhume
|
||||
# - Hijri calendar
|
||||
# - Mixed Arabic-Turkish vocabulary
|
||||
# - Court terminology
|
||||
#
|
||||
# Language: Ottoman Turkish (Arabic script)
|
||||
# Period: 1258 AH (1842 CE)
|
||||
# Source Type: Sijill (Sharia Court Register)
|
||||
# Archive Context: Şer'iyye Sicilleri (Islamic Court Registers)
|
||||
#
|
||||
# Last Updated: 2025-12-12
|
||||
# =============================================================================
|
||||
|
||||
example_id: "example_11_ottoman_sijill"
|
||||
example_title: "Ottoman Court Record (Sijill) - Property Sale, Demirciköy 1258 AH"
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
source_language: "Ottoman Turkish"
|
||||
source_script: "Arabic"
|
||||
source_period: "1258 AH (1842 CE)"
|
||||
source_type: "sijill"
|
||||
document_subtype: "property_sale"
|
||||
archive_context: "Şer'iyye Sicilleri (Islamic Court Registers)"
|
||||
|
||||
description: |
|
||||
This example demonstrates extraction from an Ottoman Turkish sijill
|
||||
(Islamic court register) documenting a property sale transaction.
|
||||
|
||||
Key features demonstrated:
|
||||
- Ottoman Turkish written in Arabic script
|
||||
- Honorific titles indicating social class (Ağa, Efendi, Çelebi, Hatun)
|
||||
- Arabic patronymic markers (bin, bint)
|
||||
- Turkish patronymic suffix (-oğlu)
|
||||
- Deceased markers (merhum/merhume)
|
||||
- Hijri lunar calendar dating
|
||||
- Mixed Arabic-Turkish legal vocabulary
|
||||
- Court record terminology (şahid, mübayi', ba'i)
|
||||
|
||||
source_text: |
|
||||
بسم الله الرحمن الرحيم
|
||||
|
||||
مجلس شرع شريفده محمد آغا بن عبد الله مرحوم قصبه دميرجیکوی
|
||||
ساکنلرندن محمد بن احمد افندی و زوجهسی فاطمه خاتون بنت علیاوغلو
|
||||
حاضر اولوب محمد آغا طرفندن یکری بش غروش بدل معلوم ایله صاتیلدی
|
||||
|
||||
شهود الحال: حسن افندی بن عمر، ابراهیم چلبی بن مصطفی
|
||||
|
||||
فی اوائل شهر رجب سنة ١٢٥٨
|
||||
|
||||
source_text_romanized: |
|
||||
Bismillahirrahmanirrahim
|
||||
|
||||
Meclis-i şer'-i şerifde Mehmed Ağa bin Abdullah merhum kasaba Demirciköy
|
||||
sakinlerinden Mehmed bin Ahmed Efendi ve zevcesi Fatma Hatun bint Ali-oğlu
|
||||
hazır olub Mehmed Ağa tarafından yirmi beş guruş bedel-i ma'lum ile satıldı
|
||||
|
||||
Şuhud al-hal: Hasan Efendi bin Ömer, İbrahim Çelebi bin Mustafa
|
||||
|
||||
Fi evail-i şehr-i Receb sene 1258
|
||||
|
||||
source_text_english: |
|
||||
In the name of God, the Merciful, the Compassionate
|
||||
|
||||
In the noble Sharia court, Mehmed Ağa son of the late Abdullah and, from among
|
||||
the residents of the town of Demirciköy, Mehmed son of Ahmed Efendi and his
|
||||
wife Fatma Hatun daughter of Ali-oğlu being present, [the property] was sold
|
||||
by Mehmed Ağa [to them] for the known price of twenty-five guruş.
|
||||
|
||||
Witnesses present: Hasan Efendi son of Ömer, İbrahim Çelebi son of Mustafa
|
||||
|
||||
In early Receb of the year 1258 [Hijri]
|
||||
|
||||
expected_extraction:
|
||||
pico_observation:
|
||||
observation_id: "sijill_demircikoy_1258ah_sale"
|
||||
source_type: "sijill"
|
||||
source_reference: "Şer'iyye Sicili, Demirciköy, Receb 1258 AH"
|
||||
|
||||
persons:
|
||||
- person_index: 0
|
||||
pnv_name:
|
||||
literalName: "محمد آغا بن عبد الله"
|
||||
literalName_romanized: "Mehmed Ağa bin Abdullah"
|
||||
givenName: "محمد"
|
||||
givenName_romanized: "Mehmed"
|
||||
title: "آغا (Ağa)"
|
||||
patronymic: "بن عبد الله"
|
||||
patronymic_romanized: "bin Abdullah"
|
||||
roles:
|
||||
- role_title: "بائع (ba'i)"
|
||||
role_in_source: "seller"
|
||||
biographical:
|
||||
sex: "male"
|
||||
social_rank: "Ağa (military/landowning class)"
|
||||
family_relationships:
|
||||
father:
|
||||
- name: "عبد الله (Abdullah)"
|
||||
status: "deceased"
|
||||
deceased_marker: "مرحوم (merhum)"
|
||||
context: "Seller, son of the late (merhum) Abdullah; Ağa = military/landowning"
|
||||
|
||||
- person_index: 1
|
||||
pnv_name:
|
||||
literalName: "محمد بن احمد افندی"
|
||||
literalName_romanized: "Mehmed bin Ahmed Efendi"
|
||||
givenName: "محمد"
|
||||
givenName_romanized: "Mehmed"
|
||||
title: "افندی (Efendi)"
|
||||
patronymic: "بن احمد"
|
||||
patronymic_romanized: "bin Ahmed"
|
||||
roles:
|
||||
- role_title: "مشتری (müşteri)"
|
||||
role_in_source: "buyer"
|
||||
biographical:
|
||||
sex: "male"
|
||||
residence: "Demirciköy"
|
||||
social_rank: "Efendi (educated class)"
|
||||
family_relationships:
|
||||
father:
|
||||
- name: "احمد (Ahmed)"
|
||||
spouse:
|
||||
- person_index: 2
|
||||
target_name: "Fatma Hatun"
|
||||
context: "Buyer, Efendi = literate/administrative"
|
||||
|
||||
- person_index: 2
|
||||
pnv_name:
|
||||
literalName: "فاطمه خاتون بنت علیاوغلو"
|
||||
literalName_romanized: "Fatma Hatun bint Ali-oğlu"
|
||||
givenName: "فاطمه"
|
||||
givenName_romanized: "Fatma"
|
||||
title: "خاتون (Hatun)"
|
||||
patronymic: "بنت علیاوغلو"
|
||||
patronymic_romanized: "bint Ali-oğlu"
|
||||
roles:
|
||||
- role_title: "مشتری (müşteri)"
|
||||
role_in_source: "buyer"
|
||||
- role_title: "زوجه (zevce)"
|
||||
role_in_source: "wife"
|
||||
biographical:
|
||||
sex: "female"
|
||||
marital_status: "married"
|
||||
social_rank: "Hatun (respectable woman)"
|
||||
family_relationships:
|
||||
father:
|
||||
- name: "علیاوغلو (Ali-oğlu)"
|
||||
spouse:
|
||||
- person_index: 1
|
||||
target_name: "Mehmed Efendi"
|
||||
context: "Wife of buyer, co-purchaser"
|
||||
|
||||
- person_index: 3
|
||||
pnv_name:
|
||||
literalName: "حسن افندی بن عمر"
|
||||
literalName_romanized: "Hasan Efendi bin Ömer"
|
||||
givenName: "حسن"
|
||||
givenName_romanized: "Hasan"
|
||||
title: "افندی (Efendi)"
|
||||
patronymic: "بن عمر"
|
||||
patronymic_romanized: "bin Ömer"
|
||||
roles:
|
||||
- role_title: "شاهد (şahid)"
|
||||
role_in_source: "witness"
|
||||
biographical:
|
||||
sex: "male"
|
||||
social_rank: "Efendi"
|
||||
family_relationships:
|
||||
father:
|
||||
- name: "عمر (Ömer)"
|
||||
context: "First witness"
|
||||
|
||||
- person_index: 4
|
||||
pnv_name:
|
||||
literalName: "ابراهیم چلبی بن مصطفی"
|
||||
literalName_romanized: "İbrahim Çelebi bin Mustafa"
|
||||
givenName: "ابراهیم"
|
||||
givenName_romanized: "İbrahim"
|
||||
title: "چلبی (Çelebi)"
|
||||
patronymic: "بن مصطفی"
|
||||
patronymic_romanized: "bin Mustafa"
|
||||
roles:
|
||||
- role_title: "شاهد (şahid)"
|
||||
role_in_source: "witness"
|
||||
biographical:
|
||||
sex: "male"
|
||||
social_rank: "Çelebi (gentleman/merchant)"
|
||||
family_relationships:
|
||||
father:
|
||||
- name: "مصطفی (Mustafa)"
|
||||
context: "Second witness"
|
||||
|
||||
temporal_references:
|
||||
- expression: "فی اوائل شهر رجب سنة ١٢٥٨"
|
||||
expression_romanized: "fi evail-i şehr-i Receb sene 1258"
|
||||
normalized: "1842-08"
|
||||
calendar: "Hijri"
|
||||
type: "DATE"
|
||||
conversion_note: "Receb 1258 AH ≈ August-September 1842 CE"
|
||||
|
||||
locations_mentioned:
|
||||
- name: "قصبه دميرجیکوی"
|
||||
name_romanized: "kasaba Demirciköy"
|
||||
type: "town (kasaba)"
|
||||
- name: "مجلس شرع شريف"
|
||||
name_romanized: "meclis-i şer'-i şerif"
|
||||
type: "court"
|
||||
|
||||
ottoman_naming_notes: |
|
||||
Ottoman Turkish naming conventions:
|
||||
|
||||
HONORIFIC TITLES:
|
||||
- آغا (Ağa): Military commander, landowner
|
||||
- افندی (Efendi): Educated person, official
|
||||
- چلبی (Çelebi): Gentleman, merchant
|
||||
- خاتون (Hatun): Respectable woman
|
||||
|
||||
PATRONYMIC PATTERNS:
|
||||
- بن (bin): Son of (Arabic)
|
||||
- بنت (bint): Daughter of (Arabic)
|
||||
- اوغلو (-oğlu): Son of (Turkish)
|
||||
|
||||
DECEASED MARKERS:
|
||||
- مرحوم (merhum): The late (man)
|
||||
- مرحومه (merhume): The late (woman)
|
||||
|
||||
CALENDAR: Hijri lunar (354/355 days)
|
||||
Receb 1258 AH ≈ August-September 1842 CE
|
||||
|
||||
provenance:
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
notes: |
|
||||
This example uses synthetic data based on authentic Ottoman Turkish
|
||||
sijill (court register) formulae for demonstration purposes. Names,
|
||||
dates, and locations are fictional but follow authentic 19th-century
|
||||
patterns. For real examples, see PROVENANCE_SOURCES.md.
|
||||
|
||||
related_real_sources:
|
||||
- archive: "OpenJerusalem Project"
|
||||
collection: "Jerusalem Sharia Court Registers"
|
||||
digital_url: "https://www.openjerusalem.org/"
|
||||
ark_identifier: "ark:/58142/PfV7b"
|
||||
volume_count: "102 registers"
|
||||
period: "1834-1920 CE"
|
||||
languages: "Ottoman Turkish, Arabic"
|
||||
license: "Open Access"
|
||||
document_types: "Property sales, marriage contracts, inheritance, waqf"
|
||||
|
||||
- archive: "İslam Araştırmaları Merkezi (ISAM)"
|
||||
collection: "Istanbul Kadı Sicilleri"
|
||||
digital_url: "http://www.kadisicilleri.org/"
|
||||
volume_count: "40+ volumes online"
|
||||
document_count: "40,000+ documents"
|
||||
period: "16th-19th century CE"
|
||||
language: "Ottoman Turkish"
|
||||
license: "Research access"
|
||||
|
||||
- archive: "Istanbul Metropolitan Municipality"
|
||||
project: "History of Istanbul"
|
||||
digital_url: "https://istanbultarihi.ist/434-istanbul-sharia-court-registers"
|
||||
volume_count: "~10,000 volumes"
|
||||
courts: "26 different courts"
|
||||
period: "1453-1922 CE"
|
||||
notes: "Largest collection of Ottoman court records in existence"
|
||||
|
||||
- archive: "Harvard University"
|
||||
project: "Ottoman Court Records Project (OCRP)"
|
||||
digital_url: "https://cmes.fas.harvard.edu/projects/ocrp"
|
||||
document_types: "Sijill transcriptions, translations"
|
||||
period: "16th-19th century CE"
|
||||
|
|
@ -0,0 +1,315 @@
|
|||
# =============================================================================
|
||||
# PiCo Examples Index
|
||||
# =============================================================================
|
||||
# Part of: data/entity_annotation/modules/integrations/pico/examples/
|
||||
# Parent: pico/_index.yaml
|
||||
#
|
||||
# This file provides a manifest and overview of all 11 PiCo extraction examples,
|
||||
# covering 10 languages, 5 scripts, and a range of historical record types.
|
||||
#
|
||||
# Last Updated: 2025-12-12
|
||||
# =============================================================================
|
||||
|
||||
module_id: "pico_examples"
|
||||
module_title: "PiCo Historical Extraction Examples"
|
||||
version: "1.0.0"
|
||||
description: |
|
||||
A comprehensive collection of 11 extraction examples demonstrating PiCo
|
||||
(Person In Context Ontology) patterns for historical person data extraction
|
||||
from primary source documents spanning 10 languages and 6 centuries.
|
||||
|
||||
# =============================================================================
|
||||
# EXAMPLES OVERVIEW
|
||||
# =============================================================================
|
||||
|
||||
examples_summary:
|
||||
total_examples: 11
|
||||
synthetic_examples: 9
|
||||
real_data_examples: 2
|
||||
languages_covered:
|
||||
- Dutch
|
||||
- English
|
||||
- Arabic
|
||||
- Hebrew
|
||||
- Spanish
|
||||
- Italian
|
||||
- Greek
|
||||
- Russian
|
||||
- Polish
|
||||
- Ottoman Turkish
|
||||
scripts_covered:
|
||||
- Latin
|
||||
- Arabic
|
||||
- Hebrew
|
||||
- Greek (polytonic)
|
||||
- Cyrillic
|
||||
calendars_covered:
|
||||
- Gregorian
|
||||
- Julian
|
||||
- Hijri (Islamic)
|
||||
- Hebrew
|
||||
time_period: "1492 CE - 2025 CE"
|
||||
|
||||
# =============================================================================
|
||||
# EXAMPLES CATALOG
|
||||
# =============================================================================
|
||||
|
||||
examples:
|
||||
# ---------------------------------------------------------------------------
|
||||
# Example 01: Dutch Marriage Act (1823)
|
||||
# ---------------------------------------------------------------------------
|
||||
- file: "01_dutch_marriage.yaml"
|
||||
example_id: "example_01_dutch_marriage"
|
||||
title: "Dutch Civil Marriage Act - Leeuwarden 1823"
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
language: "Dutch"
|
||||
script: "Latin"
|
||||
period: "1823 CE"
|
||||
source_type: "burgerlijke_stand"
|
||||
document_type: "Marriage certificate"
|
||||
features:
|
||||
- Dutch patronymics (-zoon, -dochter)
|
||||
- Napoleonic civil registration format
|
||||
- Occupation and age recording
|
||||
- Witness systems
|
||||
persons_extracted: 6
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Example 02: Dutch Notarial Protocol (1789)
|
||||
# ---------------------------------------------------------------------------
|
||||
- file: "02_notarial_protocol.yaml"
|
||||
example_id: "example_02_dutch_notarial"
|
||||
title: "Dutch Notarial Protocol - Amsterdam 1789"
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
language: "Dutch"
|
||||
script: "Latin"
|
||||
period: "1789 CE"
|
||||
source_type: "notarial_protocol"
|
||||
document_type: "Testament/Will"
|
||||
features:
|
||||
- VOC (Dutch East India Company) context
|
||||
- Colonial-era naming
|
||||
- Marital property conventions
|
||||
- Witness and notary roles
|
||||
persons_extracted: 5
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Example 03: Dutch Church Baptism (1650)
|
||||
# ---------------------------------------------------------------------------
|
||||
- file: "03_church_baptism.yaml"
|
||||
example_id: "example_03_dutch_baptism"
|
||||
title: "Dutch Reformed Church Baptism - Delft 1650"
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
language: "Dutch"
|
||||
script: "Latin"
|
||||
period: "1650 CE"
|
||||
source_type: "church_register"
|
||||
document_type: "Baptismal record"
|
||||
features:
|
||||
- Dutch Reformed Church records
|
||||
- Golden Age naming conventions
|
||||
- Godparent (getuige) system
|
||||
- Artisan occupations
|
||||
persons_extracted: 5
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Example 04: LinkedIn Profile (2025)
|
||||
# ---------------------------------------------------------------------------
|
||||
- file: "04_linkedin_profile.yaml"
|
||||
example_id: "example_04_linkedin_modern"
|
||||
title: "Modern LinkedIn Profile - Heritage Sector Professional"
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
language: "English"
|
||||
script: "Latin"
|
||||
period: "2025 CE"
|
||||
source_type: "social_media_profile"
|
||||
document_type: "Professional profile"
|
||||
features:
|
||||
- Modern digital naming conventions
|
||||
- Career trajectory extraction
|
||||
- Heritage sector roles
|
||||
- Digital platform metadata
|
||||
persons_extracted: 1
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Example 05: Arabic Waqf Document (1312 AH)
|
||||
# ---------------------------------------------------------------------------
|
||||
- file: "05_arabic_waqf.yaml"
|
||||
example_id: "example_05_arabic_waqf"
|
||||
title: "Arabic Waqf Document - Cairo 1312 AH (1894 CE)"
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
language: "Arabic"
|
||||
script: "Arabic"
|
||||
period: "1312 AH (1894 CE)"
|
||||
source_type: "waqf_document"
|
||||
document_type: "Islamic endowment deed"
|
||||
features:
|
||||
- Classical Arabic naming (ibn, bint)
|
||||
- Honorific titles (Pasha, Bey, Effendi, Hanem)
|
||||
- Hijri calendar
|
||||
- Islamic legal terminology
|
||||
persons_extracted: 6
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Example 06: Hebrew Ketubah (1742) - REAL DATA
|
||||
# ---------------------------------------------------------------------------
|
||||
- file: "06_hebrew_ketubah.yaml"
|
||||
example_id: "example_06_hebrew_ketubah"
|
||||
title: "Hebrew Marriage Contract (Ketubah) - Modena 1742"
|
||||
data_status: "REAL_HISTORICAL_DATA"
|
||||
language: "Hebrew"
|
||||
script: "Hebrew"
|
||||
period: "5502 AM (1742 CE)"
|
||||
source_type: "ketubah"
|
||||
document_type: "Jewish marriage contract"
|
||||
archive: "Yale University Beinecke Library"
|
||||
ark_id: "ark:/15534/c27p8thn"
|
||||
features:
|
||||
- Hebrew naming (ben, bat)
|
||||
- Hebrew calendar
|
||||
- Rabbinic titles (HaRav, Morenu)
|
||||
- Ketubah legal formulae
|
||||
persons_extracted: 6
|
||||
real_data_citation: |
|
||||
Beinecke Rare Book and Manuscript Library, Yale University
|
||||
General Collection, GEN MSS 1309
|
||||
Ketubah: Modena (Italy), 23 Sivan 5502 (June 12, 1742)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Example 07: Spanish Colonial Record (1540)
|
||||
# ---------------------------------------------------------------------------
|
||||
- file: "07_spanish_colonial.yaml"
|
||||
example_id: "example_07_spanish_colonial"
|
||||
title: "Spanish Colonial Encomienda Record - Nueva España 1540"
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
language: "Spanish"
|
||||
script: "Latin"
|
||||
period: "1540 CE"
|
||||
source_type: "colonial_record"
|
||||
document_type: "Encomienda grant"
|
||||
features:
|
||||
- Spanish colonial naming
|
||||
- Honorific titles (Don, Doña)
|
||||
- Indigenous name recording
|
||||
- Colonial administrative terminology
|
||||
persons_extracted: 5
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Example 08: Italian Notarial Record (1492)
|
||||
# ---------------------------------------------------------------------------
|
||||
- file: "08_italian_notarial.yaml"
|
||||
example_id: "example_08_italian_notarial"
|
||||
title: "Italian Notarial Act - Florence 1492"
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
language: "Italian/Latin"
|
||||
script: "Latin"
|
||||
period: "1492 CE"
|
||||
source_type: "notarial_act"
|
||||
document_type: "Marriage contract"
|
||||
features:
|
||||
- Renaissance Italian naming
|
||||
- Latin legal formulae
|
||||
- Florentine patronymics
|
||||
- Notarial conventions
|
||||
persons_extracted: 6
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Example 09: Greek Orthodox Baptism (1875)
|
||||
# ---------------------------------------------------------------------------
|
||||
- file: "09_greek_orthodox.yaml"
|
||||
example_id: "example_09_greek_baptismal_register"
|
||||
title: "Greek Orthodox Baptismal Register - Thessaloniki 1875"
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
language: "Greek"
|
||||
script: "Greek (polytonic)"
|
||||
period: "1875 CE"
|
||||
source_type: "baptismal_register"
|
||||
document_type: "Church baptismal record"
|
||||
features:
|
||||
- Greek patronymics (του + genitive)
|
||||
- Polytonic Greek orthography
|
||||
- Godparent system (νονός/νονά)
|
||||
- Deceased markers (μακαρίτης)
|
||||
- Julian calendar
|
||||
persons_extracted: 7
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Example 10: Russian Metrical Book (1894) - REAL DATA
|
||||
# ---------------------------------------------------------------------------
|
||||
- file: "10_russian_metrical.yaml"
|
||||
example_id: "example_10_russian_metrical"
|
||||
title: "Russian Imperial Metrical Book - Birth of Stefan Nowicki (1894)"
|
||||
data_status: "REAL_HISTORICAL_DATA"
|
||||
language: "Russian/Polish"
|
||||
script: "Cyrillic"
|
||||
period: "1894 CE"
|
||||
source_type: "metrical_book"
|
||||
document_type: "Birth registration"
|
||||
archive: "Archiwum Państwowe w Poznaniu"
|
||||
features:
|
||||
- Cyrillic script with romanization
|
||||
- Polish names in Russian
|
||||
- Pre-revolutionary orthography (ъ, ѣ)
|
||||
- Julian/Gregorian dual dating
|
||||
- Восприемники (godparents)
|
||||
persons_extracted: 6
|
||||
real_data_citation: |
|
||||
Archiwum Państwowe w Poznaniu (State Archive in Poznań)
|
||||
BYU Script Tutorial transcription
|
||||
Russian Imperial metrical book, Osiek Wielki parish (Lubin village), 1894
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Example 11: Ottoman Sijill (1258 AH / 1842 CE)
|
||||
# ---------------------------------------------------------------------------
|
||||
- file: "11_ottoman_sijill.yaml"
|
||||
example_id: "example_11_ottoman_sijill"
|
||||
title: "Ottoman Court Record (Sijill) - Property Sale, Demirciköy 1258 AH"
|
||||
data_status: "SYNTHETIC_EXAMPLE"
|
||||
language: "Ottoman Turkish"
|
||||
script: "Arabic"
|
||||
period: "1258 AH (1842 CE)"
|
||||
source_type: "sijill"
|
||||
document_type: "Property sale (court record)"
|
||||
features:
|
||||
- Ottoman Turkish in Arabic script
|
||||
- Honorific titles (Ağa, Efendi, Çelebi, Hatun)
|
||||
- Arabic patronymics (bin, bint)
|
||||
- Turkish patronymic (-oğlu)
|
||||
- Hijri calendar
|
||||
- Islamic court terminology
|
||||
persons_extracted: 5
|
||||
|
||||
# =============================================================================
|
||||
# USAGE NOTES
|
||||
# =============================================================================
|
||||
|
||||
usage_notes: |
|
||||
These examples are designed for:
|
||||
|
||||
1. TRAINING: Use as training data for NER/extraction models
|
||||
2. TESTING: Validate extraction pipelines against known outputs
|
||||
3. DOCUMENTATION: Understand PiCo patterns for different document types
|
||||
4. REFERENCE: Language-specific naming convention guides
|
||||
|
||||
IMPORTANT DISTINCTIONS:
|
||||
- SYNTHETIC_EXAMPLE: Created for demonstration; names/dates are fictional
|
||||
- REAL_HISTORICAL_DATA: Actual archival records with full provenance
|
||||
|
||||
Each example includes:
|
||||
- source_text: Original text in source language/script
|
||||
- expected_extraction: Complete PiCo-compliant output
|
||||
- [language]_naming_notes: Language-specific conventions
|
||||
- provenance: Data status and related real sources
|
||||
|
||||
# =============================================================================
|
||||
# RELATED RESOURCES
|
||||
# =============================================================================
|
||||
|
||||
related_resources:
|
||||
schema_files:
|
||||
- "../schema/observation.yaml"
|
||||
- "../schema/pnv_components.yaml"
|
||||
- "../schema/relationships.yaml"
|
||||
- "../schema/temporal.yaml"
|
||||
parent_index: "../_index.yaml"
|
||||
provenance_sources: "../../PROVENANCE_SOURCES.md"
|
||||
|
|
@ -0,0 +1,439 @@
|
|||
# =============================================================================
|
||||
# PiCo Integration Module: Observation Pattern
|
||||
# =============================================================================
|
||||
# Part of: data/entity_annotation/modules/integrations/pico/
|
||||
# Parent: _index.yaml
|
||||
#
|
||||
# Description: Core PiCo observation pattern and PersonObservation class.
|
||||
# Defines the source-bound observation layer that captures
|
||||
# person mentions exactly as they appear in sources.
|
||||
#
|
||||
# Last Updated: 2025-01-13
|
||||
# =============================================================================
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Core Observation Pattern
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
observation_pattern:
|
||||
description: "Every person mention creates a PersonObservation"
|
||||
class: "picom:PersonObservation"
|
||||
class_uri: "https://w3id.org/pico/PersonObservation"
|
||||
|
||||
properties:
|
||||
- property: "picom:hasObservedName"
|
||||
description: "The name string as it appears in text"
|
||||
range: "pnv:PersonName"
|
||||
cardinality: "1"
|
||||
note: "Exact transcription of name from source"
|
||||
|
||||
- property: "picom:isObservationOf"
|
||||
description: "Links to reconstructed Person entity"
|
||||
range: "crm:E21_Person"
|
||||
cardinality: "0..1"
|
||||
note: "May be null if person not yet identified"
|
||||
|
||||
- property: "prov:hadPrimarySource"
|
||||
description: "The source document/webpage"
|
||||
range: "prov:Entity"
|
||||
cardinality: "1"
|
||||
note: "Required for provenance tracking"
|
||||
|
||||
- property: "picom:observedAt"
|
||||
description: "When the observation was made"
|
||||
range: "xsd:dateTime"
|
||||
cardinality: "1"
|
||||
note: "Extraction timestamp, not document date"
|
||||
|
||||
- property: "picom:observedInContext"
|
||||
description: "Surrounding text context"
|
||||
range: "xsd:string"
|
||||
cardinality: "0..1"
|
||||
note: "For disambiguation when reviewing"
|
||||
|
||||
- property: "picom:hasRole"
|
||||
description: "Role/position observed with the person"
|
||||
range: "xsd:string"
|
||||
cardinality: "0..*"
|
||||
note: "Links to ROLE hypernym when extracted"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Person Reconstruction Pattern
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
person_reconstruction_pattern:
|
||||
description: |
|
||||
A PersonReconstruction is created by linking one or more PersonObservations
|
||||
to form a unified person entity. This is the scholarly interpretation layer
|
||||
that connects source-bound observations to a conceptual person.
|
||||
|
||||
Key distinction:
|
||||
- PersonObservation: What is OBSERVED in a specific source (exact transcription)
|
||||
- PersonReconstruction: What is INFERRED about the person (normalized, linked)
|
||||
|
||||
A single PersonReconstruction may derive from observations across:
|
||||
- Multiple sources (birth record + marriage record + death record)
|
||||
- Different time periods (mentions across decades)
|
||||
- Various name forms ("Jan Jansz" + "Johannes Jansen" + "J. Jansen")
|
||||
|
||||
class: "pico:PersonReconstruction"
|
||||
class_uri: "https://personsincontext.org/model#PersonReconstruction"
|
||||
superclass: "pico:Person"
|
||||
|
||||
required_properties:
|
||||
- property: "prov:wasDerivedFrom"
|
||||
description: "Links to source PersonObservation(s)"
|
||||
range: "pico:PersonObservation"
|
||||
cardinality: "1..*"
|
||||
note: "Every reconstruction MUST link to at least one observation"
|
||||
|
||||
- property: "prov:wasGeneratedBy"
|
||||
description: "Links to the reconstruction Activity"
|
||||
range: "prov:Activity"
|
||||
cardinality: "1"
|
||||
note: "Documents how/when/by whom reconstruction was created"
|
||||
|
||||
optional_properties:
|
||||
- property: "prov:wasRevisionOf"
|
||||
description: "Links to previous version of this reconstruction"
|
||||
range: "pico:PersonReconstruction"
|
||||
cardinality: "0..1"
|
||||
note: "For tracking updates to reconstructions over time"
|
||||
|
||||
- property: "sdo:name"
|
||||
description: "Normalized/preferred name form"
|
||||
range: "xsd:string"
|
||||
note: "The canonical name for this person"
|
||||
|
||||
- property: "sdo:additionalName"
|
||||
description: "Structured name following PNV"
|
||||
range: "pnv:PersonName"
|
||||
note: "Full name breakdown using Person Name Vocabulary"
|
||||
|
||||
- property: "sdo:givenName"
|
||||
description: "Given/first name"
|
||||
range: "xsd:string"
|
||||
|
||||
- property: "sdo:familyName"
|
||||
description: "Family/surname"
|
||||
range: "xsd:string"
|
||||
|
||||
- property: "sdo:gender"
|
||||
description: "Gender of the person"
|
||||
range: "sdo:GenderType"
|
||||
values: ["sdo:Male", "sdo:Female"]
|
||||
|
||||
- property: "sdo:birthDate"
|
||||
description: "Birth date (ISO 8601)"
|
||||
range: "xsd:date"
|
||||
note: "May be incomplete: YYYY (xsd:gYear), YYYY-MM (xsd:gYearMonth), or YYYY-MM-DD (xsd:date)"
|
||||
|
||||
- property: "sdo:birthPlace"
|
||||
description: "Place of birth"
|
||||
range: "xsd:string or xsd:anyURI"
|
||||
note: "Prefer linking to GeoNames or Wikidata"
|
||||
|
||||
- property: "sdo:deathDate"
|
||||
description: "Death date (ISO 8601)"
|
||||
range: "xsd:date"
|
||||
|
||||
- property: "sdo:deathPlace"
|
||||
description: "Place of death"
|
||||
range: "xsd:string or xsd:anyURI"
|
||||
|
||||
example:
|
||||
description: "PersonReconstruction derived from multiple observations"
|
||||
turtle: |
|
||||
cbg:person_reconstruction_anna_koppen
|
||||
a pico:PersonReconstruction ;
|
||||
sdo:name "Anna Maria Koppen" ;
|
||||
sdo:familyName "Koppen" ;
|
||||
sdo:givenName "Anna Maria" ;
|
||||
sdo:gender sdo:Female ;
|
||||
sdo:birthPlace "Haarlem" ;
|
||||
sdo:birthDate "1860-03-31"^^xsd:date ;
|
||||
sdo:deathPlace "Detroit, USA" ;
|
||||
sdo:deathDate "1926"^^xsd:gYear ;
|
||||
prov:wasDerivedFrom nha:marriage_1885_po_1 ,
|
||||
cbg:emigration_1887_po_1 ,
|
||||
us:death_1926_po_1 ;
|
||||
prov:wasGeneratedBy cbg:reconstruction_activity_01 .
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Source and Scan Classes
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
source_classes:
|
||||
|
||||
archive_component:
|
||||
description: |
|
||||
A Source document from which PersonObservations are extracted.
|
||||
PiCo does not aim to fully describe archival sources (use RiC-O or DC for that),
|
||||
but requires minimal identification for provenance tracking.
|
||||
|
||||
class: "sdo:ArchiveComponent"
|
||||
class_uri: "https://schema.org/ArchiveComponent"
|
||||
superclass: "sdo:CreativeWork"
|
||||
|
||||
properties:
|
||||
- property: "sdo:name"
|
||||
description: "Identifying name for the source"
|
||||
range: "xsd:string"
|
||||
cardinality: "1"
|
||||
note: "Combine title, date, archive location for identification"
|
||||
example: "BS Marriage Haarlem, November 11, 1885, certificate number 321"
|
||||
|
||||
- property: "sdo:additionalType"
|
||||
description: "Type of source document"
|
||||
range: "picot_sourcetypes:Concept"
|
||||
note: "Use PiCo SourceType thesaurus"
|
||||
|
||||
- property: "sdo:dateCreated"
|
||||
description: "Date the source was created"
|
||||
range: "xsd:date"
|
||||
|
||||
- property: "sdo:holdingArchive"
|
||||
description: "Institution holding the source"
|
||||
range: "xsd:anyURI"
|
||||
note: "Link to heritage custodian (GHCID or Wikidata)"
|
||||
|
||||
- property: "sdo:url"
|
||||
description: "Permalink to the source"
|
||||
range: "sdo:URL"
|
||||
note: "Preferably a persistent identifier"
|
||||
|
||||
- property: "sdo:contentLocation"
|
||||
description: "Geographic coverage of the source"
|
||||
range: "xsd:string or xsd:anyURI"
|
||||
|
||||
- property: "sdo:associatedMedia"
|
||||
description: "Link to scan(s) of the source"
|
||||
range: "sdo:ImageObject"
|
||||
cardinality: "0..*"
|
||||
|
||||
image_object:
|
||||
description: |
|
||||
A Scan of a source document. Links to the digital image at the holding archive.
|
||||
|
||||
class: "sdo:ImageObject"
|
||||
class_uri: "https://schema.org/ImageObject"
|
||||
superclass: "sdo:CreativeWork"
|
||||
|
||||
properties:
|
||||
- property: "sdo:url"
|
||||
description: "URL to the full scan"
|
||||
range: "sdo:URL"
|
||||
note: "Preferably IIIF manifest"
|
||||
|
||||
- property: "sdo:thumbnail"
|
||||
description: "URL to thumbnail image"
|
||||
range: "sdo:ImageObject"
|
||||
|
||||
- property: "sdo:embedUrl"
|
||||
description: "URL to image viewer"
|
||||
range: "sdo:URL"
|
||||
|
||||
- property: "sdo:position"
|
||||
description: "Position in sequence of scans"
|
||||
range: "xsd:int"
|
||||
note: "For multi-page sources"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Biographical Properties
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
biographical_properties:
|
||||
description: |
|
||||
Biographical properties capture personal details as they appear in sources.
|
||||
These are used for both PersonObservation (source-bound) and
|
||||
PersonReconstruction (normalized).
|
||||
|
||||
age:
|
||||
property: "pico:hasAge"
|
||||
property_uri: "https://personsincontext.org/model#hasAge"
|
||||
description: "Age of person as stated in source"
|
||||
range: "xsd:string"
|
||||
domain: "pico:PersonObservation"
|
||||
note: |
|
||||
Used when birth date unknown but age is recorded.
|
||||
Age assumed in years unless specified ("4" = 4 years, "4 months" = 4 months).
|
||||
Numerical preferred over text ("4" not "four").
|
||||
examples:
|
||||
- "30"
|
||||
- "4 months"
|
||||
- "about 25"
|
||||
|
||||
religion:
|
||||
property: "pico:hasReligion"
|
||||
property_uri: "https://personsincontext.org/model#hasReligion"
|
||||
description: "Religious affiliation as stated in source"
|
||||
range: "xsd:string or xsd:anyURI"
|
||||
domain: "pico:Person"
|
||||
note: "Can link to SKOS thesaurus for religions"
|
||||
examples:
|
||||
- "Catholic"
|
||||
- "Reformed"
|
||||
- "Jewish"
|
||||
|
||||
deceased:
|
||||
property: "pico:deceased"
|
||||
property_uri: "https://personsincontext.org/model#deceased"
|
||||
description: "Indication that person is deceased (when death date unknown)"
|
||||
range: "xsd:boolean"
|
||||
domain: "pico:PersonObservation"
|
||||
note: |
|
||||
Only used when deathDate is unknown but death is indicated.
|
||||
A person without deathDate and without deceased:true is assumed alive.
|
||||
Important for privacy considerations in publishing person records.
|
||||
|
||||
gender:
|
||||
property: "sdo:gender"
|
||||
property_uri: "https://schema.org/gender"
|
||||
description: "Gender of the person"
|
||||
range: "sdo:GenderType"
|
||||
domain: "pico:Person"
|
||||
values:
|
||||
- uri: "sdo:Male"
|
||||
label: "Male"
|
||||
- uri: "sdo:Female"
|
||||
label: "Female"
|
||||
|
||||
address:
|
||||
property: "sdo:address"
|
||||
property_uri: "https://schema.org/address"
|
||||
description: "Physical address as mentioned in source"
|
||||
range: "xsd:string"
|
||||
domain: "pico:PersonObservation"
|
||||
note: "Address exactly as recorded in source"
|
||||
|
||||
initials:
|
||||
property: "pnv:initials"
|
||||
property_uri: "https://w3id.org/pnv#initials"
|
||||
description: "Initials of given name(s)"
|
||||
range: "xsd:string"
|
||||
domain: "pnv:PersonName"
|
||||
note: "Each initial followed by period (e.g., 'P.R.', 'H.A.F.M.O.')"
|
||||
examples:
|
||||
- "P.R."
|
||||
- "C.Joh."
|
||||
- "H.A.F.M.O."
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Hypernym Mapping (GLAM-NER v1.7.0)
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
hypernym_mapping:
|
||||
description: "How PiCo concepts map to GLAM-NER v1.7.0 hypernyms"
|
||||
|
||||
mappings:
|
||||
- pico_class: "picom:PersonObservation"
|
||||
glam_hypernym: "AGT.PER"
|
||||
glam_code: "AGT.PER"
|
||||
note: "Person observations create AGT.PER entities"
|
||||
|
||||
- pico_class: "picom:PersonObservation"
|
||||
glam_hypernym: "AGT.STF"
|
||||
glam_code: "AGT.STF"
|
||||
condition: "When observed with organizational role"
|
||||
note: "Staff members with role context"
|
||||
|
||||
- pico_class: "pnv:PersonName"
|
||||
glam_hypernym: "APP.NAM"
|
||||
glam_code: "APP.NAM"
|
||||
note: "Name strings as appellations"
|
||||
|
||||
- pico_class: "picom:hasRole"
|
||||
glam_hypernym: "ROL"
|
||||
glam_code: "ROL"
|
||||
note: "Extracted roles link to ROL hypernym"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Simple Examples
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
examples:
|
||||
- description: "Staff member with title and role"
|
||||
text: "Dr. Maria van den Berg, Director"
|
||||
|
||||
observation:
|
||||
type: "picom:PersonObservation"
|
||||
id: "_:obs1"
|
||||
|
||||
hasObservedName:
|
||||
type: "pnv:PersonName"
|
||||
literalName: "Dr. Maria van den Berg"
|
||||
honorificPrefix: "Dr."
|
||||
givenName: "Maria"
|
||||
surnamePrefix: "van den"
|
||||
baseSurname: "Berg"
|
||||
|
||||
hasRole: "Director"
|
||||
hadPrimarySource: "https://example.org/staff-page"
|
||||
observedAt: "2025-12-02T10:30:00Z"
|
||||
|
||||
glam_ner_annotations:
|
||||
- span: "Dr. Maria van den Berg"
|
||||
type: "AGT.STF"
|
||||
code: "AGT.STF"
|
||||
confidence: 0.95
|
||||
|
||||
- span: "Director"
|
||||
type: "ROL.TIT"
|
||||
code: "ROL.TIT"
|
||||
confidence: 0.98
|
||||
|
||||
- description: "Historical artist"
|
||||
text: "Rembrandt van Rijn painted this in 1642"
|
||||
|
||||
observation:
|
||||
type: "picom:PersonObservation"
|
||||
id: "_:obs2"
|
||||
|
||||
hasObservedName:
|
||||
type: "pnv:PersonName"
|
||||
literalName: "Rembrandt van Rijn"
|
||||
givenName: "Rembrandt"
|
||||
surnamePrefix: "van"
|
||||
baseSurname: "Rijn"
|
||||
|
||||
isObservationOf: "wd:Q5598" # Wikidata Rembrandt
|
||||
hadPrimarySource: "https://example.org/artwork-page"
|
||||
observedAt: "2025-12-02T10:35:00Z"
|
||||
|
||||
glam_ner_annotations:
|
||||
- span: "Rembrandt van Rijn"
|
||||
type: "AGT.PER"
|
||||
code: "AGT.PER"
|
||||
confidence: 0.99
|
||||
linking:
|
||||
wikidata: "Q5598"
|
||||
viaf: "64013650"
|
||||
|
||||
- description: "Nobility title"
|
||||
text: "Count Willem van Loon"
|
||||
|
||||
observation:
|
||||
type: "picom:PersonObservation"
|
||||
id: "_:obs3"
|
||||
|
||||
hasObservedName:
|
||||
type: "pnv:PersonName"
|
||||
literalName: "Count Willem van Loon"
|
||||
honorificPrefix: "Count"
|
||||
givenName: "Willem"
|
||||
surnamePrefix: "van"
|
||||
baseSurname: "Loon"
|
||||
|
||||
hadPrimarySource: "https://example.org/archive-doc"
|
||||
observedAt: "2025-12-02T10:40:00Z"
|
||||
|
||||
glam_ner_annotations:
|
||||
- span: "Count Willem van Loon"
|
||||
type: "AGT.PER"
|
||||
code: "AGT.PER"
|
||||
confidence: 0.95
|
||||
|
||||
- span: "Count"
|
||||
type: "ROL.HON"
|
||||
code: "ROL.HON"
|
||||
note: "Nobility title - honorific role"
|
||||
|
|
@ -0,0 +1,439 @@
|
|||
# =============================================================================
|
||||
# PiCo Integration Module: Person Name Vocabulary (PNV)
|
||||
# =============================================================================
|
||||
# Part of: data/entity_annotation/modules/integrations/pico/
|
||||
# Parent: _index.yaml
|
||||
#
|
||||
# Description: Person Name Vocabulary (PNV) provides structured name components.
|
||||
# This enables proper parsing of complex name structures across cultures.
|
||||
#
|
||||
# References:
|
||||
# - PNV: https://w3id.org/pnv
|
||||
# - PNV Specification: https://w3id.org/pnv/doc/v2
|
||||
#
|
||||
# Last Updated: 2025-01-13
|
||||
# =============================================================================
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Person Name Vocabulary (PNV)
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
pnv_name_structure:
|
||||
description: |
|
||||
Person Name Vocabulary (PNV) provides structured name components.
|
||||
This enables proper parsing of complex name structures across cultures.
|
||||
|
||||
class: "pnv:PersonName"
|
||||
class_uri: "https://w3id.org/pnv#PersonName"
|
||||
|
||||
components:
|
||||
- property: "pnv:literalName"
|
||||
description: "Full name as single string"
|
||||
examples:
|
||||
- "Dr. Maria van den Berg"
|
||||
- "Rembrandt Harmenszoon van Rijn"
|
||||
- "Queen Elizabeth II"
|
||||
note: "Original string before parsing"
|
||||
|
||||
- property: "pnv:givenName"
|
||||
description: "First/given name"
|
||||
examples:
|
||||
- "Rembrandt"
|
||||
- "Maria"
|
||||
- "Jan"
|
||||
- "Elizabeth"
|
||||
note: "Personal name, not surname"
|
||||
|
||||
- property: "pnv:patronym"
|
||||
description: "Patronymic name component"
|
||||
examples:
|
||||
- "Harmenszoon"
|
||||
- "Janszoon"
|
||||
- "Pietersdochter"
|
||||
note: "Common in Dutch, Scandinavian, Slavic names"
|
||||
|
||||
- property: "pnv:surnamePrefix"
|
||||
description: "Prefix to surname (tussenvoegsel)"
|
||||
examples:
|
||||
- "van"
|
||||
- "de"
|
||||
- "van den"
|
||||
- "van der"
|
||||
- "op de"
|
||||
- "'t"
|
||||
- "von"
|
||||
- "di"
|
||||
note: "Language-specific, affects sorting"
|
||||
|
||||
- property: "pnv:baseSurname"
|
||||
description: "Core surname without prefix"
|
||||
examples:
|
||||
- "Rijn"
|
||||
- "Berg"
|
||||
- "Velde"
|
||||
- "Gogh"
|
||||
note: "Primary sorting component in Dutch"
|
||||
|
||||
- property: "pnv:honorificPrefix"
|
||||
description: "Title or honorific before name"
|
||||
examples:
|
||||
- "Dr."
|
||||
- "Prof."
|
||||
- "Prof. dr."
|
||||
- "Sir"
|
||||
- "Queen"
|
||||
- "Mr."
|
||||
- "Drs."
|
||||
- "Ir."
|
||||
note: "May indicate role - link to ROL"
|
||||
|
||||
- property: "pnv:honorificSuffix"
|
||||
description: "Title or honorific after name"
|
||||
examples:
|
||||
- "PhD"
|
||||
- "Jr."
|
||||
- "III"
|
||||
- "MD"
|
||||
- "RA"
|
||||
- "MSc"
|
||||
note: "Credentials and generational markers"
|
||||
|
||||
- property: "pnv:infixTitle"
|
||||
description: "Title within name structure"
|
||||
examples:
|
||||
- "graaf van"
|
||||
- "baron de"
|
||||
- "duke of"
|
||||
note: "Nobility titles embedded in name"
|
||||
|
||||
- property: "pnv:initials"
|
||||
description: "Initials of given name(s)"
|
||||
examples:
|
||||
- "P.R."
|
||||
- "C.Joh."
|
||||
- "H.A.F.M.O."
|
||||
note: "Each initial followed by period"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Dutch Name Conventions
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
dutch_name_patterns:
|
||||
description: |
|
||||
Special handling for Dutch names with tussenvoegsels (surname prefixes).
|
||||
Dutch sorting rules differ from other languages.
|
||||
|
||||
tussenvoegsel_list:
|
||||
- "van"
|
||||
- "van de"
|
||||
- "van den"
|
||||
- "van der"
|
||||
- "de"
|
||||
- "den"
|
||||
- "het"
|
||||
- "'t"
|
||||
- "ter"
|
||||
- "ten"
|
||||
- "op de"
|
||||
- "op den"
|
||||
- "in 't"
|
||||
- "in de"
|
||||
|
||||
sorting_rule: |
|
||||
In Dutch, surnames sort by baseSurname, ignoring tussenvoegsel.
|
||||
"Vincent van Gogh" sorts under "G" not "V".
|
||||
"Maria van den Berg" sorts under "B" not "V".
|
||||
|
||||
capitalization_rule: |
|
||||
Tussenvoegsel is lowercase when preceded by a given name, capitalized otherwise:
|
||||
- "Vincent van Gogh" (not "Vincent Van Gogh")
|
||||
- "Van Gogh" (surname alone, capitalized)
|
||||
- "de heer Van Gogh" (formal, capitalized)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Arabic Name Conventions
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
arabic_name_patterns:
|
||||
description: |
|
||||
Arabic names follow complex conventions with multiple components:
|
||||
nasab (patronymic), nisba (geographic/tribal), kunya (teknonym), laqab (title/epithet).
|
||||
|
||||
components:
|
||||
nasab:
|
||||
description: "Patronymic chain using ibn/bin (son) or bint (daughter)"
|
||||
examples:
|
||||
- "محمد بن علي بن حسن"
|
||||
- "Muhammad ibn Ali ibn Hasan"
|
||||
note: "Can extend multiple generations"
|
||||
|
||||
nisba:
|
||||
description: "Geographic or tribal affiliation (adjective form, ends in -i)"
|
||||
examples:
|
||||
- "البغدادي (al-Baghdadi)"
|
||||
- "المصري (al-Misri)"
|
||||
- "الهاشمي (al-Hashimi)"
|
||||
|
||||
kunya:
|
||||
description: "Teknonym (Abu/Umm + child's name)"
|
||||
examples:
|
||||
- "أبو عبد الله (Abu Abdullah)"
|
||||
- "أم كلثوم (Umm Kulthum)"
|
||||
note: "Often used as primary form of address"
|
||||
|
||||
laqab:
|
||||
description: "Title, epithet, or nickname"
|
||||
examples:
|
||||
- "الرشيد (al-Rashid - the rightly guided)"
|
||||
- "المأمون (al-Ma'mun - the trustworthy)"
|
||||
|
||||
parsing_order: |
|
||||
Traditional order: kunya - ism - nasab - laqab - nisba
|
||||
Example: Abu Bakr Muhammad ibn Zakariyya al-Razi
|
||||
- Kunya: Abu Bakr (father of Bakr)
|
||||
- Ism: Muhammad (given name)
|
||||
- Nasab: ibn Zakariyya (son of Zakariyya)
|
||||
- Nisba: al-Razi (from Ray, city in Persia)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Hebrew Name Conventions
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
hebrew_name_patterns:
|
||||
description: |
|
||||
Hebrew names, especially in religious and historical documents, follow
|
||||
specific conventions with patronymics and honorifics.
|
||||
|
||||
components:
|
||||
given_name:
|
||||
description: "First name (shem)"
|
||||
examples:
|
||||
- "משה (Moshe/Moses)"
|
||||
- "רבקה (Rivkah/Rebecca)"
|
||||
|
||||
patronymic:
|
||||
description: "Son/daughter of (ben/bat)"
|
||||
examples:
|
||||
- "משה בן אברהם (Moshe ben Avraham)"
|
||||
- "רבקה בת יעקב (Rivkah bat Ya'akov)"
|
||||
note: "ben for males, bat for females"
|
||||
|
||||
honorifics:
|
||||
examples:
|
||||
- "ר' (Rabbi)"
|
||||
- "הרב (HaRav - the Rabbi)"
|
||||
- "מו\"ר (Morenu - our teacher)"
|
||||
- "ז\"ל (zikhrono livrakha - of blessed memory)"
|
||||
- "ע\"ה (alav hashalom - peace be upon him)"
|
||||
|
||||
ketubah_conventions:
|
||||
description: "Special naming in marriage contracts"
|
||||
notes:
|
||||
- "Full patronymics required for both parties"
|
||||
- "Role designations for the parties (החתן = the groom, הכלה = the bride); fathers are named with honorifics"
|
||||
- "Geographic origin often included"
|
||||
- "Hebrew date format (day of month, month, year from creation)"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Spanish Colonial Name Conventions
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
spanish_name_patterns:
|
||||
description: |
|
||||
Spanish naming conventions, including colonial-era patterns with
|
||||
double surnames and titles.
|
||||
|
||||
components:
|
||||
given_names:
|
||||
description: "First and middle names (often religious)"
|
||||
examples:
|
||||
- "María Guadalupe"
|
||||
- "José Antonio"
|
||||
- "Juan Pablo"
|
||||
|
||||
paternal_surname:
|
||||
description: "Father's family name (apellido paterno)"
|
||||
note: "Listed first in double surname"
|
||||
|
||||
maternal_surname:
|
||||
description: "Mother's maiden family name (apellido materno)"
|
||||
note: "Listed second in double surname"
|
||||
|
||||
particles:
|
||||
examples:
|
||||
- "de"
|
||||
- "de la"
|
||||
- "del"
|
||||
note: "May indicate nobility or geographic origin"
|
||||
|
||||
titles:
|
||||
examples:
|
||||
- "Don/Doña"
|
||||
- "Señor/Señora"
|
||||
- "Fray (friar)"
|
||||
- "Sor (sister)"
|
||||
|
||||
colonial_patterns:
|
||||
notes:
|
||||
- "Racial designations (español, mestizo, indio, mulato) often recorded"
|
||||
- "Parish affiliation important"
|
||||
- "Godparents (padrinos) always named"
|
||||
- "Legitimacy (hijo legítimo/natural) specified"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Italian Name Conventions
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
italian_name_patterns:
|
||||
description: |
|
||||
Italian naming conventions with notarial and nobility elements.
|
||||
|
||||
components:
|
||||
given_name:
|
||||
description: "Nome proprio"
|
||||
note: "Often saints' names"
|
||||
|
||||
surname:
|
||||
description: "Cognome"
|
||||
note: "May derive from patronymics, locations, or professions"
|
||||
|
||||
particles:
|
||||
examples:
|
||||
- "di"
|
||||
- "del"
|
||||
- "della"
|
||||
- "dei"
|
||||
- "da"
|
||||
note: "May indicate origin or noble lineage"
|
||||
|
||||
honorifics:
|
||||
examples:
|
||||
- "Signore/Signora"
|
||||
- "Messer (medieval)"
|
||||
- "Ser (notarial)"
|
||||
- "Conte/Contessa"
|
||||
- "Marchese/Marchesa"
|
||||
|
||||
notarial_conventions:
|
||||
notes:
|
||||
- "Father's name given as a patronymic phrase: 'figlio di Giovanni' (son of Giovanni)"
|
||||
- "Profession often stated: 'mercante', 'notaio'"
|
||||
- "Parish or neighborhood: 'della parrocchia di San Marco'"
|
||||
- "Legal capacity: 'maggiore d'età' (of legal age)"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Greek Name Conventions
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
greek_name_patterns:
|
||||
description: |
|
||||
Greek Orthodox naming conventions with genitive patronymics.
|
||||
|
||||
components:
|
||||
given_name:
|
||||
description: "First name (often saint's name)"
|
||||
examples:
|
||||
- "Κωνσταντίνος (Konstantinos)"
|
||||
- "Μαρία (Maria)"
|
||||
|
||||
patronymic:
|
||||
description: "Father's name in genitive case"
|
||||
examples:
|
||||
- "του Νικολάου (tou Nikolaou - son of Nikolaos)"
|
||||
- "του Δημητρίου (tou Dimitriou)"
|
||||
note: "Genitive case indicates 'of' or 'belonging to'"
|
||||
|
||||
surname:
|
||||
description: "Family name"
|
||||
examples:
|
||||
- "Παπαδόπουλος (Papadopoulos)"
|
||||
- "Αντωνίου (Antoniou)"
|
||||
note: "May be patronymic origin (-opoulos, -ou, -ides)"
|
||||
|
||||
honorifics:
|
||||
examples:
|
||||
- "Κύριος/Κυρία (Kyrios/Kyria - Mr./Mrs.)"
|
||||
- "Πατήρ (Patir - Father, for clergy)"
|
||||
- "Παπα- (Papa- - prefix for priests)"
|
||||
|
||||
orthodox_conventions:
|
||||
notes:
|
||||
- "Name day (ονομαστική εορτή, onomastiki eorti) is important in Greek culture"
|
||||
- "Multiple given names common"
|
||||
- "Grandparents' names often passed down"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Russian/Cyrillic Name Conventions
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
russian_name_patterns:
|
||||
description: |
|
||||
Russian naming conventions with formal patronymics.
|
||||
|
||||
components:
|
||||
given_name:
|
||||
description: "First name (имя)"
|
||||
examples:
|
||||
- "Иван (Ivan)"
|
||||
- "Мария (Maria)"
|
||||
|
||||
patronymic:
|
||||
description: "Father's name + suffix (отчество)"
|
||||
examples:
|
||||
- "Петрович (Petrovich - son of Pyotr)"
|
||||
- "Петровна (Petrovna - daughter of Pyotr)"
|
||||
note: "-ovich/-evich for males, -ovna/-evna for females"
|
||||
|
||||
surname:
|
||||
description: "Family name (фамилия)"
|
||||
note: "Gendered: -ov/-ova, -in/-ina, -sky/-skaya"
|
||||
|
||||
formal_usage:
|
||||
notes:
|
||||
- "Formal address: given name + patronymic"
|
||||
- "Informal: given name or diminutive"
|
||||
- "Full official: surname, given name, patronymic"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Ottoman Turkish Name Conventions
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
ottoman_name_patterns:
|
||||
description: |
|
||||
Ottoman Turkish naming conventions blending Arabic and Turkish elements.
|
||||
|
||||
components:
|
||||
given_name:
|
||||
description: "Primary name (often Arabic origin)"
|
||||
examples:
|
||||
- "Mehmed"
|
||||
- "Ahmed"
|
||||
- "Fatma"
|
||||
|
||||
patronymic:
|
||||
description: "Father's name with 'oğlu' (son of) or 'kızı' (daughter of)"
|
||||
examples:
|
||||
- "Ali oğlu Mehmed"
|
||||
- "Hasan oğlu Ahmed"
|
||||
|
||||
epithet:
|
||||
description: "Title or descriptor (laqab)"
|
||||
examples:
|
||||
- "Paşa (Pasha)"
|
||||
- "Efendi"
|
||||
- "Ağa"
|
||||
- "Bey"
|
||||
- "Hatun/Hanım (for women)"
|
||||
|
||||
nisba:
|
||||
description: "Geographic origin or profession"
|
||||
examples:
|
||||
- "Kayserili (from Kayseri)"
|
||||
- "Bakkal (grocer)"
|
||||
|
||||
sijill_conventions:
|
||||
notes:
|
||||
- "Court records (sicil) use formal full names"
|
||||
- "Witnesses identified by profession and address"
|
||||
- "Deceased marked as 'merhum/merhume'"
|
||||
- "Non-Muslims identified by religious community (millet)"
|
||||
|
|
@ -0,0 +1,517 @@
|
|||
# =============================================================================
|
||||
# PiCo Integration Module: Family and Social Relationships
|
||||
# =============================================================================
|
||||
# Part of: data/entity_annotation/modules/integrations/pico/
|
||||
# Parent: _index.yaml
|
||||
#
|
||||
# Description: Family relationship properties for genealogical data.
|
||||
# Enables modeling complex family structures from historical records.
|
||||
#
|
||||
# Last Updated: 2025-01-13
|
||||
# =============================================================================
|
||||
|
||||
family_relationships:
|
||||
description: |
|
||||
Family relationship properties link persons within and across sources.
|
||||
|
||||
Rules:
|
||||
- For PersonObservations: relationships refer to OTHER observations on SAME source
|
||||
- For PersonReconstructions: relationships refer to other reconstructions
|
||||
|
||||
Property characteristics:
|
||||
- Symmetric: If A hasRelation B, then B hasRelation A (spouses, siblings, cousins)
|
||||
- Transitive: hasAncestor/hasDescendant chain through generations
|
||||
- Inverse pairs: parent/children, grandparent/grandchild, etc.
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Core Family (Schema.org)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
core_relationships:
|
||||
- property: "sdo:parent"
|
||||
property_uri: "https://schema.org/parent"
|
||||
description: "A parent of the person"
|
||||
inverse: "sdo:children"
|
||||
subPropertyOf: ["sdo:relatedTo", "pico:hasAncestor"]
|
||||
note: "Biological or legal parent"
|
||||
|
||||
- property: "sdo:children"
|
||||
property_uri: "https://schema.org/children"
|
||||
description: "A child of the person"
|
||||
inverse: "sdo:parent"
|
||||
subPropertyOf: ["sdo:relatedTo", "pico:hasDescendant"]
|
||||
|
||||
- property: "sdo:spouse"
|
||||
property_uri: "https://schema.org/spouse"
|
||||
description: "The person's spouse"
|
||||
symmetric: true
|
||||
subPropertyOf: "sdo:relatedTo"
|
||||
|
||||
- property: "sdo:sibling"
|
||||
property_uri: "https://schema.org/sibling"
|
||||
description: "A brother or sister"
|
||||
symmetric: true
|
||||
subPropertyOf: "sdo:relatedTo"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Transitive Ancestry (PiCo)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
ancestry_relationships:
|
||||
- property: "pico:hasAncestor"
|
||||
property_uri: "https://personsincontext.org/model#hasAncestor"
|
||||
description: "Any ancestor (parent, grandparent, etc.)"
|
||||
type: "owl:TransitiveProperty"
|
||||
inverse: "pico:hasDescendant"
|
||||
note: "Not used directly; parent→parent chains automatically create ancestors"
|
||||
|
||||
- property: "pico:hasDescendant"
|
||||
property_uri: "https://personsincontext.org/model#hasDescendant"
|
||||
description: "Any descendant (child, grandchild, etc.)"
|
||||
type: "owl:TransitiveProperty"
|
||||
inverse: "pico:hasAncestor"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Grandparents/Grandchildren
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
grandparent_relationships:
|
||||
- property: "pico:hasGrandparent"
|
||||
property_uri: "https://personsincontext.org/model#hasGrandparent"
|
||||
inverse: "pico:hasGrandchild"
|
||||
|
||||
- property: "pico:hasGrandchild"
|
||||
property_uri: "https://personsincontext.org/model#hasGrandchild"
|
||||
inverse: "pico:hasGrandparent"
|
||||
|
||||
- property: "pico:hasGreat-grandparent"
|
||||
property_uri: "https://personsincontext.org/model#hasGreat-grandparent"
|
||||
inverse: "pico:hasGreat-grandchild"
|
||||
|
||||
- property: "pico:hasGreat-grandchild"
|
||||
property_uri: "https://personsincontext.org/model#hasGreat-grandchild"
|
||||
inverse: "pico:hasGreat-grandparent"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Aunts/Uncles and Nieces/Nephews
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
extended_family:
|
||||
- property: "pico:hasUncle_Aunt"
|
||||
property_uri: "https://personsincontext.org/model#hasUncle_Aunt"
|
||||
description: "An uncle or aunt (sibling of parent)"
|
||||
inverse: "pico:hasNephew_Niece"
|
||||
|
||||
- property: "pico:hasNephew_Niece"
|
||||
property_uri: "https://personsincontext.org/model#hasNephew_Niece"
|
||||
description: "A nephew or niece (child of sibling)"
|
||||
inverse: "pico:hasUncle_Aunt"
|
||||
|
||||
- property: "pico:hasCousin"
|
||||
property_uri: "https://personsincontext.org/model#hasCousin"
|
||||
description: "A cousin (child of parent's sibling)"
|
||||
symmetric: true
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Step-family
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
step_relationships:
|
||||
- property: "pico:hasStepparent"
|
||||
property_uri: "https://personsincontext.org/model#hasStepparent"
|
||||
description: "A stepparent (spouse of biological parent)"
|
||||
inverse: "pico:hasStepchild"
|
||||
|
||||
- property: "pico:hasStepchild"
|
||||
property_uri: "https://personsincontext.org/model#hasStepchild"
|
||||
inverse: "pico:hasStepparent"
|
||||
|
||||
- property: "pico:hasStepsibling"
|
||||
property_uri: "https://personsincontext.org/model#hasStepsibling"
|
||||
description: "A stepbrother or stepsister"
|
||||
symmetric: true
|
||||
|
||||
- property: "pico:hasHalf-sibling"
|
||||
property_uri: "https://personsincontext.org/model#hasHalf-sibling"
|
||||
description: "A half-brother or half-sister (one shared parent)"
|
||||
symmetric: true
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Foster/Godparent
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
non_biological_relationships:
|
||||
- property: "pico:hasFosterParent"
|
||||
property_uri: "https://personsincontext.org/model#hasFosterParent"
|
||||
inverse: "pico:hasFosterChild"
|
||||
|
||||
- property: "pico:hasFosterChild"
|
||||
property_uri: "https://personsincontext.org/model#hasFosterChild"
|
||||
inverse: "pico:hasFosterParent"
|
||||
|
||||
- property: "pico:hasGodparent"
|
||||
property_uri: "https://personsincontext.org/model#hasGodparent"
|
||||
description: "A godparent (witness at baptism)"
|
||||
inverse: "pico:hasGodchild"
|
||||
|
||||
- property: "pico:hasGodchild"
|
||||
property_uri: "https://personsincontext.org/model#hasGodchild"
|
||||
inverse: "pico:hasGodparent"
|
||||
|
||||
- property: "pico:hasLegitimizedChild"
|
||||
property_uri: "https://personsincontext.org/model#hasLegitimizedChild"
|
||||
description: "A child legitimized by marriage or legal recognition"
|
||||
inverse: "pico:isLegitimitezedChildOf"
|
||||
|
||||
- property: "pico:isLegitimitezedChildOf"
|
||||
property_uri: "https://personsincontext.org/model#isLegitimitezedChildOf"
|
||||
inverse: "pico:hasLegitimizedChild"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# In-Laws
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
in_law_relationships:
|
||||
- property: "pico:hasParent-in-law"
|
||||
property_uri: "https://personsincontext.org/model#hasParent-in-law"
|
||||
inverse: "pico:hasChild-in-law"
|
||||
|
||||
- property: "pico:hasChild-in-law"
|
||||
property_uri: "https://personsincontext.org/model#hasChild-in-law"
|
||||
inverse: "pico:hasParent-in-law"
|
||||
|
||||
- property: "pico:hasSibling-in-law"
|
||||
property_uri: "https://personsincontext.org/model#hasSibling-in-law"
|
||||
description: "Brother/sister-in-law"
|
||||
symmetric: true
|
||||
|
||||
- property: "pico:hasGrandparent-in-law"
|
||||
property_uri: "https://personsincontext.org/model#hasGrandparent-in-law"
|
||||
inverse: "pico:hasGrandchild-in-law"
|
||||
|
||||
- property: "pico:hasGrandchild-in-law"
|
||||
property_uri: "https://personsincontext.org/model#hasGrandchild-in-law"
|
||||
inverse: "pico:hasGrandparent-in-law"
|
||||
|
||||
- property: "pico:hasUncle_Aunt-in-law"
|
||||
property_uri: "https://personsincontext.org/model#hasUncle_Aunt-in-law"
|
||||
inverse: "pico:hasNephew_Niece-in-law"
|
||||
|
||||
- property: "pico:hasNephew_Niece-in-law"
|
||||
property_uri: "https://personsincontext.org/model#hasNephew_Niece-in-law"
|
||||
inverse: "pico:hasUncle_Aunt-in-law"
|
||||
|
||||
- property: "pico:hasCousin-in-law"
|
||||
property_uri: "https://personsincontext.org/model#hasCousin-in-law"
|
||||
symmetric: true
|
||||
|
||||
- property: "pico:hasStepparent-in-law"
|
||||
property_uri: "https://personsincontext.org/model#hasStepparent-in-law"
|
||||
inverse: "pico:hasStepchild-in-law"
|
||||
|
||||
- property: "pico:hasStepchild-in-law"
|
||||
property_uri: "https://personsincontext.org/model#hasStepchild-in-law"
|
||||
inverse: "pico:hasStepparent-in-law"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Former Partners
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
former_partner_relationships:
|
||||
- property: "pico:isWidOf"
|
||||
property_uri: "https://personsincontext.org/model#isWidOf"
|
||||
description: "Is widow/widower of deceased spouse"
|
||||
note: "The subject is the surviving partner"
|
||||
|
||||
- property: "pico:hasPreviousPartner"
|
||||
property_uri: "https://personsincontext.org/model#hasPreviousPartner"
|
||||
description: "A former spouse or partner"
|
||||
symmetric: true
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Historical Relationship Indicators by Language
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
historical_relationship_patterns:
|
||||
description: |
|
||||
Common relationship indicators in historical documents by language.
|
||||
Use these patterns to identify family relationships in source texts.
|
||||
|
||||
dutch:
|
||||
description: "Dutch relationship indicators"
|
||||
patterns:
|
||||
- pattern: "huijsvrou van"
|
||||
meaning: "wife of"
|
||||
relationship: "spouse"
|
||||
- pattern: "zoon van"
|
||||
meaning: "son of"
|
||||
relationship: "parent"
|
||||
- pattern: "dochter van"
|
||||
meaning: "daughter of"
|
||||
relationship: "parent"
|
||||
- pattern: "weduwe van"
|
||||
meaning: "widow of"
|
||||
relationship: "widow_of"
|
||||
- pattern: "weduwnaar van"
|
||||
meaning: "widower of"
|
||||
relationship: "widow_of"
|
||||
- pattern: "peter"
|
||||
meaning: "godfather"
|
||||
relationship: "godparent"
|
||||
- pattern: "meter"
|
||||
meaning: "godmother"
|
||||
relationship: "godparent"
|
||||
- pattern: "getuige"
|
||||
meaning: "witness"
|
||||
relationship: "witness"
|
||||
- pattern: "broeder van"
|
||||
meaning: "brother of"
|
||||
relationship: "sibling"
|
||||
- pattern: "zuster van"
|
||||
meaning: "sister of"
|
||||
relationship: "sibling"
|
||||
|
||||
latin:
|
||||
description: "Latin relationship indicators (common in church records)"
|
||||
patterns:
|
||||
- pattern: "filius"
|
||||
meaning: "son"
|
||||
relationship: "parent"
|
||||
- pattern: "filia"
|
||||
meaning: "daughter"
|
||||
relationship: "parent"
|
||||
- pattern: "uxor"
|
||||
meaning: "wife"
|
||||
relationship: "spouse"
|
||||
- pattern: "maritus"
|
||||
meaning: "husband"
|
||||
relationship: "spouse"
|
||||
- pattern: "vidua"
|
||||
meaning: "widow"
|
||||
relationship: "widow_of"
|
||||
- pattern: "viduus"
|
||||
meaning: "widower"
|
||||
relationship: "widow_of"
|
||||
- pattern: "quondam"
|
||||
meaning: "the late"
|
||||
relationship: "deceased_marker"
|
||||
- pattern: "patrinus"
|
||||
meaning: "godfather"
|
||||
relationship: "godparent"
|
||||
- pattern: "matrina"
|
||||
meaning: "godmother"
|
||||
relationship: "godparent"
|
||||
- pattern: "testis"
|
||||
meaning: "witness"
|
||||
relationship: "witness"
|
||||
|
||||
german:
|
||||
description: "German relationship indicators"
|
||||
patterns:
|
||||
- pattern: "Ehefrau von"
|
||||
meaning: "wife of"
|
||||
relationship: "spouse"
|
||||
- pattern: "Ehemann von"
|
||||
meaning: "husband of"
|
||||
relationship: "spouse"
|
||||
- pattern: "Sohn von"
|
||||
meaning: "son of"
|
||||
relationship: "parent"
|
||||
- pattern: "Tochter von"
|
||||
meaning: "daughter of"
|
||||
relationship: "parent"
|
||||
- pattern: "Witwe von"
|
||||
meaning: "widow of"
|
||||
relationship: "widow_of"
|
||||
- pattern: "Witwer von"
|
||||
meaning: "widower of"
|
||||
relationship: "widow_of"
|
||||
- pattern: "Taufpate"
|
||||
meaning: "godfather"
|
||||
relationship: "godparent"
|
||||
- pattern: "Taufpatin"
|
||||
meaning: "godmother"
|
||||
relationship: "godparent"
|
||||
|
||||
french:
|
||||
description: "French relationship indicators"
|
||||
patterns:
|
||||
- pattern: "fils de"
|
||||
meaning: "son of"
|
||||
relationship: "parent"
|
||||
- pattern: "fille de"
|
||||
meaning: "daughter of"
|
||||
relationship: "parent"
|
||||
- pattern: "épouse de"
|
||||
meaning: "wife of"
|
||||
relationship: "spouse"
|
||||
- pattern: "époux de"
|
||||
meaning: "husband of"
|
||||
relationship: "spouse"
|
||||
- pattern: "veuve de"
|
||||
meaning: "widow of"
|
||||
relationship: "widow_of"
|
||||
- pattern: "veuf de"
|
||||
meaning: "widower of"
|
||||
relationship: "widow_of"
|
||||
- pattern: "feu"
|
||||
meaning: "the late (m)"
|
||||
relationship: "deceased_marker"
|
||||
- pattern: "feue"
|
||||
meaning: "the late (f)"
|
||||
relationship: "deceased_marker"
|
||||
- pattern: "parrain"
|
||||
meaning: "godfather"
|
||||
relationship: "godparent"
|
||||
- pattern: "marraine"
|
||||
meaning: "godmother"
|
||||
relationship: "godparent"
|
||||
|
||||
arabic:
|
||||
description: "Arabic relationship indicators"
|
||||
patterns:
|
||||
- pattern: "ابن"
|
||||
transliteration: "ibn"
|
||||
meaning: "son of"
|
||||
relationship: "parent"
|
||||
- pattern: "بن"
|
||||
transliteration: "bin"
|
||||
meaning: "son of (shorter form)"
|
||||
relationship: "parent"
|
||||
- pattern: "بنت"
|
||||
transliteration: "bint"
|
||||
meaning: "daughter of"
|
||||
relationship: "parent"
|
||||
- pattern: "زوج"
|
||||
transliteration: "zawj"
|
||||
meaning: "husband"
|
||||
relationship: "spouse"
|
||||
- pattern: "زوجة"
|
||||
transliteration: "zawja"
|
||||
meaning: "wife"
|
||||
relationship: "spouse"
|
||||
- pattern: "أرملة"
|
||||
transliteration: "armala"
|
||||
meaning: "widow"
|
||||
relationship: "widow_of"
|
||||
- pattern: "المرحوم"
|
||||
transliteration: "al-marhum"
|
||||
meaning: "the late (m)"
|
||||
relationship: "deceased_marker"
|
||||
- pattern: "المرحومة"
|
||||
transliteration: "al-marhuma"
|
||||
meaning: "the late (f)"
|
||||
relationship: "deceased_marker"
|
||||
- pattern: "آل"
|
||||
transliteration: "Al"
|
||||
meaning: "family of"
|
||||
relationship: "family_marker"
|
||||
|
||||
hebrew:
|
||||
description: "Hebrew relationship indicators"
|
||||
patterns:
|
||||
- pattern: "בן"
|
||||
transliteration: "ben"
|
||||
meaning: "son of"
|
||||
relationship: "parent"
|
||||
- pattern: "בת"
|
||||
transliteration: "bat"
|
||||
meaning: "daughter of"
|
||||
relationship: "parent"
|
||||
- pattern: "אשת"
|
||||
transliteration: "eshet"
|
||||
meaning: "wife of"
|
||||
relationship: "spouse"
|
||||
- pattern: "אלמנה"
|
||||
transliteration: "almana"
|
||||
meaning: "widow"
|
||||
relationship: "widow_of"
|
||||
- pattern: "ז״ל"
|
||||
transliteration: "z\"l"
|
||||
meaning: "of blessed memory"
|
||||
relationship: "deceased_marker"
|
||||
- pattern: "ע״ה"
|
||||
transliteration: "a\"h"
|
||||
meaning: "peace be upon him/her"
|
||||
relationship: "deceased_marker"
|
||||
|
||||
spanish:
|
||||
description: "Spanish relationship indicators"
|
||||
patterns:
|
||||
- pattern: "hijo de"
|
||||
meaning: "son of"
|
||||
relationship: "parent"
|
||||
- pattern: "hija de"
|
||||
meaning: "daughter of"
|
||||
relationship: "parent"
|
||||
- pattern: "esposa de"
|
||||
meaning: "wife of"
|
||||
relationship: "spouse"
|
||||
- pattern: "esposo de"
|
||||
meaning: "husband of"
|
||||
relationship: "spouse"
|
||||
- pattern: "viuda de"
|
||||
meaning: "widow of"
|
||||
relationship: "widow_of"
|
||||
- pattern: "viudo de"
|
||||
meaning: "widower of"
|
||||
relationship: "widow_of"
|
||||
- pattern: "padrino"
|
||||
meaning: "godfather"
|
||||
relationship: "godparent"
|
||||
- pattern: "madrina"
|
||||
meaning: "godmother"
|
||||
relationship: "godparent"
|
||||
- pattern: "hijo legítimo"
|
||||
meaning: "legitimate son"
|
||||
relationship: "legitimacy_marker"
|
||||
- pattern: "hijo natural"
|
||||
meaning: "illegitimate son"
|
||||
relationship: "legitimacy_marker"
|
||||
|
||||
portuguese:
|
||||
description: "Portuguese relationship indicators"
|
||||
patterns:
|
||||
- pattern: "filho de"
|
||||
meaning: "son of"
|
||||
relationship: "parent"
|
||||
- pattern: "filha de"
|
||||
meaning: "daughter of"
|
||||
relationship: "parent"
|
||||
- pattern: "esposa de"
|
||||
meaning: "wife of"
|
||||
relationship: "spouse"
|
||||
- pattern: "esposo de"
|
||||
meaning: "husband of"
|
||||
relationship: "spouse"
|
||||
- pattern: "viúva de"
|
||||
meaning: "widow of"
|
||||
relationship: "widow_of"
|
||||
- pattern: "viúvo de"
|
||||
meaning: "widower of"
|
||||
relationship: "widow_of"
|
||||
- pattern: "padrinho"
|
||||
meaning: "godfather"
|
||||
relationship: "godparent"
|
||||
- pattern: "madrinha"
|
||||
meaning: "godmother"
|
||||
relationship: "godparent"
|
||||
|
||||
ottoman_turkish:
|
||||
description: "Ottoman Turkish relationship indicators"
|
||||
patterns:
|
||||
- pattern: "oğlu"
|
||||
meaning: "son of"
|
||||
relationship: "parent"
|
||||
- pattern: "kızı"
|
||||
meaning: "daughter of"
|
||||
relationship: "parent"
|
||||
- pattern: "zevcesi"
|
||||
meaning: "wife"
|
||||
relationship: "spouse"
|
||||
- pattern: "merhum"
|
||||
meaning: "the late (m)"
|
||||
relationship: "deceased_marker"
|
||||
- pattern: "merhume"
|
||||
meaning: "the late (f)"
|
||||
relationship: "deceased_marker"
|
||||
|
|
@ -0,0 +1,570 @@
|
|||
# =============================================================================
|
||||
# PiCo Integration Module: Temporal Patterns & Calendar Systems
|
||||
# =============================================================================
|
||||
# Part of: data/entity_annotation/modules/integrations/pico/
|
||||
# Parent: _index.yaml
|
||||
#
|
||||
# Description: Temporal expression handling, calendar systems, date normalization,
|
||||
# and PROV-O provenance model for tracking observation/reconstruction
|
||||
# activities.
|
||||
#
|
||||
# Last Updated: 2025-12-12
|
||||
# =============================================================================
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Calendar Systems
|
||||
# -----------------------------------------------------------------------------
|
||||
# Historical documents use various calendar systems. This section defines
|
||||
# how to handle and normalize dates from different calendrical traditions.
|
||||
|
||||
calendar_systems:
|
||||
description: |
|
||||
Historical sources use diverse calendar systems depending on culture,
|
||||
religion, and time period. Proper extraction requires:
|
||||
1. Identifying the source calendar
|
||||
2. Preserving the original date expression
|
||||
3. Providing normalized ISO 8601 equivalents where possible
|
||||
|
||||
supported_calendars:
|
||||
|
||||
gregorian:
|
||||
id: "gregorian"
|
||||
label: "Gregorian Calendar"
|
||||
uri: "https://www.wikidata.org/wiki/Q12138"
|
||||
description: |
|
||||
The civil calendar now used worldwide; adopted in 1582 (Catholic countries)
|
||||
or later (Protestant/Orthodox countries).
|
||||
adoption_dates:
|
||||
catholic: "1582-10-15"
|
||||
protestant: "1700-03-01"
|
||||
british_empire: "1752-09-14"
|
||||
russia: "1918-02-14"
|
||||
greece: "1923-03-01"
|
||||
usage_notes: |
|
||||
- Default for modern documents
|
||||
- Used in civil registrations after adoption
|
||||
- Standard for ISO 8601 normalization
|
||||
example:
|
||||
original: "15 October 1582"
|
||||
normalized: "1582-10-15"
|
||||
|
||||
julian:
|
||||
id: "julian"
|
||||
label: "Julian Calendar"
|
||||
uri: "https://www.wikidata.org/wiki/Q11184"
|
||||
description: |
|
||||
Calendar introduced by Julius Caesar in 45 BCE. Used in Europe
|
||||
until Gregorian reform, and by Eastern Orthodox churches today.
|
||||
offset_from_gregorian:
|
||||
16th_century: 10
|
||||
17th_century: 10
|
||||
18th_century: 11
|
||||
19th_century: 12
|
||||
20th_century: 13
|
||||
21st_century: 13
|
||||
usage_notes: |
|
||||
- Greek Orthodox Church records use Julian calendar
|
||||
- Russian Empire used Julian until 1918
|
||||
- Dual dating common in transition periods
|
||||
- Format: "Julian date / Gregorian date" or "O.S./N.S." notation
|
||||
example:
|
||||
original: "14 March 1875 (O.S.)"
|
||||
gregorian_equivalent: "26 March 1875"
|
||||
normalized: "1875-03-26"
|
||||
note: "Greek Orthodox used Julian; Gregorian equivalent = Julian date + 12 days (19th-century offset)"
|
||||
|
||||
hijri:
|
||||
id: "hijri"
|
||||
label: "Islamic/Hijri Calendar"
|
||||
uri: "https://www.wikidata.org/wiki/Q28892"
|
||||
alternative_names:
|
||||
- "Islamic Calendar"
|
||||
- "Muslim Calendar"
|
||||
- "Lunar Hijri"
|
||||
- "Anno Hegirae (AH)"
|
||||
description: |
|
||||
Lunar calendar used in Islamic societies. Year 1 = 622 CE (Hijra).
|
||||
354 or 355 days per year (12 lunar months).
|
||||
months:
|
||||
1: "Muharram"
|
||||
2: "Safar"
|
||||
3: "Rabi' al-Awwal"
|
||||
4: "Rabi' al-Thani"
|
||||
5: "Jumada al-Awwal"
|
||||
6: "Jumada al-Thani"
|
||||
7: "Rajab"
|
||||
8: "Sha'ban"
|
||||
9: "Ramadan"
|
||||
10: "Shawwal"
|
||||
11: "Dhu al-Qa'dah"
|
||||
12: "Dhu al-Hijjah"
|
||||
usage_notes: |
|
||||
- Ottoman Empire, Waqf documents, Sijill records
|
||||
- Year conversion (rough approximation only): Gregorian = (Hijri * 0.97) + 622
|
||||
- Month-level precision often sufficient
|
||||
- Some documents use both Hijri and local calendars
|
||||
example:
|
||||
original: "month of Rajab, year 1225 Hijri"
|
||||
normalized: "1810-08"
|
||||
note: "Approximate month (Rajab 1225 AH falls in August 1810) - exact day unknown"
|
||||
|
||||
hebrew:
|
||||
id: "hebrew"
|
||||
label: "Hebrew Calendar"
|
||||
uri: "https://www.wikidata.org/wiki/Q9644"
|
||||
alternative_names:
|
||||
- "Jewish Calendar"
|
||||
- "Anno Mundi"
|
||||
description: |
|
||||
Lunisolar calendar used in Jewish religious and civil life.
|
||||
Year 1 = 3761 BCE (traditional Creation date).
|
||||
months:
|
||||
1: "Nisan"
|
||||
2: "Iyar"
|
||||
3: "Sivan"
|
||||
4: "Tammuz"
|
||||
5: "Av"
|
||||
6: "Elul"
|
||||
7: "Tishrei"
|
||||
8: "Cheshvan"
|
||||
9: "Kislev"
|
||||
10: "Tevet"
|
||||
11: "Shevat"
|
||||
12: "Adar"
|
||||
usage_notes: |
|
||||
- Ketubot (marriage contracts)
|
||||
- Get (divorce documents)
|
||||
- Synagogue records
|
||||
- Year conversion: Gregorian = Hebrew - 3760 (approx)
|
||||
- Month names often transliterated in various ways
|
||||
example:
|
||||
original: "23 Elul 5656"
|
||||
normalized: "1896-09-01"
|
||||
note: "Hebrew date from Creation (anno mundi)"
|
||||
|
||||
french_republican:
|
||||
id: "french_republican"
|
||||
label: "French Republican Calendar"
|
||||
uri: "https://www.wikidata.org/wiki/Q181974"
|
||||
description: |
|
||||
Calendar used in France 1793-1805. Year 1 = 1792 CE.
|
||||
12 months of 30 days + 5-6 supplementary days.
|
||||
months:
|
||||
1: "Vendemiaire"
|
||||
2: "Brumaire"
|
||||
3: "Frimaire"
|
||||
4: "Nivose"
|
||||
5: "Pluviose"
|
||||
6: "Ventose"
|
||||
7: "Germinal"
|
||||
8: "Floreal"
|
||||
9: "Prairial"
|
||||
10: "Messidor"
|
||||
11: "Thermidor"
|
||||
12: "Fructidor"
|
||||
usage_notes: |
|
||||
- French civil registrations 1793-1805
|
||||
- Some Belgian/Dutch territories
|
||||
- Conversion tables widely available
|
||||
example:
|
||||
original: "14 Vendemiaire an IV"
|
||||
normalized: "1795-10-06"
|
||||
|
||||
chinese:
|
||||
id: "chinese"
|
||||
label: "Chinese Calendar"
|
||||
uri: "https://www.wikidata.org/wiki/Q32823"
|
||||
description: |
|
||||
Lunisolar calendar used in China and East Asia.
|
||||
Combines 60-year cycle with lunar months.
|
||||
usage_notes: |
|
||||
- Emperor reign year + lunar month + day
|
||||
- Gregorian adopted 1912 (Republic of China)
|
||||
- Traditional dates still used for festivals
|
||||
example:
|
||||
original: "Guangxu 22, 8th month, 15th day"
|
||||
normalized: "1896-09-21"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Date Expression Patterns
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
date_expression_patterns:
|
||||
description: |
|
||||
Common patterns for expressing dates in historical sources.
|
||||
GLM annotators should recognize these patterns and extract:
|
||||
1. The original expression (exact transcription)
|
||||
2. The calendar system used
|
||||
3. A normalized ISO 8601 date (where possible)
|
||||
|
||||
patterns:
|
||||
|
||||
full_date:
|
||||
description: "Complete date with day, month, and year"
|
||||
examples:
|
||||
- pattern: "15 October 1582"
|
||||
calendar: "gregorian"
|
||||
normalized: "1582-10-15"
|
||||
|
||||
- pattern: "the fifteenth day of October in the year 1582"
|
||||
calendar: "gregorian"
|
||||
normalized: "1582-10-15"
|
||||
|
||||
- pattern: "23 Elul 5656"
|
||||
calendar: "hebrew"
|
||||
normalized: "1896-09-01"
|
||||
|
||||
partial_date:
|
||||
description: "Date with some components missing"
|
||||
examples:
|
||||
- pattern: "March 1875"
|
||||
calendar: "gregorian"
|
||||
normalized: "1875-03"
|
||||
precision: "month"
|
||||
|
||||
- pattern: "in the year 1810"
|
||||
calendar: "gregorian"
|
||||
normalized: "1810"
|
||||
precision: "year"
|
||||
|
||||
- pattern: "month of Rajab, 1225 AH"
|
||||
calendar: "hijri"
|
||||
normalized: "1810-08"
|
||||
precision: "month"
|
||||
|
||||
dual_dating:
|
||||
description: "Documents showing both Julian and Gregorian dates"
|
||||
notation_styles:
|
||||
- "O.S. (Old Style = Julian)"
|
||||
- "N.S. (New Style = Gregorian)"
|
||||
- "Slash notation: 14/26 March 1875"
|
||||
examples:
|
||||
- pattern: "14/26 March 1875"
|
||||
interpretation: "14 March (Julian) = 26 March (Gregorian)"
|
||||
normalized: "1875-03-26"
|
||||
note: "Use Gregorian for normalization"
|
||||
|
||||
- pattern: "6 January 1894 (Gregorian)"
|
||||
normalized: "1894-01-06"
|
||||
note: "Explicit calendar indicator"
|
||||
|
||||
relative_dating:
|
||||
description: "Dates relative to events or other dates"
|
||||
examples:
|
||||
- pattern: "three days after Easter"
|
||||
requires: "Year context to calculate"
|
||||
|
||||
- pattern: "the Sunday before St. Martin's Day"
|
||||
requires: "Year context and liturgical calendar"
|
||||
|
||||
floruit:
|
||||
description: "Period when person was known to be active"
|
||||
notation: "fl."
|
||||
examples:
|
||||
- pattern: "fl. 1780-1820"
|
||||
interpretation: "Active between 1780 and 1820"
|
||||
|
||||
- pattern: "fl. c. 1850"
|
||||
interpretation: "Active around 1850"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Temporal Properties in PiCo
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
temporal_properties:
|
||||
description: |
|
||||
Properties for capturing temporal information about persons
|
||||
observed in historical sources.
|
||||
|
||||
biographical_dates:
|
||||
birth_date:
|
||||
property: "sdo:birthDate"
|
||||
property_uri: "https://schema.org/birthDate"
|
||||
range: "xsd:date or xsd:gYearMonth or xsd:gYear"
|
||||
description: "Date of birth"
|
||||
extraction_notes: |
|
||||
- May be explicitly stated or inferred from age
|
||||
- Capture calendar system if non-Gregorian
|
||||
- Normalize to ISO 8601 for querying
|
||||
|
||||
death_date:
|
||||
property: "sdo:deathDate"
|
||||
property_uri: "https://schema.org/deathDate"
|
||||
range: "xsd:date or xsd:gYearMonth or xsd:gYear"
|
||||
description: "Date of death"
|
||||
extraction_notes: |
|
||||
- "deceased" annotation indicates death before document date
|
||||
- Infer approximate date from context when possible
|
||||
|
||||
baptism_date:
|
||||
property: "pico:baptismDate"
|
||||
range: "xsd:date"
|
||||
description: "Date of baptism/christening"
|
||||
note: "Common in church records; often within days of birth"
|
||||
|
||||
burial_date:
|
||||
property: "pico:burialDate"
|
||||
range: "xsd:date"
|
||||
description: "Date of burial"
|
||||
note: "Common in church/cemetery records"
|
||||
|
||||
event_dates:
|
||||
marriage_date:
|
||||
property: "pico:marriageDate"
|
||||
range: "xsd:date"
|
||||
description: "Date of marriage event"
|
||||
|
||||
divorce_date:
|
||||
property: "pico:divorceDate"
|
||||
range: "xsd:date"
|
||||
description: "Date of divorce"
|
||||
|
||||
document_date:
|
||||
property: "sdo:dateCreated"
|
||||
property_uri: "https://schema.org/dateCreated"
|
||||
range: "xsd:date"
|
||||
description: "Date the source document was created"
|
||||
note: "Critical for temporal context of observations"
|
||||
|
||||
age_expressions:
|
||||
age_at_event:
|
||||
property: "pico:ageAtEvent"
|
||||
range: "xsd:string"
|
||||
description: "Age as stated in document"
|
||||
examples:
|
||||
- "25 years"
|
||||
- "about 30 years old"
|
||||
- "minor (under legal age)"
|
||||
- "of full age (adult)"
|
||||
note: |
|
||||
Preserve original expression; calculate birth year if needed.
|
||||
"oud 25 jaar" (Dutch) = "25 years old"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# PROV-O Provenance Model
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
provenance_model:
|
||||
description: |
|
||||
PiCo uses W3C PROV-O for provenance tracking at two levels:
|
||||
|
||||
1. OBSERVATION LEVEL: Where did this observation come from?
|
||||
- prov:hadPrimarySource -> Source document
|
||||
- prov:wasGeneratedBy -> Extraction activity (optional)
|
||||
|
||||
2. RECONSTRUCTION LEVEL: How was this person entity created?
|
||||
- prov:wasDerivedFrom -> Source observation(s)
|
||||
- prov:wasGeneratedBy -> Reconstruction activity
|
||||
- prov:wasRevisionOf -> Previous reconstruction version
|
||||
|
||||
activity_class:
|
||||
class: "prov:Activity"
|
||||
class_uri: "http://www.w3.org/ns/prov#Activity"
|
||||
description: "The activity that generated a PersonReconstruction"
|
||||
|
||||
properties:
|
||||
- property: "prov:wasAssociatedWith"
|
||||
description: "Agent responsible for the activity"
|
||||
range: "prov:Agent"
|
||||
|
||||
- property: "prov:startedAtTime"
|
||||
description: "When the activity started"
|
||||
range: "xsd:dateTime"
|
||||
|
||||
- property: "prov:endedAtTime"
|
||||
description: "When the activity completed"
|
||||
range: "xsd:dateTime"
|
||||
|
||||
- property: "prov:used"
|
||||
description: "Resources/tools used in the activity"
|
||||
range: "prov:Entity"
|
||||
note: "E.g., ML model, matching algorithm, rule set"
|
||||
|
||||
activity_types:
|
||||
human_reconstruction:
|
||||
description: "Manual reconstruction by researcher"
|
||||
note: "Provide: time, place, knowledge sources, researcher name"
|
||||
|
||||
algorithmic_reconstruction:
|
||||
description: "Automated reconstruction by software"
|
||||
note: "Provide: algorithm name, version, configuration, parameters"
|
||||
|
||||
agent_class:
|
||||
class: "prov:Agent"
|
||||
class_uri: "http://www.w3.org/ns/prov#Agent"
|
||||
description: "Person or organization responsible for reconstruction"
|
||||
|
||||
properties:
|
||||
- property: "sdo:name"
|
||||
description: "Name of the agent"
|
||||
range: "xsd:string"
|
||||
|
||||
- property: "sdo:url"
|
||||
description: "URL identifying the agent"
|
||||
range: "sdo:URL"
|
||||
|
||||
examples:
|
||||
- name: "CBG Center for Family History"
|
||||
url: "https://cbg.nl"
|
||||
type: "organization"
|
||||
|
||||
- name: "GLM-4.6 Person Extractor v1.0"
|
||||
url: null
|
||||
type: "software"
|
||||
|
||||
derivation_properties:
|
||||
- property: "prov:wasDerivedFrom"
|
||||
property_uri: "http://www.w3.org/ns/prov#wasDerivedFrom"
|
||||
description: "Links PersonReconstruction to source PersonObservation(s)"
|
||||
domain: "pico:PersonReconstruction"
|
||||
range: "pico:PersonObservation"
|
||||
cardinality: "1..*"
|
||||
note: "REQUIRED for all PersonReconstructions"
|
||||
|
||||
- property: "prov:wasRevisionOf"
|
||||
property_uri: "http://www.w3.org/ns/prov#wasRevisionOf"
|
||||
description: "Links to previous version of reconstruction"
|
||||
domain: "pico:PersonReconstruction"
|
||||
range: "pico:PersonReconstruction"
|
||||
cardinality: "0..1"
|
||||
note: "For tracking reconstruction updates over time"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# PiCo Vocabularies/Thesauri
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
pico_vocabularies:
|
||||
description: |
|
||||
PiCo defines three SKOS concept schemes for controlled terminology:
|
||||
|
||||
- Roles: The role a person plays in a source (child, declarant, witness, etc.)
|
||||
- SourceTypes: Types of historical sources (birth certificate, census, etc.)
|
||||
- EventTypes: Types of life events (birth, marriage, death, etc.)
|
||||
|
||||
roles_thesaurus:
|
||||
id: "picot_roles"
|
||||
uri: "https://terms.personsincontext.org/roles/"
|
||||
type: "skos:ConceptScheme"
|
||||
label: "Persons in Context role thesaurus"
|
||||
description: "Roles that persons can have in historical sources"
|
||||
usage: |
|
||||
Use pico:hasRole property with a term from this thesaurus.
|
||||
Example: picot_roles:575 (child), picot_roles:489 (declarant)
|
||||
example_concepts:
|
||||
- id: "575"
|
||||
label: "child"
|
||||
description: "Person appearing as child in a record"
|
||||
|
||||
- id: "489"
|
||||
label: "declarant"
|
||||
description: "Person declaring/reporting an event"
|
||||
|
||||
- id: "witness"
|
||||
label: "witness"
|
||||
description: "Person witnessing an event or signing a document"
|
||||
|
||||
- id: "bride"
|
||||
label: "bride"
|
||||
description: "Female partner in a marriage"
|
||||
|
||||
- id: "groom"
|
||||
label: "groom"
|
||||
description: "Male partner in a marriage"
|
||||
|
||||
sourcetypes_thesaurus:
|
||||
id: "picot_sourcetypes"
|
||||
uri: "https://terms.personsincontext.org/sourcetypes/"
|
||||
type: "skos:ConceptScheme"
|
||||
label: "Persons in Context sourceType thesaurus"
|
||||
description: "Types of historical sources containing person observations"
|
||||
usage: |
|
||||
Use sdo:additionalType property on sdo:ArchiveComponent.
|
||||
Example: picot_sourcetypes:551 (civil registry: birth)
|
||||
example_concepts:
|
||||
- id: "551"
|
||||
label: "civil registry: birth"
|
||||
description: "Birth certificate from civil registration"
|
||||
|
||||
- id: "marriage"
|
||||
label: "civil registry: marriage"
|
||||
description: "Marriage certificate"
|
||||
|
||||
- id: "death"
|
||||
label: "civil registry: death"
|
||||
description: "Death certificate"
|
||||
|
||||
- id: "census"
|
||||
label: "census"
|
||||
description: "Population census record"
|
||||
|
||||
- id: "church_baptism"
|
||||
label: "church record: baptism"
|
||||
description: "Baptismal record from church register"
|
||||
|
||||
- id: "notarial"
|
||||
label: "notarial record"
|
||||
description: "Notarial act or protocol"
|
||||
|
||||
eventtypes_thesaurus:
|
||||
id: "picot_eventtypes"
|
||||
uri: "https://terms.personsincontext.org/eventtypes/"
|
||||
type: "skos:ConceptScheme"
|
||||
label: "Persons in Context eventType thesaurus"
|
||||
description: "Types of life events documented in sources"
|
||||
example_concepts:
|
||||
- id: "birth"
|
||||
label: "birth"
|
||||
|
||||
- id: "baptism"
|
||||
label: "baptism"
|
||||
|
||||
- id: "marriage"
|
||||
label: "marriage"
|
||||
|
||||
- id: "death"
|
||||
label: "death"
|
||||
|
||||
- id: "burial"
|
||||
label: "burial"
|
||||
|
||||
- id: "emigration"
|
||||
label: "emigration"
|
||||
|
||||
- id: "immigration"
|
||||
label: "immigration"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# CH-Annotator Hypernym Integration for Temporal
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
temporal_hypernym_mapping:
|
||||
description: |
|
||||
Mapping between temporal expressions and CH-Annotator hypernyms.
|
||||
|
||||
mappings:
|
||||
- pico_property: "sdo:birthDate"
|
||||
ch_hypernym: "TMP.DAT"
|
||||
ch_code: "TMP.DAT"
|
||||
note: "Birth date temporal expression"
|
||||
|
||||
- pico_property: "sdo:deathDate"
|
||||
ch_hypernym: "TMP.DAT"
|
||||
ch_code: "TMP.DAT"
|
||||
note: "Death date temporal expression"
|
||||
|
||||
- pico_property: "sdo:dateCreated"
|
||||
ch_hypernym: "TMP.DAT"
|
||||
ch_code: "TMP.DAT"
|
||||
note: "Document creation date"
|
||||
|
||||
- calendar_expression: "Hijri date"
|
||||
ch_hypernym: "TMP.DAT"
|
||||
normalization: "Convert to Gregorian ISO 8601"
|
||||
|
||||
- calendar_expression: "Hebrew date"
|
||||
ch_hypernym: "TMP.DAT"
|
||||
normalization: "Convert to Gregorian ISO 8601"
|
||||
|
||||
- calendar_expression: "Julian date"
|
||||
ch_hypernym: "TMP.DAT"
|
||||
normalization: "Convert to Gregorian ISO 8601"
|
||||
1503
data/entity_annotation/modules/relationships/family.yaml
Normal file
1503
data/entity_annotation/modules/relationships/family.yaml
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,163 @@
|
|||
{
|
||||
"pico_observation": {
|
||||
"observation_id": "waqf_aleppo_1225h",
|
||||
"observed_at": "2023-10-27T10:00:00Z",
|
||||
"source_type": "waqf_document",
|
||||
"source_reference": "Aleppo Waqf, 1225 H"
|
||||
},
|
||||
"persons": [
|
||||
{
|
||||
"person_index": 0,
|
||||
"pnv_name": {
|
||||
"literalName": "الحاج أحمد بن محمد العمري",
|
||||
"literalName_romanized": "al-Hajj Ahmad ibn Muhammad al-Umari",
|
||||
"givenName": "أحمد",
|
||||
"givenName_romanized": "Ahmad",
|
||||
"patronym": "محمد",
|
||||
"patronym_romanized": "Muhammad",
|
||||
"baseSurname": "العمري",
|
||||
"baseSurname_romanized": "al-Umari",
|
||||
"honorificPrefix": "الحاج",
|
||||
"honorificPrefix_romanized": "al-Hajj"
|
||||
},
|
||||
"roles": [
|
||||
{
|
||||
"role_title": "تاجر",
|
||||
"role_title_romanized": "tajir",
|
||||
"role_in_source": "founder"
|
||||
}
|
||||
],
|
||||
"biographical": {
|
||||
"deceased": true,
|
||||
"address": "مدينة حلب الشهباء"
|
||||
},
|
||||
"family_relationships": {
|
||||
"parent": [
|
||||
{
|
||||
"person_index": 1,
|
||||
"target_name": "محمد بن عبد الله العمري"
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"context": "The founder (waqif) of the endowment, a deceased merchant from Aleppo."
|
||||
},
|
||||
{
|
||||
"person_index": 1,
|
||||
"pnv_name": {
|
||||
"literalName": "المرحوم محمد بن عبد الله العمري",
|
||||
"literalName_romanized": "al-marhum Muhammad ibn Abd Allah al-Umari",
|
||||
"givenName": "محمد",
|
||||
"givenName_romanized": "Muhammad",
|
||||
"patronym": "عبد الله",
|
||||
"patronym_romanized": "Abd Allah",
|
||||
"baseSurname": "العمري",
|
||||
"baseSurname_romanized": "al-Umari"
|
||||
},
|
||||
"roles": [
|
||||
{
|
||||
"role_title": null,
|
||||
"role_title_romanized": null,
|
||||
"role_in_source": null
|
||||
}
|
||||
],
|
||||
"biographical": {
|
||||
"deceased": true,
|
||||
"address": null
|
||||
},
|
||||
"family_relationships": {
|
||||
"parent": [],
|
||||
"children": [
|
||||
{
|
||||
"person_index": 0,
|
||||
"target_name": "أحمد بن محمد العمري"
|
||||
}
|
||||
]
|
||||
},
|
||||
"context": "The deceased father of the founder, Ahmad al-Umari."
|
||||
},
|
||||
{
|
||||
"person_index": 2,
|
||||
"pnv_name": {
|
||||
"literalName": "الحاج إبراهيم بن يوسف التركماني",
|
||||
"literalName_romanized": "al-Hajj Ibrahim ibn Yusuf al-Turkmani",
|
||||
"givenName": "إبراهيم",
|
||||
"givenName_romanized": "Ibrahim",
|
||||
"patronym": "يوسف",
|
||||
"patronym_romanized": "Yusuf",
|
||||
"baseSurname": "التركماني",
|
||||
"baseSurname_romanized": "al-Turkmani",
|
||||
"honorificPrefix": "الحاج",
|
||||
"honorificPrefix_romanized": "al-Hajj"
|
||||
},
|
||||
"roles": [
|
||||
{
|
||||
"role_title": "شاهد",
|
||||
"role_title_romanized": "shahid",
|
||||
"role_in_source": "witness"
|
||||
}
|
||||
],
|
||||
"biographical": {
|
||||
"deceased": null,
|
||||
"address": null
|
||||
},
|
||||
"family_relationships": {
|
||||
"parent": [],
|
||||
"children": []
|
||||
},
|
||||
"context": "One of the witnesses to the endowment deed."
|
||||
},
|
||||
{
|
||||
"person_index": 3,
|
||||
"pnv_name": {
|
||||
"literalName": "السيد علي بن حسين الحلبي",
|
||||
"literalName_romanized": "al-Sayyid Ali ibn Husayn al-Halabi",
|
||||
"givenName": "علي",
|
||||
"givenName_romanized": "Ali",
|
||||
"patronym": "حسين",
|
||||
"patronym_romanized": "Husayn",
|
||||
"baseSurname": "الحلبي",
|
||||
"baseSurname_romanized": "al-Halabi",
|
||||
"honorificPrefix": "السيد",
|
||||
"honorificPrefix_romanized": "al-Sayyid"
|
||||
},
|
||||
"roles": [
|
||||
{
|
||||
"role_title": "شاهد",
|
||||
"role_title_romanized": "shahid",
|
||||
"role_in_source": "witness"
|
||||
}
|
||||
],
|
||||
"biographical": {
|
||||
"deceased": null,
|
||||
"address": null
|
||||
},
|
||||
"family_relationships": {
|
||||
"parent": [],
|
||||
"children": []
|
||||
},
|
||||
"context": "The second witness to the endowment deed."
|
||||
}
|
||||
],
|
||||
"temporal_references": [
|
||||
{
|
||||
"expression": "شهر رجب سنة ألف ومائتين وخمس وعشرين هجرية",
|
||||
"expression_romanized": "Shahr Rajab sanat alf wa mi'ayn wa khamsa wa 'ishrin hijriyyah",
|
||||
"normalized": "1811-01",
|
||||
"calendar": "Hijri",
|
||||
"type": "DATE"
|
||||
}
|
||||
],
|
||||
"locations_mentioned": [
|
||||
{
|
||||
"name": "حلب الشهباء",
|
||||
"name_romanized": "Halab al-Shahba'",
|
||||
"type": "city"
|
||||
},
|
||||
{
|
||||
"name": "محلة الجديدة",
|
||||
"name_romanized": "Mahallat al-Jadida",
|
||||
"type": "neighborhood"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,93 @@
|
|||
{
|
||||
"pico_observation": {
|
||||
"observation_id": "waqf_doc_001",
|
||||
"source_type": "Waqf Document",
|
||||
"source_reference": "Arabic Waqf Deed Snippet"
|
||||
},
|
||||
"persons": [
|
||||
{
|
||||
"person_index": 0,
|
||||
"pnv_name": {
|
||||
"script": "أحمد بن محمد العمري",
|
||||
"romanized": "Ahmad ibn Muhammad al-Umari",
|
||||
"full_name": "المرحوم الحاج أحمد بن محمد العمري"
|
||||
},
|
||||
"roles": [
|
||||
"founder"
|
||||
],
|
||||
"biographical": {
|
||||
"status": "deceased",
|
||||
"occupation": "تاجر",
|
||||
"address": "مدينة حلب الشهباء"
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": "محمد بن عبد الله العمري"
|
||||
},
|
||||
"context": "The founder (waqif) who endowed his house for his descendants."
|
||||
},
|
||||
{
|
||||
"person_index": 1,
|
||||
"pnv_name": {
|
||||
"script": "محمد بن عبد الله العمري",
|
||||
"romanized": "Muhammad ibn Abdullah al-Umari",
|
||||
"full_name": "المرحوم محمد بن عبد الله العمري"
|
||||
},
|
||||
"roles": [],
|
||||
"biographical": {
|
||||
"status": "deceased"
|
||||
},
|
||||
"family_relationships": {
|
||||
"son": "أحمد بن محمد العمري"
|
||||
},
|
||||
"context": "Father of the founder, mentioned in his patronymic."
|
||||
},
|
||||
{
|
||||
"person_index": 2,
|
||||
"pnv_name": {
|
||||
"script": "إبراهيم بن يوسف التركماني",
|
||||
"romanized": "Ibrahim ibn Yusuf al-Turkmani",
|
||||
"full_name": "الحاج إبراهيم بن يوسف التركماني"
|
||||
},
|
||||
"roles": [
|
||||
"witness"
|
||||
],
|
||||
"biographical": {},
|
||||
"family_relationships": {},
|
||||
"context": "A witness to the waqf deed."
|
||||
},
|
||||
{
|
||||
"person_index": 3,
|
||||
"pnv_name": {
|
||||
"script": "علي بن حسين الحلبي",
|
||||
"romanized": "Ali ibn Husayn al-Halabi",
|
||||
"full_name": "السيد علي بن حسين الحلبي"
|
||||
},
|
||||
"roles": [
|
||||
"witness"
|
||||
],
|
||||
"biographical": {},
|
||||
"family_relationships": {},
|
||||
"context": "A witness to the waqf deed."
|
||||
}
|
||||
],
|
||||
"temporal_references": [
|
||||
{
|
||||
"expression": "شهر رجب سنة ألف ومائتين وخمس وعشرين هجرية",
|
||||
"expression_romanized": "shahr rajab sanat alf wa mi'atayn wa khamsa wa 'ishrin hijriyya",
|
||||
"normalized": "1225 AH",
|
||||
"calendar": "Hijri"
|
||||
}
|
||||
],
|
||||
"locations_mentioned": [
|
||||
{
|
||||
"name": "حلب",
|
||||
"name_romanized": "Halab",
|
||||
"type": "city"
|
||||
},
|
||||
{
|
||||
"name": "الجديدة",
|
||||
"name_romanized": "al-Jadida",
|
||||
"type": "neighborhood"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,139 @@
|
|||
{
|
||||
"pico_observation": {
|
||||
"observation_id": "marriage_cert_1885-03-04_haarlem_001",
|
||||
"source_type": "marriage_certificate",
|
||||
"source_reference": "Haarlem, 4 March 1885, marriage of Johannes Petrus van der Berg and Cornelia Wilhelmina de Groot"
|
||||
},
|
||||
"persons": [
|
||||
{
|
||||
"person_index": 0,
|
||||
"pnv_name": {
|
||||
"person_name": "Johannes Petrus",
|
||||
"family_name": "van der Berg",
|
||||
"tussenvoegsel": "van der",
|
||||
"geslachtsnaam": "Berg"
|
||||
},
|
||||
"roles": [
|
||||
"groom"
|
||||
],
|
||||
"biographical": {
|
||||
"age": 30,
|
||||
"occupation": "koopman",
|
||||
"birth_place": "Amsterdam",
|
||||
"residence": "Haarlem",
|
||||
"civil_status": "meerderjarige"
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"person_name": "Pieter",
|
||||
"family_name": "van der Berg",
|
||||
"tussenvoegsel": "van der",
|
||||
"geslachtsnaam": "Berg",
|
||||
"status": "deceased",
|
||||
"occupation": "koopman"
|
||||
},
|
||||
"mother": {
|
||||
"person_name": "Maria Johanna",
|
||||
"family_name": "Bakker",
|
||||
"geslachtsnaam": "Bakker",
|
||||
"status": "living",
|
||||
"occupation": "zonder beroep",
|
||||
"residence": "Amsterdam"
|
||||
}
|
||||
},
|
||||
"context": "Groom, son of Pieter van der Berg and Maria Johanna Bakker."
|
||||
},
|
||||
{
|
||||
"person_index": 1,
|
||||
"pnv_name": {
|
||||
"person_name": "Cornelia Wilhelmina",
|
||||
"family_name": "de Groot",
|
||||
"tussenvoegsel": "de",
|
||||
"geslachtsnaam": "Groot"
|
||||
},
|
||||
"roles": [
|
||||
"bride"
|
||||
],
|
||||
"biographical": {
|
||||
"age": 25,
|
||||
"occupation": "zonder beroep",
|
||||
"birth_place": "Haarlem",
|
||||
"residence": "Haarlem",
|
||||
"civil_status": "meerderjarige"
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"person_name": "Hendrik",
|
||||
"family_name": "de Groot",
|
||||
"tussenvoegsel": "de",
|
||||
"geslachtsnaam": "Groot",
|
||||
"status": "living",
|
||||
"occupation": "timmerman"
|
||||
},
|
||||
"mother": {
|
||||
"person_name": "Elisabeth",
|
||||
"family_name": "van Dijk",
|
||||
"tussenvoegsel": "van",
|
||||
"geslachtsnaam": "Dijk",
|
||||
"status": "deceased"
|
||||
}
|
||||
},
|
||||
"context": "Bride, daughter of Hendrik de Groot and Elisabeth van Dijk."
|
||||
},
|
||||
{
|
||||
"person_index": 2,
|
||||
"pnv_name": {
|
||||
"person_name": "Willem Frederik",
|
||||
"family_name": "Smit",
|
||||
"geslachtsnaam": "Smit"
|
||||
},
|
||||
"roles": [
|
||||
"witness"
|
||||
],
|
||||
"biographical": {
|
||||
"age": 40,
|
||||
"occupation": "notaris"
|
||||
},
|
||||
"family_relationships": {},
|
||||
"context": "Witness to the marriage."
|
||||
},
|
||||
{
|
||||
"person_index": 3,
|
||||
"pnv_name": {
|
||||
"person_name": "Jacobus Hendrikus",
|
||||
"family_name": "Jansen",
|
||||
"geslachtsnaam": "Jansen"
|
||||
},
|
||||
"roles": [
|
||||
"witness"
|
||||
],
|
||||
"biographical": {
|
||||
"age": 35,
|
||||
"occupation": "klerk"
|
||||
},
|
||||
"family_relationships": {},
|
||||
"context": "Witness to the marriage."
|
||||
}
|
||||
],
|
||||
"temporal_references": [
|
||||
{
|
||||
"expression": "vierden Maart achttien honderd vijf en tachtig",
|
||||
"normalized": "1885-03-04",
|
||||
"calendar": "Gregorian"
|
||||
}
|
||||
],
|
||||
"locations_mentioned": [
|
||||
{
|
||||
"name": "Haarlem",
|
||||
"type": "municipality"
|
||||
},
|
||||
{
|
||||
"name": "Amsterdam",
|
||||
"type": "city"
|
||||
},
|
||||
{
|
||||
"name": "Haarlem",
|
||||
"type": "city"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,185 @@
|
|||
{
|
||||
"pico_observation": {
|
||||
"observation_id": "obs_haarlem_1885-03-04",
|
||||
"source_type": "marriage_certificate",
|
||||
"source_reference": "Haarlem, 1885-03-04"
|
||||
},
|
||||
"persons": [
|
||||
{
|
||||
"person_index": 0,
|
||||
"pnv_name": {
|
||||
"person_name_standard_text": "Johannes Petrus van der Berg",
|
||||
"person_name_given_name": "Johannes Petrus",
|
||||
"person_name_family_name_prefix": "van der",
|
||||
"person_name_family_name": "Berg"
|
||||
},
|
||||
"roles": [
|
||||
"groom"
|
||||
],
|
||||
"biographical": {
|
||||
"age": 30,
|
||||
"occupation": "koopman",
|
||||
"birth_place": "Amsterdam",
|
||||
"residence": "Haarlem",
|
||||
"civil_status": "meerderjarige"
|
||||
},
|
||||
"family_relationships": {
|
||||
"relationship_to_parents": "zoon van",
|
||||
"father": "Pieter van der Berg",
|
||||
"mother": "Maria Johanna Bakker"
|
||||
},
|
||||
"context": "Groom, 30-year-old merchant, born in Amsterdam, residing in Haarlem, son of the late Pieter van der Berg and Maria Johanna Bakker."
|
||||
},
|
||||
{
|
||||
"person_index": 1,
|
||||
"pnv_name": {
|
||||
"person_name_standard_text": "Pieter van der Berg",
|
||||
"person_name_given_name": "Pieter",
|
||||
"person_name_family_name_prefix": "van der",
|
||||
"person_name_family_name": "Berg"
|
||||
},
|
||||
"roles": [
|
||||
"father_of_groom"
|
||||
],
|
||||
"biographical": {
|
||||
"deceased": true,
|
||||
"occupation": "koopman"
|
||||
},
|
||||
"family_relationships": {
|
||||
"father_of": "Johannes Petrus van der Berg"
|
||||
},
|
||||
"context": "Father of the groom, deceased, was a merchant."
|
||||
},
|
||||
{
|
||||
"person_index": 2,
|
||||
"pnv_name": {
|
||||
"person_name_standard_text": "Maria Johanna Bakker",
|
||||
"person_name_given_name": "Maria Johanna",
|
||||
"person_name_family_name": "Bakker"
|
||||
},
|
||||
"roles": [
|
||||
"mother_of_groom"
|
||||
],
|
||||
"biographical": {
|
||||
"occupation": "zonder beroep",
|
||||
"residence": "Amsterdam"
|
||||
},
|
||||
"family_relationships": {
|
||||
"mother_of": "Johannes Petrus van der Berg"
|
||||
},
|
||||
"context": "Mother of the groom, without occupation, residing in Amsterdam."
|
||||
},
|
||||
{
|
||||
"person_index": 3,
|
||||
"pnv_name": {
|
||||
"person_name_standard_text": "Cornelia Wilhelmina de Groot",
|
||||
"person_name_given_name": "Cornelia Wilhelmina",
|
||||
"person_name_family_name_prefix": "de",
|
||||
"person_name_family_name": "Groot"
|
||||
},
|
||||
"roles": [
|
||||
"bride"
|
||||
],
|
||||
"biographical": {
|
||||
"age": 25,
|
||||
"occupation": "zonder beroep",
|
||||
"birth_place": "Haarlem",
|
||||
"residence": "Haarlem",
|
||||
"civil_status": "meerderjarige"
|
||||
},
|
||||
"family_relationships": {
|
||||
"relationship_to_parents": "dochter van",
|
||||
"father": "Hendrik de Groot",
|
||||
"mother": "Elisabeth van Dijk"
|
||||
},
|
||||
"context": "Bride, 25-year-old without occupation, born in Haarlem, residing in Haarlem, daughter of Hendrik de Groot and the late Elisabeth van Dijk."
|
||||
},
|
||||
{
|
||||
"person_index": 4,
|
||||
"pnv_name": {
|
||||
"person_name_standard_text": "Hendrik de Groot",
|
||||
"person_name_given_name": "Hendrik",
|
||||
"person_name_family_name_prefix": "de",
|
||||
"person_name_family_name": "Groot"
|
||||
},
|
||||
"roles": [
|
||||
"father_of_bride"
|
||||
],
|
||||
"biographical": {
|
||||
"occupation": "timmerman"
|
||||
},
|
||||
"family_relationships": {
|
||||
"father_of": "Cornelia Wilhelmina de Groot"
|
||||
},
|
||||
"context": "Father of the bride, a carpenter."
|
||||
},
|
||||
{
|
||||
"person_index": 5,
|
||||
"pnv_name": {
|
||||
"person_name_standard_text": "Elisabeth van Dijk",
|
||||
"person_name_given_name": "Elisabeth",
|
||||
"person_name_family_name_prefix": "van",
|
||||
"person_name_family_name": "Dijk"
|
||||
},
|
||||
"roles": [
|
||||
"mother_of_bride"
|
||||
],
|
||||
"biographical": {
|
||||
"deceased": true
|
||||
},
|
||||
"family_relationships": {
|
||||
"mother_of": "Cornelia Wilhelmina de Groot"
|
||||
},
|
||||
"context": "Mother of the bride, deceased."
|
||||
},
|
||||
{
|
||||
"person_index": 6,
|
||||
"pnv_name": {
|
||||
"person_name_standard_text": "Willem Frederik Smit",
|
||||
"person_name_given_name": "Willem Frederik",
|
||||
"person_name_family_name": "Smit"
|
||||
},
|
||||
"roles": [
|
||||
"witness"
|
||||
],
|
||||
"biographical": {
|
||||
"age": 40,
|
||||
"occupation": "notaris"
|
||||
},
|
||||
"context": "Witness, 40-year-old notary."
|
||||
},
|
||||
{
|
||||
"person_index": 7,
|
||||
"pnv_name": {
|
||||
"person_name_standard_text": "Jacobus Hendrikus Jansen",
|
||||
"person_name_given_name": "Jacobus Hendrikus",
|
||||
"person_name_family_name": "Jansen"
|
||||
},
|
||||
"roles": [
|
||||
"witness"
|
||||
],
|
||||
"biographical": {
|
||||
"age": 35,
|
||||
"occupation": "klerk"
|
||||
},
|
||||
"context": "Witness, 35-year-old clerk."
|
||||
}
|
||||
],
|
||||
"temporal_references": [
|
||||
{
|
||||
"expression": "vierden Maart achttien honderd vijf en tachtig",
|
||||
"normalized": "1885-03-04",
|
||||
"calendar": "Gregorian"
|
||||
}
|
||||
],
|
||||
"locations_mentioned": [
|
||||
{
|
||||
"name": "Amsterdam",
|
||||
"type": "city"
|
||||
},
|
||||
{
|
||||
"name": "Haarlem",
|
||||
"type": "city"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,139 @@
|
|||
{
|
||||
"pico_observation": {
|
||||
"observation_id": "bap_reg_thess_1875_03_15_01",
|
||||
"source_type": "baptismal_register",
|
||||
"source_reference": "Greek Orthodox Baptismal Register, Thessaloniki, 15 March 1875"
|
||||
},
|
||||
"persons": [
|
||||
{
|
||||
"person_index": 0,
|
||||
"pnv_name": {
|
||||
"literalName": "Δημήτριος",
|
||||
"literalName_romanized": "Dēmētrios"
|
||||
},
|
||||
"roles": [
|
||||
"baptized"
|
||||
],
|
||||
"biographical": {},
|
||||
"family_relationships": {
|
||||
"father": "Νικόλαος Παπαδόπουλος",
|
||||
"mother": "Ἑλένη"
|
||||
},
|
||||
"context": "The baptized, son of Nikolaos Papadopoulos and Elenē."
|
||||
},
|
||||
{
|
||||
"person_index": 1,
|
||||
"pnv_name": {
|
||||
"literalName": "Νικόλαος Παπαδόπουλος",
|
||||
"literalName_romanized": "Nikolaos Papadopoulos"
|
||||
},
|
||||
"roles": [
|
||||
"parent"
|
||||
],
|
||||
"biographical": {
|
||||
"occupation": "ἔμπορος"
|
||||
},
|
||||
"family_relationships": {
|
||||
"son": "Δημήτριος",
|
||||
"wife": "Ἑλένη"
|
||||
},
|
||||
"context": "Father of the baptized, a merchant, husband of Elenē."
|
||||
},
|
||||
{
|
||||
"person_index": 2,
|
||||
"pnv_name": {
|
||||
"literalName": "Ἑλένη",
|
||||
"literalName_romanized": "Elenē"
|
||||
},
|
||||
"roles": [
|
||||
"parent"
|
||||
],
|
||||
"biographical": {},
|
||||
"family_relationships": {
|
||||
"son": "Δημήτριος",
|
||||
"husband": "Νικόλαος Παπαδόπουλος",
|
||||
"father": "Γεώργιος Οἰκόνομος"
|
||||
},
|
||||
"context": "Mother of the baptized, wife of Nikolaos Papadopoulos, daughter of the late Geōrgios Oikonomos."
|
||||
},
|
||||
{
|
||||
"person_index": 3,
|
||||
"pnv_name": {
|
||||
"literalName": "Γεώργιος Οἰκόνομος",
|
||||
"literalName_romanized": "Geōrgios Oikonomos"
|
||||
},
|
||||
"roles": [
|
||||
"grandparent"
|
||||
],
|
||||
"biographical": {
|
||||
"deceased": true
|
||||
},
|
||||
"family_relationships": {
|
||||
"daughter": "Ἑλένη"
|
||||
},
|
||||
"context": "The late father of the mother (Elenē)."
|
||||
},
|
||||
{
|
||||
"person_index": 4,
|
||||
"pnv_name": {
|
||||
"literalName": "Κωνσταντῖνος Καρατζᾶς",
|
||||
"literalName_romanized": "Kōnstantinos Karatzas"
|
||||
},
|
||||
"roles": [
|
||||
"godparent"
|
||||
],
|
||||
"biographical": {
|
||||
"occupation": "ἰατρός"
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": "Ἰωάννης"
|
||||
},
|
||||
"context": "Godparent, a physician, son of Iōannēs."
|
||||
},
|
||||
{
|
||||
"person_index": 5,
|
||||
"pnv_name": {
|
||||
"literalName": "Ἰωάννης",
|
||||
"literalName_romanized": "Iōannēs"
|
||||
},
|
||||
"roles": [
|
||||
"godparent's_parent"
|
||||
],
|
||||
"biographical": {},
|
||||
"family_relationships": {
|
||||
"son": "Κωνσταντῖνος Καρατζᾶς"
|
||||
},
|
||||
"context": "Father of the godparent (Kōnstantinos Karatzas)."
|
||||
},
|
||||
{
|
||||
"person_index": 6,
|
||||
"pnv_name": {
|
||||
"literalName": "Ἀθανάσιος Χρυσοστόμου",
|
||||
"literalName_romanized": "Athanasios Chrysostomou"
|
||||
},
|
||||
"roles": [
|
||||
"priest"
|
||||
],
|
||||
"biographical": {
|
||||
"ecclesiastical_title": "Πρωτοπρεσβύτερος"
|
||||
},
|
||||
"family_relationships": {},
|
||||
"context": "The officiating priest, an Archpriest."
|
||||
}
|
||||
],
|
||||
"temporal_references": [
|
||||
{
|
||||
"expression": "τῇ δεκάτῃ πέμπτῃ Μαρτίου τοῦ ἔτους 1875",
|
||||
"expression_romanized": "tē dekatē pemptē Martiou tou etous 1875",
|
||||
"normalized": "1875-03-15",
|
||||
"calendar": "Julian"
|
||||
}
|
||||
],
|
||||
"locations_mentioned": [
|
||||
{
|
||||
"name": "Θεσσαλονίκῃ",
|
||||
"name_romanized": "Thessalonikē",
|
||||
"type": "city"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,124 @@
|
|||
{
|
||||
"pico_observation": {
|
||||
"observation_id": "bap_reg_thess_1875_03_15_01",
|
||||
"source_type": "baptismal_register",
|
||||
"source_reference": "Thessaloniki Baptismal Register, 15 March 1875"
|
||||
},
|
||||
"persons": [
|
||||
{
|
||||
"person_index": 0,
|
||||
"pnv_name": {
|
||||
"literalName": "Δημήτριος",
|
||||
"literalName_romanized": "Dēmētrios"
|
||||
},
|
||||
"roles": [
|
||||
"baptized"
|
||||
],
|
||||
"biographical": {},
|
||||
"family_relationships": {
|
||||
"father": "Νικολάου Παπαδοπούλου",
|
||||
"mother": "Ἑλένης"
|
||||
},
|
||||
"context": "Son of Nikolaos Papadopoulos and Eleni, baptized in Thessaloniki."
|
||||
},
|
||||
{
|
||||
"person_index": 1,
|
||||
"pnv_name": {
|
||||
"literalName": "Νικολάου Παπαδοπούλου",
|
||||
"literalName_romanized": "Nikolaou Papadopoulou"
|
||||
},
|
||||
"roles": [
|
||||
"parent"
|
||||
],
|
||||
"biographical": {
|
||||
"occupation": "ἔμπορος"
|
||||
},
|
||||
"family_relationships": {
|
||||
"son": "Δημήτριος",
|
||||
"wife": "Ἑλένης"
|
||||
},
|
||||
"context": "Father of the baptized Dimitrios, merchant, husband of Eleni."
|
||||
},
|
||||
{
|
||||
"person_index": 2,
|
||||
"pnv_name": {
|
||||
"literalName": "Ἑλένης",
|
||||
"literalName_romanized": "Elenēs"
|
||||
},
|
||||
"roles": [
|
||||
"parent"
|
||||
],
|
||||
"biographical": {},
|
||||
"family_relationships": {
|
||||
"son": "Δημήτριος",
|
||||
"husband": "Νικολάου Παπαδοπούλου",
|
||||
"father": "μακαρίτου Γεωργίου Οἰκονόμου"
|
||||
},
|
||||
"context": "Mother of the baptized Dimitrios, wife of Nikolaos Papadopoulos, daughter of the late Georgios Oikonomou."
|
||||
},
|
||||
{
|
||||
"person_index": 3,
|
||||
"pnv_name": {
|
||||
"literalName": "Γεωργίου Οἰκονόμου",
|
||||
"literalName_romanized": "Geōrgiou Oikonomou"
|
||||
},
|
||||
"roles": [
|
||||
"grandparent"
|
||||
],
|
||||
"biographical": {
|
||||
"deceased": true
|
||||
},
|
||||
"family_relationships": {
|
||||
"daughter": "Ἑλένης"
|
||||
},
|
||||
"context": "Late father of Eleni, maternal grandfather of the baptized Dimitrios."
|
||||
},
|
||||
{
|
||||
"person_index": 4,
|
||||
"pnv_name": {
|
||||
"literalName": "Κωνσταντῖνος Καρατζᾶς",
|
||||
"literalName_romanized": "Kōnstantinos Karatzas"
|
||||
},
|
||||
"roles": [
|
||||
"godparent"
|
||||
],
|
||||
"biographical": {
|
||||
"occupation": "ἰατρός"
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": "Ἰωάννου"
|
||||
},
|
||||
"context": "Godparent of Dimitrios, son of Ioannis, physician."
|
||||
},
|
||||
{
|
||||
"person_index": 5,
|
||||
"pnv_name": {
|
||||
"literalName": "Ἀθανάσιος Χρυσοστόμου",
|
||||
"literalName_romanized": "Athanasios Chrysostomou"
|
||||
},
|
||||
"roles": [
|
||||
"priest"
|
||||
],
|
||||
"biographical": {
|
||||
"ecclesiastical_title": "Πρωτοπρεσβύτερος"
|
||||
},
|
||||
"family_relationships": {},
|
||||
"context": "Archpriest who performed the baptism."
|
||||
}
|
||||
],
|
||||
"temporal_references": [
|
||||
{
|
||||
"expression": "τῇ δεκάτῃ πέμπτῃ Μαρτίου τοῦ ἔτους 1875",
|
||||
"expression_romanized": "tē dekatē pemptē Martiou tou etous 1875",
|
||||
"normalized": "1875-03-15",
|
||||
"calendar": "Julian"
|
||||
}
|
||||
],
|
||||
"locations_mentioned": [
|
||||
{
|
||||
"name": "Θεσσαλονίκῃ",
|
||||
"name_romanized": "Thessalonikē",
|
||||
"type": "city"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,252 @@
|
|||
{
|
||||
"pico_observation": {
|
||||
"observation_id": "ketubah_vilna_5605_obs_001",
|
||||
"source_type": "ketubah",
|
||||
"source_reference": "Vilna Ketubah, 12 Iyar 5605"
|
||||
},
|
||||
"persons": [
|
||||
{
|
||||
"person_index": 0,
|
||||
"pnv_name": {
|
||||
"script": "Hebrew",
|
||||
"text": "יצחק",
|
||||
"romanized": "Yitzchak"
|
||||
},
|
||||
"roles": [
|
||||
"groom"
|
||||
],
|
||||
"biographical": {
|
||||
"patronymic": {
|
||||
"script": "Hebrew",
|
||||
"text": "בן הר״ר אברהם",
|
||||
"romanized": "ben HaRav Avraham"
|
||||
},
|
||||
"tribal_affiliation": {
|
||||
"script": "Hebrew",
|
||||
"text": "הכהן",
|
||||
"romanized": "haKohen"
|
||||
},
|
||||
"honorifics": [
|
||||
{
|
||||
"script": "Hebrew",
|
||||
"text": "הבחור",
|
||||
"romanized": "haBachur"
|
||||
}
|
||||
]
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"person_index": 1,
|
||||
"relationship_type": "paternal"
|
||||
}
|
||||
},
|
||||
"context": "The groom, son of Avraham haKohen."
|
||||
},
|
||||
{
|
||||
"person_index": 1,
|
||||
"pnv_name": {
|
||||
"script": "Hebrew",
|
||||
"text": "אברהם",
|
||||
"romanized": "Avraham"
|
||||
},
|
||||
"roles": [],
|
||||
"biographical": {
|
||||
"honorifics": [
|
||||
{
|
||||
"script": "Hebrew",
|
||||
"text": "הר״ר",
|
||||
"romanized": "HaRav"
|
||||
}
|
||||
],
|
||||
"tribal_affiliation": {
|
||||
"script": "Hebrew",
|
||||
"text": "הכהן",
|
||||
"romanized": "haKohen"
|
||||
},
|
||||
"deceased_marker": {
|
||||
"script": "Hebrew",
|
||||
"text": "ז״ל",
|
||||
"romanized": "z'l"
|
||||
}
|
||||
},
|
||||
"family_relationships": {
|
||||
"child": {
|
||||
"person_index": 0,
|
||||
"relationship_type": "paternal"
|
||||
}
|
||||
},
|
||||
"context": "Father of the groom, of blessed memory."
|
||||
},
|
||||
{
|
||||
"person_index": 2,
|
||||
"pnv_name": {
|
||||
"script": "Hebrew",
|
||||
"text": "מרים",
|
||||
"romanized": "Miriam"
|
||||
},
|
||||
"roles": [
|
||||
"bride"
|
||||
],
|
||||
"biographical": {
|
||||
"patronymic": {
|
||||
"script": "Hebrew",
|
||||
"text": "בת הר״ר משה",
|
||||
"romanized": "bat HaRav Moshe"
|
||||
},
|
||||
"tribal_affiliation": {
|
||||
"script": "Hebrew",
|
||||
"text": "הלוי",
|
||||
"romanized": "haLevi"
|
||||
},
|
||||
"honorifics": [
|
||||
{
|
||||
"script": "Hebrew",
|
||||
"text": "מרת",
|
||||
"romanized": "Marat"
|
||||
}
|
||||
]
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"person_index": 3,
|
||||
"relationship_type": "paternal"
|
||||
}
|
||||
},
|
||||
"context": "The bride, daughter of Moshe haLevi."
|
||||
},
|
||||
{
|
||||
"person_index": 3,
|
||||
"pnv_name": {
|
||||
"script": "Hebrew",
|
||||
"text": "משה",
|
||||
"romanized": "Moshe"
|
||||
},
|
||||
"roles": [],
|
||||
"biographical": {
|
||||
"honorifics": [
|
||||
{
|
||||
"script": "Hebrew",
|
||||
"text": "הר״ר",
|
||||
"romanized": "HaRav"
|
||||
}
|
||||
],
|
||||
"tribal_affiliation": {
|
||||
"script": "Hebrew",
|
||||
"text": "הלוי",
|
||||
"romanized": "haLevi"
|
||||
}
|
||||
},
|
||||
"family_relationships": {
|
||||
"child": {
|
||||
"person_index": 2,
|
||||
"relationship_type": "paternal"
|
||||
}
|
||||
},
|
||||
"context": "Father of the bride."
|
||||
},
|
||||
{
|
||||
"person_index": 4,
|
||||
"pnv_name": {
|
||||
"script": "Hebrew",
|
||||
"text": "שמעון",
|
||||
"romanized": "Shimon"
|
||||
},
|
||||
"roles": [
|
||||
"witness"
|
||||
],
|
||||
"biographical": {
|
||||
"patronymic": {
|
||||
"script": "Hebrew",
|
||||
"text": "בן יעקב",
|
||||
"romanized": "ben Yaakov"
|
||||
},
|
||||
"tribal_affiliation": {
|
||||
"script": "Hebrew",
|
||||
"text": "הכהן",
|
||||
"romanized": "haKohen"
|
||||
}
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"person_index": 5,
|
||||
"relationship_type": "paternal"
|
||||
}
|
||||
},
|
||||
"context": "First witness to the marriage."
|
||||
},
|
||||
{
|
||||
"person_index": 5,
|
||||
"pnv_name": {
|
||||
"script": "Hebrew",
|
||||
"text": "יעקב",
|
||||
"romanized": "Yaakov"
|
||||
},
|
||||
"roles": [],
|
||||
"biographical": {},
|
||||
"family_relationships": {
|
||||
"child": {
|
||||
"person_index": 4,
|
||||
"relationship_type": "paternal"
|
||||
}
|
||||
},
|
||||
"context": "Father of the first witness, Shimon."
|
||||
},
|
||||
{
|
||||
"person_index": 6,
|
||||
"pnv_name": {
|
||||
"script": "Hebrew",
|
||||
"text": "דוד",
|
||||
"romanized": "David"
|
||||
},
|
||||
"roles": [
|
||||
"witness"
|
||||
],
|
||||
"biographical": {
|
||||
"patronymic": {
|
||||
"script": "Hebrew",
|
||||
"text": "בן אליהו",
|
||||
"romanized": "ben Eliyahu"
|
||||
}
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"person_index": 7,
|
||||
"relationship_type": "paternal"
|
||||
}
|
||||
},
|
||||
"context": "Second witness to the marriage."
|
||||
},
|
||||
{
|
||||
"person_index": 7,
|
||||
"pnv_name": {
|
||||
"script": "Hebrew",
|
||||
"text": "אליהו",
|
||||
"romanized": "Eliyahu"
|
||||
},
|
||||
"roles": [],
|
||||
"biographical": {},
|
||||
"family_relationships": {
|
||||
"child": {
|
||||
"person_index": 6,
|
||||
"relationship_type": "paternal"
|
||||
}
|
||||
},
|
||||
"context": "Father of the second witness, David."
|
||||
}
|
||||
],
|
||||
"temporal_references": [
|
||||
{
|
||||
"expression": "ביום שלישי בשבת, שנים עשר יום לחודש אייר שנת חמשת אלפים שש מאות וארבעים וחמש לבריאת עולם",
|
||||
"expression_romanized": "BeYom Shlishi beShabbat, Shneim Asar Yom leChodesh Iyar, Shnat Chameshet Alafim Shesh Meot veArba'im veChamesh leBriyat Olam",
|
||||
"normalized": "5605-04-12",
|
||||
"calendar": "Hebrew"
|
||||
}
|
||||
],
|
||||
"locations_mentioned": [
|
||||
{
|
||||
"name": "פה ווילנא",
|
||||
"name_romanized": "Po Vilna",
|
||||
"type": "city"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,202 @@
|
|||
{
|
||||
"pico_observation": {
|
||||
"observation_id": "ketubah_vilna_5605_obs_1",
|
||||
"source_type": "ketubah",
|
||||
"source_reference": "Vilna Ketubah, 12 Iyar 5605 (1845 CE)"
|
||||
},
|
||||
"persons": [
|
||||
{
|
||||
"person_index": 0,
|
||||
"pnv_name": {
|
||||
"script": "Hebrew",
|
||||
"text": "יצחק",
|
||||
"romanized": "Yitzchak"
|
||||
},
|
||||
"roles": [
|
||||
"groom",
|
||||
"חתן"
|
||||
],
|
||||
"biographical": {
|
||||
"patronymic": {
|
||||
"script": "Hebrew",
|
||||
"text": "בן הר״ר אברהם",
|
||||
"romanized": "ben HaRav Avraham"
|
||||
},
|
||||
"tribal_affiliation": {
|
||||
"script": "Hebrew",
|
||||
"text": "הכהן",
|
||||
"romanized": "haKohen"
|
||||
},
|
||||
"honorifics": [
|
||||
"הבחור"
|
||||
]
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"person_index": 1,
|
||||
"relationship_type": "paternal",
|
||||
"deceased": true
|
||||
}
|
||||
},
|
||||
"context": "The groom, son of the late Rabbi Avraham haKohen."
|
||||
},
|
||||
{
|
||||
"person_index": 1,
|
||||
"pnv_name": {
|
||||
"script": "Hebrew",
|
||||
"text": "אברהם",
|
||||
"romanized": "Avraham"
|
||||
},
|
||||
"roles": [
|
||||
"father_of_groom"
|
||||
],
|
||||
"biographical": {
|
||||
"honorifics": [
|
||||
"הר״ר"
|
||||
],
|
||||
"tribal_affiliation": {
|
||||
"script": "Hebrew",
|
||||
"text": "הכהן",
|
||||
"romanized": "haKohen"
|
||||
},
|
||||
"deceased_marker": {
|
||||
"script": "Hebrew",
|
||||
"text": "ז״ל",
|
||||
"romanized": "z'l"
|
||||
}
|
||||
},
|
||||
"family_relationships": {
|
||||
"son": {
|
||||
"person_index": 0,
|
||||
"relationship_type": "paternal"
|
||||
}
|
||||
},
|
||||
"context": "The deceased father of the groom, Rabbi Avraham haKohen."
|
||||
},
|
||||
{
|
||||
"person_index": 2,
|
||||
"pnv_name": {
|
||||
"script": "Hebrew",
|
||||
"text": "מרים",
|
||||
"romanized": "Miriam"
|
||||
},
|
||||
"roles": [
|
||||
"bride",
|
||||
"כלה"
|
||||
],
|
||||
"biographical": {
|
||||
"patronymic": {
|
||||
"script": "Hebrew",
|
||||
"text": "בת הר״ר משה",
|
||||
"romanized": "bat HaRav Moshe"
|
||||
},
|
||||
"tribal_affiliation": {
|
||||
"script": "Hebrew",
|
||||
"text": "הלוי",
|
||||
"romanized": "haLevi"
|
||||
},
|
||||
"honorifics": [
|
||||
"מרת"
|
||||
]
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"person_index": 3,
|
||||
"relationship_type": "paternal",
|
||||
"deceased": false
|
||||
}
|
||||
},
|
||||
"context": "The bride, daughter of Rabbi Moshe haLevi."
|
||||
},
|
||||
{
|
||||
"person_index": 3,
|
||||
"pnv_name": {
|
||||
"script": "Hebrew",
|
||||
"text": "משה",
|
||||
"romanized": "Moshe"
|
||||
},
|
||||
"roles": [
|
||||
"father_of_bride"
|
||||
],
|
||||
"biographical": {
|
||||
"honorifics": [
|
||||
"הר״ר"
|
||||
],
|
||||
"tribal_affiliation": {
|
||||
"script": "Hebrew",
|
||||
"text": "הלוי",
|
||||
"romanized": "haLevi"
|
||||
}
|
||||
},
|
||||
"family_relationships": {
|
||||
"daughter": {
|
||||
"person_index": 2,
|
||||
"relationship_type": "paternal"
|
||||
}
|
||||
},
|
||||
"context": "The father of the bride, Rabbi Moshe haLevi."
|
||||
},
|
||||
{
|
||||
"person_index": 4,
|
||||
"pnv_name": {
|
||||
"script": "Hebrew",
|
||||
"text": "שמעון",
|
||||
"romanized": "Shimon"
|
||||
},
|
||||
"roles": [
|
||||
"witness",
|
||||
"עד"
|
||||
],
|
||||
"biographical": {
|
||||
"patronymic": {
|
||||
"script": "Hebrew",
|
||||
"text": "בן יעקב",
|
||||
"romanized": "ben Yaakov"
|
||||
},
|
||||
"tribal_affiliation": {
|
||||
"script": "Hebrew",
|
||||
"text": "הכהן",
|
||||
"romanized": "haKohen"
|
||||
}
|
||||
},
|
||||
"family_relationships": {},
|
||||
"context": "First witness to the marriage."
|
||||
},
|
||||
{
|
||||
"person_index": 5,
|
||||
"pnv_name": {
|
||||
"script": "Hebrew",
|
||||
"text": "דוד",
|
||||
"romanized": "David"
|
||||
},
|
||||
"roles": [
|
||||
"witness",
|
||||
"עד"
|
||||
],
|
||||
"biographical": {
|
||||
"patronymic": {
|
||||
"script": "Hebrew",
|
||||
"text": "בן אליהו",
|
||||
"romanized": "ben Eliyahu"
|
||||
}
|
||||
},
|
||||
"family_relationships": {},
|
||||
"context": "Second witness to the marriage."
|
||||
}
|
||||
],
|
||||
"temporal_references": [
|
||||
{
|
||||
"expression": "ביום שלישי בשבת, שנים עשר יום לחודש אייר שנת חמשת אלפים שש מאות וארבעים וחמש לבריאת עולם",
|
||||
"expression_romanized": "BeYom Shlishi BeShabbat, Shneim Asar Yom LeChodesh Iyar, Shnat Chamishat Alafim Shesh Meot VeArba'im VeChamesh LeBeriat Olam",
|
||||
"normalized": "1845-04-18",
|
||||
"calendar": "Hebrew"
|
||||
}
|
||||
],
|
||||
"locations_mentioned": [
|
||||
{
|
||||
"name": "ווילנא",
|
||||
"name_romanized": "Vilna",
|
||||
"type": "city"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,192 @@
|
|||
{
|
||||
"pico_observation": {
|
||||
"observation_id": "obs_001",
|
||||
"source_type": "notarial_act",
|
||||
"source_reference": "Adì 15 Marzo 1654, in Venetia"
|
||||
},
|
||||
"persons": [
|
||||
{
|
||||
"person_index": 0,
|
||||
"pnv_name": {
|
||||
"given_name": "Giovanni Battista",
|
||||
"surname": "Morosini"
|
||||
},
|
||||
"roles": [
|
||||
"party"
|
||||
],
|
||||
"biographical": {
|
||||
"title": "Nobil Homo Messer",
|
||||
"residence": "contrada di San Marco",
|
||||
"status": "living"
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"type": "father",
|
||||
"name": {
|
||||
"pnv_name": {
|
||||
"given_name": "Andrea",
|
||||
"surname": null
|
||||
},
|
||||
"title": "Magnifico Messer"
|
||||
},
|
||||
"status": "deceased"
|
||||
}
|
||||
},
|
||||
"context": "Il Nobil Homo Messer Giovanni Battista Morosini fu quondam Magnifico Messer Andrea, della contrada di San Marco"
|
||||
},
|
||||
{
|
||||
"person_index": 1,
|
||||
"pnv_name": {
|
||||
"given_name": "Caterina",
|
||||
"surname": "Contarini"
|
||||
},
|
||||
"roles": [
|
||||
"party"
|
||||
],
|
||||
"biographical": {
|
||||
"title": "Nobil Donna Madonna",
|
||||
"residence": "contrada di San Marco",
|
||||
"status": "living"
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"type": "father",
|
||||
"name": {
|
||||
"pnv_name": {
|
||||
"given_name": "Francesco",
|
||||
"surname": null
|
||||
},
|
||||
"title": "Messer"
|
||||
},
|
||||
"status": "deceased"
|
||||
},
|
||||
"spouse": {
|
||||
"type": "spouse",
|
||||
"name": {
|
||||
"pnv_name": {
|
||||
"given_name": "Giovanni Battista",
|
||||
"surname": "Morosini"
|
||||
},
|
||||
"title": "Nobil Homo Messer"
|
||||
},
|
||||
"status": "living"
|
||||
}
|
||||
},
|
||||
"context": "sua moglie la Nobil Donna Madonna Caterina Contarini fu quondam Messer Francesco"
|
||||
},
|
||||
{
|
||||
"person_index": 2,
|
||||
"pnv_name": {
|
||||
"given_name": "Pietro",
|
||||
"surname": "Fabbro"
|
||||
},
|
||||
"roles": [
|
||||
"witness"
|
||||
],
|
||||
"biographical": {
|
||||
"title": "Messer",
|
||||
"residence": "contrada di San Polo",
|
||||
"status": "living"
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"type": "father",
|
||||
"name": {
|
||||
"pnv_name": {
|
||||
"given_name": "Paolo",
|
||||
"surname": null
|
||||
},
|
||||
"title": null
|
||||
},
|
||||
"status": "deceased"
|
||||
}
|
||||
},
|
||||
"context": "Messer Pietro fu Paolo Fabbro, habitante nella contrada di San Polo"
|
||||
},
|
||||
{
|
||||
"person_index": 3,
|
||||
"pnv_name": {
|
||||
"given_name": "Marco Antonio",
|
||||
"surname": "Ferrari"
|
||||
},
|
||||
"roles": [
|
||||
"witness"
|
||||
],
|
||||
"biographical": {
|
||||
"title": "Messer",
|
||||
"occupation": "bottegaio",
|
||||
"residence": "Rialto",
|
||||
"status": "living"
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"type": "father",
|
||||
"name": {
|
||||
"pnv_name": {
|
||||
"given_name": "Giovanni",
|
||||
"surname": null
|
||||
},
|
||||
"title": null
|
||||
},
|
||||
"status": "deceased"
|
||||
}
|
||||
},
|
||||
"context": "Messer Marco Antonio Ferrari fu Giovanni, bottegaio in Rialto"
|
||||
},
|
||||
{
|
||||
"person_index": 4,
|
||||
"pnv_name": {
|
||||
"given_name": "Antonio",
|
||||
"surname": "Zen"
|
||||
},
|
||||
"roles": [
|
||||
"notary"
|
||||
],
|
||||
"biographical": {
|
||||
"title": "Notaro",
|
||||
"occupation": "Notaro publico di Venetia",
|
||||
"residence": "Venetia",
|
||||
"status": "living"
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"type": "father",
|
||||
"name": {
|
||||
"pnv_name": {
|
||||
"given_name": "Giacomo",
|
||||
"surname": null
|
||||
},
|
||||
"title": "Messer"
|
||||
},
|
||||
"status": "deceased"
|
||||
}
|
||||
},
|
||||
"context": "Rogato io Notaro Antonio Zen fu quondam Messer Giacomo, Notaro publico di Venetia"
|
||||
}
|
||||
],
|
||||
"temporal_references": [
|
||||
{
|
||||
"expression": "15 Marzo 1654",
|
||||
"normalized": "1654-03-15",
|
||||
"calendar": "Gregorian"
|
||||
}
|
||||
],
|
||||
"locations_mentioned": [
|
||||
{
|
||||
"name": "Venetia",
|
||||
"type": "city"
|
||||
},
|
||||
{
|
||||
"name": "San Marco",
|
||||
"type": "contrada"
|
||||
},
|
||||
{
|
||||
"name": "San Polo",
|
||||
"type": "contrada"
|
||||
},
|
||||
{
|
||||
"name": "Rialto",
|
||||
"type": "district"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,156 @@
|
|||
{
|
||||
"pico_observation": {
|
||||
"observation_id": "notarial_act_1654_03_15_venezia_01",
|
||||
"source_type": "notarial_act",
|
||||
"source_reference": "Adì 15 Marzo 1654, in Venetia. Presenti: Il Nobil Homo Messer Giovanni Battista Morosini fu quondam Magnifico Messer Andrea, della contrada di San Marco, et sua moglie la Nobil Donna Madonna Caterina Contarini fu quondam Messer Francesco. Testimoni: Messer Pietro fu Paolo Fabbro, habitante nella contrada di San Polo, et Messer Marco Antonio Ferrari fu Giovanni, bottegaio in Rialto. Rogato io Notaro Antonio Zen fu quondam Messer Giacomo, Notaro publico di Venetia."
|
||||
},
|
||||
"persons": [
|
||||
{
|
||||
"person_index": 0,
|
||||
"pnv_name": {
|
||||
"given_name": "Giovanni Battista",
|
||||
"surname": "Morosini"
|
||||
},
|
||||
"roles": [
|
||||
"party"
|
||||
],
|
||||
"biographical": {
|
||||
"nobility_titles": [
|
||||
"Nobil Homo",
|
||||
"Messer"
|
||||
],
|
||||
"residence": "contrada di San Marco"
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"name": "Andrea Morosini",
|
||||
"status": "deceased",
|
||||
"title": "Magnifico Messer"
|
||||
},
|
||||
"spouse": "Caterina Contarini"
|
||||
},
|
||||
"context": "Il Nobil Homo Messer Giovanni Battista Morosini fu quondam Magnifico Messer Andrea, della contrada di San Marco"
|
||||
},
|
||||
{
|
||||
"person_index": 1,
|
||||
"pnv_name": {
|
||||
"given_name": "Caterina",
|
||||
"surname": "Contarini"
|
||||
},
|
||||
"roles": [
|
||||
"party"
|
||||
],
|
||||
"biographical": {
|
||||
"nobility_titles": [
|
||||
"Nobil Donna",
|
||||
"Madonna"
|
||||
]
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"name": "Francesco Contarini",
|
||||
"status": "deceased",
|
||||
"title": "Messer"
|
||||
},
|
||||
"spouse": "Giovanni Battista Morosini"
|
||||
},
|
||||
"context": "sua moglie la Nobil Donna Madonna Caterina Contarini fu quondam Messer Francesco"
|
||||
},
|
||||
{
|
||||
"person_index": 2,
|
||||
"pnv_name": {
|
||||
"given_name": "Pietro",
|
||||
"surname": "Fabbro"
|
||||
},
|
||||
"roles": [
|
||||
"witness"
|
||||
],
|
||||
"biographical": {
|
||||
"nobility_titles": [
|
||||
"Messer"
|
||||
],
|
||||
"residence": "contrada di San Polo"
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"name": "Paolo Fabbro",
|
||||
"status": "deceased"
|
||||
}
|
||||
},
|
||||
"context": "Messer Pietro fu Paolo Fabbro, habitante nella contrada di San Polo"
|
||||
},
|
||||
{
|
||||
"person_index": 3,
|
||||
"pnv_name": {
|
||||
"given_name": "Marco Antonio",
|
||||
"surname": "Ferrari"
|
||||
},
|
||||
"roles": [
|
||||
"witness"
|
||||
],
|
||||
"biographical": {
|
||||
"nobility_titles": [
|
||||
"Messer"
|
||||
],
|
||||
"occupation": "bottegaio",
|
||||
"work_location": "Rialto"
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"name": "Giovanni Ferrari",
|
||||
"status": "deceased"
|
||||
}
|
||||
},
|
||||
"context": "Messer Marco Antonio Ferrari fu Giovanni, bottegaio in Rialto"
|
||||
},
|
||||
{
|
||||
"person_index": 4,
|
||||
"pnv_name": {
|
||||
"given_name": "Antonio",
|
||||
"surname": "Zen"
|
||||
},
|
||||
"roles": [
|
||||
"notary"
|
||||
],
|
||||
"biographical": {
|
||||
"nobility_titles": [
|
||||
"Messer"
|
||||
],
|
||||
"occupation": "Notaro publico di Venetia"
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"name": "Giacomo Zen",
|
||||
"status": "deceased",
|
||||
"title": "Messer"
|
||||
}
|
||||
},
|
||||
"context": "io Notaro Antonio Zen fu quondam Messer Giacomo, Notaro publico di Venetia"
|
||||
}
|
||||
],
|
||||
"temporal_references": [
|
||||
{
|
||||
"expression": "15 Marzo 1654",
|
||||
"normalized": "1654-03-15",
|
||||
"calendar": "Gregorian"
|
||||
}
|
||||
],
|
||||
"locations_mentioned": [
|
||||
{
|
||||
"name": "Venetia",
|
||||
"type": "city"
|
||||
},
|
||||
{
|
||||
"name": "contrada di San Marco",
|
||||
"type": "district"
|
||||
},
|
||||
{
|
||||
"name": "contrada di San Polo",
|
||||
"type": "district"
|
||||
},
|
||||
{
|
||||
"name": "Rialto",
|
||||
"type": "area"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,125 @@
|
|||
{
|
||||
"pico_observation": {
|
||||
"observation_id": "sijill_001",
|
||||
"source_type": "sijill",
|
||||
"source_reference": "Ottoman Court Record, Dated 1258 AH"
|
||||
},
|
||||
"persons": [
|
||||
{
|
||||
"person_index": 0,
|
||||
"pnv_name": {
|
||||
"literalName": "محمد آغا بن عبد الله مرحوم",
|
||||
"literalName_romanized": "Muhammad Ağa bin Abdullah al-merhum"
|
||||
},
|
||||
"roles": [
|
||||
"seller"
|
||||
],
|
||||
"biographical": {
|
||||
"honorifics": [
|
||||
"آغا/Ağa"
|
||||
],
|
||||
"is_deceased": true,
|
||||
"patronymic": "بن عبد الله/bin Abdullah"
|
||||
},
|
||||
"family_relationships": {},
|
||||
"context": "Seller of the property, identified as a resident of Demirci-köy. The record notes he is deceased (merhum), implying the sale is conducted by his estate or heirs."
|
||||
},
|
||||
{
|
||||
"person_index": 1,
|
||||
"pnv_name": {
|
||||
"literalName": "محمد بن احمد افندی",
|
||||
"literalName_romanized": "Muhammad bin Ahmad Efendi"
|
||||
},
|
||||
"roles": [
|
||||
"buyer"
|
||||
],
|
||||
"biographical": {
|
||||
"honorifics": [
|
||||
"افندی/Efendi"
|
||||
],
|
||||
"patronymic": "بن احمد/bin Ahmad"
|
||||
},
|
||||
"family_relationships": {
|
||||
"spouse": {
|
||||
"person_index": 2,
|
||||
"name": "فاطمة خاتون/Fatima Hatun"
|
||||
}
|
||||
},
|
||||
"context": "Buyer of the property, a resident of Demirci-köy, present with his wife for the transaction."
|
||||
},
|
||||
{
|
||||
"person_index": 2,
|
||||
"pnv_name": {
|
||||
"literalName": "فاطمه خاتون بنت علیاوغلو",
|
||||
"literalName_romanized": "Fatima Hatun bint Ali-oğlu"
|
||||
},
|
||||
"roles": [
|
||||
"buyer"
|
||||
],
|
||||
"biographical": {
|
||||
"honorifics": [
|
||||
"خاتون/Hatun"
|
||||
],
|
||||
"patronymic": "بنت علیاوغلو/bint Ali-oğlu"
|
||||
},
|
||||
"family_relationships": {
|
||||
"spouse": {
|
||||
"person_index": 1,
|
||||
"name": "محمد بن احمد افندی/Muhammad bin Ahmad Efendi"
|
||||
}
|
||||
},
|
||||
"context": "Wife (zevcesi) of the buyer, Muhammad bin Ahmad Efendi, and co-purchaser. She is identified as the daughter of Ali-oğlu."
|
||||
},
|
||||
{
|
||||
"person_index": 3,
|
||||
"pnv_name": {
|
||||
"literalName": "حسن افندی بن عمر",
|
||||
"literalName_romanized": "Hasan Efendi bin Umar"
|
||||
},
|
||||
"roles": [
|
||||
"witness"
|
||||
],
|
||||
"biographical": {
|
||||
"honorifics": [
|
||||
"افندی/Efendi"
|
||||
],
|
||||
"patronymic": "بن عمر/bin Umar"
|
||||
},
|
||||
"family_relationships": {},
|
||||
"context": "One of the two witnesses (şühûd-ı hâl) to the sale."
|
||||
},
|
||||
{
|
||||
"person_index": 4,
|
||||
"pnv_name": {
|
||||
"literalName": "ابراهيم چلبی بن مصطفی",
|
||||
"literalName_romanized": "Ibrahim Çelebi bin Mustafa"
|
||||
},
|
||||
"roles": [
|
||||
"witness"
|
||||
],
|
||||
"biographical": {
|
||||
"honorifics": [
|
||||
"چلبی/Çelebi"
|
||||
],
|
||||
"patronymic": "بن مصطفی/bin Mustafa"
|
||||
},
|
||||
"family_relationships": {},
|
||||
"context": "One of the two witnesses (şühûd-ı hâl) to the sale."
|
||||
}
|
||||
],
|
||||
"temporal_references": [
|
||||
{
|
||||
"expression": "فی اوائل شهر رجب سنة ١٢٥٨",
|
||||
"expression_romanized": "Fi eva'il-i şehr-i Receb sene 1258",
|
||||
"normalized": "Beginning of Rajab, 1258 AH",
|
||||
"calendar": "Hijri"
|
||||
}
|
||||
],
|
||||
"locations_mentioned": [
|
||||
{
|
||||
"name": "دميرجیكوي",
|
||||
"name_romanized": "Demirci-köy",
|
||||
"type": "قصبه/kasaba"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,149 @@
|
|||
{
|
||||
"pico_observation": {
|
||||
"observation_id": "sijill_1258_rajab_001",
|
||||
"source_type": "sijill",
|
||||
"source_reference": "Ottoman Court Record, Dated Beginning of Rajab 1258 AH"
|
||||
},
|
||||
"persons": [
|
||||
{
|
||||
"person_index": 0,
|
||||
"pnv_name": {
|
||||
"literalName": "محمد آغا",
|
||||
"literalName_romanized": "Muhammad Ağa"
|
||||
},
|
||||
"roles": [
|
||||
"seller"
|
||||
],
|
||||
"biographical": {
|
||||
"patronymic": "بن عبد الله",
|
||||
"patronymic_romanized": "bin Abdullah",
|
||||
"honorific": "آغا",
|
||||
"honorific_romanized": "Ağa",
|
||||
"deceased_father": true
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"name": "عبد الله",
|
||||
"name_romanized": "Abdullah",
|
||||
"deceased": true
|
||||
}
|
||||
},
|
||||
"context": "Seller from the district of Demirciköy, son of the deceased Abdullah."
|
||||
},
|
||||
{
|
||||
"person_index": 1,
|
||||
"pnv_name": {
|
||||
"literalName": "محمد بن احمد افندی",
|
||||
"literalName_romanized": "Muhammad bin Ahmad Efendi"
|
||||
},
|
||||
"roles": [
|
||||
"buyer"
|
||||
],
|
||||
"biographical": {
|
||||
"patronymic": "بن احمد",
|
||||
"patronymic_romanized": "bin Ahmad",
|
||||
"honorific": "افندی",
|
||||
"honorific_romanized": "Efendi"
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"name": "احمد",
|
||||
"name_romanized": "Ahmad"
|
||||
},
|
||||
"spouse": {
|
||||
"person_index": 2,
|
||||
"relation": "wife"
|
||||
}
|
||||
},
|
||||
"context": "Buyer, son of Ahmad, husband of Fatima Hatun."
|
||||
},
|
||||
{
|
||||
"person_index": 2,
|
||||
"pnv_name": {
|
||||
"literalName": "فاطمه خاتوم",
|
||||
"literalName_romanized": "Fatima Hatun"
|
||||
},
|
||||
"roles": [
|
||||
"buyer"
|
||||
],
|
||||
"biographical": {
|
||||
"patronymic": "بنت علیاوغلو",
|
||||
"patronymic_romanized": "bint Ali-oğlu",
|
||||
"honorific": "خاتوم",
|
||||
"honorific_romanized": "Hatun"
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"name": "علیاوغلو",
|
||||
"name_romanized": "Ali-oğlu"
|
||||
},
|
||||
"spouse": {
|
||||
"person_index": 1,
|
||||
"relation": "husband"
|
||||
}
|
||||
},
|
||||
"context": "Buyer, daughter of Ali-oğlu, wife of Muhammad bin Ahmad Efendi."
|
||||
},
|
||||
{
|
||||
"person_index": 3,
|
||||
"pnv_name": {
|
||||
"literalName": "حسن افندی بن عمر",
|
||||
"literalName_romanized": "Hasan Efendi bin Umar"
|
||||
},
|
||||
"roles": [
|
||||
"witness"
|
||||
],
|
||||
"biographical": {
|
||||
"patronymic": "بن عمر",
|
||||
"patronymic_romanized": "bin Umar",
|
||||
"honorific": "افندی",
|
||||
"honorific_romanized": "Efendi"
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"name": "عمر",
|
||||
"name_romanized": "Umar"
|
||||
}
|
||||
},
|
||||
"context": "Witness to the transaction, son of Umar."
|
||||
},
|
||||
{
|
||||
"person_index": 4,
|
||||
"pnv_name": {
|
||||
"literalName": "ابراهيم چلبی بن مصطفی",
|
||||
"literalName_romanized": "Ibrahim Çelebi bin Mustafa"
|
||||
},
|
||||
"roles": [
|
||||
"witness"
|
||||
],
|
||||
"biographical": {
|
||||
"patronymic": "بن مصطفی",
|
||||
"patronymic_romanized": "bin Mustafa",
|
||||
"honorific": "چلبی",
|
||||
"honorific_romanized": "Çelebi"
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": {
|
||||
"name": "مصطفی",
|
||||
"name_romanized": "Mustafa"
|
||||
}
|
||||
},
|
||||
"context": "Witness to the transaction, son of Mustafa."
|
||||
}
|
||||
],
|
||||
"temporal_references": [
|
||||
{
|
||||
"expression": "فی اوائل شهر رجب سنة ١٢٥٨",
|
||||
"expression_romanized": "Fi awāli shahr Rajab sanat 1258",
|
||||
"normalized": "Beginning of Rajab, 1258 AH",
|
||||
"calendar": "Hijri"
|
||||
}
|
||||
],
|
||||
"locations_mentioned": [
|
||||
{
|
||||
"name": "قصبه دميرجیکوی",
|
||||
"name_romanized": "Kasaba-ı Demirciköy",
|
||||
"type": "District/Town"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,166 @@
|
|||
|
||||
```json
|
||||
{
|
||||
"pico_observation": {
|
||||
"observation_id": "waqf_aleppo_1225h",
|
||||
"observed_at": "2023-10-27T10:00:00Z",
|
||||
"source_type": "waqf_document",
|
||||
"source_reference": "Aleppo Waqf, 1225 H"
|
||||
},
|
||||
"persons": [
|
||||
{
|
||||
"person_index": 0,
|
||||
"pnv_name": {
|
||||
"literalName": "الحاج أحمد بن محمد العمري",
|
||||
"literalName_romanized": "al-Hajj Ahmad ibn Muhammad al-Umari",
|
||||
"givenName": "أحمد",
|
||||
"givenName_romanized": "Ahmad",
|
||||
"patronym": "محمد",
|
||||
"patronym_romanized": "Muhammad",
|
||||
"baseSurname": "العمري",
|
||||
"baseSurname_romanized": "al-Umari",
|
||||
"honorificPrefix": "الحاج",
|
||||
"honorificPrefix_romanized": "al-Hajj"
|
||||
},
|
||||
"roles": [
|
||||
{
|
||||
"role_title": "تاجر",
|
||||
"role_title_romanized": "tajir",
|
||||
"role_in_source": "founder"
|
||||
}
|
||||
],
|
||||
"biographical": {
|
||||
"deceased": true,
|
||||
"address": "مدينة حلب الشهباء"
|
||||
},
|
||||
"family_relationships": {
|
||||
"parent": [
|
||||
{
|
||||
"person_index": 1,
|
||||
"target_name": "محمد بن عبد الله العمري"
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"context": "The founder (waqif) of the endowment, a deceased merchant from Aleppo."
|
||||
},
|
||||
{
|
||||
"person_index": 1,
|
||||
"pnv_name": {
|
||||
"literalName": "المرحوم محمد بن عبد الله العمري",
|
||||
"literalName_romanized": "al-marhum Muhammad ibn Abd Allah al-Umari",
|
||||
"givenName": "محمد",
|
||||
"givenName_romanized": "Muhammad",
|
||||
"patronym": "عبد الله",
|
||||
"patronym_romanized": "Abd Allah",
|
||||
"baseSurname": "العمري",
|
||||
"baseSurname_romanized": "al-Umari"
|
||||
},
|
||||
"roles": [
|
||||
{
|
||||
"role_title": null,
|
||||
"role_title_romanized": null,
|
||||
"role_in_source": null
|
||||
}
|
||||
],
|
||||
"biographical": {
|
||||
"deceased": true,
|
||||
"address": null
|
||||
},
|
||||
"family_relationships": {
|
||||
"parent": [],
|
||||
"children": [
|
||||
{
|
||||
"person_index": 0,
|
||||
"target_name": "أحمد بن محمد العمري"
|
||||
}
|
||||
]
|
||||
},
|
||||
"context": "The deceased father of the founder, Ahmad al-Umari."
|
||||
},
|
||||
{
|
||||
"person_index": 2,
|
||||
"pnv_name": {
|
||||
"literalName": "الحاج إبراهيم بن يوسف التركماني",
|
||||
"literalName_romanized": "al-Hajj Ibrahim ibn Yusuf al-Turkmani",
|
||||
"givenName": "إبراهيم",
|
||||
"givenName_romanized": "Ibrahim",
|
||||
"patronym": "يوسف",
|
||||
"patronym_romanized": "Yusuf",
|
||||
"baseSurname": "التركماني",
|
||||
"baseSurname_romanized": "al-Turkmani",
|
||||
"honorificPrefix": "الحاج",
|
||||
"honorificPrefix_romanized": "al-Hajj"
|
||||
},
|
||||
"roles": [
|
||||
{
|
||||
"role_title": "شاهد",
|
||||
"role_title_romanized": "shahid",
|
||||
"role_in_source": "witness"
|
||||
}
|
||||
],
|
||||
"biographical": {
|
||||
"deceased": null,
|
||||
"address": null
|
||||
},
|
||||
"family_relationships": {
|
||||
"parent": [],
|
||||
"children": []
|
||||
},
|
||||
"context": "One of the witnesses to the endowment deed."
|
||||
},
|
||||
{
|
||||
"person_index": 3,
|
||||
"pnv_name": {
|
||||
"literalName": "السيد علي بن حسين الحلبي",
|
||||
"literalName_romanized": "al-Sayyid Ali ibn Husayn al-Halabi",
|
||||
"givenName": "علي",
|
||||
"givenName_romanized": "Ali",
|
||||
"patronym": "حسين",
|
||||
"patronym_romanized": "Husayn",
|
||||
"baseSurname": "الحلبي",
|
||||
"baseSurname_romanized": "al-Halabi",
|
||||
"honorificPrefix": "السيد",
|
||||
"honorificPrefix_romanized": "al-Sayyid"
|
||||
},
|
||||
"roles": [
|
||||
{
|
||||
"role_title": "شاهد",
|
||||
"role_title_romanized": "shahid",
|
||||
"role_in_source": "witness"
|
||||
}
|
||||
],
|
||||
"biographical": {
|
||||
"deceased": null,
|
||||
"address": null
|
||||
},
|
||||
"family_relationships": {
|
||||
"parent": [],
|
||||
"children": []
|
||||
},
|
||||
"context": "The second witness to the endowment deed."
|
||||
}
|
||||
],
|
||||
"temporal_references": [
|
||||
{
|
||||
"expression": "شهر رجب سنة ألف ومائتين وخمس وعشرين هجرية",
|
||||
"expression_romanized": "Shahr Rajab sanat alf wa mi'ayn wa khamsa wa 'ishrin hijriyyah",
|
||||
"normalized": "1811-01",
|
||||
"calendar": "Hijri",
|
||||
"type": "DATE"
|
||||
}
|
||||
],
|
||||
"locations_mentioned": [
|
||||
{
|
||||
"name": "حلب الشهباء",
|
||||
"name_romanized": "Halab al-Shahba'",
|
||||
"type": "city"
|
||||
},
|
||||
{
|
||||
"name": "محلة الجديدة",
|
||||
"name_romanized": "Mahallat al-Jadida",
|
||||
"type": "neighborhood"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
|
@ -0,0 +1,167 @@
|
|||
{
|
||||
"pico_observation": {
|
||||
"observation_id": "obs_001",
|
||||
"source_type": "metrical_book",
|
||||
"source_reference": "Метрическая книга Троицкой церкви села Покровского за 1892 год"
|
||||
},
|
||||
"persons": [
|
||||
{
|
||||
"person_index": 0,
|
||||
"pnv_name": {
|
||||
"full_name_cyrillic": "Иван",
|
||||
"full_name_romanized": "Ivan",
|
||||
"given_name_cyrillic": "Иван",
|
||||
"given_name_romanized": "Ivan"
|
||||
},
|
||||
"roles": [
|
||||
"newborn"
|
||||
],
|
||||
"biographical": {
|
||||
"sex": "male",
|
||||
"estate": "peasant"
|
||||
},
|
||||
"family_relationships": {
|
||||
"father": "Пётр Иванович Сидоров",
|
||||
"mother": "Анна Фёдоровна"
|
||||
},
|
||||
"context": "Born March 15, baptized March 17, 1892, son of Pyotr Ivanovich Sidorov and Anna Fyodorovna."
|
||||
},
|
||||
{
|
||||
"person_index": 1,
|
||||
"pnv_name": {
|
||||
"full_name_cyrillic": "Пётр Иванович Сидоров",
|
||||
"full_name_romanized": "Pyotr Ivanovich Sidorov",
|
||||
"given_name_cyrillic": "Пётр",
|
||||
"given_name_romanized": "Pyotr",
|
||||
"patronymic_cyrillic": "Иванович",
|
||||
"patronymic_romanized": "Ivanovich",
|
||||
"surname_cyrillic": "Сидоров",
|
||||
"surname_romanized": "Sidorov"
|
||||
},
|
||||
"roles": [
|
||||
"father"
|
||||
],
|
||||
"biographical": {
|
||||
"sex": "male",
|
||||
"estate": "крестьянин (peasant)",
|
||||
"religion": "православный (Orthodox)"
|
||||
},
|
||||
"family_relationships": {
|
||||
"son": "Иван",
|
||||
"wife": "Анна Фёдоровна",
|
||||
"sibling": "Мария Ивановна Сидорова"
|
||||
},
|
||||
"context": "Peasant from the village of Ivanovka, father of the newborn Ivan, husband of Anna Fyodorovna."
|
||||
},
|
||||
{
|
||||
"person_index": 2,
|
||||
"pnv_name": {
|
||||
"full_name_cyrillic": "Анна Фёдоровна",
|
||||
"full_name_romanized": "Anna Fyodorovna",
|
||||
"given_name_cyrillic": "Анна",
|
||||
"given_name_romanized": "Anna",
|
||||
"patronymic_cyrillic": "Фёдоровна",
|
||||
"patronymic_romanized": "Fyodorovna"
|
||||
},
|
||||
"roles": [
|
||||
"mother"
|
||||
],
|
||||
"biographical": {
|
||||
"sex": "female",
|
||||
"religion": "православный (Orthodox)"
|
||||
},
|
||||
"family_relationships": {
|
||||
"son": "Иван",
|
||||
"husband": "Пётр Иванович Сидоров"
|
||||
},
|
||||
"context": "Lawful wife of Pyotr Ivanovich Sidorov, mother of the newborn Ivan."
|
||||
},
|
||||
{
|
||||
"person_index": 3,
|
||||
"pnv_name": {
|
||||
"full_name_cyrillic": "Николай Петрович Кузнецов",
|
||||
"full_name_romanized": "Nikolay Petrovich Kuznetsov",
|
||||
"given_name_cyrillic": "Николай",
|
||||
"given_name_romanized": "Nikolay",
|
||||
"patronymic_cyrillic": "Петрович",
|
||||
"patronymic_romanized": "Petrovich",
|
||||
"surname_cyrillic": "Кузнецов",
|
||||
"surname_romanized": "Kuznetsov"
|
||||
},
|
||||
"roles": [
|
||||
"godparent"
|
||||
],
|
||||
"biographical": {
|
||||
"sex": "male",
|
||||
"estate": "крестьянин (peasant)"
|
||||
},
|
||||
"family_relationships": {
|
||||
"godson": "Иван"
|
||||
},
|
||||
"context": "Godparent of Ivan, a peasant from the same village (Ivanovka)."
|
||||
},
|
||||
{
|
||||
"person_index": 4,
|
||||
"pnv_name": {
|
||||
"full_name_cyrillic": "Мария Ивановна Сидорова",
|
||||
"full_name_romanized": "Maria Ivanovna Sidorova",
|
||||
"given_name_cyrillic": "Мария",
|
||||
"given_name_romanized": "Maria",
|
||||
"patronymic_cyrillic": "Ивановна",
|
||||
"patronymic_romanized": "Ivanovna",
|
||||
"surname_cyrillic": "Сидорова",
|
||||
"surname_romanized": "Sidorova"
|
||||
},
|
||||
"roles": [
|
||||
"godparent"
|
||||
],
|
||||
"biographical": {
|
||||
"sex": "female",
|
||||
"estate": "крестьянская дочь (peasant's daughter)",
|
||||
"marital_status": "девица (unmarried)"
|
||||
},
|
||||
"family_relationships": {
|
||||
"godson": "Иван",
|
||||
"brother": "Пётр Иванович Сидоров"
|
||||
},
|
||||
"context": "Godparent of Ivan, an unmarried peasant's daughter from the village of Ivanovka."
|
||||
}
|
||||
],
|
||||
"temporal_references": [
|
||||
{
|
||||
"expression": "за 1892 год",
|
||||
"expression_romanized": "za 1892 god",
|
||||
"normalized": "1892",
|
||||
"calendar": "Gregorian"
|
||||
},
|
||||
{
|
||||
"expression": "Марта 15 дня",
|
||||
"expression_romanized": "Marta 15 dnya",
|
||||
"normalized": "1892-03-15",
|
||||
"calendar": "Julian"
|
||||
},
|
||||
{
|
||||
"expression": "17 дня",
|
||||
"expression_romanized": "17 dnya",
|
||||
"normalized": "1892-03-17",
|
||||
"calendar": "Julian"
|
||||
}
|
||||
],
|
||||
"locations_mentioned": [
|
||||
{
|
||||
"name": "Троицкой церкви",
|
||||
"name_romanized": "Troitskoy tserkvi",
|
||||
"type": "church"
|
||||
},
|
||||
{
|
||||
"name": "села Покровского",
|
||||
"name_romanized": "sela Pokrovskogo",
|
||||
"type": "village"
|
||||
},
|
||||
{
|
||||
"name": "деревни Ивановки",
|
||||
"name_romanized": "derevni Ivanovki",
|
||||
"type": "village"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,192 @@
|
|||
{
|
||||
"pico_observation": {
|
||||
"observation_id": "obs_1892_03_17_ivan_sidorov",
|
||||
"source_type": "metrical_book",
|
||||
"source_reference": "Метрическая книга Троицкой церкви села Покровского за 1892 год"
|
||||
},
|
||||
"persons": [
|
||||
{
|
||||
"person_index": 0,
|
||||
"pnv_name": {
|
||||
"full_name_cyrillic": "Иван",
|
||||
"full_name_romanized": "Ivan",
|
||||
"given_name_cyrillic": "Иван",
|
||||
"given_name_romanized": "Ivan"
|
||||
},
|
||||
"roles": [
|
||||
"newborn"
|
||||
],
|
||||
"biographical": {
|
||||
"sex": "male",
|
||||
"estate": null,
|
||||
"religion": "Orthodox"
|
||||
},
|
||||
"family_relationships": {
|
||||
"son_of": [
|
||||
"Пётр Иванович Сидоров",
|
||||
"Анна Фёдоровна"
|
||||
]
|
||||
},
|
||||
"context": "Родился 15 марта, крещён 17 марта 1892 года. Сын крестьянина Петра Ивановича Сидорова и Анны Фёдоровны."
|
||||
},
|
||||
{
|
||||
"person_index": 1,
|
||||
"pnv_name": {
|
||||
"full_name_cyrillic": "Пётр Иванович Сидоров",
|
||||
"full_name_romanized": "Pyotr Ivanovich Sidorov",
|
||||
"given_name_cyrillic": "Пётр",
|
||||
"given_name_romanized": "Pyotr",
|
||||
"patronymic_cyrillic": "Иванович",
|
||||
"patronymic_romanized": "Ivanovich",
|
||||
"surname_cyrillic": "Сидоров",
|
||||
"surname_romanized": "Sidorov"
|
||||
},
|
||||
"roles": [
|
||||
"parent",
|
||||
"father"
|
||||
],
|
||||
"biographical": {
|
||||
"sex": "male",
|
||||
"estate": "крестьянин",
|
||||
"religion": "Orthodox"
|
||||
},
|
||||
"family_relationships": {
|
||||
"father_of": [
|
||||
"Иван"
|
||||
],
|
||||
"husband_of": [
|
||||
"Анна Фёдоровна"
|
||||
]
|
||||
},
|
||||
"context": "Отец новорождённого Ивана. Крестьянин из деревни Ивановки."
|
||||
},
|
||||
{
|
||||
"person_index": 2,
|
||||
"pnv_name": {
|
||||
"full_name_cyrillic": "Анна Фёдоровна",
|
||||
"full_name_romanized": "Anna Fyodorovna",
|
||||
"given_name_cyrillic": "Анна",
|
||||
"given_name_romanized": "Anna",
|
||||
"patronymic_cyrillic": "Фёдоровна",
|
||||
"patronymic_romanized": "Fyodorovna"
|
||||
},
|
||||
"roles": [
|
||||
"parent",
|
||||
"mother"
|
||||
],
|
||||
"biographical": {
|
||||
"sex": "female",
|
||||
"estate": "крестьянка",
|
||||
"religion": "Orthodox"
|
||||
},
|
||||
"family_relationships": {
|
||||
"mother_of": [
|
||||
"Иван"
|
||||
],
|
||||
"wife_of": [
|
||||
"Пётр Иванович Сидоров"
|
||||
]
|
||||
},
|
||||
"context": "Мать новорождённого Ивана. Законная жена крестьянина Петра Ивановича Сидорова."
|
||||
},
|
||||
{
|
||||
"person_index": 3,
|
||||
"pnv_name": {
|
||||
"full_name_cyrillic": "Николай Петрович Кузнецов",
|
||||
"full_name_romanized": "Nikolai Petrovich Kuznetsov",
|
||||
"given_name_cyrillic": "Николай",
|
||||
"given_name_romanized": "Nikolai",
|
||||
"patronymic_cyrillic": "Петрович",
|
||||
"patronymic_romanized": "Petrovich",
|
||||
"surname_cyrillic": "Кузнецов",
|
||||
"surname_romanized": "Kuznetsov"
|
||||
},
|
||||
"roles": [
|
||||
"godparent",
|
||||
"godfather"
|
||||
],
|
||||
"biographical": {
|
||||
"sex": "male",
|
||||
"estate": "крестьянин",
|
||||
"religion": "Orthodox"
|
||||
},
|
||||
"family_relationships": {
|
||||
"godparent_of": [
|
||||
"Иван"
|
||||
]
|
||||
},
|
||||
"context": "Восприемник (крёстный отец) Ивана. Крестьянин из той же деревни Ивановки."
|
||||
},
|
||||
{
|
||||
"person_index": 4,
|
||||
"pnv_name": {
|
||||
"full_name_cyrillic": "Мария Ивановна Сидорова",
|
||||
"full_name_romanized": "Maria Ivanovna Sidorova",
|
||||
"given_name_cyrillic": "Мария",
|
||||
"given_name_romanized": "Maria",
|
||||
"patronymic_cyrillic": "Ивановна",
|
||||
"patronymic_romanized": "Ivanovna",
|
||||
"surname_cyrillic": "Сидорова",
|
||||
"surname_romanized": "Sidorova"
|
||||
},
|
||||
"roles": [
|
||||
"godparent",
|
||||
"godmother"
|
||||
],
|
||||
"biographical": {
|
||||
"sex": "female",
|
||||
"estate": "крестьянская дочь девица",
|
||||
"religion": "Orthodox"
|
||||
},
|
||||
"family_relationships": {
|
||||
"godparent_of": [
|
||||
"Иван"
|
||||
]
|
||||
},
|
||||
"context": "Восприемница (крёстная мать) Ивана. Крестьянская дочь девица из той же деревни Ивановки."
|
||||
}
|
||||
],
|
||||
"temporal_references": [
|
||||
{
|
||||
"expression": "Марта 15 дня",
|
||||
"expression_romanized": "Marta 15 dnya",
|
||||
"normalized": "1892-03-15",
|
||||
"calendar": "Julian"
|
||||
},
|
||||
{
|
||||
"expression": "Марта 15 дня",
|
||||
"expression_romanized": "Marta 15 dnya",
|
||||
"normalized": "1892-03-27",
|
||||
"calendar": "Gregorian"
|
||||
},
|
||||
{
|
||||
"expression": "17 дня",
|
||||
"expression_romanized": "17 dnya",
|
||||
"normalized": "1892-03-17",
|
||||
"calendar": "Julian"
|
||||
},
|
||||
{
|
||||
"expression": "17 дня",
|
||||
"expression_romanized": "17 dnya",
|
||||
"normalized": "1892-03-29",
|
||||
"calendar": "Gregorian"
|
||||
}
|
||||
],
|
||||
"locations_mentioned": [
|
||||
{
|
||||
"name": "село Покровское",
|
||||
"name_romanized": "selo Pokrovskoye",
|
||||
"type": "village"
|
||||
},
|
||||
{
|
||||
"name": "Троицкая церковь",
|
||||
"name_romanized": "Troitskaya tserkov",
|
||||
"type": "church"
|
||||
},
|
||||
{
|
||||
"name": "деревня Ивановка",
|
||||
"name_romanized": "derevnya Ivanovka",
|
||||
"type": "village"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,221 @@
|
|||
{
|
||||
"pico_observation": {
|
||||
"observation_id": "baptism_mexico_1742-02-23_001",
|
||||
"source_type": "baptismal_register",
|
||||
"source_reference": "Ciudad de México, 23 de febrero de 1742. Bautismo de Juan José, hijo de Don Pedro García de la Cruz y Doña María Josefa de los Reyes."
|
||||
},
|
||||
"persons": [
|
||||
{
|
||||
"person_index": 0,
|
||||
"pnv_name": {
|
||||
"given_name": "Antonio",
|
||||
"surname": "de Mendoza"
|
||||
},
|
||||
"roles": [
|
||||
"teniente de cura",
|
||||
"priest"
|
||||
],
|
||||
"biographical": {
|
||||
"honorifics": [
|
||||
"Br.",
|
||||
"Don"
|
||||
]
|
||||
},
|
||||
"family_relationships": {},
|
||||
"context": "El sacerdote que oficia el bautismo."
|
||||
},
|
||||
{
|
||||
"person_index": 1,
|
||||
"pnv_name": {
|
||||
"given_name": "Juan José",
|
||||
"surname": ""
|
||||
},
|
||||
"roles": [
|
||||
"baptized_infant"
|
||||
],
|
||||
"biographical": {
|
||||
"casta": "español",
|
||||
"legitimacy": "legítimo"
|
||||
},
|
||||
"family_relationships": {
|
||||
"parents": [
|
||||
{
|
||||
"person_index": 2,
|
||||
"relationship": "father"
|
||||
},
|
||||
{
|
||||
"person_index": 3,
|
||||
"relationship": "mother"
|
||||
}
|
||||
],
|
||||
"godparents": [
|
||||
{
|
||||
"person_index": 4,
|
||||
"relationship": "godfather"
|
||||
},
|
||||
{
|
||||
"person_index": 5,
|
||||
"relationship": "godmother"
|
||||
}
|
||||
]
|
||||
},
|
||||
"context": "El infante bautizado."
|
||||
},
|
||||
{
|
||||
"person_index": 2,
|
||||
"pnv_name": {
|
||||
"given_name": "Pedro",
|
||||
"surname": "García de la Cruz"
|
||||
},
|
||||
"roles": [
|
||||
"father"
|
||||
],
|
||||
"biographical": {
|
||||
"honorifics": [
|
||||
"Don"
|
||||
],
|
||||
"casta": "español",
|
||||
"origin": {
|
||||
"place": "villa de Puebla de los Ángeles",
|
||||
"type": "natural"
|
||||
}
|
||||
},
|
||||
"family_relationships": {
|
||||
"children": [
|
||||
{
|
||||
"person_index": 1,
|
||||
"relationship": "son"
|
||||
}
|
||||
],
|
||||
"spouse": [
|
||||
{
|
||||
"person_index": 3,
|
||||
"relationship": "wife"
|
||||
}
|
||||
]
|
||||
},
|
||||
"context": "Padre del bautizado."
|
||||
},
|
||||
{
|
||||
"person_index": 3,
|
||||
"pnv_name": {
|
||||
"given_name": "María Josefa",
|
||||
"surname": "de los Reyes"
|
||||
},
|
||||
"roles": [
|
||||
"mother"
|
||||
],
|
||||
"biographical": {
|
||||
"honorifics": [
|
||||
"Doña"
|
||||
],
|
||||
"casta": "español",
|
||||
"origin": {
|
||||
"place": "esta ciudad",
|
||||
"type": "natural"
|
||||
}
|
||||
},
|
||||
"family_relationships": {
|
||||
"children": [
|
||||
{
|
||||
"person_index": 1,
|
||||
"relationship": "son"
|
||||
}
|
||||
],
|
||||
"spouse": [
|
||||
{
|
||||
"person_index": 2,
|
||||
"relationship": "husband"
|
||||
}
|
||||
]
|
||||
},
|
||||
"context": "Madre del bautizado."
|
||||
},
|
||||
{
|
||||
"person_index": 4,
|
||||
"pnv_name": {
|
||||
"given_name": "Francisco Xavier",
|
||||
"surname": "de Castañeda"
|
||||
},
|
||||
"roles": [
|
||||
"godfather"
|
||||
],
|
||||
"biographical": {
|
||||
"honorifics": [
|
||||
"Don"
|
||||
],
|
||||
"casta": "español",
|
||||
"origin": {
|
||||
"place": "esta ciudad",
|
||||
"type": "vecino"
|
||||
}
|
||||
},
|
||||
"family_relationships": {
|
||||
"spouse": [
|
||||
{
|
||||
"person_index": 5,
|
||||
"relationship": "wife"
|
||||
}
|
||||
],
|
||||
"godchild": [
|
||||
{
|
||||
"person_index": 1,
|
||||
"relationship": "godson"
|
||||
}
|
||||
]
|
||||
},
|
||||
"context": "Padrino del bautizado."
|
||||
},
|
||||
{
|
||||
"person_index": 5,
|
||||
"pnv_name": {
|
||||
"given_name": "Ana María",
|
||||
"surname": "de la Encarnación"
|
||||
},
|
||||
"roles": [
|
||||
"godmother"
|
||||
],
|
||||
"biographical": {
|
||||
"honorifics": [
|
||||
"Doña"
|
||||
]
|
||||
},
|
||||
"family_relationships": {
|
||||
"spouse": [
|
||||
{
|
||||
"person_index": 4,
|
||||
"relationship": "husband"
|
||||
}
|
||||
],
|
||||
"godchild": [
|
||||
{
|
||||
"person_index": 1,
|
||||
"relationship": "godson"
|
||||
}
|
||||
]
|
||||
},
|
||||
"context": "Madrina del bautizado, esposa legítima del padrino."
|
||||
}
|
||||
],
|
||||
"temporal_references": [
|
||||
{
|
||||
"expression": "a veinte y tres días del mes de febrero de mil setecientos cuarenta y dos años",
|
||||
"normalized": "1742-02-23",
|
||||
"calendar": "Gregorian"
|
||||
}
|
||||
],
|
||||
"locations_mentioned": [
|
||||
{
|
||||
"name": "ciudad de México",
|
||||
"type": "city"
|
||||
},
|
||||
{
|
||||
"name": "esta santa iglesia catedral",
|
||||
"type": "church"
|
||||
},
|
||||
{
|
||||
"name": "villa de Puebla de los Ángeles",
|
||||
"type": "villa"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,259 @@
|
|||
{
|
||||
"pico_observation": {
|
||||
"observation_id": "baptism_mexico_1742-02-23_001",
|
||||
"source_type": "baptismal_register",
|
||||
"source_reference": "Ciudad de México, 23 de febrero de 1742. Bautismo de Juan José, hijo de Don Pedro García de la Cruz y Doña María Josefa de los Reyes."
|
||||
},
|
||||
"persons": [
|
||||
{
|
||||
"person_index": 0,
|
||||
"pnv_name": {
|
||||
"given_name": "Antonio",
|
||||
"surname": "de Mendoza"
|
||||
},
|
||||
"roles": [
|
||||
"teniente de cura",
|
||||
"priest"
|
||||
],
|
||||
"honorifics": [
|
||||
"Br.",
|
||||
"Don"
|
||||
],
|
||||
"biographical": {},
|
||||
"family_relationships": {},
|
||||
"context": "El sacerdote que oficia el bautismo."
|
||||
},
|
||||
{
|
||||
"person_index": 1,
|
||||
"pnv_name": {
|
||||
"given_name": "Juan José",
|
||||
"surname": ""
|
||||
},
|
||||
"roles": [
|
||||
"baptized_infant"
|
||||
],
|
||||
"biographical": {
|
||||
"casta": "español",
|
||||
"legitimacy": "legítimo"
|
||||
},
|
||||
"family_relationships": {
|
||||
"parents": [
|
||||
{
|
||||
"person_index": 2,
|
||||
"relationship": "father"
|
||||
},
|
||||
{
|
||||
"person_index": 3,
|
||||
"relationship": "mother"
|
||||
}
|
||||
],
|
||||
"godparents": [
|
||||
{
|
||||
"person_index": 4,
|
||||
"relationship": "godfather"
|
||||
},
|
||||
{
|
||||
"person_index": 5,
|
||||
"relationship": "godmother"
|
||||
}
|
||||
]
|
||||
},
|
||||
"context": "El niño bautizado, hijo legítimo de Pedro García de la Cruz y María Josefa de los Reyes."
|
||||
},
|
||||
{
|
||||
"person_index": 2,
|
||||
"pnv_name": {
|
||||
"given_name": "Pedro",
|
||||
"surname": "García de la Cruz"
|
||||
},
|
||||
"roles": [
|
||||
"father"
|
||||
],
|
||||
"honorifics": [
|
||||
"Don"
|
||||
],
|
||||
"biographical": {
|
||||
"casta": "español",
|
||||
"origin": {
|
||||
"type": "natural",
|
||||
"place": "Puebla de los Ángeles"
|
||||
}
|
||||
},
|
||||
"family_relationships": {
|
||||
"children": [
|
||||
{
|
||||
"person_index": 1,
|
||||
"relationship": "son"
|
||||
}
|
||||
],
|
||||
"spouse": [
|
||||
{
|
||||
"person_index": 3,
|
||||
"relationship": "wife"
|
||||
}
|
||||
],
|
||||
"compadres": [
|
||||
{
|
||||
"person_index": 4,
|
||||
"relationship": "godfather_of_his_child"
|
||||
},
|
||||
{
|
||||
"person_index": 5,
|
||||
"relationship": "godmother_of_his_child"
|
||||
}
|
||||
]
|
||||
},
|
||||
"context": "Padre del bautizado, natural de Puebla de los Ángeles."
|
||||
},
|
||||
{
|
||||
"person_index": 3,
|
||||
"pnv_name": {
|
||||
"given_name": "María Josefa",
|
||||
"surname": "de los Reyes"
|
||||
},
|
||||
"roles": [
|
||||
"mother"
|
||||
],
|
||||
"honorifics": [
|
||||
"Doña"
|
||||
],
|
||||
"biographical": {
|
||||
"casta": "español",
|
||||
"origin": {
|
||||
"type": "natural",
|
||||
"place": "Ciudad de México"
|
||||
}
|
||||
},
|
||||
"family_relationships": {
|
||||
"children": [
|
||||
{
|
||||
"person_index": 1,
|
||||
"relationship": "son"
|
||||
}
|
||||
],
|
||||
"spouse": [
|
||||
{
|
||||
"person_index": 2,
|
||||
"relationship": "husband"
|
||||
}
|
||||
],
|
||||
"comadres": [
|
||||
{
|
||||
"person_index": 4,
|
||||
"relationship": "godfather_of_her_child"
|
||||
},
|
||||
{
|
||||
"person_index": 5,
|
||||
"relationship": "godmother_of_her_child"
|
||||
}
|
||||
]
|
||||
},
|
||||
"context": "Madre del bautizado, natural de la Ciudad de México."
|
||||
},
|
||||
{
|
||||
"person_index": 4,
|
||||
"pnv_name": {
|
||||
"given_name": "Francisco Xavier",
|
||||
"surname": "de Castañeda"
|
||||
},
|
||||
"roles": [
|
||||
"godfather"
|
||||
],
|
||||
"honorifics": [
|
||||
"Don"
|
||||
],
|
||||
"biographical": {
|
||||
"casta": "español",
|
||||
"origin": {
|
||||
"type": "resident",
|
||||
"place": "Ciudad de México"
|
||||
}
|
||||
},
|
||||
"family_relationships": {
|
||||
"godchild": [
|
||||
{
|
||||
"person_index": 1,
|
||||
"relationship": "godson"
|
||||
}
|
||||
],
|
||||
"spouse": [
|
||||
{
|
||||
"person_index": 5,
|
||||
"relationship": "wife"
|
||||
}
|
||||
],
|
||||
"compadres": [
|
||||
{
|
||||
"person_index": 2,
|
||||
"relationship": "father_of_godchild"
|
||||
},
|
||||
{
|
||||
"person_index": 3,
|
||||
"relationship": "mother_of_godchild"
|
||||
}
|
||||
]
|
||||
},
|
||||
"context": "Padrino del bautizado, vecino de la Ciudad de México."
|
||||
},
|
||||
{
|
||||
"person_index": 5,
|
||||
"pnv_name": {
|
||||
"given_name": "Ana María",
|
||||
"surname": "de la Encarnación"
|
||||
},
|
||||
"roles": [
|
||||
"godmother"
|
||||
],
|
||||
"honorifics": [
|
||||
"Doña"
|
||||
],
|
||||
"biographical": {},
|
||||
"family_relationships": {
|
||||
"godchild": [
|
||||
{
|
||||
"person_index": 1,
|
||||
"relationship": "godson"
|
||||
}
|
||||
],
|
||||
"spouse": [
|
||||
{
|
||||
"person_index": 4,
|
||||
"relationship": "husband"
|
||||
}
|
||||
],
|
||||
"comadres": [
|
||||
{
|
||||
"person_index": 2,
|
||||
"relationship": "father_of_godchild"
|
||||
},
|
||||
{
|
||||
"person_index": 3,
|
||||
"relationship": "mother_of_godchild"
|
||||
}
|
||||
]
|
||||
},
|
||||
"context": "Madrina del bautizado y esposa legítima del padrino."
|
||||
}
|
||||
],
|
||||
"temporal_references": [
|
||||
{
|
||||
"expression": "a veinte y tres días del mes de febrero de mil setecientos cuarenta y dos años",
|
||||
"normalized": "1742-02-23",
|
||||
"calendar": "Gregorian"
|
||||
}
|
||||
],
|
||||
"locations_mentioned": [
|
||||
{
|
||||
"name": "Ciudad de México",
|
||||
"type": "city"
|
||||
},
|
||||
{
|
||||
"name": "Puebla de los Ángeles",
|
||||
"type": "villa"
|
||||
},
|
||||
{
|
||||
"name": "esta santa iglesia catedral",
|
||||
"type": "church"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -88,6 +88,8 @@ enums:
|
|||
description: Entry requires further enrichment processing
|
||||
new_entry:
|
||||
description: Newly added entry not yet enriched
|
||||
google_maps_searched:
|
||||
description: Google Maps search attempted but not yet fully enriched
|
||||
|
||||
InstitutionTypeCodeEnum:
|
||||
description: Single-letter GLAMORCUBESFIXPHDNT type codes
|
||||
|
|
@ -182,6 +184,10 @@ enums:
|
|||
description: GeoNames geographic entity identifier
|
||||
LinkedIn:
|
||||
description: LinkedIn profile or company page
|
||||
GHCID_PREVIOUS:
|
||||
description: Previous GHCID before relocation or reorganization
|
||||
OCLC:
|
||||
description: OCLC (Online Computer Library Center) identifier
|
||||
|
||||
LocationResolutionMethodEnum:
|
||||
description: Method used to resolve settlement location
|
||||
|
|
@ -228,6 +234,8 @@ enums:
|
|||
description: Verified through web search
|
||||
CITY_NAME_LOOKUP:
|
||||
description: Looked up city name directly
|
||||
MANUAL_RESEARCH:
|
||||
description: Manually researched and assigned location
|
||||
GEONAMES_CITY_LOOKUP:
|
||||
description: Looked up city in GeoNames database
|
||||
NAME_EXTRACTION:
|
||||
|
|
@ -252,8 +260,6 @@ enums:
|
|||
description: Extracted location from institution name
|
||||
GEONAMES_FUZZY:
|
||||
description: Fuzzy matched in GeoNames
|
||||
MANUAL_RESEARCH:
|
||||
description: Manually researched location
|
||||
WIKIDATA_ENRICHMENT:
|
||||
description: Enriched from Wikidata
|
||||
COORDINATE_LOOKUP:
|
||||
|
|
@ -430,6 +436,9 @@ classes:
|
|||
organisatie:
|
||||
range: string
|
||||
description: Organization name from source
|
||||
organisation:
|
||||
range: string
|
||||
description: Organization name from source (British spelling variant)
|
||||
isil_code_na:
|
||||
range: string
|
||||
description: ISIL code from Nationaal Archief
|
||||
|
|
@ -650,10 +659,19 @@ classes:
|
|||
range: string
|
||||
description: Status of Wikidata enrichment for this entry
|
||||
comment:
|
||||
range: ReferenceLink
|
||||
multivalued: true
|
||||
any_of:
|
||||
- range: string
|
||||
- range: ReferenceLink
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Comments about this entry (array of objects with label field)
|
||||
description: Comments about this entry (can be a string or array of objects with label field)
|
||||
comments:
|
||||
any_of:
|
||||
- range: string
|
||||
- range: ReferenceLink
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Comments about this entry (string or array of objects with label field)
|
||||
succeeded_by:
|
||||
range: ReferenceLink
|
||||
multivalued: true
|
||||
|
|
@ -666,6 +684,15 @@ classes:
|
|||
label:
|
||||
range: string
|
||||
description: Name/label of the duplicate institution
|
||||
entry_index:
|
||||
range: integer
|
||||
description: Index of the duplicate entry in source data
|
||||
entry_file:
|
||||
range: string
|
||||
description: Filename of the duplicate entry
|
||||
reason:
|
||||
range: string
|
||||
description: Reason why this is considered a duplicate
|
||||
|
||||
TimeEntry:
|
||||
description: Structured time entry from source data
|
||||
|
|
@ -676,10 +703,13 @@ classes:
|
|||
- range: integer
|
||||
description: Time label (date string or year)
|
||||
type:
|
||||
range: TimeEntryType
|
||||
multivalued: true
|
||||
any_of:
|
||||
- range: string
|
||||
multivalued: true
|
||||
- range: TimeEntryType
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Type of time point (begin, end, etc.)
|
||||
description: Type of time point (begin, end, etc.) - can be strings or TimeEntryType objects
|
||||
|
||||
TimeEntryType:
|
||||
description: Type classification for time entry
|
||||
|
|
@ -847,6 +877,11 @@ classes:
|
|||
data_source:
|
||||
range: string
|
||||
description: Data source type (CSV_REGISTRY, API_SCRAPING, etc.)
|
||||
data_sources:
|
||||
range: string
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: List of data sources (e.g., NDE registry, Google Maps, website)
|
||||
data_tier:
|
||||
range: DataTierEnum
|
||||
description: Quality tier of the data
|
||||
|
|
@ -856,6 +891,12 @@ classes:
|
|||
extraction_method:
|
||||
range: string
|
||||
description: Method used to extract the data
|
||||
enrichment_date:
|
||||
range: string
|
||||
description: When enrichment was performed (ISO date string)
|
||||
enrichment_method:
|
||||
range: string
|
||||
description: Method used to enrich the data (e.g., website_research)
|
||||
confidence_score:
|
||||
range: float
|
||||
description: Confidence score (0-1)
|
||||
|
|
@ -889,6 +930,15 @@ classes:
|
|||
wikidata_property:
|
||||
range: string
|
||||
description: Wikidata property ID (e.g., P856)
|
||||
archive_location:
|
||||
range: string
|
||||
description: Location of archived copy (e.g., web/1186/hartebrug.nl)
|
||||
claim_extracted_from:
|
||||
range: string
|
||||
description: Source path from which claim was extracted (e.g., original_entry.reference)
|
||||
verified_via_web_archive:
|
||||
range: boolean
|
||||
description: Whether claim was verified via web archive
|
||||
|
||||
ProvenanceSources:
|
||||
description: Sources organized by type
|
||||
|
|
@ -938,6 +988,52 @@ classes:
|
|||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Nationaal Archief ISIL registry source records
|
||||
whois_research:
|
||||
range: SourceRecord
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: WHOIS domain research source records
|
||||
manual_research:
|
||||
range: SourceRecord
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Manual research source records
|
||||
website:
|
||||
range: SourceRecord
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Website source records (institution website data)
|
||||
web_scrape:
|
||||
range: SourceRecord
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Web scrape source records (scraped website data)
|
||||
# Data tier summary fields (for provenance summaries)
|
||||
TIER_1_AUTHORITATIVE:
|
||||
range: string
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: List of TIER_1 authoritative sources
|
||||
TIER_2_VERIFIED:
|
||||
range: string
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: List of TIER_2 verified sources
|
||||
TIER_3_CROWD_SOURCED:
|
||||
range: string
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: List of TIER_3 crowd-sourced sources
|
||||
TIER_4_INFERRED:
|
||||
range: string
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: List of TIER_4 inferred sources
|
||||
museum_register:
|
||||
range: SourceRecord
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Museum register source records
|
||||
|
||||
SourceRecord:
|
||||
description: Individual source record with claims
|
||||
|
|
@ -999,6 +1095,20 @@ classes:
|
|||
source_file:
|
||||
range: string
|
||||
description: Source file name
|
||||
research_date:
|
||||
range: string
|
||||
description: Date of research (YYYY-MM-DD format)
|
||||
url:
|
||||
range: uri
|
||||
description: URL of the source (website URL, etc.)
|
||||
data_extracted:
|
||||
range: string
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: List of data types/fields extracted from this source
|
||||
merge_note:
|
||||
range: string
|
||||
description: Note about merge operations involving this source record
|
||||
|
||||
DataTierSummary:
|
||||
description: Summary of data tiers present in entry
|
||||
|
|
@ -1029,7 +1139,7 @@ classes:
|
|||
attributes:
|
||||
identifier_scheme:
|
||||
range: IdentifierSchemeEnum
|
||||
required: true
|
||||
required: false
|
||||
description: Type of identifier
|
||||
identifier_value:
|
||||
any_of:
|
||||
|
|
@ -1051,6 +1161,14 @@ classes:
|
|||
notes:
|
||||
range: string
|
||||
description: Additional note about this identifier (alias for note)
|
||||
scheme:
|
||||
range: string
|
||||
description: Identifier scheme (alias for identifier_scheme, used in some data sources)
|
||||
value:
|
||||
any_of:
|
||||
- range: string
|
||||
- range: integer
|
||||
description: Identifier value (alias for identifier_value, used in some data sources)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GHCID BLOCK - Heritage Custodian ID with history
|
||||
|
|
@ -1277,6 +1395,15 @@ classes:
|
|||
resolution_notes:
|
||||
range: string
|
||||
description: Additional notes from location resolution process
|
||||
specific_location:
|
||||
range: string
|
||||
description: More specific location info within the city (e.g., neighborhood, district)
|
||||
specific_geonames_id:
|
||||
range: integer
|
||||
description: GeoNames ID for the specific location (if different from main city)
|
||||
correction_note:
|
||||
range: string
|
||||
description: Note explaining any correction made to the location resolution
|
||||
|
||||
SourceCoordinates:
|
||||
description: Source of coordinates for resolution
|
||||
|
|
@ -1296,13 +1423,19 @@ classes:
|
|||
attributes:
|
||||
type:
|
||||
range: string
|
||||
description: Type of research source (e.g., note, wikidata, web_archive, official_source)
|
||||
description: Type of research source (e.g., note, wikidata, web_archive, official_source, whois)
|
||||
text:
|
||||
range: string
|
||||
description: Text or description of the research source
|
||||
value:
|
||||
range: string
|
||||
description: Value from this source (e.g., plus code, address)
|
||||
notes:
|
||||
range: string
|
||||
description: Additional notes about this source
|
||||
note:
|
||||
range: string
|
||||
description: Additional note about this source (singular alias for notes)
|
||||
id:
|
||||
range: string
|
||||
description: Identifier for the source (e.g., Wikidata Q-number)
|
||||
|
|
@ -1315,6 +1448,56 @@ classes:
|
|||
coordinates:
|
||||
range: string
|
||||
description: Coordinates from this source (e.g., "31.515, 34.434")
|
||||
data:
|
||||
range: ResearchSourceData
|
||||
description: Structured data from the source (e.g., WHOIS registrant info)
|
||||
|
||||
ResearchSourceData:
|
||||
description: Structured data from a research source
|
||||
attributes:
|
||||
registrant_name:
|
||||
range: string
|
||||
description: WHOIS registrant name
|
||||
registrant_address:
|
||||
range: string
|
||||
description: WHOIS registrant address
|
||||
registrant_city:
|
||||
range: string
|
||||
description: WHOIS registrant city
|
||||
registrant_state:
|
||||
range: string
|
||||
description: WHOIS registrant state/province
|
||||
registrant_country:
|
||||
range: string
|
||||
description: WHOIS registrant country
|
||||
registrant_postal_code:
|
||||
range: string
|
||||
description: WHOIS registrant postal code
|
||||
# Additional flexible fields for other data types
|
||||
organization:
|
||||
range: string
|
||||
description: Organization name
|
||||
email:
|
||||
range: string
|
||||
description: Contact email
|
||||
phone:
|
||||
range: string
|
||||
description: Contact phone
|
||||
creation_date:
|
||||
range: string
|
||||
description: Domain creation date
|
||||
updated_date:
|
||||
range: string
|
||||
description: Domain updated date
|
||||
expiration_date:
|
||||
range: string
|
||||
description: Domain expiration date
|
||||
domain_registered:
|
||||
range: string
|
||||
description: Domain registration date
|
||||
registry:
|
||||
range: string
|
||||
description: Domain registrar name
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GOOGLE MAPS ENRICHMENT
|
||||
|
|
@ -1459,21 +1642,28 @@ classes:
|
|||
- range: HoursStatus
|
||||
description: Opening hours information (string or status object)
|
||||
admission:
|
||||
range: string
|
||||
description: Admission price information
|
||||
any_of:
|
||||
- range: string
|
||||
- range: AdmissionInfo
|
||||
description: Admission price information (string or structured object)
|
||||
related_places:
|
||||
range: string
|
||||
multivalued: true
|
||||
any_of:
|
||||
- range: string
|
||||
multivalued: true
|
||||
- range: RelatedPlace
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Related places nearby
|
||||
description: Related places nearby (strings or structured objects)
|
||||
review_topics:
|
||||
range: string
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Topics mentioned in reviews
|
||||
reviews_summary:
|
||||
range: string
|
||||
description: Summary of reviews
|
||||
any_of:
|
||||
- range: string
|
||||
- range: ReviewsSummary
|
||||
description: Summary of reviews (string or structured breakdown)
|
||||
sample_reviews:
|
||||
any_of:
|
||||
- range: string
|
||||
|
|
@ -1510,10 +1700,13 @@ classes:
|
|||
inlined_as_list: true
|
||||
description: Nearby organizations (strings or structured objects)
|
||||
features:
|
||||
range: string
|
||||
multivalued: true
|
||||
any_of:
|
||||
- range: string
|
||||
multivalued: true
|
||||
- range: PlaceFeature
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Features of the place
|
||||
description: Features of the place (strings or key-value objects)
|
||||
hours_status:
|
||||
range: string
|
||||
description: Current opening status (e.g., "Closed · Opens 2 pm Wed")
|
||||
|
|
@ -1590,6 +1783,23 @@ classes:
|
|||
match_notes:
|
||||
range: string
|
||||
description: Notes about how the Google Maps match was determined
|
||||
price_level:
|
||||
any_of:
|
||||
- range: integer
|
||||
- range: string
|
||||
description: Google Maps price level (0-4 or string description)
|
||||
match_warning:
|
||||
range: string
|
||||
description: Warning about potential issues with the match
|
||||
location_note:
|
||||
range: string
|
||||
description: Note about the physical location of the place
|
||||
search_attempted:
|
||||
range: boolean
|
||||
description: Whether a Google Maps search was attempted
|
||||
result:
|
||||
range: string
|
||||
description: Result of search operation (found, not_found, found_via_user_link, etc.)
|
||||
|
||||
RejectedGoogleMapsData:
|
||||
description: Rejected Google Maps data preserved for audit trail
|
||||
|
|
@ -1612,6 +1822,53 @@ classes:
|
|||
returned_country:
|
||||
range: string
|
||||
description: Country code actually returned by Google Maps
|
||||
website:
|
||||
range: uri
|
||||
description: Website URL from Google Maps
|
||||
latitude:
|
||||
range: float
|
||||
description: Latitude coordinate
|
||||
longitude:
|
||||
range: float
|
||||
description: Longitude coordinate
|
||||
enriched_at:
|
||||
range: datetime
|
||||
description: When enrichment was performed
|
||||
|
||||
PlaceFeature:
|
||||
description: A feature flag for a place (e.g., native_garden, shop, volunteers)
|
||||
class_uri: schema:PropertyValue
|
||||
attributes:
|
||||
native_garden:
|
||||
range: boolean
|
||||
description: Has a native garden
|
||||
shop:
|
||||
range: boolean
|
||||
description: Has a shop
|
||||
volunteers:
|
||||
range: boolean
|
||||
description: Has volunteers
|
||||
parking:
|
||||
range: boolean
|
||||
description: Has parking
|
||||
cafe:
|
||||
range: boolean
|
||||
description: Has a cafe
|
||||
restaurant:
|
||||
range: boolean
|
||||
description: Has a restaurant
|
||||
gift_shop:
|
||||
range: boolean
|
||||
description: Has a gift shop
|
||||
wheelchair_accessible:
|
||||
range: boolean
|
||||
description: Is wheelchair accessible
|
||||
guided_tours:
|
||||
range: boolean
|
||||
description: Offers guided tours
|
||||
audio_guide:
|
||||
range: boolean
|
||||
description: Offers audio guides
|
||||
|
||||
LlmVerification:
|
||||
description: LLM-based verification results for Google Maps matching
|
||||
|
|
@ -1696,6 +1953,25 @@ classes:
|
|||
minute:
|
||||
range: integer
|
||||
|
||||
ReviewsSummary:
|
||||
description: Breakdown of reviews by star rating
|
||||
attributes:
|
||||
5_star:
|
||||
range: integer
|
||||
description: Number of 5-star reviews
|
||||
4_star:
|
||||
range: integer
|
||||
description: Number of 4-star reviews
|
||||
3_star:
|
||||
range: integer
|
||||
description: Number of 3-star reviews
|
||||
2_star:
|
||||
range: integer
|
||||
description: Number of 2-star reviews
|
||||
1_star:
|
||||
range: integer
|
||||
description: Number of 1-star reviews
|
||||
|
||||
GoogleReview:
|
||||
description: Google Maps review
|
||||
attributes:
|
||||
|
|
@ -1722,6 +1998,16 @@ classes:
|
|||
range: string
|
||||
description: Alias for relative_time_description (review date)
|
||||
|
||||
AdmissionInfo:
|
||||
description: Structured admission price information from Google Maps
|
||||
attributes:
|
||||
price:
|
||||
range: string
|
||||
description: Admission price (e.g., "€9.00")
|
||||
notes:
|
||||
range: string
|
||||
description: Additional notes about admission (e.g., "Additional fees might apply")
|
||||
|
||||
PhotoMetadata:
|
||||
description: Google Maps photo metadata
|
||||
attributes:
|
||||
|
|
@ -1805,8 +2091,10 @@ classes:
|
|||
wikidata_temporal:
|
||||
range: WikidataTemporal
|
||||
wikidata_inception:
|
||||
range: string
|
||||
description: Inception date (P571)
|
||||
any_of:
|
||||
- range: string
|
||||
- range: WikidataTimeValue
|
||||
description: Inception date (P571) - can be string or structured time value
|
||||
wikidata_classification:
|
||||
range: WikidataClassification
|
||||
wikidata_instance_of:
|
||||
|
|
@ -1923,6 +2211,29 @@ classes:
|
|||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Search terms attempted when looking for Wikidata entity
|
||||
wikidata_description_nl:
|
||||
range: string
|
||||
description: Description in Dutch language
|
||||
wikidata_claims:
|
||||
range: WikidataClaims
|
||||
description: Structured Wikidata claims with property metadata
|
||||
inlined: true
|
||||
_resolved_entities:
|
||||
range: WikidataResolvedEntities
|
||||
description: Resolved Wikidata property and entity metadata cache
|
||||
inlined: true
|
||||
|
||||
WikidataClaims:
|
||||
description: |
|
||||
Structured Wikidata claims with property metadata and values.
|
||||
Uses flexible dict-like structure for various claim types.
|
||||
class_uri: linkml:Any
|
||||
|
||||
WikidataResolvedEntities:
|
||||
description: |
|
||||
Cache of resolved Wikidata property and entity metadata.
|
||||
Keys are property IDs (P123), values are property metadata.
|
||||
class_uri: linkml:Any
|
||||
|
||||
WikidataApiMetadata:
|
||||
description: API call metadata
|
||||
|
|
@ -2035,6 +2346,19 @@ classes:
|
|||
inlined_as_list: true
|
||||
description: Main subject (P921)
|
||||
|
||||
WikidataTimeValue:
|
||||
description: Wikidata time value with precision metadata
|
||||
attributes:
|
||||
time:
|
||||
range: string
|
||||
description: Time value in ISO 8601 format (e.g., +2015-00-00T00:00:00Z)
|
||||
precision:
|
||||
range: integer
|
||||
description: Precision level (9=year, 10=month, 11=day, etc.)
|
||||
calendarmodel:
|
||||
range: uri
|
||||
description: Calendar model URI (e.g., http://www.wikidata.org/entity/Q1985727 for Gregorian)
|
||||
|
||||
WikidataEntity:
|
||||
description: Reference to a Wikidata entity
|
||||
attributes:
|
||||
|
|
@ -2081,7 +2405,10 @@ classes:
|
|||
description: Location properties from Wikidata
|
||||
attributes:
|
||||
country:
|
||||
range: WikidataEntity
|
||||
any_of:
|
||||
- range: string
|
||||
- range: WikidataEntity
|
||||
description: Country Q-ID (can be string or WikidataEntity object)
|
||||
headquarters_location:
|
||||
range: WikidataEntity
|
||||
coordinates:
|
||||
|
|
@ -2135,8 +2462,10 @@ classes:
|
|||
multivalued: true
|
||||
inlined_as_list: true
|
||||
parent_organization:
|
||||
range: WikidataEntity
|
||||
description: Parent organization (P749)
|
||||
any_of:
|
||||
- range: string
|
||||
- range: WikidataEntity
|
||||
description: Parent organization Q-ID or entity (P749)
|
||||
subsidiary:
|
||||
range: WikidataEntity
|
||||
multivalued: true
|
||||
|
|
@ -2410,12 +2739,54 @@ classes:
|
|||
website_found:
|
||||
range: boolean
|
||||
description: Whether a website was found
|
||||
official_website:
|
||||
range: uri
|
||||
description: Official website URL found during research
|
||||
research_notes:
|
||||
range: string
|
||||
description: Notes from research
|
||||
organizational_change:
|
||||
range: OrganizationalChange
|
||||
description: Organizational change information (closures, mergers, etc.)
|
||||
# WHOIS/domain information fields
|
||||
domain:
|
||||
range: string
|
||||
description: Domain name of the website
|
||||
domain_registered:
|
||||
range: string
|
||||
description: Date domain was registered (YYYY-MM-DD)
|
||||
registrar:
|
||||
range: string
|
||||
description: Domain registrar name
|
||||
registration_country:
|
||||
range: string
|
||||
description: Country where domain is registered (ISO 3166-1 alpha-2)
|
||||
site_launched:
|
||||
range: string
|
||||
description: Year or date when site was launched
|
||||
collections:
|
||||
range: WebCollection
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Collections documented on the website
|
||||
# Duplicate/canonical entry tracking
|
||||
is_canonical_entry:
|
||||
range: boolean
|
||||
description: Whether this is the canonical entry (vs duplicate)
|
||||
duplicate_entries:
|
||||
range: DuplicateEntry
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: References to duplicate entries of this institution
|
||||
organization_status:
|
||||
range: string
|
||||
description: Current status of the organization (ACTIVE, CLOSED, etc.)
|
||||
research_timestamp:
|
||||
range: datetime
|
||||
description: When research was performed
|
||||
website:
|
||||
range: uri
|
||||
description: Website URL found during research
|
||||
# Migration tracking fields
|
||||
claims_migrated:
|
||||
range: boolean
|
||||
|
|
@ -2442,6 +2813,12 @@ classes:
|
|||
merger_target:
|
||||
range: string
|
||||
description: Target organization in merger
|
||||
successor_name:
|
||||
range: string
|
||||
description: Name of successor organization (for mergers)
|
||||
successor_location:
|
||||
range: string
|
||||
description: Location of successor organization (for mergers)
|
||||
notes:
|
||||
range: string
|
||||
description: Additional notes
|
||||
|
|
@ -2474,6 +2851,33 @@ classes:
|
|||
range: string
|
||||
description: Archive status (new, updated, etc.)
|
||||
|
||||
WebCollection:
|
||||
description: A collection documented on a heritage institution website
|
||||
attributes:
|
||||
name:
|
||||
range: string
|
||||
required: true
|
||||
description: Name of the collection
|
||||
description:
|
||||
range: string
|
||||
description: Description of the collection
|
||||
url:
|
||||
range: uri
|
||||
description: URL to the collection page
|
||||
type:
|
||||
range: string
|
||||
description: Type of collection (oral_history, photographs, documents, etc.)
|
||||
item_count:
|
||||
any_of:
|
||||
- range: integer
|
||||
- range: string
|
||||
description: Number of items in the collection (integer or descriptive string)
|
||||
total_hours:
|
||||
any_of:
|
||||
- range: float
|
||||
- range: string
|
||||
description: Total hours of content (for audio/video collections)
|
||||
|
||||
WebArchiveFailure:
|
||||
description: Failed archive attempt record
|
||||
attributes:
|
||||
|
|
@ -2603,7 +3007,8 @@ classes:
|
|||
- range: string
|
||||
- range: string
|
||||
multivalued: true
|
||||
description: Extracted value (alias for claim_value, can be string or list)
|
||||
- range: OpeningHoursMap
|
||||
description: Extracted value (alias for claim_value, can be string, list, or structured object like opening hours)
|
||||
raw_value:
|
||||
range: string
|
||||
description: Raw value before processing
|
||||
|
|
@ -2728,6 +3133,9 @@ classes:
|
|||
job_title_en:
|
||||
range: string
|
||||
description: Job title in English
|
||||
department_en:
|
||||
range: string
|
||||
description: Department name in English
|
||||
|
||||
RawSource:
|
||||
description: Raw source information for web enrichment
|
||||
|
|
@ -2741,6 +3149,9 @@ classes:
|
|||
fetch_timestamp:
|
||||
range: datetime
|
||||
description: When the source was fetched
|
||||
published_date:
|
||||
range: datetime
|
||||
description: When the source content was published
|
||||
source_type:
|
||||
range: string
|
||||
description: Type of source (official_website, etc.)
|
||||
|
|
@ -2756,6 +3167,63 @@ classes:
|
|||
raw_markdown_hash:
|
||||
range: string
|
||||
description: SHA-256 hash of the raw markdown content
|
||||
exa_highlights:
|
||||
range: string
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Highlighted excerpts from Exa search results
|
||||
exa_highlight_scores:
|
||||
range: float
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Relevance scores for Exa highlights
|
||||
|
||||
OpeningHoursMap:
|
||||
description: Opening hours as a day-keyed map
|
||||
class_uri: schema:OpeningHoursSpecification
|
||||
attributes:
|
||||
maandag:
|
||||
range: string
|
||||
description: Monday hours (Dutch)
|
||||
dinsdag:
|
||||
range: string
|
||||
description: Tuesday hours (Dutch)
|
||||
woensdag:
|
||||
range: string
|
||||
description: Wednesday hours (Dutch)
|
||||
donderdag:
|
||||
range: string
|
||||
description: Thursday hours (Dutch)
|
||||
vrijdag:
|
||||
range: string
|
||||
description: Friday hours (Dutch)
|
||||
zaterdag:
|
||||
range: string
|
||||
description: Saturday hours (Dutch)
|
||||
zondag:
|
||||
range: string
|
||||
description: Sunday hours (Dutch)
|
||||
monday:
|
||||
range: string
|
||||
description: Monday hours (English)
|
||||
tuesday:
|
||||
range: string
|
||||
description: Tuesday hours (English)
|
||||
wednesday:
|
||||
range: string
|
||||
description: Wednesday hours (English)
|
||||
thursday:
|
||||
range: string
|
||||
description: Thursday hours (English)
|
||||
friday:
|
||||
range: string
|
||||
description: Friday hours (English)
|
||||
saturday:
|
||||
range: string
|
||||
description: Saturday hours (English)
|
||||
sunday:
|
||||
range: string
|
||||
description: Sunday hours (English)
|
||||
|
||||
SourceReference:
|
||||
description: Structured source reference for a claim
|
||||
|
|
@ -2879,8 +3347,12 @@ classes:
|
|||
range: string
|
||||
description: Note explaining manual correction made to the name
|
||||
merge_notes:
|
||||
range: string
|
||||
description: Notes about name merging or deduplication
|
||||
any_of:
|
||||
- range: string
|
||||
- range: MergeNote
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Notes about name merging or deduplication (string or array of structured objects)
|
||||
abbreviation:
|
||||
range: string
|
||||
description: Short form or abbreviation of the name
|
||||
|
|
@ -2891,10 +3363,49 @@ classes:
|
|||
range: string
|
||||
description: Official registered name
|
||||
former_names:
|
||||
range: string
|
||||
multivalued: true
|
||||
any_of:
|
||||
- range: string
|
||||
multivalued: true
|
||||
- range: FormerName
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Previous names the institution was known by
|
||||
description: Previous names the institution was known by (strings or structured objects)
|
||||
short_name:
|
||||
range: string
|
||||
description: Short name or commonly used abbreviated form of the institution name
|
||||
|
||||
FormerName:
|
||||
description: A former name of the institution with optional metadata
|
||||
attributes:
|
||||
name:
|
||||
range: string
|
||||
required: true
|
||||
description: The former name
|
||||
abbreviated:
|
||||
range: string
|
||||
description: Abbreviated form of the former name
|
||||
used_until:
|
||||
range: string
|
||||
description: Date until which this name was used (YYYY-MM or YYYY)
|
||||
used_from:
|
||||
range: string
|
||||
description: Date from which this name was used (YYYY-MM or YYYY)
|
||||
notes:
|
||||
range: string
|
||||
description: Additional notes about this former name
|
||||
|
||||
MergeNote:
|
||||
description: Note about a merge operation between duplicate entries
|
||||
attributes:
|
||||
source:
|
||||
range: string
|
||||
description: Source entry identifier that was merged
|
||||
merged_on:
|
||||
range: string
|
||||
description: Date when merge occurred (YYYY-MM-DD)
|
||||
reason:
|
||||
range: string
|
||||
description: Reason for the merge (e.g., duplicate Wikidata ID, same place ID)
|
||||
|
||||
MatchingSource:
|
||||
description: Source that contributed to name consensus
|
||||
|
|
@ -2910,6 +3421,9 @@ classes:
|
|||
score:
|
||||
range: float
|
||||
description: Match score
|
||||
notes:
|
||||
range: string
|
||||
description: Additional notes about this source match
|
||||
|
||||
AlternativeName:
|
||||
description: Alternative name with language and source information
|
||||
|
|
@ -3168,14 +3682,54 @@ classes:
|
|||
source:
|
||||
range: string
|
||||
description: Source of this platform information
|
||||
source_references:
|
||||
description:
|
||||
range: string
|
||||
multivalued: true
|
||||
description: Description of this platform
|
||||
source_references:
|
||||
any_of:
|
||||
- range: string
|
||||
multivalued: true
|
||||
- range: PlatformSourceReference
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: References to source data
|
||||
description: References to source data (strings or structured objects)
|
||||
enrichment_source:
|
||||
range: string
|
||||
description: Source of enrichment (e.g., manual_curation, api_scraping)
|
||||
host_organization:
|
||||
range: string
|
||||
description: Organization hosting this platform
|
||||
host_website:
|
||||
range: uri
|
||||
description: Main website of the host organization
|
||||
language:
|
||||
range: string
|
||||
description: Primary language of the platform (ISO 639-1 code)
|
||||
features:
|
||||
range: string
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Features of this platform
|
||||
platforms:
|
||||
range: string
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Sub-platforms or related platforms
|
||||
|
||||
PlatformSourceReference:
|
||||
description: Structured source reference for a digital platform
|
||||
attributes:
|
||||
url:
|
||||
range: uri
|
||||
description: Source URL
|
||||
fetch_timestamp:
|
||||
range: datetime
|
||||
description: When the source was fetched
|
||||
data_extracted:
|
||||
range: string
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Data fields extracted from this source
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# UNESCO ICH ENRICHMENT
|
||||
|
|
@ -3336,6 +3890,12 @@ classes:
|
|||
override_reason:
|
||||
range: string
|
||||
description: Reason for manual coordinate override
|
||||
source_url:
|
||||
range: uri
|
||||
description: URL source of coordinates (e.g., Google Maps link)
|
||||
note:
|
||||
range: string
|
||||
description: Additional note about coordinate provenance
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ADDITIONAL ENRICHMENT TYPES
|
||||
|
|
@ -3444,9 +4004,16 @@ classes:
|
|||
range: float
|
||||
review_count:
|
||||
range: integer
|
||||
description: Number of reviews
|
||||
reviews:
|
||||
range: integer
|
||||
description: Number of reviews (alias for review_count)
|
||||
place_type:
|
||||
range: string
|
||||
description: Type of place (Museum, Cafe, etc.)
|
||||
type:
|
||||
range: string
|
||||
description: Type of place (alias for place_type)
|
||||
|
||||
MuseumRegisterEnrichment:
|
||||
description: Dutch Museum Register (Museumregister Nederland) data
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"generated": "2025-12-08T17:42:08.000Z",
|
||||
"generated": "2025-12-12T16:08:52.770Z",
|
||||
"version": "1.0.0",
|
||||
"categories": [
|
||||
{
|
||||
|
|
@ -247,6 +247,11 @@
|
|||
"path": "modules/classes/ConfidenceMeasure.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "ConflictStatus",
|
||||
"path": "modules/classes/ConflictStatus.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "ConservationLab",
|
||||
"path": "modules/classes/ConservationLab.yaml",
|
||||
|
|
@ -452,6 +457,16 @@
|
|||
"path": "modules/classes/FinancialStatement.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "FindingAid",
|
||||
"path": "modules/classes/FindingAid.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "FindingAidType",
|
||||
"path": "modules/classes/FindingAidType.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "Foremalarkiv",
|
||||
"path": "modules/classes/Foremalarkiv.yaml",
|
||||
|
|
@ -487,11 +502,6 @@
|
|||
"path": "modules/classes/Fylkesarkiv.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "GLAM",
|
||||
"path": "modules/classes/GLAM.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "GalleryType",
|
||||
"path": "modules/classes/GalleryType.yaml",
|
||||
|
|
@ -507,6 +517,11 @@
|
|||
"path": "modules/classes/GiftShop.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "GLAM",
|
||||
"path": "modules/classes/GLAM.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "GovernmentArchive",
|
||||
"path": "modules/classes/GovernmentArchive.yaml",
|
||||
|
|
@ -518,13 +533,13 @@
|
|||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "HistoricBuilding",
|
||||
"path": "modules/classes/HistoricBuilding.yaml",
|
||||
"name": "HistoricalArchive",
|
||||
"path": "modules/classes/HistoricalArchive.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "HistoricalArchive",
|
||||
"path": "modules/classes/HistoricalArchive.yaml",
|
||||
"name": "HistoricBuilding",
|
||||
"path": "modules/classes/HistoricBuilding.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
|
|
@ -607,11 +622,6 @@
|
|||
"path": "modules/classes/Kustodie.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "LGBTArchive",
|
||||
"path": "modules/classes/LGBTArchive.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "Landsarkiv",
|
||||
"path": "modules/classes/Landsarkiv.yaml",
|
||||
|
|
@ -642,6 +652,11 @@
|
|||
"path": "modules/classes/LegalResponsibilityCollection.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "LGBTArchive",
|
||||
"path": "modules/classes/LGBTArchive.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "LibraryType",
|
||||
"path": "modules/classes/LibraryType.yaml",
|
||||
|
|
@ -787,11 +802,6 @@
|
|||
"path": "modules/classes/Organization.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "OrganizationBranch",
|
||||
"path": "modules/classes/OrganizationBranch.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "OrganizationalChangeEvent",
|
||||
"path": "modules/classes/OrganizationalChangeEvent.yaml",
|
||||
|
|
@ -807,6 +817,11 @@
|
|||
"path": "modules/classes/OrganizationalSubdivision.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "OrganizationBranch",
|
||||
"path": "modules/classes/OrganizationBranch.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "OutdoorSite",
|
||||
"path": "modules/classes/OutdoorSite.yaml",
|
||||
|
|
@ -837,16 +852,6 @@
|
|||
"path": "modules/classes/PerformingArtsArchive.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "PersonObservation",
|
||||
"path": "modules/classes/PersonObservation.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "PersonOrOrganization",
|
||||
"path": "modules/classes/PersonOrOrganization.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "PersonalCollectionType",
|
||||
"path": "modules/classes/PersonalCollectionType.yaml",
|
||||
|
|
@ -862,6 +867,16 @@
|
|||
"path": "modules/classes/Personenstandsarchiv.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "PersonObservation",
|
||||
"path": "modules/classes/PersonObservation.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "PersonOrOrganization",
|
||||
"path": "modules/classes/PersonOrOrganization.yaml",
|
||||
"category": "classes"
|
||||
},
|
||||
{
|
||||
"name": "PhotoArchive",
|
||||
"path": "modules/classes/PhotoArchive.yaml",
|
||||
|
|
@ -1323,6 +1338,11 @@
|
|||
"path": "modules/enums/CommercialCustodianTypeEnum.yaml",
|
||||
"category": "enums"
|
||||
},
|
||||
{
|
||||
"name": "ConflictStatusEnum",
|
||||
"path": "modules/enums/ConflictStatusEnum.yaml",
|
||||
"category": "enums"
|
||||
},
|
||||
{
|
||||
"name": "CustodianPrimaryTypeEnum",
|
||||
"path": "modules/enums/CustodianPrimaryTypeEnum.yaml",
|
||||
|
|
@ -1428,11 +1448,6 @@
|
|||
"path": "modules/enums/OfficialInstitutionTypeEnum.yaml",
|
||||
"category": "enums"
|
||||
},
|
||||
{
|
||||
"name": "OrganizationBranchTypeEnum",
|
||||
"path": "modules/enums/OrganizationBranchTypeEnum.yaml",
|
||||
"category": "enums"
|
||||
},
|
||||
{
|
||||
"name": "OrganizationalChangeEventTypeEnum",
|
||||
"path": "modules/enums/OrganizationalChangeEventTypeEnum.yaml",
|
||||
|
|
@ -1443,6 +1458,11 @@
|
|||
"path": "modules/enums/OrganizationalUnitTypeEnum.yaml",
|
||||
"category": "enums"
|
||||
},
|
||||
{
|
||||
"name": "OrganizationBranchTypeEnum",
|
||||
"path": "modules/enums/OrganizationBranchTypeEnum.yaml",
|
||||
"category": "enums"
|
||||
},
|
||||
{
|
||||
"name": "PersonalCollectionTypeEnum",
|
||||
"path": "modules/enums/PersonalCollectionTypeEnum.yaml",
|
||||
|
|
@ -1744,6 +1764,11 @@
|
|||
"path": "modules/slots/confidence_value.yaml",
|
||||
"category": "slots"
|
||||
},
|
||||
{
|
||||
"name": "conflict_status",
|
||||
"path": "modules/slots/conflict_status.yaml",
|
||||
"category": "slots"
|
||||
},
|
||||
{
|
||||
"name": "contact",
|
||||
"path": "modules/slots/contact.yaml",
|
||||
|
|
@ -1824,6 +1849,11 @@
|
|||
"path": "modules/slots/digital_platform.yaml",
|
||||
"category": "slots"
|
||||
},
|
||||
{
|
||||
"name": "digitization_status",
|
||||
"path": "modules/slots/digitization_status.yaml",
|
||||
"category": "slots"
|
||||
},
|
||||
{
|
||||
"name": "dissolution_date",
|
||||
"path": "modules/slots/dissolution_date.yaml",
|
||||
|
|
@ -1839,6 +1869,11 @@
|
|||
"path": "modules/slots/documentation_url.yaml",
|
||||
"category": "slots"
|
||||
},
|
||||
{
|
||||
"name": "embargo_end_date",
|
||||
"path": "modules/slots/embargo_end_date.yaml",
|
||||
"category": "slots"
|
||||
},
|
||||
{
|
||||
"name": "emic_name",
|
||||
"path": "modules/slots/emic_name.yaml",
|
||||
|
|
@ -2229,6 +2264,11 @@
|
|||
"path": "modules/slots/parent_custodian.yaml",
|
||||
"category": "slots"
|
||||
},
|
||||
{
|
||||
"name": "parent_society",
|
||||
"path": "modules/slots/parent_society.yaml",
|
||||
"category": "slots"
|
||||
},
|
||||
{
|
||||
"name": "parent_unit",
|
||||
"path": "modules/slots/parent_unit.yaml",
|
||||
|
|
@ -2279,6 +2319,16 @@
|
|||
"path": "modules/slots/platform_type.yaml",
|
||||
"category": "slots"
|
||||
},
|
||||
{
|
||||
"name": "policy_id",
|
||||
"path": "modules/slots/policy_id.yaml",
|
||||
"category": "slots"
|
||||
},
|
||||
{
|
||||
"name": "policy_name",
|
||||
"path": "modules/slots/policy_name.yaml",
|
||||
"category": "slots"
|
||||
},
|
||||
{
|
||||
"name": "portal_data_sources",
|
||||
"path": "modules/slots/portal_data_sources.yaml",
|
||||
|
|
@ -2359,6 +2409,11 @@
|
|||
"path": "modules/slots/retrieved_on.yaml",
|
||||
"category": "slots"
|
||||
},
|
||||
{
|
||||
"name": "rico_equivalent",
|
||||
"path": "modules/slots/rico_equivalent.yaml",
|
||||
"category": "slots"
|
||||
},
|
||||
{
|
||||
"name": "role_end_date",
|
||||
"path": "modules/slots/role_end_date.yaml",
|
||||
|
|
@ -2384,6 +2439,16 @@
|
|||
"path": "modules/slots/safeguards.yaml",
|
||||
"category": "slots"
|
||||
},
|
||||
{
|
||||
"name": "security_level",
|
||||
"path": "modules/slots/security_level.yaml",
|
||||
"category": "slots"
|
||||
},
|
||||
{
|
||||
"name": "serves_finding_aids",
|
||||
"path": "modules/slots/serves_finding_aids.yaml",
|
||||
"category": "slots"
|
||||
},
|
||||
{
|
||||
"name": "service_area",
|
||||
"path": "modules/slots/service_area.yaml",
|
||||
|
|
@ -2504,6 +2569,11 @@
|
|||
"path": "modules/slots/time_of_destruction.yaml",
|
||||
"category": "slots"
|
||||
},
|
||||
{
|
||||
"name": "typical_domains",
|
||||
"path": "modules/slots/typical_domains.yaml",
|
||||
"category": "slots"
|
||||
},
|
||||
{
|
||||
"name": "unit_affiliation",
|
||||
"path": "modules/slots/unit_affiliation.yaml",
|
||||
|
|
@ -2568,6 +2638,11 @@
|
|||
"name": "website",
|
||||
"path": "modules/slots/website.yaml",
|
||||
"category": "slots"
|
||||
},
|
||||
{
|
||||
"name": "wikidata_id",
|
||||
"path": "modules/slots/wikidata_id.yaml",
|
||||
"category": "slots"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
import React, { useState, useRef, useCallback, useEffect, memo } from 'react';
|
||||
import type { EmbeddingPoint } from './EmbeddingProjector';
|
||||
import { isTargetInsideAny } from '../../utils/dom';
|
||||
import './PointDetailsPanel.css';
|
||||
|
||||
interface NearestNeighbor {
|
||||
|
|
@ -169,9 +170,7 @@ const PointDetailsPanelComponent: React.FC<PointDetailsPanelProps> = ({
|
|||
// Drag handlers
|
||||
const handleMouseDown = useCallback((e: React.MouseEvent) => {
|
||||
// Don't start drag if clicking on buttons
|
||||
if ((e.target as HTMLElement).closest('button') ||
|
||||
(e.target as HTMLElement).closest('input') ||
|
||||
(e.target as HTMLElement).closest('.neighbor-item')) {
|
||||
if (isTargetInsideAny(e.target, ['button', 'input', '.neighbor-item'])) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,13 +23,14 @@
|
|||
}
|
||||
|
||||
/* Ensure content area can grow but footer stays visible */
|
||||
.layout-content > *:not(.layout-footer) {
|
||||
.layout-content > *:not(.layout-footer):not(.layout-main) {
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
/* Main content wrapper - takes available space */
|
||||
.layout-main {
|
||||
flex: 1 0 auto; /* Grow to fill space, don't shrink, auto basis */
|
||||
min-height: min-content; /* At least as tall as content */
|
||||
}
|
||||
|
||||
/* Footer Styles - minimal, at the very bottom */
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ import { useLanguage } from '../../contexts/LanguageContext';
|
|||
import './Layout.css';
|
||||
|
||||
// Pages that handle their own footer (full-screen apps with sidebars)
|
||||
const PAGES_WITH_CUSTOM_FOOTER = ['/map', '/visualize', '/query-builder', '/linkml', '/ontology'];
|
||||
const PAGES_WITH_CUSTOM_FOOTER = ['/map', '/visualize', '/query-builder', '/linkml', '/ontology', '/conversation'];
|
||||
|
||||
export function Layout() {
|
||||
const currentYear = new Date().getFullYear();
|
||||
|
|
|
|||
|
|
@ -505,6 +505,18 @@
|
|||
font-weight: 500;
|
||||
}
|
||||
|
||||
/* External Link Icon */
|
||||
.nav-external-icon {
|
||||
margin-left: 0.35rem;
|
||||
font-size: 0.75em;
|
||||
opacity: 0.7;
|
||||
}
|
||||
|
||||
.nav-dropdown-item:hover .nav-external-icon,
|
||||
.nav-mobile-link:hover .nav-external-icon {
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
/* Mobile Section Styles */
|
||||
.nav-mobile-section {
|
||||
border-bottom: 1px solid rgba(23, 42, 89, 0.1);
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ import { Link, useLocation, useNavigate } from 'react-router-dom';
|
|||
import { useAuth } from '../../contexts/AuthContext';
|
||||
import { useLanguage, translations } from '../../contexts/LanguageContext';
|
||||
import { useUIState } from '../../contexts/UIStateContext';
|
||||
import { isTargetInsideAny } from '../../utils/dom';
|
||||
import './Navigation.css';
|
||||
|
||||
export function Navigation() {
|
||||
|
|
@ -41,14 +42,13 @@ export function Navigation() {
|
|||
const WHEEL_RESET_DELAY = 300; // Reset wheel accumulator after this many ms of no wheel events
|
||||
|
||||
const handleScroll = (e: Event) => {
|
||||
const target = e.target as Element;
|
||||
|
||||
// Ignore scroll events from the navigation itself
|
||||
if (target.closest('.navigation') || target.closest('.nav-mobile-menu')) {
|
||||
if (isTargetInsideAny(e.target, ['.navigation', '.nav-mobile-menu'])) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Get scroll position from the target element
|
||||
const target = e.target as Element;
|
||||
const scrollTop = target instanceof HTMLElement ? target.scrollTop : 0;
|
||||
const scrollingUp = scrollTop < lastScrollTop;
|
||||
const scrollDelta = lastScrollTop - scrollTop;
|
||||
|
|
@ -78,10 +78,8 @@ export function Navigation() {
|
|||
|
||||
// Wheel event handler for non-scrollable areas
|
||||
const handleWheel = (e: WheelEvent) => {
|
||||
const target = e.target as Element;
|
||||
|
||||
// Ignore wheel events from the navigation itself
|
||||
if (target.closest('.navigation') || target.closest('.nav-mobile-menu')) {
|
||||
if (isTargetInsideAny(e.target, ['.navigation', '.nav-mobile-menu'])) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -381,6 +379,15 @@ export function Navigation() {
|
|||
<Link to="/settings" className={`nav-dropdown-item ${isActive('/settings') ? 'active' : ''}`}>
|
||||
{t('preferences')}
|
||||
</Link>
|
||||
<a
|
||||
href="https://bronhouder.nl/database"
|
||||
className="nav-dropdown-item"
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
>
|
||||
{t('database')}
|
||||
<span className="nav-external-icon" aria-hidden="true">↗</span>
|
||||
</a>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
|
@ -500,6 +507,15 @@ export function Navigation() {
|
|||
<Link to="/settings" className={`nav-mobile-link ${isActive('/settings') ? 'active' : ''}`}>
|
||||
{t('preferences')}
|
||||
</Link>
|
||||
<a
|
||||
href="https://bronhouder.nl/database"
|
||||
className="nav-mobile-link"
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
>
|
||||
{t('database')}
|
||||
<span className="nav-external-icon" aria-hidden="true">↗</span>
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ import { CustodianTimeline } from './CustodianTimeline';
|
|||
// import { VoronoiStippling } from './VoronoiStippling';
|
||||
import { ErrorBoundary } from '../common/ErrorBoundary';
|
||||
import { safeString } from '../../utils/safeString';
|
||||
import { isTargetInsideAny } from '../../utils/dom';
|
||||
import { useWikidataImage } from '../../hooks/useWikidataImage';
|
||||
import type { Archive } from '../../types/werkgebied';
|
||||
|
||||
|
|
@ -428,6 +429,10 @@ const InstitutionInfoPanelComponent: React.FC<InstitutionInfoPanelProps> = ({
|
|||
// Tab state for info/youtube
|
||||
const [activeTab, setActiveTab] = useState<'info' | 'youtube'>('info');
|
||||
|
||||
// Export dropdown state
|
||||
const [showExportMenu, setShowExportMenu] = useState(false);
|
||||
const exportMenuRef = useRef<HTMLDivElement>(null);
|
||||
|
||||
// Track if user has manually positioned this panel
|
||||
const hasUserPositioned = useRef(false);
|
||||
|
||||
|
|
@ -474,8 +479,7 @@ const InstitutionInfoPanelComponent: React.FC<InstitutionInfoPanelProps> = ({
|
|||
// Drag handlers
|
||||
const handleMouseDown = useCallback((e: React.MouseEvent) => {
|
||||
// Don't start drag if clicking on buttons or links
|
||||
if ((e.target as HTMLElement).closest('button') ||
|
||||
(e.target as HTMLElement).closest('a')) {
|
||||
if (isTargetInsideAny(e.target, ['button', 'a'])) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -551,6 +555,19 @@ const InstitutionInfoPanelComponent: React.FC<InstitutionInfoPanelProps> = ({
|
|||
return () => window.removeEventListener('keydown', handleKeyDown);
|
||||
}, [onClose]);
|
||||
|
||||
// Close export menu when clicking outside
|
||||
useEffect(() => {
|
||||
const handleClickOutside = (e: MouseEvent) => {
|
||||
if (exportMenuRef.current && !exportMenuRef.current.contains(e.target as Node)) {
|
||||
setShowExportMenu(false);
|
||||
}
|
||||
};
|
||||
if (showExportMenu) {
|
||||
document.addEventListener('mousedown', handleClickOutside);
|
||||
}
|
||||
return () => document.removeEventListener('mousedown', handleClickOutside);
|
||||
}, [showExportMenu]);
|
||||
|
||||
// GHCID click handler - cycle through displays
|
||||
const handleGhcidClick = () => {
|
||||
if (ghcidDisplay === 'current') {
|
||||
|
|
|
|||
|
|
@ -762,6 +762,36 @@ const MediaGalleryComponent: React.FC<MediaGalleryProps> = ({
|
|||
const [youtubeReady, setYoutubeReady] = useState(false);
|
||||
const [failedPhotoUrls, setFailedPhotoUrls] = useState<Set<string>>(new Set());
|
||||
|
||||
/**
|
||||
* Get specific failure reason based on URL pattern
|
||||
* Returns user-friendly error message for common failure types
|
||||
* @internal Reserved for future diagnostic display
|
||||
*/
|
||||
const getFailureReason = (url: string): string => {
|
||||
const lowerUrl = url.toLowerCase();
|
||||
|
||||
// Google Places/Maps images require API key
|
||||
if (lowerUrl.includes('lh3.googleusercontent.com') ||
|
||||
lowerUrl.includes('maps.googleapis.com') ||
|
||||
lowerUrl.includes('googleusercontent.com/p/')) {
|
||||
return t('Google Places afbeelding niet beschikbaar (API-sleutel vereist)',
|
||||
'Google Places image unavailable (API key required)');
|
||||
}
|
||||
|
||||
// Relative URLs that weren't resolved
|
||||
if (!url.startsWith('http://') && !url.startsWith('https://')) {
|
||||
return t('Ongeldige URL (niet volledig)', 'Invalid URL (not fully resolved)');
|
||||
}
|
||||
|
||||
// YouTube videos in image field
|
||||
if (lowerUrl.includes('youtube.com/watch') || lowerUrl.includes('youtu.be/')) {
|
||||
return t('Video-URL in afbeeldingsveld', 'Video URL in image field');
|
||||
}
|
||||
|
||||
// Generic expired/404 error
|
||||
return t('Afbeelding kon niet worden geladen', 'Image could not be loaded');
|
||||
};
|
||||
|
||||
// Mark failed photos but DON'T remove them - prevents crash when all photos fail
|
||||
const effectivePhotos = useMemo(() => {
|
||||
// Start with initial photos, add wikidata image if available and not already included
|
||||
|
|
@ -770,11 +800,11 @@ const MediaGalleryComponent: React.FC<MediaGalleryProps> = ({
|
|||
allPhotos.push({ url: wikidataImageUrl, attribution: 'Wikimedia Commons' });
|
||||
}
|
||||
|
||||
// Mark failed photos but DON'T remove them
|
||||
// Mark failed photos with specific failure reasons
|
||||
const photosWithStatus = allPhotos.map(photo => ({
|
||||
...photo,
|
||||
failed: failedPhotoUrls.has(photo.url),
|
||||
failReason: failedPhotoUrls.has(photo.url) ? 'Image could not be loaded' : undefined
|
||||
failReason: failedPhotoUrls.has(photo.url) ? getFailureReason(photo.url) : undefined
|
||||
}));
|
||||
|
||||
// Sort: working images first, failed images last
|
||||
|
|
@ -783,7 +813,7 @@ const MediaGalleryComponent: React.FC<MediaGalleryProps> = ({
|
|||
if (!a.failed && b.failed) return -1;
|
||||
return 0;
|
||||
});
|
||||
}, [initialPhotos, failedPhotoUrls, wikidataImageUrl]);
|
||||
}, [initialPhotos, failedPhotoUrls, wikidataImageUrl, t]);
|
||||
|
||||
const containerRef = useRef<HTMLDivElement>(null);
|
||||
const playerRef = useRef<YTPlayer | null>(null);
|
||||
|
|
|
|||
|
|
@ -125,6 +125,14 @@
|
|||
border-bottom-right-radius: 4px;
|
||||
}
|
||||
|
||||
/* Ensure all text inside user messages is white */
|
||||
.conversation-panel__message--user .conversation-panel__message-content p,
|
||||
.conversation-panel__message--user .conversation-panel__message-content span,
|
||||
.conversation-panel__message--user .conversation-panel__loading,
|
||||
.conversation-panel__message--user .conversation-panel__error {
|
||||
color: white;
|
||||
}
|
||||
|
||||
.conversation-panel__message--assistant .conversation-panel__message-content {
|
||||
background: var(--surface-secondary, #f5f5f5);
|
||||
color: var(--text-primary, #212121);
|
||||
|
|
@ -497,6 +505,12 @@
|
|||
color: var(--error-color, #d32f2f);
|
||||
}
|
||||
|
||||
.conversation-panel__toolbar-btn--warning:hover:not(:disabled) {
|
||||
background: #fff3e0;
|
||||
border-color: #ff9800;
|
||||
color: #f57c00;
|
||||
}
|
||||
|
||||
/* History Dropdown */
|
||||
.conversation-panel__history-selector {
|
||||
position: relative;
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@
|
|||
*/
|
||||
|
||||
import React, { useState, useRef, useEffect, useCallback } from 'react';
|
||||
import { Send, Loader2, Sparkles, AlertCircle, Copy, Check, ChevronDown, History, Download, Upload, Trash2, X } from 'lucide-react';
|
||||
import { Send, Loader2, Sparkles, AlertCircle, Copy, Check, ChevronDown, History, Download, Upload, Trash2, X, RefreshCw } from 'lucide-react';
|
||||
import { useLanguage } from '../../contexts/LanguageContext';
|
||||
import './ConversationPanel.css';
|
||||
|
||||
|
|
@ -87,6 +87,8 @@ const TEXT = {
|
|||
exportSuccess: { nl: 'Conversatie geëxporteerd', en: 'Conversation exported' },
|
||||
importSuccess: { nl: 'Conversatie geïmporteerd', en: 'Conversation imported' },
|
||||
importError: { nl: 'Ongeldig bestand', en: 'Invalid file' },
|
||||
resetCache: { nl: 'Cache wissen', en: 'Clear cache' },
|
||||
cacheCleared: { nl: 'Cache gewist - probeer uw vraag opnieuw', en: 'Cache cleared - try your question again' },
|
||||
};
|
||||
|
||||
// Example questions to help users get started - shorter list
|
||||
|
|
@ -453,6 +455,39 @@ export const ConversationPanel: React.FC<ConversationPanelProps> = ({ onQueryGen
|
|||
showNotification(t('conversationCleared'));
|
||||
};
|
||||
|
||||
/**
|
||||
* Clear all caches (IndexedDB semantic cache + conversation) and reload
|
||||
*/
|
||||
const handleResetCache = async () => {
|
||||
try {
|
||||
// Clear IndexedDB semantic cache
|
||||
const databases = ['GLAM_SemanticCache', 'GLAM_InstitutionsCache'];
|
||||
for (const dbName of databases) {
|
||||
await new Promise<void>((resolve) => {
|
||||
const request = indexedDB.deleteDatabase(dbName);
|
||||
request.onsuccess = () => resolve();
|
||||
request.onerror = () => resolve(); // Continue even if error
|
||||
request.onblocked = () => resolve();
|
||||
});
|
||||
}
|
||||
|
||||
// Clear messages
|
||||
setMessages([]);
|
||||
|
||||
// Show notification
|
||||
showNotification(t('cacheCleared'));
|
||||
|
||||
// Reload after short delay to ensure notification is seen
|
||||
setTimeout(() => {
|
||||
window.location.reload();
|
||||
}, 1500);
|
||||
} catch (err) {
|
||||
console.error('Failed to clear cache:', err);
|
||||
// Force reload anyway
|
||||
window.location.reload();
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="conversation-panel">
|
||||
{/* Notification Toast */}
|
||||
|
|
@ -600,6 +635,16 @@ export const ConversationPanel: React.FC<ConversationPanelProps> = ({ onQueryGen
|
|||
<Upload size={16} />
|
||||
</button>
|
||||
|
||||
{/* Reset Cache Button - clears IndexedDB and reloads */}
|
||||
<button
|
||||
className="conversation-panel__toolbar-btn conversation-panel__toolbar-btn--warning"
|
||||
onClick={handleResetCache}
|
||||
title={t('resetCache')}
|
||||
type="button"
|
||||
>
|
||||
<RefreshCw size={16} />
|
||||
</button>
|
||||
|
||||
{/* Clear Conversation Button */}
|
||||
{messages.length > 0 && (
|
||||
<button
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ import React, { useState, useRef, useCallback, useEffect } from 'react';
|
|||
import type { GraphNode, GraphLink } from '@/types/rdf';
|
||||
import { getSemanticCategory, SEMANTIC_CATEGORY_INFO } from '@/types/rdf';
|
||||
import { NodeRelationshipDiagram } from './NodeRelationshipDiagram';
|
||||
import { isTargetInsideAny } from '@/utils/dom';
|
||||
import './RdfNodeDetailsPanel.css';
|
||||
|
||||
interface RdfNodeDetailsPanelProps {
|
||||
|
|
@ -141,10 +142,12 @@ export const RdfNodeDetailsPanel: React.FC<RdfNodeDetailsPanelProps> = ({
|
|||
// Drag handlers
|
||||
const handleMouseDown = useCallback((e: React.MouseEvent) => {
|
||||
// Only start drag if clicking on header area (not buttons)
|
||||
if ((e.target as HTMLElement).closest('.rdf-panel__close') ||
|
||||
(e.target as HTMLElement).closest('.rdf-panel__minimize') ||
|
||||
(e.target as HTMLElement).closest('.rdf-panel__copy-btn') ||
|
||||
(e.target as HTMLElement).closest('.rdf-panel__external-link')) {
|
||||
if (isTargetInsideAny(e.target, [
|
||||
'.rdf-panel__close',
|
||||
'.rdf-panel__minimize',
|
||||
'.rdf-panel__copy-btn',
|
||||
'.rdf-panel__external-link'
|
||||
])) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ import rehypeRaw from 'rehype-raw';
|
|||
import { linkmlSchemaService } from '../../lib/linkml/linkml-schema-service';
|
||||
import type { SemanticInfo, SlotSemanticInfo, EnumSemanticInfo } from '../../lib/linkml/linkml-schema-service';
|
||||
import { EnumNetworkOverlay } from './EnumNetworkOverlay';
|
||||
import { isTargetInsideAny } from '../../utils/dom';
|
||||
import './SemanticDetailsPanel.css';
|
||||
|
||||
/**
|
||||
|
|
@ -226,8 +227,7 @@ export const SemanticDetailsPanel: React.FC<SemanticDetailsPanelProps> = ({
|
|||
// Drag handlers
|
||||
const handleMouseDown = useCallback((e: React.MouseEvent) => {
|
||||
// Only start drag if clicking on header area (not buttons)
|
||||
if ((e.target as HTMLElement).closest('.semantic-panel__close') ||
|
||||
(e.target as HTMLElement).closest('.semantic-panel__minimize')) {
|
||||
if (isTargetInsideAny(e.target, ['.semantic-panel__close', '.semantic-panel__minimize'])) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -112,6 +112,161 @@
|
|||
transform: scale(1.1);
|
||||
}
|
||||
|
||||
/* Header Actions Container */
|
||||
.person-info-panel__header-actions {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
/* Export Button */
|
||||
.person-info-panel__export-wrapper {
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.person-info-panel__export-btn {
|
||||
width: 28px;
|
||||
height: 28px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
background: rgba(255, 255, 255, 0.15);
|
||||
border: none;
|
||||
border-radius: 50%;
|
||||
color: white;
|
||||
cursor: pointer;
|
||||
transition: all 0.2s ease;
|
||||
}
|
||||
|
||||
.person-info-panel__export-btn:hover {
|
||||
background: rgba(255, 255, 255, 0.25);
|
||||
transform: scale(1.1);
|
||||
}
|
||||
|
||||
.person-info-panel__export-btn--loading {
|
||||
cursor: wait;
|
||||
opacity: 0.7;
|
||||
}
|
||||
|
||||
.person-info-panel__export-btn:disabled {
|
||||
cursor: wait;
|
||||
}
|
||||
|
||||
.person-info-panel__export-spinner {
|
||||
width: 12px;
|
||||
height: 12px;
|
||||
border: 2px solid rgba(255, 255, 255, 0.3);
|
||||
border-top-color: white;
|
||||
border-radius: 50%;
|
||||
animation: person-export-spin 0.8s linear infinite;
|
||||
}
|
||||
|
||||
@keyframes person-export-spin {
|
||||
to {
|
||||
transform: rotate(360deg);
|
||||
}
|
||||
}
|
||||
|
||||
.person-info-panel__export-menu {
|
||||
position: absolute;
|
||||
top: 100%;
|
||||
right: 0;
|
||||
margin-top: 0.5rem;
|
||||
background: white;
|
||||
border-radius: 8px;
|
||||
box-shadow: 0 4px 16px rgba(0, 0, 0, 0.15), 0 2px 8px rgba(0, 0, 0, 0.1);
|
||||
overflow: hidden;
|
||||
z-index: 10002;
|
||||
min-width: 160px;
|
||||
}
|
||||
|
||||
.person-info-panel__export-section {
|
||||
padding: 0.25rem 0;
|
||||
}
|
||||
|
||||
.person-info-panel__export-section:not(:last-child) {
|
||||
border-bottom: 1px solid #e2e8f0;
|
||||
}
|
||||
|
||||
.person-info-panel__export-section-label {
|
||||
padding: 0.4rem 0.875rem 0.25rem;
|
||||
font-size: 0.65rem;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.05em;
|
||||
color: #94a3b8;
|
||||
}
|
||||
|
||||
.person-info-panel__export-menu button {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
width: 100%;
|
||||
padding: 0.6rem 0.875rem;
|
||||
background: none;
|
||||
border: none;
|
||||
text-align: left;
|
||||
font-size: 0.85rem;
|
||||
color: #2d3748;
|
||||
cursor: pointer;
|
||||
transition: background-color 0.15s ease;
|
||||
}
|
||||
|
||||
.person-info-panel__export-menu button:hover {
|
||||
background: #f0f4ff;
|
||||
color: #0a3dfa;
|
||||
}
|
||||
|
||||
.person-info-panel__export-menu button:not(:last-child) {
|
||||
border-bottom: 1px solid #f1f5f9;
|
||||
}
|
||||
|
||||
.person-info-panel__export-icon {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
width: 20px;
|
||||
font-size: 0.7rem;
|
||||
font-weight: 700;
|
||||
font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace;
|
||||
color: #64748b;
|
||||
}
|
||||
|
||||
.person-info-panel__copy-toast {
|
||||
position: absolute;
|
||||
top: 100%;
|
||||
right: 0;
|
||||
margin-top: 0.5rem;
|
||||
padding: 0.5rem 0.75rem;
|
||||
background: #10b981;
|
||||
color: white;
|
||||
font-size: 0.75rem;
|
||||
font-weight: 500;
|
||||
border-radius: 6px;
|
||||
white-space: nowrap;
|
||||
z-index: 10003;
|
||||
animation: person-toast-fade 2s ease forwards;
|
||||
box-shadow: 0 2px 8px rgba(16, 185, 129, 0.3);
|
||||
}
|
||||
|
||||
@keyframes person-toast-fade {
|
||||
0% {
|
||||
opacity: 0;
|
||||
transform: translateY(-4px);
|
||||
}
|
||||
10% {
|
||||
opacity: 1;
|
||||
transform: translateY(0);
|
||||
}
|
||||
80% {
|
||||
opacity: 1;
|
||||
}
|
||||
100% {
|
||||
opacity: 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Content */
|
||||
.person-info-panel__content {
|
||||
padding: 1rem;
|
||||
|
|
@ -491,6 +646,137 @@
|
|||
margin-top: 0.25rem;
|
||||
}
|
||||
|
||||
/* Career History - Accordion Styles */
|
||||
.person-info-panel__career-accordion {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.5rem;
|
||||
margin-top: 0.5rem;
|
||||
}
|
||||
|
||||
.person-info-panel__career-accordion-item {
|
||||
background: #ffffff;
|
||||
border: 1px solid #e2e8f0;
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
transition: box-shadow 0.2s ease, border-color 0.2s ease;
|
||||
}
|
||||
|
||||
.person-info-panel__career-accordion-item:hover {
|
||||
border-color: #cbd5e1;
|
||||
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.06);
|
||||
}
|
||||
|
||||
.person-info-panel__career-accordion-item--expanded {
|
||||
border-color: #0a3dfa;
|
||||
box-shadow: 0 2px 12px rgba(10, 61, 250, 0.1);
|
||||
}
|
||||
|
||||
.person-info-panel__career-accordion-header {
|
||||
width: 100%;
|
||||
display: flex;
|
||||
align-items: flex-start;
|
||||
justify-content: space-between;
|
||||
padding: 0.75rem;
|
||||
background: transparent;
|
||||
border: none;
|
||||
cursor: pointer;
|
||||
text-align: left;
|
||||
transition: background-color 0.15s ease;
|
||||
}
|
||||
|
||||
.person-info-panel__career-accordion-header:hover {
|
||||
background-color: #f8fafc;
|
||||
}
|
||||
|
||||
.person-info-panel__career-accordion-header:focus {
|
||||
outline: none;
|
||||
background-color: #f1f5f9;
|
||||
}
|
||||
|
||||
.person-info-panel__career-accordion-header:focus-visible {
|
||||
outline: 2px solid #0a3dfa;
|
||||
outline-offset: -2px;
|
||||
}
|
||||
|
||||
.person-info-panel__career-accordion-summary {
|
||||
flex: 1;
|
||||
min-width: 0;
|
||||
}
|
||||
|
||||
.person-info-panel__career-accordion-title {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
flex-wrap: wrap;
|
||||
gap: 0.5rem;
|
||||
margin-bottom: 0.25rem;
|
||||
}
|
||||
|
||||
.person-info-panel__career-accordion-subtitle {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
flex-wrap: wrap;
|
||||
gap: 0.25rem;
|
||||
font-size: 0.85rem;
|
||||
color: #4a5568;
|
||||
}
|
||||
|
||||
.person-info-panel__career-location-brief {
|
||||
color: #64748b;
|
||||
font-size: 0.8rem;
|
||||
}
|
||||
|
||||
.person-info-panel__career-accordion-chevron {
|
||||
flex-shrink: 0;
|
||||
font-size: 0.7rem;
|
||||
color: #64748b;
|
||||
transition: transform 0.25s ease;
|
||||
margin-left: 0.5rem;
|
||||
margin-top: 0.25rem;
|
||||
}
|
||||
|
||||
.person-info-panel__career-accordion-chevron--expanded {
|
||||
transform: rotate(180deg);
|
||||
}
|
||||
|
||||
/* Accordion Content - Collapsible */
|
||||
.person-info-panel__career-accordion-content {
|
||||
max-height: 0;
|
||||
overflow: hidden;
|
||||
transition: max-height 0.3s ease-out, opacity 0.2s ease;
|
||||
opacity: 0;
|
||||
}
|
||||
|
||||
.person-info-panel__career-accordion-content--expanded {
|
||||
max-height: 500px;
|
||||
opacity: 1;
|
||||
transition: max-height 0.3s ease-in, opacity 0.2s ease 0.1s;
|
||||
}
|
||||
|
||||
/* Expanded accordion body: uniform inner spacing below a hairline separator. */
.person-info-panel__career-accordion-body {
  margin-top: 0;
  border-top: 1px solid #f0f0f0;
  /* Same result as "0 0.75rem 0.75rem 0.75rem" followed by padding-top: 0.75rem. */
  padding: 0.75rem;
}
|
||||
|
||||
.person-info-panel__career-detail-row {
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
font-size: 0.8rem;
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.person-info-panel__career-detail-label {
|
||||
color: #64748b;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.person-info-panel__career-detail-value {
|
||||
color: #334155;
|
||||
}
|
||||
|
||||
/* Legacy career styles (retained for compatibility) */
|
||||
.person-info-panel__career-list {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
|
|
@ -525,26 +811,24 @@
|
|||
color: #64748b;
|
||||
font-weight: 500;
|
||||
white-space: nowrap;
|
||||
margin-left: 0.5rem;
|
||||
margin-top: 0.25rem;
|
||||
}
|
||||
|
||||
.person-info-panel__current-badge {
|
||||
display: inline-block;
|
||||
padding: 0.2rem 0.5rem;
|
||||
padding: 0.15rem 0.4rem;
|
||||
background: #10b981;
|
||||
color: white;
|
||||
border-radius: 12px;
|
||||
font-size: 0.7rem;
|
||||
font-size: 0.65rem;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.5px;
|
||||
margin-left: 0.5rem;
|
||||
}
|
||||
|
||||
.person-info-panel__career-company {
|
||||
color: #2d3748;
|
||||
font-weight: 500;
|
||||
margin-bottom: 0.25rem;
|
||||
font-size: 0.85rem;
|
||||
}
|
||||
|
||||
|
|
@ -571,12 +855,18 @@
|
|||
}
|
||||
|
||||
.person-info-panel__career-detail {
|
||||
display: inline-block;
|
||||
padding: 0.2rem 0.4rem;
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 0.25rem;
|
||||
padding: 0.2rem 0.5rem;
|
||||
background: #f1f5f9;
|
||||
border-radius: 4px;
|
||||
font-size: 0.7rem;
|
||||
color: #6b7280;
|
||||
color: #475569;
|
||||
}
|
||||
|
||||
.person-info-panel__career-detail-icon {
|
||||
font-size: 0.75rem;
|
||||
}
|
||||
|
||||
/* Responsive adjustments */
|
||||
|
|
@ -627,4 +917,42 @@
|
|||
margin-left: 0;
|
||||
margin-top: 0.25rem;
|
||||
}
|
||||
|
||||
/* Accordion mobile adjustments */
|
||||
.person-info-panel__career-accordion-header {
|
||||
padding: 0.625rem;
|
||||
}
|
||||
|
||||
.person-info-panel__career-accordion-title {
|
||||
flex-direction: column;
|
||||
align-items: flex-start;
|
||||
gap: 0.25rem;
|
||||
}
|
||||
|
||||
.person-info-panel__career-accordion-subtitle {
|
||||
flex-direction: column;
|
||||
align-items: flex-start;
|
||||
gap: 0.15rem;
|
||||
}
|
||||
|
||||
.person-info-panel__career-location-brief {
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
.person-info-panel__career-location-brief::before {
|
||||
content: '';
|
||||
}
|
||||
|
||||
/*
 * Mobile: tighten the accordion body spacing on every side.
 * The previous shorthand started with 0, which reset the top padding that the
 * base rule adds with padding-top, so the content sat directly on the
 * separator border on small screens.
 */
.person-info-panel__career-accordion-body {
  padding: 0.625rem;
}
|
||||
|
||||
.person-info-panel__career-details {
|
||||
flex-direction: column;
|
||||
gap: 0.375rem;
|
||||
}
|
||||
|
||||
.person-info-panel__career-detail {
|
||||
width: fit-content;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@
|
|||
import React, { useEffect, useState, useRef, useCallback, memo, useMemo } from 'react';
|
||||
import type { SocialNetworkNode, HeritageType, RoleCategory, ExtendedProfileData } from '@/types/socialNetwork';
|
||||
import { getNodeColor, HERITAGE_TYPE_LABELS, HERITAGE_TYPE_COLORS as _HERITAGE_TYPE_COLORS, ROLE_CATEGORY_LABELS, ROLE_CATEGORY_COLORS as _ROLE_CATEGORY_COLORS } from '@/types/socialNetwork';
|
||||
import { isTargetInside } from '@/utils/dom';
|
||||
import './PersonInfoPanel.css';
|
||||
|
||||
// Re-export for JSX usage (TypeScript 5.9 flow analysis workaround)
|
||||
|
|
@ -67,6 +68,310 @@ const PersonInfoPanelComponent: React.FC<PersonInfoPanelProps> = ({
|
|||
void _profileLoading;
|
||||
const [isDragging, setIsDragging] = useState(false);
|
||||
const dragStartRef = useRef<{ x: number; y: number; posX: number; posY: number } | null>(null);
|
||||
|
||||
// Accordion state for career history items
|
||||
const [expandedCareerItems, setExpandedCareerItems] = useState<Set<number>>(new Set());
|
||||
|
||||
// Export dropdown state
|
||||
const [showExportMenu, setShowExportMenu] = useState(false);
|
||||
const [isExporting, setIsExporting] = useState(false);
|
||||
const [copySuccess, setCopySuccess] = useState<string | null>(null);
|
||||
const exportMenuRef = useRef<HTMLDivElement>(null);
|
||||
|
||||
// Toggle career item expansion
|
||||
// Flip one career entry between collapsed and expanded.
// A fresh Set is returned on every call so React sees a new state reference.
const toggleCareerItem = useCallback((index: number) => {
  setExpandedCareerItems((current) => {
    const next = new Set(current);
    // Set.delete reports whether the index was present; add it only when it was not.
    if (!next.delete(index)) {
      next.add(index);
    }
    return next;
  });
}, []);
|
||||
|
||||
// Close export menu when clicking outside
|
||||
useEffect(() => {
  // Nothing to listen for while the menu is closed.
  if (!showExportMenu) return undefined;

  // Dismiss the menu on any mouse press that lands outside the export wrapper.
  const closeOnOutsidePress = (event: MouseEvent) => {
    const wrapper = exportMenuRef.current;
    if (!wrapper) return;
    if (wrapper.contains(event.target as Node)) return;
    setShowExportMenu(false);
  };

  document.addEventListener('mousedown', closeOnOutsidePress);
  return () => document.removeEventListener('mousedown', closeOnOutsidePress);
}, [showExportMenu]);
|
||||
|
||||
// Export profile data
|
||||
// Build the serialisable snapshot shared by file downloads and clipboard copies.
// Headline and location prefer the loaded profile and fall back to the graph node.
const buildExportData = useCallback(() => ({
  name: person.name,
  linkedin_url: person.linkedinUrl,
  headline: profileData?.headline || person.headline,
  location: profileData?.location || person.location,
  heritage_relevant: person.heritageRelevant,
  heritage_type: person.heritageType,
  role_category: person.roleCategory,
  current_company: profileData?.current_company,
  department: profileData?.department,
  total_experience: profileData?.total_experience,
  connections: profileData?.connections,
  followers: profileData?.followers,
  about: profileData?.about,
  languages: profileData?.languages,
  skills: profileData?.skills,
  education: profileData?.education,
  career_history: profileData?.career_history,
  exported_at: new Date().toISOString(),
}), [person, profileData]);

/**
 * Serialise the profile in the requested format and trigger a browser download.
 *
 * @param format - 'json', 'yaml' and 'markdown' export the whole profile;
 *                 'csv' exports the career history only.
 *
 * The loading flag and the menu are always reset in `finally`, so a failing
 * converter or download can no longer leave the export button disabled.
 */
const exportProfile = useCallback(async (format: 'json' | 'yaml' | 'markdown' | 'csv') => {
  setIsExporting(true);

  try {
    // Small delay to show loading state for UX feedback
    await new Promise(resolve => setTimeout(resolve, 100));

    const exportData = buildExportData();

    // File name stem: the LinkedIn handle when available, otherwise the hyphenated name.
    const slug = person.linkedinUrl?.split('/in/')[1]?.replace(/\//g, '') || person.name.toLowerCase().replace(/\s+/g, '-');

    let content: string;
    let filename: string;
    let mimeType: string;

    switch (format) {
      case 'json':
        content = JSON.stringify(exportData, null, 2);
        filename = `${slug}_profile.json`;
        mimeType = 'application/json';
        break;
      case 'yaml':
        content = convertToYaml(exportData);
        filename = `${slug}_profile.yaml`;
        mimeType = 'text/yaml';
        break;
      case 'markdown':
        content = convertToMarkdown(exportData);
        filename = `${slug}_profile.md`;
        mimeType = 'text/markdown';
        break;
      case 'csv':
        content = convertCareerHistoryToCsv(exportData);
        filename = `${slug}_career_history.csv`;
        mimeType = 'text/csv';
        break;
    }

    const blob = new Blob([content], { type: mimeType });
    const url = URL.createObjectURL(blob);
    try {
      // A temporary anchor is the portable way to name the downloaded file.
      const link = document.createElement('a');
      link.href = url;
      link.download = filename;
      document.body.appendChild(link);
      link.click();
      document.body.removeChild(link);
    } finally {
      // Release the object URL even if the DOM manipulation above failed.
      URL.revokeObjectURL(url);
    }
  } catch (err) {
    console.error('Failed to export profile:', err);
  } finally {
    setShowExportMenu(false);
    setIsExporting(false);
  }
}, [buildExportData, person]);

/**
 * Serialise the profile in the requested format and copy it to the clipboard.
 *
 * On success a toast label (the upper-cased format) is shown for two seconds.
 * Failures are logged; the loading flag and menu are reset in every case.
 */
const copyToClipboard = useCallback(async (format: 'json' | 'yaml' | 'markdown') => {
  setIsExporting(true);

  try {
    const exportData = buildExportData();

    let content: string;
    switch (format) {
      case 'json':
        content = JSON.stringify(exportData, null, 2);
        break;
      case 'yaml':
        content = convertToYaml(exportData);
        break;
      case 'markdown':
        content = convertToMarkdown(exportData);
        break;
    }

    await navigator.clipboard.writeText(content);
    setCopySuccess(format.toUpperCase());
    setTimeout(() => setCopySuccess(null), 2000);
  } catch (err) {
    console.error('Failed to copy:', err);
  } finally {
    setShowExportMenu(false);
    setIsExporting(false);
  }
}, [buildExportData]);
|
||||
|
||||
// Convert career history to CSV format
|
||||
// Render the career history as CSV text: one header line, then one line per job.
// Returns a plain notice string when there is no career history to export.
const convertCareerHistoryToCsv = (data: Record<string, unknown>): string => {
  const jobs = data.career_history as Array<Record<string, unknown>> | undefined;
  if (!jobs || jobs.length === 0) {
    return 'No career history available';
  }

  const columnTitles = ['Role', 'Company', 'Location', 'Dates', 'Duration', 'Description', 'Level', 'Company Size', 'Industry', 'Current'];

  // Each cell falls back through the known field-name variants before becoming empty.
  const toLine = (job: Record<string, unknown>): string =>
    [
      escapeCSV(String(job.role || job.title || '')),
      escapeCSV(String(job.organization || job.company || '')),
      escapeCSV(String(job.location || '')),
      escapeCSV(String(job.dates || job.duration_text || '')),
      escapeCSV(String(job.duration || '')),
      escapeCSV(String(job.description || '')),
      escapeCSV(String(job.level || '')),
      escapeCSV(String(job.company_size || '')),
      escapeCSV(String(job.industry || '')),
      job.current ? 'Yes' : 'No',
    ].join(',');

  // Every line, header included, is terminated by a newline.
  return [columnTitles.join(','), ...jobs.map(toLine)]
    .map((line) => `${line}\n`)
    .join('');
};
|
||||
|
||||
// Helper to escape CSV values
|
||||
/**
 * Quote a single CSV field when required.
 *
 * A field is wrapped in double quotes, with embedded quotes doubled, when it
 * contains a comma, a double quote, a line feed or a carriage return. The
 * carriage-return case was previously missed, so a value with a bare "\r"
 * could split a row in readers that treat it as a line break.
 *
 * @param value - raw cell text
 * @returns the text unchanged, or its quoted form
 */
const escapeCSV = (value: string): string => {
  if (/[",\r\n]/.test(value)) {
    return `"${value.replace(/"/g, '""')}"`;
  }
  return value;
};
|
||||
|
||||
// Convert object to YAML format (simple implementation)
|
||||
/**
 * Convert a plain object to YAML text (small purpose-built serialiser).
 *
 * Rules:
 * - null / undefined values and empty arrays are omitted.
 * - Scalars are written with JSON.stringify, which yields valid YAML scalars.
 * - Strings containing a newline or longer than 80 characters become `|`
 *   block scalars, indented one level deeper than their key.
 * - Arrays become block sequences. Object items are rendered recursively and
 *   keep their own nested indentation; the previous version trimmed every
 *   line of an item, which flattened nested content and put block-scalar text
 *   at the same indent as its key (invalid YAML).
 * - Scalar list items are quoted like any other scalar, so text such as "C#"
 *   or "a: b" is no longer read as a comment or a mapping.
 *
 * @param obj    - object to serialise
 * @param indent - nesting depth; each level is two spaces
 * @returns YAML text in which every emitted line ends with "\n"
 */
const convertToYaml = (obj: Record<string, unknown>, indent = 0): string => {
  const pad = '  '.repeat(indent);
  const childPad = `${pad}  `;
  let yaml = '';

  for (const [key, value] of Object.entries(obj)) {
    if (value === null || value === undefined) continue;

    if (Array.isArray(value)) {
      if (value.length === 0) continue;
      yaml += `${pad}${key}:\n`;
      for (const item of value) {
        if (typeof item === 'object' && item !== null && !Array.isArray(item)) {
          // Render the item one level deeper, then swap the first line's
          // indentation for the "- " marker so later lines stay aligned under it.
          const nested = convertToYaml(item as Record<string, unknown>, indent + 1);
          yaml += nested ? `${pad}- ${nested.slice(childPad.length)}` : `${pad}- {}\n`;
        } else {
          // Scalars and nested arrays: JSON text is valid YAML flow syntax.
          yaml += `${pad}- ${JSON.stringify(item ?? null)}\n`;
        }
      }
    } else if (typeof value === 'object') {
      const nested = convertToYaml(value as Record<string, unknown>, indent + 1);
      // An object with nothing to emit is written as an explicit empty mapping.
      yaml += nested ? `${pad}${key}:\n${nested}` : `${pad}${key}: {}\n`;
    } else if (typeof value === 'string' && (value.includes('\n') || value.length > 80)) {
      yaml += `${pad}${key}: |\n`;
      for (const line of value.split('\n')) {
        yaml += `${childPad}${line}\n`;
      }
    } else {
      yaml += `${pad}${key}: ${JSON.stringify(value)}\n`;
    }
  }

  return yaml;
};
|
||||
|
||||
// Convert object to Markdown format
|
||||
/**
 * Render the exported profile as a Markdown document.
 *
 * Sections are emitted only when their data is present, in this order: title,
 * headline / location / LinkedIn link, heritage profile, current position,
 * about, career history, education, skills (first 20, then a "+N more"
 * suffix), languages, and a footer with the export date.
 *
 * Career entries accept the same field-name variants as the accordion UI and
 * the CSV export (role|title, organization|company,
 * dates|duration|duration_text). Language entries may be objects
 * ({ language, proficiency }) or plain strings.
 *
 * @param data - export snapshot; unknown keys are ignored
 * @returns Markdown text
 */
const convertToMarkdown = (data: Record<string, unknown>): string => {
  let md = `# ${data.name}\n\n`;

  if (data.headline) md += `**${data.headline}**\n\n`;
  if (data.location) md += `📍 ${data.location}\n\n`;
  if (data.linkedin_url) md += `🔗 [LinkedIn Profile](${data.linkedin_url})\n\n`;

  if (data.heritage_relevant) {
    md += `## Heritage Profile\n`;
    md += `- **Heritage Relevant**: Yes\n`;
    if (data.heritage_type) md += `- **Type**: ${data.heritage_type}\n`;
    if (data.role_category) md += `- **Role Category**: ${data.role_category}\n`;
    md += '\n';
  }

  if (data.current_company) {
    md += `## Current Position\n`;
    md += `**${data.current_company}**`;
    if (data.department) md += ` - ${data.department}`;
    md += '\n\n';
  }

  if (data.about) {
    md += `## About\n${data.about}\n\n`;
  }

  if (Array.isArray(data.career_history) && data.career_history.length > 0) {
    md += `## Career History\n\n`;
    (data.career_history as Array<Record<string, unknown>>).forEach((job) => {
      const role = job.role || job.title || 'Position';
      const company = job.organization || job.company || '';
      // Same fallback chain the accordion uses, so records that only carry
      // `duration` / `duration_text` keep their date line.
      const dates = job.dates || job.duration || job.duration_text;
      md += `### ${role}${company ? ` at ${company}` : ''}\n`;
      if (dates) md += `*${dates}*\n`;
      if (job.location) md += `📍 ${job.location}\n`;
      if (job.description) md += `\n${job.description}\n`;
      md += '\n';
    });
  }

  if (Array.isArray(data.education) && data.education.length > 0) {
    md += `## Education\n\n`;
    (data.education as Array<Record<string, unknown>>).forEach((edu) => {
      md += `### ${edu.degree || 'Degree'}\n`;
      if (edu.institution) md += `**${edu.institution}**\n`;
      if (edu.years) md += `*${edu.years}*\n`;
      md += '\n';
    });
  }

  if (Array.isArray(data.skills) && data.skills.length > 0) {
    const skills = data.skills as string[];
    md += `## Skills\n`;
    md += skills.slice(0, 20).join(', ');
    if (skills.length > 20) {
      md += `, +${skills.length - 20} more`;
    }
    md += '\n\n';
  }

  if (Array.isArray(data.languages) && data.languages.length > 0) {
    md += `## Languages\n`;
    (data.languages as Array<Record<string, unknown> | string>).forEach((lang) => {
      if (typeof lang === 'string') {
        // Plain-string entries used to render as "undefined".
        md += `- ${lang}\n`;
        return;
      }
      md += `- ${lang.language}`;
      if (lang.proficiency) md += ` (${lang.proficiency})`;
      md += '\n';
    });
    md += '\n';
  }

  md += `---\n*Exported on ${new Date().toLocaleDateString()}*\n`;

  return md;
};
|
||||
|
||||
// Calculate initial position - to the right of click, within viewport
|
||||
useEffect(() => {
|
||||
|
|
@ -93,7 +398,7 @@ const PersonInfoPanelComponent: React.FC<PersonInfoPanelProps> = ({
|
|||
|
||||
// Drag handlers
|
||||
const handleMouseDown = useCallback((e: React.MouseEvent) => {
|
||||
if ((e.target as HTMLElement).closest('.person-info-panel__close')) return;
|
||||
if (isTargetInside(e.target, '.person-info-panel__close')) return;
|
||||
|
||||
setIsDragging(true);
|
||||
dragStartRef.current = {
|
||||
|
|
@ -246,13 +551,81 @@ const PersonInfoPanelComponent: React.FC<PersonInfoPanelProps> = ({
|
|||
)}
|
||||
</div>
|
||||
</div>
|
||||
<button
|
||||
className="person-info-panel__close"
|
||||
onClick={onClose}
|
||||
aria-label="Close panel"
|
||||
>
|
||||
✕
|
||||
</button>
|
||||
|
||||
{/* Header Actions */}
|
||||
<div className="person-info-panel__header-actions">
|
||||
{/* Export Button */}
|
||||
<div className="person-info-panel__export-wrapper" ref={exportMenuRef}>
|
||||
<button
|
||||
className={`person-info-panel__export-btn ${isExporting ? 'person-info-panel__export-btn--loading' : ''}`}
|
||||
onClick={() => setShowExportMenu(!showExportMenu)}
|
||||
aria-label="Export profile"
|
||||
title="Export profile"
|
||||
disabled={isExporting}
|
||||
>
|
||||
{isExporting ? (
|
||||
<span className="person-info-panel__export-spinner" />
|
||||
) : (
|
||||
<svg viewBox="0 0 24 24" width="14" height="14" fill="currentColor">
|
||||
<path d="M19 9h-4V3H9v6H5l7 7 7-7zM5 18v2h14v-2H5z"/>
|
||||
</svg>
|
||||
)}
|
||||
</button>
|
||||
{showExportMenu && (
|
||||
<div className="person-info-panel__export-menu">
|
||||
<div className="person-info-panel__export-section">
|
||||
<div className="person-info-panel__export-section-label">Download</div>
|
||||
<button onClick={() => exportProfile('json')}>
|
||||
<span className="person-info-panel__export-icon">{ }</span>
|
||||
JSON
|
||||
</button>
|
||||
<button onClick={() => exportProfile('yaml')}>
|
||||
<span className="person-info-panel__export-icon">---</span>
|
||||
YAML
|
||||
</button>
|
||||
<button onClick={() => exportProfile('markdown')}>
|
||||
<span className="person-info-panel__export-icon">#</span>
|
||||
Markdown
|
||||
</button>
|
||||
{profileData?.career_history && profileData.career_history.length > 0 && (
|
||||
<button onClick={() => exportProfile('csv')}>
|
||||
<span className="person-info-panel__export-icon">📊</span>
|
||||
CSV (Career)
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
<div className="person-info-panel__export-section">
|
||||
<div className="person-info-panel__export-section-label">Copy to Clipboard</div>
|
||||
<button onClick={() => copyToClipboard('json')}>
|
||||
<span className="person-info-panel__export-icon">📋</span>
|
||||
Copy JSON
|
||||
</button>
|
||||
<button onClick={() => copyToClipboard('yaml')}>
|
||||
<span className="person-info-panel__export-icon">📋</span>
|
||||
Copy YAML
|
||||
</button>
|
||||
<button onClick={() => copyToClipboard('markdown')}>
|
||||
<span className="person-info-panel__export-icon">📋</span>
|
||||
Copy Markdown
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
{copySuccess && (
|
||||
<div className="person-info-panel__copy-toast">
|
||||
✓ {copySuccess} copied!
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<button
|
||||
className="person-info-panel__close"
|
||||
onClick={onClose}
|
||||
aria-label="Close panel"
|
||||
>
|
||||
✕
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Content - Always show all available metadata */}
|
||||
|
|
@ -396,48 +769,151 @@ const PersonInfoPanelComponent: React.FC<PersonInfoPanelProps> = ({
|
|||
</div>
|
||||
)}
|
||||
|
||||
{/* Career History */}
|
||||
{profileData?.career_history && profileData.career_history.length > 0 && (
|
||||
<div className="person-info-panel__section">
|
||||
<div className="person-info-panel__label">Career History</div>
|
||||
<div className="person-info-panel__career-list">
|
||||
{profileData.career_history.slice(0, 5).map((job, index) => (
|
||||
<div key={index} className="person-info-panel__career-item">
|
||||
<div className="person-info-panel__career-header">
|
||||
<div className="person-info-panel__career-role">{job.role}</div>
|
||||
<div className="person-info-panel__career-dates">{job.dates}</div>
|
||||
{job.current && (
|
||||
<span className="person-info-panel__current-badge">Current</span>
|
||||
)}
|
||||
</div>
|
||||
<div className="person-info-panel__career-company">{job.organization}</div>
|
||||
{job.location && (
|
||||
<div className="person-info-panel__career-location">{job.location}</div>
|
||||
)}
|
||||
{job.description && (
|
||||
<div className="person-info-panel__career-description">{job.description}</div>
|
||||
)}
|
||||
<div className="person-info-panel__career-details">
|
||||
{job.level && (
|
||||
<span className="person-info-panel__career-detail">{job.level}</span>
|
||||
)}
|
||||
{job.company_size && (
|
||||
<span className="person-info-panel__career-detail">{job.company_size}</span>
|
||||
)}
|
||||
{job.industry && (
|
||||
<span className="person-info-panel__career-detail">{job.industry}</span>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
{profileData.career_history.length > 5 && (
|
||||
<div className="person-info-panel__more-indicator">
|
||||
+{profileData.career_history.length - 5} more positions
|
||||
</div>
|
||||
)}
|
||||
{/* Career History - Accordion UI */}
|
||||
{profileData?.career_history && profileData.career_history.length > 0 && (() => {
|
||||
// Pre-process career history to handle field variants and filter empty items
|
||||
const validCareerItems = profileData.career_history
|
||||
.map((job, originalIndex) => {
|
||||
// Handle field name variants
|
||||
const jobRole = job.role || job.title || null;
|
||||
const jobCompany = job.organization || job.company || null;
|
||||
const jobLocation = job.location || null;
|
||||
const jobDates = job.dates || job.duration || job.duration_text || null;
|
||||
|
||||
// Handle "Unknown" role as effectively empty
|
||||
const displayRole = (jobRole && jobRole !== 'Unknown') ? jobRole : null;
|
||||
|
||||
// Skip items with no meaningful content
|
||||
if (!displayRole && !jobCompany) return null;
|
||||
|
||||
return {
|
||||
...job,
|
||||
displayRole,
|
||||
jobCompany,
|
||||
jobLocation,
|
||||
jobDates,
|
||||
originalIndex,
|
||||
};
|
||||
})
|
||||
.filter(Boolean);
|
||||
|
||||
if (validCareerItems.length === 0) return null;
|
||||
|
||||
return (
|
||||
<div className="person-info-panel__section">
|
||||
<div className="person-info-panel__label">
|
||||
Career History ({validCareerItems.length} position{validCareerItems.length !== 1 ? 's' : ''})
|
||||
</div>
|
||||
<div className="person-info-panel__career-accordion">
|
||||
{validCareerItems.map((job) => {
|
||||
if (!job) return null;
|
||||
const isExpanded = expandedCareerItems.has(job.originalIndex);
|
||||
const hasDetails = job.description || job.level || job.company_size || job.industry;
|
||||
|
||||
return (
|
||||
<div
|
||||
key={job.originalIndex}
|
||||
className={`person-info-panel__career-accordion-item ${isExpanded ? 'person-info-panel__career-accordion-item--expanded' : ''}`}
|
||||
>
|
||||
{/* Accordion Header - Always Visible */}
|
||||
<button
|
||||
className="person-info-panel__career-accordion-header"
|
||||
onClick={() => toggleCareerItem(job.originalIndex)}
|
||||
aria-expanded={isExpanded}
|
||||
aria-controls={`career-content-${job.originalIndex}`}
|
||||
>
|
||||
<div className="person-info-panel__career-accordion-summary">
|
||||
<div className="person-info-panel__career-accordion-title">
|
||||
{job.displayRole && (
|
||||
<span className="person-info-panel__career-role">{job.displayRole}</span>
|
||||
)}
|
||||
{!job.displayRole && job.jobCompany && (
|
||||
<span className="person-info-panel__career-role">{job.jobCompany}</span>
|
||||
)}
|
||||
{job.current && (
|
||||
<span className="person-info-panel__current-badge">Current</span>
|
||||
)}
|
||||
</div>
|
||||
{job.displayRole && job.jobCompany && (
|
||||
<div className="person-info-panel__career-accordion-subtitle">
|
||||
<span className="person-info-panel__career-company">{job.jobCompany}</span>
|
||||
{job.jobLocation && job.jobLocation.trim() && (
|
||||
<span className="person-info-panel__career-location-brief">
|
||||
• {job.jobLocation.split(',')[0]}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
{!job.displayRole && job.jobLocation && job.jobLocation.trim() && (
|
||||
<div className="person-info-panel__career-accordion-subtitle">
|
||||
<span className="person-info-panel__career-location-brief">
|
||||
{job.jobLocation.split(',')[0]}
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
{job.jobDates && (
|
||||
<div className="person-info-panel__career-dates">{job.jobDates}</div>
|
||||
)}
|
||||
</div>
|
||||
<span className={`person-info-panel__career-accordion-chevron ${isExpanded ? 'person-info-panel__career-accordion-chevron--expanded' : ''}`}>
|
||||
▼
|
||||
</span>
|
||||
</button>
|
||||
|
||||
{/* Accordion Content - Collapsible */}
|
||||
<div
|
||||
id={`career-content-${job.originalIndex}`}
|
||||
className={`person-info-panel__career-accordion-content ${isExpanded ? 'person-info-panel__career-accordion-content--expanded' : ''}`}
|
||||
aria-hidden={!isExpanded}
|
||||
>
|
||||
<div className="person-info-panel__career-accordion-body">
|
||||
{/* Full Location - only show if location exists, has content, and contains a comma */}
|
||||
{job.jobLocation && job.jobLocation.trim() && job.jobLocation.includes(',') && (
|
||||
<div className="person-info-panel__career-detail-row">
|
||||
<span className="person-info-panel__career-detail-label">Location:</span>
|
||||
<span className="person-info-panel__career-detail-value">{job.jobLocation}</span>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Description */}
|
||||
{job.description && (
|
||||
<div className="person-info-panel__career-description">
|
||||
{job.description}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Metadata Badges */}
|
||||
{hasDetails && (
|
||||
<div className="person-info-panel__career-details">
|
||||
{job.level && (
|
||||
<span className="person-info-panel__career-detail">
|
||||
<span className="person-info-panel__career-detail-icon">📊</span>
|
||||
{job.level}
|
||||
</span>
|
||||
)}
|
||||
{job.company_size && (
|
||||
<span className="person-info-panel__career-detail">
|
||||
<span className="person-info-panel__career-detail-icon">👥</span>
|
||||
{job.company_size}
|
||||
</span>
|
||||
)}
|
||||
{job.industry && (
|
||||
<span className="person-info-panel__career-detail">
|
||||
<span className="person-info-panel__career-detail-icon">🏢</span>
|
||||
{job.industry}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
);
|
||||
})()}
|
||||
|
||||
{/* Role Category */}
|
||||
{roleLabel && (
|
||||
|
|
|
|||
|
|
@ -515,6 +515,8 @@ interface GeoAPIFeature {
|
|||
social_media?: Record<string, string> | string;
|
||||
// Logo URL extracted from web claims
|
||||
logo_url?: string;
|
||||
// Web claims - structured data extracted from websites
|
||||
web_claims?: string | { claims?: WebClaim[] } | WebClaim[];
|
||||
// YouTube enrichment - may be object or JSON string
|
||||
youtube_enrichment?: string | {
|
||||
status?: string;
|
||||
|
|
@ -575,6 +577,19 @@ interface GeoAPISearchResponse {
|
|||
results: GeoAPISearchResult[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Web claim from website scraping
|
||||
*/
|
||||
interface WebClaim {
|
||||
claim_type?: string;
|
||||
claim_value?: string;
|
||||
raw_value?: string;
|
||||
source_url?: string;
|
||||
retrieved_on?: string;
|
||||
xpath?: string;
|
||||
extraction_method?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loading progress for UI feedback
|
||||
*/
|
||||
|
|
@ -613,6 +628,124 @@ function parseProvinceFromGhcid(ghcid: string | null | undefined): string {
|
|||
return PROVINCE_CODE_MAP[code] || '';
|
||||
}
|
||||
|
||||
/**
 * Normalize a raw `web_claims` value into a list of claim objects.
 *
 * Accepted shapes:
 *  - a JSON string encoding either of the shapes below,
 *  - an array of claims,
 *  - an object wrapping the array under a `claims` key.
 *
 * Anything else (invalid JSON, primitives, a non-array `claims` value) yields
 * `undefined`, as does an empty list. Entries that are not objects are dropped
 * so callers can safely read properties on every returned claim.
 *
 * @param value - Raw `web_claims` payload (string, array, wrapper object, or nullish)
 * @returns A non-empty array of claims, or `undefined` when none can be extracted
 */
function parseWebClaims(value: unknown): WebClaim[] | undefined {
  if (!value) return undefined;

  let parsed: unknown = value;
  if (typeof value === 'string') {
    try {
      parsed = JSON.parse(value);
    } catch {
      // Not valid JSON - treat as "no claims" rather than failing the caller
      return undefined;
    }
  }

  // Web claims can be an array directly or wrapped in an object
  let candidates: unknown;
  if (Array.isArray(parsed)) {
    candidates = parsed;
  } else if (parsed !== null && typeof parsed === 'object') {
    candidates = (parsed as Record<string, unknown>).claims;
  }

  // A non-array `claims` value (e.g. a string) must not leak out typed as WebClaim[]
  if (!Array.isArray(candidates)) return undefined;

  const claims = candidates.filter(
    (entry): entry is WebClaim => entry !== null && typeof entry === 'object',
  );

  return claims.length > 0 ? claims : undefined;
}
|
||||
|
||||
/**
 * Turn a possibly-relative URL into an absolute one.
 *
 * - `http://` / `https://` URLs are returned unchanged.
 * - Protocol-relative URLs (`//host/path`) are prefixed with `https:`.
 * - Anything else is resolved against `baseUrl` when one is given; if no base
 *   is supplied, or resolution fails, the input is returned as-is.
 *
 * @param url - URL to resolve (absolute, protocol-relative, or relative)
 * @param baseUrl - Optional absolute URL used as the resolution base
 * @returns The resolved URL, or the original `url` when it cannot be resolved
 */
function resolveUrl(url: string, baseUrl?: string): string {
  // Protocol-relative: pin to https
  if (url.startsWith('//')) {
    return `https:${url}`;
  }

  const alreadyAbsolute = url.startsWith('http://') || url.startsWith('https://');
  if (alreadyAbsolute || !baseUrl) {
    return url;
  }

  try {
    // The URL API performs the relative-to-base resolution
    return new URL(url, new URL(baseUrl)).href;
  } catch {
    // Unparseable base or URL: hand back the input untouched
    return url;
  }
}
|
||||
|
||||
/**
 * Heuristically decide whether a URL can be used as an image source.
 *
 * A URL is rejected when it:
 *  - is empty or not an absolute `http://` / `https://` URL,
 *  - points at a known video / social-media page, or
 *  - carries a video file extension in its path, query or fragment.
 *
 * Video extensions are only matched AFTER the host part and only when they end
 * at a token boundary, so hostnames such as `www.aviation-museum.nl` or
 * `www.movies.com` and file names such as `logo.moving.png` are not rejected
 * by accident.
 *
 * @param url - Candidate URL
 * @returns `true` when the URL looks usable as an image, `false` otherwise
 */
function isValidImageUrl(url: string): boolean {
  if (!url) return false;

  // Must be absolute URL
  if (!url.startsWith('http://') && !url.startsWith('https://')) {
    return false;
  }

  const lowerUrl = url.toLowerCase();

  // Pages on video / social platforms are never direct image resources
  const blockedSitePatterns = [
    'youtube.com/watch',
    'youtu.be/',
    'vimeo.com/',
    'twitter.com/',
    'facebook.com/',
  ];
  if (blockedSitePatterns.some(pattern => lowerUrl.includes(pattern))) {
    return false;
  }

  // Everything after the host: path + query + fragment ('' when the URL has none)
  const afterScheme = lowerUrl.slice(lowerUrl.indexOf('//') + 2);
  const restStart = afterScheme.search(/[/?#]/);
  const pathQueryFragment = restStart === -1 ? '' : afterScheme.slice(restStart);

  // A video extension counts only when not followed by further letters/digits
  const videoExtension = /\.(mp4|webm|mov|avi)(?![a-z0-9])/;
  return !videoExtension.test(pathQueryFragment);
}
|
||||
|
||||
/**
 * Pick the most suitable logo URL out of a raw `web_claims` payload.
 *
 * Only claims with `claim_type === 'logo'` and a non-empty `claim_value` are
 * considered. They are ranked by `extraction_method`
 * (logo_img_attr > og_image > favicon_link > anything else). The first ranked
 * claim that is not a favicon / loading placeholder and resolves to a valid
 * image URL wins; when none qualifies, the top-ranked claim is used as a last
 * resort. Relative URLs are resolved against the claim's `source_url`.
 *
 * @param webClaimsValue - Raw `web_claims` value (JSON string, array, or wrapper object)
 * @returns An absolute image URL, or `undefined` when no usable logo is found
 */
function extractLogoFromWebClaims(webClaimsValue: unknown): string | undefined {
  const allClaims = parseWebClaims(webClaimsValue);
  if (!allClaims || allClaims.length === 0) return undefined;

  // Keep only logo claims that actually carry a value
  const candidates = allClaims.filter(claim => claim.claim_type === 'logo' && claim.claim_value);
  if (candidates.length === 0) return undefined;

  // Rank by extraction method: logo_img_attr > og_image > favicon_link > others
  const methodRank: Record<string, number> = {
    'logo_img_attr': 3,
    'og_image': 2,
    'favicon_link': 1,
  };
  const rankOf = (claim: WebClaim): number => methodRank[claim.extraction_method || ''] || 0;
  candidates.sort((left, right) => rankOf(right) - rankOf(left));

  // First candidate that is neither a favicon nor a placeholder and is a valid image
  let chosen: WebClaim | undefined;
  for (const claim of candidates) {
    const rawUrl = claim.claim_value || '';
    if (claim.extraction_method === 'favicon_link') continue;
    if (rawUrl.includes('favicon') || rawUrl.includes('loading')) continue;
    if (isValidImageUrl(resolveUrl(rawUrl, claim.source_url))) {
      chosen = claim;
      break;
    }
  }

  // Nothing passed the filters: fall back to the highest-ranked claim
  if (!chosen) {
    chosen = candidates[0];
  }
  if (!chosen?.claim_value) return undefined;

  // Resolve relative URLs against source_url, then validate one final time
  const finalUrl = resolveUrl(chosen.claim_value, chosen.source_url);
  if (isValidImageUrl(finalUrl)) {
    return finalUrl;
  }
  return undefined;
}
|
||||
|
||||
/**
|
||||
* Safely extract a year number from a value that might be:
|
||||
* - A number (return as-is)
|
||||
|
|
@ -985,7 +1118,11 @@ function featureToInstitution(feature: GeoAPIFeature): Institution {
|
|||
dissolution_year: safeExtractYear(props.dissolution_year),
|
||||
social_media: normalizeSocialMedia(props.social_media),
|
||||
youtube: normalizeYouTubeEnrichment(props.youtube_enrichment),
|
||||
logo_url: props.logo_url,
|
||||
// Extract logo URL from web_claims (primary) or use direct logo_url prop (fallback)
|
||||
// Priority: web_claims logo_img_attr > web_claims og_image > props.logo_url
|
||||
// Only use props.logo_url if it's a valid absolute image URL (not relative, not video)
|
||||
logo_url: extractLogoFromWebClaims(props.web_claims) ||
|
||||
(props.logo_url && isValidImageUrl(props.logo_url) ? props.logo_url : undefined),
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -1578,6 +1715,11 @@ function detailResponseToInstitution(data: Record<string, unknown>): Institution
|
|||
// Handle social media
|
||||
const socialMedia = data.social_media as Record<string, string> | undefined;
|
||||
|
||||
// Extract logo URL from web_claims (primary) or use direct logo_url (fallback)
|
||||
// Priority: web_claims logo_img_attr > web_claims og_image > props.logo_url
|
||||
const logoUrl = extractLogoFromWebClaims(data.web_claims) ||
|
||||
(data.logo_url && isValidImageUrl(data.logo_url as string) ? data.logo_url as string : undefined);
|
||||
|
||||
return {
|
||||
lat: data.lat as number,
|
||||
lon: data.lon as number,
|
||||
|
|
@ -1621,6 +1763,7 @@ function detailResponseToInstitution(data: Record<string, unknown>): Institution
|
|||
dissolution_year: dissolutionYear,
|
||||
social_media: socialMedia,
|
||||
youtube,
|
||||
logo_url: logoUrl,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -1682,7 +1825,7 @@ export function useInstitutionDetail(ghcid: string | null): UseInstitutionDetail
|
|||
// Transform it to Institution format
|
||||
const inst = detailResponseToInstitution(data);
|
||||
|
||||
console.log('[GeoAPI Detail] Loaded institution:', inst.name);
|
||||
console.log('[GeoAPI Detail] Loaded institution:', inst.name, 'logo_url:', inst.logo_url, 'youtube:', inst.youtube);
|
||||
|
||||
// Store in cache
|
||||
detailCache.set(ghcid, { data: inst, timestamp: Date.now() });
|
||||
|
|
|
|||
|
|
@ -354,21 +354,56 @@ async function callDSPy(
|
|||
answer: m.role === 'assistant' ? m.content : '',
|
||||
})).filter(m => m.question || m.answer) || [];
|
||||
|
||||
const response = await fetch(`${DSPY_URL}/query`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
question,
|
||||
language: options.language || 'nl',
|
||||
context: conversationContext, // Backend expects conversation history here
|
||||
include_visualization: true,
|
||||
}),
|
||||
});
|
||||
let response: Response;
|
||||
try {
|
||||
response = await fetch(`${DSPY_URL}/query`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
question,
|
||||
language: options.language || 'nl',
|
||||
context: conversationContext, // Backend expects conversation history here
|
||||
include_visualization: true,
|
||||
}),
|
||||
});
|
||||
} catch (networkError) {
|
||||
// Network error - server unreachable
|
||||
console.error('[DSPy] Network error:', networkError);
|
||||
const lang = options.language || 'nl';
|
||||
return {
|
||||
answer: lang === 'nl'
|
||||
? '⚠️ **Serverfout**: Kan geen verbinding maken met de RAG-server. Controleer of de backend draait op poort 8003.'
|
||||
: '⚠️ **Server Error**: Cannot connect to RAG server. Check if backend is running on port 8003.',
|
||||
confidence: 0,
|
||||
};
|
||||
}
|
||||
|
||||
if (!response.ok) {
|
||||
// Fallback response if DSPy service unavailable
|
||||
// HTTP error - log details for debugging
|
||||
console.error(`[DSPy] HTTP ${response.status}: ${response.statusText}`);
|
||||
const lang = options.language || 'nl';
|
||||
|
||||
if (response.status === 404) {
|
||||
return {
|
||||
answer: lang === 'nl'
|
||||
? '⚠️ **Serverfout (404)**: De RAG API endpoint is niet gevonden. Controleer de proxy configuratie in vite.config.ts en herstart de frontend.'
|
||||
: '⚠️ **Server Error (404)**: RAG API endpoint not found. Check proxy configuration in vite.config.ts and restart frontend.',
|
||||
confidence: 0,
|
||||
};
|
||||
}
|
||||
|
||||
if (response.status >= 500) {
|
||||
return {
|
||||
answer: lang === 'nl'
|
||||
? `⚠️ **Serverfout (${response.status})**: De RAG-server heeft een interne fout. Controleer de backend logs.`
|
||||
: `⚠️ **Server Error (${response.status})**: RAG server internal error. Check backend logs.`,
|
||||
confidence: 0,
|
||||
};
|
||||
}
|
||||
|
||||
// Other HTTP errors - fall back to context-based answer
|
||||
return {
|
||||
answer: generateFallbackAnswer(question, context, options.language || 'nl'),
|
||||
answer: generateFallbackAnswer(question, context, lang),
|
||||
confidence: 0.5,
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -137,6 +137,135 @@ interface FullFeature {
|
|||
properties: Record<string, unknown>;
|
||||
}
|
||||
|
||||
// Parse web_claims JSON for logo extraction
|
||||
interface WebClaim {
|
||||
claim_type?: string;
|
||||
claim_value?: string;
|
||||
raw_value?: string;
|
||||
source_url?: string;
|
||||
retrieved_on?: string;
|
||||
xpath?: string;
|
||||
extraction_method?: string;
|
||||
}
|
||||
|
||||
/**
 * Normalize a raw `web_claims` value into a list of claim objects.
 *
 * Accepted shapes:
 *  - a JSON string encoding either of the shapes below,
 *  - an array of claims,
 *  - an object wrapping the array under a `claims` key.
 *
 * Anything else (invalid JSON, primitives, a non-array `claims` value) yields
 * `undefined`, as does an empty list. Entries that are not objects are dropped
 * so callers can safely read properties on every returned claim.
 *
 * @param value - Raw `web_claims` payload (string, array, wrapper object, or nullish)
 * @returns A non-empty array of claims, or `undefined` when none can be extracted
 */
function parseWebClaims(value: unknown): WebClaim[] | undefined {
  if (!value) return undefined;

  let parsed: unknown = value;
  if (typeof value === 'string') {
    try {
      parsed = JSON.parse(value);
    } catch {
      // Not valid JSON - treat as "no claims" rather than failing the caller
      return undefined;
    }
  }

  // Web claims can be an array directly or wrapped in an object
  let candidates: unknown;
  if (Array.isArray(parsed)) {
    candidates = parsed;
  } else if (parsed !== null && typeof parsed === 'object') {
    candidates = (parsed as Record<string, unknown>).claims;
  }

  // A non-array `claims` value (e.g. a string) must not leak out typed as WebClaim[]
  if (!Array.isArray(candidates)) return undefined;

  const claims = candidates.filter(
    (entry): entry is WebClaim => entry !== null && typeof entry === 'object',
  );

  return claims.length > 0 ? claims : undefined;
}
|
||||
|
||||
/**
 * Turn a possibly-relative URL into an absolute one.
 *
 * - `http://` / `https://` URLs are returned unchanged.
 * - Protocol-relative URLs (`//host/path`) are prefixed with `https:`.
 * - Anything else is resolved against `baseUrl` when one is given; if no base
 *   is supplied, or resolution fails, the input is returned as-is.
 *
 * @param url - URL to resolve (absolute, protocol-relative, or relative)
 * @param baseUrl - Optional absolute URL used as the resolution base
 * @returns The resolved URL, or the original `url` when it cannot be resolved
 */
function resolveUrl(url: string, baseUrl?: string): string {
  // Protocol-relative: pin to https
  if (url.startsWith('//')) {
    return `https:${url}`;
  }

  const alreadyAbsolute = url.startsWith('http://') || url.startsWith('https://');
  if (alreadyAbsolute || !baseUrl) {
    return url;
  }

  try {
    // The URL API performs the relative-to-base resolution
    return new URL(url, new URL(baseUrl)).href;
  } catch {
    // Unparseable base or URL: hand back the input untouched
    return url;
  }
}
|
||||
|
||||
/**
 * Heuristically decide whether a URL can be used as an image source.
 *
 * A URL is rejected when it:
 *  - is empty or not an absolute `http://` / `https://` URL,
 *  - points at a known video / social-media page, or
 *  - carries a video file extension in its path, query or fragment.
 *
 * Video extensions are only matched AFTER the host part and only when they end
 * at a token boundary, so hostnames such as `www.aviation-museum.nl` or
 * `www.movies.com` and file names such as `logo.moving.png` are not rejected
 * by accident.
 *
 * @param url - Candidate URL
 * @returns `true` when the URL looks usable as an image, `false` otherwise
 */
function isValidImageUrl(url: string): boolean {
  if (!url) return false;

  // Must be absolute URL
  if (!url.startsWith('http://') && !url.startsWith('https://')) {
    return false;
  }

  const lowerUrl = url.toLowerCase();

  // Pages on video / social platforms are never direct image resources
  const blockedSitePatterns = [
    'youtube.com/watch',
    'youtu.be/',
    'vimeo.com/',
    'twitter.com/',
    'facebook.com/',
  ];
  if (blockedSitePatterns.some(pattern => lowerUrl.includes(pattern))) {
    return false;
  }

  // Everything after the host: path + query + fragment ('' when the URL has none)
  const afterScheme = lowerUrl.slice(lowerUrl.indexOf('//') + 2);
  const restStart = afterScheme.search(/[/?#]/);
  const pathQueryFragment = restStart === -1 ? '' : afterScheme.slice(restStart);

  // A video extension counts only when not followed by further letters/digits
  const videoExtension = /\.(mp4|webm|mov|avi)(?![a-z0-9])/;
  return !videoExtension.test(pathQueryFragment);
}
|
||||
|
||||
/**
 * Pick the most suitable logo URL out of a raw `web_claims` payload.
 *
 * Only claims with `claim_type === 'logo'` and a non-empty `claim_value` are
 * considered. They are ranked by `extraction_method`
 * (logo_img_attr > og_image > favicon_link > anything else). The first ranked
 * claim that is not a favicon / loading placeholder and resolves to a valid
 * image URL wins; when none qualifies, the top-ranked claim is used as a last
 * resort. Relative URLs are resolved against the claim's `source_url`.
 *
 * @param webClaimsValue - Raw `web_claims` value (JSON string, array, or wrapper object)
 * @returns An absolute image URL, or `undefined` when no usable logo is found
 */
function extractLogoFromWebClaims(webClaimsValue: unknown): string | undefined {
  const allClaims = parseWebClaims(webClaimsValue);
  if (!allClaims || allClaims.length === 0) return undefined;

  // Keep only logo claims that actually carry a value
  const candidates = allClaims.filter(claim => claim.claim_type === 'logo' && claim.claim_value);
  if (candidates.length === 0) return undefined;

  // Rank by extraction method: logo_img_attr > og_image > favicon_link > others
  const methodRank: Record<string, number> = {
    'logo_img_attr': 3,
    'og_image': 2,
    'favicon_link': 1,
  };
  const rankOf = (claim: WebClaim): number => methodRank[claim.extraction_method || ''] || 0;
  candidates.sort((left, right) => rankOf(right) - rankOf(left));

  // First candidate that is neither a favicon nor a placeholder and is a valid image
  let chosen: WebClaim | undefined;
  for (const claim of candidates) {
    const rawUrl = claim.claim_value || '';
    if (claim.extraction_method === 'favicon_link') continue;
    if (rawUrl.includes('favicon') || rawUrl.includes('loading')) continue;
    if (isValidImageUrl(resolveUrl(rawUrl, claim.source_url))) {
      chosen = claim;
      break;
    }
  }

  // Nothing passed the filters: fall back to the highest-ranked claim
  if (!chosen) {
    chosen = candidates[0];
  }
  if (!chosen?.claim_value) return undefined;

  // Resolve relative URLs against source_url, then validate one final time
  const finalUrl = resolveUrl(chosen.claim_value, chosen.source_url);
  if (isValidImageUrl(finalUrl)) {
    return finalUrl;
  }
  return undefined;
}
|
||||
|
||||
function fullFeatureToInstitution(feature: FullFeature): Institution {
|
||||
const props = feature.properties;
|
||||
const [lon, lat] = feature.geometry.coordinates;
|
||||
|
|
@ -149,6 +278,13 @@ function fullFeatureToInstitution(feature: FullFeature): Institution {
|
|||
const socialMedia = parseSocialMedia(props.social_media);
|
||||
const youtube = parseYouTube(props.youtube_enrichment);
|
||||
|
||||
// Extract logo URL from web_claims (primary) or use direct logo_url prop (fallback)
|
||||
// Priority: web_claims logo_img_attr > web_claims og_image > props.logo_url
|
||||
// Only use props.logo_url if it's a valid absolute image URL (not relative, not video)
|
||||
const webClaimsLogo = extractLogoFromWebClaims(props.web_claims);
|
||||
const fallbackLogo = props.logo_url as string | undefined;
|
||||
const logoUrl = webClaimsLogo || (fallbackLogo && isValidImageUrl(fallbackLogo) ? fallbackLogo : undefined);
|
||||
|
||||
return {
|
||||
lat,
|
||||
lon,
|
||||
|
|
@ -183,6 +319,7 @@ function fullFeatureToInstitution(feature: FullFeature): Institution {
|
|||
dissolution_year: safeExtractYear(props.dissolution_year),
|
||||
social_media: socialMedia,
|
||||
youtube,
|
||||
logo_url: logoUrl,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ const CACHE_KEY = 'all_institutions';
|
|||
const DEFAULT_CONFIG: InstitutionsCacheConfig = {
|
||||
staleTtlMs: 1 * 60 * 60 * 1000, // 1 hour - trigger background refresh
|
||||
expiredTtlMs: 24 * 60 * 60 * 1000, // 24 hours - force foreground refresh
|
||||
cacheVersion: '1.0.0',
|
||||
cacheVersion: '1.1.0', // Bumped from 1.0.0 to include logo_url in cached data
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@
|
|||
* - Migration support for format changes
|
||||
*/
|
||||
|
||||
const STORAGE_VERSION = 1;
|
||||
const STORAGE_VERSION = 2; // Incremented to trigger migration to progressive default
|
||||
const STORAGE_KEY_PREFIX = 'rdf-visualizer';
|
||||
|
||||
/**
|
||||
|
|
@ -268,10 +268,19 @@ export function clearRecentQueries(): boolean {
|
|||
* Migrate UI state from old version to current
|
||||
*/
|
||||
function migrateUIState(oldState: UIState): UIState {
  // Work on a local binding so the caller's argument is never reassigned
  let migrated = oldState;

  // Version 1 -> 2: reset dataBackend to 'progressive' (new recommended default)
  // so that every user starts in progressive mode after the update.
  const needsV2Migration = migrated.version < 2;
  if (needsV2Migration) {
    console.log('Migration v1->v2: Setting dataBackend to progressive (new recommended default)');
    migrated = { ...migrated, dataBackend: 'progressive' };
  }

  // Future migrations would go here (e.g., version 2 -> 3)

  // Layer the migrated state over the defaults to fill in missing fields,
  // stamping it with the current storage version.
  const stamped = { ...migrated, version: STORAGE_VERSION };
  return deepMerge(DEFAULT_UI_STATE, stamped);
}
|
||||
|
||||
|
|
|
|||
|
|
@ -36,7 +36,8 @@
|
|||
grid-template-columns: 1fr;
|
||||
gap: 0;
|
||||
height: 100%;
|
||||
overflow: hidden;
|
||||
min-height: 0;
|
||||
overflow: visible;
|
||||
}
|
||||
|
||||
.conversation-layout--with-viz {
|
||||
|
|
@ -115,6 +116,7 @@
|
|||
display: flex;
|
||||
flex-direction: column;
|
||||
height: 100%;
|
||||
min-height: 0;
|
||||
background: white;
|
||||
border-right: 1px solid var(--border-color, #e5e5e5);
|
||||
}
|
||||
|
|
@ -151,6 +153,37 @@
|
|||
color: #ffd700;
|
||||
}
|
||||
|
||||
.conversation-chat__header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.conversation-chat__new-btn {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
padding: 8px 14px;
|
||||
background: rgba(255, 255, 255, 0.15);
|
||||
border: 1px solid rgba(255, 255, 255, 0.3);
|
||||
border-radius: 8px;
|
||||
color: white;
|
||||
font-size: 0.875rem;
|
||||
font-weight: 500;
|
||||
cursor: pointer;
|
||||
transition: all 0.2s ease;
|
||||
}
|
||||
|
||||
.conversation-chat__new-btn:hover {
|
||||
background: rgba(255, 255, 255, 0.25);
|
||||
border-color: rgba(255, 255, 255, 0.5);
|
||||
}
|
||||
|
||||
.conversation-chat__new-btn:active {
|
||||
background: rgba(255, 255, 255, 0.3);
|
||||
transform: scale(0.98);
|
||||
}
|
||||
|
||||
/* ============================================================================
|
||||
Input Area
|
||||
============================================================================ */
|
||||
|
|
@ -375,6 +408,17 @@
|
|||
background: #fff5f5;
|
||||
}
|
||||
|
||||
.conversation-chat__action-btn--warning {
|
||||
border-color: #f59e0b;
|
||||
color: #b45309;
|
||||
}
|
||||
|
||||
.conversation-chat__action-btn--warning:hover:not(:disabled) {
|
||||
border-color: #d97706;
|
||||
color: #d97706;
|
||||
background: #fffbeb;
|
||||
}
|
||||
|
||||
/* History dropdown */
|
||||
.conversation-chat__history-selector {
|
||||
position: relative;
|
||||
|
|
@ -475,6 +519,7 @@
|
|||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 16px;
|
||||
min-height: 0;
|
||||
}
|
||||
|
||||
/* Welcome state */
|
||||
|
|
@ -482,11 +527,12 @@
|
|||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
justify-content: flex-start;
|
||||
text-align: center;
|
||||
padding: 48px 24px;
|
||||
max-width: 600px;
|
||||
margin: 0 auto;
|
||||
min-height: min-content;
|
||||
}
|
||||
|
||||
.conversation-chat__welcome-header {
|
||||
|
|
@ -588,6 +634,7 @@
|
|||
margin: 0;
|
||||
white-space: pre-wrap;
|
||||
word-wrap: break-word;
|
||||
color: inherit;
|
||||
}
|
||||
|
||||
.conversation-message__loading {
|
||||
|
|
|
|||
|
|
@ -43,6 +43,8 @@ import {
|
|||
Layers,
|
||||
Database,
|
||||
Zap,
|
||||
RefreshCw,
|
||||
Plus,
|
||||
} from 'lucide-react';
|
||||
import { useLanguage } from '../contexts/LanguageContext';
|
||||
import { useMultiDatabaseRAG, type RAGResponse, type ConversationMessage, type VisualizationType, type InstitutionData } from '../hooks/useMultiDatabaseRAG';
|
||||
|
|
@ -110,6 +112,8 @@ const TEXT = {
|
|||
export: { nl: 'Export', en: 'Export' },
|
||||
import: { nl: 'Import', en: 'Import' },
|
||||
clear: { nl: 'Wis', en: 'Clear' },
|
||||
new: { nl: 'Nieuw', en: 'New' },
|
||||
newConversation: { nl: 'Nieuw gesprek starten', en: 'Start new conversation' },
|
||||
embeddings: { nl: 'Embeddings', en: 'Embeddings' },
|
||||
advanced: { nl: 'Geavanceerd', en: 'Advanced' },
|
||||
simple: { nl: 'Eenvoudig', en: 'Simple' },
|
||||
|
|
@ -1032,6 +1036,15 @@ const ConversationPage: React.FC = () => {
|
|||
<p>{t('pageSubtitle')}</p>
|
||||
</div>
|
||||
</div>
|
||||
<button
|
||||
className="conversation-chat__new-btn"
|
||||
onClick={handleClearConversation}
|
||||
title="New Conversation"
|
||||
type="button"
|
||||
>
|
||||
<Plus size={20} />
|
||||
<span>New</span>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Input Area - Top */}
|
||||
|
|
@ -1183,6 +1196,17 @@ const ConversationPage: React.FC = () => {
|
|||
</button>
|
||||
)}
|
||||
|
||||
{/* Reset Cache - Warning Style */}
|
||||
<button
|
||||
className="conversation-chat__action-btn conversation-chat__action-btn--warning"
|
||||
onClick={handleClearCache}
|
||||
title={t('clearCache')}
|
||||
type="button"
|
||||
>
|
||||
<RefreshCw size={16} />
|
||||
<span>{t('clearCache')}</span>
|
||||
</button>
|
||||
|
||||
{/* Cache Status Indicator */}
|
||||
{lastCacheLookup && (
|
||||
<div className={`conversation-cache-status ${lastCacheLookup.found ? 'conversation-cache-status--hit' : 'conversation-cache-status--miss'}`}>
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
max-width: none;
|
||||
margin: 0;
|
||||
padding: 1rem 1.5rem;
|
||||
min-height: calc(100vh - 60px);
|
||||
padding-bottom: 2rem;
|
||||
animation: fadeIn 0.5s ease-in;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -250,6 +250,15 @@
|
|||
font-size: 1.25rem;
|
||||
}
|
||||
|
||||
.card-logo {
|
||||
width: 28px;
|
||||
height: 28px;
|
||||
border-radius: 4px;
|
||||
object-fit: contain;
|
||||
background: #f5f7fa;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.type-badge {
|
||||
font-size: 0.7rem;
|
||||
font-weight: 600;
|
||||
|
|
@ -453,6 +462,15 @@
|
|||
font-size: 2.5rem;
|
||||
}
|
||||
|
||||
.modal-logo {
|
||||
width: 56px;
|
||||
height: 56px;
|
||||
border-radius: 8px;
|
||||
object-fit: contain;
|
||||
background: #f5f7fa;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.modal-header h2 {
|
||||
font-size: 1.25rem;
|
||||
font-weight: 600;
|
||||
|
|
@ -800,6 +818,10 @@
|
|||
color: #a0a0b0;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .card-logo {
|
||||
background: #1e1e32;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .card-title {
|
||||
color: #e0e0e0;
|
||||
}
|
||||
|
|
@ -880,6 +902,10 @@
|
|||
color: #e0e0e0;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .modal-logo {
|
||||
background: #1e1e32;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .detail-section h4 {
|
||||
color: #a0a0b0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -526,11 +526,22 @@ function InstitutionCard({
|
|||
const typeInfo = TYPE_INFO[institution.type] || TYPE_INFO['U'];
|
||||
const countryCode = institution.ghcid?.current?.substring(0, 2) || '';
|
||||
const hasNetwork = hasStaffNetworkData(getCustodianSlug(institution.name));
|
||||
const [logoError, setLogoError] = useState(false);
|
||||
|
||||
return (
|
||||
<div className="institution-card" style={{ '--type-color': typeInfo.color } as React.CSSProperties}>
|
||||
<div className="card-header">
|
||||
<span className="type-icon" title={typeInfo.name}>{typeInfo.icon}</span>
|
||||
{/* Logo or type icon */}
|
||||
{institution.logo_url && !logoError ? (
|
||||
<img
|
||||
src={institution.logo_url}
|
||||
alt=""
|
||||
className="card-logo"
|
||||
onError={() => setLogoError(true)}
|
||||
/>
|
||||
) : (
|
||||
<span className="type-icon" title={typeInfo.name}>{typeInfo.icon}</span>
|
||||
)}
|
||||
<span className="type-badge" style={{ backgroundColor: typeInfo.color }}>
|
||||
{typeInfo.name}
|
||||
</span>
|
||||
|
|
@ -587,6 +598,7 @@ function InstitutionDetailModal({
|
|||
}) {
|
||||
const t = (key: keyof typeof TEXT) => TEXT[key][language];
|
||||
const typeInfo = TYPE_INFO[institution.type] || TYPE_INFO['U'];
|
||||
const [logoError, setLogoError] = useState(false);
|
||||
|
||||
// Close on escape key
|
||||
useEffect(() => {
|
||||
|
|
@ -603,7 +615,17 @@ function InstitutionDetailModal({
|
|||
<button className="modal-close" onClick={onClose}>×</button>
|
||||
|
||||
<div className="modal-header">
|
||||
<span className="modal-type-icon">{typeInfo.icon}</span>
|
||||
{/* Logo or type icon */}
|
||||
{institution.logo_url && !logoError ? (
|
||||
<img
|
||||
src={institution.logo_url}
|
||||
alt=""
|
||||
className="modal-logo"
|
||||
onError={() => setLogoError(true)}
|
||||
/>
|
||||
) : (
|
||||
<span className="modal-type-icon">{typeInfo.icon}</span>
|
||||
)}
|
||||
<div>
|
||||
<h2>{institution.name}</h2>
|
||||
<span className="modal-type-badge" style={{ backgroundColor: typeInfo.color }}>
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@
|
|||
*/
|
||||
|
||||
import { useEffect, useRef, useState, useMemo, useCallback } from 'react';
|
||||
import { useSearchParams } from 'react-router-dom';
|
||||
import { useSearchParams, useNavigate } from 'react-router-dom';
|
||||
import maplibregl from 'maplibre-gl';
|
||||
import type { StyleSpecification, MapLayerMouseEvent, GeoJSONSource } from 'maplibre-gl';
|
||||
import 'maplibre-gl/dist/maplibre-gl.css';
|
||||
|
|
@ -207,6 +207,7 @@ function institutionsToGeoJSON(institutions: Institution[]): GeoJSON.FeatureColl
|
|||
|
||||
export default function NDEMapPage() {
|
||||
const [searchParams, setSearchParams] = useSearchParams();
|
||||
const navigate = useNavigate();
|
||||
const mapRef = useRef<HTMLDivElement>(null);
|
||||
const mapInstanceRef = useRef<maplibregl.Map | null>(null);
|
||||
const [mapReady, setMapReady] = useState(false);
|
||||
|
|
@ -2060,7 +2061,7 @@ export default function NDEMapPage() {
|
|||
|
||||
{/* Link to settings for backend selection */}
|
||||
<p className="settings-hint">
|
||||
{t('Wijzig databron in', 'Change data source in')} <a href="#" onClick={(e) => { e.preventDefault(); /* Open settings modal */ }}>{t('Instellingen', 'Settings')}</a>
|
||||
{t('Wijzig databron in', 'Change data source in')} <a href="/settings" onClick={(e) => { e.preventDefault(); navigate('/settings'); }}>{t('Instellingen', 'Settings')}</a>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -457,10 +457,13 @@ export interface ExtendedProfileData {
|
|||
country?: string;
|
||||
}>;
|
||||
career_history?: Array<{
|
||||
organization: string;
|
||||
role: string;
|
||||
dates: string;
|
||||
organization?: string;
|
||||
company?: string; // Alternative to organization
|
||||
role?: string;
|
||||
title?: string; // Alternative to role
|
||||
dates?: string;
|
||||
duration?: string;
|
||||
duration_text?: string; // Alternative to duration
|
||||
location?: string;
|
||||
current?: boolean;
|
||||
company_size?: string;
|
||||
|
|
|
|||
52
frontend/src/utils/dom.ts
Normal file
52
frontend/src/utils/dom.ts
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
/**
|
||||
* DOM utility functions for safe element operations
|
||||
*/
|
||||
|
||||
/**
 * `closest()` lookup that tolerates event targets which are not Elements.
 *
 * When the target is an Element, the search starts at the target itself.
 * When it is another kind of Node (text node, comment node, ...) with a parent
 * element, the search starts at that parent. Every other target yields null.
 *
 * @param target - The event target (may be Element, Text node, or null)
 * @param selector - CSS selector to match
 * @returns The closest matching element, or null if not found
 */
export function safeClosest(target: EventTarget | null, selector: string): Element | null {
  if (!target) return null;

  // Elements support closest() directly
  if (target instanceof Element) {
    return target.closest(selector);
  }

  // Text nodes, comment nodes, etc.: start the search from the parent element
  if (target instanceof Node && target.parentElement) {
    return target.parentElement.closest(selector);
  }

  // Not a Node, or a Node with no parent element
  return null;
}
|
||||
|
||||
/**
 * Tell whether an event target lies within (or is) an element matching a selector.
 *
 * @param target - The event target
 * @param selector - CSS selector to match
 * @returns true when `safeClosest` finds a matching element, false otherwise
 */
export function isTargetInside(target: EventTarget | null, selector: string): boolean {
  const match = safeClosest(target, selector);
  return match !== null;
}
|
||||
|
||||
/**
 * Tell whether an event target lies within an element matching at least one
 * of several selectors.
 *
 * @param target - The event target
 * @param selectors - Array of CSS selectors to match
 * @returns true as soon as one selector matches; false if none do (including an empty list)
 */
export function isTargetInsideAny(target: EventTarget | null, selectors: string[]): boolean {
  for (const selector of selectors) {
    if (isTargetInside(target, selector)) {
      return true;
    }
  }
  return false;
}
|
||||
|
|
@ -55,6 +55,11 @@ export default defineConfig({
|
|||
changeOrigin: true,
|
||||
rewrite: (path) => path.replace(/^\/ducklake/, ''),
|
||||
},
|
||||
// RAG API proxy (Heritage RAG backend on port 8003)
|
||||
'/api/rag': {
|
||||
target: 'http://localhost:8003',
|
||||
changeOrigin: true,
|
||||
},
|
||||
// Generic API fallback
|
||||
'/api': {
|
||||
target: 'http://localhost:8000',
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ python = "^3.11"
|
|||
|
||||
# Core data processing
|
||||
pandas = "^2.1.0"
|
||||
numpy = "^1.26.0"
|
||||
numpy = ">=2.0.0"
|
||||
|
||||
# Text processing (direct dependencies only)
|
||||
# NOTE: NLP extraction (NER) is handled by coding subagents via Task tool
|
||||
|
|
@ -47,7 +47,7 @@ rdflib = "^7.0.0"
|
|||
SPARQLWrapper = "^2.0.0"
|
||||
|
||||
# Database and storage
|
||||
duckdb = "^0.9.0"
|
||||
duckdb = ">=1.0.0"
|
||||
sqlalchemy = "^2.0.0"
|
||||
pyarrow = "^14.0.0"
|
||||
|
||||
|
|
@ -71,6 +71,9 @@ pydantic-settings = "^2.0.0"
|
|||
# DSPy for LLM-powered SPARQL generation
|
||||
dspy-ai = "^2.5.0"
|
||||
openai = "^1.0.0" # DSPy backend for OpenAI/Anthropic
|
||||
qdrant-client = "^1.16.2"
|
||||
sentence-transformers = "^5.2.0"
|
||||
typedb-driver = "^3.0.0"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
# Testing
|
||||
|
|
|
|||
|
|
@ -88,6 +88,8 @@ enums:
|
|||
description: Entry requires further enrichment processing
|
||||
new_entry:
|
||||
description: Newly added entry not yet enriched
|
||||
google_maps_searched:
|
||||
description: Google Maps search attempted but not yet fully enriched
|
||||
|
||||
InstitutionTypeCodeEnum:
|
||||
description: Single-letter GLAMORCUBESFIXPHDNT type codes
|
||||
|
|
@ -184,6 +186,8 @@ enums:
|
|||
description: LinkedIn profile or company page
|
||||
GHCID_PREVIOUS:
|
||||
description: Previous GHCID before relocation or reorganization
|
||||
OCLC:
|
||||
description: OCLC (Online Computer Library Center) identifier
|
||||
|
||||
LocationResolutionMethodEnum:
|
||||
description: Method used to resolve settlement location
|
||||
|
|
@ -432,6 +436,9 @@ classes:
|
|||
organisatie:
|
||||
range: string
|
||||
description: Organization name from source
|
||||
organisation:
|
||||
range: string
|
||||
description: Organization name from source (British spelling variant)
|
||||
isil_code_na:
|
||||
range: string
|
||||
description: ISIL code from Nationaal Archief
|
||||
|
|
@ -652,10 +659,19 @@ classes:
|
|||
range: string
|
||||
description: Status of Wikidata enrichment for this entry
|
||||
comment:
|
||||
range: ReferenceLink
|
||||
multivalued: true
|
||||
any_of:
|
||||
- range: string
|
||||
- range: ReferenceLink
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Comments about this entry (array of objects with label field)
|
||||
description: Comments about this entry (can be a string or array of objects with label field)
|
||||
comments:
|
||||
any_of:
|
||||
- range: string
|
||||
- range: ReferenceLink
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Comments about this entry (string or array of objects with label field)
|
||||
succeeded_by:
|
||||
range: ReferenceLink
|
||||
multivalued: true
|
||||
|
|
@ -668,6 +684,15 @@ classes:
|
|||
label:
|
||||
range: string
|
||||
description: Name/label of the duplicate institution
|
||||
entry_index:
|
||||
range: integer
|
||||
description: Index of the duplicate entry in source data
|
||||
entry_file:
|
||||
range: string
|
||||
description: Filename of the duplicate entry
|
||||
reason:
|
||||
range: string
|
||||
description: Reason why this is considered a duplicate
|
||||
|
||||
TimeEntry:
|
||||
description: Structured time entry from source data
|
||||
|
|
@ -852,6 +877,11 @@ classes:
|
|||
data_source:
|
||||
range: string
|
||||
description: Data source type (CSV_REGISTRY, API_SCRAPING, etc.)
|
||||
data_sources:
|
||||
range: string
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: List of data sources (e.g., NDE registry, Google Maps, website)
|
||||
data_tier:
|
||||
range: DataTierEnum
|
||||
description: Quality tier of the data
|
||||
|
|
@ -861,6 +891,12 @@ classes:
|
|||
extraction_method:
|
||||
range: string
|
||||
description: Method used to extract the data
|
||||
enrichment_date:
|
||||
range: string
|
||||
description: When enrichment was performed (ISO date string)
|
||||
enrichment_method:
|
||||
range: string
|
||||
description: Method used to enrich the data (e.g., website_research)
|
||||
confidence_score:
|
||||
range: float
|
||||
description: Confidence score (0-1)
|
||||
|
|
@ -894,6 +930,15 @@ classes:
|
|||
wikidata_property:
|
||||
range: string
|
||||
description: Wikidata property ID (e.g., P856)
|
||||
archive_location:
|
||||
range: string
|
||||
description: Location of archived copy (e.g., web/1186/hartebrug.nl)
|
||||
claim_extracted_from:
|
||||
range: string
|
||||
description: Source path from which claim was extracted (e.g., original_entry.reference)
|
||||
verified_via_web_archive:
|
||||
range: boolean
|
||||
description: Whether claim was verified via web archive
|
||||
|
||||
ProvenanceSources:
|
||||
description: Sources organized by type
|
||||
|
|
@ -943,6 +988,52 @@ classes:
|
|||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Nationaal Archief ISIL registry source records
|
||||
whois_research:
|
||||
range: SourceRecord
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: WHOIS domain research source records
|
||||
manual_research:
|
||||
range: SourceRecord
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Manual research source records
|
||||
website:
|
||||
range: SourceRecord
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Website source records (institution website data)
|
||||
web_scrape:
|
||||
range: SourceRecord
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Web scrape source records (scraped website data)
|
||||
# Data tier summary fields (for provenance summaries)
|
||||
TIER_1_AUTHORITATIVE:
|
||||
range: string
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: List of TIER_1 authoritative sources
|
||||
TIER_2_VERIFIED:
|
||||
range: string
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: List of TIER_2 verified sources
|
||||
TIER_3_CROWD_SOURCED:
|
||||
range: string
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: List of TIER_3 crowd-sourced sources
|
||||
TIER_4_INFERRED:
|
||||
range: string
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: List of TIER_4 inferred sources
|
||||
museum_register:
|
||||
range: SourceRecord
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Museum register source records
|
||||
|
||||
SourceRecord:
|
||||
description: Individual source record with claims
|
||||
|
|
@ -1004,6 +1095,20 @@ classes:
|
|||
source_file:
|
||||
range: string
|
||||
description: Source file name
|
||||
research_date:
|
||||
range: string
|
||||
description: Date of research (YYYY-MM-DD format)
|
||||
url:
|
||||
range: uri
|
||||
description: URL of the source (website URL, etc.)
|
||||
data_extracted:
|
||||
range: string
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: List of data types/fields extracted from this source
|
||||
merge_note:
|
||||
range: string
|
||||
description: Note about merge operations involving this source record
|
||||
|
||||
DataTierSummary:
|
||||
description: Summary of data tiers present in entry
|
||||
|
|
@ -1034,7 +1139,7 @@ classes:
|
|||
attributes:
|
||||
identifier_scheme:
|
||||
range: IdentifierSchemeEnum
|
||||
required: true
|
||||
required: false
|
||||
description: Type of identifier
|
||||
identifier_value:
|
||||
any_of:
|
||||
|
|
@ -1056,6 +1161,14 @@ classes:
|
|||
notes:
|
||||
range: string
|
||||
description: Additional note about this identifier (alias for note)
|
||||
scheme:
|
||||
range: string
|
||||
description: Identifier scheme (alias for identifier_scheme, used in some data sources)
|
||||
value:
|
||||
any_of:
|
||||
- range: string
|
||||
- range: integer
|
||||
description: Identifier value (alias for identifier_value, used in some data sources)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GHCID BLOCK - Heritage Custodian ID with history
|
||||
|
|
@ -1285,6 +1398,12 @@ classes:
|
|||
specific_location:
|
||||
range: string
|
||||
description: More specific location info within the city (e.g., neighborhood, district)
|
||||
specific_geonames_id:
|
||||
range: integer
|
||||
description: GeoNames ID for the specific location (if different from main city)
|
||||
correction_note:
|
||||
range: string
|
||||
description: Note explaining any correction made to the location resolution
|
||||
|
||||
SourceCoordinates:
|
||||
description: Source of coordinates for resolution
|
||||
|
|
@ -1304,13 +1423,19 @@ classes:
|
|||
attributes:
|
||||
type:
|
||||
range: string
|
||||
description: Type of research source (e.g., note, wikidata, web_archive, official_source)
|
||||
description: Type of research source (e.g., note, wikidata, web_archive, official_source, whois)
|
||||
text:
|
||||
range: string
|
||||
description: Text or description of the research source
|
||||
value:
|
||||
range: string
|
||||
description: Value from this source (e.g., plus code, address)
|
||||
notes:
|
||||
range: string
|
||||
description: Additional notes about this source
|
||||
note:
|
||||
range: string
|
||||
description: Additional note about this source (singular alias for notes)
|
||||
id:
|
||||
range: string
|
||||
description: Identifier for the source (e.g., Wikidata Q-number)
|
||||
|
|
@ -1323,6 +1448,56 @@ classes:
|
|||
coordinates:
|
||||
range: string
|
||||
description: Coordinates from this source (e.g., "31.515, 34.434")
|
||||
data:
|
||||
range: ResearchSourceData
|
||||
description: Structured data from the source (e.g., WHOIS registrant info)
|
||||
|
||||
ResearchSourceData:
|
||||
description: Structured data from a research source
|
||||
attributes:
|
||||
registrant_name:
|
||||
range: string
|
||||
description: WHOIS registrant name
|
||||
registrant_address:
|
||||
range: string
|
||||
description: WHOIS registrant address
|
||||
registrant_city:
|
||||
range: string
|
||||
description: WHOIS registrant city
|
||||
registrant_state:
|
||||
range: string
|
||||
description: WHOIS registrant state/province
|
||||
registrant_country:
|
||||
range: string
|
||||
description: WHOIS registrant country
|
||||
registrant_postal_code:
|
||||
range: string
|
||||
description: WHOIS registrant postal code
|
||||
# Additional flexible fields for other data types
|
||||
organization:
|
||||
range: string
|
||||
description: Organization name
|
||||
email:
|
||||
range: string
|
||||
description: Contact email
|
||||
phone:
|
||||
range: string
|
||||
description: Contact phone
|
||||
creation_date:
|
||||
range: string
|
||||
description: Domain creation date
|
||||
updated_date:
|
||||
range: string
|
||||
description: Domain updated date
|
||||
expiration_date:
|
||||
range: string
|
||||
description: Domain expiration date
|
||||
domain_registered:
|
||||
range: string
|
||||
description: Domain registration date
|
||||
registry:
|
||||
range: string
|
||||
description: Domain registrar name
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GOOGLE MAPS ENRICHMENT
|
||||
|
|
@ -1485,8 +1660,10 @@ classes:
|
|||
inlined_as_list: true
|
||||
description: Topics mentioned in reviews
|
||||
reviews_summary:
|
||||
range: string
|
||||
description: Summary of reviews
|
||||
any_of:
|
||||
- range: string
|
||||
- range: ReviewsSummary
|
||||
description: Summary of reviews (string or structured breakdown)
|
||||
sample_reviews:
|
||||
any_of:
|
||||
- range: string
|
||||
|
|
@ -1523,10 +1700,13 @@ classes:
|
|||
inlined_as_list: true
|
||||
description: Nearby organizations (strings or structured objects)
|
||||
features:
|
||||
range: string
|
||||
multivalued: true
|
||||
any_of:
|
||||
- range: string
|
||||
multivalued: true
|
||||
- range: PlaceFeature
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Features of the place
|
||||
description: Features of the place (strings or key-value objects)
|
||||
hours_status:
|
||||
range: string
|
||||
description: Current opening status (e.g., "Closed · Opens 2 pm Wed")
|
||||
|
|
@ -1603,6 +1783,23 @@ classes:
|
|||
match_notes:
|
||||
range: string
|
||||
description: Notes about how the Google Maps match was determined
|
||||
price_level:
|
||||
any_of:
|
||||
- range: integer
|
||||
- range: string
|
||||
description: Google Maps price level (0-4 or string description)
|
||||
match_warning:
|
||||
range: string
|
||||
description: Warning about potential issues with the match
|
||||
location_note:
|
||||
range: string
|
||||
description: Note about the physical location of the place
|
||||
search_attempted:
|
||||
range: boolean
|
||||
description: Whether a Google Maps search was attempted
|
||||
result:
|
||||
range: string
|
||||
description: Result of search operation (found, not_found, found_via_user_link, etc.)
|
||||
|
||||
RejectedGoogleMapsData:
|
||||
description: Rejected Google Maps data preserved for audit trail
|
||||
|
|
@ -1625,6 +1822,53 @@ classes:
|
|||
returned_country:
|
||||
range: string
|
||||
description: Country code actually returned by Google Maps
|
||||
website:
|
||||
range: uri
|
||||
description: Website URL from Google Maps
|
||||
latitude:
|
||||
range: float
|
||||
description: Latitude coordinate
|
||||
longitude:
|
||||
range: float
|
||||
description: Longitude coordinate
|
||||
enriched_at:
|
||||
range: datetime
|
||||
description: When enrichment was performed
|
||||
|
||||
PlaceFeature:
|
||||
description: A feature flag for a place (e.g., native_garden, shop, volunteers)
|
||||
class_uri: schema:PropertyValue
|
||||
attributes:
|
||||
native_garden:
|
||||
range: boolean
|
||||
description: Has a native garden
|
||||
shop:
|
||||
range: boolean
|
||||
description: Has a shop
|
||||
volunteers:
|
||||
range: boolean
|
||||
description: Has volunteers
|
||||
parking:
|
||||
range: boolean
|
||||
description: Has parking
|
||||
cafe:
|
||||
range: boolean
|
||||
description: Has a cafe
|
||||
restaurant:
|
||||
range: boolean
|
||||
description: Has a restaurant
|
||||
gift_shop:
|
||||
range: boolean
|
||||
description: Has a gift shop
|
||||
wheelchair_accessible:
|
||||
range: boolean
|
||||
description: Is wheelchair accessible
|
||||
guided_tours:
|
||||
range: boolean
|
||||
description: Offers guided tours
|
||||
audio_guide:
|
||||
range: boolean
|
||||
description: Offers audio guides
|
||||
|
||||
LlmVerification:
|
||||
description: LLM-based verification results for Google Maps matching
|
||||
|
|
@ -1709,6 +1953,25 @@ classes:
|
|||
minute:
|
||||
range: integer
|
||||
|
||||
ReviewsSummary:
|
||||
description: Breakdown of reviews by star rating
|
||||
attributes:
|
||||
5_star:
|
||||
range: integer
|
||||
description: Number of 5-star reviews
|
||||
4_star:
|
||||
range: integer
|
||||
description: Number of 4-star reviews
|
||||
3_star:
|
||||
range: integer
|
||||
description: Number of 3-star reviews
|
||||
2_star:
|
||||
range: integer
|
||||
description: Number of 2-star reviews
|
||||
1_star:
|
||||
range: integer
|
||||
description: Number of 1-star reviews
|
||||
|
||||
GoogleReview:
|
||||
description: Google Maps review
|
||||
attributes:
|
||||
|
|
@ -1828,8 +2091,10 @@ classes:
|
|||
wikidata_temporal:
|
||||
range: WikidataTemporal
|
||||
wikidata_inception:
|
||||
range: string
|
||||
description: Inception date (P571)
|
||||
any_of:
|
||||
- range: string
|
||||
- range: WikidataTimeValue
|
||||
description: Inception date (P571) - can be string or structured time value
|
||||
wikidata_classification:
|
||||
range: WikidataClassification
|
||||
wikidata_instance_of:
|
||||
|
|
@ -1946,6 +2211,29 @@ classes:
|
|||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Search terms attempted when looking for Wikidata entity
|
||||
wikidata_description_nl:
|
||||
range: string
|
||||
description: Description in Dutch language
|
||||
wikidata_claims:
|
||||
range: WikidataClaims
|
||||
description: Structured Wikidata claims with property metadata
|
||||
inlined: true
|
||||
_resolved_entities:
|
||||
range: WikidataResolvedEntities
|
||||
description: Resolved Wikidata property and entity metadata cache
|
||||
inlined: true
|
||||
|
||||
WikidataClaims:
|
||||
description: |
|
||||
Structured Wikidata claims with property metadata and values.
|
||||
Uses flexible dict-like structure for various claim types.
|
||||
class_uri: linkml:Any
|
||||
|
||||
WikidataResolvedEntities:
|
||||
description: |
|
||||
Cache of resolved Wikidata property and entity metadata.
|
||||
Keys are property IDs (P123), values are property metadata.
|
||||
class_uri: linkml:Any
|
||||
|
||||
WikidataApiMetadata:
|
||||
description: API call metadata
|
||||
|
|
@ -2058,6 +2346,19 @@ classes:
|
|||
inlined_as_list: true
|
||||
description: Main subject (P921)
|
||||
|
||||
WikidataTimeValue:
|
||||
description: Wikidata time value with precision metadata
|
||||
attributes:
|
||||
time:
|
||||
range: string
|
||||
description: Time value in ISO 8601 format (e.g., +2015-00-00T00:00:00Z)
|
||||
precision:
|
||||
range: integer
|
||||
description: Precision level (9=year, 10=month, 11=day, etc.)
|
||||
calendarmodel:
|
||||
range: uri
|
||||
description: Calendar model URI (e.g., http://www.wikidata.org/entity/Q1985727 for Gregorian)
|
||||
|
||||
WikidataEntity:
|
||||
description: Reference to a Wikidata entity
|
||||
attributes:
|
||||
|
|
@ -2104,7 +2405,10 @@ classes:
|
|||
description: Location properties from Wikidata
|
||||
attributes:
|
||||
country:
|
||||
range: WikidataEntity
|
||||
any_of:
|
||||
- range: string
|
||||
- range: WikidataEntity
|
||||
description: Country Q-ID (can be string or WikidataEntity object)
|
||||
headquarters_location:
|
||||
range: WikidataEntity
|
||||
coordinates:
|
||||
|
|
@ -2158,8 +2462,10 @@ classes:
|
|||
multivalued: true
|
||||
inlined_as_list: true
|
||||
parent_organization:
|
||||
range: WikidataEntity
|
||||
description: Parent organization (P749)
|
||||
any_of:
|
||||
- range: string
|
||||
- range: WikidataEntity
|
||||
description: Parent organization Q-ID or entity (P749)
|
||||
subsidiary:
|
||||
range: WikidataEntity
|
||||
multivalued: true
|
||||
|
|
@ -2433,6 +2739,9 @@ classes:
|
|||
website_found:
|
||||
range: boolean
|
||||
description: Whether a website was found
|
||||
official_website:
|
||||
range: uri
|
||||
description: Official website URL found during research
|
||||
research_notes:
|
||||
range: string
|
||||
description: Notes from research
|
||||
|
|
@ -2504,6 +2813,12 @@ classes:
|
|||
merger_target:
|
||||
range: string
|
||||
description: Target organization in merger
|
||||
successor_name:
|
||||
range: string
|
||||
description: Name of successor organization (for mergers)
|
||||
successor_location:
|
||||
range: string
|
||||
description: Location of successor organization (for mergers)
|
||||
notes:
|
||||
range: string
|
||||
description: Additional notes
|
||||
|
|
@ -2552,6 +2867,16 @@ classes:
|
|||
type:
|
||||
range: string
|
||||
description: Type of collection (oral_history, photographs, documents, etc.)
|
||||
item_count:
|
||||
any_of:
|
||||
- range: integer
|
||||
- range: string
|
||||
description: Number of items in the collection (integer or descriptive string)
|
||||
total_hours:
|
||||
any_of:
|
||||
- range: float
|
||||
- range: string
|
||||
description: Total hours of content (for audio/video collections)
|
||||
|
||||
WebArchiveFailure:
|
||||
description: Failed archive attempt record
|
||||
|
|
@ -2682,7 +3007,8 @@ classes:
|
|||
- range: string
|
||||
- range: string
|
||||
multivalued: true
|
||||
description: Extracted value (alias for claim_value, can be string or list)
|
||||
- range: OpeningHoursMap
|
||||
description: Extracted value (alias for claim_value, can be string, list, or structured object like opening hours)
|
||||
raw_value:
|
||||
range: string
|
||||
description: Raw value before processing
|
||||
|
|
@ -2807,6 +3133,9 @@ classes:
|
|||
job_title_en:
|
||||
range: string
|
||||
description: Job title in English
|
||||
department_en:
|
||||
range: string
|
||||
description: Department name in English
|
||||
|
||||
RawSource:
|
||||
description: Raw source information for web enrichment
|
||||
|
|
@ -2838,6 +3167,63 @@ classes:
|
|||
raw_markdown_hash:
|
||||
range: string
|
||||
description: SHA-256 hash of the raw markdown content
|
||||
exa_highlights:
|
||||
range: string
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Highlighted excerpts from Exa search results
|
||||
exa_highlight_scores:
|
||||
range: float
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Relevance scores for Exa highlights
|
||||
|
||||
OpeningHoursMap:
|
||||
description: Opening hours as a day-keyed map
|
||||
class_uri: schema:OpeningHoursSpecification
|
||||
attributes:
|
||||
maandag:
|
||||
range: string
|
||||
description: Monday hours (Dutch)
|
||||
dinsdag:
|
||||
range: string
|
||||
description: Tuesday hours (Dutch)
|
||||
woensdag:
|
||||
range: string
|
||||
description: Wednesday hours (Dutch)
|
||||
donderdag:
|
||||
range: string
|
||||
description: Thursday hours (Dutch)
|
||||
vrijdag:
|
||||
range: string
|
||||
description: Friday hours (Dutch)
|
||||
zaterdag:
|
||||
range: string
|
||||
description: Saturday hours (Dutch)
|
||||
zondag:
|
||||
range: string
|
||||
description: Sunday hours (Dutch)
|
||||
monday:
|
||||
range: string
|
||||
description: Monday hours (English)
|
||||
tuesday:
|
||||
range: string
|
||||
description: Tuesday hours (English)
|
||||
wednesday:
|
||||
range: string
|
||||
description: Wednesday hours (English)
|
||||
thursday:
|
||||
range: string
|
||||
description: Thursday hours (English)
|
||||
friday:
|
||||
range: string
|
||||
description: Friday hours (English)
|
||||
saturday:
|
||||
range: string
|
||||
description: Saturday hours (English)
|
||||
sunday:
|
||||
range: string
|
||||
description: Sunday hours (English)
|
||||
|
||||
SourceReference:
|
||||
description: Structured source reference for a claim
|
||||
|
|
@ -2961,8 +3347,12 @@ classes:
|
|||
range: string
|
||||
description: Note explaining manual correction made to the name
|
||||
merge_notes:
|
||||
range: string
|
||||
description: Notes about name merging or deduplication
|
||||
any_of:
|
||||
- range: string
|
||||
- range: MergeNote
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Notes about name merging or deduplication (string or array of structured objects)
|
||||
abbreviation:
|
||||
range: string
|
||||
description: Short form or abbreviation of the name
|
||||
|
|
@ -2980,6 +3370,9 @@ classes:
|
|||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Previous names the institution was known by (strings or structured objects)
|
||||
short_name:
|
||||
range: string
|
||||
description: Short name or commonly used abbreviated form of the institution name
|
||||
|
||||
FormerName:
|
||||
description: A former name of the institution with optional metadata
|
||||
|
|
@ -3001,6 +3394,19 @@ classes:
|
|||
range: string
|
||||
description: Additional notes about this former name
|
||||
|
||||
MergeNote:
|
||||
description: Note about a merge operation between duplicate entries
|
||||
attributes:
|
||||
source:
|
||||
range: string
|
||||
description: Source entry identifier that was merged
|
||||
merged_on:
|
||||
range: string
|
||||
description: Date when merge occurred (YYYY-MM-DD)
|
||||
reason:
|
||||
range: string
|
||||
description: Reason for the merge (e.g., duplicate Wikidata ID, same place ID)
|
||||
|
||||
MatchingSource:
|
||||
description: Source that contributed to name consensus
|
||||
attributes:
|
||||
|
|
@ -3290,6 +3696,25 @@ classes:
|
|||
enrichment_source:
|
||||
range: string
|
||||
description: Source of enrichment (e.g., manual_curation, api_scraping)
|
||||
host_organization:
|
||||
range: string
|
||||
description: Organization hosting this platform
|
||||
host_website:
|
||||
range: uri
|
||||
description: Main website of the host organization
|
||||
language:
|
||||
range: string
|
||||
description: Primary language of the platform (ISO 639-1 code)
|
||||
features:
|
||||
range: string
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Features of this platform
|
||||
platforms:
|
||||
range: string
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
description: Sub-platforms or related platforms
|
||||
|
||||
PlatformSourceReference:
|
||||
description: Structured source reference for a digital platform
|
||||
|
|
@ -3465,6 +3890,12 @@ classes:
|
|||
override_reason:
|
||||
range: string
|
||||
description: Reason for manual coordinate override
|
||||
source_url:
|
||||
range: uri
|
||||
description: URL source of coordinates (e.g., Google Maps link)
|
||||
note:
|
||||
range: string
|
||||
description: Additional note about coordinate provenance
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ADDITIONAL ENRICHMENT TYPES
|
||||
|
|
|
|||
|
|
@ -1,106 +1,355 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Load TypeDB schemas from files into the glam-heritage database
|
||||
Load Heritage Custodian schema into TypeDB 3.x.
|
||||
|
||||
This script loads the Heritage Custodian Observation & Reconstruction schema
|
||||
into TypeDB. The schema must be loaded in parts due to TypeDB 3.x requirements:
|
||||
1. Attributes first (before entities can reference them)
|
||||
2. Relations second (before entities can play roles)
|
||||
3. Entities third (can now reference attributes and play roles)
|
||||
|
||||
Usage:
|
||||
python scripts/load_typedb_schema.py [--host HOST] [--port PORT] [--database DATABASE]
|
||||
|
||||
Prerequisites:
|
||||
- TypeDB server running (default: localhost:1729)
|
||||
- TypeDB Python driver installed (typedb-driver >= 3.0.0)
|
||||
- Database will be created if it doesn't exist
|
||||
|
||||
Example:
|
||||
# Start TypeDB server
|
||||
~/.typedb/typedb server
|
||||
|
||||
# Load schema
|
||||
poetry run python scripts/load_typedb_schema.py
|
||||
|
||||
TypeDB 3.x Migration Notes:
|
||||
- Uses Credentials and DriverOptions (not core_driver)
|
||||
- Uses TransactionType.SCHEMA (not SessionType.SCHEMA + TransactionType.WRITE)
|
||||
- tx.query().resolve() instead of tx.query.define()
|
||||
- No sessions - transactions are created directly on driver
|
||||
- 'entity' is a reserved word - renamed to 'observed-entity' in observation-of relation
|
||||
"""
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typedb.driver import TypeDB, SessionType, TransactionType
|
||||
|
||||
# Configuration
|
||||
SERVER_ADDRESS = "localhost:1729"
|
||||
DATABASE_NAME = "glam-heritage"
|
||||
SCHEMA_DIR = Path("/Users/kempersc/apps/glam/schemas/20251121/typedb")
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
# Schema files in order
|
||||
SCHEMA_FILES = [
|
||||
"01_name_entity_hub.tql",
|
||||
"02_heritage_custodian.tql",
|
||||
"03_identifiers.tql",
|
||||
"04_locations.tql",
|
||||
"05_digital_platforms.tql",
|
||||
"06_provenance.tql",
|
||||
"07_collections.tql",
|
||||
"08_relationships.tql",
|
||||
"09_change_events.tql",
|
||||
"10_rules.tql",
|
||||
]
|
||||
|
||||
def load_schema():
|
||||
"""Load TypeDB schema files into the database"""
|
||||
print(f"🔗 Connecting to TypeDB at {SERVER_ADDRESS}...")
|
||||
def get_schema_parts():
|
||||
"""Return the schema split into loadable parts.
|
||||
|
||||
with TypeDB.core_driver(SERVER_ADDRESS) as driver:
|
||||
# Check if database exists
|
||||
if not driver.databases.contains(DATABASE_NAME):
|
||||
print(f"❌ Database '{DATABASE_NAME}' does not exist!")
|
||||
print(f" Create it first: typedb console --command='database create {DATABASE_NAME}'")
|
||||
return False
|
||||
|
||||
print(f"✅ Connected to database: {DATABASE_NAME}")
|
||||
print(f"📂 Schema directory: {SCHEMA_DIR}")
|
||||
print()
|
||||
|
||||
# Load each schema file
|
||||
for schema_file in SCHEMA_FILES:
|
||||
schema_path = SCHEMA_DIR / schema_file
|
||||
|
||||
if not schema_path.exists():
|
||||
print(f"⚠️ Skipping {schema_file} (file not found)")
|
||||
continue
|
||||
|
||||
print(f"📝 Loading {schema_file}...")
|
||||
|
||||
try:
|
||||
with driver.session(DATABASE_NAME, SessionType.SCHEMA) as session:
|
||||
with session.transaction(TransactionType.WRITE) as tx:
|
||||
# Read schema file
|
||||
with open(schema_path, 'r') as f:
|
||||
schema_content = f.read()
|
||||
|
||||
# Execute TypeQL define query
|
||||
tx.query.define(schema_content)
|
||||
tx.commit()
|
||||
|
||||
print(f" ✅ Successfully loaded {schema_file}")
|
||||
|
||||
except Exception as e:
|
||||
print(f" ❌ Error loading {schema_file}: {e}")
|
||||
return False
|
||||
|
||||
print()
|
||||
print("🎉 All schemas loaded successfully!")
|
||||
return True
|
||||
|
||||
def verify_schema():
|
||||
"""Verify the loaded schema"""
|
||||
print("\n🔍 Verifying schema...")
|
||||
The schema is split into 3 parts that must be loaded in order:
|
||||
1. Attributes - all attribute type definitions
|
||||
2. Relations - all relation type definitions with role types
|
||||
3. Entities - all entity type definitions with owns/plays
|
||||
|
||||
with TypeDB.core_driver(SERVER_ADDRESS) as driver:
|
||||
with driver.session(DATABASE_NAME, SessionType.SCHEMA) as session:
|
||||
with session.transaction(TransactionType.READ) as tx:
|
||||
# Get all entity types
|
||||
result = tx.query.fetch("match $x sub entity; fetch $x;")
|
||||
entities = list(result)
|
||||
|
||||
print(f"✅ Found {len(entities)} entity types")
|
||||
|
||||
# Sample a few
|
||||
for i, entity in enumerate(entities[:5]):
|
||||
print(f" - {entity}")
|
||||
|
||||
if len(entities) > 5:
|
||||
print(f" ... and {len(entities) - 5} more")
|
||||
Note: The original .tql file uses 'entity' as a role name in observation-of,
|
||||
but 'entity' is a reserved word in TypeDB 3.x. This is fixed by renaming
|
||||
the role to 'observed-entity'.
|
||||
"""
|
||||
|
||||
# Part 1: Attributes
|
||||
attributes = """
|
||||
define
|
||||
attribute id, value string;
|
||||
attribute created, value datetime;
|
||||
attribute modified, value datetime;
|
||||
attribute observed-name, value string;
|
||||
attribute alternative-observed-name, value string;
|
||||
attribute observation-date, value datetime;
|
||||
attribute observation-context, value string;
|
||||
attribute standardized-name, value string;
|
||||
attribute endorsement-source, value string;
|
||||
attribute name-authority, value string;
|
||||
attribute valid-from, value datetime;
|
||||
attribute valid-to, value datetime;
|
||||
attribute legal-name, value string;
|
||||
attribute legal-form, value string;
|
||||
attribute registration-number, value string;
|
||||
attribute registration-date, value datetime;
|
||||
attribute registration-authority, value string;
|
||||
attribute dissolution-date, value datetime;
|
||||
attribute legal-status, value string;
|
||||
attribute governance-structure, value string;
|
||||
attribute source-uri, value string;
|
||||
attribute source-type, value string;
|
||||
attribute source-date, value datetime;
|
||||
attribute source-creator, value string;
|
||||
attribute activity-type, value string;
|
||||
attribute method, value string;
|
||||
attribute justification, value string;
|
||||
attribute started-at-time, value datetime;
|
||||
attribute ended-at-time, value datetime;
|
||||
attribute agent-name, value string;
|
||||
attribute agent-type, value string;
|
||||
attribute affiliation, value string;
|
||||
attribute contact, value string;
|
||||
attribute appellation-value, value string;
|
||||
attribute appellation-language, value string;
|
||||
attribute appellation-type, value string;
|
||||
attribute identifier-scheme, value string;
|
||||
attribute identifier-value, value string;
|
||||
attribute begin-of-the-begin, value datetime;
|
||||
attribute begin-of-the-end, value datetime;
|
||||
attribute end-of-the-begin, value datetime;
|
||||
attribute end-of-the-end, value datetime;
|
||||
attribute confidence-value, value double;
|
||||
attribute confidence-method, value string;
|
||||
attribute language-code-value, value string;
|
||||
"""
|
||||
|
||||
# Part 2: Relations
|
||||
relations = """
|
||||
define
|
||||
relation derivation,
|
||||
relates derived-entity,
|
||||
relates source-entity;
|
||||
|
||||
relation generation,
|
||||
relates generated-entity,
|
||||
relates generating-activity;
|
||||
|
||||
relation revision,
|
||||
relates revised-entity,
|
||||
relates prior-version;
|
||||
|
||||
relation activity-association,
|
||||
relates activity,
|
||||
relates agent;
|
||||
|
||||
relation activity-usage,
|
||||
relates activity,
|
||||
relates used-source;
|
||||
|
||||
relation source-citation,
|
||||
relates observation,
|
||||
relates source;
|
||||
|
||||
relation organizational-hierarchy,
|
||||
relates parent,
|
||||
relates child;
|
||||
|
||||
relation name-succession,
|
||||
relates predecessor,
|
||||
relates successor;
|
||||
|
||||
relation has-appellation,
|
||||
relates subject,
|
||||
relates appellation;
|
||||
|
||||
relation has-identifier,
|
||||
relates subject,
|
||||
relates identifier;
|
||||
|
||||
relation observation-of,
|
||||
relates observation,
|
||||
relates observed-entity;
|
||||
"""
|
||||
|
||||
# Part 3: Entities
|
||||
entities = """
|
||||
define
|
||||
entity custodian @abstract,
|
||||
owns id,
|
||||
owns created,
|
||||
owns modified,
|
||||
plays derivation:derived-entity,
|
||||
plays derivation:source-entity,
|
||||
plays generation:generated-entity,
|
||||
plays observation-of:observation,
|
||||
plays observation-of:observed-entity;
|
||||
|
||||
entity custodian-observation sub custodian,
|
||||
owns observed-name,
|
||||
owns alternative-observed-name,
|
||||
owns observation-date,
|
||||
owns observation-context,
|
||||
owns confidence-value,
|
||||
owns confidence-method,
|
||||
plays source-citation:observation,
|
||||
plays has-appellation:subject;
|
||||
|
||||
entity custodian-name sub custodian-observation,
|
||||
owns standardized-name,
|
||||
owns endorsement-source,
|
||||
owns name-authority,
|
||||
owns valid-from,
|
||||
owns valid-to,
|
||||
plays name-succession:predecessor,
|
||||
plays name-succession:successor;
|
||||
|
||||
entity custodian-reconstruction sub custodian,
|
||||
owns legal-name,
|
||||
owns legal-form,
|
||||
owns registration-number,
|
||||
owns registration-date,
|
||||
owns registration-authority,
|
||||
owns dissolution-date,
|
||||
owns legal-status,
|
||||
owns governance-structure,
|
||||
plays has-identifier:subject,
|
||||
plays organizational-hierarchy:parent,
|
||||
plays organizational-hierarchy:child,
|
||||
plays revision:revised-entity,
|
||||
plays revision:prior-version;
|
||||
|
||||
entity source-document,
|
||||
owns id,
|
||||
owns source-uri,
|
||||
owns source-type,
|
||||
owns source-date,
|
||||
owns source-creator,
|
||||
plays source-citation:source,
|
||||
plays activity-usage:used-source;
|
||||
|
||||
entity reconstruction-activity,
|
||||
owns id,
|
||||
owns activity-type,
|
||||
owns method,
|
||||
owns justification,
|
||||
owns started-at-time,
|
||||
owns ended-at-time,
|
||||
plays generation:generating-activity,
|
||||
plays activity-association:activity,
|
||||
plays activity-usage:activity;
|
||||
|
||||
entity agent,
|
||||
owns id,
|
||||
owns agent-name,
|
||||
owns agent-type,
|
||||
owns affiliation,
|
||||
owns contact,
|
||||
plays activity-association:agent;
|
||||
|
||||
entity appellation,
|
||||
owns appellation-value,
|
||||
owns appellation-language,
|
||||
owns appellation-type,
|
||||
plays has-appellation:appellation;
|
||||
|
||||
entity identifier,
|
||||
owns identifier-scheme,
|
||||
owns identifier-value,
|
||||
plays has-identifier:identifier;
|
||||
|
||||
entity time-span,
|
||||
owns begin-of-the-begin,
|
||||
owns begin-of-the-end,
|
||||
owns end-of-the-begin,
|
||||
owns end-of-the-end;
|
||||
|
||||
entity confidence-measure,
|
||||
owns confidence-value,
|
||||
owns confidence-method;
|
||||
|
||||
entity language-code,
|
||||
owns language-code-value;
|
||||
"""
|
||||
|
||||
return [
|
||||
("Attributes", attributes),
|
||||
("Relations", relations),
|
||||
("Entities", entities),
|
||||
]
|
||||
|
||||
|
||||
def load_schema(host: str = "localhost", port: int = 1729, database: str = "heritage_custodians"):
    """Load the Heritage Custodian schema into TypeDB.

    Connects to the server, creates the database when it is missing, applies
    every part returned by ``get_schema_parts()`` in its own schema
    transaction, and finally prints an instance count for a few custodian
    types as a smoke test.

    The process is terminated with exit status 1 when the driver package is
    not installed, when the connection cannot be established, or when a
    schema part fails with an error other than "already exists" /
    "redefinition" (those two are reported and skipped).

    Args:
        host: TypeDB server host
        port: TypeDB server port
        database: Database name (will be created if doesn't exist)
    """
    try:
        from typedb.driver import TypeDB, Credentials, DriverOptions, TransactionType
    except ImportError:
        print("Error: typedb-driver not installed. Run: poetry add typedb-driver")
        sys.exit(1)

    # Connect to TypeDB
    # NOTE(review): credentials are hard-coded and TLS is disabled — confirm
    # this loader is only ever pointed at a local development server.
    address = f"{host}:{port}"
    credentials = Credentials("admin", "password")
    options = DriverOptions(is_tls_enabled=False)

    print(f"Connecting to TypeDB at {address}...")

    try:
        driver = TypeDB.driver(address, credentials, options)
    except Exception as e:
        print(f"Error connecting to TypeDB: {e}")
        print("Make sure TypeDB server is running: ~/.typedb/typedb server")
        sys.exit(1)

    # Everything that uses the driver lives inside try/finally so the
    # connection is closed on every exit path, including sys.exit() and
    # unexpected exceptions from the database listing or verification query.
    try:
        # Check if database exists
        db_names = [db.name for db in driver.databases.all()]
        if database not in db_names:
            print(f"Database '{database}' not found. Creating...")
            driver.databases.create(database)
            print(f"Created database '{database}'")
        else:
            print(f"Using existing database '{database}'")

        # Load schema parts
        print()
        schema_parts = get_schema_parts()

        for name, schema in schema_parts:
            print(f"Loading {name}...")
            try:
                with driver.transaction(database, TransactionType.SCHEMA) as tx:
                    tx.query(schema).resolve()
                    tx.commit()
                print(f" ✓ {name} loaded successfully")
            except Exception as e:
                error_msg = str(e).lower()
                if "already exists" in error_msg or "redefinition" in error_msg:
                    # Re-running the loader against an existing schema is fine.
                    print(f" ⚠ {name} already exists (skipping)")
                else:
                    print(f" ✗ Error loading {name}: {e}")
                    sys.exit(1)

        # Verify schema loaded
        print("\nVerifying schema...")
        with driver.transaction(database, TransactionType.READ) as tx:
            for type_name in ["custodian-observation", "custodian-name", "custodian-reconstruction"]:
                count_query = f"""
                    match
                    $inst isa {type_name};
                    reduce $count = count;
                """
                answer = tx.query(count_query).resolve()
                # Only the first answer row is inspected.
                for row in answer:
                    value = row.get("count")
                    count = value.get_integer() if hasattr(value, 'get_integer') else int(str(value))
                    print(f" {type_name}: {count} entities")
                    break
    finally:
        driver.close()

    print("\n✓ Schema loaded successfully!")
    print(f"\nDatabase '{database}' is ready for data ingestion.")
|
||||
|
||||
|
||||
def main():
    """Read --host/--port/--database from the command line and load the schema."""
    # One (flag, argparse keyword arguments) entry per supported option.
    option_specs = (
        ("--host", {"default": "localhost", "help": "TypeDB server host (default: localhost)"}),
        ("--port", {"type": int, "default": 1729, "help": "TypeDB server port (default: 1729)"}),
        (
            "--database",
            {
                "default": "heritage_custodians",
                "help": "Database name (default: heritage_custodians)",
            },
        ),
    )

    cli = argparse.ArgumentParser(description="Load Heritage Custodian schema into TypeDB 3.x")
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    chosen = cli.parse_args()
    load_schema(host=chosen.host, port=chosen.port, database=chosen.database)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Print the banner, then hand over to main(), which parses the command
    # line before anything touches the server. The previous guard first ran
    # load_schema() with default settings (so --host/--port/--database and
    # --help only took effect after a full load against localhost) and then
    # ran main() as well, loading the schema a second time. Its
    # "if success: verify_schema()" branch could never run because
    # load_schema() has no return value; load_schema() already prints its own
    # verification counts.
    print("=" * 60)
    print("TypeDB Schema Loader")
    print("=" * 60)
    print()

    main()
|
||||
|
|
|
|||
472
scripts/test_pico_arabic_waqf.py
Normal file
472
scripts/test_pico_arabic_waqf.py
Normal file
|
|
@ -0,0 +1,472 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test PiCo extraction with Arabic waqf (endowment) document example.
|
||||
|
||||
This script tests the GLM annotator's ability to extract person observations
|
||||
from Arabic historical documents following the PiCo ontology pattern.
|
||||
|
||||
Usage:
|
||||
python scripts/test_pico_arabic_waqf.py
|
||||
|
||||
Environment Variables:
|
||||
ZAI_API_TOKEN - Required for Z.AI GLM-4.6 API
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import httpx
|
||||
|
||||
# Load environment variables from .env file
|
||||
project_root = Path(__file__).parent.parent
|
||||
sys.path.insert(0, str(project_root))
|
||||
|
||||
try:
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(project_root / ".env")
|
||||
except ImportError:
|
||||
pass # dotenv not required if env vars set directly
|
||||
|
||||
|
||||
# Z.AI API configuration (per AGENTS.md Rule 11)
|
||||
# GLM-4.6 uses reasoning mode - essential for complex historical document extraction
|
||||
# Requires higher max_tokens to accommodate reasoning + output
|
||||
ZAI_API_URL = "https://api.z.ai/api/coding/paas/v4/chat/completions"
|
||||
ZAI_MODEL = "glm-4.6"
|
||||
|
||||
|
||||
# Arabic waqf document example (from pico.yaml)
|
||||
ARABIC_WAQF_TEXT = """بسم الله الرحمن الرحيم
|
||||
هذا ما وقف وحبس وسبل وأبد المرحوم الحاج أحمد بن محمد العمري، تاجر بمدينة
|
||||
حلب الشهباء، ابن المرحوم محمد بن عبد الله العمري. وقف جميع داره الكائنة
|
||||
بمحلة الجديدة على أولاده وأولاد أولاده ذكوراً وإناثاً. وإن انقرضوا لا سمح
|
||||
الله فعلى فقراء المسلمين. وشهد على ذلك الشهود: الحاج إبراهيم بن يوسف
|
||||
التركماني، والسيد علي بن حسين الحلبي. وكتب في شهر رجب سنة ألف ومائتين
|
||||
وخمس وعشرين هجرية."""
|
||||
|
||||
|
||||
# PiCo extraction system prompt (abbreviated version for testing)
|
||||
PICO_SYSTEM_PROMPT = """You are a historical document annotator following the PiCo (Person in Context) ontology.
|
||||
|
||||
Extract ALL persons mentioned in the source text, capturing:
|
||||
1. Names using PNV (Person Name Vocabulary) structure
|
||||
2. Roles in the source document
|
||||
3. Biographical information
|
||||
4. Family relationships between persons in THIS source
|
||||
5. For Arabic texts: include both original script AND romanized versions
|
||||
|
||||
### Arabic Naming Conventions
|
||||
- ابن/بن (ibn/bin): son of (patronymic)
|
||||
- بنت (bint): daughter of
|
||||
- الحاج (al-Hajj): honorific for pilgrimage completer
|
||||
- السيد (al-Sayyid): honorific (descendant of Prophet)
|
||||
- المرحوم (al-marhum): the late (deceased male)
|
||||
- آل (Al): family of
|
||||
|
||||
### Family Relationship Keys
|
||||
- parent: array of person references (person_index + target_name)
|
||||
- children: array of person references
|
||||
- spouse: array of person references
|
||||
|
||||
### Output Format
|
||||
Return ONLY valid JSON:
|
||||
|
||||
{
|
||||
"pico_observation": {
|
||||
"observation_id": "<source-derived-id>",
|
||||
"observed_at": "<ISO-timestamp>",
|
||||
"source_type": "<category>",
|
||||
"source_reference": "<identifier>"
|
||||
},
|
||||
"persons": [
|
||||
{
|
||||
"person_index": 0,
|
||||
"pnv_name": {
|
||||
"literalName": "Name in original script",
|
||||
"literalName_romanized": "Romanized name",
|
||||
"givenName": "Given name",
|
||||
"givenName_romanized": "Romanized given name",
|
||||
"patronym": "Father's name",
|
||||
"patronym_romanized": "Romanized patronym",
|
||||
"baseSurname": "Family/tribal name",
|
||||
"baseSurname_romanized": "Romanized surname",
|
||||
"honorificPrefix": "Title/honorific",
|
||||
"honorificPrefix_romanized": "Romanized honorific"
|
||||
},
|
||||
"roles": [
|
||||
{
|
||||
"role_title": "Role as stated",
|
||||
"role_title_romanized": "Romanized role",
|
||||
"role_in_source": "founder|witness|beneficiary|null"
|
||||
}
|
||||
],
|
||||
"biographical": {
|
||||
"deceased": true/false/null,
|
||||
"address": "Location if mentioned"
|
||||
},
|
||||
"family_relationships": {
|
||||
"parent": [{"person_index": N, "target_name": "Name"}],
|
||||
"children": [{"person_index": N, "target_name": "Name"}]
|
||||
},
|
||||
"context": "Brief description of person's role"
|
||||
}
|
||||
],
|
||||
"temporal_references": [
|
||||
{
|
||||
"expression": "Original text",
|
||||
"expression_romanized": "Romanized",
|
||||
"normalized": "ISO date or approximate",
|
||||
"calendar": "Hijri|Gregorian",
|
||||
"type": "DATE"
|
||||
}
|
||||
],
|
||||
"locations_mentioned": [
|
||||
{
|
||||
"name": "Original name",
|
||||
"name_romanized": "Romanized",
|
||||
"type": "city|neighborhood"
|
||||
}
|
||||
]
|
||||
}"""
|
||||
|
||||
|
||||
async def call_glm_api(system_prompt: str, user_content: str) -> dict:
    """Send one chat-completion request to Z.AI GLM-4.6 and return the decoded JSON answer.

    The raw message content is written to a timestamped file under
    ``data/entity_annotation/test_outputs`` before any parsing, so a reply
    that cannot be parsed can still be inspected.

    Args:
        system_prompt: Text sent as the ``system`` message.
        user_content: Text sent as the ``user`` message.

    Returns:
        The JSON value decoded from the reply (from inside a Markdown code
        fence when the reply contains one).

    Raises:
        ValueError: ``ZAI_API_TOKEN`` is not set in the environment.
        httpx.HTTPStatusError: The API answered with an error status.
        json.JSONDecodeError: The reply is not valid JSON even after the
            truncation repair.
    """
    token = os.environ.get("ZAI_API_TOKEN")
    if not token:
        raise ValueError("ZAI_API_TOKEN not set in environment")

    request_headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content},
    ]
    request_body = {
        "model": ZAI_MODEL,
        "messages": chat_messages,
        # Low temperature for consistent extraction
        "temperature": 0.1,
        # High limit for GLM-4.6 reasoning mode + output
        "max_tokens": 16000,
    }

    # 5 min timeout for GLM-4.6 reasoning
    async with httpx.AsyncClient(timeout=300.0) as session:
        reply = await session.post(ZAI_API_URL, headers=request_headers, json=request_body)
        reply.raise_for_status()
        content = reply.json()["choices"][0]["message"]["content"]

    # Keep the untouched reply on disk for debugging
    dump_dir = project_root / "data/entity_annotation/test_outputs"
    dump_dir.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    raw_file = dump_dir / f"raw_response_{stamp}.txt"
    with open(raw_file, 'w', encoding='utf-8') as sink:
        sink.write(content)
    print(f" Raw response saved to: {raw_file.name}")

    # Unwrap a Markdown code fence: the text between the first opening fence
    # (preferring one tagged "json") and the next fence marker.
    fence = "```"
    if "```json" in content:
        candidate = content.partition("```json")[2].partition(fence)[0]
    elif fence in content:
        candidate = content.partition(fence)[2].partition(fence)[0]
    else:
        candidate = content
    candidate = candidate.strip()

    try:
        return json.loads(candidate)
    except json.JSONDecodeError as e:
        print(f"\n⚠️ JSON parse error at position {e.pos}, attempting repair...")
        # Second chance: close whatever a cut-off reply left open
        return json.loads(repair_truncated_json(candidate))
|
||||
|
||||
|
||||
def repair_truncated_json(json_str: str) -> str:
    """Attempt to repair truncated JSON by closing open brackets.

    The text is scanned once while tracking string literals (including
    backslash escapes) and a stack of the closers still owed, so brackets
    inside strings are ignored and closers are emitted innermost-first.

    Two candidates are tried in order:

    1. the whole text plus the owed closers, when the text does not end
       inside a string and that candidate actually parses;
    2. the longest prefix that ends at a structurally complete point (right
       after an opening bracket, right after a closing bracket, or right
       before a comma) plus the closers owed at that point. A container that
       was cut off before its first complete member therefore survives as an
       empty ``{}`` / ``[]``.

    Args:
        json_str: JSON text that may have been cut off mid-document.

    Returns:
        The repaired text, or the stripped input unchanged when it contains
        no usable object/array structure (the caller's ``json.loads`` will
        then raise as before).
    """
    closer_for = {"{": "}", "[": "]"}
    text = json_str.strip()

    owed: list[str] = []      # closers still needed, innermost last
    in_string = False
    escaped = False
    well_nested = True
    safe_len = 0              # length of the longest safely closable prefix
    safe_tail = ""            # closers to append to that prefix

    for pos, ch in enumerate(text):
        if in_string:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
            continue

        if ch == '"':
            in_string = True
        elif ch in closer_for:
            owed.append(closer_for[ch])
            safe_len, safe_tail = pos + 1, "".join(reversed(owed))
        elif ch in "}]":
            if not owed or owed[-1] != ch:
                # Malformed rather than truncated: stop and fall back to the
                # last point that was still consistent.
                well_nested = False
                break
            owed.pop()
            safe_len, safe_tail = pos + 1, "".join(reversed(owed))
        elif ch == "," and owed:
            # Everything before a separator is a complete member.
            safe_len, safe_tail = pos, "".join(reversed(owed))

    if well_nested and not in_string:
        candidate = text + "".join(reversed(owed))
        try:
            json.loads(candidate)
        except ValueError:
            pass  # tail is an incomplete token/pair; use the safe prefix
        else:
            return candidate

    if safe_len:
        return text[:safe_len].rstrip() + safe_tail
    return text
|
||||
|
||||
|
||||
def _fold_text(value: object) -> str:
    """Return *value* lower-cased with diacritics removed, or "" for non-strings."""
    import unicodedata

    if not isinstance(value, str):
        return ""
    decomposed = unicodedata.normalize("NFKD", value)
    return "".join(ch for ch in decomposed if not unicodedata.combining(ch)).lower()


def validate_extraction(result: dict) -> tuple[bool, list[str]]:
    """Validate the extraction result against expected structure.

    Checks the top-level keys, the number and shape of the extracted persons,
    the presence of the founder and both witnesses (matched on the romanized
    literal name), the calendar of the first temporal reference and the
    presence of Aleppo among the locations.

    Model output is untrusted: explicit JSON ``null`` values and wrongly
    typed members are reported as validation errors (or treated as empty)
    instead of raising. Romanized names are compared case-insensitively with
    diacritics folded, so "Aḥmad" satisfies the "ahmad" check.

    Args:
        result: Decoded JSON object returned by the extraction call.

    Returns:
        ``(is_valid, errors)`` where ``is_valid`` is True exactly when
        ``errors`` is empty.
    """
    errors: list[str] = []

    # Check top-level structure
    if "pico_observation" not in result:
        errors.append("Missing 'pico_observation' field")
    if "persons" not in result:
        errors.append("Missing 'persons' field")

    if "persons" in result:
        persons = result["persons"]
        if not isinstance(persons, list):
            errors.append(f"Expected 'persons' to be a list, got {type(persons).__name__}")
            persons = []

        # Check minimum person count (should be at least 4: founder, father, 2 witnesses)
        if len(persons) < 4:
            errors.append(f"Expected at least 4 persons, got {len(persons)}")

        # Check person structure, collecting romanized names along the way
        names_lower: list[str] = []
        for i, person in enumerate(persons):
            if not isinstance(person, dict):
                errors.append(f"Person {i}: expected an object, got {type(person).__name__}")
                continue
            if "person_index" not in person:
                errors.append(f"Person {i}: missing 'person_index'")
            pnv_name = person.get("pnv_name")
            if "pnv_name" not in person:
                errors.append(f"Person {i}: missing 'pnv_name'")
            elif not isinstance(pnv_name, dict) or "literalName" not in pnv_name:
                errors.append(f"Person {i}: missing 'literalName' in pnv_name")
            if isinstance(pnv_name, dict):
                names_lower.append(_fold_text(pnv_name.get("literalName_romanized")))

        # Check for specific expected persons
        if not any("ahmad" in n for n in names_lower):
            errors.append("Missing founder: Ahmad ibn Muhammad al-'Umari")
        if not any("ibrahim" in n for n in names_lower):
            errors.append("Missing witness: Ibrahim ibn Yusuf al-Turkmani")
        if not any("ali" in n for n in names_lower):
            errors.append("Missing witness: Ali ibn Husayn al-Halabi")

    # Check temporal reference (only the first entry is inspected)
    temporal = result.get("temporal_references")
    if isinstance(temporal, list) and temporal:
        first = temporal[0]
        if isinstance(first, dict) and "calendar" in first and first["calendar"] != "Hijri":
            errors.append(f"Expected Hijri calendar, got {first.get('calendar')}")

    # Check locations
    if "locations_mentioned" in result:
        locations = result["locations_mentioned"]
        if not isinstance(locations, list):
            locations = []
        loc_names = [
            _fold_text(loc.get("name_romanized"))
            for loc in locations
            if isinstance(loc, dict)
        ]
        if not any("aleppo" in n or "halab" in n for n in loc_names):
            errors.append("Missing location: Aleppo (حلب)")

    return len(errors) == 0, errors
|
||||
|
||||
|
||||
async def test_arabic_waqf_extraction():
    """Test PiCo extraction from Arabic waqf document.

    Sends the sample waqf text to the GLM API, prints the extracted persons,
    temporal references and locations, validates the result, and writes the
    full result to a timestamped JSON file under
    ``data/entity_annotation/test_outputs``.

    The display code tolerates explicit JSON ``null`` values in the model
    output: ``dict.get(key, default)`` returns ``None`` (not the default)
    when the key is present with a null value, so every nested lookup falls
    back with ``or``.

    Returns:
        ``None`` when ``ZAI_API_TOKEN`` is not set (test skipped), ``False``
        when the API call or JSON parsing fails or the reply is not a JSON
        object, otherwise the verdict of ``validate_extraction``.
    """
    print("\n" + "=" * 70)
    print("TEST: PiCo Arabic Waqf Document Extraction")
    print("=" * 70)

    # Check API token
    if not os.environ.get("ZAI_API_TOKEN"):
        print("\n⚠️ SKIPPED: ZAI_API_TOKEN not set")
        print("Set it with: export ZAI_API_TOKEN=<your_token>")
        return None

    print(f"\nModel: {ZAI_MODEL}")
    print(f"API: {ZAI_API_URL}")

    # Prepare user prompt
    user_prompt = f"""Extract all persons, relationships, dates, and locations from this Arabic waqf (endowment) document:

{ARABIC_WAQF_TEXT}

This is a historical Islamic endowment document from Aleppo. Extract all information following the PiCo ontology pattern."""

    print("\n" + "-" * 40)
    print("SOURCE TEXT (Arabic Waqf Document)")
    print("-" * 40)
    print(ARABIC_WAQF_TEXT[:200] + "...")

    # Call API
    print("\n⏳ Calling GLM-4.6 API (this may take 30-60 seconds)...")

    try:
        start_time = datetime.now(timezone.utc)
        result = await call_glm_api(PICO_SYSTEM_PROMPT, user_prompt)
        end_time = datetime.now(timezone.utc)
        duration = (end_time - start_time).total_seconds()

        print(f"✅ API call completed in {duration:.1f}s")

    except httpx.HTTPStatusError as e:
        print(f"\n❌ API Error: {e.response.status_code}")
        print(f"Response: {e.response.text[:500]}")
        return False
    except json.JSONDecodeError as e:
        print(f"\n❌ JSON Parse Error: {e}")
        return False
    except Exception as e:
        print(f"\n❌ Error: {type(e).__name__}: {e}")
        return False

    # Everything below indexes the reply by key, so it must be a JSON object.
    if not isinstance(result, dict):
        print(f"\n❌ Error: expected a JSON object, got {type(result).__name__}")
        return False

    # Display results
    print("\n" + "-" * 40)
    print("EXTRACTION RESULTS")
    print("-" * 40)

    # PiCo observation metadata
    if "pico_observation" in result:
        obs = result["pico_observation"] or {}
        print(f"\n📋 Observation ID: {obs.get('observation_id', 'N/A')}")
        print(f" Source Type: {obs.get('source_type', 'N/A')}")
        print(f" Source Ref: {obs.get('source_reference', 'N/A')}")

    # Persons extracted
    persons = result.get("persons") or []
    print(f"\n👥 Persons Extracted: {len(persons)}")

    for person in persons:
        idx = person.get("person_index", "?")
        name = person.get("pnv_name") or {}
        lit_name = name.get("literalName", "")
        rom_name = name.get("literalName_romanized", "")

        print(f"\n [{idx}] {lit_name}")
        if rom_name:
            print(f" Romanized: {rom_name}")

        # Honorific
        if name.get("honorificPrefix"):
            hon = name.get("honorificPrefix", "")
            hon_rom = name.get("honorificPrefix_romanized", "")
            print(f" Honorific: {hon} ({hon_rom})")

        # Patronym
        if name.get("patronym"):
            pat = name.get("patronym", "")
            pat_rom = name.get("patronym_romanized", "")
            print(f" Patronym: {pat} ({pat_rom})")

        # Roles
        for role in person.get("roles") or []:
            role_title = role.get("role_title", "")
            role_rom = role.get("role_title_romanized", "")
            role_in_src = role.get("role_in_source", "")
            if role_title or role_in_src:
                print(f" Role: {role_title} ({role_rom}) - {role_in_src}")

        # Biographical
        bio = person.get("biographical") or {}
        if bio.get("deceased"):
            print(" Status: Deceased (المرحوم)")
        if bio.get("address"):
            print(f" Address: {bio.get('address')}")

        # Family relationships
        fam = person.get("family_relationships") or {}
        if fam.get("parent"):
            # str(... or "") keeps join() working when a target_name is null
            parents = [str(p.get("target_name") or "") for p in fam["parent"]]
            print(f" Parents: {', '.join(parents)}")
        if fam.get("children"):
            children = [str(c.get("target_name") or "") for c in fam["children"]]
            print(f" Children: {', '.join(children)}")

        # Context
        if person.get("context"):
            print(f" Context: {person.get('context')}")

    # Temporal references
    temps = result.get("temporal_references") or []
    if temps:
        print(f"\n📅 Temporal References: {len(temps)}")
        for temp in temps:
            expr = temp.get("expression", "")
            expr_rom = temp.get("expression_romanized", "")
            norm = temp.get("normalized", "")
            cal = temp.get("calendar", "")
            print(f" {expr}")
            if expr_rom:
                print(f" → {expr_rom}")
            print(f" → Normalized: {norm} ({cal})")

    # Locations
    locs = result.get("locations_mentioned") or []
    if locs:
        print(f"\n📍 Locations: {len(locs)}")
        for loc in locs:
            loc_name = loc.get("name", "")
            loc_name_rom = loc.get("name_romanized", "")
            loc_type = loc.get("type", "")
            print(f" {loc_name} ({loc_name_rom}) - {loc_type}")

    # Validate results
    print("\n" + "-" * 40)
    print("VALIDATION")
    print("-" * 40)

    is_valid, errors = validate_extraction(result)

    if is_valid:
        print("\n✅ All validations passed!")
    else:
        print(f"\n⚠️ Validation issues ({len(errors)}):")
        for error in errors:
            print(f" - {error}")

    # Save result to file for inspection
    output_path = project_root / "data/entity_annotation/test_outputs"
    output_path.mkdir(parents=True, exist_ok=True)
    output_file = output_path / f"arabic_waqf_extraction_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(result, f, ensure_ascii=False, indent=2)

    print(f"\n💾 Full result saved to: {output_file.relative_to(project_root)}")

    # Final verdict
    print("\n" + "=" * 70)
    if is_valid:
        print("✅ TEST PASSED: Arabic waqf extraction successful")
    else:
        print("⚠️ TEST COMPLETED WITH WARNINGS: Check validation issues above")
    print("=" * 70)

    return is_valid
|
||||
|
||||
|
||||
async def main():
    """Print the banner, run the waqf extraction test and return a process exit code."""
    rule = "#" * 70
    print("\n" + rule)
    print("# PiCo ARABIC WAQF EXTRACTION TEST")
    print("# Testing GLM-4.6 reasoning mode with historical Arabic document")
    print(rule)

    outcome = await test_arabic_waqf_extraction()

    # None means the test was skipped (no API key) and counts as success,
    # exactly like a passing run; only a falsy, non-None outcome fails.
    if outcome is None or outcome:
        return 0
    return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the async entry point to completion and use its return value as
    # the process exit status.
    sys.exit(asyncio.run(main()))
|
||||
786
scripts/test_pico_batch.py
Normal file
786
scripts/test_pico_batch.py
Normal file
|
|
@ -0,0 +1,786 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Batch test runner for PiCo (Person in Context) extraction across multiple document types.
|
||||
|
||||
This script tests GLM-4.6 reasoning mode extraction from various historical document types:
|
||||
1. Arabic Waqf (Islamic endowment)
|
||||
2. Hebrew Ketubah (Jewish marriage contract)
|
||||
3. Spanish Colonial Baptism
|
||||
4. Dutch Marriage Certificate
|
||||
5. Latin Notarial Protocol
|
||||
|
||||
Usage:
|
||||
python scripts/test_pico_batch.py [--test-name NAME] [--all] [--list]
|
||||
|
||||
Examples:
|
||||
python scripts/test_pico_batch.py --all # Run all tests
|
||||
python scripts/test_pico_batch.py --test-name arabic # Run only Arabic waqf test
|
||||
python scripts/test_pico_batch.py --list # List available tests
|
||||
|
||||
Environment Variables:
|
||||
ZAI_API_TOKEN - Required for Z.AI GLM-4.6 API
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timezone
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
|
||||
# Load environment variables from .env file
|
||||
project_root = Path(__file__).parent.parent
|
||||
sys.path.insert(0, str(project_root))
|
||||
|
||||
try:
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(project_root / ".env")
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# API Configuration
|
||||
# =============================================================================
|
||||
|
||||
ZAI_API_URL = "https://api.z.ai/api/coding/paas/v4/chat/completions"
|
||||
ZAI_MODEL = "glm-4.6"
|
||||
MAX_TOKENS = 16000 # High limit for GLM-4.6 reasoning mode
|
||||
TIMEOUT = 300 # 5 minutes for complex reasoning
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Test Document Definitions
|
||||
# =============================================================================
|
||||
|
||||
@dataclass
class TestDocument:
    """A historical document for PiCo extraction testing.

    Bundles one source text with the prompt used to annotate it and the
    expectations its extraction is checked against.
    """

    # This module is named test_*.py and the class name starts with "Test",
    # which matches pytest's default collection rules; opt out explicitly so
    # pytest does not try to collect this data holder as a test class.
    # Un-annotated, so it is a plain class attribute and not a dataclass field.
    __test__ = False

    name: str  # short identifier, e.g. "arabic_waqf"
    language: str  # language of the source text, e.g. "Arabic"
    script: str  # writing system of the source text, e.g. "Arabic"
    date_period: str  # human-readable dating, e.g. "1225 AH (1810 CE)"
    source_type: str  # document category, e.g. "waqf_document"
    source_text: str  # the document text handed to the model
    system_prompt: str  # system prompt used for this document type
    expected_persons: int  # presumably the person count the extraction should reach — confirm against the runner
    expected_locations: int  # presumably the location count the extraction should reach — confirm against the runner
    validation_names: list[str]  # Names that should appear in extraction
|
||||
|
||||
|
||||
# Arabic Waqf Document
|
||||
ARABIC_WAQF = TestDocument(
|
||||
name="arabic_waqf",
|
||||
language="Arabic",
|
||||
script="Arabic",
|
||||
date_period="1225 AH (1810 CE)",
|
||||
source_type="waqf_document",
|
||||
source_text="""بسم الله الرحمن الرحيم
|
||||
هذا ما وقف وحبس وسبل وأبد المرحوم الحاج أحمد بن محمد العمري، تاجر بمدينة
|
||||
حلب الشهباء، ابن المرحوم محمد بن عبد الله العمري. وقف جميع داره الكائنة
|
||||
بمحلة الجديدة على أولاده وأولاد أولاده ذكوراً وإناثاً. وإن انقرضوا لا سمح
|
||||
الله فعلى فقراء المسلمين. وشهد على ذلك الشهود: الحاج إبراهيم بن يوسف
|
||||
التركماني، والسيد علي بن حسين الحلبي. وكتب في شهر رجب سنة ألف ومائتين
|
||||
وخمس وعشرين هجرية.""",
|
||||
system_prompt="""You are a historical document annotator following the PiCo (Person in Context) ontology.
|
||||
|
||||
Extract ALL persons from this Arabic waqf (endowment) document:
|
||||
1. Names using PNV structure with both Arabic script AND romanized versions
|
||||
2. Patronymics (ابن/بن = son of)
|
||||
3. Honorifics (الحاج = pilgrim, السيد = sayyid, المرحوم = the late)
|
||||
4. Family relationships between persons
|
||||
5. Roles in the document (founder, witness)
|
||||
6. Biographical info (deceased status, occupation, address)
|
||||
|
||||
Return ONLY valid JSON with this structure:
|
||||
{
|
||||
"pico_observation": {"observation_id": "...", "source_type": "...", "source_reference": "..."},
|
||||
"persons": [{"person_index": 0, "pnv_name": {...}, "roles": [...], "biographical": {...}, "family_relationships": {...}, "context": "..."}],
|
||||
"temporal_references": [{"expression": "...", "expression_romanized": "...", "normalized": "...", "calendar": "..."}],
|
||||
"locations_mentioned": [{"name": "...", "name_romanized": "...", "type": "..."}]
|
||||
}""",
|
||||
expected_persons=4,
|
||||
expected_locations=2,
|
||||
validation_names=["ahmad", "ibrahim", "ali"]
|
||||
)
|
||||
|
||||
|
||||
# Hebrew Ketubah
|
||||
HEBREW_KETUBAH = TestDocument(
|
||||
name="hebrew_ketubah",
|
||||
language="Hebrew/Aramaic",
|
||||
script="Hebrew",
|
||||
date_period="5645 AM (1885 CE)",
|
||||
source_type="ketubah",
|
||||
source_text="""בס״ד
|
||||
|
||||
ביום שלישי בשבת, שנים עשר יום לחודש אייר שנת חמשת אלפים שש מאות
|
||||
וארבעים וחמש לבריאת עולם למנין שאנו מונין בו פה ווילנא
|
||||
|
||||
איך החתן הבחור יצחק בן הר״ר אברהם הכהן ז״ל אמר לה להדא בתולתא
|
||||
מרים בת הר״ר משה הלוי: הוי לי לאנתו כדת משה וישראל ואנא אפלח
|
||||
ואוקיר ואיזון ואפרנס יתיכי כהלכות גוברין יהודאין
|
||||
|
||||
ונתרצית מרת מרים בתולתא דא והות ליה לאנתו
|
||||
|
||||
עדים:
|
||||
שמעון בן יעקב הכהן
|
||||
דוד בן אליהו""",
|
||||
system_prompt="""You are a historical document annotator following the PiCo (Person in Context) ontology.
|
||||
|
||||
Extract ALL persons from this Hebrew ketubah (Jewish marriage contract):
|
||||
1. Names using PNV structure with both Hebrew script AND romanized versions
|
||||
2. Patronymics (בן/בת = son/daughter of)
|
||||
3. Tribal affiliations (הכהן = the priest/Kohen, הלוי = the Levite)
|
||||
4. Honorifics (הר״ר = Rabbi, מרת = Mrs., ז״ל = of blessed memory)
|
||||
5. Family relationships between persons
|
||||
6. Roles in document (groom/חתן, bride/כלה, witness/עד)
|
||||
7. Deceased markers (ז״ל)
|
||||
|
||||
Return ONLY valid JSON with this structure:
|
||||
{
|
||||
"pico_observation": {"observation_id": "...", "source_type": "ketubah", "source_reference": "..."},
|
||||
"persons": [{"person_index": 0, "pnv_name": {...}, "roles": [...], "biographical": {...}, "family_relationships": {...}, "context": "..."}],
|
||||
"temporal_references": [{"expression": "...", "expression_romanized": "...", "normalized": "...", "calendar": "Hebrew"}],
|
||||
"locations_mentioned": [{"name": "...", "name_romanized": "...", "type": "..."}]
|
||||
}""",
|
||||
expected_persons=6, # groom, bride, 2 fathers, 2 witnesses (fathers implicit)
|
||||
expected_locations=1,
|
||||
validation_names=["yitzchak", "miriam", "shimon", "david"]
|
||||
)
|
||||
|
||||
|
||||
# Spanish Colonial Baptism
|
||||
SPANISH_BAPTISM = TestDocument(
|
||||
name="spanish_colonial_baptism",
|
||||
language="Spanish",
|
||||
script="Latin",
|
||||
date_period="1742 CE",
|
||||
source_type="baptismal_register",
|
||||
source_text="""En la ciudad de México, a veinte y tres días del mes de febrero de mil
|
||||
setecientos cuarenta y dos años, yo el Br. Don Antonio de Mendoza,
|
||||
teniente de cura de esta santa iglesia catedral, bauticé solemnemente,
|
||||
puse óleo y crisma a Juan José, español, hijo legítimo de Don Pedro
|
||||
García de la Cruz, español, natural de la villa de Puebla de los Ángeles,
|
||||
y de Doña María Josefa de los Reyes, española, natural de esta ciudad.
|
||||
|
||||
Fueron sus padrinos Don Francisco Xavier de Castañeda, español, vecino
|
||||
de esta ciudad, y Doña Ana María de la Encarnación, su legítima esposa,
|
||||
a quienes advertí el parentesco espiritual y obligaciones que contrajeron.
|
||||
|
||||
Y lo firmé.
|
||||
Br. Don Antonio de Mendoza""",
|
||||
system_prompt="""You are a historical document annotator following the PiCo (Person in Context) ontology.
|
||||
|
||||
Extract ALL persons from this Spanish colonial baptismal record:
|
||||
1. Names using PNV structure (given name, surname with particles like "de")
|
||||
2. Casta (racial/social) designations (español, mestizo, mulato, indio, etc.)
|
||||
3. Legitimacy markers (hijo legítimo, hijo natural)
|
||||
4. Place of origin (natural de, vecino de)
|
||||
5. Family relationships (parents, godparents/padrinos)
|
||||
6. Compadrazgo relationships (spiritual kinship between parents and godparents)
|
||||
7. Ecclesiastical roles (priest, teniente de cura)
|
||||
8. Honorifics (Don, Doña, Br./Bachiller)
|
||||
|
||||
Return ONLY valid JSON with this structure:
|
||||
{
|
||||
"pico_observation": {"observation_id": "...", "source_type": "baptismal_register", "source_reference": "..."},
|
||||
"persons": [{"person_index": 0, "pnv_name": {...}, "roles": [...], "biographical": {...}, "family_relationships": {...}, "context": "..."}],
|
||||
"temporal_references": [{"expression": "...", "normalized": "...", "calendar": "Gregorian"}],
|
||||
"locations_mentioned": [{"name": "...", "type": "..."}]
|
||||
}""",
|
||||
expected_persons=6, # infant, father, mother, godfather, godmother, priest
|
||||
expected_locations=3,
|
||||
validation_names=["juan", "pedro", "maria", "francisco", "antonio"]
|
||||
)
|
||||
|
||||
|
||||
# Dutch Marriage Certificate
|
||||
DUTCH_MARRIAGE = TestDocument(
|
||||
name="dutch_marriage",
|
||||
language="Dutch",
|
||||
script="Latin",
|
||||
date_period="1885 CE",
|
||||
source_type="marriage_certificate",
|
||||
source_text="""Heden den vierden Maart achttien honderd vijf en tachtig, compareerden
|
||||
voor mij, Ambtenaar van den Burgerlijken Stand der Gemeente Haarlem:
|
||||
|
||||
Johannes Petrus van der Berg, oud dertig jaren, koopman, geboren te
|
||||
Amsterdam, wonende alhier, meerderjarige zoon van wijlen Pieter van der
|
||||
Berg, in leven koopman, en van Maria Johanna Bakker, zonder beroep,
|
||||
wonende te Amsterdam;
|
||||
|
||||
en
|
||||
|
||||
Cornelia Wilhelmina de Groot, oud vijf en twintig jaren, zonder beroep,
|
||||
geboren te Haarlem, wonende alhier, meerderjarige dochter van Hendrik
|
||||
de Groot, timmerman, en van wijlen Elisabeth van Dijk.
|
||||
|
||||
De getuigen waren:
|
||||
Willem Frederik Smit, oud veertig jaren, notaris
|
||||
Jacobus Hendrikus Jansen, oud vijf en dertig jaren, klerk""",
|
||||
system_prompt="""You are a historical document annotator following the PiCo (Person in Context) ontology.
|
||||
|
||||
Extract ALL persons from this Dutch marriage certificate (huwelijksakte):
|
||||
1. Names using PNV structure with Dutch naming conventions
|
||||
2. Patronymics and tussenvoegsels (van der, de, etc.)
|
||||
3. Ages, occupations, birthplaces, residences
|
||||
4. Family relationships (parents identified with "zoon van" / "dochter van")
|
||||
5. Deceased markers ("wijlen" = the late)
|
||||
6. Roles in document (groom, bride, witnesses/getuigen)
|
||||
7. Civil status terminology
|
||||
|
||||
Return ONLY valid JSON with this structure:
|
||||
{
|
||||
"pico_observation": {"observation_id": "...", "source_type": "marriage_certificate", "source_reference": "..."},
|
||||
"persons": [{"person_index": 0, "pnv_name": {...}, "roles": [...], "biographical": {...}, "family_relationships": {...}, "context": "..."}],
|
||||
"temporal_references": [{"expression": "...", "normalized": "...", "calendar": "Gregorian"}],
|
||||
"locations_mentioned": [{"name": "...", "type": "..."}]
|
||||
}""",
|
||||
expected_persons=8, # groom, bride, 4 parents (2 deceased), 2 witnesses
|
||||
expected_locations=2,
|
||||
validation_names=["johannes", "cornelia", "willem", "jacobus"]
|
||||
)
|
||||
|
||||
|
||||
# Russian Metrical Book Entry
|
||||
RUSSIAN_METRICAL = TestDocument(
|
||||
name="russian_metrical",
|
||||
language="Russian",
|
||||
script="Cyrillic",
|
||||
date_period="1892 CE",
|
||||
source_type="metrical_book",
|
||||
source_text="""Метрическая книга Троицкой церкви села Покровского за 1892 год
|
||||
|
||||
О родившихся
|
||||
|
||||
Марта 15 дня родился, 17 дня крещён Иван.
|
||||
|
||||
Родители: крестьянин деревни Ивановки Пётр Иванович Сидоров и законная
|
||||
жена его Анна Фёдоровна, оба православного вероисповедания.
|
||||
|
||||
Восприемники: крестьянин той же деревни Николай Петрович Кузнецов
|
||||
и крестьянская дочь девица Мария Ивановна Сидорова.""",
|
||||
system_prompt="""You are a historical document annotator following the PiCo (Person in Context) ontology.
|
||||
|
||||
Extract ALL persons from this Russian metrical book (метрическая книга) entry:
|
||||
1. Names using Russian naming conventions: given name + patronymic (отчество) + surname
|
||||
2. Patronymic patterns (-ович/-евич for males, -овна/-евна for females)
|
||||
3. Estate/class designations (крестьянин = peasant, мещанин = townsman, дворянин = noble)
|
||||
4. Family relationships
|
||||
5. Roles (родители = parents, восприемники = godparents)
|
||||
6. Religious denomination (православный = Orthodox)
|
||||
7. Include both Cyrillic AND romanized versions
|
||||
|
||||
Return ONLY valid JSON with this structure:
|
||||
{
|
||||
"pico_observation": {"observation_id": "...", "source_type": "metrical_book", "source_reference": "..."},
|
||||
"persons": [{"person_index": 0, "pnv_name": {...}, "roles": [...], "biographical": {...}, "family_relationships": {...}, "context": "..."}],
|
||||
"temporal_references": [{"expression": "...", "expression_romanized": "...", "normalized": "...", "calendar": "Gregorian/Julian"}],
|
||||
"locations_mentioned": [{"name": "...", "name_romanized": "...", "type": "..."}]
|
||||
}""",
|
||||
expected_persons=5, # infant, father, mother, godfather, godmother
|
||||
expected_locations=2,
|
||||
validation_names=["ivan", "petr", "anna", "nikolai", "maria"]
|
||||
)
|
||||
|
||||
|
||||
# Italian Notarial Act
|
||||
ITALIAN_NOTARIAL = TestDocument(
|
||||
name="italian_notarial",
|
||||
language="Italian",
|
||||
script="Latin",
|
||||
date_period="1654 CE",
|
||||
source_type="notarial_act",
|
||||
source_text="""Adì 15 Marzo 1654, in Venetia.
|
||||
|
||||
Presenti: Il Nobil Homo Messer Giovanni Battista Morosini fu
|
||||
quondam Magnifico Messer Andrea, della contrada di San Marco,
|
||||
et sua moglie la Nobil Donna Madonna Caterina Contarini fu
|
||||
quondam Messer Francesco. Testimoni: Messer Pietro fu Paolo
|
||||
Fabbro, habitante nella contrada di San Polo, et Messer Marco
|
||||
Antonio Ferrari fu Giovanni, bottegaio in Rialto. Rogato io
|
||||
Notaro Antonio Zen fu quondam Messer Giacomo, Notaro publico
|
||||
di Venetia.""",
|
||||
system_prompt="""You are a historical document annotator following the PiCo (Person in Context) ontology.
|
||||
|
||||
Extract ALL persons from this Italian notarial act:
|
||||
1. Names using PNV structure (given name, surname)
|
||||
2. Venetian nobility titles (Nobil Homo, Magnifico Messer, Nobil Donna Madonna)
|
||||
3. Deceased father markers ("fu", "quondam" = the late)
|
||||
4. Family relationships (spouses, children of)
|
||||
5. Occupations (bottegaio = shopkeeper, notaro = notary)
|
||||
6. Roles in document (party, witness/testimone, notary)
|
||||
7. Residence/contrada information
|
||||
|
||||
Return ONLY valid JSON with this structure:
|
||||
{
|
||||
"pico_observation": {"observation_id": "...", "source_type": "notarial_act", "source_reference": "..."},
|
||||
"persons": [{"person_index": 0, "pnv_name": {...}, "roles": [...], "biographical": {...}, "family_relationships": {...}, "context": "..."}],
|
||||
"temporal_references": [{"expression": "...", "normalized": "...", "calendar": "Gregorian"}],
|
||||
"locations_mentioned": [{"name": "...", "type": "..."}]
|
||||
}""",
|
||||
expected_persons=6, # Giovanni, Caterina, 2 witnesses, notary, plus fathers
|
||||
expected_locations=4,
|
||||
validation_names=["giovanni", "caterina", "pietro", "antonio"]
|
||||
)
|
||||
|
||||
|
||||
# Greek Orthodox Baptismal Register
|
||||
GREEK_BAPTISMAL = TestDocument(
|
||||
name="greek_baptismal",
|
||||
language="Greek",
|
||||
script="Greek",
|
||||
date_period="1875 CE",
|
||||
source_type="baptismal_register",
|
||||
source_text="""Ἐν Θεσσαλονίκῃ, τῇ δεκάτῃ πέμπτῃ Μαρτίου τοῦ ἔτους 1875.
|
||||
|
||||
Ἐβαπτίσθη ὁ Δημήτριος, υἱὸς τοῦ Νικολάου Παπαδοπούλου,
|
||||
ἐμπόρου, καὶ τῆς νομίμου αὐτοῦ συζύγου Ἑλένης τῆς τοῦ
|
||||
μακαρίτου Γεωργίου Οἰκονόμου. Νονὸς ὁ Κωνσταντῖνος
|
||||
Καρατζᾶς τοῦ Ἰωάννου, ἰατρός. Ἱερεύς: ὁ Πρωτοπρεσβύτερος
|
||||
Ἀθανάσιος Χρυσοστόμου.""",
|
||||
system_prompt="""You are a historical document annotator following the PiCo (Person in Context) ontology.
|
||||
|
||||
Extract ALL persons from this Greek Orthodox baptismal register:
|
||||
1. Names with BOTH Greek script AND romanized versions
|
||||
2. Greek patronymics ("τοῦ" + genitive = son/daughter of)
|
||||
3. Deceased markers (μακαρίτης/μακαρίτισσα = the late)
|
||||
4. Family relationships (υἱός = son, σύζυγος = wife)
|
||||
5. Godparent (νονός/νονά)
|
||||
6. Occupations (ἔμπορος = merchant, ἰατρός = physician)
|
||||
7. Ecclesiastical titles (Πρωτοπρεσβύτερος = Archpriest)
|
||||
8. Roles in document (baptized, parents, godparent, priest)
|
||||
|
||||
Return ONLY valid JSON with this structure:
|
||||
{
|
||||
"pico_observation": {"observation_id": "...", "source_type": "baptismal_register", "source_reference": "..."},
|
||||
"persons": [{"person_index": 0, "pnv_name": {"literalName": "...", "literalName_romanized": "..."}, "roles": [...], "biographical": {...}, "family_relationships": {...}, "context": "..."}],
|
||||
"temporal_references": [{"expression": "...", "expression_romanized": "...", "normalized": "...", "calendar": "Julian"}],
|
||||
"locations_mentioned": [{"name": "...", "name_romanized": "...", "type": "..."}]
|
||||
}""",
|
||||
expected_persons=6, # infant, father, mother, maternal grandfather, godfather, priest
|
||||
expected_locations=1,
|
||||
validation_names=["dimitrios", "nikolaos", "eleni", "konstantinos"]
|
||||
)
|
||||
|
||||
|
||||
# Ottoman Turkish Court Record (Sijill)
|
||||
OTTOMAN_SIJILL = TestDocument(
|
||||
name="ottoman_sijill",
|
||||
language="Ottoman Turkish",
|
||||
script="Arabic",
|
||||
date_period="1258 AH (1842 CE)",
|
||||
source_type="sijill",
|
||||
source_text="""بسم الله الرحمن الرحيم
|
||||
|
||||
مجلس شرع شريفده محمد آغا بن عبد الله مرحوم قصبه دميرجیکوی
|
||||
ساکنلرندن محمد بن احمد افندی و زوجهسی فاطمه خاتون بنت علیاوغلو
|
||||
حاضر اولوب محمد آغا طرفندن یکری بش غروش بدل معلوم ایله صاتیلدی
|
||||
|
||||
شهود الحال: حسن افندی بن عمر، ابراهیم چلبی بن مصطفی
|
||||
|
||||
فی اوائل شهر رجب سنة ١٢٥٨""",
|
||||
system_prompt="""You are a historical document annotator following the PiCo (Person in Context) ontology.
|
||||
|
||||
Extract ALL persons from this Ottoman Turkish sijill (court record):
|
||||
1. Names with both Arabic script AND romanized versions
|
||||
2. Ottoman honorifics (آغا/Ağa, افندی/Efendi, چلبی/Çelebi, خاتون/Hatun)
|
||||
3. Patronymics (بن/bin = son of, بنت/bint = daughter of)
|
||||
4. Deceased markers (مرحوم/merhum)
|
||||
5. Family relationships (زوجه/zevce = wife)
|
||||
6. Roles in document (buyer, seller, witnesses)
|
||||
7. Residence information
|
||||
|
||||
Note: Ottoman Turkish uses Arabic script with Turkish vocabulary and grammatical structures.
|
||||
|
||||
Return ONLY valid JSON with this structure:
|
||||
{
|
||||
"pico_observation": {"observation_id": "...", "source_type": "sijill", "source_reference": "..."},
|
||||
"persons": [{"person_index": 0, "pnv_name": {"literalName": "...", "literalName_romanized": "..."}, "roles": [...], "biographical": {...}, "family_relationships": {...}, "context": "..."}],
|
||||
"temporal_references": [{"expression": "...", "expression_romanized": "...", "normalized": "...", "calendar": "Hijri"}],
|
||||
"locations_mentioned": [{"name": "...", "name_romanized": "...", "type": "..."}]
|
||||
}""",
|
||||
expected_persons=6, # Mehmed Ağa, Mehmed bin Ahmed, Fatma Hatun, 2 witnesses + fathers
|
||||
expected_locations=1,
|
||||
validation_names=["mehmed", "fatma", "hasan", "ibrahim"]
|
||||
)
|
||||
|
||||
|
||||
# All available tests
|
||||
ALL_TESTS = {
|
||||
"arabic": ARABIC_WAQF,
|
||||
"hebrew": HEBREW_KETUBAH,
|
||||
"spanish": SPANISH_BAPTISM,
|
||||
"dutch": DUTCH_MARRIAGE,
|
||||
"russian": RUSSIAN_METRICAL,
|
||||
"italian": ITALIAN_NOTARIAL,
|
||||
"greek": GREEK_BAPTISMAL,
|
||||
"ottoman": OTTOMAN_SIJILL,
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# API Functions
|
||||
# =============================================================================
|
||||
|
||||
async def call_glm_api(system_prompt: str, user_content: str) -> tuple[dict, float]:
    """Call the Z.AI GLM-4.6 chat-completions API and parse its JSON reply.

    Args:
        system_prompt: Instructions sent as the ``system`` message.
        user_content: Task and document text sent as the ``user`` message.

    Returns:
        A ``(parsed, duration)`` tuple: the JSON value decoded from the
        model's reply and the elapsed wall-clock seconds for the request.

    Raises:
        ValueError: If ``ZAI_API_TOKEN`` is unset, or the reply carries no
            message text to parse.
        httpx.HTTPStatusError: If the API answers with an error status.
        json.JSONDecodeError: If no JSON can be decoded from the reply.
    """
    api_token = os.environ.get("ZAI_API_TOKEN")
    if not api_token:
        raise ValueError("ZAI_API_TOKEN not set in environment")

    headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": ZAI_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_content},
        ],
        "temperature": 0.1,
        "max_tokens": MAX_TOKENS,
    }

    start_time = datetime.now(timezone.utc)

    async with httpx.AsyncClient(timeout=TIMEOUT) as client:
        response = await client.post(ZAI_API_URL, headers=headers, json=payload)
        response.raise_for_status()
        result = response.json()

    duration = (datetime.now(timezone.utc) - start_time).total_seconds()

    content = result["choices"][0]["message"].get("content")
    # Fail with a readable message instead of a TypeError further down when
    # the model returns no answer text (possibly a reply cut off by max_tokens).
    if not isinstance(content, str) or not content.strip():
        raise ValueError(
            "GLM response contained no message content "
            "(the reply may have been truncated by max_tokens)"
        )

    return _parse_model_json(content), duration


def _parse_model_json(content: str):
    """Decode the JSON embedded in a model reply, tolerating Markdown fences."""
    candidate = content
    if "```json" in content:
        candidate = content.split("```json")[1].split("```")[0]
    elif "```" in content:
        # Untagged fence: the payload is the text between the first two fences.
        candidate = content.split("```")[1]
    candidate = candidate.strip()

    try:
        return json.loads(candidate)
    except json.JSONDecodeError:
        # Fallback for replies that wrap the object in prose or use an
        # unrecognised fence tag: retry on the outermost {...} span.
        start, end = candidate.find("{"), candidate.rfind("}")
        if not 0 <= start < end:
            raise
        return json.loads(candidate[start:end + 1])
|
||||
|
||||
|
||||
def extract_all_strings_recursive(obj, strings: list[str]) -> None:
    """Collect every string found in a nested dict/list structure.

    Strings are lower-cased and appended to ``strings`` in depth-first
    order. Dicts contribute their values only (keys are ignored); any other
    type is skipped.

    Args:
        obj: A string, dict, list, or any other value (ignored).
        strings: Output list, extended in place.
    """
    if isinstance(obj, str):
        strings.append(obj.lower())
        return

    if isinstance(obj, dict):
        children = obj.values()
    elif isinstance(obj, list):
        children = obj
    else:
        return

    for child in children:
        extract_all_strings_recursive(child, strings)
|
||||
|
||||
|
||||
def normalize_name_variant(name: str) -> list[str]:
    """Generate common spelling variants for a name.

    Handles cross-script romanization differences like:
    - mehmed/muhammad/mohammed
    - fatma/fatima
    - dimitrios/demetrios
    - yitzchak/isaac

    Args:
        name: The expected name; matching is case-insensitive.

    Returns:
        Lower-cased spellings with ``name`` itself first, followed by the
        members of every variant group it belongs to. Each spelling appears
        once, in first-seen order. A name in no group yields just itself.
    """
    # Canonical spelling -> alternative spellings. A spelling may appear in
    # more than one group (e.g. 'nicholas', 'abraham'); all groups are merged.
    variant_map = {
        # Arabic/Turkish name variants
        'mehmed': ['muhammad', 'mohammed', 'mehmet'],
        'fatma': ['fatima', 'fatmah'],
        'ahmed': ['ahmad'],
        'ibrahim': ['abraham'],
        'hasan': ['hassan'],
        'hussein': ['husayn', 'huseyin'],
        # Greek variants
        'dimitrios': ['demetrios', 'dimitris', 'dēmētrios'],
        'nikolaos': ['nicholas', 'nikolas'],
        'konstantinos': ['constantine', 'constantinos'],
        'georgios': ['george', 'geōrgios'],
        'eleni': ['helen', 'elena', 'elenē'],
        'athanasios': ['athanasius'],
        # Hebrew variants
        'yitzchak': ['isaac', 'itzhak', 'yitzhak'],
        'miriam': ['mirjam', 'myriam'],
        'shimon': ['simon', 'shimeon'],
        'avraham': ['abraham'],
        'moshe': ['moses'],
        'david': ['dovid'],
        'yaakov': ['jacob', 'jakob'],
        # Russian variants
        'petr': ['peter', 'pyotr', 'piotr'],
        'ivan': ['john', 'ioann'],
        'nikolai': ['nicholas', 'nikolay'],
        'maria': ['mary', 'mariya'],
    }

    lowered = name.lower()
    candidates = [lowered]
    for canonical, alternates in variant_map.items():
        if lowered == canonical:
            candidates.extend(alternates)
        elif lowered in alternates:
            candidates.append(canonical)
            candidates.extend(alternates)

    # dict.fromkeys drops repeats while keeping first-seen order, so the
    # input spelling stays first and overlapping groups do not duplicate.
    return list(dict.fromkeys(candidates))
|
||||
|
||||
|
||||
def validate_extraction(result: dict, test: TestDocument) -> tuple[bool, list[str]]:
    """Validate an extraction result against a test document's expectations.

    Only structural problems (no usable ``persons`` list) make the result
    invalid. Unmet expectations - too few persons or locations, an expected
    name that was not found - are reported as warnings and leave the result
    valid.

    Args:
        result: Parsed JSON returned by the model. Its shape is not trusted:
            non-dict results, null fields and non-dict person entries are
            tolerated rather than raising.
        test: The document definition supplying the expectations.

    Returns:
        ``(is_valid, issues)`` where ``issues`` lists errors followed by
        warnings as human-readable strings.
    """
    errors: list[str] = []
    warnings: list[str] = []

    # Check structure
    if not isinstance(result, dict) or "persons" not in result:
        errors.append("Missing 'persons' field")
        return False, errors

    persons = result["persons"]
    if not isinstance(persons, list):
        # e.g. "persons": null - report it instead of crashing on len().
        errors.append(f"'persons' must be a list, got {type(persons).__name__}")
        return False, errors

    # Check person count
    if len(persons) < test.expected_persons:
        warnings.append(f"Expected at least {test.expected_persons} persons, got {len(persons)}")

    # Gather every string under each person's pnv_name, plus the free-text
    # context, so a name is found whichever sub-field the model put it in.
    all_name_strings: list[str] = []
    for person in persons:
        if not isinstance(person, dict):
            # A bare string (or other value) instead of an object: still
            # harvest whatever text it carries.
            extract_all_strings_recursive(person, all_name_strings)
            continue
        extract_all_strings_recursive(person.get("pnv_name", {}), all_name_strings)
        if person.get("context"):
            all_name_strings.append(str(person["context"]).lower())

    # Check for expected names with variant support (substring match)
    for expected_name in test.validation_names:
        variants = normalize_name_variant(expected_name)
        found = any(
            variant in name_str
            for variant in variants
            for name_str in all_name_strings
        )
        if not found:
            warnings.append(f"Expected name '{expected_name}' (variants: {variants[:3]}) not found")

    # Check locations; a null or non-list value counts as no locations.
    locations = result.get("locations_mentioned")
    if not isinstance(locations, list):
        locations = []
    if len(locations) < test.expected_locations:
        warnings.append(f"Expected at least {test.expected_locations} locations, got {len(locations)}")

    # Combine errors and warnings
    is_valid = not errors
    return is_valid, errors + warnings
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Test Runner
|
||||
# =============================================================================
|
||||
|
||||
async def run_single_test(test: TestDocument) -> dict:
    """Run the extraction test for a single document and report the outcome.

    Sends the document to the model, prints a summary of what was
    extracted, validates it, and saves the raw result as JSON under
    ``data/entity_annotation/test_outputs``.

    Args:
        test: The document definition to run.

    Returns:
        A result record. On failure it holds ``test``, ``status`` ("error")
        and ``error``. On completion it holds ``test``, ``status``
        ("passed" or "warning"), ``persons_extracted``,
        ``locations_extracted``, ``duration_seconds``, ``issues`` and
        ``output_file``.
    """
    print(f"\n{'='*70}")
    print(f"TEST: {test.name.upper()}")
    print(f"Language: {test.language} | Script: {test.script} | Period: {test.date_period}")
    print(f"{'='*70}")

    # Prepare user prompt
    user_prompt = f"""Extract all persons, relationships, dates, and locations from this {test.source_type}:

{test.source_text}

Follow the PiCo ontology pattern for person observations."""

    print(f"\n📄 Source: {test.source_type}")
    print(f" Text length: {len(test.source_text)} chars")

    # Call API
    print("\n⏳ Calling GLM-4.6 API...")

    try:
        result, duration = await call_glm_api(test.system_prompt, user_prompt)
        print(f"✅ API call completed in {duration:.1f}s")
    except httpx.HTTPStatusError as e:
        print(f"❌ API Error: {e.response.status_code}")
        return {"test": test.name, "status": "error", "error": str(e)}
    except json.JSONDecodeError as e:
        print(f"❌ JSON Parse Error: {e}")
        return {"test": test.name, "status": "error", "error": str(e)}
    except Exception as e:
        # Top-level boundary: one failing document must not abort the batch.
        print(f"❌ Error: {type(e).__name__}: {e}")
        return {"test": test.name, "status": "error", "error": str(e)}

    # The model may return valid JSON that is not an object (e.g. a list);
    # report that as an error instead of crashing on .get() below.
    if not isinstance(result, dict):
        message = f"Expected a JSON object, got {type(result).__name__}"
        print(f"❌ Error: {message}")
        return {"test": test.name, "status": "error", "error": message}

    # Display summary; null or non-list fields count as empty.
    persons = _list_field(result, "persons")
    locations = _list_field(result, "locations_mentioned")
    temporal = _list_field(result, "temporal_references")

    print("\n📊 Extraction Summary:")
    print(f" Persons: {len(persons)}")
    print(f" Locations: {len(locations)}")
    print(f" Temporal refs: {len(temporal)}")

    # Show persons
    print("\n👥 Persons:")
    for person in persons[:5]:  # Show first 5
        idx, lit_name, role = _describe_person(person)
        print(f" [{idx}] {str(lit_name)[:50]} ({role})")

    if len(persons) > 5:
        print(f" ... and {len(persons) - 5} more")

    # Validate
    is_valid, issues = validate_extraction(result, test)

    print(f"\n🔍 Validation: {'✅ PASSED' if is_valid else '⚠️ ISSUES'}")
    for issue in issues:
        print(f" - {issue}")

    # Save result
    output_dir = project_root / "data/entity_annotation/test_outputs"
    output_dir.mkdir(parents=True, exist_ok=True)
    output_file = output_dir / f"{test.name}_extraction_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(result, f, ensure_ascii=False, indent=2)

    print(f"\n💾 Saved: {output_file.name}")

    return {
        "test": test.name,
        "status": "passed" if is_valid else "warning",
        "persons_extracted": len(persons),
        "locations_extracted": len(locations),
        "duration_seconds": duration,
        "issues": issues,
        "output_file": str(output_file)
    }


def _list_field(result: dict, key: str) -> list:
    """Return ``result[key]`` if it is a list, otherwise an empty list."""
    value = result.get(key)
    return value if isinstance(value, list) else []


def _describe_person(person) -> tuple:
    """Return ``(index, display_name, first_role)`` for one person entry."""
    if not isinstance(person, dict):
        # The model emitted a bare value instead of a person object.
        return "?", person, "-"

    idx = person.get("person_index", "?")

    name = person.get("pnv_name", {})
    if isinstance(name, dict):
        lit_name = name.get("literalName_romanized") or name.get("literalName", "?")
    else:
        # A plain string (or any other shape) is shown as-is.
        lit_name = name

    # Roles could be a list of dicts, a list of strings, or a single string.
    roles_raw = person.get("roles", [])
    if isinstance(roles_raw, str):
        role = roles_raw
    elif isinstance(roles_raw, list) and roles_raw:
        first_role = roles_raw[0]
        if isinstance(first_role, dict):
            role = first_role.get("role_in_source", "-")
        else:
            role = str(first_role)
    else:
        role = "-"

    return idx, lit_name, role
|
||||
|
||||
|
||||
async def run_all_tests() -> list[dict]:
    """Run every registered extraction test, one after another.

    Returns:
        The result record of each test, in ``ALL_TESTS`` registration order.
    """
    # Each await completes before the next test starts, so the documents are
    # processed strictly sequentially.
    return [await run_single_test(document) for document in ALL_TESTS.values()]
|
||||
|
||||
|
||||
def print_summary(results: list[dict]):
    """Print the batch report: status totals, one line per test, a verdict.

    Args:
        results: Result records with at least ``test`` and ``status``.
            ``persons_extracted``, ``duration_seconds`` and ``issues`` are
            optional; at most the first two issues of a test are shown.
    """
    rule = "=" * 70
    icons = {"passed": "✅", "warning": "⚠️", "error": "❌"}

    print("\n" + rule)
    print("BATCH TEST SUMMARY")
    print(rule)

    statuses = [entry["status"] for entry in results]
    passed = statuses.count("passed")
    warnings = statuses.count("warning")
    errors = statuses.count("error")

    print(f"\n📊 Results: {passed} passed, {warnings} warnings, {errors} errors")
    print(f" Total tests: {len(results)}")

    print("\n📋 Test Details:")
    for entry in results:
        status_icon = icons.get(entry["status"], "?")
        person_count = entry.get('persons_extracted', 0)
        seconds = entry.get('duration_seconds', 0)
        print(f" {status_icon} {entry['test']}: {person_count} persons, {seconds:.1f}s")
        shown_issues = entry["issues"][:2] if entry.get("issues") else []
        for issue in shown_issues:
            print(f" - {issue}")

    print("\n" + rule)
    if errors != 0:
        print(f"⚠️ {errors} TESTS FAILED - Check details above")
    else:
        print("✅ ALL TESTS COMPLETED SUCCESSFULLY")
    print(rule)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Main
|
||||
# =============================================================================
|
||||
|
||||
async def main() -> int:
    """Parse command-line arguments and run the requested action.

    ``--list`` and the default help output work offline; the API token is
    required only when tests actually run (``--test-name`` or ``--all``).
    When both ``--test-name`` and ``--all`` are given, only the named test
    runs.

    Returns:
        Process exit code: 0 on success, 1 when the token is missing for a
        test run or when any executed test ended with status "error".
    """
    parser = argparse.ArgumentParser(description="Batch test PiCo extraction")
    parser.add_argument("--test-name", "-t", choices=list(ALL_TESTS),
                        help="Run specific test by name")
    parser.add_argument("--all", "-a", action="store_true",
                        help="Run all tests")
    parser.add_argument("--list", "-l", action="store_true",
                        help="List available tests")

    args = parser.parse_args()

    # Listing tests and printing help never call the API, so the token is
    # required only when tests will actually run.
    runs_tests = not args.list and bool(args.test_name or args.all)
    if runs_tests and not os.environ.get("ZAI_API_TOKEN"):
        print("❌ Error: ZAI_API_TOKEN not set")
        print("Set it with: export ZAI_API_TOKEN=<your_token>")
        print("Or add to .env file in project root")
        return 1

    print("\n" + "#" * 70)
    print("# PiCo BATCH EXTRACTION TEST")
    print(f"# Model: {ZAI_MODEL} (reasoning mode)")
    print(f"# Max tokens: {MAX_TOKENS}")
    print("#" * 70)

    if args.list:
        print("\n📋 Available tests:")
        for name, test in ALL_TESTS.items():
            print(f" {name}: {test.language} {test.source_type} ({test.date_period})")
        return 0

    if args.test_name:
        result = await run_single_test(ALL_TESTS[args.test_name])
        return 0 if result["status"] != "error" else 1

    if args.all:
        results = await run_all_tests()
        print_summary(results)
        errors = sum(1 for r in results if r["status"] == "error")
        return 0 if errors == 0 else 1

    # Default: show help
    parser.print_help()
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit_code = asyncio.run(main())
|
||||
sys.exit(exit_code)
|
||||
|
|
@ -665,7 +665,7 @@ def create_hybrid_retriever(
|
|||
return HybridRetriever(
|
||||
qdrant_host="bronhouder.nl",
|
||||
qdrant_port=443,
|
||||
sparql_endpoint="https://bronhouder.nl/query",
|
||||
sparql_endpoint="https://bronhouder.nl/sparql",
|
||||
use_production_qdrant=True,
|
||||
**kwargs
|
||||
)
|
||||
|
|
|
|||
|
|
@ -140,21 +140,26 @@ class TypeDBRetriever:
|
|||
self.database = database
|
||||
self.k = k
|
||||
|
||||
# Lazy-load TypeDB client
|
||||
# Lazy-load TypeDB client (TypeDB 3.x - no sessions)
|
||||
self._client = None
|
||||
self._session = None
|
||||
|
||||
logger.info(f"Initialized TypeDBRetriever: {host}:{port}/{database}")
|
||||
|
||||
@property
|
||||
def client(self):
|
||||
"""Lazy-load TypeDB client."""
|
||||
"""Lazy-load TypeDB client (TypeDB 3.x API)."""
|
||||
if self._client is None:
|
||||
try:
|
||||
from typedb.driver import TypeDB, SessionType
|
||||
from typedb.driver import TypeDB, Credentials, DriverOptions
|
||||
|
||||
self._client = TypeDB.core_driver(f"{self.host}:{self.port}")
|
||||
logger.info(f"Connected to TypeDB at {self.host}:{self.port}")
|
||||
# TypeDB 3.x requires credentials and options
|
||||
# Default credentials for local development (no auth)
|
||||
credentials = Credentials("admin", "password")
|
||||
options = DriverOptions(is_tls_enabled=False) # Disable TLS for local dev
|
||||
|
||||
address = f"{self.host}:{self.port}"
|
||||
self._client = TypeDB.driver(address, credentials, options)
|
||||
logger.info(f"Connected to TypeDB 3.x at {self.host}:{self.port}")
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"typedb-driver package required. Install with: pip install typedb-driver"
|
||||
|
|
@ -164,16 +169,10 @@ class TypeDBRetriever:
|
|||
raise
|
||||
return self._client
|
||||
|
||||
def _get_session(self):
|
||||
"""Get a data session."""
|
||||
from typedb.driver import SessionType
|
||||
|
||||
if self._session is None or not self._session.is_open():
|
||||
self._session = self.client.session(self.database, SessionType.DATA)
|
||||
return self._session
|
||||
|
||||
def _execute_read(self, typeql: str) -> list[dict[str, Any]]:
|
||||
"""Execute a TypeQL read query.
|
||||
"""Execute a TypeQL read query (TypeDB 3.x API).
|
||||
|
||||
TypeDB 3.x removed sessions - transactions are created directly on driver.
|
||||
|
||||
Args:
|
||||
typeql: TypeQL query string
|
||||
|
|
@ -184,23 +183,33 @@ class TypeDBRetriever:
|
|||
from typedb.driver import TransactionType
|
||||
|
||||
results = []
|
||||
session = self._get_session()
|
||||
|
||||
try:
|
||||
with session.transaction(TransactionType.READ) as tx:
|
||||
answer = tx.query.get(typeql)
|
||||
# TypeDB 3.x: transactions directly on driver, specifying database
|
||||
with self.client.transaction(self.database, TransactionType.READ) as tx:
|
||||
# TypeDB 3.x: query() returns a Promise, need to resolve it
|
||||
answer = tx.query(typeql).resolve()
|
||||
|
||||
for concept_map in answer:
|
||||
row = {}
|
||||
for var in concept_map.variables():
|
||||
concept = concept_map.get(var)
|
||||
if hasattr(concept, 'get_value'):
|
||||
row[var] = concept.get_value()
|
||||
elif hasattr(concept, 'get_iid'):
|
||||
row[var] = concept.get_iid()
|
||||
# TypeDB 3.x: QueryAnswer may be iterable depending on query type
|
||||
if hasattr(answer, '__iter__'):
|
||||
for row in answer:
|
||||
result_row = {}
|
||||
# Access columns by index or iterate
|
||||
if hasattr(row, 'concepts'):
|
||||
for i, concept in enumerate(row.concepts()):
|
||||
var_name = f"var_{i}"
|
||||
if hasattr(concept, 'get_value'):
|
||||
result_row[var_name] = concept.get_value()
|
||||
elif hasattr(concept, 'as_entity'):
|
||||
result_row[var_name] = str(concept)
|
||||
else:
|
||||
result_row[var_name] = str(concept)
|
||||
else:
|
||||
row[var] = str(concept)
|
||||
results.append(row)
|
||||
result_row["result"] = str(row)
|
||||
results.append(result_row)
|
||||
else:
|
||||
# Single result (e.g., count query)
|
||||
results.append({"result": str(answer)})
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"TypeQL query failed: {e}")
|
||||
|
|
@ -603,28 +612,52 @@ class TypeDBRetriever:
|
|||
"relations": {},
|
||||
}
|
||||
|
||||
# Count heritage custodians
|
||||
# Count custodian entities by type (TypeDB 3.x API)
|
||||
# Schema types: custodian-observation, custodian-name, custodian-reconstruction
|
||||
entity_types = [
|
||||
("custodian-observation", "observations"),
|
||||
("custodian-name", "names"),
|
||||
("custodian-reconstruction", "reconstructions"),
|
||||
]
|
||||
|
||||
try:
|
||||
count_query = """
|
||||
match
|
||||
$inst isa heritage-custodian;
|
||||
get $inst;
|
||||
count;
|
||||
"""
|
||||
session = self._get_session()
|
||||
from typedb.driver import TransactionType
|
||||
with session.transaction(TransactionType.READ) as tx:
|
||||
answer = tx.query.get_aggregate(count_query)
|
||||
stats["entities"]["heritage_custodian"] = answer.as_value().as_long()
|
||||
|
||||
with self.client.transaction(self.database, TransactionType.READ) as tx:
|
||||
for type_name, stat_key in entity_types:
|
||||
try:
|
||||
# TypeDB 3.x count query syntax
|
||||
count_query = f"""
|
||||
match
|
||||
$inst isa {type_name};
|
||||
reduce $count = count;
|
||||
"""
|
||||
answer = tx.query(count_query).resolve()
|
||||
# Parse count result - TypeDB 3.x returns _Value objects
|
||||
count = 0
|
||||
for row in answer:
|
||||
# row.get("count") returns a _Value object
|
||||
value_obj = row.get("count")
|
||||
# Extract integer - try multiple methods
|
||||
if hasattr(value_obj, 'get_integer'):
|
||||
count = value_obj.get_integer()
|
||||
elif hasattr(value_obj, 'try_get_integer'):
|
||||
count = value_obj.try_get_integer() or 0
|
||||
else:
|
||||
# Fallback: string conversion
|
||||
count = int(str(value_obj))
|
||||
break
|
||||
stats["entities"][stat_key] = count
|
||||
except Exception as e:
|
||||
stats["entities"][stat_key] = f"error: {e}"
|
||||
|
||||
except Exception as e:
|
||||
stats["entities"]["error"] = str(e)
|
||||
|
||||
return stats
|
||||
|
||||
def close(self) -> None:
|
||||
"""Clean up resources."""
|
||||
if self._session and self._session.is_open():
|
||||
self._session.close()
|
||||
"""Clean up resources (TypeDB 3.x - no sessions to close)."""
|
||||
if self._client:
|
||||
self._client.close()
|
||||
self._client = None
|
||||
|
|
|
|||
Loading…
Reference in a new issue