#!/usr/bin/env python3
"""
Batch test runner for PiCo (Person in Context) extraction across multiple document types.

This script tests GLM-4.6 reasoning mode extraction from various historical document types:
1. Arabic Waqf (Islamic endowment)
2. Hebrew Ketubah (Jewish marriage contract)
3. Spanish Colonial Baptism
4. Dutch Marriage Certificate
5. Latin Notarial Protocol

Usage:
    python scripts/test_pico_batch.py [--test-name NAME] [--all] [--list]

Examples:
    python scripts/test_pico_batch.py --all              # Run all tests
    python scripts/test_pico_batch.py --test-name arabic # Run only Arabic waqf test
    python scripts/test_pico_batch.py --list             # List available tests

Environment Variables:
    ZAI_API_TOKEN - Required for Z.AI GLM-4.6 API
"""

import asyncio
import argparse
import json
import os
import sys
from pathlib import Path
from datetime import datetime, timezone
from dataclasses import dataclass
from typing import Optional

import httpx

# Load environment variables from .env file
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

try:
    from dotenv import load_dotenv
    load_dotenv(project_root / ".env")
except ImportError:
    # dotenv is optional; fall back to whatever is already in the environment.
    pass

# =============================================================================
# API Configuration
# =============================================================================

ZAI_API_URL = "https://api.z.ai/api/coding/paas/v4/chat/completions"
ZAI_MODEL = "glm-4.6"
MAX_TOKENS = 16000  # High limit for GLM-4.6 reasoning mode
TIMEOUT = 300  # 5 minutes for complex reasoning

# =============================================================================
# Test Document Definitions
# =============================================================================


@dataclass
class TestDocument:
    """A historical document for PiCo extraction testing."""
    name: str                    # Short test identifier (used in output filenames)
    language: str                # Human-readable language of the document
    script: str                  # Writing system (Arabic, Hebrew, Latin, Cyrillic, Greek)
    date_period: str             # Date of the document in its own calendar + CE
    source_type: str             # Document genre (waqf_document, ketubah, ...)
    source_text: str             # Original-language document text sent to the model
    system_prompt: str           # Extraction instructions for the model
    expected_persons: int        # Minimum number of persons the model should find
    expected_locations: int      # Minimum number of locations the model should find
    validation_names: list[str]  # Names that should appear in extraction


# Arabic Waqf Document
ARABIC_WAQF = TestDocument(
    name="arabic_waqf",
    language="Arabic",
    script="Arabic",
    date_period="1225 AH (1810 CE)",
    source_type="waqf_document",
    source_text="""بسم الله الرحمن الرحيم
هذا ما وقف وحبس وسبل وأبد المرحوم الحاج أحمد بن محمد العمري، تاجر بمدينة حلب الشهباء، ابن المرحوم محمد بن عبد الله العمري.
وقف جميع داره الكائنة بمحلة الجديدة على أولاده وأولاد أولاده ذكوراً وإناثاً. وإن انقرضوا لا سمح الله فعلى فقراء المسلمين.
وشهد على ذلك الشهود: الحاج إبراهيم بن يوسف التركماني، والسيد علي بن حسين الحلبي.
وكتب في شهر رجب سنة ألف ومائتين وخمس وعشرين هجرية.""",
    system_prompt="""You are a historical document annotator following the PiCo (Person in Context) ontology.

Extract ALL persons from this Arabic waqf (endowment) document:
1. Names using PNV structure with both Arabic script AND romanized versions
2. Patronymics (ابن/بن = son of)
3. Honorifics (الحاج = pilgrim, السيد = sayyid, المرحوم = the late)
4. Family relationships between persons
5. Roles in the document (founder, witness)
6. Biographical info (deceased status, occupation, address)

Return ONLY valid JSON with this structure:
{
  "pico_observation": {"observation_id": "...", "source_type": "...", "source_reference": "..."},
  "persons": [{"person_index": 0, "pnv_name": {...}, "roles": [...], "biographical": {...}, "family_relationships": {...}, "context": "..."}],
  "temporal_references": [{"expression": "...", "expression_romanized": "...", "normalized": "...", "calendar": "..."}],
  "locations_mentioned": [{"name": "...", "name_romanized": "...", "type": "..."}]
}""",
    expected_persons=4,
    expected_locations=2,
    validation_names=["ahmad", "ibrahim", "ali"]
)

# Hebrew Ketubah
HEBREW_KETUBAH = TestDocument(
    name="hebrew_ketubah",
    language="Hebrew/Aramaic",
    script="Hebrew",
    date_period="5645 AM (1885 CE)",
    source_type="ketubah",
    source_text="""בס״ד
ביום שלישי בשבת, שנים עשר יום לחודש אייר שנת חמשת אלפים שש מאות וארבעים וחמש לבריאת עולם למנין שאנו מונין בו פה ווילנא
איך החתן הבחור יצחק בן הר״ר אברהם הכהן ז״ל אמר לה להדא בתולתא מרים בת הר״ר משה הלוי:
הוי לי לאנתו כדת משה וישראל ואנא אפלח ואוקיר ואיזון ואפרנס יתיכי כהלכות גוברין יהודאין
ונתרצית מרת מרים בתולתא דא והות ליה לאנתו
עדים: שמעון בן יעקב הכהן
דוד בן אליהו""",
    system_prompt="""You are a historical document annotator following the PiCo (Person in Context) ontology.

Extract ALL persons from this Hebrew ketubah (Jewish marriage contract):
1. Names using PNV structure with both Hebrew script AND romanized versions
2. Patronymics (בן/בת = son/daughter of)
3. Tribal affiliations (הכהן = the priest/Kohen, הלוי = the Levite)
4. Honorifics (הר״ר = Rabbi, מרת = Mrs., ז״ל = of blessed memory)
5. Family relationships between persons
6. Roles in document (groom/חתן, bride/כלה, witness/עד)
7. Deceased markers (ז״ל)

Return ONLY valid JSON with this structure:
{
  "pico_observation": {"observation_id": "...", "source_type": "ketubah", "source_reference": "..."},
  "persons": [{"person_index": 0, "pnv_name": {...}, "roles": [...], "biographical": {...}, "family_relationships": {...}, "context": "..."}],
  "temporal_references": [{"expression": "...", "expression_romanized": "...", "normalized": "...", "calendar": "Hebrew"}],
  "locations_mentioned": [{"name": "...", "name_romanized": "...", "type": "..."}]
}""",
    expected_persons=6,  # groom, bride, 2 fathers, 2 witnesses (fathers implicit)
    expected_locations=1,
    validation_names=["yitzchak", "miriam", "shimon", "david"]
)

# Spanish Colonial Baptism
SPANISH_BAPTISM = TestDocument(
    name="spanish_colonial_baptism",
    language="Spanish",
    script="Latin",
    date_period="1742 CE",
    source_type="baptismal_register",
    source_text="""En la ciudad de México, a veinte y tres días del mes de febrero de mil setecientos cuarenta y dos años, yo el Br. Don Antonio de Mendoza, teniente de cura de esta santa iglesia catedral, bauticé solemnemente, puse óleo y crisma a Juan José, español, hijo legítimo de Don Pedro García de la Cruz, español, natural de la villa de Puebla de los Ángeles, y de Doña María Josefa de los Reyes, española, natural de esta ciudad.
Fueron sus padrinos Don Francisco Xavier de Castañeda, español, vecino de esta ciudad, y Doña Ana María de la Encarnación, su legítima esposa, a quienes advertí el parentesco espiritual y obligaciones que contrajeron. Y lo firmé.
Br. Don Antonio de Mendoza""",
    system_prompt="""You are a historical document annotator following the PiCo (Person in Context) ontology.

Extract ALL persons from this Spanish colonial baptismal record:
1. Names using PNV structure (given name, surname with particles like "de")
2. Casta (racial/social) designations (español, mestizo, mulato, indio, etc.)
3. Legitimacy markers (hijo legítimo, hijo natural)
4. Place of origin (natural de, vecino de)
5. Family relationships (parents, godparents/padrinos)
6. Compadrazgo relationships (spiritual kinship between parents and godparents)
7. Ecclesiastical roles (priest, teniente de cura)
8. Honorifics (Don, Doña, Br./Bachiller)

Return ONLY valid JSON with this structure:
{
  "pico_observation": {"observation_id": "...", "source_type": "baptismal_register", "source_reference": "..."},
  "persons": [{"person_index": 0, "pnv_name": {...}, "roles": [...], "biographical": {...}, "family_relationships": {...}, "context": "..."}],
  "temporal_references": [{"expression": "...", "normalized": "...", "calendar": "Gregorian"}],
  "locations_mentioned": [{"name": "...", "type": "..."}]
}""",
    expected_persons=6,  # infant, father, mother, godfather, godmother, priest
    expected_locations=3,
    validation_names=["juan", "pedro", "maria", "francisco", "antonio"]
)

# Dutch Marriage Certificate
DUTCH_MARRIAGE = TestDocument(
    name="dutch_marriage",
    language="Dutch",
    script="Latin",
    date_period="1885 CE",
    source_type="marriage_certificate",
    source_text="""Heden den vierden Maart achttien honderd vijf en tachtig, compareerden voor mij, Ambtenaar van den Burgerlijken Stand der Gemeente Haarlem:
Johannes Petrus van der Berg, oud dertig jaren, koopman, geboren te Amsterdam, wonende alhier, meerderjarige zoon van wijlen Pieter van der Berg, in leven koopman, en van Maria Johanna Bakker, zonder beroep, wonende te Amsterdam;
en Cornelia Wilhelmina de Groot, oud vijf en twintig jaren, zonder beroep, geboren te Haarlem, wonende alhier, meerderjarige dochter van Hendrik de Groot, timmerman, en van wijlen Elisabeth van Dijk.
De getuigen waren:
Willem Frederik Smit, oud veertig jaren, notaris
Jacobus Hendrikus Jansen, oud vijf en dertig jaren, klerk""",
    system_prompt="""You are a historical document annotator following the PiCo (Person in Context) ontology.

Extract ALL persons from this Dutch marriage certificate (huwelijksakte):
1. Names using PNV structure with Dutch naming conventions
2. Patronymics and tussenvoegsels (van der, de, etc.)
3. Ages, occupations, birthplaces, residences
4. Family relationships (parents identified with "zoon van" / "dochter van")
5. Deceased markers ("wijlen" = the late)
6. Roles in document (groom, bride, witnesses/getuigen)
7. Civil status terminology

Return ONLY valid JSON with this structure:
{
  "pico_observation": {"observation_id": "...", "source_type": "marriage_certificate", "source_reference": "..."},
  "persons": [{"person_index": 0, "pnv_name": {...}, "roles": [...], "biographical": {...}, "family_relationships": {...}, "context": "..."}],
  "temporal_references": [{"expression": "...", "normalized": "...", "calendar": "Gregorian"}],
  "locations_mentioned": [{"name": "...", "type": "..."}]
}""",
    expected_persons=8,  # groom, bride, 4 parents (2 deceased), 2 witnesses
    expected_locations=2,
    validation_names=["johannes", "cornelia", "willem", "jacobus"]
)

# Russian Metrical Book Entry
RUSSIAN_METRICAL = TestDocument(
    name="russian_metrical",
    language="Russian",
    script="Cyrillic",
    date_period="1892 CE",
    source_type="metrical_book",
    source_text="""Метрическая книга Троицкой церкви села Покровского за 1892 год

О родившихся

Марта 15 дня родился, 17 дня крещён Иван.
Родители: крестьянин деревни Ивановки Пётр Иванович Сидоров и законная жена его Анна Фёдоровна, оба православного вероисповедания.
Восприемники: крестьянин той же деревни Николай Петрович Кузнецов и крестьянская дочь девица Мария Ивановна Сидорова.""",
    system_prompt="""You are a historical document annotator following the PiCo (Person in Context) ontology.

Extract ALL persons from this Russian metrical book (метрическая книга) entry:
1. Names using Russian naming conventions: given name + patronymic (отчество) + surname
2. Patronymic patterns (-ович/-евич for males, -овна/-евна for females)
3. Estate/class designations (крестьянин = peasant, мещанин = townsman, дворянин = noble)
4. Family relationships
5. Roles (родители = parents, восприемники = godparents)
6. Religious denomination (православный = Orthodox)
7. Include both Cyrillic AND romanized versions

Return ONLY valid JSON with this structure:
{
  "pico_observation": {"observation_id": "...", "source_type": "metrical_book", "source_reference": "..."},
  "persons": [{"person_index": 0, "pnv_name": {...}, "roles": [...], "biographical": {...}, "family_relationships": {...}, "context": "..."}],
  "temporal_references": [{"expression": "...", "expression_romanized": "...", "normalized": "...", "calendar": "Gregorian/Julian"}],
  "locations_mentioned": [{"name": "...", "name_romanized": "...", "type": "..."}]
}""",
    expected_persons=5,  # infant, father, mother, godfather, godmother
    expected_locations=2,
    validation_names=["ivan", "petr", "anna", "nikolai", "maria"]
)

# Italian Notarial Act
ITALIAN_NOTARIAL = TestDocument(
    name="italian_notarial",
    language="Italian",
    script="Latin",
    date_period="1654 CE",
    source_type="notarial_act",
    source_text="""Adì 15 Marzo 1654, in Venetia.
Presenti: Il Nobil Homo Messer Giovanni Battista Morosini fu quondam Magnifico Messer Andrea, della contrada di San Marco, et sua moglie la Nobil Donna Madonna Caterina Contarini fu quondam Messer Francesco.
Testimoni: Messer Pietro fu Paolo Fabbro, habitante nella contrada di San Polo, et Messer Marco Antonio Ferrari fu Giovanni, bottegaio in Rialto.
Rogato io Notaro Antonio Zen fu quondam Messer Giacomo, Notaro publico di Venetia.""",
    system_prompt="""You are a historical document annotator following the PiCo (Person in Context) ontology.

Extract ALL persons from this Italian notarial act:
1. Names using PNV structure (given name, surname)
2. Venetian nobility titles (Nobil Homo, Magnifico Messer, Nobil Donna Madonna)
3. Deceased father markers ("fu", "quondam" = the late)
4. Family relationships (spouses, children of)
5. Occupations (bottegaio = shopkeeper, notaro = notary)
6. Roles in document (party, witness/testimone, notary)
7. Residence/contrada information

Return ONLY valid JSON with this structure:
{
  "pico_observation": {"observation_id": "...", "source_type": "notarial_act", "source_reference": "..."},
  "persons": [{"person_index": 0, "pnv_name": {...}, "roles": [...], "biographical": {...}, "family_relationships": {...}, "context": "..."}],
  "temporal_references": [{"expression": "...", "normalized": "...", "calendar": "Gregorian"}],
  "locations_mentioned": [{"name": "...", "type": "..."}]
}""",
    expected_persons=6,  # Giovanni, Caterina, 2 witnesses, notary, plus fathers
    expected_locations=4,
    validation_names=["giovanni", "caterina", "pietro", "antonio"]
)

# Greek Orthodox Baptismal Register
GREEK_BAPTISMAL = TestDocument(
    name="greek_baptismal",
    language="Greek",
    script="Greek",
    date_period="1875 CE",
    source_type="baptismal_register",
    source_text="""Ἐν Θεσσαλονίκῃ, τῇ δεκάτῃ πέμπτῃ Μαρτίου τοῦ ἔτους 1875.
Ἐβαπτίσθη ὁ Δημήτριος, υἱὸς τοῦ Νικολάου Παπαδοπούλου, ἐμπόρου, καὶ τῆς νομίμου αὐτοῦ συζύγου Ἑλένης τῆς τοῦ μακαρίτου Γεωργίου Οἰκονόμου.
Νονὸς ὁ Κωνσταντῖνος Καρατζᾶς τοῦ Ἰωάννου, ἰατρός.
Ἱερεύς: ὁ Πρωτοπρεσβύτερος Ἀθανάσιος Χρυσοστόμου.""",
    system_prompt="""You are a historical document annotator following the PiCo (Person in Context) ontology.

Extract ALL persons from this Greek Orthodox baptismal register:
1. Names with BOTH Greek script AND romanized versions
2. Greek patronymics ("τοῦ" + genitive = son/daughter of)
3. Deceased markers (μακαρίτης/μακαρίτισσα = the late)
4. Family relationships (υἱός = son, σύζυγος = wife)
5. Godparent (νονός/νονά)
6. Occupations (ἔμπορος = merchant, ἰατρός = physician)
7. Ecclesiastical titles (Πρωτοπρεσβύτερος = Archpriest)
8. Roles in document (baptized, parents, godparent, priest)

Return ONLY valid JSON with this structure:
{
  "pico_observation": {"observation_id": "...", "source_type": "baptismal_register", "source_reference": "..."},
  "persons": [{"person_index": 0, "pnv_name": {"literalName": "...", "literalName_romanized": "..."}, "roles": [...], "biographical": {...}, "family_relationships": {...}, "context": "..."}],
  "temporal_references": [{"expression": "...", "expression_romanized": "...", "normalized": "...", "calendar": "Julian"}],
  "locations_mentioned": [{"name": "...", "name_romanized": "...", "type": "..."}]
}""",
    expected_persons=6,  # infant, father, mother, maternal grandfather, godfather, priest
    expected_locations=1,
    validation_names=["dimitrios", "nikolaos", "eleni", "konstantinos"]
)

# Ottoman Turkish Court Record (Sijill)
OTTOMAN_SIJILL = TestDocument(
    name="ottoman_sijill",
    language="Ottoman Turkish",
    script="Arabic",
    date_period="1258 AH (1842 CE)",
    source_type="sijill",
    source_text="""بسم الله الرحمن الرحيم
مجلس شرع شريفده محمد آغا بن عبد الله مرحوم قصبه دميرجی‌کوی ساکنلرندن
محمد بن احمد افندی و زوجه‌سی فاطمه خاتون بنت علی‌اوغلو حاضر اولوب
محمد آغا طرفندن یکری بش غروش بدل معلوم ایله صاتیلدی
شهود الحال: حسن افندی بن عمر، ابراهیم چلبی بن مصطفی
فی اوائل شهر رجب سنة ١٢٥٨""",
    system_prompt="""You are a historical document annotator following the PiCo (Person in Context) ontology.

Extract ALL persons from this Ottoman Turkish sijill (court record):
1. Names with both Arabic script AND romanized versions
2. Ottoman honorifics (آغا/Ağa, افندی/Efendi, چلبی/Çelebi, خاتون/Hatun)
3. Patronymics (بن/bin = son of, بنت/bint = daughter of)
4. Deceased markers (مرحوم/merhum)
5. Family relationships (زوجه/zevce = wife)
6. Roles in document (buyer, seller, witnesses)
7. Residence information

Note: Ottoman Turkish uses Arabic script with Turkish vocabulary and grammatical structures.

Return ONLY valid JSON with this structure:
{
  "pico_observation": {"observation_id": "...", "source_type": "sijill", "source_reference": "..."},
  "persons": [{"person_index": 0, "pnv_name": {"literalName": "...", "literalName_romanized": "..."}, "roles": [...], "biographical": {...}, "family_relationships": {...}, "context": "..."}],
  "temporal_references": [{"expression": "...", "expression_romanized": "...", "normalized": "...", "calendar": "Hijri"}],
  "locations_mentioned": [{"name": "...", "name_romanized": "...", "type": "..."}]
}""",
    expected_persons=6,  # Mehmed Ağa, Mehmed bin Ahmed, Fatma Hatun, 2 witnesses + fathers
    expected_locations=1,
    validation_names=["mehmed", "fatma", "hasan", "ibrahim"]
)

# All available tests
ALL_TESTS = {
    "arabic": ARABIC_WAQF,
    "hebrew": HEBREW_KETUBAH,
    "spanish": SPANISH_BAPTISM,
    "dutch": DUTCH_MARRIAGE,
    "russian": RUSSIAN_METRICAL,
    "italian": ITALIAN_NOTARIAL,
    "greek": GREEK_BAPTISMAL,
    "ottoman": OTTOMAN_SIJILL,
}

# =============================================================================
# API Functions
# =============================================================================


async def call_glm_api(system_prompt: str, user_content: str) -> tuple[dict, float]:
    """Call Z.AI GLM-4.6 API and return parsed JSON response with timing.

    Args:
        system_prompt: Extraction instructions sent as the system message.
        user_content: Document text sent as the user message.

    Returns:
        Tuple of (parsed JSON dict, wall-clock duration in seconds).

    Raises:
        ValueError: If ZAI_API_TOKEN is not set in the environment.
        httpx.HTTPStatusError: On non-2xx API responses.
        json.JSONDecodeError: If the model response is not valid JSON.
    """
    api_token = os.environ.get("ZAI_API_TOKEN")
    if not api_token:
        raise ValueError("ZAI_API_TOKEN not set in environment")

    headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": ZAI_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_content},
        ],
        "temperature": 0.1,
        "max_tokens": MAX_TOKENS,
    }

    start_time = datetime.now(timezone.utc)
    async with httpx.AsyncClient(timeout=TIMEOUT) as client:
        response = await client.post(ZAI_API_URL, headers=headers, json=payload)
        response.raise_for_status()
        result = response.json()
        content = result["choices"][0]["message"]["content"]
    end_time = datetime.now(timezone.utc)
    duration = (end_time - start_time).total_seconds()

    # Parse JSON from response; the model may wrap it in markdown code fences.
    json_content = content
    if "```json" in content:
        json_content = content.split("```json")[1].split("```")[0]
    elif "```" in content:
        parts = content.split("```")
        if len(parts) >= 2:
            json_content = parts[1]

    return json.loads(json_content.strip()), duration


def extract_all_strings_recursive(obj, strings: list[str]) -> None:
    """Recursively extract all string values from nested dicts/lists.

    Appends lowercased strings to *strings* in place (mutates the argument).
    """
    if isinstance(obj, str):
        strings.append(obj.lower())
    elif isinstance(obj, dict):
        for value in obj.values():
            extract_all_strings_recursive(value, strings)
    elif isinstance(obj, list):
        for item in obj:
            extract_all_strings_recursive(item, strings)


def normalize_name_variant(name: str) -> list[str]:
    """Generate common spelling variants for a name.

    Handles cross-script romanization differences like:
    - mehmed/muhammad/mohammed
    - fatma/fatima
    - dimitrios/demetrios
    - yitzchak/isaac

    Returns a deduplicated list whose first entry is the lowercased input.
    """
    variants = [name.lower()]

    # Arabic/Turkish name variants
    variant_map = {
        'mehmed': ['muhammad', 'mohammed', 'mehmet'],
        'fatma': ['fatima', 'fatmah'],
        'ahmed': ['ahmad'],
        'ibrahim': ['abraham'],
        'hasan': ['hassan'],
        'hussein': ['husayn', 'huseyin'],
        # Greek variants
        'dimitrios': ['demetrios', 'dimitris', 'dēmētrios'],
        'nikolaos': ['nicholas', 'nikolas'],
        'konstantinos': ['constantine', 'constantinos'],
        'georgios': ['george', 'geōrgios'],
        'eleni': ['helen', 'elena', 'elenē'],
        'athanasios': ['athanasius'],
        # Hebrew variants
        'yitzchak': ['isaac', 'itzhak', 'yitzhak'],
        'miriam': ['mirjam', 'myriam'],
        'shimon': ['simon', 'shimeon'],
        'avraham': ['abraham'],
        'moshe': ['moses'],
        'david': ['dovid'],
        'yaakov': ['jacob', 'jakob'],
        # Russian variants
        'petr': ['peter', 'pyotr', 'piotr'],
        'ivan': ['john', 'ioann'],
        'nikolai': ['nicholas', 'nikolay'],
        'maria': ['mary', 'mariya'],
    }

    for key, values in variant_map.items():
        if name.lower() == key:
            variants.extend(values)
        elif name.lower() in values:
            variants.append(key)
            variants.extend(v for v in values if v != name.lower())

    # Deduplicate while preserving order (a name may match multiple entries,
    # e.g. 'abraham' appears under both 'ibrahim' and 'avraham').
    return list(dict.fromkeys(variants))


def validate_extraction(result: dict, test: TestDocument) -> tuple[bool, list[str]]:
    """Validate extraction result against expected values.

    Returns (is_valid, issues). Only structural errors make the result
    invalid; count/name mismatches are reported as warnings in *issues*.
    """
    errors = []
    warnings = []

    # Check structure
    if "persons" not in result:
        errors.append("Missing 'persons' field")
        return False, errors

    persons = result.get("persons", [])

    # Check person count
    if len(persons) < test.expected_persons:
        warnings.append(f"Expected at least {test.expected_persons} persons, got {len(persons)}")

    # Extract ALL string values from persons recursively for comprehensive name matching
    all_name_strings = []
    for person in persons:
        # Get pnv_name - could be nested structure
        pnv = person.get("pnv_name", {})
        extract_all_strings_recursive(pnv, all_name_strings)
        # Also check context field which often contains the original text
        if person.get("context"):
            all_name_strings.append(str(person["context"]).lower())

    # Check for expected names with variant support
    for expected_name in test.validation_names:
        variants = normalize_name_variant(expected_name)
        found = False
        for variant in variants:
            if any(variant in name_str for name_str in all_name_strings):
                found = True
                break
        if not found:
            warnings.append(f"Expected name '{expected_name}' (variants: {variants[:3]}) not found")

    # Check locations
    locations = result.get("locations_mentioned", [])
    if len(locations) < test.expected_locations:
        warnings.append(f"Expected at least {test.expected_locations} locations, got {len(locations)}")

    # Combine errors and warnings
    is_valid = len(errors) == 0
    all_issues = errors + warnings

    return is_valid, all_issues

# =============================================================================
# Test Runner
# =============================================================================


async def run_single_test(test: TestDocument) -> dict:
    """Run extraction test for a single document type.

    Calls the API, validates the extraction, saves the raw output JSON under
    data/entity_annotation/test_outputs/, and returns a result summary dict
    with status "passed", "warning", or "error".
    """
    print(f"\n{'='*70}")
    print(f"TEST: {test.name.upper()}")
    print(f"Language: {test.language} | Script: {test.script} | Period: {test.date_period}")
    print(f"{'='*70}")

    # Prepare user prompt
    user_prompt = f"""Extract all persons, relationships, dates, and locations from this {test.source_type}:

{test.source_text}

Follow the PiCo ontology pattern for person observations."""

    print(f"\n📄 Source: {test.source_type}")
    print(f"   Text length: {len(test.source_text)} chars")

    # Call API
    print(f"\n⏳ Calling GLM-4.6 API...")
    try:
        result, duration = await call_glm_api(test.system_prompt, user_prompt)
        print(f"✅ API call completed in {duration:.1f}s")
    except httpx.HTTPStatusError as e:
        print(f"❌ API Error: {e.response.status_code}")
        return {"test": test.name, "status": "error", "error": str(e)}
    except json.JSONDecodeError as e:
        print(f"❌ JSON Parse Error: {e}")
        return {"test": test.name, "status": "error", "error": str(e)}
    except Exception as e:
        print(f"❌ Error: {type(e).__name__}: {e}")
        return {"test": test.name, "status": "error", "error": str(e)}

    # Display summary
    persons = result.get("persons", [])
    locations = result.get("locations_mentioned", [])
    temporal = result.get("temporal_references", [])

    print(f"\n📊 Extraction Summary:")
    print(f"   Persons: {len(persons)}")
    print(f"   Locations: {len(locations)}")
    print(f"   Temporal refs: {len(temporal)}")

    # Show persons
    print(f"\n👥 Persons:")
    for person in persons[:5]:  # Show first 5
        idx = person.get("person_index", "?")
        name = person.get("pnv_name", {})
        if isinstance(name, str):
            lit_name = name
        else:
            lit_name = name.get("literalName_romanized") or name.get("literalName", "?")
        # Handle roles - could be list of dicts, list of strings, or string
        roles_raw = person.get("roles", [])
        if isinstance(roles_raw, str):
            role = roles_raw
        elif isinstance(roles_raw, list) and len(roles_raw) > 0:
            first_role = roles_raw[0]
            if isinstance(first_role, dict):
                role = first_role.get("role_in_source", "-")
            else:
                role = str(first_role)
        else:
            role = "-"
        print(f"   [{idx}] {str(lit_name)[:50]} ({role})")
    if len(persons) > 5:
        print(f"   ... and {len(persons) - 5} more")

    # Validate
    is_valid, issues = validate_extraction(result, test)
    print(f"\n🔍 Validation: {'✅ PASSED' if is_valid else '⚠️ ISSUES'}")
    if issues:
        for issue in issues:
            print(f"   - {issue}")

    # Save result
    output_dir = project_root / "data/entity_annotation/test_outputs"
    output_dir.mkdir(parents=True, exist_ok=True)
    output_file = output_dir / f"{test.name}_extraction_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(result, f, ensure_ascii=False, indent=2)
    print(f"\n💾 Saved: {output_file.name}")

    return {
        "test": test.name,
        "status": "passed" if is_valid else "warning",
        "persons_extracted": len(persons),
        "locations_extracted": len(locations),
        "duration_seconds": duration,
        "issues": issues,
        "output_file": str(output_file)
    }


async def run_all_tests() -> list[dict]:
    """Run all extraction tests sequentially."""
    results = []
    for test in ALL_TESTS.values():
        result = await run_single_test(test)
        results.append(result)
    return results


def print_summary(results: list[dict]) -> None:
    """Print summary of all test results."""
    print("\n" + "=" * 70)
    print("BATCH TEST SUMMARY")
    print("=" * 70)

    passed = sum(1 for r in results if r["status"] == "passed")
    warnings = sum(1 for r in results if r["status"] == "warning")
    errors = sum(1 for r in results if r["status"] == "error")

    print(f"\n📊 Results: {passed} passed, {warnings} warnings, {errors} errors")
    print(f"   Total tests: {len(results)}")

    print(f"\n📋 Test Details:")
    for r in results:
        status_icon = {"passed": "✅", "warning": "⚠️", "error": "❌"}.get(r["status"], "?")
        print(f"   {status_icon} {r['test']}: {r.get('persons_extracted', 0)} persons, {r.get('duration_seconds', 0):.1f}s")
        if r.get("issues"):
            for issue in r["issues"][:2]:
                print(f"      - {issue}")

    print("\n" + "=" * 70)
    if errors == 0:
        print("✅ ALL TESTS COMPLETED SUCCESSFULLY")
    else:
        print(f"⚠️ {errors} TESTS FAILED - Check details above")
    print("=" * 70)

# =============================================================================
# Main
# =============================================================================


async def main() -> int:
    """CLI entry point; returns a process exit code (0 = success)."""
    parser = argparse.ArgumentParser(description="Batch test PiCo extraction")
    parser.add_argument("--test-name", "-t", choices=list(ALL_TESTS.keys()),
                        help="Run specific test by name")
    parser.add_argument("--all", "-a", action="store_true", help="Run all tests")
    parser.add_argument("--list", "-l", action="store_true", help="List available tests")
    args = parser.parse_args()

    # Check API token
    if not os.environ.get("ZAI_API_TOKEN"):
        print("❌ Error: ZAI_API_TOKEN not set")
        print("Set it with: export ZAI_API_TOKEN=<your-token>")
        print("Or add to .env file in project root")
        return 1

    print("\n" + "#" * 70)
    print("# PiCo BATCH EXTRACTION TEST")
    print(f"# Model: {ZAI_MODEL} (reasoning mode)")
    print(f"# Max tokens: {MAX_TOKENS}")
    print("#" * 70)

    if args.list:
        print("\n📋 Available tests:")
        for name, test in ALL_TESTS.items():
            print(f"   {name}: {test.language} {test.source_type} ({test.date_period})")
        return 0

    if args.test_name:
        test = ALL_TESTS[args.test_name]
        result = await run_single_test(test)
        return 0 if result["status"] != "error" else 1

    if args.all:
        results = await run_all_tests()
        print_summary(results)
        errors = sum(1 for r in results if r["status"] == "error")
        return 0 if errors == 0 else 1

    # Default: show help
    parser.print_help()
    return 0


if __name__ == "__main__":
    exit_code = asyncio.run(main())
    sys.exit(exit_code)