glam/data/entity_annotation/test_outputs/raw_response_20251212_132017.txt
kempersc 505c12601a Add test script for PiCo extraction from Arabic waqf documents
- Implemented a new script `test_pico_arabic_waqf.py` to test the GLM annotator's ability to extract person observations from Arabic historical documents.
- The script includes environment variable handling for API token, structured prompts for the GLM API, and validation of extraction results.
- Added comprehensive logging for API responses, extraction results, and validation errors.
- Included a sample Arabic waqf text for testing purposes, following the PiCo ontology pattern.
2025-12-12 17:50:17 +01:00

166 lines
No EOL
4.7 KiB
Text

```json
{
"pico_observation": {
"observation_id": "waqf_aleppo_1225h",
"observed_at": "2023-10-27T10:00:00Z",
"source_type": "waqf_document",
"source_reference": "Aleppo Waqf, 1225 H"
},
"persons": [
{
"person_index": 0,
"pnv_name": {
"literalName": "الحاج أحمد بن محمد العمري",
"literalName_romanized": "al-Hajj Ahmad ibn Muhammad al-Umari",
"givenName": "أحمد",
"givenName_romanized": "Ahmad",
"patronym": "محمد",
"patronym_romanized": "Muhammad",
"baseSurname": "العمري",
"baseSurname_romanized": "al-Umari",
"honorificPrefix": "الحاج",
"honorificPrefix_romanized": "al-Hajj"
},
"roles": [
{
"role_title": "تاجر",
"role_title_romanized": "tajir",
"role_in_source": "founder"
}
],
"biographical": {
"deceased": true,
"address": "مدينة حلب الشهباء"
},
"family_relationships": {
"parent": [
{
"person_index": 1,
"target_name": "محمد بن عبد الله العمري"
}
],
"children": []
},
"context": "The founder (waqif) of the endowment, a deceased merchant from Aleppo."
},
{
"person_index": 1,
"pnv_name": {
"literalName": "المرحوم محمد بن عبد الله العمري",
"literalName_romanized": "al-marhum Muhammad ibn Abd Allah al-Umari",
"givenName": "محمد",
"givenName_romanized": "Muhammad",
"patronym": "عبد الله",
"patronym_romanized": "Abd Allah",
"baseSurname": "العمري",
"baseSurname_romanized": "al-Umari"
},
"roles": [
{
"role_title": null,
"role_title_romanized": null,
"role_in_source": null
}
],
"biographical": {
"deceased": true,
"address": null
},
"family_relationships": {
"parent": [],
"children": [
{
"person_index": 0,
"target_name": "أحمد بن محمد العمري"
}
]
},
"context": "The deceased father of the founder, Ahmad al-Umari."
},
{
"person_index": 2,
"pnv_name": {
"literalName": "الحاج إبراهيم بن يوسف التركماني",
"literalName_romanized": "al-Hajj Ibrahim ibn Yusuf al-Turkmani",
"givenName": "إبراهيم",
"givenName_romanized": "Ibrahim",
"patronym": "يوسف",
"patronym_romanized": "Yusuf",
"baseSurname": "التركماني",
"baseSurname_romanized": "al-Turkmani",
"honorificPrefix": "الحاج",
"honorificPrefix_romanized": "al-Hajj"
},
"roles": [
{
"role_title": "شاهد",
"role_title_romanized": "shahid",
"role_in_source": "witness"
}
],
"biographical": {
"deceased": null,
"address": null
},
"family_relationships": {
"parent": [],
"children": []
},
"context": "One of the witnesses to the endowment deed."
},
{
"person_index": 3,
"pnv_name": {
"literalName": "السيد علي بن حسين الحلبي",
"literalName_romanized": "al-Sayyid Ali ibn Husayn al-Halabi",
"givenName": "علي",
"givenName_romanized": "Ali",
"patronym": "حسين",
"patronym_romanized": "Husayn",
"baseSurname": "الحلبي",
"baseSurname_romanized": "al-Halabi",
"honorificPrefix": "السيد",
"honorificPrefix_romanized": "al-Sayyid"
},
"roles": [
{
"role_title": "شاهد",
"role_title_romanized": "shahid",
"role_in_source": "witness"
}
],
"biographical": {
"deceased": null,
"address": null
},
"family_relationships": {
"parent": [],
"children": []
},
"context": "The second witness to the endowment deed."
}
],
"temporal_references": [
{
"expression": "شهر رجب سنة ألف ومائتين وخمس وعشرين هجرية",
"expression_romanized": "Shahr Rajab sanat alf wa mi'ayn wa khamsa wa 'ishrin hijriyyah",
"normalized": "1811-01",
"calendar": "Hijri",
"type": "DATE"
}
],
"locations_mentioned": [
{
"name": "حلب الشهباء",
"name_romanized": "Halab al-Shahba'",
"type": "city"
},
{
"name": "محلة الجديدة",
"name_romanized": "Mahallat al-Jadida",
"type": "neighborhood"
}
]
}
```