- Implemented a new script `test_pico_arabic_waqf.py` to test the GLM annotator's ability to extract person observations from Arabic historical documents. - The script includes environment variable handling for API token, structured prompts for the GLM API, and validation of extraction results. - Added comprehensive logging for API responses, extraction results, and validation errors. - Included a sample Arabic waqf text for testing purposes, following the PiCo ontology pattern.
166 lines
No EOL
4.7 KiB
Text
166 lines
No EOL
4.7 KiB
Text
|
|
```json
|
|
{
|
|
"pico_observation": {
|
|
"observation_id": "waqf_aleppo_1225h",
|
|
"observed_at": "2023-10-27T10:00:00Z",
|
|
"source_type": "waqf_document",
|
|
"source_reference": "Aleppo Waqf, 1225 H"
|
|
},
|
|
"persons": [
|
|
{
|
|
"person_index": 0,
|
|
"pnv_name": {
|
|
"literalName": "الحاج أحمد بن محمد العمري",
|
|
"literalName_romanized": "al-Hajj Ahmad ibn Muhammad al-Umari",
|
|
"givenName": "أحمد",
|
|
"givenName_romanized": "Ahmad",
|
|
"patronym": "محمد",
|
|
"patronym_romanized": "Muhammad",
|
|
"baseSurname": "العمري",
|
|
"baseSurname_romanized": "al-Umari",
|
|
"honorificPrefix": "الحاج",
|
|
"honorificPrefix_romanized": "al-Hajj"
|
|
},
|
|
"roles": [
|
|
{
|
|
"role_title": "تاجر",
|
|
"role_title_romanized": "tajir",
|
|
"role_in_source": "founder"
|
|
}
|
|
],
|
|
"biographical": {
|
|
"deceased": true,
|
|
"address": "مدينة حلب الشهباء"
|
|
},
|
|
"family_relationships": {
|
|
"parent": [
|
|
{
|
|
"person_index": 1,
|
|
"target_name": "محمد بن عبد الله العمري"
|
|
}
|
|
],
|
|
"children": []
|
|
},
|
|
"context": "The founder (waqif) of the endowment, a deceased merchant from Aleppo."
|
|
},
|
|
{
|
|
"person_index": 1,
|
|
"pnv_name": {
|
|
"literalName": "المرحوم محمد بن عبد الله العمري",
|
|
"literalName_romanized": "al-marhum Muhammad ibn Abd Allah al-Umari",
|
|
"givenName": "محمد",
|
|
"givenName_romanized": "Muhammad",
|
|
"patronym": "عبد الله",
|
|
"patronym_romanized": "Abd Allah",
|
|
"baseSurname": "العمري",
|
|
"baseSurname_romanized": "al-Umari"
|
|
},
|
|
"roles": [
|
|
{
|
|
"role_title": null,
|
|
"role_title_romanized": null,
|
|
"role_in_source": null
|
|
}
|
|
],
|
|
"biographical": {
|
|
"deceased": true,
|
|
"address": null
|
|
},
|
|
"family_relationships": {
|
|
"parent": [],
|
|
"children": [
|
|
{
|
|
"person_index": 0,
|
|
"target_name": "أحمد بن محمد العمري"
|
|
}
|
|
]
|
|
},
|
|
"context": "The deceased father of the founder, Ahmad al-Umari."
|
|
},
|
|
{
|
|
"person_index": 2,
|
|
"pnv_name": {
|
|
"literalName": "الحاج إبراهيم بن يوسف التركماني",
|
|
"literalName_romanized": "al-Hajj Ibrahim ibn Yusuf al-Turkmani",
|
|
"givenName": "إبراهيم",
|
|
"givenName_romanized": "Ibrahim",
|
|
"patronym": "يوسف",
|
|
"patronym_romanized": "Yusuf",
|
|
"baseSurname": "التركماني",
|
|
"baseSurname_romanized": "al-Turkmani",
|
|
"honorificPrefix": "الحاج",
|
|
"honorificPrefix_romanized": "al-Hajj"
|
|
},
|
|
"roles": [
|
|
{
|
|
"role_title": "شاهد",
|
|
"role_title_romanized": "shahid",
|
|
"role_in_source": "witness"
|
|
}
|
|
],
|
|
"biographical": {
|
|
"deceased": null,
|
|
"address": null
|
|
},
|
|
"family_relationships": {
|
|
"parent": [],
|
|
"children": []
|
|
},
|
|
"context": "One of the witnesses to the endowment deed."
|
|
},
|
|
{
|
|
"person_index": 3,
|
|
"pnv_name": {
|
|
"literalName": "السيد علي بن حسين الحلبي",
|
|
"literalName_romanized": "al-Sayyid Ali ibn Husayn al-Halabi",
|
|
"givenName": "علي",
|
|
"givenName_romanized": "Ali",
|
|
"patronym": "حسين",
|
|
"patronym_romanized": "Husayn",
|
|
"baseSurname": "الحلبي",
|
|
"baseSurname_romanized": "al-Halabi",
|
|
"honorificPrefix": "السيد",
|
|
"honorificPrefix_romanized": "al-Sayyid"
|
|
},
|
|
"roles": [
|
|
{
|
|
"role_title": "شاهد",
|
|
"role_title_romanized": "shahid",
|
|
"role_in_source": "witness"
|
|
}
|
|
],
|
|
"biographical": {
|
|
"deceased": null,
|
|
"address": null
|
|
},
|
|
"family_relationships": {
|
|
"parent": [],
|
|
"children": []
|
|
},
|
|
"context": "The second witness to the endowment deed."
|
|
}
|
|
],
|
|
"temporal_references": [
|
|
{
|
|
"expression": "شهر رجب سنة ألف ومائتين وخمس وعشرين هجرية",
|
|
"expression_romanized": "Shahr Rajab sanat alf wa mi'ayn wa khamsa wa 'ishrin hijriyyah",
|
|
"normalized": "1811-01",
|
|
"calendar": "Hijri",
|
|
"type": "DATE"
|
|
}
|
|
],
|
|
"locations_mentioned": [
|
|
{
|
|
"name": "حلب الشهباء",
|
|
"name_romanized": "Halab al-Shahba'",
|
|
"type": "city"
|
|
},
|
|
{
|
|
"name": "محلة الجديدة",
|
|
"name_romanized": "Mahallat al-Jadida",
|
|
"type": "neighborhood"
|
|
}
|
|
]
|
|
}
|
|
```