- Introduced `test_nlp_extractor.py` with unit tests for the InstitutionExtractor, covering various extraction patterns (ISIL, Wikidata, VIAF, city names) and ensuring proper classification of institutions (museum, library, archive). - Added tests for extracted entities and result handling to validate the extraction process. - Created `test_partnership_rdf_integration.py` to validate the end-to-end process of extracting partnerships from a conversation and exporting them to RDF format. - Implemented tests for temporal properties in partnerships and ensured compliance with W3C Organization Ontology patterns. - Verified that extracted partnerships are correctly linked with PROV-O provenance metadata.
53 lines
2 KiB
Python
53 lines
2 KiB
Python
#!/usr/bin/env python3
"""Replace incorrect Wikidata match for Museum of Islamic Art Cairo.

INCORRECT: Q6940902 (Museum of Islamic Ceramics)
CORRECT:   Q3330629 (Museum of Islamic Art, Cairo)
"""

from pathlib import Path

# Correct Wikidata entity for the Museum of Islamic Art, Cairo,
# verified manually via a Wikidata SPARQL query.
CORRECT_QID = "Q3330629"

# Exact dataset name of the institution whose match is being repaired.
TARGET_NAME = "Museum of Islamic Art Cairo"

# Dataset is corrected in place (read and rewritten at the same path).
INPUT_FILE = Path("data/instances/egypt_institutions_wikidata_corrected.yaml")


def fix_wikidata_match(institutions):
    """Correct the target institution's Wikidata identifier in place.

    Scans *institutions* (list of dicts as loaded from the YAML dataset)
    for the entry named 'Museum of Islamic Art Cairo', rewrites its
    Wikidata identifier value and URL to Q3330629, and refreshes the
    provenance note when a ``provenance`` mapping is present.

    Returns the previous Q-number string if a Wikidata identifier was
    corrected, otherwise ``None`` (institution missing, or it has no
    Wikidata identifier).
    """
    for inst in institutions:
        if inst.get("name") != TARGET_NAME:
            continue
        old_qid = None
        # Update every Wikidata-scheme identifier on the matching record.
        for identifier in inst.get("identifiers", []):
            if identifier.get("identifier_scheme") == "Wikidata":
                old_qid = identifier.get("identifier_value")
                identifier["identifier_value"] = CORRECT_QID
                identifier["identifier_url"] = (
                    f"https://www.wikidata.org/wiki/{CORRECT_QID}"
                )
        if "provenance" in inst:
            inst["provenance"]["wikidata_match_note"] = (
                "Manually corrected from Q6940902 (Museum of Islamic Ceramics) "
                "to Q3330629 (Museum of Islamic Art) via SPARQL query verification."
            )
        # Only one matching institution is expected; stop at the first.
        return old_qid
    return None


def main():
    """Load the dataset, apply the correction, and write it back.

    Raises SystemExit with a message if no Wikidata identifier was found,
    instead of silently rewriting the file and reporting success (which is
    what the previous version of this script did).
    """
    # PyYAML is imported here so the pure correction logic above stays
    # importable without the third-party dependency installed.
    import yaml

    with open(INPUT_FILE, "r", encoding="utf-8") as f:
        institutions = yaml.safe_load(f)

    print("🔧 Fixing Museum of Islamic Art Cairo Wikidata match...\n")

    old_qid = fix_wikidata_match(institutions)
    if old_qid is None:
        raise SystemExit(
            f"❌ No Wikidata identifier found for '{TARGET_NAME}' — dataset left unchanged"
        )

    print("✅ CORRECTED: Museum of Islamic Art Cairo")
    print(f"   Old: {old_qid} (Museum of Islamic Ceramics) ❌")
    print("   New: Q3330629 (Museum of Islamic Art) ✅")
    print("   Method: Manual Wikidata SPARQL verification\n")

    # Write corrected dataset back to the same file.
    with open(INPUT_FILE, "w", encoding="utf-8") as f:
        yaml.dump(institutions, f, allow_unicode=True, sort_keys=False, width=120)

    print("=" * 80)
    print("✅ Museum of Islamic Art Cairo now correctly linked to Q3330629")
    print(f"💾 Updated: {INPUT_FILE}")
    print("=" * 80)


if __name__ == "__main__":
    main()