- Introduced `test_nlp_extractor.py` with unit tests for the InstitutionExtractor, covering various extraction patterns (ISIL, Wikidata, VIAF, city names) and ensuring proper classification of institutions (museum, library, archive).
- Added tests for extracted entities and result handling to validate the extraction process.
- Created `test_partnership_rdf_integration.py` to validate the end-to-end process of extracting partnerships from a conversation and exporting them to RDF format.
- Implemented tests for temporal properties in partnerships and ensured compliance with W3C Organization Ontology patterns.
- Verified that extracted partnerships are correctly linked with PROV-O provenance metadata.
64 lines
2 KiB
Python
64 lines
2 KiB
Python
#!/usr/bin/env python3
"""
Debug script to test Wikidata enrichment for specific institutions.

For each entry in ``test_cases`` the script calls
``search_wikidata_with_validation`` with the institution's name, type and
city, then prints whether a result came back and whether its QID equals the
expected one recorded in the test case.
"""

import sys

# NOTE(review): machine-specific absolute path; the import below only resolves
# on a checkout located here. Kept as-is because the repository layout is not
# visible from this file -- consider deriving it from __file__ instead.
sys.path.insert(0, '/Users/kempersc/apps/glam')

# Must come after the sys.path tweak above, hence the late import.
from scripts.enrich_tunisia_wikidata_validated import search_wikidata_with_validation  # noqa: E402

# Test cases from previous session findings.
# 'issue' records why each institution was not matched previously.
test_cases = [
    {
        'name': 'Kerkouane Archaeological Site and Museum',
        'type': 'MUSEUM',
        'city': 'Kerkouane',
        'expected_qid': 'Q3329437',
        'expected_label': 'musée archéologique de Kerkouane',
        'issue': 'Match score 67% (below 70% threshold)',
    },
    {
        'name': 'Diocesan Library of Tunis',
        'type': 'LIBRARY',
        'city': 'Tunis',
        'expected_qid': 'Q28149782',
        'expected_label': 'bibliothèque diocésaine de Tunis',
        'issue': 'Instance type Q105338594 not in mapping',
    },
]


def main() -> None:
    """Run every test case through the Wikidata search and print a report.

    The search result is treated as a mapping: ``'qid'`` is read directly,
    while ``'name'``, ``'entity_type'`` and ``'match_score'`` are optional and
    fall back to a placeholder when absent. A falsy result is reported as
    "not found".
    """
    print("Wikidata Enrichment Debug Test")
    print("=" * 60)

    for i, test in enumerate(test_cases, 1):
        print(f"\nTest {i}: {test['name']}")
        print(f" Type: {test['type']}")
        print(f" City: {test['city']}")
        print(f" Expected: {test['expected_qid']} - {test['expected_label']}")
        print(f" Previous issue: {test['issue']}")
        print("\n Running search...")

        result = search_wikidata_with_validation(
            name=test['name'],
            inst_type=test['type'],
            city=test['city'],
        )

        # Guard clause: nothing came back, report and move to the next case.
        if not result:
            print(" ❌ NOT FOUND")
            print(" May still be below 65% threshold or type validation failing")
            continue

        print(" ✅ FOUND!")
        print(f" QID: {result['qid']}")
        print(f" Name: {result.get('name', 'N/A')}")
        print(f" Entity type: {result.get('entity_type', 'N/A')}")
        print(f" Match score: {result.get('match_score', 0):.1f}%")

        # A hit only counts as a success when it is the entity we expected.
        if result['qid'] == test['expected_qid']:
            print(" ✅ CORRECT MATCH! Threshold fix worked.")
        else:
            print(f" ⚠️ Different entity matched (expected {test['expected_qid']})")

    print("\n" + "=" * 60)
    print("Debug test complete")


if __name__ == "__main__":
    main()