glam/scripts/test_wikidata_debug.py
kempersc e5a532a8bc Add comprehensive tests for NLP institution extraction and RDF partnership integration
- Introduced `test_nlp_extractor.py` with unit tests for the InstitutionExtractor, covering various extraction patterns (ISIL, Wikidata, VIAF, city names) and ensuring proper classification of institutions (museum, library, archive).
- Added tests for extracted entities and result handling to validate the extraction process.
- Created `test_partnership_rdf_integration.py` to validate the end-to-end process of extracting partnerships from a conversation and exporting them to RDF format.
- Implemented tests for temporal properties in partnerships and ensured compliance with W3C Organization Ontology patterns.
- Verified that extracted partnerships are correctly linked with PROV-O provenance metadata.
2025-11-19 23:20:47 +01:00

64 lines
2 KiB
Python

#!/usr/bin/env python3
"""
Debug script to test Wikidata enrichment for specific institutions.
"""
import sys
sys.path.insert(0, '/Users/kempersc/apps/glam')
from scripts.enrich_tunisia_wikidata_validated import search_wikidata_with_validation
# Test cases from previous session findings.
# Each entry records the institution to look up, the Wikidata entity we expect
# to get back, and the reason the lookup failed in the earlier session.
test_cases = [
    {
        'name': 'Kerkouane Archaeological Site and Museum',
        'type': 'MUSEUM',
        'city': 'Kerkouane',
        'expected_qid': 'Q3329437',
        'expected_label': 'musée archéologique de Kerkouane',
        'issue': 'Match score 67% (below 70% threshold)',
    },
    {
        'name': 'Diocesan Library of Tunis',
        'type': 'LIBRARY',
        'city': 'Tunis',
        'expected_qid': 'Q28149782',
        'expected_label': 'bibliothèque diocésaine de Tunis',
        'issue': 'Instance type Q105338594 not in mapping',
    },
]


def main(cases=None, search=None):
    """Run every debug case through the Wikidata search and print a report.

    Args:
        cases: Iterable of dicts carrying the keys ``name``, ``type``,
            ``city``, ``expected_qid``, ``expected_label`` and ``issue``.
            Defaults to the module-level ``test_cases``.
        search: Callable invoked with the keyword arguments ``name``,
            ``inst_type`` and ``city``. It must return a mapping holding at
            least a ``qid`` key (``name``, ``entity_type`` and ``match_score``
            are optional), or a falsy value when nothing matched. Defaults to
            ``search_wikidata_with_validation``.

    Raises:
        KeyError: If a case lacks one of the required keys, or a truthy
            search result has no ``qid`` entry.
    """
    if cases is None:
        cases = test_cases
    if search is None:
        # Resolved lazily so the driver can be exercised with a stand-in
        # search function without touching the real Wikidata lookup.
        search = search_wikidata_with_validation

    print("Wikidata Enrichment Debug Test")
    print("=" * 60)

    for index, case in enumerate(cases, 1):
        print(f"\nTest {index}: {case['name']}")
        print(f" Type: {case['type']}")
        print(f" City: {case['city']}")
        print(f" Expected: {case['expected_qid']} - {case['expected_label']}")
        print(f" Previous issue: {case['issue']}")
        print("\n Running search...")

        result = search(
            name=case['name'],
            inst_type=case['type'],
            city=case['city'],
        )

        # Guard clause: nothing came back, report it and move to the next case.
        if not result:
            print(" ❌ NOT FOUND")
            print(" May still be below 65% threshold or type validation failing")
            continue

        print(" ✅ FOUND!")
        print(f" QID: {result['qid']}")
        print(f" Name: {result.get('name', 'N/A')}")
        print(f" Entity type: {result.get('entity_type', 'N/A')}")
        print(f" Match score: {result.get('match_score', 0):.1f}%")

        if result['qid'] == case['expected_qid']:
            print(" ✅ CORRECT MATCH! Threshold fix worked.")
        else:
            print(f" ⚠️  Different entity matched (expected {case['expected_qid']})")

    print("\n" + "=" * 60)
    print("Debug test complete")


# The file name matches pytest's default ``test_*.py`` discovery pattern, so
# the module may be imported during test collection. Keeping the searches
# behind this guard stops them from running as an import side effect.
if __name__ == "__main__":
    main()