- Introduced `test_nlp_extractor.py` with unit tests for the InstitutionExtractor, covering various extraction patterns (ISIL, Wikidata, VIAF, city names) and ensuring proper classification of institutions (museum, library, archive). - Added tests for extracted entities and result handling to validate the extraction process. - Created `test_partnership_rdf_integration.py` to validate the end-to-end process of extracting partnerships from a conversation and exporting them to RDF format. - Implemented tests for temporal properties in partnerships and ensured compliance with W3C Organization Ontology patterns. - Verified that extracted partnerships are correctly linked with PROV-O provenance metadata.
151 lines
5 KiB
Python
151 lines
5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Test alternative name matching for Tunisia enrichment.
|
|
Tests Diocesan Library and Kerkouane Museum before full run.
|
|
"""
|
|
|
|
import yaml
|
|
from pathlib import Path
|
|
import sys
|
|
|
|
# Import the search function
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
|
from enrich_tunisia_wikidata_validated import search_wikidata_with_validation
|
|
|
|
def test_diocesan_library():
    """Exercise the Wikidata search for the Diocesan Library of Tunis.

    The search is run twice with the same name, institution type and city:
    first without alternative names (baseline), then with the French and
    reordered English alternatives. Both outcomes are printed.

    Returns:
        bool: True only when the search with alternative names returns a
        result whose 'qid' is Q28149782; False otherwise.
    """
    expected_qid = 'Q28149782'
    rule = "=" * 60

    print("\n" + rule)
    print("TEST 1: Diocesan Library of Tunis")
    print(rule)

    primary_name = "Diocesan Library of Tunis"
    alternative_names = [
        "Bibliothèque Diocésaine de Tunis",
        "Tunis Diocesan Library",
    ]

    print(f"Primary name: {primary_name}")
    print(f"Alternative names: {alternative_names}")
    print()

    # Arguments shared by the baseline and the alternative-name search.
    query = {
        'name': primary_name,
        'inst_type': 'LIBRARY',
        'city': 'Tunis',
    }

    # Baseline: primary name only.
    print("🔍 Test 1a: Searching with PRIMARY name only...")
    result_primary = search_wikidata_with_validation(
        alternative_names=None, **query
    )

    if not result_primary:
        print(" ❌ NOT FOUND with primary name")
    else:
        print(f" ✅ Found: {result_primary['qid']} - {result_primary['name']}")
        print(f" Match score: {result_primary['match_score']:.1f}%")
        print(f" Matched name: {result_primary.get('matched_name', 'N/A')}")

    # Second pass: same query, now with the alternative names supplied.
    print("\n🔍 Test 1b: Searching with ALTERNATIVE names...")
    result_alt = search_wikidata_with_validation(
        alternative_names=alternative_names, **query
    )

    if not result_alt:
        print(" ❌ NOT FOUND even with alternative names")
    else:
        print(f" ✅ Found: {result_alt['qid']} - {result_alt['name']}")
        print(f" Match score: {result_alt['match_score']:.1f}%")
        print(f" Matched name: {result_alt.get('matched_name', 'N/A')}")

        if result_alt['qid'] == expected_qid:
            print(" ✅✅ CORRECT Q-NUMBER FOUND!")
        else:
            print(" ⚠️ Wrong Q-number (expected Q28149782)")

    # Pass requires the exact expected entity, not merely any match.
    if result_alt is None:
        return False
    return result_alt.get('qid') == expected_qid
|
|
|
|
def test_kerkouane_museum():
    """Exercise the Wikidata search for the Kerkouane site and museum.

    The search is run twice with the same name, institution type and city:
    first without alternative names (baseline), then with the French and
    shortened English alternatives. Both outcomes are printed.

    Unlike the Diocesan Library check, a result with a Q-number other than
    Q3329437 is only flagged in the output ("might be correct") and still
    counts as a pass.

    Returns:
        bool: True when the search with alternative names produced a
        (truthy) result, False otherwise. This uses the same truthiness
        check as the printed outcome, so a "NOT FOUND" message can never be
        reported as a pass.
    """
    expected_qid = 'Q3329437'
    rule = "=" * 60

    print("\n" + rule)
    print("TEST 2: Kerkouane Archaeological Site and Museum")
    print(rule)

    primary_name = "Kerkouane Archaeological Site and Museum"
    alternative_names = [
        "Musée de Kerkouane",
        "Kerkouane Museum",
    ]

    print(f"Primary name: {primary_name}")
    print(f"Alternative names: {alternative_names}")
    print()

    # Test WITHOUT alternative names (baseline)
    print("🔍 Test 2a: Searching with PRIMARY name only...")
    result_primary = search_wikidata_with_validation(
        name=primary_name,
        inst_type='MUSEUM',
        city='El Haouaria',
        alternative_names=None,
    )

    if result_primary:
        print(f" ✅ Found: {result_primary['qid']} - {result_primary['name']}")
        print(f" Match score: {result_primary['match_score']:.1f}%")
        print(f" Entity type: {result_primary.get('entity_type', 'N/A')}")
    else:
        print(" ❌ NOT FOUND with primary name")

    # Test WITH alternative names
    print("\n🔍 Test 2b: Searching with ALTERNATIVE names...")
    result_alt = search_wikidata_with_validation(
        name=primary_name,
        inst_type='MUSEUM',
        city='El Haouaria',
        alternative_names=alternative_names,
    )

    if result_alt:
        print(f" ✅ Found: {result_alt['qid']} - {result_alt['name']}")
        print(f" Match score: {result_alt['match_score']:.1f}%")
        print(f" Matched name: {result_alt.get('matched_name', 'N/A')}")
        print(f" Entity type: {result_alt.get('entity_type', 'N/A')}")

        if result_alt['qid'] == expected_qid:
            print(" ✅✅ CORRECT Q-NUMBER FOUND!")
        else:
            # A different entity is tolerated here; it is only flagged.
            print(" ⚠️ Different Q-number (might be correct)")
    else:
        print(" ❌ NOT FOUND even with alternative names")

    # Truthiness (not `is not None`) keeps the returned flag in agreement
    # with the branch printed above for falsy non-None results.
    return bool(result_alt)
|
|
|
|
def main():
    """Run both alternative-name checks and print a pass/fail summary.

    Returns:
        int: 1 when both checks fail; 0 when at least one passes (a partial
        success prints a warning but still returns 0).
    """
    print("\n🧪 TESTING ALTERNATIVE NAME MATCHING")
    print("Testing whether French alternative names improve match scores")
    print()

    library_ok = test_diocesan_library()
    museum_ok = test_kerkouane_museum()

    rule = "=" * 60
    print("\n" + rule)
    print("TEST SUMMARY")
    print(rule)

    outcomes = (
        ("Diocesan Library", library_ok),
        ("Kerkouane Museum", museum_ok),
    )
    for label, passed in outcomes:
        print(f"{label}: {'✅ PASS' if passed else '❌ FAIL'}")

    # Handle total failure first; every remaining case exits with 0.
    if not (library_ok or museum_ok):
        print("\n❌ TESTS FAILED - alternative name approach needs adjustment")
        return 1

    if library_ok and museum_ok:
        print("\n✅✅ ALL TESTS PASSED! Ready for full enrichment run.")
    else:
        print("\n⚠️ PARTIAL SUCCESS - review results before full run")
    return 0
|
|
|
|
if __name__ == '__main__':
    # Script entry point: main()'s return value becomes the process exit status.
    raise SystemExit(main())
|