glam/scripts/test_alternative_names.py
kempersc e5a532a8bc Add comprehensive tests for NLP institution extraction and RDF partnership integration
- Introduced `test_nlp_extractor.py` with unit tests for the InstitutionExtractor, covering various extraction patterns (ISIL, Wikidata, VIAF, city names) and ensuring proper classification of institutions (museum, library, archive).
- Added tests for extracted entities and result handling to validate the extraction process.
- Created `test_partnership_rdf_integration.py` to validate the end-to-end process of extracting partnerships from a conversation and exporting them to RDF format.
- Implemented tests for temporal properties in partnerships and ensured compliance with W3C Organization Ontology patterns.
- Verified that extracted partnerships are correctly linked with PROV-O provenance metadata.
2025-11-19 23:20:47 +01:00

151 lines
5 KiB
Python

#!/usr/bin/env python3
"""
Test alternative name matching for Tunisia enrichment.
Tests Diocesan Library and Kerkouane Museum before full run.
"""
import yaml
from pathlib import Path
import sys
# Import the search function
sys.path.insert(0, str(Path(__file__).parent))
from enrich_tunisia_wikidata_validated import search_wikidata_with_validation
def test_diocesan_library():
    """Check whether alternative names locate the Diocesan Library of Tunis.

    ``search_wikidata_with_validation`` is called twice with the same name,
    institution type and city: once with ``alternative_names=None`` as a
    baseline, then with the French and reordered English names. Both
    outcomes are printed. A truthy result is read as a mapping with
    ``qid``, ``name`` and ``match_score`` keys; ``matched_name`` is optional.

    Returns:
        bool: True only when the alternative-name lookup returned a result
        whose ``qid`` is ``Q28149782``. The baseline lookup is informational
        and never affects the return value.

    NOTE(review): this function reports via ``print`` and a boolean return
    rather than ``assert``; if pytest collects this file it may warn about
    the non-None return -- confirm how the script is meant to be run.
    """
    expected_qid = 'Q28149782'
    rule = "=" * 60
    label = "Diocesan Library of Tunis"
    aliases = [
        "Bibliothèque Diocésaine de Tunis",
        "Tunis Diocesan Library",
    ]
    # Arguments common to the baseline and the alternative-name lookup.
    lookup = {'name': label, 'inst_type': 'LIBRARY', 'city': 'Tunis'}

    print("\n" + rule)
    print("TEST 1: Diocesan Library of Tunis")
    print(rule)
    print(f"Primary name: {label}")
    print(f"Alternative names: {aliases}")
    print()

    # Baseline: primary name only, so the effect of the aliases is visible.
    print("🔍 Test 1a: Searching with PRIMARY name only...")
    baseline = search_wikidata_with_validation(**lookup, alternative_names=None)
    if not baseline:
        print(f" ❌ NOT FOUND with primary name")
    else:
        print(f" ✅ Found: {baseline['qid']} - {baseline['name']}")
        print(f" Match score: {baseline['match_score']:.1f}%")
        print(f" Matched name: {baseline.get('matched_name', 'N/A')}")

    # Same lookup again, this time allowing the alternative names.
    print("\n🔍 Test 1b: Searching with ALTERNATIVE names...")
    with_aliases = search_wikidata_with_validation(
        **lookup, alternative_names=aliases
    )
    if not with_aliases:
        print(f" ❌ NOT FOUND even with alternative names")
    else:
        print(f" ✅ Found: {with_aliases['qid']} - {with_aliases['name']}")
        print(f" Match score: {with_aliases['match_score']:.1f}%")
        print(f" Matched name: {with_aliases.get('matched_name', 'N/A')}")
        if with_aliases['qid'] == expected_qid:
            print(f" ✅✅ CORRECT Q-NUMBER FOUND!")
        else:
            print(f" ⚠️ Wrong Q-number (expected Q28149782)")

    # Pass/fail hinges on the expected Q-number, not merely on finding a hit.
    if with_aliases is None:
        return False
    return with_aliases.get('qid') == expected_qid
def test_kerkouane_museum():
    """Check whether alternative names locate the Kerkouane site and museum.

    ``search_wikidata_with_validation`` is called twice with the same name,
    institution type and city: once with ``alternative_names=None`` as a
    baseline, then with the French and shortened English names. Both
    outcomes are printed. A truthy result is read as a mapping with
    ``qid``, ``name`` and ``match_score`` keys; ``matched_name`` and
    ``entity_type`` are optional.

    Returns:
        bool: True when the alternative-name lookup returned anything other
        than ``None``. The Q-number ``Q3329437`` is only reported, not
        enforced: a different Q-number still counts as a pass.

    NOTE(review): this function reports via ``print`` and a boolean return
    rather than ``assert``; if pytest collects this file it may warn about
    the non-None return -- confirm how the script is meant to be run.
    """
    rule = "=" * 60
    label = "Kerkouane Archaeological Site and Museum"
    aliases = [
        "Musée de Kerkouane",
        "Kerkouane Museum",
    ]
    # Arguments common to the baseline and the alternative-name lookup.
    lookup = {'name': label, 'inst_type': 'MUSEUM', 'city': 'El Haouaria'}

    print("\n" + rule)
    print("TEST 2: Kerkouane Archaeological Site and Museum")
    print(rule)
    print(f"Primary name: {label}")
    print(f"Alternative names: {aliases}")
    print()

    # Baseline: primary name only, so the effect of the aliases is visible.
    print("🔍 Test 2a: Searching with PRIMARY name only...")
    baseline = search_wikidata_with_validation(**lookup, alternative_names=None)
    if not baseline:
        print(f" ❌ NOT FOUND with primary name")
    else:
        print(f" ✅ Found: {baseline['qid']} - {baseline['name']}")
        print(f" Match score: {baseline['match_score']:.1f}%")
        print(f" Entity type: {baseline.get('entity_type', 'N/A')}")

    # Same lookup again, this time allowing the alternative names.
    print("\n🔍 Test 2b: Searching with ALTERNATIVE names...")
    with_aliases = search_wikidata_with_validation(
        **lookup, alternative_names=aliases
    )
    if not with_aliases:
        print(f" ❌ NOT FOUND even with alternative names")
    else:
        print(f" ✅ Found: {with_aliases['qid']} - {with_aliases['name']}")
        print(f" Match score: {with_aliases['match_score']:.1f}%")
        print(f" Matched name: {with_aliases.get('matched_name', 'N/A')}")
        print(f" Entity type: {with_aliases.get('entity_type', 'N/A')}")
        if with_aliases['qid'] == 'Q3329437':
            print(f" ✅✅ CORRECT Q-NUMBER FOUND!")
        else:
            print(f" ⚠️ Different Q-number (might be correct)")

    # The printout above uses truthiness while the verdict uses an explicit
    # None check, so a falsy but non-None result would print "NOT FOUND"
    # yet still count as a pass.
    found_something = with_aliases is not None
    return found_something
def main():
    """Run both alternative-name checks and print a pass/fail summary.

    The Diocesan Library check runs first, then the Kerkouane Museum check.

    Returns:
        int: 0 when at least one check passed (full or partial success),
        1 only when both checks failed. The value is intended to be used as
        the process exit status.
    """
    print("\n🧪 TESTING ALTERNATIVE NAME MATCHING")
    print("Testing whether French alternative names improve match scores")
    print()

    library_ok = test_diocesan_library()
    museum_ok = test_kerkouane_museum()

    rule = "=" * 60
    print("\n" + rule)
    print("TEST SUMMARY")
    print(rule)
    print(f"Diocesan Library: {'✅ PASS' if library_ok else '❌ FAIL'}")
    print(f"Kerkouane Museum: {'✅ PASS' if museum_ok else '❌ FAIL'}")

    # Only a complete failure produces a non-zero status.
    if not (library_ok or museum_ok):
        print("\n❌ TESTS FAILED - alternative name approach needs adjustment")
        return 1

    if library_ok and museum_ok:
        print("\n✅✅ ALL TESTS PASSED! Ready for full enrichment run.")
    else:
        # Exactly one check passed; still reported with status 0.
        print("\n⚠️ PARTIAL SUCCESS - review results before full run")
    return 0
if __name__ == '__main__':
    # Script entry point: main()'s return value becomes the exit status.
    raise SystemExit(main())