# glam/scripts/test_wikidata_debug.py

#!/usr/bin/env python3
"""
Debug script to test Wikidata enrichment for specific institutions.
"""

import sys
sys.path.insert(0, '/Users/kempersc/apps/glam')

from scripts.enrich_tunisia_wikidata_validated import search_wikidata_with_validation

# Cases carried over from the previous session's findings. Each row names an
# institution, the Wikidata entity expected for it, and the issue that was
# recorded for it at the time.
_CASE_FIELDS = ('name', 'type', 'city', 'expected_qid', 'expected_label', 'issue')

test_cases = [
    dict(zip(_CASE_FIELDS, row))
    for row in (
        (
            'Kerkouane Archaeological Site and Museum',
            'MUSEUM',
            'Kerkouane',
            'Q3329437',
            'musée archéologique de Kerkouane',
            'Match score 67% (below 70% threshold)',
        ),
        (
            'Diocesan Library of Tunis',
            'LIBRARY',
            'Tunis',
            'Q28149782',
            'bibliothèque diocésaine de Tunis',
            'Instance type Q105338594 not in mapping',
        ),
    )
]

# Driver: run every entry of test_cases through the validated Wikidata search
# and print, per case, whether an entity came back and whether its QID is the
# expected one. All output goes to stdout; nothing is returned or persisted.
print("Wikidata Enrichment Debug Test")
print("=" * 60)

for i, test in enumerate(test_cases, 1):
    # Echo the case together with the issue recorded for it earlier, so the
    # new outcome can be compared against it directly in the output.
    print(f"\nTest {i}: {test['name']}")
    print(f"  Type: {test['type']}")
    print(f"  City: {test['city']}")
    print(f"  Expected: {test['expected_qid']} - {test['expected_label']}")
    print(f"  Previous issue: {test['issue']}")
    print("\n  Running search...")

    # NOTE(review): result is used below as a mapping with a required 'qid'
    # key and optional 'name' / 'entity_type' / 'match_score' keys -- confirm
    # against search_wikidata_with_validation.
    result = search_wikidata_with_validation(
        name=test['name'],
        inst_type=test['type'],
        city=test['city'],
    )

    # Any falsy result is reported as "not found".
    if not result:
        print("  ❌ NOT FOUND")
        print("     May still be below 65% threshold or type validation failing")
        continue

    print("  ✅ FOUND!")
    print(f"     QID: {result['qid']}")
    print(f"     Name: {result.get('name', 'N/A')}")
    print(f"     Entity type: {result.get('entity_type', 'N/A')}")
    print(f"     Match score: {result.get('match_score', 0):.1f}%")

    if result['qid'] == test['expected_qid']:
        # Not every case failed on the score threshold, so report the case's
        # own recorded issue instead of always crediting a threshold fix.
        print(f"  ✅ CORRECT MATCH! Previous issue resolved: {test['issue']}")
    else:
        print(f"  ⚠️  Different entity matched (expected {test['expected_qid']})")

print("\n" + "=" * 60)
print("Debug test complete")