glam/scripts/test_sousse_enrichment.py
kempersc e5a532a8bc Add comprehensive tests for NLP institution extraction and RDF partnership integration
- Introduced `test_nlp_extractor.py` with unit tests for the InstitutionExtractor, covering various extraction patterns (ISIL, Wikidata, VIAF, city names) and ensuring proper classification of institutions (museum, library, archive).
- Added tests for extracted entities and result handling to validate the extraction process.
- Created `test_partnership_rdf_integration.py` to validate the end-to-end process of extracting partnerships from a conversation and exporting them to RDF format.
- Implemented tests for temporal properties in partnerships and ensured compliance with W3C Organization Ontology patterns.
- Verified that extracted partnerships are correctly linked with PROV-O provenance metadata.
2025-11-19 23:20:47 +01:00

58 lines
1.8 KiB
Python

#!/usr/bin/env python3
"""
Test the enrichment function directly for University of Sousse.
"""
import sys
import yaml
from pathlib import Path
# Import the enrichment function
sys.path.insert(0, str(Path(__file__).parent))
from enrich_tunisia_wikidata_validated import search_wikidata_with_validation
def main(search=None):
    """Run one Wikidata enrichment lookup for University of Sousse and print the outcome.

    The lookup is performed with fixed test inputs (name, institution type,
    city) and a 60-second timeout. A truthy result is read with ``.get`` and
    its fields are printed; a falsy result prints a list of possible reasons
    why no match was returned.

    Args:
        search: Optional callable used instead of
            ``search_wikidata_with_validation``. It is invoked as
            ``search(name, inst_type, city, timeout=60)``. Defaults to the
            function imported at module level.

    Returns:
        None. All output goes to stdout.
    """
    if search is None:
        # Resolve the default lazily so callers can swap in another lookup.
        search = search_wikidata_with_validation

    separator = "=" * 60

    print("Testing University of Sousse enrichment")
    print(separator)

    # Test data from our YAML
    name = "University of Sousse"
    inst_type = "UNIVERSITY"
    city = "Sousse"

    print("\nSearching for:")
    print(f" Name: {name}")
    print(f" Type: {inst_type}")
    print(f" City: {city}")

    print("\nCalling search_wikidata_with_validation()...")
    result = search(name, inst_type, city, timeout=60)

    print("\n" + separator)
    print("RESULT:")
    print(separator)

    if not result:
        print("❌ NO MATCH FOUND")
        print("\nThis suggests:")
        print(" - Entity type validation may be filtering it out")
        print(" - Geographic validation may be too strict")
        print(" - Name fuzzy matching may be below threshold")
        print(" - Q3551673 may not be in the 200-result LIMIT")
        return

    print("✅ MATCH FOUND!")
    print(f"\n QID: {result.get('qid')}")
    print(f" Name: {result.get('name')}")
    print(f" Description: {result.get('description', 'N/A')}")
    print(f" Entity Type: {result.get('entity_type')}")
    print(f" Match Score: {result.get('match_score')}%")

    # Optional identifiers are only shown when present and non-empty.
    if result.get('viaf'):
        print(f" VIAF: {result.get('viaf')}")
    if result.get('isil'):
        print(f" ISIL: {result.get('isil')}")

    # Compare against None rather than relying on truthiness: a latitude of
    # 0 / 0.0 is a valid coordinate and must not be treated as "missing".
    latitude = result.get('latitude')
    if latitude is not None:
        print(f" Coordinates: {latitude}, {result.get('longitude')}")
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()