- Introduced `test_nlp_extractor.py` with unit tests for the InstitutionExtractor, covering various extraction patterns (ISIL, Wikidata, VIAF, city names) and ensuring proper classification of institutions (museum, library, archive). - Added tests for extracted entities and result handling to validate the extraction process. - Created `test_partnership_rdf_integration.py` to validate the end-to-end process of extracting partnerships from a conversation and exporting them to RDF format. - Implemented tests for temporal properties in partnerships and ensured compliance with W3C Organization Ontology patterns. - Verified that extracted partnerships are correctly linked with PROV-O provenance metadata.
58 lines
1.8 KiB
Python
58 lines
1.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Test the enrichment function directly for University of Sousse.
|
|
"""
|
|
|
|
import sys
|
|
import yaml
|
|
from pathlib import Path
|
|
|
|
# Put this script's directory on sys.path, then import the enrichment function
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
|
from enrich_tunisia_wikidata_validated import search_wikidata_with_validation
|
|
|
|
def main():
    """Run one hard-coded lookup for the University of Sousse and report it.

    Calls ``search_wikidata_with_validation`` with a fixed name, institution
    type and city (``timeout=60``) and prints a human-readable report to
    stdout: the matched fields when the call returns a truthy result, or a
    list of possible reasons for the miss otherwise.

    The result is read through ``.get(...)``, so it is presumably a dict-like
    mapping -- confirm against the enrichment module.
    """
    rule = "=" * 60

    print("Testing University of Sousse enrichment")
    print(rule)

    # Test data from our YAML
    name, inst_type, city = "University of Sousse", "UNIVERSITY", "Sousse"

    print("\nSearching for:")
    for label, value in (("Name", name), ("Type", inst_type), ("City", city)):
        print(f" {label}: {value}")

    print("\nCalling search_wikidata_with_validation()...")
    result = search_wikidata_with_validation(name, inst_type, city, timeout=60)

    print("\n" + rule)
    print("RESULT:")
    print(rule)

    # Handle the miss first so the match report below stays un-nested.
    if not result:
        print("❌ NO MATCH FOUND")
        print("\nThis suggests:")
        for hint in (
            " - Entity type validation may be filtering it out",
            " - Geographic validation may be too strict",
            " - Name fuzzy matching may be below threshold",
            " - Q3551673 may not be in the 200-result LIMIT",
        ):
            print(hint)
        return

    print("✅ MATCH FOUND!")
    print(f"\n QID: {result.get('qid')}")
    print(f" Name: {result.get('name')}")
    print(f" Description: {result.get('description', 'N/A')}")
    print(f" Entity Type: {result.get('entity_type')}")
    print(f" Match Score: {result.get('match_score')}%")

    # Optional identifiers are only shown when present and truthy.
    for label, key in (("VIAF", "viaf"), ("ISIL", "isil")):
        if result.get(key):
            print(f" {label}: {result.get(key)}")

    # Coordinates are gated on latitude alone, as in the original script.
    if result.get('latitude'):
        print(f" Coordinates: {result.get('latitude')}, {result.get('longitude')}")
|
|
|
|
# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|