- Introduced `test_nlp_extractor.py` with unit tests for the InstitutionExtractor, covering various extraction patterns (ISIL, Wikidata, VIAF, city names) and ensuring proper classification of institutions (museum, library, archive).
- Added tests for extracted entities and result handling to validate the extraction process.
- Created `test_partnership_rdf_integration.py` to validate the end-to-end process of extracting partnerships from a conversation and exporting them to RDF format.
- Implemented tests for temporal properties in partnerships and ensured compliance with W3C Organization Ontology patterns.
- Verified that extracted partnerships are correctly linked with PROV-O provenance metadata.
35 lines
1.1 KiB
Python
35 lines
1.1 KiB
Python
#!/usr/bin/env python3
"""Test enrichment with ONLY University of Sousse.

Loads the enhanced Tunisian institutions YAML file, locates the
"University of Sousse" entry, and runs it through the validated
Wikidata search to check that enrichment finds a matching QID.
"""

import yaml
from pathlib import Path

from scripts.enrich_tunisia_wikidata_validated import search_wikidata_with_validation


def _first_city(institution):
    """Return the city of the institution's first location, or '' if absent.

    Guards both a missing 'locations' key and an *empty* locations list:
    the original ``inst.get('locations', [{}])[0]`` expression raised
    IndexError when the key existed but held ``[]``.
    """
    locations = institution.get('locations') or [{}]
    return locations[0].get('city', '')


input_file = Path('data/instances/tunisia/tunisian_institutions_enhanced.yaml')

with open(input_file, 'r', encoding='utf-8') as f:
    data = yaml.safe_load(f)

institutions = data['institutions']

# Find University of Sousse and run enrichment on that single entry.
for inst in institutions:
    if inst.get('name') == 'University of Sousse':
        print(f"Testing: {inst['name']}")
        print(f"Type: {inst.get('institution_type')}")
        print(f"City: {_first_city(inst)}")

        result = search_wikidata_with_validation(
            inst['name'],
            inst.get('institution_type'),
            _first_city(inst),
        )

        if result:
            print(f"\n✅ SUCCESS: Found {result['qid']}")
            print(f"  Name: {result.get('name')}")
            print(f"  Score: {result.get('match_score')}")
        else:
            # Plain string: the message contains no placeholders (was an
            # f-string with nothing interpolated).
            print("\n❌ FAILURE: No match found")

        break
else:
    # for/else: runs only when the loop completed without `break`,
    # i.e. University of Sousse is missing from the input file.
    print("❌ University of Sousse not found in input file")