- Introduced `test_nlp_extractor.py` with unit tests for the InstitutionExtractor, covering various extraction patterns (ISIL, Wikidata, VIAF, city names) and ensuring proper classification of institutions (museum, library, archive).
- Added tests for extracted entities and result handling to validate the extraction process.
- Created `test_partnership_rdf_integration.py` to validate the end-to-end process of extracting partnerships from a conversation and exporting them to RDF format.
- Implemented tests for temporal properties in partnerships and ensured compliance with W3C Organization Ontology patterns.
- Verified that extracted partnerships are correctly linked with PROV-O provenance metadata.
67 lines
2 KiB
Python
67 lines
2 KiB
Python
#!/usr/bin/env python3
|
|
"""Direct Wikidata searches for major Egyptian institutions."""
|
|
|
|
from SPARQLWrapper import SPARQLWrapper, JSON
|
|
|
|
def search_wikidata(search_term):
    """Query the Wikidata SPARQL endpoint for Egyptian institutions matching a term.

    Performs a case-insensitive substring match of *search_term* against entity
    labels, restricted to items whose country (P17) is Egypt (Q79). The ISIL
    identifier (P791) is fetched when present; labels are resolved via the
    label service in English and Arabic.

    Args:
        search_term: Free-text term matched case-insensitively against labels.

    Returns:
        A list of SPARQL result bindings (dicts), at most 5 entries.
        An empty list if the query fails for any reason.
    """
    endpoint = "https://query.wikidata.org/sparql"

    # Escape backslashes and double quotes so the term cannot break out of the
    # quoted SPARQL string literal (previously a term containing '"' produced
    # a malformed query).
    safe_term = search_term.lower().replace("\\", "\\\\").replace('"', '\\"')

    query = f"""
    SELECT DISTINCT ?item ?itemLabel ?itemDescription ?isil WHERE {{
        ?item rdfs:label ?label .
        FILTER(CONTAINS(LCASE(?label), "{safe_term}"))
        ?item wdt:P17 wd:Q79 . # Country: Egypt
        OPTIONAL {{ ?item wdt:P791 ?isil }}
        SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en,ar" }}
    }}
    LIMIT 5
    """

    sparql = SPARQLWrapper(endpoint)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)

    try:
        results = sparql.query().convert()
        return results['results']['bindings']
    except Exception as e:
        # Best-effort: network/endpoint failures are reported, not raised,
        # so the calling script can continue with its remaining searches.
        print(f" Error: {e}")
        return []
|
|
|
|
# --- Driver: run a fixed set of institution searches and print the results ---

BANNER = "=" * 80

print(BANNER)
print("🔍 DIRECT WIKIDATA SEARCHES - Major Egyptian Institutions")
print(BANNER)
print()

# (query term, human-readable institution name)
searches = [
    ("Egyptian Museum", "Egyptian Museum Cairo (EMC)"),
    ("Grand Egyptian Museum", "Grand Egyptian Museum (GEM)"),
    ("Bibliotheca Alexandrina", "Bibliotheca Alexandrina"),
    ("Dar al-Kutub", "Egyptian National Library and Archives"),
    ("National Archives Egypt", "National Archives of Egypt"),
]

for term, institution in searches:
    print(f"🔎 Searching: '{term}' ({institution})")
    print("-" * 80)

    bindings = search_wikidata(term)

    if not bindings:
        print(" ❌ No results found in Wikidata\n")
        continue

    for rank, binding in enumerate(bindings, start=1):
        # Bindings follow the SPARQL JSON results layout: {"var": {"value": ...}}.
        qid = binding['item']['value'].split('/')[-1]
        label = binding.get('itemLabel', {}).get('value', 'No label')
        desc = binding.get('itemDescription', {}).get('value', 'No description')
        isil = binding.get('isil', {}).get('value', 'N/A')

        print(f" {rank}. {label} ({qid})")
        print(f" Description: {desc}")
        print(f" ISIL: {isil}")
        print()

print(BANNER)
|
|
|