glam/scripts/search_major_egypt_wikidata.py
kempersc e5a532a8bc Add comprehensive tests for NLP institution extraction and RDF partnership integration
- Introduced `test_nlp_extractor.py` with unit tests for the InstitutionExtractor, covering various extraction patterns (ISIL, Wikidata, VIAF, city names) and ensuring proper classification of institutions (museum, library, archive).
- Added tests for extracted entities and result handling to validate the extraction process.
- Created `test_partnership_rdf_integration.py` to validate the end-to-end process of extracting partnerships from a conversation and exporting them to RDF format.
- Implemented tests for temporal properties in partnerships and ensured compliance with W3C Organization Ontology patterns.
- Verified that extracted partnerships are correctly linked with PROV-O provenance metadata.
2025-11-19 23:20:47 +01:00

67 lines
2 KiB
Python

#!/usr/bin/env python3
"""Direct Wikidata searches for major Egyptian institutions."""
from SPARQLWrapper import SPARQLWrapper, JSON
def _escape_sparql_literal(text):
    """Escape *text* so it is safe inside a double-quoted SPARQL string literal."""
    # Backslash goes first so the escapes introduced below are not doubled.
    return (
        text.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )


def search_wikidata(search_term):
    """Search Wikidata for Egyptian items whose label contains a term.

    The match is a case-insensitive substring test on ``rdfs:label``,
    restricted to items with ``wdt:P17 wd:Q79`` (country: Egypt). At most
    five rows are requested.

    Args:
        search_term: Text to look for inside item labels. It is lowercased
            and escaped before being embedded in the query, so quotes,
            backslashes and line breaks in the term cannot break or alter
            the SPARQL.

    Returns:
        The ``results.bindings`` list from the endpoint's JSON response.
        Each binding holds ``item`` and, when bound, ``itemLabel``,
        ``itemDescription`` and ``isil``. On any failure the error is
        printed and an empty list is returned.
    """
    endpoint = "https://query.wikidata.org/sparql"

    # The term lands inside "..." in the FILTER below; escape it first.
    needle = _escape_sparql_literal(search_term.lower())

    query = f"""
    SELECT DISTINCT ?item ?itemLabel ?itemDescription ?isil WHERE {{
      ?item rdfs:label ?label .
      FILTER(CONTAINS(LCASE(?label), "{needle}"))
      ?item wdt:P17 wd:Q79 .  # Country: Egypt
      OPTIONAL {{ ?item wdt:P791 ?isil }}
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en,ar" }}
    }}
    LIMIT 5
    """

    sparql = SPARQLWrapper(endpoint)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)

    try:
        results = sparql.query().convert()
        return results['results']['bindings']
    except Exception as e:
        # Deliberately best-effort: one failed lookup (network error, timeout,
        # unexpected payload) must not abort the remaining searches.
        print(f" Error: {e}")
        return []
# Report driver: run one Wikidata lookup per institution and print the hits.
heavy_rule = "=" * 80
light_rule = "-" * 80


def _binding_value(binding, key, fallback):
    """Return the ``value`` of an optional SPARQL binding, or *fallback* if unbound."""
    return binding.get(key, {}).get('value', fallback)


print(heavy_rule)
print("🔍 DIRECT WIKIDATA SEARCHES - Major Egyptian Institutions")
print(heavy_rule)
print()

# (label substring to search for, full institution name shown in the report)
searches = [
    ("Egyptian Museum", "Egyptian Museum Cairo (EMC)"),
    ("Grand Egyptian Museum", "Grand Egyptian Museum (GEM)"),
    ("Bibliotheca Alexandrina", "Bibliotheca Alexandrina"),
    ("Dar al-Kutub", "Egyptian National Library and Archives"),
    ("National Archives Egypt", "National Archives of Egypt"),
]

for term, institution in searches:
    print(f"🔎 Searching: '{term}' ({institution})")
    print(light_rule)

    bindings = search_wikidata(term)
    if bindings:
        for position, binding in enumerate(bindings, start=1):
            # The entity URI ends in the Q-identifier; keep only that tail.
            qid = binding['item']['value'].rsplit('/', 1)[-1]
            label = _binding_value(binding, 'itemLabel', 'No label')
            description = _binding_value(binding, 'itemDescription', 'No description')
            isil_code = _binding_value(binding, 'isil', 'N/A')

            print(f" {position}. {label} ({qid})")
            print(f" Description: {description}")
            print(f" ISIL: {isil_code}")
            print()
    else:
        print(" ❌ No results found in Wikidata\n")

print(heavy_rule)