- Introduced `test_nlp_extractor.py` with unit tests for the InstitutionExtractor, covering various extraction patterns (ISIL, Wikidata, VIAF, city names) and ensuring proper classification of institutions (museum, library, archive).
- Added tests for extracted entities and result handling to validate the extraction process.
- Created `test_partnership_rdf_integration.py` to validate the end-to-end process of extracting partnerships from a conversation and exporting them to RDF format.
- Implemented tests for temporal properties in partnerships and ensured compliance with W3C Organization Ontology patterns.
- Verified that extracted partnerships are correctly linked with PROV-O provenance metadata.
89 lines
3.3 KiB
Python
89 lines
3.3 KiB
Python
#!/usr/bin/env python3
"""
Manual Wikidata verification via direct URL construction.

When the Wikidata API is rate-limited, we can construct likely Wikidata URLs
and document them for manual verification.
"""

# High-priority targets based on likelihood of Wikidata entries.
#
# Each record describes one institution to look up by hand:
#   name / city / type / notes  - free-text descriptive fields
#   likely_q_numbers            - list of (q_id, hint) tuples; a q_id of "Q?"
#                                 is a placeholder for "identifier not known
#                                 yet", in which case the hint carries the
#                                 search text to try instead.
targets = [
    {
        "name": "Museo Rodulfo Philippi",
        "city": "Chañaral",
        "type": "MUSEUM",
        "notes": "Named after Rudolph Amandus Philippi (Q61987), famous German-Chilean naturalist",
        "likely_q_numbers": [
            ("Q61987", "Rudolph Amandus Philippi (person) - check if museum mentioned"),
            ("Q?", "Search: 'Museo Philippi Chañaral Chile'"),
        ],
    },
    {
        "name": "Museo Rudolph Philippi",
        "city": "Valdivia",
        "type": "MUSEUM",
        "notes": "Also named after Rudolph Amandus Philippi, may be same network as Chañaral museum",
        "likely_q_numbers": [
            ("Q61987", "Rudolph Amandus Philippi (person) - check if museum mentioned"),
            ("Q?", "Search: 'Museo Philippi Valdivia Chile'"),
        ],
    },
    {
        "name": "Instituto Alemán Puerto Montt",
        "city": "Puerto Montt",
        "type": "MIXED",
        "notes": "German school in Chile - German schools often have Wikidata entries",
        "likely_q_numbers": [
            ("Q?", "Search: 'Instituto Alemán Puerto Montt'"),
            ("Q?", "Search: 'Deutsche Schule Puerto Montt'"),
        ],
    },
    {
        "name": "Fundación Iglesias Patrimoniales",
        "city": "Unknown",
        "type": "RESEARCH_CENTER",
        "notes": "UNESCO-related foundation for heritage churches",
        "likely_q_numbers": [
            ("Q?", "Search: 'Fundación Iglesias Patrimoniales Chile UNESCO'"),
        ],
    },
    {
        "name": "Museo de las Iglesias",
        "city": "Castro",
        "type": "MUSEUM",
        "notes": "Museum in Castro, Chiloé - UNESCO World Heritage area",
        "likely_q_numbers": [
            ("Q?", "Search: 'Museo Iglesias Castro Chiloé Chile'"),
        ],
    },
]

# --- Print the manual verification guide -----------------------------------
# Writes a human-readable checklist to stdout: a banner, one numbered section
# per entry in `targets`, and a footer describing the results-file format.

_RULE = "=" * 100
_SEARCH_LABEL = "Search: "


def _strip_search_label(hint):
    """Return *hint* without a leading "Search: " label, if it has one.

    The search line printed below supplies its own "Search: " label, so a
    hint that already starts with that label would otherwise be printed as
    "Search: Search: ...".
    """
    if hint.startswith(_SEARCH_LABEL):
        return hint[len(_SEARCH_LABEL):]
    return hint


print(_RULE)
print("MANUAL WIKIDATA VERIFICATION GUIDE - BATCH 14")
print(_RULE)
print("\nWhen Wikidata API is rate-limited, verify these institutions manually via web browser:\n")

for i, target in enumerate(targets, 1):
    print(f"\n{i}. {target['name']} ({target['city']})")
    print(f" Type: {target['type']}")
    print(f" Notes: {target['notes']}")
    print("\n Manual verification steps:")

    for q_id, search_query in target['likely_q_numbers']:
        if q_id.startswith("Q") and "?" not in q_id:
            # Concrete identifier: link straight to its Wikidata page.
            print(f" - Visit: https://www.wikidata.org/wiki/{q_id}")
            print(f" Check if page mentions: {target['name']}")
        else:
            # Placeholder identifier (contains "?"): fall back to a text search.
            print(" - Go to: https://www.wikidata.org/")
            print(f" Search: {_strip_search_label(search_query)}")

    print("\n If match found:")
    print(" - Copy Q-number (e.g., Q1234567)")
    print(f" - Verify location matches: {target['city']}")
    print(f" - Verify type matches: {target['type']}")
    print(" - Add to batch14_manual_results.txt")

print("\n" + _RULE)
print("\nSave validated Q-numbers to: scripts/batch14_manual_results.txt")
print("Format: institution_name|Q-number|confidence|notes")
print("\nExample:")
print("Museo Rodulfo Philippi|Q61987|0.85|Found in Philippi's biography page")
print(_RULE)