glam/scripts/manual_web_verification_batch14.py
kempersc e5a532a8bc Add comprehensive tests for NLP institution extraction and RDF partnership integration
- Introduced `test_nlp_extractor.py` with unit tests for the InstitutionExtractor, covering various extraction patterns (ISIL, Wikidata, VIAF, city names) and ensuring proper classification of institutions (museum, library, archive).
- Added tests for extracted entities and result handling to validate the extraction process.
- Created `test_partnership_rdf_integration.py` to validate the end-to-end process of extracting partnerships from a conversation and exporting them to RDF format.
- Implemented tests for temporal properties in partnerships and ensured compliance with W3C Organization Ontology patterns.
- Verified that extracted partnerships are correctly linked with PROV-O provenance metadata.
2025-11-19 23:20:47 +01:00

89 lines
3.3 KiB
Python

#!/usr/bin/env python3
"""
Manual Wikidata verification via direct URL construction.
When API is rate-limited, we can construct likely Wikidata URLs and document them for manual verification.
"""
# Institutions to check by hand, picked because a Wikidata entry is likely.
#
# Every entry is a dict with the keys:
#   name / city / type / notes -- descriptive strings echoed in the guide
#   likely_q_numbers           -- list of (q_id, hint) string pairs; a q_id
#                                 containing "?" means the identifier is not
#                                 known yet and the hint is a search phrase
targets = [
    {
        "name": "Museo Rodulfo Philippi",
        "city": "Chañaral",
        "type": "MUSEUM",
        "notes": "Named after Rudolph Amandus Philippi (Q61987), famous German-Chilean naturalist",
        "likely_q_numbers": [
            ("Q61987", "Rudolph Amandus Philippi (person) - check if museum mentioned"),
            ("Q?", "Search: 'Museo Philippi Chañaral Chile'"),
        ],
    },
    {
        "name": "Museo Rudolph Philippi",
        "city": "Valdivia",
        "type": "MUSEUM",
        "notes": "Also named after Rudolph Amandus Philippi, may be same network as Chañaral museum",
        "likely_q_numbers": [
            ("Q61987", "Rudolph Amandus Philippi (person) - check if museum mentioned"),
            ("Q?", "Search: 'Museo Philippi Valdivia Chile'"),
        ],
    },
    {
        "name": "Instituto Alemán Puerto Montt",
        "city": "Puerto Montt",
        "type": "MIXED",
        "notes": "German school in Chile - German schools often have Wikidata entries",
        "likely_q_numbers": [
            ("Q?", "Search: 'Instituto Alemán Puerto Montt'"),
            ("Q?", "Search: 'Deutsche Schule Puerto Montt'"),
        ],
    },
    {
        "name": "Fundación Iglesias Patrimoniales",
        "city": "Unknown",
        "type": "RESEARCH_CENTER",
        "notes": "UNESCO-related foundation for heritage churches",
        "likely_q_numbers": [
            ("Q?", "Search: 'Fundación Iglesias Patrimoniales Chile UNESCO'"),
        ],
    },
    {
        "name": "Museo de las Iglesias",
        "city": "Castro",
        "type": "MUSEUM",
        "notes": "Museum in Castro, Chiloé - UNESCO World Heritage area",
        "likely_q_numbers": [
            ("Q?", "Search: 'Museo Iglesias Castro Chiloé Chile'"),
        ],
    },
]
# Label that some search hints already start with; the guide adds its own
# " Search: " prefix, so an existing label is dropped to avoid printing
# "Search: Search: ...".
_SEARCH_LABEL = "Search: "


def _strip_search_label(hint):
    """Return *hint* without a leading ``"Search: "`` label, if it has one."""
    if hint.startswith(_SEARCH_LABEL):
        return hint[len(_SEARCH_LABEL):]
    return hint


def build_guide_lines(entries):
    """Build the manual Wikidata verification guide as a list of lines.

    Args:
        entries: Iterable of dicts with the keys ``name``, ``city``,
            ``type``, ``notes`` and ``likely_q_numbers``. The last one is a
            sequence of ``(q_id, hint)`` string pairs.

    Returns:
        The strings to print, in order. Each element corresponds to one
        ``print`` call, so some elements start with ``"\\n"`` to produce a
        blank separator line.
    """
    banner = "=" * 100
    lines = [
        banner,
        "MANUAL WIKIDATA VERIFICATION GUIDE - BATCH 14",
        banner,
        "\nWhen Wikidata API is rate-limited, verify these institutions manually via web browser:\n",
    ]

    for i, target in enumerate(entries, 1):
        lines.append(f"\n{i}. {target['name']} ({target['city']})")
        lines.append(f" Type: {target['type']}")
        lines.append(f" Notes: {target['notes']}")
        lines.append("\n Manual verification steps:")

        for q_id, search_query in target['likely_q_numbers']:
            if q_id.startswith("Q") and "?" not in q_id:
                # A concrete identifier: link straight to its Wikidata page.
                lines.append(f" - Visit: https://www.wikidata.org/wiki/{q_id}")
                lines.append(f" Check if page mentions: {target['name']}")
            else:
                # Placeholder identifier (contains "?"): fall back to a
                # manual search using the hint text.
                lines.append(" - Go to: https://www.wikidata.org/")
                lines.append(f" Search: {_strip_search_label(search_query)}")

        # Follow-up checklist, emitted once per institution.
        lines.append("\n If match found:")
        lines.append(" - Copy Q-number (e.g., Q1234567)")
        lines.append(f" - Verify location matches: {target['city']}")
        lines.append(f" - Verify type matches: {target['type']}")
        lines.append(" - Add to batch14_manual_results.txt")

    lines.extend([
        "\n" + banner,
        "\nSave validated Q-numbers to: scripts/batch14_manual_results.txt",
        "Format: institution_name|Q-number|confidence|notes",
        "\nExample:",
        "Museo Rodulfo Philippi|Q61987|0.85|Found in Philippi's biography page",
        banner,
    ])
    return lines


def main():
    """Print the verification guide for the module-level ``targets`` list."""
    for line in build_guide_lines(targets):
        print(line)


if __name__ == "__main__":
    main()