#!/usr/bin/env python3
"""
Finalize Batch 11 Matches - Manual Validation
=============================================
After review, these are the VALIDATED matches from Batch 11

Running the script prints a human-readable report of the four review
buckets defined below and writes the same data, plus a coverage summary,
to a JSON file.
"""

import json
from pathlib import Path

# Total number of museums in the dataset that coverage is measured against.
TOTAL_MUSEUMS = 90

# Number of museums that already had a Wikidata match before this batch.
CURRENT_COVERAGE = 55

# Fraction of TOTAL_MUSEUMS at which the milestone message is printed.
COVERAGE_MILESTONE = 0.67

# Default location of the JSON report, relative to the working directory.
OUTPUT_PATH = 'scripts/batch11_final_validation.json'

# Manually validated matches after review
VALIDATED_MATCHES = [
    {
        "museum_name": "Museo Histórico-Arqueológico",
        "city": "Quillota",
        "q_number": "Q12184920",
        "wikidata_name": "Museo Histórico - Arqueológico de Quillota",
        "confidence": "HIGH",
        "reason": "Perfect name and location match"
    },
    {
        "museum_name": "Museo Mapuche de Purén",
        "city": "Capitán Pastene",
        "q_number": "Q86282614",
        "wikidata_name": "Museo Mapuche de Purén",
        "confidence": "HIGH",
        "reason": "Exact name match, Capitán Pastene is in Purén commune"
    },
    {
        "museum_name": "Museo Pleistocénico",
        "city": "Osorno",
        "q_number": "Q112044601",
        "wikidata_name": "Museo del Pleistoceno de Osorno",
        "confidence": "HIGH",
        "reason": "Perfect name and location match"
    },
    {
        "museum_name": "Red de Museos Aysén",
        "city": "Coyhaique",
        "q_number": "Q53877849",
        "wikidata_name": "Museo Regional de Aysén",
        "confidence": "HIGH",
        "reason": "Regional museum network, Coyhaique location matches"
    },
    {
        "museum_name": "Museo Territorial Yagan Usi",
        "city": "Cabo de Hornos",
        "q_number": "Q6775581",
        "wikidata_name": "Museo Territorial Yagán Usi - Martín González Calderón",
        "confidence": "HIGH",
        "reason": "Exact name match, Puerto Williams is capital of Cabo de Hornos"
    }
]

# Rejected matches (keep for documentation)
REJECTED_MATCHES = [
    {
        "museum_name": "Museo de Tocopilla",
        "q_number": "Q112135646",
        "wikidata_name": "Museo Di",
        "reason": "Virtual LGBT museum, completely different subject"
    },
    {
        "museum_name": "Museo Rodulfo Philippi",
        "q_number": "Q112135646",
        "wikidata_name": "Museo Di",
        "reason": "Same false positive"
    },
    {
        "museum_name": "Museo del Libro del Mar",
        "q_number": "Q112135646",
        "wikidata_name": "Museo Di",
        "reason": "Same false positive"
    },
    {
        "museum_name": "Museo de Historia Local Los Perales",
        "q_number": "Q6171788",
        "wikidata_name": "Zoológico de Quilpué",
        "reason": "Zoo, not a museum"
    },
    {
        "museum_name": "Museo de las Iglesias",
        "q_number": "Q112135646",
        "wikidata_name": "Museo Di",
        "reason": "Same false positive"
    },
    {
        "museum_name": "Museo Histórico Municipal",
        "q_number": "Q112135646",
        "wikidata_name": "Museo Di",
        "reason": "Same false positive"
    }
]

# Needs manual research (no Wikidata entry found)
NEEDS_RESEARCH = [
    {
        "museum_name": "Museo de Tocopilla",
        "city": "María Elena / Tocopilla",
        "status": "No Wikidata entry found"
    },
    {
        "museum_name": "Museo Rodulfo Philippi",
        "city": "Chañaral",
        "status": "No Wikidata entry found (possibly confused with Rudolf Philippi museums elsewhere)"
    },
    {
        "museum_name": "Museo del Libro del Mar",
        "city": "San Antonio",
        "status": "No Wikidata entry found"
    },
    {
        "museum_name": "Museo de Historia Local Los Perales",
        "city": "Quilpué",
        "status": "No Wikidata entry found"
    },
    {
        "museum_name": "Museo de las Iglesias",
        "city": "Castro, Chiloé",
        "status": "No Wikidata entry found"
    },
    {
        "museum_name": "Museo Histórico Municipal",
        "city": "Puerto Natales, Última Esperanza",
        "status": "No Wikidata entry found"
    }
]

# Items requiring further investigation
NEEDS_VERIFICATION = [
    {
        "museum_name": "Museo Histórico y Cultural",
        "city": "Cauquenes",
        "q_number": "Q86281191",
        "wikidata_name": "Museo Histórico Cultural Antuhuenu",
        "location": "Nacimiento",
        "issue": "Location mismatch - Nacimiento is in Bío Bío, not Cauquenes in Maule",
        "recommendation": "REJECT - Different museum"
    },
    {
        "museum_name": "Museo Rudolph Philippi",
        "city": "Valdivia",
        "q_number": "Q86283174",
        "wikidata_name": "Mira Valdivia",
        "issue": "Name doesn't match - unclear if 'Mira Valdivia' is Rudolph Philippi museum",
        "recommendation": "REJECT - Needs manual Wikidata search for 'Museo Rudolph Philippi Valdivia'"
    }
]


def _coverage_percent(matched: int) -> float:
    """Return *matched* as a percentage of TOTAL_MUSEUMS."""
    return matched / TOTAL_MUSEUMS * 100


def _print_review_sections() -> None:
    """Print the four review buckets (validated, rejected, research, verify)."""
    print("="*80)
    print("BATCH 11 FINAL VALIDATION")
    print("="*80)

    print(f"\n✅ VALIDATED MATCHES: {len(VALIDATED_MATCHES)}")
    for match in VALIDATED_MATCHES:
        print(f"\n{match['museum_name']}")
        print(f" → {match['wikidata_name']} ({match['q_number']})")
        print(f" Confidence: {match['confidence']}")
        print(f" Reason: {match['reason']}")

    print(f"\n\n❌ REJECTED MATCHES: {len(REJECTED_MATCHES)}")
    for match in REJECTED_MATCHES:
        print(f"\n{match['museum_name']}")
        print(f" ✗ {match['wikidata_name']} ({match['q_number']})")
        print(f" Reason: {match['reason']}")

    print(f"\n\n🔍 NEEDS MANUAL RESEARCH: {len(NEEDS_RESEARCH)}")
    for item in NEEDS_RESEARCH:
        print(f" • {item['museum_name']} ({item['city']})")

    print(f"\n\n⚠️ NEEDS VERIFICATION: {len(NEEDS_VERIFICATION)}")
    for item in NEEDS_VERIFICATION:
        print(f"\n{item['museum_name']}")
        print(f" → {item['wikidata_name']} ({item['q_number']})")
        print(f" Issue: {item['issue']}")
        print(f" Recommendation: {item['recommendation']}")


def _print_summary(new_coverage: int) -> None:
    """Print coverage before and after this batch, plus the milestone note."""
    print("\n" + "="*80)
    print("BATCH 11 SUMMARY")
    print("="*80)
    print(
        f"Current coverage: {CURRENT_COVERAGE}/{TOTAL_MUSEUMS} "
        f"({_coverage_percent(CURRENT_COVERAGE):.1f}%)"
    )
    print(f"Validated matches: {len(VALIDATED_MATCHES)}")
    print(
        f"New coverage: {new_coverage}/{TOTAL_MUSEUMS} "
        f"({_coverage_percent(new_coverage):.1f}%)"
    )
    print(f"Museums without Wikidata: {len(NEEDS_RESEARCH)}")

    # Compared on the unrounded ratio, so 60/90 (shown as 66.7%) does not
    # count as having reached the 67% milestone.
    if new_coverage / TOTAL_MUSEUMS >= COVERAGE_MILESTONE:
        print("\n✅ Reached 67% coverage milestone!")


def _build_output(new_coverage: int) -> dict:
    """Assemble the JSON-serializable report for this batch."""
    return {
        'batch': 11,
        'validation_date': '2025-11-09',
        'validated_matches': VALIDATED_MATCHES,
        'rejected_matches': REJECTED_MATCHES,
        'needs_research': NEEDS_RESEARCH,
        'needs_verification': NEEDS_VERIFICATION,
        'summary': {
            'validated': len(VALIDATED_MATCHES),
            'rejected': len(REJECTED_MATCHES),
            'needs_research': len(NEEDS_RESEARCH),
            # Reported alongside the other buckets so the summary covers
            # every category that the file contains.
            'needs_verification': len(NEEDS_VERIFICATION),
            'current_coverage': f"{CURRENT_COVERAGE}/{TOTAL_MUSEUMS}",
            'new_coverage': (
                f"{new_coverage}/{TOTAL_MUSEUMS} "
                f"({_coverage_percent(new_coverage):.1f}%)"
            ),
        }
    }


def main(output_path: "str | Path" = OUTPUT_PATH) -> None:
    """Print the Batch 11 validation report and save it as JSON.

    Args:
        output_path: Where to write the JSON report. Defaults to
            ``scripts/batch11_final_validation.json`` relative to the
            current working directory. Missing parent directories are
            created.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    _print_review_sections()

    # Each validated match adds one museum to the covered set.
    new_coverage = CURRENT_COVERAGE + len(VALIDATED_MATCHES)
    _print_summary(new_coverage)

    # Save final results
    destination = Path(output_path)
    # Without this, running from a directory that has no scripts/ folder
    # fails with FileNotFoundError after the whole report has been printed.
    destination.parent.mkdir(parents=True, exist_ok=True)
    with open(destination, 'w', encoding='utf-8') as f:
        json.dump(_build_output(new_coverage), f, indent=2, ensure_ascii=False)

    print(f"\n💾 Final validation saved to: {output_path}")


if __name__ == "__main__":
    main()