#!/usr/bin/env python3
"""
Manual Review of Batch 11 Query Results
========================================

Review and filter the automated matches to identify:
1. Valid matches (high confidence)
2. Questionable matches (needs verification)
3. False positives (clearly wrong)
"""

import json

# Known false positive: Museo Di (Q112135646) - Virtual LGBT museum
MUSEO_DI = "Q112135646"

DEFAULT_INPUT_PATH = 'scripts/batch11_query_results.json'
DEFAULT_OUTPUT_PATH = 'scripts/batch11_reviewed_matches.json'

# Category labels returned by _classify_match().
VALID = 'valid'
QUESTIONABLE = 'questionable'
FALSE_POSITIVE = 'false_positive'

RULE = "=" * 80
THIN_RULE = "-" * 80


def _text(value):
    """Return *value* as a stripped string, mapping ``None`` to ``''``."""
    return "" if value is None else str(value).strip()


def _classify_match(match):
    """Classify one match record.

    Args:
        match: A dict with a ``'museum'`` sub-dict (``name`` and optionally
            ``city`` / ``region``) and a ``'match'`` sub-dict (``q_number``,
            ``wikidata_name`` and optionally ``location`` / ``description`` /
            ``match_score``). Optional values may be missing or ``None``.

    Returns:
        A ``(category, message)`` tuple where ``category`` is one of
        ``VALID``, ``QUESTIONABLE`` or ``FALSE_POSITIVE`` and ``message`` is
        the line to print for the reviewer.

    Raises:
        KeyError: If ``museum``, ``match``, ``name``, ``wikidata_name`` or
            ``q_number`` is missing from the record.
    """
    museum = match['museum']
    candidate = match['match']

    wd_name_raw = _text(candidate['wikidata_name'])
    museum_name = _text(museum['name']).casefold()
    wd_name = wd_name_raw.casefold()
    # Optional fields: null/missing values become '' instead of crashing on
    # a method call against None.
    location = _text(candidate.get('location')).casefold()
    description = _text(candidate.get('description')).casefold()
    city = _text(museum.get('city')).casefold()
    region = _text(museum.get('region')).casefold()
    score = candidate.get('match_score')

    # Filter obvious false positives
    if candidate['q_number'] == MUSEO_DI:
        return FALSE_POSITIVE, "❌ FALSE POSITIVE: Virtual LGBT museum (incorrect match)"
    # NOTE(review): the substring test also hits "zoology"/"zoological"
    # descriptions (e.g. zoological museums) - confirm this is intended.
    if "Zoológico" in wd_name_raw or "zoo" in description:
        return FALSE_POSITIVE, "❌ FALSE POSITIVE: Zoo, not a museum"

    # High confidence matches (score > 0.95 and location matches)
    has_score = isinstance(score, (int, float))
    if has_score and score > 0.95 and location and location in museum_name:
        return VALID, "✅ VALID MATCH: High confidence"

    # Name containment in either direction. Both names must be non-empty:
    # '' is a substring of every string and would match vacuously.
    names_overlap = bool(museum_name and wd_name) and (
        museum_name in wd_name or wd_name in museum_name
    )
    if names_overlap:
        # Same vacuous-match guard for city/region.
        place_matches = bool(location) and (
            bool(city and city in location)
            or bool(region and region in location)
        )
        if place_matches:
            return VALID, "✅ VALID MATCH: Name and location align"
        return QUESTIONABLE, "⚠️ QUESTIONABLE: Name matches but location unclear"

    # Location mismatch (case-insensitive, like the other comparisons)
    if location and location != city:
        return QUESTIONABLE, "⚠️ QUESTIONABLE: Location mismatch"

    return QUESTIONABLE, "⚠️ QUESTIONABLE: Needs manual verification"


def _print_section(title, matches, marker, show_location=False):
    """Print one summary section; prints nothing when *matches* is empty."""
    if not matches:
        return
    print("\n" + THIN_RULE)
    print(title)
    print(THIN_RULE)
    for match in matches:
        print(f"{match['museum']['name']}")
        print(f" {marker} {match['match']['wikidata_name']} ({match['match']['q_number']})")
        if show_location:
            print(f" Location: {match['match'].get('location')} vs {match['museum'].get('city')}")


def review_matches(input_path=DEFAULT_INPUT_PATH,
                   output_path=DEFAULT_OUTPUT_PATH,
                   *,
                   baseline_matched=55,
                   total_museums=90,
                   review_date='2025-11-09'):
    """Review and categorize Batch 11 matches.

    Reads the automated query results, sorts every match into valid /
    questionable / false-positive, prints a per-match report and a summary,
    and writes the categorized result as JSON.

    Args:
        input_path: JSON file containing a top-level ``'matches'`` list.
        output_path: Destination for the reviewed JSON.
        baseline_matched: Museums already matched before this batch; added
            to the valid count for the coverage estimate.
        total_museums: Denominator of the coverage estimate.
        review_date: Date string recorded in the output file.

    Returns:
        The dict that was written to *output_path*.

    Raises:
        ValueError: If *total_museums* is not positive.
        KeyError: If the input lacks ``'matches'`` or a record lacks a
            required key.
    """
    if total_museums <= 0:
        raise ValueError("total_museums must be positive")

    with open(input_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    print(RULE)
    print("BATCH 11 MATCH REVIEW")
    print(RULE)

    buckets = {VALID: [], QUESTIONABLE: [], FALSE_POSITIVE: []}

    for match in data['matches']:
        candidate = match['match']
        score = candidate.get('match_score')
        score_text = f"{score:.2f}" if isinstance(score, (int, float)) else "n/a"

        print(f"\n{RULE}")
        print(f"Target: {match['museum']['name']}")
        print(f"Match: {candidate['wikidata_name']} ({candidate['q_number']})")
        print(f"Location: {candidate.get('location')}")
        print(f"Description: {candidate.get('description')}")
        print(f"Score: {score_text}")

        category, message = _classify_match(match)
        print(message)
        buckets[category].append(match)

    valid_matches = buckets[VALID]
    questionable_matches = buckets[QUESTIONABLE]
    false_positives = buckets[FALSE_POSITIVE]

    # Summary
    print("\n" + RULE)
    print("REVIEW SUMMARY")
    print(RULE)
    print(f"✅ Valid matches: {len(valid_matches)}")
    print(f"⚠️ Questionable matches: {len(questionable_matches)}")
    print(f"❌ False positives: {len(false_positives)}")

    _print_section("VALID MATCHES (High Confidence):", valid_matches, "→")
    _print_section("QUESTIONABLE MATCHES (Need Verification):",
                   questionable_matches, "→", show_location=True)
    _print_section("FALSE POSITIVES (Rejected):", false_positives, "✗")

    # Only valid matches count toward coverage, hence "conservative".
    covered = baseline_matched + len(valid_matches)
    coverage_percent = covered / total_museums * 100

    # Save filtered results
    filtered = {
        'batch': 11,
        'review_date': review_date,
        'valid_matches': valid_matches,
        'questionable_matches': questionable_matches,
        'false_positives': false_positives,
        'summary': {
            'valid': len(valid_matches),
            'questionable': len(questionable_matches),
            'false_positives': len(false_positives),
            'expected_coverage': f"{covered}/{total_museums}",
            'expected_coverage_percent': f"{coverage_percent:.1f}%",
        },
    }

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(filtered, f, indent=2, ensure_ascii=False)

    print(f"\n💾 Filtered results saved to: {output_path}")
    print(f"\n📊 Conservative coverage estimate: {covered}/{total_museums} = {coverage_percent:.1f}%")

    return filtered


if __name__ == "__main__":
    review_matches()