#!/usr/bin/env python3
"""
Chilean Batch 14: Targeted Wikidata Search for Final 2 Matches

Strategy: Focus on institutions with distinctive names or known entities

Target institutions (prioritized by likelihood of Wikidata presence):
1. Museo Rodulfo Philippi / Rudolph Philippi - Named after famous German scientist
2. Fundación Iglesias Patrimoniales - Heritage foundation for Chiloé churches (UNESCO)
3. Instituto Alemán Puerto Montt - German school with heritage collections
4. Centro Cultural Sofia Hott - Named after specific person
"""

import json
from pathlib import Path
from typing import Dict, List, Optional, Any

from SPARQLWrapper import SPARQLWrapper, JSON

WIKIDATA_ENDPOINT = "https://query.wikidata.org/sparql"


def _escape_sparql_literal(text: str) -> str:
    """Escape *text* so it can be embedded in a double-quoted SPARQL literal."""
    # Backslash must be handled first so the escapes added below are not
    # themselves re-escaped.
    return (
        text.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
        .replace("\t", "\\t")
    )


def _run_select_query(query: str) -> Optional[List[Dict]]:
    """Run a SELECT query against the Wikidata endpoint.

    Returns the list of result bindings, or None when the query fails,
    the response is not a JSON object, or there are no bindings.
    """
    sparql = SPARQLWrapper(WIKIDATA_ENDPOINT)
    sparql.setReturnFormat(JSON)
    sparql.setQuery(query)

    try:
        results: Any = sparql.query().convert()
    except Exception as e:
        # Best-effort search: one failed lookup must not abort the whole batch.
        print(f" Query failed: {e}")
        return None

    if not isinstance(results, dict):
        return None

    bindings = results.get('results', {}).get('bindings', [])
    return bindings or None


def query_wikidata_person_institution(person_name: str, location: str) -> Optional[List[Dict]]:
    """
    Query Wikidata for institutions named after specific people.

    Searches items in Chile (P17 = Q298) that are instances of (a subclass
    of) Q33506 or Q7075 and whose label contains ``person_name``
    (case-insensitive).

    Args:
        person_name: Name fragment to look for inside item labels.
        location: Accepted for interface compatibility; it is not used to
            restrict the query.

    Returns:
        The SPARQL result bindings (at most 20 rows), or None when nothing
        was found or the query failed.
    """
    name_literal = _escape_sparql_literal(person_name)

    # Search for museums/institutions named after the person
    query = f"""
    SELECT DISTINCT ?item ?itemLabel ?itemDescription ?location ?locationLabel ?instanceOf ?instanceOfLabel
    WHERE {{
      {{
        # Museums in Chile
        ?item wdt:P31/wdt:P279* wd:Q33506 .
        ?item wdt:P17 wd:Q298 .
      }} UNION {{
        # Cultural institutions in Chile
        ?item wdt:P31/wdt:P279* wd:Q7075 .
        ?item wdt:P17 wd:Q298 .
      }}

      # Name contains person's name
      ?item rdfs:label ?label .
      FILTER(CONTAINS(LCASE(?label), LCASE("{name_literal}")))

      OPTIONAL {{ ?item wdt:P131 ?location }}
      ?item wdt:P31 ?instanceOf .

      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "es,en,de" }}
    }}
    LIMIT 20
    """

    print(f" Searching for institutions named after '{person_name}'...")
    return _run_select_query(query)


def query_wikidata_exact_name(institution_name: str, city: Optional[str] = None) -> Optional[List[Dict]]:
    """
    Query Wikidata for exact institution name.

    Matches items in Chile (P17 = Q298) whose Spanish label equals
    ``institution_name``.

    Args:
        institution_name: Exact Spanish label to match.
        city: When given, only items whose P131 value has a Spanish or
            English label containing this text (case-insensitive) are kept.

    Returns:
        The SPARQL result bindings (at most 10 rows), or None when nothing
        was found or the query failed.
    """
    name_literal = _escape_sparql_literal(institution_name)

    if city:
        # Variables produced by SERVICE wikibase:label (e.g. ?locationLabel)
        # are bound only after the rest of the pattern has been evaluated, so
        # a FILTER on them rejects every row. Match the city against explicit
        # rdfs:label triples instead.
        city_literal = _escape_sparql_literal(city)
        location_clause = (
            "?item wdt:P131 ?location .\n"
            "      ?location rdfs:label ?cityMatch .\n"
            '      FILTER(LANG(?cityMatch) IN ("es", "en"))\n'
            f'      FILTER(CONTAINS(LCASE(?cityMatch), LCASE("{city_literal}")))'
        )
    else:
        location_clause = "OPTIONAL { ?item wdt:P131 ?location }"

    query = f"""
    SELECT DISTINCT ?item ?itemLabel ?itemDescription ?location ?locationLabel ?instanceOf ?instanceOfLabel
    WHERE {{
      ?item rdfs:label "{name_literal}"@es .
      ?item wdt:P31 ?instanceOf .
      ?item wdt:P17 wd:Q298 .

      {location_clause}

      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "es,en" }}
    }}
    LIMIT 10
    """

    print(f" Searching for exact name: '{institution_name}'...")
    return _run_select_query(query)


def _summarize_binding(result: Dict, search_term: str) -> Dict[str, str]:
    """Flatten one SPARQL binding into the record stored in the output JSON."""
    return {
        # The entity URI ends in the Q-number.
        'q_number': result['item']['value'].split('/')[-1],
        'label': result.get('itemLabel', {}).get('value', 'No label'),
        'description': result.get('itemDescription', {}).get('value', 'No description'),
        'location': result.get('locationLabel', {}).get('value', 'No location'),
        'instance_of': result.get('instanceOfLabel', {}).get('value', 'No type'),
        'search_term_used': search_term,
    }


def main():
    """Search for final 2 matches to reach 70% coverage."""

    search_strategies = [
        {
            "name": "Museo Rodulfo Philippi",
            "city": "Chañaral",
            "search_type": "person_named",
            "search_terms": ["Philippi", "Rodulfo Philippi", "Rudolf Philippi"],
            "rationale": "Named after Rodolfo Amando Philippi, famous German-Chilean naturalist"
        },
        {
            "name": "Museo Rudolph Philippi",
            "city": "Valdivia",
            "search_type": "person_named",
            "search_terms": ["Philippi", "Rudolph Philippi", "Rudolf Philippi"],
            "rationale": "Another museum named after same scientist (alternate spelling)"
        },
        {
            "name": "Instituto Alemán Puerto Montt",
            "city": "Puerto Montt",
            "search_type": "exact_name",
            "search_terms": ["Instituto Alemán Puerto Montt", "Deutsche Schule Puerto Montt"],
            "rationale": "German school, may have Wikidata entry"
        },
        {
            "name": "Fundación Iglesias Patrimoniales",
            "city": "Chiloé",
            "search_type": "exact_name",
            "search_terms": ["Fundación Iglesias Patrimoniales", "Fundación Iglesias Patrimoniales de Chiloé"],
            "rationale": "Foundation for UNESCO World Heritage churches"
        },
        {
            "name": "Centro Cultural Sofia Hott",
            "city": "Osorno",
            "search_type": "person_named",
            "search_terms": ["Sofia Hott", "Sofía Hott"],
            "rationale": "Cultural center named after specific person"
        }
    ]

    all_search_results = {}

    print("=" * 80)
    print("Chilean Batch 14: Targeted Wikidata Search")
    print("Target: Find 2 more matches to reach 70% coverage (63/90)")
    print("=" * 80)
    print()

    for strategy in search_strategies:
        print(f"\n{'=' * 80}")
        print(f"Institution: {strategy['name']}")
        print(f"Location: {strategy['city']}")
        print(f"Strategy: {strategy['search_type']}")
        print(f"Rationale: {strategy['rationale']}")
        print(f"{'=' * 80}")

        institution_results = {
            "metadata": strategy,
            "wikidata_results": []
        }

        for search_term in strategy['search_terms']:
            if strategy['search_type'] == 'person_named':
                results = query_wikidata_person_institution(search_term, strategy['city'])
            else:
                results = query_wikidata_exact_name(search_term, strategy['city'])

            if not results:
                print(" No results found")
                continue

            print(f" Found {len(results)} results")
            # Only the first five rows per search term are shown and stored.
            for result in results[:5]:
                record = _summarize_binding(result, search_term)

                print(f" {record['q_number']}: {record['label']}")
                print(f" Desc: {record['description']}")
                print(f" Location: {record['location']}")
                print(f" Type: {record['instance_of']}")

                institution_results['wikidata_results'].append(record)

        all_search_results[strategy['name']] = institution_results

    # Save results
    output_file = 'scripts/batch14_targeted_search_results.json'
    # Create the target directory first so a missing 'scripts/' folder does
    # not discard the results of every query that has just been run.
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(all_search_results, f, indent=2, ensure_ascii=False)

    print(f"\n{'=' * 80}")
    print(f"Results saved to: {output_file}")
    print("=" * 80)
    print()
    print("NEXT STEPS:")
    print("1. Review results for valid matches")
    print("2. Verify Q-numbers match correct institutions")
    print("3. Apply enrichment to reach 70% target")
    print()


if __name__ == '__main__':
    main()