#!/usr/bin/env python3
"""
Brazil Batch 13 Wikidata Enrichment Script

Target: 10-12 institutions to reach ~60-65% coverage (73-77 out of 121)
Focus: National institutions, state museums, major universities

This script uses manual Wikidata searches and documents Q-numbers found.
Run the actual enrichment by calling wikidata-authenticated_search_entity
for each institution.

The script itself performs no lookups: it prints the worklist, the manual
procedure to follow, and a coverage status summary.
"""

import yaml
from datetime import datetime, timezone
from pathlib import Path

# Horizontal rule reused by every banner below.
_RULE = "=" * 80

print(_RULE)
print("BRAZIL BATCH 13 WIKIDATA ENRICHMENT")
print(_RULE)
print("Target: 10-12 institutions (aiming for 60-65% coverage)")
print(_RULE)
print()

# =============================================================================
# BATCH 13 TARGET INSTITUTIONS
# =============================================================================
#
# Each record carries:
#   name          - institution name as printed in the worklist
#   id            - record identifier; either a w3id.org URI or a numeric string
#   search_query  - text to pass to the Wikidata entity search
#   context       - short description used to verify a candidate match
#   priority      - integer 1-3 (presumably 1 = most important; confirm)
#   expected_qid  - optional, unverified hint for the Q-number to look for

BATCH13_TARGETS = [
    {
        'name': 'Instituto Moreira Salles',
        'id': 'https://w3id.org/heritage/custodian/br/instituto-moreira-salles',
        'search_query': 'Instituto Moreira Salles Brazil',
        'context': 'Major cultural institute with photographic collections',
        'expected_qid': 'Q10302915',  # Known from research
        'priority': 1,
    },
    {
        'name': 'Sistema Brasileiro de Museus',
        'id': 'https://w3id.org/heritage/custodian/br/sistema-brasileiro-de-museus-sbm',
        'search_query': 'Sistema Brasileiro de Museus',
        'context': 'National museum system created 2004, coordinated by IBRAM',
        'priority': 1,
    },
    {
        'name': 'Brasiliana Fotográfica',
        'id': 'https://w3id.org/heritage/custodian/br/brasiliana-fotografica',
        'search_query': 'Brasiliana Fotográfica',
        'context': 'Inter-institutional photography collaboration',
        'priority': 1,
    },
    {
        'name': 'Universidade Federal de Rondônia',
        'id': '3008281717687280329',
        'search_query': 'Universidade Federal de Rondônia',
        'context': 'Federal university in Rondônia (UNIR)',
        'expected_qid': 'Q10365614',  # Federal universities follow pattern
        'priority': 1,
    },
    {
        'name': 'Fundação de Cultura Elias Mansour',
        'id': 'https://w3id.org/heritage/custodian/br/ac-funda-o-de-cultura-elias-mansour-fem',
        'search_query': 'Fundação de Cultura Elias Mansour Acre',
        'context': 'State cultural foundation in Acre',
        'priority': 2,
    },
    {
        'name': 'Museu dos Povos Acreanos',
        'id': 'https://w3id.org/heritage/custodian/br/ac-museu-dos-povos-acreanos',
        'search_query': 'Museu dos Povos Acreanos Rio Branco',
        'context': 'Museum in Rio Branco (opened 2023)',
        'priority': 2,
    },
    {
        'name': 'Museu Histórico de Alcântara',
        # NOTE(review): the slug is prefixed "mt-" while the search query and
        # context place this museum in Maranhão - confirm the identifier.
        'id': 'https://w3id.org/heritage/custodian/br/mt-museu-hist-rico',
        'search_query': 'Museu Histórico de Alcântara Maranhão',
        'context': 'Historical museum in Alcântara, Maranhão',
        'priority': 2,
    },
    {
        'name': 'Secretaria de Estado da Cultura do Amapá',
        'id': '1423599463777727402',
        'search_query': 'Secretaria Cultura Amapá',
        'context': 'State culture secretariat of Amapá',
        'priority': 3,
    },
    {
        'name': 'Secretaria de Estado da Cultura do Tocantins',
        'id': '709508309148680086',
        'search_query': 'Secretaria Cultura Tocantins',
        'context': 'State culture secretariat of Tocantins',
        'priority': 3,
    },
    {
        'name': 'Instituto Histórico e Geográfico de Alagoas',
        'id': '2519599505258789521',
        'search_query': 'Instituto Histórico Geográfico Alagoas',
        'context': 'Historical and geographic institute in Alagoas',
        'priority': 3,
    },
    {
        'name': 'Sistema de Museus de Ouro Preto',
        'id': 'https://w3id.org/heritage/custodian/br/mg-ouro-preto-system',
        'search_query': 'Sistema Museus Ouro Preto',
        'context': "Brazil's first municipal museum network (2006)",
        'priority': 3,
    },
    {
        'name': 'Museu Histórico de Goiás',
        'id': 'https://w3id.org/heritage/custodian/br/go-museu-hist-rico-mham',
        'search_query': 'Museu Histórico de Goiás',
        'context': 'Historical museum in Goiás state',
        'priority': 3,
    },
]

print("BATCH 13 TARGET INSTITUTIONS")
print(_RULE)
print(f"Total targets: {len(BATCH13_TARGETS)}")
print()

# Numbered worklist: one paragraph per target, blank line between entries.
for idx, target in enumerate(BATCH13_TARGETS, 1):
    print(f"{idx}. {target['name']}")
    print(f" Priority: {target['priority']}")
    print(f" Search query: {target['search_query']}")
    print(f" Context: {target['context']}")
    # Only some targets carry a Q-number hint.
    if 'expected_qid' in target:
        print(f" Expected Q-ID: {target['expected_qid']}")
    print()

print(_RULE)
print("INSTRUCTIONS FOR ENRICHMENT:")
print(_RULE)
print("""
1. For each institution above, call the wikidata-authenticated_search_entity tool
   with the search query provided

2. Verify the Q-number returned matches the institution by checking:
   - Label matches the institution name
   - Description mentions correct location (Brazil, state)
   - Instance of (P31) is correct type (museum, university, etc.)

3. Record results in BATCH13_MATCHES dictionary below

4. For institutions not found via search, use wikidata-authenticated_execute_sparql
   with geographic and type filters (P17=Q155 for Brazil, P131 for state)

5. After collecting all Q-numbers, create batch13_enriched.yaml with verified matches

6. Run merge script to integrate into main dataset
""")

print("\nReady to begin searches!")
print("Use the wikidata-authenticated_search_entity tool for each target above.")
print(_RULE)

# Template for recording results
BATCH13_MATCHES = {
    # Will be filled in as searches are completed
    # Example format:
    # "Instituto Moreira Salles": {
    #     "qid": "Q10302915",
    #     "label": "Instituto Moreira Salles",
    #     "confidence": 0.95,
    #     "notes": "Verified via search - cultural institute"
    # }
}

# Calculate current status
current_with_wikidata = 67
total_brazilian = 121
current_coverage = (current_with_wikidata / total_brazilian) * 100

# Expected number of new Q-numbers from this batch. Every figure printed
# below is derived from these values and the two counts above, so the report
# cannot drift out of sync when a count is updated.
needed_min = 6
needed_max = 10
target_min = current_with_wikidata + needed_min
target_max = current_with_wikidata + needed_max
target_coverage_min = (target_min / total_brazilian) * 100
target_coverage_max = (target_max / total_brazilian) * 100

print("\nCURRENT STATUS:")
print(
    f" Institutions with Q-numbers: "
    f"{current_with_wikidata}/{total_brazilian} ({current_coverage:.1f}%)"
)
print(
    f" Target after Batch 13: {target_min}-{target_max}/{total_brazilian} "
    f"({target_coverage_min:.0f}-{target_coverage_max:.0f}%)"
)
print(f" Need to find: {needed_min}-{needed_max} Q-numbers")
print()