#!/usr/bin/env python3
"""
|
|
Brazil Batch 13 Wikidata Enrichment Script
|
|
|
|
Target: 10-12 institutions to reach ~60-65% coverage (73-75 out of 121)
|
|
Focus: National institutions, state museums, major universities
|
|
|
|
This script uses manual Wikidata searches and documents Q-numbers found.
|
|
Run the actual enrichment by calling wikidata-authenticated_search_entity for each institution.
|
|
"""
|
|
|
|
from datetime import datetime, timezone
from pathlib import Path

import yaml

print("="*80)
|
|
print("BRAZIL BATCH 13 WIKIDATA ENRICHMENT")
|
|
print("="*80)
|
|
print("Target: 10-12 institutions (aiming for 60-65% coverage)")
|
|
print("="*80)
|
|
print()
|
|
|
|
# =============================================================================
# BATCH 13 TARGET INSTITUTIONS
# =============================================================================
#
# One dict per institution to look up on Wikidata. Keys:
#   name          - institution name (printed as the entry heading)
#   id            - identifier of the record; entries below use either a
#                   w3id.org URL or a bare numeric string
#   search_query  - text to pass to the Wikidata entity search
#   context       - short human-readable hint for verifying a match
#   expected_qid  - optional; Q-number anticipated before searching
#   priority      - 1, 2 or 3 in the entries below; entries are listed in
#                   ascending order of this value
BATCH13_TARGETS = [
    {
        'name': 'Instituto Moreira Salles',
        'id': 'https://w3id.org/heritage/custodian/br/instituto-moreira-salles',
        'search_query': 'Instituto Moreira Salles Brazil',
        'context': 'Major cultural institute with photographic collections',
        'expected_qid': 'Q10302915',  # Known from research
        'priority': 1,
    },
    {
        'name': 'Sistema Brasileiro de Museus',
        'id': 'https://w3id.org/heritage/custodian/br/sistema-brasileiro-de-museus-sbm',
        'search_query': 'Sistema Brasileiro de Museus',
        'context': 'National museum system created 2004, coordinated by IBRAM',
        'priority': 1,
    },
    {
        'name': 'Brasiliana Fotográfica',
        'id': 'https://w3id.org/heritage/custodian/br/brasiliana-fotografica',
        'search_query': 'Brasiliana Fotográfica',
        'context': 'Inter-institutional photography collaboration',
        'priority': 1,
    },
    {
        'name': 'Universidade Federal de Rondônia',
        'id': '3008281717687280329',
        'search_query': 'Universidade Federal de Rondônia',
        'context': 'Federal university in Rondônia (UNIR)',
        # Original note: "Federal universities follow pattern".
        # NOTE(review): this reads as a guess rather than a verified value;
        # confirm via search before relying on it.
        'expected_qid': 'Q10365614',
        'priority': 1,
    },
    {
        'name': 'Fundação de Cultura Elias Mansour',
        'id': 'https://w3id.org/heritage/custodian/br/ac-funda-o-de-cultura-elias-mansour-fem',
        'search_query': 'Fundação de Cultura Elias Mansour Acre',
        'context': 'State cultural foundation in Acre',
        'priority': 2,
    },
    {
        'name': 'Museu dos Povos Acreanos',
        'id': 'https://w3id.org/heritage/custodian/br/ac-museu-dos-povos-acreanos',
        'search_query': 'Museu dos Povos Acreanos Rio Branco',
        'context': 'Museum in Rio Branco (opened 2023)',
        'priority': 2,
    },
    {
        'name': 'Museu Histórico de Alcântara',
        # NOTE(review): the slug starts with "mt-" while the name and context
        # say Alcântara, Maranhão; other slugs here (ac-, mg-, go-) look like
        # state prefixes. Verify this id points at the intended record.
        'id': 'https://w3id.org/heritage/custodian/br/mt-museu-hist-rico',
        'search_query': 'Museu Histórico de Alcântara Maranhão',
        'context': 'Historical museum in Alcântara, Maranhão',
        'priority': 2,
    },
    {
        'name': 'Secretaria de Estado da Cultura do Amapá',
        'id': '1423599463777727402',
        'search_query': 'Secretaria Cultura Amapá',
        'context': 'State culture secretariat of Amapá',
        'priority': 3,
    },
    {
        'name': 'Secretaria de Estado da Cultura do Tocantins',
        'id': '709508309148680086',
        'search_query': 'Secretaria Cultura Tocantins',
        'context': 'State culture secretariat of Tocantins',
        'priority': 3,
    },
    {
        'name': 'Instituto Histórico e Geográfico de Alagoas',
        'id': '2519599505258789521',
        'search_query': 'Instituto Histórico Geográfico Alagoas',
        'context': 'Historical and geographic institute in Alagoas',
        'priority': 3,
    },
    {
        'name': 'Sistema de Museus de Ouro Preto',
        'id': 'https://w3id.org/heritage/custodian/br/mg-ouro-preto-system',
        'search_query': 'Sistema Museus Ouro Preto',
        'context': "Brazil's first municipal museum network (2006)",
        'priority': 3,
    },
    {
        'name': 'Museu Histórico de Goiás',
        'id': 'https://w3id.org/heritage/custodian/br/go-museu-hist-rico-mham',
        'search_query': 'Museu Histórico de Goiás',
        'context': 'Historical museum in Goiás state',
        'priority': 3,
    },
]

print("BATCH 13 TARGET INSTITUTIONS")
|
|
print("="*80)
|
|
print(f"Total targets: {len(BATCH13_TARGETS)}")
|
|
print()
|
|
|
|
for idx, target in enumerate(BATCH13_TARGETS, 1):
|
|
print(f"{idx}. {target['name']}")
|
|
print(f" Priority: {target['priority']}")
|
|
print(f" Search query: {target['search_query']}")
|
|
print(f" Context: {target['context']}")
|
|
if 'expected_qid' in target:
|
|
print(f" Expected Q-ID: {target['expected_qid']}")
|
|
print()
|
|
|
|
print("="*80)
|
|
print("INSTRUCTIONS FOR ENRICHMENT:")
|
|
print("="*80)
|
|
print("""
|
|
1. For each institution above, call the wikidata-authenticated_search_entity tool
|
|
with the search query provided
|
|
|
|
2. Verify the Q-number returned matches the institution by checking:
|
|
- Label matches the institution name
|
|
- Description mentions correct location (Brazil, state)
|
|
- Instance of (P31) is correct type (museum, university, etc.)
|
|
|
|
3. Record results in BATCH13_MATCHES dictionary below
|
|
|
|
4. For institutions not found via search, use wikidata-authenticated_execute_sparql
|
|
with geographic and type filters (P17=Q155 for Brazil, P131 for state)
|
|
|
|
5. After collecting all Q-numbers, create batch13_enriched.yaml with verified matches
|
|
|
|
6. Run merge script to integrate into main dataset
|
|
""")
|
|
|
|
print("\nReady to begin searches!")
|
|
print("Use the wikidata-authenticated_search_entity tool for each target above.")
|
|
print("="*80)
|
|
|
|
# Template for recording results.
# Maps an institution name to the details of its verified Wikidata match.
# Starts empty and is meant to be filled in by hand as searches are completed.
BATCH13_MATCHES = {
    # Example format:
    # "Instituto Moreira Salles": {
    #     "qid": "Q10302915",
    #     "label": "Instituto Moreira Salles",
    #     "confidence": 0.95,
    #     "notes": "Verified via search - cultural institute"
    # }
}

# Calculate current status
current_with_wikidata = 67
total_brazilian = 121
current_coverage = (current_with_wikidata / total_brazilian) * 100

# Number of new Q-numbers this batch aims to add. The target range printed
# below is derived from these so the figures cannot drift apart when the
# counts above are updated.
needed_min, needed_max = 6, 10
target_min = current_with_wikidata + needed_min
target_max = current_with_wikidata + needed_max
target_min_coverage = (target_min / total_brazilian) * 100
target_max_coverage = (target_max / total_brazilian) * 100

print("\nCURRENT STATUS:")
print(
    f" Institutions with Q-numbers: "
    f"{current_with_wikidata}/{total_brazilian} ({current_coverage:.1f}%)"
)
print(
    f" Target after Batch 13: {target_min}-{target_max}/{total_brazilian} "
    f"({target_min_coverage:.0f}-{target_max_coverage:.0f}%)"
)
print(f" Need to find: {needed_min}-{needed_max} Q-numbers")
print()