60 lines
1.8 KiB
Python
60 lines
1.8 KiB
Python
#!/usr/bin/env python3
|
|
"""Find Brazilian institution IDs from main dataset."""
|
|
|
|
import re
from pathlib import Path

import yaml
|
|
|
|
# Location of the main dataset, relative to the current working directory.
DATASET_PATH = Path("data/instances/all/globalglam-20251111.yaml")

# Target names to search for. Every entry is applied as a case-insensitive
# regular expression, so plain names act as substring searches while entries
# such as "Secretaria.*Cultura.*Tocantins" match with wildcards.
TARGETS = [
    "Universidade Federal de Rondônia",
    "UNIR",
    "Secretaria de Estado da Cultura do Tocantins",
    "Secretaria.*Cultura.*Tocantins",
    "Museu do Estado de Pernambuco",
    "Museu Histórico Nacional",
    "Fundação Cultural Palmares",
    "Museu Imperial",
]

SEPARATOR = "=" * 80


def load_institutions(path):
    """Load every YAML document from *path* and return them as a list.

    When the file holds exactly one document and that document is a list,
    the inner list is returned so callers always iterate over records.
    """
    with open(path, 'r', encoding='utf-8') as f:
        documents = list(yaml.safe_load_all(f))

    # Flatten if nested
    if len(documents) == 1 and isinstance(documents[0], list):
        return documents[0]
    return documents


def _as_text(value):
    """Return *value* if it is a string, otherwise an empty string."""
    return value if isinstance(value, str) else ''


def is_brazilian(inst):
    """Return True if any dict entry in ``inst['locations']`` has country 'BR'.

    A missing, null or non-list ``locations`` value counts as "not Brazilian"
    instead of raising.
    """
    locations = inst.get('locations')
    if not isinstance(locations, list):
        return False
    return any(
        isinstance(loc, dict) and loc.get('country') == 'BR'
        for loc in locations
    )


def find_matches(institutions, targets):
    """Yield ``(target, inst_id, name, description)`` for matching records.

    Only dict records that pass :func:`is_brazilian` are considered. A record
    matches a target when the target, used as a case-insensitive regular
    expression, is found in the record's name or description. At most one
    tuple is yielded per record: the first target (in list order) that hits.
    """
    # Compile once up front rather than once per institution.
    compiled = [(target, re.compile(target, re.IGNORECASE)) for target in targets]

    for inst in institutions:
        if not isinstance(inst, dict) or not is_brazilian(inst):
            continue

        # Null or non-string fields are treated as empty text.
        name = _as_text(inst.get('name'))
        description = _as_text(inst.get('description'))
        inst_id = inst.get('id', 'NO_ID')

        for target, pattern in compiled:
            if pattern.search(name) or pattern.search(description):
                yield target, inst_id, name, description
                break


def report_match(target, inst_id, name, description):
    """Print one match; descriptions longer than 100 characters are cut."""
    print(f"\n✓ MATCH: {target}")
    print(f" ID: {inst_id}")
    print(f" Name: {name}")
    if description:
        desc_short = description[:100] + "..." if len(description) > 100 else description
        print(f" Description: {desc_short}")


def main():
    """Load the dataset and print every Brazilian institution matching TARGETS."""
    print("Loading main dataset...")
    institutions = load_institutions(DATASET_PATH)
    print(f"Loaded {len(institutions)} institutions")

    # Search for Brazilian institutions matching targets
    print("\n" + SEPARATOR)
    print("SEARCHING FOR TARGET INSTITUTIONS")
    print(SEPARATOR)

    for match in find_matches(institutions, TARGETS):
        report_match(*match)

    print("\n" + SEPARATOR)


if __name__ == "__main__":
    main()
|