#!/usr/bin/env python3
"""
Chilean GLAM Institutions - Batch 10 Wikidata Enrichment

Single manual enrichment: Servicio Nacional del Patrimonio Cultural
Target: 55/90 institutions (61.1% coverage)

Note: This organization was reformed from "Consejo de Monumentos Nacionales"
in 2017 under Ley 21.045. We're using Q5784049 (the predecessor's Wikidata
entry) and documenting the organizational change.
"""

from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path

import yaml

# Batch 10: Single official institution with NAME_CHANGE event
BATCH_10_ENRICHMENT = {
    "name": "Servicio Nacional del Patrimonio Cultural",
    "city": "Santiago",
    "q_number": "Q5784049",
    "wikidata_name": "National Monuments Council / Consejo de Monumentos Nacionales",
    "confidence": "high",
    "notes": (
        "Wikidata Q5784049 refers to Consejo de Monumentos Nacionales (1925-2017). "
        "Organization was reformed and renamed in 2017 under Ley 21.045 "
        "but maintains institutional continuity."
    ),
    "change_event": {
        "event_id": "https://w3id.org/heritage/custodian/event/cl-snpc-reform-2017",
        "change_type": "NAME_CHANGE",
        "event_date": "2017-11-03",
        "event_description": (
            "Reformed from Consejo de Monumentos Nacionales to Servicio Nacional "
            "del Patrimonio Cultural under Ley 21.045 (November 3, 2017). "
            "Created as part of the new Ministerio de las Culturas, las Artes "
            "y el Patrimonio. "
            "The organization maintains institutional continuity from 1925 "
            "founding, but with expanded mandate and modern governance structure."
        ),
        "source_documentation": "https://www.leychile.cl/N?i=1110097",
    },
}


def load_yaml(file_path: Path) -> list:
    """Load the institution records stored in a YAML file.

    Args:
        file_path: Path of the YAML file to read (decoded as UTF-8).

    Returns:
        The parsed top-level YAML value; an empty list when the document
        is empty.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        # safe_load yields None for an empty document; normalise that to an
        # empty list so callers can rely on len() and iteration.
        return yaml.safe_load(f) or []


def save_yaml(data: list, file_path: Path) -> None:
    """Write *data* to a YAML file.

    The dump uses block style, keeps non-ASCII characters unescaped and
    preserves the key order of each mapping.

    Args:
        data: Records to serialise.
        file_path: Destination path; an existing file is overwritten.
    """
    with open(file_path, 'w', encoding='utf-8') as f:
        yaml.dump(
            data,
            f,
            default_flow_style=False,
            allow_unicode=True,
            sort_keys=False,
            width=120,
            indent=2,
        )


def find_institution(institutions: list, name: str, city: str) -> dict:
    """Find an institution record by name and city.

    Only the first entry of a record's ``locations`` list is consulted for
    the city. Records without a name or without any location are tolerated.

    Args:
        institutions: Institution records (dicts) to search.
        name: Exact institution name to match.
        city: Expected city of the institution.

    Returns:
        The first matching record (the same dict object held in
        *institutions*, so mutating it mutates the dataset).

    Raises:
        ValueError: If no record matches.
    """
    for inst in institutions:
        if inst.get('name') != name:
            continue
        # `locations` may be missing, null, or an empty list.
        locations = inst.get('locations') or [{}]
        first_location = locations[0] or {}
        inst_city = first_location.get('city', '')
        # NOTE(review): when the requested city is "Santiago" the record's
        # city is not checked at all, so a name match alone is sufficient.
        # Kept as in the original; confirm whether this is still wanted.
        if inst_city == city or city == "Santiago":
            return inst
    raise ValueError(f"Institution not found: {name} ({city})")


def _has_wikidata(inst: dict) -> bool:
    """Return True if *inst* already carries a Wikidata identifier."""
    return any(
        id_obj.get('identifier_scheme') == 'Wikidata'
        for id_obj in inst.get('identifiers') or []
    )


def _percentage(part: int, whole: int) -> float:
    """Return *part* as a percentage of *whole*, or 0.0 when *whole* is 0."""
    return part / whole * 100 if whole else 0.0


def enrich_institution(inst: dict, enrichment: dict) -> None:
    """Add Wikidata identifier and organizational change event to institution.

    The record is modified in place. If it already has a Wikidata
    identifier, a warning is printed and nothing is changed. Keys that are
    present but null (``identifiers``, ``change_history``, ``provenance``,
    ``provenance.notes``) are treated as missing.

    Args:
        inst: Institution record to enrich.
        enrichment: Enrichment payload with the keys ``q_number``,
            ``wikidata_name``, ``confidence``, ``notes`` and ``change_event``.
    """
    # Check if already has Wikidata
    if _has_wikidata(inst):
        print(f" ⚠️ {inst['name']} already has Wikidata identifier")
        return

    # Add Wikidata identifier
    q_number = enrichment['q_number']
    if inst.get('identifiers') is None:
        inst['identifiers'] = []
    inst['identifiers'].append({
        'identifier_scheme': 'Wikidata',
        'identifier_value': q_number,
        'identifier_url': f"https://www.wikidata.org/wiki/{q_number}",
    })
    print(f" ✅ Added Wikidata: {q_number} ({enrichment['wikidata_name']})")

    # Add organizational change event
    if inst.get('change_history') is None:
        inst['change_history'] = []
    change_event = enrichment['change_event']
    inst['change_history'].append({
        'event_id': change_event['event_id'],
        'change_type': change_event['change_type'],
        'event_date': change_event['event_date'],
        'event_description': change_event['event_description'],
        'source_documentation': change_event['source_documentation'],
    })
    print(f" 📝 Added change event: {change_event['change_type']} ({change_event['event_date']})")

    # Update provenance
    if inst.get('provenance') is None:
        inst['provenance'] = {}
    provenance = inst['provenance']
    provenance['enrichment_method'] = 'Manual Wikidata linkage (Batch 10 - Official Institution)'
    provenance['enrichment_date'] = datetime.now(timezone.utc).isoformat()
    provenance['wikidata_match_confidence'] = enrichment['confidence']

    # Add notes; a single string note is promoted to a list first.
    existing_notes = provenance.get('notes')
    if existing_notes is None:
        provenance['notes'] = []
    elif isinstance(existing_notes, str):
        provenance['notes'] = [existing_notes]
    provenance['notes'].append(f"Batch 10: {enrichment['notes']}")

    print(" 💡 Note: Organization reformed from Consejo de Monumentos Nacionales (1925) to current name (2017)")


def main() -> None:
    """Run the Batch 10 enrichment and print a coverage summary.

    Loads the batch 8 dataset, writes a re-serialised snapshot of it next to
    the input as a backup, applies ``BATCH_10_ENRICHMENT``, saves the result
    as the batch 10 dataset and prints overall and per-type Wikidata
    coverage.
    """
    print("=" * 80)
    print("CHILEAN GLAM INSTITUTIONS - BATCH 10 ENRICHMENT")
    print("Official Institution with Organizational Change Event")
    print("=" * 80)
    print()

    # Load data
    input_file = Path('data/instances/chile/chilean_institutions_batch8_enriched.yaml')
    print(f"📖 Loading: {input_file}")
    institutions = load_yaml(input_file)
    print(f" Loaded {len(institutions)} institutions")
    print()

    # Create backup (a re-dump of the parsed data, not a byte copy)
    backup_file = input_file.with_suffix('.yaml.batch10_backup')
    print(f"💾 Creating backup: {backup_file}")
    save_yaml(institutions, backup_file)
    print()

    # Apply enrichment
    print("🔧 Applying enrichment...")
    print()
    enrichment = BATCH_10_ENRICHMENT
    print(f"1. {enrichment['name']} ({enrichment['city']})")
    enriched_count = 0
    try:
        inst = find_institution(institutions, enrichment['name'], enrichment['city'])
        # enrich_institution silently skips records that are already linked,
        # so only count the record when this run actually adds the link.
        already_linked = _has_wikidata(inst)
        enrich_institution(inst, enrichment)
        if not already_linked:
            enriched_count = 1
    except ValueError as e:
        print(f" ❌ {e}")
    except Exception as e:
        # Top-level boundary: report the failure and still emit the output
        # file and summary.
        print(f" ❌ Error: {e}")
    print()

    # Save enriched data
    output_file = Path('data/instances/chile/chilean_institutions_batch10_enriched.yaml')
    print(f"💾 Saving enriched data: {output_file}")
    save_yaml(institutions, output_file)
    print()

    # Statistics
    print("=" * 80)
    print("ENRICHMENT SUMMARY")
    print("=" * 80)
    print()

    total = len(institutions)
    with_wikidata = sum(1 for inst in institutions if _has_wikidata(inst))
    coverage = _percentage(with_wikidata, total)

    print(f"Total institutions: {total}")
    print(f"With Wikidata: {with_wikidata} ({coverage:.1f}%)")
    print(f"Batch 10 enrichments: {enriched_count}")
    print()

    # By type
    by_type = defaultdict(lambda: {'total': 0, 'with_wd': 0})
    for inst in institutions:
        # A null type is grouped with missing ones so the keys stay sortable.
        inst_type = inst.get('institution_type') or 'UNKNOWN'
        by_type[inst_type]['total'] += 1
        if _has_wikidata(inst):
            by_type[inst_type]['with_wd'] += 1

    print("Coverage by type:")
    for inst_type in sorted(by_type):
        stats = by_type[inst_type]
        pct = _percentage(stats['with_wd'], stats['total'])
        status = "✅" if pct == 100 else "⭐" if pct >= 50 else ""
        print(f" {status} {inst_type}: {stats['with_wd']}/{stats['total']} ({pct:.1f}%)")
    print()

    print("🎉 Batch 10 enrichment complete!")
    print(f"📊 New coverage: {with_wikidata}/{total} ({coverage:.1f}%)")
    print()
    print("📝 Key findings:")
    print(" - Other Batch 10 targets (foundations, cultural centers) not in Wikidata")
    print(" - Recommendation: Focus Batch 11 on remaining museums (13 institutions)")
    print(" - Potential to reach 70%+ coverage with museum-focused enrichment")


if __name__ == '__main__':
    main()