#!/usr/bin/env python3
"""
Chilean Heritage Institutions - Batch 18 Enrichment

Push to 75% coverage by enriching 2 libraries with municipal parent organizations
Target: 66/90 → 68/90 (75.6% coverage)

Enrichment Strategy:
- Add Q-numbers for parent municipalities (Coquimbo, Pichilemu)
- Both are regional/tourist cities with well-documented communes in Wikidata
- Conservative approach: only add when municipality clearly documented

Date: 2025-11-09
"""

from pathlib import Path

# Input/output paths
INPUT_FILE = Path(__file__).parent.parent / "data/instances/chile/chilean_institutions_batch17_enriched.yaml"
OUTPUT_FILE = Path(__file__).parent.parent / "data/instances/chile/chilean_institutions_batch18_enriched.yaml"

# Enrichment mappings: institution ID → {Q-number, enrichment type, reason}
ENRICHMENTS = {
    # Libraries with municipal parent organizations
    "https://w3id.org/heritage/custodian/cl/l-biblioteca-jorge-iribarren-cha-0018": {
        "q_number": "Q23660214",
        "type": "parent_organization",
        "reason": "Parent organization - Municipal library in Coquimbo commune"
    },
    "https://w3id.org/heritage/custodian/cl/l-biblioteca-p-blica-n-244-0045": {
        "q_number": "Q23660186",
        "type": "parent_organization",
        "reason": "Parent organization - Municipal library in Pichilemu commune"
    }
}


def enrich_institution(inst: dict) -> bool:
    """
    Enrich institution with Wikidata Q-number if applicable.

    The institution's ``id`` is looked up in ``ENRICHMENTS``. On a match, a
    Wikidata entry is appended to ``inst['identifiers']`` and a note is
    appended to ``inst['provenance']['notes']`` (joined to any existing note
    with `` | ``). ``inst`` is modified in place.

    Returns True if enriched, False otherwise (no mapping for this ID, or the
    mapped Q-number is already present among the identifiers).
    """
    enrichment = ENRICHMENTS.get(inst.get('id'))
    if enrichment is None:
        return False

    q_number = enrichment['q_number']
    enrich_type = enrichment['type']
    reason = enrichment['reason']

    # A missing, None or empty 'identifiers' value is treated as "none yet".
    identifiers = inst.get('identifiers') or []

    # Check if already has this Wikidata identifier
    already_present = any(
        ident.get('identifier_scheme') == 'Wikidata'
        and ident.get('identifier_value') == q_number
        for ident in identifiers
    )
    if already_present:
        # q_number already includes the "Q" prefix, so it is printed as-is.
        print(f"⚠ Already enriched: {inst.get('name')} ({q_number})")
        return False

    # Add Wikidata identifier
    identifiers.append({
        'identifier_scheme': 'Wikidata',
        'identifier_value': q_number,
        'identifier_url': f'https://www.wikidata.org/wiki/{q_number}'
    })
    # Re-attach: a no-op when the list already lived on inst, required when a
    # fresh list was created above.
    inst['identifiers'] = identifiers

    # Update provenance notes
    provenance = inst.get('provenance') or {}
    inst['provenance'] = provenance

    existing_notes = provenance.get('notes', '')
    enrichment_note = f"Wikidata enrichment (Batch 18): {enrich_type} - {reason}"
    if existing_notes:
        provenance['notes'] = f"{existing_notes} | {enrichment_note}"
    else:
        provenance['notes'] = enrichment_note

    print(f"✓ Enriched: {inst.get('name')}")
    print(f"  Q-number: {q_number} ({enrich_type})")
    print(f"  Reason: {reason}")
    print()

    return True


def main():
    """
    Load INPUT_FILE, enrich every matching institution, write OUTPUT_FILE.

    Raises:
        ValueError: if the top-level YAML value is not a list.
    """
    # Imported here rather than at module level so that enrich_institution can
    # be imported (and unit-tested) without PyYAML installed.
    import yaml

    # Load institutions
    with open(INPUT_FILE, 'r', encoding='utf-8') as f:
        institutions = yaml.safe_load(f)

    # safe_load yields None for an empty document; treat that as "no records".
    if institutions is None:
        institutions = []
    if not isinstance(institutions, list):
        raise ValueError(
            f"Expected a list of institutions in {INPUT_FILE}, "
            f"got {type(institutions).__name__}"
        )

    # Enrich institutions
    enriched_count = 0
    for inst in institutions:
        if enrich_institution(inst):
            enriched_count += 1

    # Save enriched data
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        yaml.dump(institutions, f, allow_unicode=True, sort_keys=False, width=120)

    print("=" * 60)
    print("Batch 18 Enrichment Complete")
    print("=" * 60)
    print(f"Institutions enriched: {enriched_count}")
    print(f"Output: {OUTPUT_FILE}")
    print()
    print("Next: Calculate coverage statistics to verify 75% target reached")


if __name__ == '__main__':
    main()