#!/usr/bin/env python3
"""
Chilean GLAM Batch 17 Enrichment - Library Parent Organization Linkage

Strategy: Apply municipality/parent organization Q-numbers to municipal
libraries where we have confirmed parent organization Wikidata entries.

Batch 17 Target:
- Biblioteca Pública Federico Varela → Q3763 (Chañaral municipality)

Rationale:
- Federico Varela is a municipal public library in Chañaral
- Chañaral municipality has Wikidata entry Q3763
- Parent organization enrichment is appropriate for municipal libraries
"""

from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

import yaml

# Repository root
REPO_ROOT = Path(__file__).parent.parent

# Default locations of the batch 16 input and the batch 17 output.
_CHILE_DIR = REPO_ROOT / 'data' / 'instances' / 'chile'
DEFAULT_INPUT_FILE = _CHILE_DIR / 'chilean_institutions_batch16_enriched.yaml'
DEFAULT_OUTPUT_FILE = _CHILE_DIR / 'chilean_institutions_batch17_enriched.yaml'

# Batch 17 enrichments: Municipal library parent organizations,
# keyed by institution ``id``.
BATCH_17_ENRICHMENTS = {
    'https://w3id.org/heritage/custodian/cl/l-biblioteca-p-blica-federico-va-0015': {
        'q_number': 'Q3763',
        'wikidata_name': 'Chañaral, commune in Atacama Region, Chile',
        'match_reason': 'Parent organization - Municipal public library in Chañaral',
        'enrichment_type': 'parent_organization',
    },
}


def _has_wikidata_identifier(identifiers) -> bool:
    """Return True if any identifier entry uses the 'Wikidata' scheme."""
    return any(
        isinstance(entry, dict) and entry.get('identifier_scheme') == 'Wikidata'
        for entry in identifiers
    )


def _apply_enrichment(inst: dict, enrichment: dict, timestamp: str) -> bool:
    """Add the Wikidata identifier and provenance for one institution.

    Args:
        inst: Institution record; mutated in place.
        enrichment: Entry with ``q_number``, ``wikidata_name``,
            ``match_reason`` and ``enrichment_type`` keys.
        timestamp: ISO-8601 string stored as ``provenance.last_updated``.

    Returns:
        True if the record was enriched, False if it already carried a
        Wikidata identifier and was therefore left untouched.
    """
    identifiers = inst.get('identifiers')
    if identifiers is None:
        # Covers both a missing key and an explicit null in the YAML.
        identifiers = inst['identifiers'] = []

    # Never overwrite or duplicate an existing Wikidata match; skipped
    # records are not counted, which is what the zero-enrichment warning
    # in the entry point relies on.
    if _has_wikidata_identifier(identifiers):
        return False

    q_number = enrichment['q_number']
    identifiers.append({
        'identifier_scheme': 'Wikidata',
        'identifier_value': q_number,
        'identifier_url': f'https://www.wikidata.org/wiki/{q_number}',
    })

    # Update provenance (create it if the record has none).
    provenance = inst.get('provenance')
    if provenance is None:
        provenance = inst['provenance'] = {}

    provenance['data_tier'] = 'TIER_3_CROWD_SOURCED'
    provenance['last_updated'] = timestamp
    provenance['enrichment_batch'] = 17
    provenance['wikidata_match_confidence'] = 'MEDIUM'
    provenance['wikidata_match_reason'] = enrichment['match_reason']
    provenance['wikidata_name'] = enrichment['wikidata_name']

    notes = provenance.get('notes')
    if notes is None:
        notes = []
    elif not isinstance(notes, list):
        # Keep a pre-existing scalar note instead of failing on .append().
        notes = [notes]
    provenance['notes'] = notes
    notes.append(
        f"Batch 17: {enrichment['enrichment_type']} enrichment - "
        f"{enrichment['wikidata_name']} (parent organization for municipal library)"
    )
    return True


def enrich_batch_17(
    input_file: Optional[Path] = None,
    output_file: Optional[Path] = None,
) -> int:
    """Apply parent organization Q-numbers to municipal libraries.

    Loads the institution list from ``input_file``, enriches every record
    whose ``id`` appears in ``BATCH_17_ENRICHMENTS`` and that does not yet
    have a Wikidata identifier, then writes the full list to ``output_file``.

    Args:
        input_file: YAML file to read; defaults to the batch 16 output.
        output_file: YAML file to write; defaults to the batch 17 output.

    Returns:
        The number of institutions that were enriched.
    """
    input_file = DEFAULT_INPUT_FILE if input_file is None else Path(input_file)
    output_file = DEFAULT_OUTPUT_FILE if output_file is None else Path(output_file)

    # Load current data; safe_load returns None for an empty document.
    with open(input_file, 'r', encoding='utf-8') as f:
        institutions = yaml.safe_load(f) or []

    # One timestamp per run so all records of the batch agree.
    timestamp = datetime.now(timezone.utc).isoformat()
    enrichment_count = 0

    # Apply enrichments
    for inst in institutions:
        if not isinstance(inst, dict):
            continue
        inst_id = inst.get('id')
        enrichment = BATCH_17_ENRICHMENTS.get(inst_id)
        if enrichment is None:
            continue
        if not _apply_enrichment(inst, enrichment, timestamp):
            continue

        enrichment_count += 1
        print(f"✓ Enriched: {inst.get('name', inst_id)}")
        print(f" Q-number: {enrichment['q_number']} ({enrichment['enrichment_type']})")
        print(f" Reason: {enrichment['match_reason']}\n")

    # Save updated data
    with open(output_file, 'w', encoding='utf-8') as f:
        yaml.dump(
            institutions,
            f,
            allow_unicode=True,
            sort_keys=False,
            default_flow_style=False,
        )

    print(f"\n{'='*60}")
    print("Batch 17 Enrichment Complete")
    print(f"{'='*60}")
    print(f"Institutions enriched: {enrichment_count}")
    print(f"Output: {output_file}")
    print("\nNext: Calculate coverage statistics to assess impact")

    return enrichment_count


if __name__ == '__main__':
    enriched = enrich_batch_17()
    if enriched == 0:
        print("\n⚠️ WARNING: No institutions were enriched!")
        print("This may indicate the target institutions already have Wikidata identifiers")
        print("or the institution IDs have changed.")