#!/usr/bin/env python3
"""
Add alternative_names to library entries where Google Maps shows branch name
but original entry shows library system name.
"""

import yaml
import os
from datetime import datetime

# Per-file updates, keyed by entry filename. Each value carries:
#   alternative_names      - list of {"name", "source", "note"} records to merge in
#   custodian_name_update  - the claim that should become the entry's custodian_name
LIBRARY_UPDATES = {
    "1391_kb_isil.yaml": {
        "alternative_names": [
            {"name": "Bibliotheek Kop van Overijssel", "source": "original_entry", "note": "Library system name"},
            {"name": "Bibliotheek Steenwijk", "source": "google_maps", "note": "Branch location name"}
        ],
        "custodian_name_update": {
            "claim_value": "Bibliotheek Kop van Overijssel",
            "source": "original_entry",
            "note": "Library system serving Steenwijkerland, Westerveld municipalities. Steenwijk is main branch."
        }
    },
    "1416_kb_isil.yaml": {
        "alternative_names": [
            {"name": "NOBB", "source": "original_entry", "note": "Library system abbreviation"},
            {"name": "Bibliotheek Oss", "source": "google_maps", "note": "Main branch location"}
        ],
        "custodian_name_update": {
            "claim_value": "NOBB",
            "source": "original_entry",
            "note": "Noord Oost Brabantse Bibliotheken - regional library system. Oss is one of multiple branches."
        }
    },
    "1456_kb_isil.yaml": {
        "alternative_names": [
            {"name": "Bibliotheken Zuidoost Fryslân", "source": "original_entry", "note": "Library system name"},
            {"name": "Bibliotheek Wolvega", "source": "google_maps", "note": "Branch location name"}
        ],
        "custodian_name_update": {
            "claim_value": "Bibliotheken Zuidoost Fryslân",
            "source": "original_entry",
            "note": "Regional library system. Wolvega is one of multiple branch locations."
        }
    },
    "1459_kb_isil.yaml": {
        "alternative_names": [
            {"name": "Bibliotheek Saba", "source": "original_entry", "note": "Island library system"},
            {"name": "Queen Wilhelmina Library", "source": "google_maps", "note": "Historical/popular name"}
        ],
        "custodian_name_update": {
            "claim_value": "Bibliotheek Saba",
            "source": "original_entry",
            "note": "Public library of Saba island (Caribbean Netherlands). Queen Wilhelmina Library is historical name."
        }
    },
    "1460_kb_isil.yaml": {
        "alternative_names": [
            {"name": "Bibliotheek Curaçao", "source": "original_entry", "note": "Short name"},
            {"name": "Nationale Bibliotheek Curaçao", "source": "google_maps", "note": "Full official name"}
        ],
        "custodian_name_update": {
            "claim_value": "Nationale Bibliotheek Curaçao",
            "source": "google_maps",
            "note": "National library of Curaçao. Both names are valid."
        }
    },
    "1463_kb_isil.yaml": {
        "alternative_names": [
            {"name": "Bibliotheken Noord Fryslân", "source": "original_entry", "note": "Library system name"},
            {"name": "Bibliotheek Dokkum", "source": "google_maps", "note": "Main branch location"}
        ],
        "custodian_name_update": {
            "claim_value": "Bibliotheken Noord Fryslân",
            "source": "original_entry",
            "note": "Regional library system serving North Friesland. Dokkum is one of multiple branches."
        }
    },
    "1467_kb_isil.yaml": {
        "alternative_names": [
            {"name": "Bibliotheek Berkel & IJssel", "source": "original_entry", "note": "Library system name (pre-merger)"},
            {"name": "BIJ de bieb / Zutphen", "source": "google_maps", "note": "Current brand name"}
        ],
        "custodian_name_update": {
            "claim_value": "Bibliotheek Berkel & IJssel",
            "source": "original_entry",
            "note": "Library system. 'BIJ de bieb' is a branded service/location name."
        }
    },
    "1489_kb_isil.yaml": {
        "alternative_names": [
            {"name": "Bibliotheek Krimpenerwaard", "source": "original_entry", "note": "Library system name"},
            {"name": "Bibliotheek Schoonhoven", "source": "google_maps", "note": "Branch location name"}
        ],
        "custodian_name_update": {
            "claim_value": "Bibliotheek Krimpenerwaard",
            "source": "original_entry",
            "note": "Regional library system. Schoonhoven is one of multiple branch locations."
        }
    }
}

# Default directory holding the enriched entry YAML files.
ENTRIES_DIR = "/Users/kempersc/apps/glam/data/nde/enriched/entries"


def update_entry(filename: str, updates: dict, entries_dir: str = ENTRIES_DIR) -> bool:
    """Update an entry with alternative names and fix custodian_name.

    Loads ``<entries_dir>/<filename>``, merges ``updates['alternative_names']``
    into the entry's ``alternative_names`` list (skipping names that are
    already present), applies ``updates['custodian_name_update']`` as a manual
    override on an existing ``custodian_name`` mapping, and writes the entry
    back to the same path.

    Args:
        filename: Entry file name, relative to ``entries_dir``.
        updates: Mapping with a required ``alternative_names`` list of dicts
            (each with a ``name`` key) and an optional
            ``custodian_name_update`` dict with ``claim_value``, ``source``
            and ``note`` keys.
        entries_dir: Directory containing the entry files; defaults to
            ``ENTRIES_DIR``.

    Returns:
        True if the file was rewritten. False if the file does not exist, its
        top-level content is not a mapping, or its ``alternative_names`` value
        is neither null nor a list; in those cases nothing is written.
    """
    filepath = os.path.join(entries_dir, filename)

    if not os.path.exists(filepath):
        print(f" ⚠ File not found: {filename}")
        return False

    with open(filepath, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    # safe_load yields None for an empty document and may yield a list or
    # scalar for malformed entries; none of those can take the updates.
    if not isinstance(data, dict):
        print(f" ⚠ Skipping {filename}: expected a YAML mapping, got {type(data).__name__}")
        return False

    # Treat a missing key and an explicit null the same way; refuse to
    # overwrite any other non-list value rather than silently discarding it.
    alternative_names = data.get('alternative_names')
    if alternative_names is None:
        alternative_names = []
        data['alternative_names'] = alternative_names
    elif not isinstance(alternative_names, list):
        print(f" ⚠ Skipping {filename}: 'alternative_names' is not a list")
        return False

    # Collect names already recorded so re-running the script stays idempotent.
    # Existing items may be mappings with a 'name' key or bare strings.
    existing_names = set()
    for existing in alternative_names:
        if isinstance(existing, dict):
            existing_names.add(existing.get('name', ''))
        elif isinstance(existing, str):
            existing_names.add(existing)

    for alt in updates['alternative_names']:
        if alt['name'] not in existing_names:
            # Copy so the loaded document never aliases the module constant.
            alternative_names.append(dict(alt))
            existing_names.add(alt['name'])

    # Apply the manual override only to an existing custodian_name mapping.
    update_info = updates.get('custodian_name_update')
    if update_info is not None and 'custodian_name' in data:
        custodian = data['custodian_name']
        if isinstance(custodian, dict):
            custodian['claim_value'] = update_info['claim_value']
            custodian['source'] = update_info['source']
            custodian['confidence'] = 0.90
            custodian['manual_override'] = True
            custodian['manual_override_reason'] = update_info['note']
            # NOTE(review): naive local time, kept to preserve the existing
            # timestamp format - confirm whether entries expect UTC.
            custodian['manual_override_timestamp'] = datetime.now().isoformat()
        else:
            print(f" ⚠ {filename}: 'custodian_name' is not a mapping; left unchanged")

    # Serialise before opening the file for writing so that a dump failure
    # cannot leave a truncated entry on disk.
    serialized = yaml.dump(data, default_flow_style=False, allow_unicode=True, sort_keys=False)
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(serialized)

    return True


def main() -> None:
    """Apply every update in LIBRARY_UPDATES and print a per-file summary."""
    print("Adding alternative_names to library system entries...")
    print("=" * 60)

    updated = 0
    for filename, updates in LIBRARY_UPDATES.items():
        print(f"Processing: {filename}")
        if update_entry(filename, updates):
            print(f" ✓ Updated with {len(updates['alternative_names'])} alternative names")
            updated += 1

    print("=" * 60)
    print(f"Updated {updated}/{len(LIBRARY_UPDATES)} entries")


if __name__ == "__main__":
    main()