glam/scripts/add_alternative_names_libraries.py
2025-12-01 23:55:55 +01:00

156 lines
6.5 KiB
Python

#!/usr/bin/env python3
"""
Add alternative_names to library entries where Google Maps shows branch name
but original entry shows library system name.
"""
import yaml
import os
from datetime import datetime
# Per-file updates, keyed by entry filename. Each value holds the list of
# alternative names for that entry and the custodian_name override details.
LIBRARY_UPDATES = {
    "1391_kb_isil.yaml": {
        "alternative_names": [
            {
                "name": "Bibliotheek Kop van Overijssel",
                "source": "original_entry",
                "note": "Library system name",
            },
            {
                "name": "Bibliotheek Steenwijk",
                "source": "google_maps",
                "note": "Branch location name",
            },
        ],
        "custodian_name_update": {
            "claim_value": "Bibliotheek Kop van Overijssel",
            "source": "original_entry",
            "note": "Library system serving Steenwijkerland, Westerveld municipalities. Steenwijk is main branch.",
        },
    },
    "1416_kb_isil.yaml": {
        "alternative_names": [
            {
                "name": "NOBB",
                "source": "original_entry",
                "note": "Library system abbreviation",
            },
            {
                "name": "Bibliotheek Oss",
                "source": "google_maps",
                "note": "Main branch location",
            },
        ],
        "custodian_name_update": {
            "claim_value": "NOBB",
            "source": "original_entry",
            "note": "Noord Oost Brabantse Bibliotheken - regional library system. Oss is one of multiple branches.",
        },
    },
    "1456_kb_isil.yaml": {
        "alternative_names": [
            {
                "name": "Bibliotheken Zuidoost Fryslân",
                "source": "original_entry",
                "note": "Library system name",
            },
            {
                "name": "Bibliotheek Wolvega",
                "source": "google_maps",
                "note": "Branch location name",
            },
        ],
        "custodian_name_update": {
            "claim_value": "Bibliotheken Zuidoost Fryslân",
            "source": "original_entry",
            "note": "Regional library system. Wolvega is one of multiple branch locations.",
        },
    },
    "1459_kb_isil.yaml": {
        "alternative_names": [
            {
                "name": "Bibliotheek Saba",
                "source": "original_entry",
                "note": "Island library system",
            },
            {
                "name": "Queen Wilhelmina Library",
                "source": "google_maps",
                "note": "Historical/popular name",
            },
        ],
        "custodian_name_update": {
            "claim_value": "Bibliotheek Saba",
            "source": "original_entry",
            "note": "Public library of Saba island (Caribbean Netherlands). Queen Wilhelmina Library is historical name.",
        },
    },
    "1460_kb_isil.yaml": {
        "alternative_names": [
            {
                "name": "Bibliotheek Curaçao",
                "source": "original_entry",
                "note": "Short name",
            },
            {
                "name": "Nationale Bibliotheek Curaçao",
                "source": "google_maps",
                "note": "Full official name",
            },
        ],
        "custodian_name_update": {
            "claim_value": "Nationale Bibliotheek Curaçao",
            "source": "google_maps",
            "note": "National library of Curaçao. Both names are valid.",
        },
    },
    "1463_kb_isil.yaml": {
        "alternative_names": [
            {
                "name": "Bibliotheken Noord Fryslân",
                "source": "original_entry",
                "note": "Library system name",
            },
            {
                "name": "Bibliotheek Dokkum",
                "source": "google_maps",
                "note": "Main branch location",
            },
        ],
        "custodian_name_update": {
            "claim_value": "Bibliotheken Noord Fryslân",
            "source": "original_entry",
            "note": "Regional library system serving North Friesland. Dokkum is one of multiple branches.",
        },
    },
    "1467_kb_isil.yaml": {
        "alternative_names": [
            {
                "name": "Bibliotheek Berkel & IJssel",
                "source": "original_entry",
                "note": "Library system name (pre-merger)",
            },
            {
                "name": "BIJ de bieb / Zutphen",
                "source": "google_maps",
                "note": "Current brand name",
            },
        ],
        "custodian_name_update": {
            "claim_value": "Bibliotheek Berkel & IJssel",
            "source": "original_entry",
            "note": "Library system. 'BIJ de bieb' is a branded service/location name.",
        },
    },
    "1489_kb_isil.yaml": {
        "alternative_names": [
            {
                "name": "Bibliotheek Krimpenerwaard",
                "source": "original_entry",
                "note": "Library system name",
            },
            {
                "name": "Bibliotheek Schoonhoven",
                "source": "google_maps",
                "note": "Branch location name",
            },
        ],
        "custodian_name_update": {
            "claim_value": "Bibliotheek Krimpenerwaard",
            "source": "original_entry",
            "note": "Regional library system. Schoonhoven is one of multiple branch locations.",
        },
    },
}

# Directory that holds the entry YAML files named above.
# NOTE(review): this is an absolute path under one user's home directory;
# consider making it configurable before running the script elsewhere.
ENTRIES_DIR = "/Users/kempersc/apps/glam/data/nde/enriched/entries"
def update_entry(filename: str, updates: dict) -> bool:
    """Merge alternative names into one entry file and override its custodian name.

    Args:
        filename: Name of the YAML entry file inside ``ENTRIES_DIR``.
        updates: Mapping with an ``alternative_names`` list of dicts (each
            with a ``name`` key) and, optionally, a ``custodian_name_update``
            dict providing ``claim_value``, ``source`` and ``note``.

    Returns:
        True when the file was rewritten. False when the file is missing,
        does not contain a YAML mapping, or has an ``alternative_names``
        value that is neither a list nor null; a warning is printed in each
        of those cases and the file is left untouched.
    """
    filepath = os.path.join(ENTRIES_DIR, filename)
    if not os.path.exists(filepath):
        print(f" ⚠ File not found: {filename}")
        return False

    with open(filepath, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    # An empty file loads as None and a top-level list/scalar cannot hold the
    # keys written below; skip the file instead of failing with a TypeError.
    if not isinstance(data, dict):
        print(f" ⚠ Not a YAML mapping, skipped: {filename}")
        return False

    # The key may be absent or explicitly null; both start from an empty list.
    alt_names = data.get('alternative_names')
    if alt_names is None:
        alt_names = data['alternative_names'] = []
    elif not isinstance(alt_names, list):
        # Refuse to guess how to merge into an unexpected structure.
        print(f" ⚠ alternative_names is not a list, skipped: {filename}")
        return False

    # Names already recorded, so re-running the script adds no duplicates.
    # Plain-string items are tolerated alongside the usual dict items.
    existing_names = set()
    for alt in alt_names:
        if isinstance(alt, dict):
            existing_names.add(alt.get('name', ''))
        elif isinstance(alt, str):
            existing_names.add(alt)

    for alt in updates['alternative_names']:
        if alt['name'] not in existing_names:
            alt_names.append(alt)
            existing_names.add(alt['name'])

    # Override custodian_name only when the entry already has one.
    if 'custodian_name' in data and 'custodian_name_update' in updates:
        update_info = updates['custodian_name_update']
        custodian = data['custodian_name']
        if isinstance(custodian, dict):
            custodian['claim_value'] = update_info['claim_value']
            custodian['source'] = update_info['source']
            custodian['confidence'] = 0.90
            custodian['manual_override'] = True
            custodian['manual_override_reason'] = update_info['note']
            custodian['manual_override_timestamp'] = datetime.now().isoformat()
        else:
            print(f" ⚠ custodian_name is not a mapping, left unchanged: {filename}")

    # Serialise before opening for writing: mode 'w' truncates the file at
    # open time, so a failure inside the dump would leave a corrupt entry.
    serialized = yaml.dump(
        data, default_flow_style=False, allow_unicode=True, sort_keys=False
    )
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(serialized)
    return True
def main():
    """Apply every configured library update and print a progress report.

    Prints one "Processing" line per entry, a confirmation line for each
    entry that ``update_entry`` reports as updated, and a final tally.
    """
    rule = "=" * 60
    print("Adding alternative_names to library system entries...")
    print(rule)

    succeeded = 0
    for entry_file, entry_updates in LIBRARY_UPDATES.items():
        print(f"Processing: {entry_file}")
        if not update_entry(entry_file, entry_updates):
            # update_entry already printed why this entry was skipped.
            continue
        name_count = len(entry_updates['alternative_names'])
        print(f" ✓ Updated with {name_count} alternative names")
        succeeded += 1

    print(rule)
    print(f"Updated {succeeded}/{len(LIBRARY_UPDATES)} entries")


if __name__ == "__main__":
    main()