#!/usr/bin/env python3
"""
Add alternative_names to library entries where Google Maps shows the branch
name but the original entry shows the library system name.
"""
|
|
import yaml
|
|
import os
|
|
from datetime import datetime
|
|
|
|
def _alt_name(name, source, note):
    """Build one ``alternative_names`` record (key order: name, source, note)."""
    return {"name": name, "source": source, "note": note}


def _name_claim(claim_value, source, note):
    """Build one ``custodian_name_update`` record (key order: claim_value, source, note)."""
    return {"claim_value": claim_value, "source": source, "note": note}


# Updates to apply, keyed by entry file name. Each value carries the
# alternative names to add and the custodian name claim to write.
LIBRARY_UPDATES = {
    "1391_kb_isil.yaml": {
        "alternative_names": [
            _alt_name("Bibliotheek Kop van Overijssel", source="original_entry", note="Library system name"),
            _alt_name("Bibliotheek Steenwijk", source="google_maps", note="Branch location name"),
        ],
        "custodian_name_update": _name_claim(
            "Bibliotheek Kop van Overijssel",
            source="original_entry",
            note="Library system serving Steenwijkerland, Westerveld municipalities. Steenwijk is main branch.",
        ),
    },
    "1416_kb_isil.yaml": {
        "alternative_names": [
            _alt_name("NOBB", source="original_entry", note="Library system abbreviation"),
            _alt_name("Bibliotheek Oss", source="google_maps", note="Main branch location"),
        ],
        "custodian_name_update": _name_claim(
            "NOBB",
            source="original_entry",
            note="Noord Oost Brabantse Bibliotheken - regional library system. Oss is one of multiple branches.",
        ),
    },
    "1456_kb_isil.yaml": {
        "alternative_names": [
            _alt_name("Bibliotheken Zuidoost Fryslân", source="original_entry", note="Library system name"),
            _alt_name("Bibliotheek Wolvega", source="google_maps", note="Branch location name"),
        ],
        "custodian_name_update": _name_claim(
            "Bibliotheken Zuidoost Fryslân",
            source="original_entry",
            note="Regional library system. Wolvega is one of multiple branch locations.",
        ),
    },
    "1459_kb_isil.yaml": {
        "alternative_names": [
            _alt_name("Bibliotheek Saba", source="original_entry", note="Island library system"),
            _alt_name("Queen Wilhelmina Library", source="google_maps", note="Historical/popular name"),
        ],
        "custodian_name_update": _name_claim(
            "Bibliotheek Saba",
            source="original_entry",
            note="Public library of Saba island (Caribbean Netherlands). Queen Wilhelmina Library is historical name.",
        ),
    },
    "1460_kb_isil.yaml": {
        "alternative_names": [
            _alt_name("Bibliotheek Curaçao", source="original_entry", note="Short name"),
            _alt_name("Nationale Bibliotheek Curaçao", source="google_maps", note="Full official name"),
        ],
        "custodian_name_update": _name_claim(
            "Nationale Bibliotheek Curaçao",
            source="google_maps",
            note="National library of Curaçao. Both names are valid.",
        ),
    },
    "1463_kb_isil.yaml": {
        "alternative_names": [
            _alt_name("Bibliotheken Noord Fryslân", source="original_entry", note="Library system name"),
            _alt_name("Bibliotheek Dokkum", source="google_maps", note="Main branch location"),
        ],
        "custodian_name_update": _name_claim(
            "Bibliotheken Noord Fryslân",
            source="original_entry",
            note="Regional library system serving North Friesland. Dokkum is one of multiple branches.",
        ),
    },
    "1467_kb_isil.yaml": {
        "alternative_names": [
            _alt_name("Bibliotheek Berkel & IJssel", source="original_entry", note="Library system name (pre-merger)"),
            _alt_name("BIJ de bieb / Zutphen", source="google_maps", note="Current brand name"),
        ],
        "custodian_name_update": _name_claim(
            "Bibliotheek Berkel & IJssel",
            source="original_entry",
            note="Library system. 'BIJ de bieb' is a branded service/location name.",
        ),
    },
    "1489_kb_isil.yaml": {
        "alternative_names": [
            _alt_name("Bibliotheek Krimpenerwaard", source="original_entry", note="Library system name"),
            _alt_name("Bibliotheek Schoonhoven", source="google_maps", note="Branch location name"),
        ],
        "custodian_name_update": _name_claim(
            "Bibliotheek Krimpenerwaard",
            source="original_entry",
            note="Regional library system. Schoonhoven is one of multiple branch locations.",
        ),
    },
}
|
|
|
|
# Directory containing the entry YAML files that update_entry() rewrites.
# The default is a machine-specific absolute path; set the GLAM_ENTRIES_DIR
# environment variable to point the script at another location without
# editing it. An unset or empty variable falls back to the default.
ENTRIES_DIR = (
    os.environ.get("GLAM_ENTRIES_DIR")
    or "/Users/kempersc/apps/glam/data/nde/enriched/entries"
)
|
|
|
|
def update_entry(filename: str, updates: dict):
    """Add alternative names to one entry file and override its custodian name.

    Loads ``ENTRIES_DIR/filename`` as YAML, appends every record from
    ``updates['alternative_names']`` whose ``name`` is not already listed,
    and, when the entry has a ``custodian_name`` mapping and ``updates`` has
    a ``custodian_name_update``, overwrites the claim with the manual
    override fields. The file is then rewritten in place.

    Args:
        filename: Entry file name, joined onto ``ENTRIES_DIR``.
        updates: Mapping with an ``alternative_names`` list of dicts (each
            having a ``name`` key) and, optionally, a
            ``custodian_name_update`` dict with ``claim_value``, ``source``
            and ``note`` keys.

    Returns:
        True if the file was rewritten; False if the file does not exist or
        its content is not a YAML mapping (nothing is written in that case).
    """
    filepath = os.path.join(ENTRIES_DIR, filename)

    # Open directly rather than exists()-then-open, so a file that vanishes
    # between the check and the read is still reported instead of raising.
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)
    except FileNotFoundError:
        print(f" ⚠ File not found: {filename}")
        return False

    # safe_load yields None for an empty document and a list/scalar for
    # non-mapping documents; none of those can take the keys set below.
    if not isinstance(data, dict):
        print(f" ⚠ Not a YAML mapping, skipped: {filename}")
        return False

    # A missing key and an explicit `alternative_names: null` both mean
    # "no alternatives yet".
    alternatives = data.get('alternative_names')
    if alternatives is None:
        alternatives = data['alternative_names'] = []

    # Add new alternative names (avoid duplicates). Existing items that are
    # not mappings carry no 'name' key and are ignored for the comparison.
    existing_names = {
        alt.get('name', '') for alt in alternatives if isinstance(alt, dict)
    }
    for alt in updates['alternative_names']:
        if alt['name'] not in existing_names:
            # Copy so the loaded document never aliases the caller's dict.
            alternatives.append(dict(alt))
            existing_names.add(alt['name'])

    # Update custodian_name
    if 'custodian_name' in data and 'custodian_name_update' in updates:
        custodian = data['custodian_name']
        if isinstance(custodian, dict):
            update_info = updates['custodian_name_update']
            custodian['claim_value'] = update_info['claim_value']
            custodian['source'] = update_info['source']
            # Fixed confidence value written for every manual override.
            custodian['confidence'] = 0.90
            custodian['manual_override'] = True
            custodian['manual_override_reason'] = update_info['note']
            # Naive local time: datetime.now() is called without a tz.
            custodian['manual_override_timestamp'] = datetime.now().isoformat()
        else:
            print(f" ⚠ custodian_name is not a mapping, left unchanged: {filename}")

    # Serialise before opening for writing: opening with 'w' truncates the
    # file, so a failure inside yaml.dump must happen before that point.
    serialized = yaml.dump(data, default_flow_style=False, allow_unicode=True, sort_keys=False)
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(serialized)

    return True
|
|
|
|
def main():
    """Apply every entry in LIBRARY_UPDATES and print a progress report.

    Calls update_entry() once per file, prints a line per processed file
    and finishes with a count of the entries that were updated.
    """
    divider = "=" * 60

    print("Adding alternative_names to library system entries...")
    print(divider)

    succeeded = []
    for entry_file, entry_updates in LIBRARY_UPDATES.items():
        print(f"Processing: {entry_file}")
        if not update_entry(entry_file, entry_updates):
            # update_entry() has already reported why it returned False.
            continue
        print(f" ✓ Updated with {len(entry_updates['alternative_names'])} alternative names")
        succeeded.append(entry_file)

    print(divider)
    print(f"Updated {len(succeeded)}/{len(LIBRARY_UPDATES)} entries")
|
|
|
|
# Run the updates only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|