glam/scripts/batch_update_zcbs.py
2025-11-30 23:30:29 +01:00

83 lines
2.5 KiB
Python

#!/usr/bin/env python3
"""Batch update NDE entries with ZCBS platform URLs."""
import yaml
import os
from pathlib import Path
from datetime import datetime, timezone
# Paths
# NOTE(review): both locations are absolute paths under one user's home
# directory, so the script presumably only runs unmodified on that machine.
# Cross-reference YAML opened by load_crossref(); main() reads its
# 'needs_enrichment' list.
CROSSREF_FILE = Path("/Users/kempersc/apps/glam/data/nde/enriched/sources/zcbs/zcbs_nde_crossref.yaml")
# Directory of NDE entry YAML files; main() joins each item's 'nde_file' onto it.
NDE_ENTRIES_DIR = Path("/Users/kempersc/apps/glam/data/nde/enriched/entries")
def load_crossref():
    """Load the cross-reference file.

    Returns:
        The parsed YAML document stored at ``CROSSREF_FILE``. An empty
        document (which PyYAML parses as ``None``) is returned as an empty
        dict so callers can use ``.get()`` without a ``None`` check.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Decode explicitly as UTF-8 instead of relying on the locale's default
    # encoding, which differs between platforms.
    with open(CROSSREF_FILE, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)
    return {} if data is None else data
def update_entry(entry_path, zcbs_data):
    """Update a single NDE entry with ZCBS enrichment.

    The entry file is parsed, a ``zcbs_enrichment`` mapping is added (or
    replaced) at the top level, and the file is rewritten in place.

    Args:
        entry_path: Path of the NDE entry YAML file to rewrite.
        zcbs_data: Mapping providing ``zcbs_id``, ``zcbs_name``,
            ``platform_urls`` and ``match_score``.

    Returns:
        ``True`` once the file has been rewritten.

    Raises:
        KeyError: If ``zcbs_data`` lacks one of the required keys.
        TypeError: If the entry file does not contain a YAML mapping
            (for example, it is empty or holds a list).
        OSError: If the file cannot be read or written.
        yaml.YAMLError: If the entry cannot be parsed or serialised.
    """
    # Read and write explicitly as UTF-8: the dump below uses
    # allow_unicode=True, so non-ASCII text is written verbatim and must not
    # depend on the locale's default encoding.
    with open(entry_path, 'r', encoding='utf-8') as f:
        content = yaml.safe_load(f)

    if not isinstance(content, dict):
        raise TypeError(
            f"expected a YAML mapping in {entry_path}, "
            f"got {type(content).__name__}"
        )

    # Create ZCBS enrichment section
    zcbs_enrichment = {
        'zcbs_id': zcbs_data['zcbs_id'],
        'zcbs_name': zcbs_data['zcbs_name'],
        'platform_urls': zcbs_data['platform_urls'],
        'enrichment_timestamp': datetime.now(timezone.utc).isoformat(),
        'source': 'https://www.dezijpe.nl/cgi-bin/boerderij.pl?misc=90',
        'match_score': zcbs_data['match_score']
    }
    content['zcbs_enrichment'] = zcbs_enrichment

    # Serialise before opening for writing: mode 'w' truncates the file
    # immediately, so a failure while dumping must not wipe the entry.
    serialized = yaml.dump(
        content,
        default_flow_style=False,
        allow_unicode=True,
        sort_keys=False,
    )

    # Write back
    with open(entry_path, 'w', encoding='utf-8') as f:
        f.write(serialized)
    return True
def main():
    """Apply ZCBS enrichment to every entry listed in the cross-reference.

    Iterates the ``needs_enrichment`` list of the cross-reference document.
    For each item: a missing entry file is recorded as an error, an item
    without ``platform_urls`` is skipped, and otherwise ``update_entry`` is
    called. A failure in ``update_entry`` is recorded and does not stop the
    batch. A summary of the counts and all recorded errors is printed at
    the end.
    """
    print("Loading cross-reference file...")
    # An empty YAML file parses to None, and a bare `needs_enrichment:` key
    # has a None value; treat both as "nothing to do" instead of crashing.
    crossref = load_crossref() or {}
    needs_enrichment = crossref.get('needs_enrichment') or []
    print(f"Found {len(needs_enrichment)} entries needing ZCBS enrichment")

    updated = 0
    skipped = 0
    errors = []

    for item in needs_enrichment:
        entry_file = NDE_ENTRIES_DIR / item['nde_file']
        if not entry_file.exists():
            errors.append(f"File not found: {item['nde_file']}")
            continue

        # Skip if no platform URLs
        if not item.get('platform_urls'):
            skipped += 1
            print(f"  Skipped (no URLs): {item['nde_file']}")
            continue

        # Top-level batch boundary: record any per-entry failure and move on
        # so one bad entry does not abort the remaining updates.
        try:
            update_entry(entry_file, item)
            updated += 1
            print(f"  Updated: {item['nde_file']} (ZCBS #{item['zcbs_id']}, score: {item['match_score']})")
        except Exception as e:
            errors.append(f"Error updating {item['nde_file']}: {e}")

    print("\n=== Summary ===")
    print(f"Updated: {updated}")
    print(f"Skipped: {skipped}")
    print(f"Errors: {len(errors)}")
    if errors:
        print("\nErrors:")
        for e in errors:
            print(f"  - {e}")
# Run the batch update only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()