#!/usr/bin/env python3
"""Batch update NDE entries with ZCBS platform URLs."""

import os
from datetime import datetime, timezone
from pathlib import Path

import yaml

# Paths
CROSSREF_FILE = Path("/Users/kempersc/apps/glam/data/nde/enriched/sources/zcbs/zcbs_nde_crossref.yaml")
NDE_ENTRIES_DIR = Path("/Users/kempersc/apps/glam/data/nde/enriched/entries")


def load_crossref():
    """Load the ZCBS/NDE cross-reference file.

    Returns:
        The parsed YAML document from ``CROSSREF_FILE``. An empty file
        yields an empty dict instead of ``None`` so callers can use
        ``.get`` on the result unconditionally.
    """
    # Explicit UTF-8: the default text encoding is locale-dependent.
    with open(CROSSREF_FILE, 'r', encoding='utf-8') as f:
        return yaml.safe_load(f) or {}


def update_entry(entry_path, zcbs_data):
    """Update a single NDE entry file with a ``zcbs_enrichment`` section.

    Args:
        entry_path: Path of the YAML entry file to rewrite in place.
        zcbs_data: Mapping that must provide ``zcbs_id``, ``zcbs_name``,
            ``platform_urls`` and ``match_score``.

    Returns:
        ``True`` once the entry has been written back.

    Raises:
        KeyError: If ``zcbs_data`` lacks one of the required keys.
        TypeError: If the entry file does not contain a YAML mapping
            (for example when it is empty).
    """
    with open(entry_path, 'r', encoding='utf-8') as f:
        content = yaml.safe_load(f)

    # Fail with a clear message before anything is written; assigning a
    # key on None or a list would raise an opaque TypeError instead.
    if not isinstance(content, dict):
        raise TypeError(
            f"Expected a YAML mapping in {entry_path}, "
            f"got {type(content).__name__}"
        )

    # Create ZCBS enrichment section
    content['zcbs_enrichment'] = {
        'zcbs_id': zcbs_data['zcbs_id'],
        'zcbs_name': zcbs_data['zcbs_name'],
        'platform_urls': zcbs_data['platform_urls'],
        'enrichment_timestamp': datetime.now(timezone.utc).isoformat(),
        'source': 'https://www.dezijpe.nl/cgi-bin/boerderij.pl?misc=90',
        'match_score': zcbs_data['match_score'],
    }

    # Serialize fully in memory first: opening the file in 'w' mode
    # truncates it, so a failure during dumping must not be able to leave
    # the entry empty or half-written.
    serialized = yaml.dump(
        content,
        default_flow_style=False,
        allow_unicode=True,
        sort_keys=False,
    )

    # Write back. allow_unicode=True emits raw non-ASCII characters, so the
    # file must be written as UTF-8 regardless of the platform locale.
    with open(entry_path, 'w', encoding='utf-8') as f:
        f.write(serialized)

    return True


def main():
    """Enrich every entry listed under ``needs_enrichment`` and print a summary.

    Entries whose file is missing are reported as errors, entries without
    platform URLs are skipped, and a failure while updating one entry is
    recorded without aborting the rest of the batch.
    """
    print("Loading cross-reference file...")
    crossref = load_crossref()

    # `or []` also covers a key that is present but explicitly null.
    needs_enrichment = crossref.get('needs_enrichment') or []
    print(f"Found {len(needs_enrichment)} entries needing ZCBS enrichment")

    updated = 0
    skipped = 0
    errors = []

    for item in needs_enrichment:
        entry_file = NDE_ENTRIES_DIR / item['nde_file']

        if not entry_file.exists():
            errors.append(f"File not found: {item['nde_file']}")
            continue

        # Skip if no platform URLs
        if not item.get('platform_urls'):
            skipped += 1
            print(f" Skipped (no URLs): {item['nde_file']}")
            continue

        # Batch boundary: one bad entry must not stop the remaining ones,
        # so every failure is collected and reported in the summary.
        try:
            update_entry(entry_file, item)
            updated += 1
            print(f" Updated: {item['nde_file']} (ZCBS #{item['zcbs_id']}, score: {item['match_score']})")
        except Exception as e:
            errors.append(f"Error updating {item['nde_file']}: {e}")

    print("\n=== Summary ===")
    print(f"Updated: {updated}")
    print(f"Skipped: {skipped}")
    print(f"Errors: {len(errors)}")

    if errors:
        print("\nErrors:")
        for e in errors:
            print(f" - {e}")


if __name__ == "__main__":
    main()