#!/usr/bin/env python3
|
|
"""Batch update NDE entries with ZCBS platform URLs."""
|
|
|
|
import yaml
|
|
import os
|
|
from pathlib import Path
|
|
from datetime import datetime, timezone
|
|
|
|
# Filesystem locations used by this script (absolute paths).

# YAML cross-reference document; its `needs_enrichment` list names the NDE
# entry files to update.
CROSSREF_FILE: Path = Path("/Users/kempersc/apps/glam/data/nde/enriched/sources/zcbs/zcbs_nde_crossref.yaml")

# Directory that the `nde_file` names from the cross-reference are resolved
# against.
NDE_ENTRIES_DIR: Path = Path("/Users/kempersc/apps/glam/data/nde/enriched/entries")
|
|
|
|
def load_crossref(path=None):
    """Load the ZCBS/NDE cross-reference YAML document.

    Args:
        path: Optional location of the cross-reference file. When omitted,
            the module-level ``CROSSREF_FILE`` is used.

    Returns:
        The parsed YAML document. An empty file yields an empty dict rather
        than ``None`` so callers can use ``.get`` unconditionally.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML.
    """
    source = CROSSREF_FILE if path is None else path
    # Decode explicitly as UTF-8: the platform default encoding is not
    # guaranteed to be UTF-8, and the data may contain non-ASCII names.
    with open(source, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)
    # yaml.safe_load returns None for an empty document.
    return {} if data is None else data
|
|
|
|
def update_entry(entry_path, zcbs_data):
    """Add a ``zcbs_enrichment`` section to a single NDE entry file.

    The entry YAML is read, a ``zcbs_enrichment`` mapping built from
    ``zcbs_data`` is stored under that key (replacing any existing value),
    and the document is written back to the same path.

    Args:
        entry_path: Path of the NDE entry YAML file to update in place.
        zcbs_data: Mapping providing ``zcbs_id``, ``zcbs_name``,
            ``platform_urls`` and ``match_score``.

    Returns:
        True once the file has been rewritten.

    Raises:
        KeyError: If ``zcbs_data`` lacks one of the required keys.
        TypeError: If the entry file does not contain a YAML mapping
            (for example, when the file is empty).
        OSError: If the file cannot be read or written.
    """
    # Explicit UTF-8 on both read and write: the dump below uses
    # allow_unicode=True, so non-ASCII characters are emitted verbatim and
    # must not depend on the platform default encoding.
    with open(entry_path, 'r', encoding='utf-8') as f:
        content = yaml.safe_load(f)

    if not isinstance(content, dict):
        # An empty file parses to None; fail with a message that names the
        # file instead of an opaque item-assignment error.
        raise TypeError(
            f"{entry_path} does not contain a YAML mapping "
            f"(got {type(content).__name__})"
        )

    # Create ZCBS enrichment section
    content['zcbs_enrichment'] = {
        'zcbs_id': zcbs_data['zcbs_id'],
        'zcbs_name': zcbs_data['zcbs_name'],
        'platform_urls': zcbs_data['platform_urls'],
        'enrichment_timestamp': datetime.now(timezone.utc).isoformat(),
        'source': 'https://www.dezijpe.nl/cgi-bin/boerderij.pl?misc=90',
        'match_score': zcbs_data['match_score'],
    }

    # Serialize to a string BEFORE opening the file for writing: opening
    # with 'w' truncates immediately, so a serialization error would
    # otherwise leave the entry file empty or half-written.
    serialized = yaml.dump(
        content,
        default_flow_style=False,
        allow_unicode=True,
        sort_keys=False,
    )

    # Write back
    with open(entry_path, 'w', encoding='utf-8') as f:
        f.write(serialized)

    return True
|
|
|
|
def main():
    """Apply ZCBS enrichment to every NDE entry listed in the cross-reference.

    Loads the cross-reference document, iterates over its
    ``needs_enrichment`` list and calls ``update_entry`` for each item whose
    entry file exists and which has ``platform_urls``. Progress is printed
    per item, followed by a summary of updated, skipped and failed entries.

    Per-item failures are collected and reported rather than aborting the
    batch.
    """
    print("Loading cross-reference file...")
    # An empty YAML document parses to None; treat it as "nothing to do".
    crossref = load_crossref() or {}

    # `or []` also covers a `needs_enrichment:` key whose value is null.
    needs_enrichment = crossref.get('needs_enrichment') or []
    print(f"Found {len(needs_enrichment)} entries needing ZCBS enrichment")

    updated = 0
    skipped = 0
    errors = []

    for item in needs_enrichment:
        entry_file = NDE_ENTRIES_DIR / item['nde_file']

        if not entry_file.exists():
            errors.append(f"File not found: {item['nde_file']}")
            continue

        # Skip if no platform URLs
        if not item.get('platform_urls'):
            skipped += 1
            print(f"  Skipped (no URLs): {item['nde_file']}")
            continue

        try:
            update_entry(entry_file, item)
        except Exception as e:
            # Deliberate best-effort: one bad entry must not stop the batch.
            errors.append(f"Error updating {item['nde_file']}: {e}")
        else:
            updated += 1
            print(f"  Updated: {item['nde_file']} (ZCBS #{item['zcbs_id']}, score: {item['match_score']})")

    print("\n=== Summary ===")
    print(f"Updated: {updated}")
    print(f"Skipped: {skipped}")
    print(f"Errors: {len(errors)}")

    if errors:
        print("\nErrors:")
        for e in errors:
            print(f"  - {e}")
|
|
|
|
# Run the batch update only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|