#!/usr/bin/env python3
"""
Generate archive_werkgebied_mapping.json from NDE institutions data.

Uses genealogiewerkbalk data to create a comprehensive mapping of:
- Archives to municipalities they serve
- Municipalities to their archives (municipal + provincial)
"""

import json
from pathlib import Path
from collections import defaultdict


def _slugify(name):
    """Derive an archive ID from a display name.

    The name is lowercased, spaces become dashes and apostrophes are dropped.
    """
    return name.lower().replace(' ', '-').replace("'", "")


def _register_archive(archives, seen_codes, source, arch_type,
                      mun_code, mun_name, extra=None):
    """Record that the archive described by *source* serves a municipality.

    Args:
        archives: archive_id -> archive record; updated in place.
        seen_codes: archive_id -> set of municipality codes already listed
            for that archive; updated in place.
        source: the archive sub-record (read for 'name', 'website', 'isil').
        arch_type: value stored under 'type' when the archive is first seen.
        mun_code: code of the municipality being processed.
        mun_name: name of the municipality being processed.
        extra: optional extra fields stored when the archive is first seen.

    Returns:
        The archive ID, or None when *source* is not a dict or has no name.
    """
    arch_name = source.get('name') if isinstance(source, dict) else None
    if not arch_name:
        return None

    arch_id = _slugify(arch_name)
    if arch_id not in archives:
        # The first sighting decides the metadata; later sightings only add
        # municipalities to the existing record.
        record = {
            'name': arch_name,
            'website': source.get('website'),
            'isil': source.get('isil'),
            'type': arch_type,
        }
        record.update(extra or {})
        record['municipalities'] = []
        archives[arch_id] = record

    # A per-archive set replaces rebuilding the code set from the list on
    # every institution.
    if mun_code not in seen_codes[arch_id]:
        seen_codes[arch_id].add(mun_code)
        archives[arch_id]['municipalities'].append({
            'code': mun_code,
            'name': mun_name,
        })
    return arch_id


def build_mapping(institutions):
    """Build the werkgebied mapping from a list of institution records.

    Args:
        institutions: iterable of dicts. The keys read from each one are
            'genealogiewerkbalk', 'isil', 'name', 'website' and 'city'.
            Missing keys and explicit ``None`` values are tolerated.

    Returns:
        A dict with the keys 'archives', 'municipality_to_archives' and
        'statistics', ready to be serialised as JSON.
    """
    institutions = list(institutions)  # iterated twice below
    archives = {}  # archive_id -> archive data
    # mun_code -> {municipal_archive_id, provincial_archive_id}
    municipality_to_archives = defaultdict(dict)
    seen_codes = defaultdict(set)

    # Pass 1: archives named in the genealogiewerkbalk data.
    for inst in institutions:
        # `or {}` also covers an explicit null in the JSON, which
        # dict.get(key, {}) would return as None.
        gwb = inst.get('genealogiewerkbalk') or {}
        if not gwb:
            continue

        mun = gwb.get('municipality') or {}
        mun_code = mun.get('code')
        if not mun_code:
            continue
        mun_name = mun.get('name')

        arch_id = _register_archive(
            archives, seen_codes, gwb.get('municipal_archive'),
            'municipal', mun_code, mun_name,
        )
        if arch_id is not None:
            municipality_to_archives[mun_code]['municipal_archive_id'] = arch_id

        prov = gwb.get('province') or {}
        arch_id = _register_archive(
            archives, seen_codes, gwb.get('provincial_archive'),
            'provincial', mun_code, mun_name,
            extra={
                'province_code': prov.get('code'),
                'province_name': prov.get('name'),
            },
        )
        if arch_id is not None:
            municipality_to_archives[mun_code]['provincial_archive_id'] = arch_id

    # Pass 2: institutions with an ISIL code whose name looks like an archive
    # but which pass 1 did not register.
    for inst in institutions:
        isil_data = inst.get('isil')
        isil_code = isil_data.get('code') if isinstance(isil_data, dict) else None
        name = inst.get('name') or ''
        lowered = name.lower()
        if not isil_code or not ('archief' in lowered or 'archiv' in lowered):
            continue

        arch_id = _slugify(name)
        if arch_id in archives:
            # Only add if not already present
            continue

        archives[arch_id] = {
            'name': name,
            'website': inst.get('website'),
            'isil': isil_code,
            'type': 'municipal',  # Assume municipal if not known
            'municipalities': [],
        }
        city = inst.get('city', '')
        if city:
            # We don't have the code, but record the city
            archives[arch_id]['municipalities'].append({
                'code': None,  # Unknown
                'name': city,
            })

    # Build statistics
    municipal_count = sum(1 for a in archives.values() if a['type'] == 'municipal')
    provincial_count = sum(1 for a in archives.values() if a['type'] == 'provincial')

    # Largest service area first; ties keep insertion order.
    archives_by_size = sorted(
        [{'id': k, 'name': v['name'], 'count': len(v['municipalities'])}
         for k, v in archives.items()],
        key=lambda x: x['count'],
        reverse=True,
    )

    return {
        'archives': archives,
        'municipality_to_archives': dict(municipality_to_archives),
        'statistics': {
            'total_archives': len(archives),
            'municipal_archives': municipal_count,
            'provincial_archives': provincial_count,
            'total_municipalities': len(municipality_to_archives),
            'archives_by_size': archives_by_size[:20],  # Top 20
        },
    }


def main():
    """Read the NDE institutions file, build the mapping and write it out.

    Both paths are resolved relative to this script's parent directory.
    A short summary is printed to stdout.
    """
    data_dir = Path(__file__).parent.parent / "frontend/public/data"

    # Load NDE institutions. The encoding is explicit so the result does not
    # depend on the platform's default locale.
    nde_path = data_dir / "nde_institutions.json"
    with open(nde_path, encoding="utf-8") as f:
        institutions = json.load(f)

    print(f"Loaded {len(institutions)} institutions")

    output = build_mapping(institutions)
    stats = output['statistics']

    # Write output. ensure_ascii=False emits raw non-ASCII characters, so the
    # file must be opened as UTF-8.
    output_path = data_dir / "archive_werkgebied_mapping.json"
    with open(output_path, 'w', encoding="utf-8") as f:
        json.dump(output, f, indent=2, ensure_ascii=False)

    print("\nGenerated werkgebied mapping:")
    print(f" Total archives: {stats['total_archives']}")
    print(f" Municipal archives: {stats['municipal_archives']}")
    print(f" Provincial archives: {stats['provincial_archives']}")
    print(f" Municipalities covered: {stats['total_municipalities']}")
    print("\nTop archives by size:")
    for a in stats['archives_by_size'][:10]:
        print(f" {a['name']}: {a['count']} municipalities")

    print(f"\nOutput written to: {output_path}")


if __name__ == '__main__':
    main()