#!/usr/bin/env python3 """ Export NDE Enriched Institutions to JSON for Frontend Map Reads the enriched YAML files and produces a lightweight JSON file suitable for the React/Leaflet map component. """ import json from pathlib import Path from datetime import datetime, timezone import sys # Add project root to path for imports project_root = Path(__file__).parent.parent sys.path.insert(0, str(project_root)) try: import yaml except ImportError: print("Error: PyYAML not installed. Run: pip install pyyaml") sys.exit(1) # Institution type mappings TYPE_COLORS = { 'G': '#00bcd4', # Gallery - cyan 'L': '#2ecc71', # Library - green 'A': '#3498db', # Archive - blue 'M': '#e74c3c', # Museum - red 'O': '#f39c12', # Official - orange 'R': '#1abc9c', # Research - teal 'C': '#795548', # Corporation - brown 'U': '#9e9e9e', # Unknown - gray 'B': '#4caf50', # Botanical - green 'E': '#ff9800', # Education - amber 'S': '#9b59b6', # Society - purple 'F': '#95a5a6', # Features - gray 'I': '#673ab7', # Intangible - deep purple 'X': '#607d8b', # Mixed - blue gray 'P': '#ff5722', # Personal - deep orange 'H': '#607d8b', # Holy sites - blue gray 'D': '#34495e', # Digital - dark gray 'N': '#e91e63', # NGO - pink 'T': '#ff5722', # Taste/smell - deep orange } TYPE_NAMES = { 'G': 'Gallery', 'L': 'Library', 'A': 'Archive', 'M': 'Museum', 'O': 'Official', 'R': 'Research', 'C': 'Corporation', 'U': 'Unknown', 'B': 'Botanical', 'E': 'Education', 'S': 'Society', 'F': 'Features', 'I': 'Intangible', 'X': 'Mixed', 'P': 'Personal', 'H': 'Holy sites', 'D': 'Digital', 'N': 'NGO', 'T': 'Taste/smell', } def extract_institution_data(entry_data: dict) -> dict | None: """Extract the relevant data for the map from an enriched entry.""" # Get original entry data original = entry_data.get('original_entry', {}) enrichment = entry_data.get('wikidata_enrichment', {}) # Skip if no coordinates coords = enrichment.get('wikidata_coordinates', {}) if not coords or not coords.get('latitude') or not coords.get('longitude'): return None # Get institution type (first one if list) types = original.get('type', []) inst_type = types[0] if types else 'U' # Get name - prefer Dutch label, fall back to original name name = ( enrichment.get('wikidata_label_nl') or original.get('organisatie') or 'Unknown Institution' ) # Get city city = original.get('plaatsnaam_bezoekadres', '') # Get description - prefer Dutch, fall back to English description = ( enrichment.get('wikidata_description_nl') or enrichment.get('wikidata_description_en') or '' ) # Get website website = enrichment.get('wikidata_official_website', '') # Get Wikidata ID wikidata_id = enrichment.get('wikidata_entity_id', '') return { 'lat': coords['latitude'], 'lon': coords['longitude'], 'name': name, 'city': city, 'type': inst_type, 'type_name': TYPE_NAMES.get(inst_type, 'Unknown'), 'color': TYPE_COLORS.get(inst_type, '#9e9e9e'), 'website': website, 'wikidata_id': wikidata_id, 'description': description[:200] + '...' if len(description) > 200 else description, } def main(): """Main export function.""" # Paths enriched_dir = project_root / 'data' / 'nde' / 'enriched' / 'entries' output_dir = project_root / 'frontend' / 'public' / 'data' output_file = output_dir / 'nde_institutions.json' # Create output directory if needed output_dir.mkdir(parents=True, exist_ok=True) print(f"Reading enriched entries from: {enriched_dir}") institutions = [] files_processed = 0 files_with_coords = 0 # Process all YAML files yaml_files = sorted(enriched_dir.glob('*.yaml')) for yaml_file in yaml_files: try: with open(yaml_file, 'r', encoding='utf-8') as f: entry_data = yaml.safe_load(f) files_processed += 1 # Extract institution data inst_data = extract_institution_data(entry_data) if inst_data: institutions.append(inst_data) files_with_coords += 1 except Exception as e: print(f"Warning: Error processing {yaml_file.name}: {e}") continue # Sort by name institutions.sort(key=lambda x: x['name'].lower()) # Write JSON with open(output_file, 'w', encoding='utf-8') as f: json.dump(institutions, f, ensure_ascii=False, indent=2) print(f"\nāœ… Export complete!") print(f" Files processed: {files_processed}") print(f" Institutions with coordinates: {files_with_coords}") print(f" Output file: {output_file}") # Print type distribution type_counts = {} for inst in institutions: t = inst['type'] type_counts[t] = type_counts.get(t, 0) + 1 print(f"\nšŸ“Š Distribution by type:") for t, count in sorted(type_counts.items(), key=lambda x: -x[1]): print(f" {TYPE_NAMES.get(t, t)}: {count}") if __name__ == '__main__': main()