#!/usr/bin/env python3
"""
Export NDE Enriched Institutions to JSON for Frontend Map

Reads the enriched YAML files and produces a lightweight JSON file
suitable for the React/Leaflet map component.
"""

import json
import sys
from collections import Counter
from datetime import datetime, timezone
from pathlib import Path

# Add project root to path for imports.
# The root is taken to be the directory two levels above this file.
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

# PyYAML is the only third-party dependency; fail early with an install hint
# instead of a bare traceback when it is missing.
try:
    import yaml
except ImportError:
    print("Error: PyYAML not installed. Run: pip install pyyaml", file=sys.stderr)
    sys.exit(1)

# Institution type mappings
#
# Hex colour per single-letter institution type code; looked up by
# extract_institution_data() to fill the 'color' field of each map record.
# NOTE(review): 'X'/'H' share '#607d8b' and 'P'/'T' share '#ff5722', so those
# pairs cannot be told apart by colour alone - confirm this is intended.
TYPE_COLORS = {
    'G': '#00bcd4',  # Gallery - cyan
    'L': '#2ecc71',  # Library - green
    'A': '#3498db',  # Archive - blue
    'M': '#e74c3c',  # Museum - red
    'O': '#f39c12',  # Official - orange
    'R': '#1abc9c',  # Research - teal
    'C': '#795548',  # Corporation - brown
    'U': '#9e9e9e',  # Unknown - gray
    'B': '#4caf50',  # Botanical - green
    'E': '#ff9800',  # Education - amber
    'S': '#9b59b6',  # Society - purple
    'F': '#95a5a6',  # Features - gray
    'I': '#673ab7',  # Intangible - deep purple
    'X': '#607d8b',  # Mixed - blue gray
    'P': '#ff5722',  # Personal - deep orange
    'H': '#607d8b',  # Holy sites - blue gray
    'D': '#34495e',  # Digital - dark gray
    'N': '#e91e63',  # NGO - pink
    'T': '#ff5722',  # Taste/smell - deep orange
}

# Human-readable label per single-letter institution type code; used for the
# 'type_name' field of each map record and for the type distribution report.
TYPE_NAMES = {
    'G': 'Gallery',
    'L': 'Library',
    'A': 'Archive',
    'M': 'Museum',
    'O': 'Official',
    'R': 'Research',
    'C': 'Corporation',
    'U': 'Unknown',
    'B': 'Botanical',
    'E': 'Education',
    'S': 'Society',
    'F': 'Features',
    'I': 'Intangible',
    'X': 'Mixed',
    'P': 'Personal',
    'H': 'Holy sites',
    'D': 'Digital',
    'N': 'NGO',
    'T': 'Taste/smell',
}


def extract_institution_data(entry_data: dict) -> dict | None:
|
|
"""Extract the relevant data for the map from an enriched entry."""
|
|
|
|
# Get original entry data
|
|
original = entry_data.get('original_entry', {})
|
|
enrichment = entry_data.get('wikidata_enrichment', {})
|
|
|
|
# Skip if no coordinates
|
|
coords = enrichment.get('wikidata_coordinates', {})
|
|
if not coords or not coords.get('latitude') or not coords.get('longitude'):
|
|
return None
|
|
|
|
# Get institution type (first one if list)
|
|
types = original.get('type', [])
|
|
inst_type = types[0] if types else 'U'
|
|
|
|
# Get name - prefer Dutch label, fall back to original name
|
|
name = (
|
|
enrichment.get('wikidata_label_nl') or
|
|
original.get('organisatie') or
|
|
'Unknown Institution'
|
|
)
|
|
|
|
# Get city
|
|
city = original.get('plaatsnaam_bezoekadres', '')
|
|
|
|
# Get description - prefer Dutch, fall back to English
|
|
description = (
|
|
enrichment.get('wikidata_description_nl') or
|
|
enrichment.get('wikidata_description_en') or
|
|
''
|
|
)
|
|
|
|
# Get website
|
|
website = enrichment.get('wikidata_official_website', '')
|
|
|
|
# Get Wikidata ID
|
|
wikidata_id = enrichment.get('wikidata_entity_id', '')
|
|
|
|
return {
|
|
'lat': coords['latitude'],
|
|
'lon': coords['longitude'],
|
|
'name': name,
|
|
'city': city,
|
|
'type': inst_type,
|
|
'type_name': TYPE_NAMES.get(inst_type, 'Unknown'),
|
|
'color': TYPE_COLORS.get(inst_type, '#9e9e9e'),
|
|
'website': website,
|
|
'wikidata_id': wikidata_id,
|
|
'description': description[:200] + '...' if len(description) > 200 else description,
|
|
}
|
|
|
|
|
|
def main():
    """Main export function.

    Loads every ``*.yaml`` file in ``data/nde/enriched/entries`` under the
    project root, keeps the entries for which extract_institution_data()
    returns a record, sorts them by name and writes them to
    ``frontend/public/data/nde_institutions.json``. A summary and a per-type
    count are printed afterwards.

    Exits with status 1 when the input directory does not exist.
    """
    # Paths
    enriched_dir = project_root / 'data' / 'nde' / 'enriched' / 'entries'
    output_dir = project_root / 'frontend' / 'public' / 'data'
    output_file = output_dir / 'nde_institutions.json'

    # Bail out before touching the output: with a missing input directory the
    # glob below finds nothing and the existing export would be silently
    # overwritten with an empty list.
    if not enriched_dir.is_dir():
        print(f"Error: Enriched entries directory not found: {enriched_dir}", file=sys.stderr)
        sys.exit(1)

    # Create output directory if needed
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"Reading enriched entries from: {enriched_dir}")

    institutions = []
    files_processed = 0

    # Process all YAML files (sorted for a deterministic processing order)
    for yaml_file in sorted(enriched_dir.glob('*.yaml')):
        try:
            with open(yaml_file, 'r', encoding='utf-8') as f:
                entry_data = yaml.safe_load(f)

            files_processed += 1
            inst_data = extract_institution_data(entry_data)
        except Exception as e:
            # Deliberately best-effort: one unreadable or malformed file must
            # not abort the whole export.
            print(f"Warning: Error processing {yaml_file.name}: {e}")
            continue

        if inst_data:
            institutions.append(inst_data)

    # Sort by name. str() guards against names that YAML parsed as a
    # non-string scalar (e.g. a bare number), which would crash .lower().
    institutions.sort(key=lambda inst: str(inst['name']).lower())

    # Write JSON
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(institutions, f, ensure_ascii=False, indent=2)

    print("\n✅ Export complete!")
    print(f" Files processed: {files_processed}")
    print(f" Institutions with coordinates: {len(institutions)}")
    print(f" Output file: {output_file}")

    # Print type distribution, most frequent first
    type_counts = Counter(inst['type'] for inst in institutions)

    print("\n📊 Distribution by type:")
    for type_code, count in type_counts.most_common():
        print(f" {TYPE_NAMES.get(type_code, type_code)}: {count}")


# Run the export only when executed as a script, not when imported.
if __name__ == '__main__':
    main()