#!/usr/bin/env python3
"""
Export Swiss ISIL data to CSV format

Converts the JSON scraping output to a flat CSV structure

Author: GLAM Data Extraction Project
Date: November 2025
"""

import csv
import json
from pathlib import Path
from typing import Any, Dict, List, Optional

# Column order of the exported CSV. flatten_institution() emits exactly these
# keys, in this order, for every record.
CSV_FIELDNAMES = [
    'name',
    'alternative_name',
    'status',
    'description',
    'region',
    'canton',
    'categories',
    'isil_code',
    'institution_type',
    'street',
    'postal_code',
    'city',
    'phone',
    'email',
    'website',
    'opening_hours',
    'memberships',
    'dewey_classifications',
    'detail_url',
    'merged_into_name',
    'merged_into_url',
]

# Directory used by main() when no other location is given.
DEFAULT_BASE_DIR = Path("/Users/kempersc/apps/glam/data/isil/switzerland")


def _text(value: Any) -> str:
    """Return ``value`` as a string, mapping a missing/null value to ''."""
    return '' if value is None else str(value)


def _join_values(values: Any) -> str:
    """Collapse a multi-valued field into one '; '-separated string.

    A bare string is returned unchanged (joining it would split it into
    characters), and non-string items are stringified before joining.
    """
    if not values:
        return ''
    if isinstance(values, str):
        return values
    return '; '.join(str(item) for item in values)


def _percentage(count: int, total: int) -> float:
    """Return ``count`` as a percentage of ``total`` (0.0 when total is 0)."""
    return count / total * 100 if total else 0.0


def flatten_institution(inst: Dict[str, Any]) -> Dict[str, str]:
    """Flatten nested institution data for CSV export.

    Args:
        inst: One institution record as loaded from the JSON file. Nested
            ``address``, ``contact`` and ``merged_into`` objects and the list
            fields ``categories``, ``memberships`` and
            ``dewey_classifications`` are optional and may be null.

    Returns:
        A dict with exactly the keys in ``CSV_FIELDNAMES``. Missing or null
        values become '', and list fields are joined with '; '.
    """
    # `or {}` rather than a .get() default: the key may be present with an
    # explicit null, which a default would not replace.
    address = inst.get('address') or {}
    contact = inst.get('contact') or {}
    merged = inst.get('merged_into') or {}

    return {
        # Basic information
        'name': _text(inst.get('name')),
        'alternative_name': _text(inst.get('alternative_name')),
        'status': _text(inst.get('status')),
        'description': _text(inst.get('description')),
        # Location
        'region': _text(inst.get('region')),
        'canton': _text(inst.get('canton')),
        # Categories (multiple values joined with semicolon)
        'categories': _join_values(inst.get('categories')),
        # ISIL code
        'isil_code': _text(inst.get('isil_code')),
        # Institution type (from detail page)
        'institution_type': _text(inst.get('institution_type')),
        # Address
        'street': _text(address.get('street')),
        'postal_code': _text(address.get('postal_code')),
        'city': _text(address.get('city')),
        # Contact
        'phone': _text(contact.get('phone')),
        'email': _text(contact.get('email')),
        'website': _text(contact.get('website')),
        # Opening hours
        'opening_hours': _text(inst.get('opening_hours')),
        # Memberships and Dewey classifications (joined with semicolon)
        'memberships': _join_values(inst.get('memberships')),
        'dewey_classifications': _join_values(
            inst.get('dewey_classifications')
        ),
        # URLs
        'detail_url': _text(inst.get('detail_url')),
        # Merged institution info
        'merged_into_name': _text(merged.get('name')),
        'merged_into_url': _text(merged.get('url')),
    }


def export_to_csv(json_file: Path, csv_file: Path) -> None:
    """Export Swiss ISIL JSON data to CSV.

    Reads the institution records from ``json_file``, flattens each one, writes
    them to ``csv_file`` with the ``CSV_FIELDNAMES`` header, and prints a short
    summary to stdout.

    Args:
        json_file: Path of the UTF-8 JSON file holding the institution records.
        csv_file: Path of the CSV file to create (overwritten if it exists).
    """
    print(f"Loading data from {json_file}")

    with open(json_file, 'r', encoding='utf-8') as f:
        institutions = json.load(f)

    print(f"Loaded {len(institutions)} institutions")

    # Flatten all institutions
    flattened = [flatten_institution(inst) for inst in institutions]

    # Write CSV
    print(f"Writing CSV to {csv_file}")

    # newline='' lets the csv module control line endings itself.
    with open(csv_file, 'w', encoding='utf-8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=CSV_FIELDNAMES)
        writer.writeheader()
        writer.writerows(flattened)

    total = len(flattened)
    print(f"✓ Successfully exported {total} institutions to CSV")

    # Print summary statistics
    with_isil = sum(1 for row in flattened if row['isil_code'])
    with_email = sum(1 for row in flattened if row['email'])
    with_website = sum(1 for row in flattened if row['website'])

    # _percentage() guards the empty-export case, which would otherwise
    # divide by zero after the CSV has already been written.
    print("\nExport Summary:")
    print(f" Total records: {total}")
    print(f" With ISIL codes: {with_isil} ({_percentage(with_isil, total):.1f}%)")
    print(f" With email: {with_email} ({_percentage(with_email, total):.1f}%)")
    print(f" With website: {with_website} ({_percentage(with_website, total):.1f}%)")


def main(base_dir: Optional[Path] = None) -> None:
    """Main export function.

    Args:
        base_dir: Directory containing ``swiss_isil_complete_final.json``; the
            CSV is written next to it as ``swiss_isil_complete.csv``. Defaults
            to ``DEFAULT_BASE_DIR``.
    """
    base_dir = DEFAULT_BASE_DIR if base_dir is None else Path(base_dir)
    json_file = base_dir / "swiss_isil_complete_final.json"
    csv_file = base_dir / "swiss_isil_complete.csv"

    if not json_file.exists():
        print(f"Error: JSON file not found at {json_file}")
        return

    export_to_csv(json_file, csv_file)

    print("\n✓ CSV export complete!")
    print(f" Output file: {csv_file}")


if __name__ == "__main__":
    main()