#!/usr/bin/env python3
"""
Fix El Ghriba Synagogue geocoding in Tunisia dataset.

The village "Hara Seghira" (now Erriadh) on Djerba island failed initial geocoding.
This script adds the correct coordinates.

GLAM Data Extraction Project
Schema: LinkML v0.2.1
"""

from datetime import datetime, timezone
from pathlib import Path

import yaml

# Dataset that is read, patched in memory and rewritten in place.
INPUT_FILE = Path('data/instances/tunisia/tunisian_institutions_enhanced.yaml')

# City name stored in the dataset and the name that replaces it.
OLD_CITY = 'Hara Seghira'
NEW_CITY = 'Erriadh'

# Coordinates from Nominatim (El Ghriba Synagogue direct query).
LATITUDE = 33.8139230
LONGITUDE = 10.8593929

# Recorded on the institution so the former village name stays searchable.
ALTERNATIVE_NAME = 'El Ghriba Synagogue, Hara Seghira'


def _apply_geocoding_fix(institutions: list, timestamp: str) -> bool:
    """Patch the first Ghriba location whose city is still ``OLD_CITY``.

    Args:
        institutions: Institution mappings as loaded from the YAML file;
            the matching entry is modified in place.
        timestamp: ISO-8601 string embedded in the provenance note.

    Returns:
        True if a location was updated, False if no institution with
        'Ghriba' in its name has a location whose city equals ``OLD_CITY``.
    """
    for index, inst in enumerate(institutions):
        # `name` can be absent or an explicit YAML null; treat both as "".
        if 'Ghriba' not in (inst.get('name') or ''):
            continue
        print(f"\nFound at index {index}: {inst['name']}")

        for loc in inst.get('locations') or []:
            if loc.get('city') != OLD_CITY:
                continue

            print(f" Current city: {loc['city']}")
            print(f" Current coords: {loc.get('latitude')}, {loc.get('longitude')}")

            loc['latitude'] = LATITUDE
            loc['longitude'] = LONGITUDE
            # Switch to the modern name; the old one is kept as an alternative.
            loc['city'] = NEW_CITY

            alt_names = inst.get('alternative_names')
            if alt_names is None:
                # Covers both a missing key and `alternative_names: null`.
                alt_names = inst['alternative_names'] = []
            if ALTERNATIVE_NAME not in alt_names:
                alt_names.append(ALTERNATIVE_NAME)

            print(f" Updated city: {loc['city']}")
            print(f" Updated coords: {loc['latitude']}, {loc['longitude']}")

            provenance = inst.get('provenance')
            if provenance is not None:
                # `notes: null` would otherwise make the concatenation fail.
                notes = provenance.get('notes') or ''
                fix_note = (
                    f" Geocoding fixed on {timestamp} "
                    f"({OLD_CITY} → {NEW_CITY}, Djerba)."
                )
                provenance['notes'] = notes + fix_note
            return True

    return False


def main(input_file: Path = INPUT_FILE) -> None:
    """Add coordinates for El Ghriba Synagogue and rewrite the dataset.

    Loads the YAML dataset, updates the matching location, appends a
    provenance note, refreshes ``_metadata.generated`` and writes the
    result back to the same path. If no matching location is found, a
    message is printed and the file is left untouched.

    Args:
        input_file: Path of the YAML dataset to patch in place. Defaults
            to the Tunisian institutions file.
    """
    print("Fixing El Ghriba Synagogue Geocoding")
    print("=" * 60)

    print(f"\nReading: {input_file}")
    with open(input_file, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    # One timestamp for both the provenance note and the metadata field.
    timestamp = datetime.now(timezone.utc).isoformat()

    # An empty document loads as None; treat that as "nothing to fix".
    institutions = data.get('institutions') if isinstance(data, dict) else None

    if not _apply_geocoding_fix(institutions or [], timestamp):
        print("\n❌ El Ghriba Synagogue not found!")
        return

    metadata = data.get('_metadata')
    if isinstance(metadata, dict):
        metadata['generated'] = timestamp
    else:
        # Do not lose the geocoding fix just because the section is absent.
        print("\nNo _metadata section found; skipping 'generated' update")

    # Serialise before opening the file for writing: opening with 'w'
    # truncates it, so a dump error would otherwise destroy the dataset.
    serialized = yaml.dump(
        data,
        default_flow_style=False,
        allow_unicode=True,
        sort_keys=False,
    )

    print(f"\nWriting updated data: {input_file}")
    with open(input_file, 'w', encoding='utf-8') as f:
        f.write(serialized)

    print("\n✅ Geocoding fixed for El Ghriba Synagogue")
    print(f" Location: {NEW_CITY} (formerly {OLD_CITY}), Djerba, Tunisia")
    # Seven decimals reproduces the precision the coordinates were given in.
    print(f" Coordinates: {LATITUDE:.7f}, {LONGITUDE:.7f}")


if __name__ == '__main__':
    main()