#!/usr/bin/env python3
"""Fix Swiss region codes - TE should be TI (Ticino).

Scans ``CUSTODIAN_DIR`` for ``CH-*.yaml`` custodian files whose filename
carries a region code listed in ``SWISS_CORRECTIONS``, rewrites the GHCID
(and the related region-code fields) inside each file, and renames the file
to match the corrected GHCID.
"""

import os
import re
from datetime import datetime, timezone
from pathlib import Path

import yaml

CUSTODIAN_DIR = Path("/Users/kempersc/apps/glam/data/custodian")

# Swiss canton corrections
SWISS_CORRECTIONS = {
    "TE": "TI",  # Tessin → Ticino
}

# "CH-<2-letter region>-<rest>": group 1 is the region code, group 2 the rest.
_GHCID_RE = re.compile(r'^CH-([A-Z]{2})-(.+)$')
# Filename prefix used by main() to pre-select candidate files.
_FILENAME_RE = re.compile(r'^CH-([A-Z]{2})-')


def fix_file(filepath: Path) -> tuple[bool, str]:
    """Fix the Swiss region code in one custodian YAML file.

    The file is parsed, ``ghcid.ghcid_current`` is rewritten with the
    corrected region code, a history entry is prepended to
    ``ghcid.ghcid_history``, matching region-code / identifier fields are
    updated, the file is written back, and finally renamed to
    ``<new_ghcid>.yaml`` inside ``CUSTODIAN_DIR``.

    Args:
        filepath: Path of the YAML file to correct.

    Returns:
        ``(True, "<old> -> <new>")`` when the file was rewritten, otherwise
        ``(False, reason)``. Nothing is written to disk in the failure case.

    Raises:
        OSError: If reading, writing or renaming the file fails.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f.read())
    except (UnicodeDecodeError, yaml.YAMLError) as exc:
        # Report the bad file instead of aborting the whole batch run.
        return False, f"Unreadable YAML: {exc}"

    # Empty documents, non-mapping documents and null/non-mapping 'ghcid'
    # sections are all treated as "no GHCID" rather than crashing below.
    if not isinstance(data, dict) or 'ghcid' not in data:
        return False, "No GHCID"
    ghcid = data['ghcid']
    if not isinstance(ghcid, dict):
        return False, "No GHCID"

    current = ghcid.get('ghcid_current', '')
    # A null or non-string value cannot match the pattern (and would make
    # the regex call raise), so it is reported as an invalid format.
    match = _GHCID_RE.match(current) if isinstance(current, str) else None
    if not match:
        return False, "Invalid format"

    old_region, rest = match.groups()
    if old_region not in SWISS_CORRECTIONS:
        return False, f"No correction needed for {old_region}"

    # Validate the history container before mutating anything.
    history = ghcid.get('ghcid_history')
    if history is not None and not isinstance(history, list):
        return False, "Invalid ghcid_history"

    new_region = SWISS_CORRECTIONS[old_region]
    new_ghcid = f"CH-{new_region}-{rest}"
    new_filepath = CUSTODIAN_DIR / f"{new_ghcid}.yaml"

    # Check for collision: never overwrite a different, already existing file.
    if new_filepath.exists() and new_filepath != filepath:
        return False, f"COLLISION: {new_ghcid}"

    # Update GHCID
    timestamp = datetime.now(timezone.utc).isoformat()
    ghcid['ghcid_current'] = new_ghcid

    # Update location_resolution (skipped when absent or not a mapping)
    location_resolution = ghcid.get('location_resolution')
    if isinstance(location_resolution, dict):
        location_resolution['region_code'] = new_region

    # Add history entry, newest first; a missing or null list is created.
    if history is None:
        history = ghcid['ghcid_history'] = []
    history.insert(0, {
        'ghcid': new_ghcid,
        'valid_from': timestamp,
        'reason': f"Fixed region code: {old_region} -> {new_region} (ISO 3166-2:CH)"
    })

    # Update location.region_code only if it still holds the old code.
    location = data.get('location')
    if isinstance(location, dict) and location.get('region_code') == old_region:
        location['region_code'] = new_region

    # Update identifiers; a null list or non-mapping entries are ignored.
    for ident in data.get('identifiers') or []:
        if isinstance(ident, dict) and ident.get('identifier_scheme') == 'GHCID':
            ident['identifier_value'] = new_ghcid

    # Write updated content
    with open(filepath, 'w', encoding='utf-8') as f:
        yaml.dump(data, f, allow_unicode=True, default_flow_style=False, sort_keys=False)

    # Rename file so its name matches the corrected GHCID.
    if new_filepath != filepath:
        os.rename(filepath, new_filepath)

    return True, f"{current} -> {new_ghcid}"


def main():
    """Fix every Swiss custodian file whose filename has a wrong region code.

    Files are processed in sorted order; only those whose filename region
    code appears in ``SWISS_CORRECTIONS`` are passed to ``fix_file``. One
    line is printed per processed file, followed by a summary.
    """
    files = sorted(CUSTODIAN_DIR.glob("CH-*.yaml"))
    print(f"Found {len(files)} Swiss files")

    fixed = 0
    errors = 0

    for f in files:
        # Check if region needs correction
        match = _FILENAME_RE.match(f.name)
        if not match or match.group(1) not in SWISS_CORRECTIONS:
            continue

        success, msg = fix_file(f)
        if success:
            print(f"  Fixed: {msg}")
            fixed += 1
        else:
            print(f"  Error: {f.name}: {msg}")
            errors += 1

    print(f"\nSummary: Fixed {fixed}, Errors {errors}")


if __name__ == "__main__":
    main()