#!/usr/bin/env python3
"""
Fix obvious name mismatch false matches for Type I custodians.

These are files where Google Maps returned a completely different organization
than the KIEN registry entry, but we couldn't auto-detect because there was
no official website to compare domains.

Per Rule 40: KIEN Registry is authoritative for Type I custodians.
"""

import yaml
from pathlib import Path
from datetime import datetime, timezone

# Files with definite name mismatches (manually verified).
# Maps custodian file name -> the KIEN registry identity versus the unrelated
# entity Google Maps returned; both are quoted in the recorded reason text.
FILES_TO_FIX = {
    "NL-GE-ARN-I-DT.yaml": {
        "kien_name": "Dick Timmerman",
        "kien_type": "heritage practitioner (person)",
        "gmaps_name": "Timmer & Onderhoudsbedrijf Dik Lubbertsen",
        "gmaps_type": "carpentry business"
    },
    "NL-OV-IJS-I-RB.yaml": {
        "kien_name": "Ria Bos",
        "kien_type": "traditional cigar maker (person)",
        "gmaps_name": "Ria Money Transfer Agent",
        "gmaps_type": "money transfer business"
    },
    "NL-ZH-ROT-I-K.yaml": {
        "kien_name": "Stichting Kracom",
        "kien_type": "Krampuslauf Rotterdam heritage foundation",
        "gmaps_name": "Happy Caps",
        "gmaps_type": "retail store in different city (Apeldoorn)"
    },
    "NL-UT-UTR-I-FNV.yaml": {
        "kien_name": "Federatie Nederlandse Vertelorganisaties",
        "kien_type": "Dutch storytelling federation",
        "gmaps_name": "NET Foundation",
        "gmaps_type": "different foundation"
    },
    "NL-ZH-AAD-I-DA.yaml": {
        "kien_name": "Stichting dodenherdenking Alphen",
        "kien_type": "memorial foundation Alphen aan den Rijn",
        "gmaps_name": "Waalsdorpervlakte Bourdon Bell",
        "gmaps_type": "different memorial in different location"
    },
    "NL-ZH-ROT-I-SJR.yaml": {
        "kien_name": "Sao Joao Rotterdam",
        "kien_type": "Portuguese heritage festival organization",
        "gmaps_name": "Heemraadsplein",
        "gmaps_type": "public square (location, not organization)"
    },
    "NL-GE-OOS-I-SS.yaml": {
        "kien_name": "sport en spel",
        "kien_type": "traditional games heritage organization",
        "gmaps_name": "Damu Sport en Spel Verhuur",
        "gmaps_type": "sports equipment rental business"
    },
    "NL-OV-OMM-I-EO.yaml": {
        "kien_name": "Eiertikken Ommen",
        "kien_type": "traditional egg-tapping game heritage",
        "gmaps_name": "Restaurant Ekkelenkamp Ommen",
        "gmaps_type": "restaurant"
    },
}


def fix_gmaps_false_match(filepath: Path, fix_info: dict) -> bool:
    """Mark Google Maps enrichment as FALSE_MATCH for a file.

    The existing ``google_maps_enrichment`` value is preserved under
    ``original_false_match`` inside a new record that carries the status,
    a human-readable reason and correction provenance.

    Args:
        filepath: Custodian YAML file to rewrite in place.
        fix_info: Mapping with the keys ``kien_name``, ``kien_type``,
            ``gmaps_name`` and ``gmaps_type`` used to build the reason text.

    Returns:
        True if the file was rewritten; False if it was skipped because the
        document is not a mapping, has no Google Maps data, or is already
        marked FALSE_MATCH.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    # safe_load yields None for an empty file (and may yield a list/scalar);
    # skip such files instead of crashing the whole batch on `.get`.
    if not isinstance(data, dict):
        print(f" Not a YAML mapping, skipped: {filepath.name}")
        return False

    gmaps = data.get('google_maps_enrichment', {})
    if not gmaps:
        print(f" No Google Maps data in {filepath.name}")
        return False

    # Only a mapping can carry a status; any other truthy value is simply
    # archived below as the original (false) match.
    if isinstance(gmaps, dict) and gmaps.get('status') == 'FALSE_MATCH':
        print(f" Already fixed: {filepath.name}")
        return False

    # Create the false match record
    false_match_reason = (
        f"Google Maps returned \"{fix_info['gmaps_name']}\" ({fix_info['gmaps_type']}) "
        f"instead of \"{fix_info['kien_name']}\" ({fix_info['kien_type']}). "
        f"Name mismatch detected during manual review. "
        f"Per Rule 40: KIEN is authoritative for Type I intangible heritage custodians."
    )

    # Restructure the data
    data['google_maps_enrichment'] = {
        'status': 'FALSE_MATCH',
        'false_match_reason': false_match_reason,
        'original_false_match': gmaps,
        'correction_timestamp': datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
        'correction_agent': 'opencode-claude-sonnet-4',
        'correction_method': 'manual_name_mismatch_review'
    }

    # Serialize before opening for write: opening with 'w' truncates the file,
    # so a dump failure must happen while the original is still intact.
    serialized = yaml.dump(
        data,
        default_flow_style=False,
        allow_unicode=True,
        sort_keys=False,
        width=120,
    )

    # Write back
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(serialized)

    print(f" ✓ Fixed: {filepath.name}")
    print(f" KIEN: {fix_info['kien_name']}")
    print(f" GMaps (wrong): {fix_info['gmaps_name']}")
    return True


def main() -> int:
    """Apply the false-match correction to every file in FILES_TO_FIX.

    Files are looked up in ``data/custodian`` under the parent of this
    script's directory. Missing files are reported and skipped.

    Returns:
        Always 0, used as the process exit status.
    """
    custodian_dir = Path(__file__).parent.parent / 'data' / 'custodian'

    print(f"Fixing {len(FILES_TO_FIX)} files with name mismatch false matches...\n")

    fixed = 0
    for filename, fix_info in FILES_TO_FIX.items():
        filepath = custodian_dir / filename
        if not filepath.exists():
            print(f" File not found: {filename}")
            continue
        if fix_gmaps_false_match(filepath, fix_info):
            fixed += 1

    print(f"\n✓ Fixed {fixed} files")
    return 0


if __name__ == '__main__':
    # The bare `exit` builtin is injected by the `site` module and is not
    # guaranteed to exist (e.g. `python -S`); SystemExit always is.
    raise SystemExit(main())