glam/scripts/fix_name_mismatch_false_matches.py
kempersc 85d9cee82f fix: mark 8 more Google Maps false matches detected via name mismatch
Additional Type I custodian files with obvious name mismatches between
KIEN registry entries and Google Maps results. These couldn't be
auto-detected via domain mismatch because they lack official websites.

Fixes:
- Dick Timmerman (person) → carpentry business
- Ria Bos (cigar maker) → money transfer agent
- Stichting Kracom (Krampuslauf) → Happy Caps retail
- Fed. Nederlandse Vertelorganisaties → NET Foundation
- Stichting dodenherdenking Alphen → wrong memorial
- Sao Joao Rotterdam → Heemraadsplein (location not org)
- sport en spel (heritage) → equipment rental
- Eiertikken Ommen → restaurant

Also adds detection and fix scripts for Google Maps false matches.
2026-01-08 13:26:53 +01:00

132 lines
4.6 KiB
Python

#!/usr/bin/env python3
"""
Fix obvious name mismatch false matches for Type I custodians.
These are files where Google Maps returned a completely different organization
than the KIEN registry entry, but we couldn't auto-detect because there was
no official website to compare domains.
Per Rule 40: KIEN Registry is authoritative for Type I custodians.
"""
import yaml
from pathlib import Path
from datetime import datetime, timezone
# Manually verified mismatches, keyed by custodian file name. Each entry pairs
# the KIEN registry identity (kien_*) with the unrelated Google Maps result
# (gmaps_*) that was attached to the file.
FILES_TO_FIX = {
    # Person vs. carpentry business
    "NL-GE-ARN-I-DT.yaml": {
        "kien_name": "Dick Timmerman",
        "kien_type": "heritage practitioner (person)",
        "gmaps_name": "Timmer & Onderhoudsbedrijf Dik Lubbertsen",
        "gmaps_type": "carpentry business",
    },
    # Cigar maker vs. money transfer agent
    "NL-OV-IJS-I-RB.yaml": {
        "kien_name": "Ria Bos",
        "kien_type": "traditional cigar maker (person)",
        "gmaps_name": "Ria Money Transfer Agent",
        "gmaps_type": "money transfer business",
    },
    # Krampuslauf foundation vs. retail store
    "NL-ZH-ROT-I-K.yaml": {
        "kien_name": "Stichting Kracom",
        "kien_type": "Krampuslauf Rotterdam heritage foundation",
        "gmaps_name": "Happy Caps",
        "gmaps_type": "retail store in different city (Apeldoorn)",
    },
    # Storytelling federation vs. unrelated foundation
    "NL-UT-UTR-I-FNV.yaml": {
        "kien_name": "Federatie Nederlandse Vertelorganisaties",
        "kien_type": "Dutch storytelling federation",
        "gmaps_name": "NET Foundation",
        "gmaps_type": "different foundation",
    },
    # Alphen memorial foundation vs. a different memorial
    "NL-ZH-AAD-I-DA.yaml": {
        "kien_name": "Stichting dodenherdenking Alphen",
        "kien_type": "memorial foundation Alphen aan den Rijn",
        "gmaps_name": "Waalsdorpervlakte Bourdon Bell",
        "gmaps_type": "different memorial in different location",
    },
    # Festival organization vs. a public square
    "NL-ZH-ROT-I-SJR.yaml": {
        "kien_name": "Sao Joao Rotterdam",
        "kien_type": "Portuguese heritage festival organization",
        "gmaps_name": "Heemraadsplein",
        "gmaps_type": "public square (location, not organization)",
    },
    # Traditional games heritage vs. equipment rental
    "NL-GE-OOS-I-SS.yaml": {
        "kien_name": "sport en spel",
        "kien_type": "traditional games heritage organization",
        "gmaps_name": "Damu Sport en Spel Verhuur",
        "gmaps_type": "sports equipment rental business",
    },
    # Egg-tapping heritage vs. restaurant
    "NL-OV-OMM-I-EO.yaml": {
        "kien_name": "Eiertikken Ommen",
        "kien_type": "traditional egg-tapping game heritage",
        "gmaps_name": "Restaurant Ekkelenkamp Ommen",
        "gmaps_type": "restaurant",
    },
}
def fix_gmaps_false_match(filepath: Path, fix_info: dict) -> bool:
    """Mark the Google Maps enrichment of one custodian file as FALSE_MATCH.

    The existing ``google_maps_enrichment`` value is replaced by a record with
    status ``FALSE_MATCH`` that keeps the previous data under
    ``original_false_match``; the file is then rewritten in place.

    Args:
        filepath: Path of the custodian YAML file to update.
        fix_info: Mapping with the keys ``kien_name``, ``kien_type``,
            ``gmaps_name`` and ``gmaps_type`` describing the mismatch.

    Returns:
        True if the file was rewritten. False if it was skipped because the
        document is not a YAML mapping, it has no Google Maps data, or it is
        already marked as FALSE_MATCH.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    # safe_load() yields None for an empty file and may yield a list or a
    # scalar; only a mapping can hold the enrichment key, so skip anything
    # else instead of crashing the whole batch on data.get().
    if not isinstance(data, dict):
        print(f" Not a YAML mapping, skipped: {filepath.name}")
        return False

    gmaps = data.get('google_maps_enrichment', {})
    if not gmaps:
        print(f" No Google Maps data in {filepath.name}")
        return False
    if gmaps.get('status') == 'FALSE_MATCH':
        print(f" Already fixed: {filepath.name}")
        return False

    # Human-readable explanation stored alongside the correction.
    false_match_reason = (
        f"Google Maps returned \"{fix_info['gmaps_name']}\" ({fix_info['gmaps_type']}) "
        f"instead of \"{fix_info['kien_name']}\" ({fix_info['kien_type']}). "
        f"Name mismatch detected during manual review. "
        f"Per Rule 40: KIEN is authoritative for Type I intangible heritage custodians."
    )

    # Replace the enrichment with the correction record, preserving the
    # original (wrong) data for auditability.
    data['google_maps_enrichment'] = {
        'status': 'FALSE_MATCH',
        'false_match_reason': false_match_reason,
        'original_false_match': gmaps,
        'correction_timestamp': datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
        'correction_agent': 'opencode-claude-sonnet-4',
        'correction_method': 'manual_name_mismatch_review'
    }

    # Serialize before opening the file for writing: opening with 'w'
    # truncates immediately, so a dump error would otherwise destroy the file.
    serialized = yaml.dump(
        data,
        default_flow_style=False,
        allow_unicode=True,
        sort_keys=False,
        width=120,
    )
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(serialized)

    print(f" ✓ Fixed: {filepath.name}")
    print(f" KIEN: {fix_info['kien_name']}")
    print(f" GMaps (wrong): {fix_info['gmaps_name']}")
    return True
def main():
    """Apply the false-match fix to every file listed in FILES_TO_FIX.

    Files are looked up in ``data/custodian`` relative to the parent of this
    script's directory. Missing files are reported and skipped.

    Returns:
        Always 0, used as the process exit status.
    """
    target_dir = Path(__file__).parent.parent / 'data' / 'custodian'
    print(f"Fixing {len(FILES_TO_FIX)} files with name mismatch false matches...\n")

    fixed_count = 0
    for name, info in FILES_TO_FIX.items():
        candidate = target_dir / name
        if candidate.exists():
            # Only files that were actually rewritten count as fixed.
            if fix_gmaps_false_match(candidate, info):
                fixed_count += 1
        else:
            print(f" File not found: {name}")

    print(f"\n✓ Fixed {fixed_count} files")
    return 0
if __name__ == '__main__':
    # The bare exit() helper is installed by the site module for interactive
    # use and is absent under `python -S` or in frozen builds; raising
    # SystemExit gives the same exit status without that dependency.
    raise SystemExit(main())