glam/scripts/fix_remaining_false_matches.py
kempersc 9d68ed8c2e fix: mark 15 more Google Maps false matches via comprehensive review
Manual review of remaining Type I custodian files without official websites
identified additional false matches in these categories:

Wrong organization type:
- Bird catchers vs bird watchers association
- Heritage org vs webshop
- Regional org vs specific local entity
- Federation vs single member association
- Bell ringers org vs church building

Wrong location:
- Amsterdam org matched to Den Haag
- Haarlem org matched to Apeldoorn
- Rotterdam org matched to Amstelveen
- Dutch org matched to Suriname (!)
- Giethoorn event matched to Belt-Schutsloot
- Duindorp bonfire matched to Scheveningen

Different event/entity:
- Horse racing org vs summer festival
- Street name vs organization
- Heritage foundation vs specific local fair

Total Type I false matches fixed: 62 of 188 files (33%)
2026-01-08 15:21:31 +01:00

172 lines
6.7 KiB
Python

#!/usr/bin/env python3
"""
Fix additional name/location mismatch false matches for Type I custodians.
These are files where Google Maps returned a different organization, wrong location,
or wrong type of entity (location vs organization).
Per Rule 40: KIEN Registry is authoritative for Type I custodians.
"""
import yaml
from pathlib import Path
from datetime import datetime, timezone
# Manually verified Google Maps mismatches, keyed by custodian file name.
# Each entry carries four strings that are interpolated into the recorded
# false-match reason:
#   kien_name  - name of the custodian as known to KIEN
#   kien_type  - short description of what that custodian is
#   gmaps_name - name of the place Google Maps returned instead
#   gmaps_type - why the Google Maps result is not the same entity
FILES_TO_FIX = {
    "NL-GE-ARN-I-FV.yaml": {
        "kien_name": "Friesche vogelvangersbelang",
        "kien_type": "Frisian bird catchers association",
        "gmaps_name": "Vogelwacht St. Johannesga-Rotsterhaule e.o.",
        "gmaps_type": "bird watch group - different organization (watchers vs catchers)",
    },
    "NL-GR-GRO-I-GAGA.yaml": {
        "kien_name": "Het Genootschap Ambachtelijke Grunneger Aaierbal",
        "kien_type": "traditional egg ball makers heritage organization",
        "gmaps_name": "Ik bin Grunneger (alleen webshop)",
        "gmaps_type": "webshop, not the heritage organization",
    },
    "NL-GR-UIT-I-SH.yaml": {
        "kien_name": "Scholen in het Hogeland",
        "kien_type": "regional schools organization",
        "gmaps_name": "Het Hogeland College, locatie Warffum",
        "gmaps_type": "one specific school, not the regional organization",
    },
    "NL-LI-ROE-I-BR.yaml": {
        "kien_name": "Bacchusklup Remunj",
        "kien_type": "Bacchus club heritage organization",
        "gmaps_name": "Bacchusdrieve",
        "gmaps_type": "street/path name, not an organization",
    },
    "NL-NH-AMS-I-SE.yaml": {
        "kien_name": "Stichting Sisa Events",
        "kien_type": "Amsterdam-based events foundation",
        "gmaps_name": "Sisa Events",
        "gmaps_type": "wrong location - Den Haag instead of Amsterdam",
    },
    "NL-NH-HAA-I-MJ.yaml": {
        "kien_name": "Stichting Ma Jong",
        "kien_type": "Haarlem-based foundation",
        "gmaps_name": "Majon",
        "gmaps_type": "wrong location - Apeldoorn instead of Haarlem",
    },
    "NL-NH-IJM-I-KCIZ.yaml": {
        "kien_name": "Stichting Kortebaandraverij C.A. IJmuiden aan Zee",
        "kien_type": "horse racing heritage organization",
        "gmaps_name": "Zomerfestival IJmuiden",
        "gmaps_type": "summer festival - different event type",
    },
    "NL-NH-LAR-I-KI.yaml": {
        "kien_name": "Stichting Kermiserfgoed i.o.",
        "kien_type": "fair heritage foundation (national scope)",
        "gmaps_name": "Klarenbeekse Kermis",
        "gmaps_type": "specific local fair, not the heritage foundation",
    },
    "NL-NH-STO-I-FAAE.yaml": {
        "kien_name": "Federatie van Amateurtuindersverenigingen Alkmaar e.o.",
        "kien_type": "federation of amateur gardening associations",
        "gmaps_name": "Amateurtuindersvereniging De Hoefmolen",
        "gmaps_type": "one specific local association, not the federation",
    },
    "NL-OV-GIE-I-GG.yaml": {
        "kien_name": "Vereniging Gondelvaart Giethoorn",
        "kien_type": "Giethoorn gondola parade association",
        "gmaps_name": "Gondelvaart Belt-Schutsloot",
        "gmaps_type": "different village's gondola parade (Belt-Schutsloot, not Giethoorn)",
    },
    "NL-OV-TIL-I-BSND.yaml": {
        "kien_name": "Stichting Beieraars Sint Nicolaastoren Denekamp",
        "kien_type": "bell ringers foundation",
        "gmaps_name": "Sint Nicolaaskerk",
        "gmaps_type": "the church building, not the bell ringers organization",
    },
    "NL-OV-ZWO-I-LE.yaml": {
        "kien_name": "Verhalend Landschap",
        "kien_type": "storytelling landscape heritage organization",
        "gmaps_name": "Landschap Erfgoed",
        "gmaps_type": "different organization (landscape heritage, not storytelling)",
    },
    "NL-ZH-ROT-I-A.yaml": {
        "kien_name": "APNA",
        "kien_type": "Rotterdam-based heritage organization",
        "gmaps_name": "Apna Indian Bazaar",
        "gmaps_type": "wrong city (Amstelveen) and different type (bazaar, not heritage org)",
    },
    "NL-ZH-THX-I-DV.yaml": {
        "kien_name": "Duindorp Vreugdevuur",
        "kien_type": "Duindorp neighborhood bonfire tradition",
        "gmaps_name": "Vreugdevuur Scheveningen Noorderstrand",
        "gmaps_type": "different neighborhood's bonfire (Scheveningen, not Duindorp)",
    },
    "NL-ZH-ZOE-I-CHJI.yaml": {
        "kien_name": "Stichting Comite Herdenking Javaanse Immigratie",
        "kien_type": "Dutch-based Javanese immigration memorial committee",
        "gmaps_name": "Vereniging Herdenking Javaanse Immigratie",
        "gmaps_type": "wrong country entirely - Suriname instead of Netherlands",
    },
}
def fix_gmaps_false_match(filepath: Path, fix_info: dict) -> bool:
    """Mark the Google Maps enrichment of one custodian file as FALSE_MATCH.

    The file's ``google_maps_enrichment`` value is replaced by a record with
    status ``FALSE_MATCH``; the previous value is preserved under
    ``original_false_match``. The file is rewritten in place.

    Args:
        filepath: Path of the custodian YAML file to update.
        fix_info: Mapping with the keys ``kien_name``, ``kien_type``,
            ``gmaps_name`` and ``gmaps_type``; they are interpolated into
            the recorded reason text.

    Returns:
        True when the file was rewritten. False when it was left untouched
        because it holds no YAML mapping, has no Google Maps data, or is
        already marked as FALSE_MATCH.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    # safe_load yields None for an empty document and a list/scalar for a
    # non-mapping one; neither can carry enrichment data, and calling .get()
    # on them would abort the whole batch run.
    if not isinstance(data, dict):
        print(f" No Google Maps data in {filepath.name}")
        return False

    gmaps = data.get('google_maps_enrichment', {})
    if not gmaps:
        print(f" No Google Maps data in {filepath.name}")
        return False

    # Skip files already corrected so the original match is not wrapped twice.
    if gmaps.get('status') == 'FALSE_MATCH':
        print(f" Already fixed: {filepath.name}")
        return False

    # Create the false match record
    false_match_reason = (
        f"Google Maps returned \"{fix_info['gmaps_name']}\" ({fix_info['gmaps_type']}) "
        f"instead of \"{fix_info['kien_name']}\" ({fix_info['kien_type']}). "
        f"Per Rule 40: KIEN is authoritative for Type I intangible heritage custodians."
    )

    # Restructure the data: keep the rejected match for auditing.
    data['google_maps_enrichment'] = {
        'status': 'FALSE_MATCH',
        'false_match_reason': false_match_reason,
        'original_false_match': gmaps,
        'correction_timestamp': datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
        'correction_agent': 'opencode-claude-sonnet-4',
        'correction_method': 'manual_review_name_location_mismatch'
    }

    # Write back; sort_keys=False keeps the key order of the loaded document.
    with open(filepath, 'w', encoding='utf-8') as f:
        yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False, width=120)

    print(f" ✓ Fixed: {filepath.name}")
    print(f" KIEN: {fix_info['kien_name']}")
    print(f" GMaps (wrong): {fix_info['gmaps_name']}")
    return True
def main():
    """Run the FALSE_MATCH correction over every file in FILES_TO_FIX.

    Files are looked up in ``data/custodian`` two directory levels above
    this script. Missing files are reported and skipped.

    Returns:
        Always 0.
    """
    base_dir = Path(__file__).parent.parent / 'data' / 'custodian'
    print(f"Fixing {len(FILES_TO_FIX)} files with name/location mismatch false matches...\n")

    fixed = 0
    for name, info in FILES_TO_FIX.items():
        target = base_dir / name
        if not target.exists():
            print(f" File not found: {name}")
        elif fix_gmaps_false_match(target, info):
            # Only files that were actually rewritten are counted.
            fixed += 1

    print(f"\n✓ Fixed {fixed} files")
    return 0
if __name__ == '__main__':
    # The bare exit() helper is injected by the site module for interactive
    # use and is missing under `python -S` or in frozen builds; raising
    # SystemExit directly passes main()'s return code to the shell without
    # depending on it.
    raise SystemExit(main())