Additional Type I custodian files with obvious name mismatches between KIEN registry entries and Google Maps results. These could not be auto-detected via domain mismatch because the entries lack official websites.

Fixes:
- Dick Timmerman (person) → carpentry business
- Ria Bos (cigar maker) → money transfer agent
- Stichting Kracom (Krampuslauf) → Happy Caps retail
- Fed. Nederlandse Vertelorganisaties → NET Foundation
- Stichting dodenherdenking Alphen → wrong memorial
- Sao Joao Rotterdam → Heemraadsplein (location, not an organization)
- sport en spel (heritage) → equipment rental
- Eiertikken Ommen → restaurant

Also adds detection and fix scripts for Google Maps false matches.
132 lines
4.6 KiB
Python
132 lines
4.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Fix obvious name mismatch false matches for Type I custodians.
|
|
|
|
These are files where Google Maps returned a completely different organization
|
|
than the KIEN registry entry, but we couldn't auto-detect because there was
|
|
no official website to compare domains.
|
|
|
|
Per Rule 40: KIEN Registry is authoritative for Type I custodians.
|
|
"""
|
|
|
|
import yaml
|
|
from pathlib import Path
|
|
from datetime import datetime, timezone
|
|
|
|
# Manually verified name mismatches, keyed by custodian YAML filename.
# Each entry records the KIEN registry name/type and the unrelated
# Google Maps name/type that was attached to the record.
FILES_TO_FIX = {
    "NL-GE-ARN-I-DT.yaml": dict(
        kien_name="Dick Timmerman",
        kien_type="heritage practitioner (person)",
        gmaps_name="Timmer & Onderhoudsbedrijf Dik Lubbertsen",
        gmaps_type="carpentry business",
    ),
    "NL-OV-IJS-I-RB.yaml": dict(
        kien_name="Ria Bos",
        kien_type="traditional cigar maker (person)",
        gmaps_name="Ria Money Transfer Agent",
        gmaps_type="money transfer business",
    ),
    "NL-ZH-ROT-I-K.yaml": dict(
        kien_name="Stichting Kracom",
        kien_type="Krampuslauf Rotterdam heritage foundation",
        gmaps_name="Happy Caps",
        gmaps_type="retail store in different city (Apeldoorn)",
    ),
    "NL-UT-UTR-I-FNV.yaml": dict(
        kien_name="Federatie Nederlandse Vertelorganisaties",
        kien_type="Dutch storytelling federation",
        gmaps_name="NET Foundation",
        gmaps_type="different foundation",
    ),
    "NL-ZH-AAD-I-DA.yaml": dict(
        kien_name="Stichting dodenherdenking Alphen",
        kien_type="memorial foundation Alphen aan den Rijn",
        gmaps_name="Waalsdorpervlakte Bourdon Bell",
        gmaps_type="different memorial in different location",
    ),
    "NL-ZH-ROT-I-SJR.yaml": dict(
        kien_name="Sao Joao Rotterdam",
        kien_type="Portuguese heritage festival organization",
        gmaps_name="Heemraadsplein",
        gmaps_type="public square (location, not organization)",
    ),
    "NL-GE-OOS-I-SS.yaml": dict(
        kien_name="sport en spel",
        kien_type="traditional games heritage organization",
        gmaps_name="Damu Sport en Spel Verhuur",
        gmaps_type="sports equipment rental business",
    ),
    "NL-OV-OMM-I-EO.yaml": dict(
        kien_name="Eiertikken Ommen",
        kien_type="traditional egg-tapping game heritage",
        gmaps_name="Restaurant Ekkelenkamp Ommen",
        gmaps_type="restaurant",
    ),
}
|
|
|
|
|
|
def fix_gmaps_false_match(filepath: Path, fix_info: dict) -> bool:
    """Mark the Google Maps enrichment of one custodian file as FALSE_MATCH.

    The existing ``google_maps_enrichment`` value is not discarded: it is moved
    under ``original_false_match`` inside a new record that also carries the
    status, a human-readable reason and correction metadata. The file is then
    rewritten in place.

    Args:
        filepath: Path of the custodian YAML file to update.
        fix_info: Mapping with the keys ``kien_name``, ``kien_type``,
            ``gmaps_name`` and ``gmaps_type`` describing the mismatch.

    Returns:
        True if the file was rewritten; False if it was left untouched
        (no Google Maps data present, or already marked FALSE_MATCH).

    Raises:
        KeyError: If ``fix_info`` lacks one of the four expected keys. This is
            raised before the file is opened for writing.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    # safe_load yields None for an empty document and may yield a list or a
    # scalar; none of those can hold a google_maps_enrichment key.
    if not isinstance(data, dict):
        print(f" No Google Maps data in {filepath.name}")
        return False

    gmaps = data.get('google_maps_enrichment')
    if not gmaps:
        print(f" No Google Maps data in {filepath.name}")
        return False

    # Guard the .get() so a non-mapping enrichment value is preserved under
    # original_false_match below instead of raising AttributeError here.
    if isinstance(gmaps, dict) and gmaps.get('status') == 'FALSE_MATCH':
        print(f" Already fixed: {filepath.name}")
        return False

    # Create the false match record
    false_match_reason = (
        f"Google Maps returned \"{fix_info['gmaps_name']}\" ({fix_info['gmaps_type']}) "
        f"instead of \"{fix_info['kien_name']}\" ({fix_info['kien_type']}). "
        "Name mismatch detected during manual review. "
        "Per Rule 40: KIEN is authoritative for Type I intangible heritage custodians."
    )

    # Restructure the data
    data['google_maps_enrichment'] = {
        'status': 'FALSE_MATCH',
        'false_match_reason': false_match_reason,
        'original_false_match': gmaps,
        'correction_timestamp': datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
        'correction_agent': 'opencode-claude-sonnet-4',
        'correction_method': 'manual_name_mismatch_review'
    }

    # Serialize before opening the file for writing: opening with 'w'
    # truncates immediately, so a dump failure would otherwise leave the
    # custodian file empty or half-written.
    serialized = yaml.dump(
        data,
        default_flow_style=False,
        allow_unicode=True,
        sort_keys=False,
        width=120,
    )

    # Write back
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(serialized)

    print(f" ✓ Fixed: {filepath.name}")
    print(f" KIEN: {fix_info['kien_name']}")
    print(f" GMaps (wrong): {fix_info['gmaps_name']}")
    return True
|
|
|
|
|
|
def main():
    """Apply the false-match correction to every file in FILES_TO_FIX.

    Files are looked up in ``data/custodian`` under the parent of this
    script's directory. Missing files are reported and skipped. Prints a
    summary of how many files were rewritten and always returns 0.
    """
    base_dir = Path(__file__).parent.parent / 'data' / 'custodian'

    print(f"Fixing {len(FILES_TO_FIX)} files with name mismatch false matches...\n")

    fixed = 0
    for filename in FILES_TO_FIX:
        candidate = base_dir / filename
        if not candidate.exists():
            print(f" File not found: {filename}")
        elif fix_gmaps_false_match(candidate, FILES_TO_FIX[filename]):
            # Count only files that were actually rewritten.
            fixed += 1

    print(f"\n✓ Fixed {fixed} files")
    return 0
|
|
|
|
|
|
if __name__ == '__main__':
    # The bare `exit` builtin is injected by the `site` module for interactive
    # use and is missing under `python -S` or in frozen builds. Raising
    # SystemExit is what sys.exit does and needs no import.
    raise SystemExit(main())
|