#!/usr/bin/env python3
"""
Reclassify KIEN custodian entries that are food/drink related from
Intangible Heritage (I) to Taste/Smell (T) category.

Per AGENTS.md: "Bakeries and food/drink/smell-producing organizations should be
categorized under Taste/Smell category (T), NOT Intangible Heritage (I)."

The heritage FORMS (e.g., "Boerenkaas maken" tradition) remain as IntangibleHeritageForm.
The custodians (e.g., bakeries, cheese makers) are recategorized to T.
"""

from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

import yaml

# Directory scanned when main() is called without an explicit ``entries_dir``.
DEFAULT_ENTRIES_DIR = Path("/Users/kempersc/apps/glam/data/nde/enriched/entries")

# File-name pattern used to select candidate entries.
# NOTE(review): the original comment says KIEN entries start at index 1674,
# but this pattern matches every index from 1600 through 1999 -- confirm
# whether 1600-1673 should really be scanned.
KIEN_ENTRY_GLOB = "1[6-9][0-9][0-9]_*.yaml"

# Food/drink related heritage forms - custodians safeguarding these should be type T
TASTE_SMELL_HERITAGE_FORMS = {
    "Boerenkaas maken",
    "Twentse krentenwegge",
    "Traditie van de Tielsche kermiskoek",
    "De Oprechte Dalfser Mop traditie",
    "Kaaskoningin",
    "Sallandse Bottermarkt te Raalte",
}

# Keywords that indicate taste/smell custodians
TASTE_SMELL_KEYWORDS = {
    "bakkerij",
    "boerenkaas",
    "kaas",
    "botter",  # butter
    "krentenwegge",
    "kermiskoek",
    "mop traditie",  # Dalfser Mop is a pastry
}


def _as_list(value) -> list:
    """Normalise a YAML scalar / list / null into a plain list."""
    if value is None:
        return []
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


def _mapping(parent: dict, key: str) -> dict:
    """Return ``parent[key]`` if it is a mapping, otherwise an empty dict."""
    value = parent.get(key)
    return value if isinstance(value, dict) else {}


def _entry_types(entry: dict) -> list:
    """Return the type codes stored under ``original_entry.type`` as a list."""
    return _as_list(_mapping(entry, "original_entry").get("type"))


def should_be_taste_smell(entry: dict) -> tuple[bool, str]:
    """
    Determine if a KIEN entry should be classified as Taste/Smell (T).

    An entry matches when one of its ``kien_enrichment.heritage_forms`` is
    listed in ``TASTE_SMELL_HERITAGE_FORMS``, or when its
    ``kien_enrichment.kien_name`` / ``original_entry.organisatie`` contains
    (case-insensitively) one of ``TASTE_SMELL_KEYWORDS``.  Missing or null
    fields are treated as empty.

    Returns:
        tuple: (should_reclassify, reason); ``reason`` is "" when no rule
        matched.
    """
    enrichment = _mapping(entry, "kien_enrichment")

    # Check heritage forms
    for form in _as_list(enrichment.get("heritage_forms")):
        # isinstance guard: an unhashable item would make the set lookup raise.
        if isinstance(form, str) and form in TASTE_SMELL_HERITAGE_FORMS:
            return True, f"Heritage form '{form}' is food/taste related"

    # Check name for keywords.  ``or ""`` covers keys present with a null value.
    name = str(enrichment.get("kien_name") or "").lower()
    orig_name = str(_mapping(entry, "original_entry").get("organisatie") or "").lower()

    # Sorted so the reported keyword is the same on every run; plain set
    # iteration order for strings varies with hash randomisation.
    for keyword in sorted(TASTE_SMELL_KEYWORDS):
        if keyword in name or keyword in orig_name:
            return True, f"Name contains taste/smell keyword '{keyword}'"

    return False, ""


def reclassify_entry(entry: dict, reason: str) -> dict:
    """
    Update entry type from I to T and add provenance notes.

    The entry is modified in place and also returned.  If the entry has no
    "I" among its ``original_entry.type`` codes it is returned untouched, so
    a provenance note is only ever written for a change that really happened.
    Type codes other than "I" are preserved.

    Args:
        entry: Parsed YAML entry.
        reason: Human-readable explanation recorded in the provenance notes.

    Returns:
        The same ``entry`` object.
    """
    old_types = _entry_types(entry)
    if "I" not in old_types:
        return entry

    # Swap I -> T, keep every other code, and avoid a duplicate "T".
    new_types = []
    for code in old_types:
        replacement = "T" if code == "I" else code
        if replacement not in new_types:
            new_types.append(replacement)
    entry["original_entry"]["type"] = new_types

    # Add provenance notes, tolerating a null/missing provenance block.
    provenance = entry.get("provenance")
    if provenance is None:
        provenance = entry["provenance"] = {}
    notes = provenance.get("notes")
    if notes is None:
        notes = provenance["notes"] = []
    elif not isinstance(notes, list):
        # A single scalar note: keep it as the first list item.
        notes = provenance["notes"] = [notes]

    notes.append(
        f"Reclassified from I (Intangible Heritage) to T (Taste/Smell): {reason}"
    )
    notes.append(
        f"Reclassification timestamp: {datetime.now(timezone.utc).isoformat()}"
    )

    return entry


def main(entries_dir: Optional[Path] = None):
    """
    Scan entry files, reclassify matching custodians and print a report.

    Args:
        entries_dir: Directory containing the entry YAML files.  Defaults to
            ``DEFAULT_ENTRIES_DIR``.

    Files that match ``KIEN_ENTRY_GLOB`` are loaded one by one; entries that
    already carry type "T" are left alone, entries that match the taste/smell
    rules and carry type "I" are rewritten in place, everything else is only
    counted.
    """
    entries_dir = DEFAULT_ENTRIES_DIR if entries_dir is None else Path(entries_dir)

    # Find all KIEN entries (index 1674+)
    kien_entries = sorted(entries_dir.glob(KIEN_ENTRY_GLOB))

    reclassified = []
    skipped = []
    already_t = []
    not_intangible = []  # matched the food rules but has no "I" type to replace
    invalid = []  # unreadable or structurally unexpected files

    for entry_path in kien_entries:
        try:
            with open(entry_path, "r", encoding="utf-8") as f:
                entry = yaml.safe_load(f)
        except yaml.YAMLError as exc:
            # Skip and report instead of aborting a half-finished run.
            invalid.append((entry_path.name, " ".join(str(exc).split())))
            continue

        # safe_load returns None for an empty file.
        if not isinstance(entry, dict):
            invalid.append((entry_path.name, "top-level YAML value is not a mapping"))
            continue

        # Check if already type T
        current_types = _entry_types(entry)
        if "T" in current_types:
            already_t.append(entry_path.name)
            continue

        # Check if should be T
        should_reclassify, reason = should_be_taste_smell(entry)
        if not should_reclassify:
            skipped.append(entry_path.name)
            continue
        if "I" not in current_types:
            not_intangible.append((entry_path.name, reason))
            continue

        entry = reclassify_entry(entry, reason)

        # Serialise first so a dump failure cannot leave a truncated file.
        serialized = yaml.dump(
            entry, default_flow_style=False, allow_unicode=True, sort_keys=False
        )
        entry_path.write_text(serialized, encoding="utf-8")
        reclassified.append((entry_path.name, reason))

    # Report
    print("=" * 60)
    print("KIEN Custodian Reclassification Report")
    print("=" * 60)

    print(f"\n✓ Already type T: {len(already_t)}")
    for name in already_t:
        print(f" - {name}")

    print(f"\n✓ Reclassified to T (Taste/Smell): {len(reclassified)}")
    for name, reason in reclassified:
        print(f" - {name}")
        print(f" Reason: {reason}")

    print(f"\n• Kept as I (Intangible Heritage): {len(skipped)}")

    if not_intangible:
        print(f"\n• Matched but not type I (left unchanged): {len(not_intangible)}")
        for name, reason in not_intangible:
            print(f" - {name}")
            print(f" Reason: {reason}")

    if invalid:
        print(f"\n✗ Could not be processed: {len(invalid)}")
        for name, problem in invalid:
            print(f" - {name}")
            print(f" Problem: {problem}")

    print("\n" + "=" * 60)
    print(f"Total processed: {len(kien_entries)}")
    print("=" * 60)


if __name__ == "__main__":
    main()