139 lines
4.3 KiB
Python
139 lines
4.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Reclassify KIEN custodian entries that are food/drink related from
|
|
Intangible Heritage (I) to Taste/Smell (T) category.
|
|
|
|
Per AGENTS.md: "Bakeries and food/drink/smell-producing organizations
|
|
should be categorized under Taste/Smell category (T), NOT Intangible Heritage (I)."
|
|
|
|
The heritage FORMS (e.g., "Boerenkaas maken" tradition) remain as IntangibleHeritageForm.
|
|
The custodians (e.g., bakeries, cheese makers) are recategorized to T.
|
|
"""
|
|
|
|
import yaml
|
|
from pathlib import Path
|
|
from datetime import datetime, timezone
|
|
|
|
# Heritage forms that revolve around food or drink. A custodian safeguarding
# any of these is treated as type T (Taste/Smell).
TASTE_SMELL_HERITAGE_FORMS = {
    "Boerenkaas maken",
    "De Oprechte Dalfser Mop traditie",
    "Kaaskoningin",
    "Sallandse Bottermarkt te Raalte",
    "Traditie van de Tielsche kermiskoek",
    "Twentse krentenwegge",
}
|
|
|
|
# Lower-case name fragments that mark a custodian as taste/smell related.
# NOTE(review): short fragments such as "kaas" or "botter" may also occur in
# unrelated names if they are matched as substrings - confirm against the data.
TASTE_SMELL_KEYWORDS = {
    "bakkerij",
    "boerenkaas",
    "botter",  # butter
    "kaas",
    "kermiskoek",
    "krentenwegge",
    "mop traditie",  # Dalfser Mop is a pastry
}
|
|
|
|
def should_be_taste_smell(entry: dict) -> tuple[bool, str]:
    """
    Determine if a KIEN entry should be classified as Taste/Smell (T).

    An entry qualifies when one of its heritage forms is listed in
    ``TASTE_SMELL_HERITAGE_FORMS``, or when its KIEN name or original
    organisation name contains one of ``TASTE_SMELL_KEYWORDS``
    (case-insensitive substring match). Heritage forms are checked first.

    Args:
        entry: Parsed entry mapping. Reads ``kien_enrichment.heritage_forms``,
            ``kien_enrichment.kien_name`` and ``original_entry.organisatie``.
            Keys that are missing or explicitly null are treated as empty.

    Returns:
        tuple: (should_reclassify, reason). ``reason`` is an empty string
        when ``should_reclassify`` is False.
    """
    # `or {}` (rather than a .get default) also covers keys that exist but
    # hold null, which a .get default would pass through as None.
    enrichment = entry.get("kien_enrichment") or {}
    original = entry.get("original_entry") or {}

    # Check heritage forms
    heritage_forms = enrichment.get("heritage_forms") or []
    if isinstance(heritage_forms, str):
        # A scalar value would otherwise be iterated character by character.
        heritage_forms = [heritage_forms]
    for form in heritage_forms:
        # Only strings can be members of the set; skipping other values also
        # avoids a TypeError on unhashable items such as nested mappings.
        if isinstance(form, str) and form in TASTE_SMELL_HERITAGE_FORMS:
            return True, f"Heritage form '{form}' is food/taste related"

    # Check name for keywords
    name = str(enrichment.get("kien_name") or "").lower()
    orig_name = str(original.get("organisatie") or "").lower()

    # Sets iterate in arbitrary order; sorting keeps the reported keyword
    # stable between runs when several keywords match the same name.
    for keyword in sorted(TASTE_SMELL_KEYWORDS):
        if keyword in name or keyword in orig_name:
            return True, f"Name contains taste/smell keyword '{keyword}'"

    return False, ""
|
|
|
|
|
|
def reclassify_entry(entry: dict, reason: str) -> dict:
    """Update entry type from I to T and add provenance note.

    The entry is modified in place and also returned. Only the ``"I"`` code
    is replaced; any other type codes on the entry are preserved. If the
    entry has no ``original_entry.type`` or that type does not include
    ``"I"``, the entry is returned untouched and no provenance note is
    written, so the notes never claim a reclassification that did not happen.

    Args:
        entry: Parsed entry mapping with an optional ``original_entry.type``
            (list of codes, or a single code string) and an optional
            ``provenance.notes`` list.
        reason: Human-readable explanation that is embedded in the note.

    Returns:
        The same ``entry`` object.
    """
    original = entry.get("original_entry")
    if not isinstance(original, dict) or "type" not in original:
        return entry

    old_type = original["type"]
    # Treat a bare string as a single code; a substring test on the string
    # itself would wrongly match any value that merely contains an "I".
    codes = [old_type] if isinstance(old_type, str) else list(old_type or [])
    if "I" not in codes:
        return entry

    # Swap I for T in place, keeping the other codes and avoiding a
    # duplicate T.
    new_codes = []
    for code in codes:
        replacement = "T" if code == "I" else code
        if replacement not in new_codes:
            new_codes.append(replacement)
    original["type"] = new_codes

    # Add provenance note (tolerating null `provenance` / `notes` values).
    if entry.get("provenance") is None:
        entry["provenance"] = {}
    provenance = entry["provenance"]

    notes = provenance.get("notes")
    if notes is None:
        notes = []
    elif isinstance(notes, str):
        # Keep an existing free-text note instead of failing on .append().
        notes = [notes]
    provenance["notes"] = notes

    notes.append(
        f"Reclassified from I (Intangible Heritage) to T (Taste/Smell): {reason}"
    )
    notes.append(
        f"Reclassification timestamp: {datetime.now(timezone.utc).isoformat()}"
    )

    return entry
|
|
|
|
|
|
def main(entries_dir=None) -> None:
    """Reclassify food/drink related KIEN entries from I to T and print a report.

    Every matching entry file is loaded, checked with
    ``should_be_taste_smell`` and, when it qualifies and is currently of
    type I, rewritten in place via ``reclassify_entry``.

    Args:
        entries_dir: Directory holding the enriched entry YAML files (str or
            Path). Defaults to the project's original hard-coded location.
    """
    if entries_dir is None:
        entries_dir = "/Users/kempersc/apps/glam/data/nde/enriched/entries"
    entries_dir = Path(entries_dir)

    # Find all KIEN entries (index 1674+). The glob on its own also matches
    # 1600-1673, so filter on the numeric prefix to leave non-KIEN entries
    # untouched. The glob guarantees the first four characters are digits.
    first_kien_index = 1674
    kien_entries = sorted(
        path
        for path in entries_dir.glob("1[6-9][0-9][0-9]_*.yaml")
        if int(path.name[:4]) >= first_kien_index
    )

    reclassified = []
    skipped = []
    already_t = []
    not_type_i = []
    invalid = []

    for entry_path in kien_entries:
        # Explicit UTF-8: the files are written with allow_unicode=True, so
        # relying on the platform default encoding can corrupt or reject
        # non-ASCII names.
        with open(entry_path, "r", encoding="utf-8") as f:
            entry = yaml.safe_load(f)

        # safe_load returns None for an empty file; anything that is not a
        # mapping cannot be classified.
        if not isinstance(entry, dict):
            invalid.append(entry_path.name)
            continue

        # Check if already type T (tolerating a null or scalar `type`)
        current_type = (entry.get("original_entry") or {}).get("type") or []
        if isinstance(current_type, str):
            current_type = [current_type]
        if "T" in current_type:
            already_t.append(entry_path.name)
            continue

        # Check if should be T
        should_reclassify, reason = should_be_taste_smell(entry)
        if not should_reclassify:
            skipped.append(entry_path.name)
            continue

        # Only an entry that is actually type I can be moved from I to T;
        # rewriting others would record a reclassification that never
        # happened.
        if "I" not in current_type:
            not_type_i.append((entry_path.name, reason))
            continue

        entry = reclassify_entry(entry, reason)
        with open(entry_path, "w", encoding="utf-8") as f:
            yaml.dump(entry, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
        reclassified.append((entry_path.name, reason))

    # Report
    print("=" * 60)
    print("KIEN Custodian Reclassification Report")
    print("=" * 60)

    print(f"\n✓ Already type T: {len(already_t)}")
    for name in already_t:
        print(f" - {name}")

    print(f"\n✓ Reclassified to T (Taste/Smell): {len(reclassified)}")
    for name, reason in reclassified:
        print(f" - {name}")
        print(f" Reason: {reason}")

    print(f"\n• Kept as I (Intangible Heritage): {len(skipped)}")

    # The extra sections appear only when relevant, so the usual report
    # layout stays unchanged.
    if not_type_i:
        print(f"\n• Matched but not type I (left unchanged): {len(not_type_i)}")
        for name, reason in not_type_i:
            print(f" - {name}")
            print(f" Reason: {reason}")

    if invalid:
        print(f"\n• Empty or non-mapping YAML (left unchanged): {len(invalid)}")
        for name in invalid:
            print(f" - {name}")

    print("\n" + "=" * 60)
    print(f"Total processed: {len(kien_entries)}")
    print("=" * 60)


if __name__ == "__main__":
    main()
|