#!/usr/bin/env python3 """ Cross-reference KIEN heritage forms and custodians with Wikidata. This script creates a mapping file linking Dutch intangible heritage to Wikidata entities for international discoverability. Author: GLAM Project Date: 2025-12-04 """ import json from datetime import datetime, timezone from pathlib import Path # Known Wikidata mappings for Dutch intangible heritage # Format: heritage_form_name -> {wikidata_id, wikidata_label_en, notes} HERITAGE_WIKIDATA_MAPPINGS = { # Events and Celebrations "Pride Amsterdam": { "wikidata_id": "Q478546", "label_en": "Amsterdam Gay Pride", "label_nl": "Pride Amsterdam", "type": "festival" }, "Zomercarnaval Rotterdam": { "wikidata_id": "Q2384646", "label_en": "Summer Carnival", "label_nl": "Zomercarnaval", "type": "carnival" }, "Vierdaagse Nijmegen": { "wikidata_id": "Q1472255", "label_en": "International Four Days Marches Nijmegen", "label_nl": "Vierdaagse", "type": "sporting_event" }, "Koningsdag": { "wikidata_id": "Q333016", "label_en": "King's Day", "label_nl": "Koningsdag", "type": "national_holiday" }, "Koningsdag in Velp": { "wikidata_id": "Q333016", # Same as Koningsdag "label_en": "King's Day", "label_nl": "Koningsdag", "type": "national_holiday", "notes": "Local celebration of King's Day" }, # Flower Parades (Bloemencorso) "Bloemencorso Zundert": { "wikidata_id": "Q2140539", "label_en": "Zundert Flower Parade", "label_nl": "Bloemencorso Zundert", "type": "flower_parade" }, "Bloemencorso Valkenswaard": { "wikidata_id": "Q20683546", "label_en": "Valkenswaard Flower Parade", "label_nl": "Bloemencorso Valkenswaard", "type": "flower_parade" }, "Bloemencorso Lichtenvoorde": { "wikidata_id": "Q13440715", "label_en": "Bloemencorso Lichtenvoorde", "label_nl": "Bloemencorso Lichtenvoorde", "type": "flower_parade" }, # Sports "Elfstedentocht": { "wikidata_id": "Q448936", "label_en": "Elfstedentocht", "label_nl": "Elfstedentocht", "type": "sporting_event", "notes": "Eleven cities ice skating race" }, # Martial Arts "Pencak Silat": { "wikidata_id": "Q2040849", "label_en": "Pencak silat", "label_nl": "Pencak silat", "type": "martial_art", "unesco_status": "Representative List of the Intangible Cultural Heritage of Humanity" }, # Hunting "Valkerij": { "wikidata_id": "Q211011", "label_en": "Falconry", "label_nl": "Valkerij", "type": "hunting_practice", "unesco_status": "Representative List of the Intangible Cultural Heritage of Humanity" }, # Sinterklaas traditions "Nationale Intocht Sinterklaas": { "wikidata_id": "Q2389350", "label_en": "National Dutch entry parade of Sinterklaas", "label_nl": "Nationale Intocht Sinterklaas", "type": "festival" } } # Known Wikidata mappings for custodian organizations CUSTODIAN_WIKIDATA_MAPPINGS = { "Nederlands Steendrukmuseum": { "wikidata_id": "Q2170624", "label_en": "Nederlands Steendrukmuseum", "label_nl": "Nederlands Steendrukmuseum", "type": "museum" }, "Stichting Amsterdam Gay Pride": { "wikidata_id": "Q478546", # Links to the event "label_en": "Amsterdam Gay Pride", "label_nl": "Pride Amsterdam", "type": "foundation", "notes": "Organizer of Pride Amsterdam" }, "Stichting Zomercarnaval Nederland": { "wikidata_id": "Q2384646", # Links to the event "label_en": "Summer Carnival", "label_nl": "Zomercarnaval", "type": "foundation", "notes": "Organizer of Rotterdam Summer Carnival" } } def load_kien_data(): """Load KIEN heritage forms and custodians.""" data_dir = Path("/Users/kempersc/apps/glam/data/intangible_heritage") # Load heritage forms with open(data_dir / "kien_custodians.json") as f: heritage_forms = json.load(f) # Load custodian profiles with open(data_dir / "custodian_profiles.json") as f: custodians = json.load(f) return heritage_forms, custodians def create_crossref_file(): """Create cross-reference mapping file.""" heritage_forms, custodians = load_kien_data() # Build output output = { "generated_at": datetime.now(timezone.utc).isoformat(), "source": "https://www.immaterieelerfgoed.nl", "description": "Cross-reference mapping between KIEN Dutch Intangible Heritage Inventory and Wikidata", "statistics": { "total_heritage_forms": len(heritage_forms), "total_custodians": len(custodians.get("custodians", [])), "heritage_forms_with_wikidata": len(HERITAGE_WIKIDATA_MAPPINGS), "custodians_with_wikidata": len(CUSTODIAN_WIKIDATA_MAPPINGS) }, "heritage_form_mappings": [], "custodian_mappings": [], "unmapped_heritage_forms": [], "unmapped_custodians": [] } # Process heritage forms mapped_forms = set() for form in heritage_forms: form_name = form.get("heritage_form", "") if form_name in HERITAGE_WIKIDATA_MAPPINGS: mapping = HERITAGE_WIKIDATA_MAPPINGS[form_name].copy() mapping["kien_name"] = form_name mapping["kien_url"] = form.get("url") output["heritage_form_mappings"].append(mapping) mapped_forms.add(form_name) else: if form_name and form_name not in mapped_forms: output["unmapped_heritage_forms"].append({ "name": form_name, "url": form.get("url") }) mapped_forms.add(form_name) # Process custodians mapped_custodians = set() for custodian in custodians.get("custodians", []): name = custodian.get("name", "") if name in CUSTODIAN_WIKIDATA_MAPPINGS: mapping = CUSTODIAN_WIKIDATA_MAPPINGS[name].copy() mapping["kien_name"] = name mapping["kien_url"] = custodian.get("kien_url") mapping["website"] = custodian.get("website") output["custodian_mappings"].append(mapping) mapped_custodians.add(name) else: if name and name not in mapped_custodians: output["unmapped_custodians"].append({ "name": name, "kien_url": custodian.get("kien_url"), "website": custodian.get("website") }) mapped_custodians.add(name) # Update statistics output["statistics"]["heritage_forms_with_wikidata"] = len(output["heritage_form_mappings"]) output["statistics"]["custodians_with_wikidata"] = len(output["custodian_mappings"]) return output def main(): """Main function to create Wikidata cross-reference file.""" print("=" * 60) print("KIEN-Wikidata Cross-Reference Generator") print("=" * 60) print() output = create_crossref_file() # Save to file output_dir = Path("/Users/kempersc/apps/glam/data/intangible_heritage") output_file = output_dir / "wikidata_crossref.json" with open(output_file, "w", encoding="utf-8") as f: json.dump(output, f, ensure_ascii=False, indent=2) print(f"Statistics:") print(f" Total heritage forms: {output['statistics']['total_heritage_forms']}") print(f" Heritage forms with Wikidata: {output['statistics']['heritage_forms_with_wikidata']}") print(f" Total custodians: {output['statistics']['total_custodians']}") print(f" Custodians with Wikidata: {output['statistics']['custodians_with_wikidata']}") print() print(f"Saved to: {output_file}") print() print("Heritage forms with Wikidata mappings:") for mapping in output["heritage_form_mappings"]: print(f" • {mapping['kien_name']} → {mapping['wikidata_id']} ({mapping['label_en']})") print() print("Custodians with Wikidata mappings:") for mapping in output["custodian_mappings"]: print(f" • {mapping['kien_name']} → {mapping['wikidata_id']} ({mapping['label_en']})") if __name__ == "__main__": main()