246 lines
8.4 KiB
Python
246 lines
8.4 KiB
Python
#!/usr/bin/env python3
"""
Cross-reference KIEN heritage forms and custodians with Wikidata.

This script creates a mapping file linking Dutch intangible heritage
to Wikidata entities for international discoverability.

Author: GLAM Project
Date: 2025-12-04
"""
|
|
|
|
import json
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
# Hand-curated Wikidata matches for Dutch intangible heritage forms.
# Key:   the heritage form name that create_crossref_file() compares against
#        each record's "heritage_form" field (exact string match).
# Value: a dict with "wikidata_id", "label_en", "label_nl" and "type";
#        individual entries may additionally carry "notes" or "unesco_status".
HERITAGE_WIKIDATA_MAPPINGS = {
    # --- Events and celebrations ---
    "Pride Amsterdam": {
        "wikidata_id": "Q478546",
        "label_en": "Amsterdam Gay Pride",
        "label_nl": "Pride Amsterdam",
        "type": "festival",
    },
    "Zomercarnaval Rotterdam": {
        "wikidata_id": "Q2384646",
        "label_en": "Summer Carnival",
        "label_nl": "Zomercarnaval",
        "type": "carnival",
    },
    "Vierdaagse Nijmegen": {
        "wikidata_id": "Q1472255",
        "label_en": "International Four Days Marches Nijmegen",
        "label_nl": "Vierdaagse",
        "type": "sporting_event",
    },
    "Koningsdag": {
        "wikidata_id": "Q333016",
        "label_en": "King's Day",
        "label_nl": "Koningsdag",
        "type": "national_holiday",
    },
    "Koningsdag in Velp": {
        # Deliberately reuses the Koningsdag entity: this is a local edition.
        "wikidata_id": "Q333016",
        "label_en": "King's Day",
        "label_nl": "Koningsdag",
        "type": "national_holiday",
        "notes": "Local celebration of King's Day",
    },

    # --- Flower parades (bloemencorso) ---
    "Bloemencorso Zundert": {
        "wikidata_id": "Q2140539",
        "label_en": "Zundert Flower Parade",
        "label_nl": "Bloemencorso Zundert",
        "type": "flower_parade",
    },
    "Bloemencorso Valkenswaard": {
        "wikidata_id": "Q20683546",
        "label_en": "Valkenswaard Flower Parade",
        "label_nl": "Bloemencorso Valkenswaard",
        "type": "flower_parade",
    },
    "Bloemencorso Lichtenvoorde": {
        "wikidata_id": "Q13440715",
        "label_en": "Bloemencorso Lichtenvoorde",
        "label_nl": "Bloemencorso Lichtenvoorde",
        "type": "flower_parade",
    },

    # --- Sports ---
    "Elfstedentocht": {
        "wikidata_id": "Q448936",
        "label_en": "Elfstedentocht",
        "label_nl": "Elfstedentocht",
        "type": "sporting_event",
        "notes": "Eleven cities ice skating race",
    },

    # --- Martial arts ---
    "Pencak Silat": {
        "wikidata_id": "Q2040849",
        "label_en": "Pencak silat",
        "label_nl": "Pencak silat",
        "type": "martial_art",
        "unesco_status": "Representative List of the Intangible Cultural Heritage of Humanity",
    },

    # --- Hunting ---
    "Valkerij": {
        "wikidata_id": "Q211011",
        "label_en": "Falconry",
        "label_nl": "Valkerij",
        "type": "hunting_practice",
        "unesco_status": "Representative List of the Intangible Cultural Heritage of Humanity",
    },

    # --- Sinterklaas traditions ---
    "Nationale Intocht Sinterklaas": {
        "wikidata_id": "Q2389350",
        "label_en": "National Dutch entry parade of Sinterklaas",
        "label_nl": "Nationale Intocht Sinterklaas",
        "type": "festival",
    },
}
|
|
|
|
# Hand-curated Wikidata matches for custodian organizations.
# Key:   the custodian name that create_crossref_file() compares against each
#        custodian record's "name" field (exact string match).
# Value: a dict with "wikidata_id", "label_en", "label_nl" and "type",
#        optionally followed by "notes".
CUSTODIAN_WIKIDATA_MAPPINGS = {
    "Nederlands Steendrukmuseum": {
        "wikidata_id": "Q2170624",
        "label_en": "Nederlands Steendrukmuseum",
        "label_nl": "Nederlands Steendrukmuseum",
        "type": "museum",
    },
    "Stichting Amsterdam Gay Pride": {
        # The QID is that of the event itself, not of the foundation.
        "wikidata_id": "Q478546",
        "label_en": "Amsterdam Gay Pride",
        "label_nl": "Pride Amsterdam",
        "type": "foundation",
        "notes": "Organizer of Pride Amsterdam",
    },
    "Stichting Zomercarnaval Nederland": {
        # The QID is that of the event itself, not of the foundation.
        "wikidata_id": "Q2384646",
        "label_en": "Summer Carnival",
        "label_nl": "Zomercarnaval",
        "type": "foundation",
        "notes": "Organizer of Rotterdam Summer Carnival",
    },
}
|
|
|
|
def load_kien_data(data_dir=None):
    """Load the KIEN heritage-form records and custodian profiles from disk.

    Args:
        data_dir: Directory (``str`` or ``Path``) containing
            ``kien_custodians.json`` and ``custodian_profiles.json``.
            When omitted, the project's hard-coded intangible-heritage data
            directory is used, so existing zero-argument callers are unaffected.

    Returns:
        A ``(heritage_forms, custodians)`` tuple: the parsed contents of
        ``kien_custodians.json`` and of ``custodian_profiles.json``, in that
        order.

    Raises:
        FileNotFoundError: If either file does not exist.
        json.JSONDecodeError: If either file is not valid JSON.
    """
    if data_dir is None:
        data_dir = Path("/Users/kempersc/apps/glam/data/intangible_heritage")
    else:
        data_dir = Path(data_dir)

    # Decode explicitly as UTF-8: without it open() falls back to the locale's
    # preferred encoding, which can mis-decode non-ASCII names on some systems.
    # Heritage form records are read from kien_custodians.json.
    with open(data_dir / "kien_custodians.json", encoding="utf-8") as f:
        heritage_forms = json.load(f)

    # Custodian profiles.
    with open(data_dir / "custodian_profiles.json", encoding="utf-8") as f:
        custodians = json.load(f)

    return heritage_forms, custodians
|
|
|
|
def create_crossref_file():
    """Build the KIEN-to-Wikidata cross-reference structure.

    Loads the KIEN data via ``load_kien_data()`` and matches every heritage
    form (by its ``"heritage_form"`` value) and every custodian (by its
    ``"name"`` value) against ``HERITAGE_WIKIDATA_MAPPINGS`` and
    ``CUSTODIAN_WIKIDATA_MAPPINGS`` respectively, using exact string equality.

    Each distinct name is reported once, taken from the first record that
    carries it; records without a name are skipped. Previously only unmapped
    names were de-duplicated, so a mapped name occurring on several records
    was emitted repeatedly and inflated the ``*_with_wikidata`` statistics.

    Returns:
        A JSON-serialisable dict with the keys ``generated_at``, ``source``,
        ``description``, ``statistics``, ``heritage_form_mappings``,
        ``custodian_mappings``, ``unmapped_heritage_forms`` and
        ``unmapped_custodians``.
    """
    heritage_forms, custodians = load_kien_data()
    custodian_records = custodians.get("custodians", [])

    # --- Heritage forms ---
    heritage_form_mappings = []
    unmapped_heritage_forms = []
    seen_forms = set()
    for form in heritage_forms:
        form_name = form.get("heritage_form", "")
        if not form_name or form_name in seen_forms:
            continue
        seen_forms.add(form_name)

        known = HERITAGE_WIKIDATA_MAPPINGS.get(form_name)
        if known is None:
            unmapped_heritage_forms.append({
                "name": form_name,
                "url": form.get("url"),
            })
            continue

        # Copy so the module-level mapping table is never mutated.
        mapping = known.copy()
        mapping["kien_name"] = form_name
        mapping["kien_url"] = form.get("url")
        heritage_form_mappings.append(mapping)

    # --- Custodians ---
    custodian_mappings = []
    unmapped_custodians = []
    seen_custodians = set()
    for custodian in custodian_records:
        name = custodian.get("name", "")
        if not name or name in seen_custodians:
            continue
        seen_custodians.add(name)

        known = CUSTODIAN_WIKIDATA_MAPPINGS.get(name)
        if known is None:
            unmapped_custodians.append({
                "name": name,
                "kien_url": custodian.get("kien_url"),
                "website": custodian.get("website"),
            })
            continue

        mapping = known.copy()
        mapping["kien_name"] = name
        mapping["kien_url"] = custodian.get("kien_url")
        mapping["website"] = custodian.get("website")
        custodian_mappings.append(mapping)

    return {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "source": "https://www.immaterieelerfgoed.nl",
        "description": "Cross-reference mapping between KIEN Dutch Intangible Heritage Inventory and Wikidata",
        "statistics": {
            # Totals count input records as loaded (duplicates included).
            "total_heritage_forms": len(heritage_forms),
            "total_custodians": len(custodian_records),
            # Matched counts reflect what was actually found in the data,
            # not the size of the mapping tables.
            "heritage_forms_with_wikidata": len(heritage_form_mappings),
            "custodians_with_wikidata": len(custodian_mappings),
        },
        "heritage_form_mappings": heritage_form_mappings,
        "custodian_mappings": custodian_mappings,
        "unmapped_heritage_forms": unmapped_heritage_forms,
        "unmapped_custodians": unmapped_custodians,
    }
|
|
|
|
def main():
    """Generate the cross-reference file and print a summary to stdout.

    Builds the mapping with ``create_crossref_file()``, writes it as indented
    UTF-8 JSON to ``wikidata_crossref.json`` in the hard-coded data directory,
    then prints the statistics and every matched heritage form and custodian.
    """
    rule = "=" * 60
    print(rule)
    print("KIEN-Wikidata Cross-Reference Generator")
    print(rule)
    print()

    output = create_crossref_file()

    # Persist the result next to the input data.
    output_dir = Path("/Users/kempersc/apps/glam/data/intangible_heritage")
    output_file = output_dir / "wikidata_crossref.json"
    with output_file.open("w", encoding="utf-8") as handle:
        json.dump(output, handle, ensure_ascii=False, indent=2)

    stats = output["statistics"]
    print("Statistics:")
    print(f" Total heritage forms: {stats['total_heritage_forms']}")
    print(f" Heritage forms with Wikidata: {stats['heritage_forms_with_wikidata']}")
    print(f" Total custodians: {stats['total_custodians']}")
    print(f" Custodians with Wikidata: {stats['custodians_with_wikidata']}")
    print()
    print(f"Saved to: {output_file}")
    print()

    print("Heritage forms with Wikidata mappings:")
    for entry in output["heritage_form_mappings"]:
        print(f" • {entry['kien_name']} → {entry['wikidata_id']} ({entry['label_en']})")

    print()
    print("Custodians with Wikidata mappings:")
    for entry in output["custodian_mappings"]:
        print(f" • {entry['kien_name']} → {entry['wikidata_id']} ({entry['label_en']})")
|
|
# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|