glam/scripts/crossref_kien_wikidata.py
2025-12-05 15:30:23 +01:00

246 lines
8.4 KiB
Python

#!/usr/bin/env python3
"""
Cross-reference KIEN heritage forms and custodians with Wikidata.
This script creates a mapping file linking Dutch intangible heritage
to Wikidata entities for international discoverability.
Author: GLAM Project
Date: 2025-12-04
"""
import json
from datetime import datetime, timezone
from pathlib import Path
# Hand-curated Wikidata matches for KIEN heritage forms.
# Key: the heritage form name as it is looked up in the KIEN data.
# Value: a dict with "wikidata_id", "label_en", "label_nl" and "type";
# some entries additionally carry "notes" and/or "unesco_status".
HERITAGE_WIKIDATA_MAPPINGS = {
    # --- Events and celebrations ---
    "Pride Amsterdam": {
        "wikidata_id": "Q478546",
        "label_en": "Amsterdam Gay Pride",
        "label_nl": "Pride Amsterdam",
        "type": "festival",
    },
    "Zomercarnaval Rotterdam": {
        "wikidata_id": "Q2384646",
        "label_en": "Summer Carnival",
        "label_nl": "Zomercarnaval",
        "type": "carnival",
    },
    "Vierdaagse Nijmegen": {
        "wikidata_id": "Q1472255",
        "label_en": "International Four Days Marches Nijmegen",
        "label_nl": "Vierdaagse",
        "type": "sporting_event",
    },
    "Koningsdag": {
        "wikidata_id": "Q333016",
        "label_en": "King's Day",
        "label_nl": "Koningsdag",
        "type": "national_holiday",
    },
    "Koningsdag in Velp": {
        # Deliberately reuses the Wikidata item of the national "Koningsdag".
        "wikidata_id": "Q333016",
        "label_en": "King's Day",
        "label_nl": "Koningsdag",
        "type": "national_holiday",
        "notes": "Local celebration of King's Day",
    },
    # --- Flower parades (bloemencorso) ---
    "Bloemencorso Zundert": {
        "wikidata_id": "Q2140539",
        "label_en": "Zundert Flower Parade",
        "label_nl": "Bloemencorso Zundert",
        "type": "flower_parade",
    },
    "Bloemencorso Valkenswaard": {
        "wikidata_id": "Q20683546",
        "label_en": "Valkenswaard Flower Parade",
        "label_nl": "Bloemencorso Valkenswaard",
        "type": "flower_parade",
    },
    "Bloemencorso Lichtenvoorde": {
        "wikidata_id": "Q13440715",
        "label_en": "Bloemencorso Lichtenvoorde",
        "label_nl": "Bloemencorso Lichtenvoorde",
        "type": "flower_parade",
    },
    # --- Sports ---
    "Elfstedentocht": {
        "wikidata_id": "Q448936",
        "label_en": "Elfstedentocht",
        "label_nl": "Elfstedentocht",
        "type": "sporting_event",
        "notes": "Eleven cities ice skating race",
    },
    # --- Martial arts ---
    "Pencak Silat": {
        "wikidata_id": "Q2040849",
        "label_en": "Pencak silat",
        "label_nl": "Pencak silat",
        "type": "martial_art",
        "unesco_status": "Representative List of the Intangible Cultural Heritage of Humanity",
    },
    # --- Hunting ---
    "Valkerij": {
        "wikidata_id": "Q211011",
        "label_en": "Falconry",
        "label_nl": "Valkerij",
        "type": "hunting_practice",
        "unesco_status": "Representative List of the Intangible Cultural Heritage of Humanity",
    },
    # --- Sinterklaas traditions ---
    "Nationale Intocht Sinterklaas": {
        "wikidata_id": "Q2389350",
        "label_en": "National Dutch entry parade of Sinterklaas",
        "label_nl": "Nationale Intocht Sinterklaas",
        "type": "festival",
    },
}
# Hand-curated Wikidata matches for custodian organizations.
# Key: the custodian name as it is looked up in the KIEN custodian profiles.
# Value: a dict with "wikidata_id", "label_en", "label_nl" and "type";
# some entries additionally carry "notes".
CUSTODIAN_WIKIDATA_MAPPINGS = {
    "Nederlands Steendrukmuseum": {
        "wikidata_id": "Q2170624",
        "label_en": "Nederlands Steendrukmuseum",
        "label_nl": "Nederlands Steendrukmuseum",
        "type": "museum",
    },
    "Stichting Amsterdam Gay Pride": {
        # Points at the Wikidata item of the event, not of the foundation.
        "wikidata_id": "Q478546",
        "label_en": "Amsterdam Gay Pride",
        "label_nl": "Pride Amsterdam",
        "type": "foundation",
        "notes": "Organizer of Pride Amsterdam",
    },
    "Stichting Zomercarnaval Nederland": {
        # Points at the Wikidata item of the event, not of the foundation.
        "wikidata_id": "Q2384646",
        "label_en": "Summer Carnival",
        "label_nl": "Zomercarnaval",
        "type": "foundation",
        "notes": "Organizer of Rotterdam Summer Carnival",
    },
}
def load_kien_data(data_dir=None):
    """Load the KIEN heritage-form records and the custodian profiles.

    Args:
        data_dir: Directory (``str`` or ``Path``) holding
            ``kien_custodians.json`` and ``custodian_profiles.json``.
            When omitted, the project's intangible-heritage data directory
            is used.

    Returns:
        A ``(heritage_forms, custodians)`` tuple with the parsed content of
        ``kien_custodians.json`` and ``custodian_profiles.json`` respectively.

    Raises:
        FileNotFoundError: If either JSON file does not exist.
        json.JSONDecodeError: If either file is not valid JSON.
    """
    if data_dir is None:
        data_dir = "/Users/kempersc/apps/glam/data/intangible_heritage"
    data_dir = Path(data_dir)

    # Read explicitly as UTF-8: the data contains Dutch names with non-ASCII
    # characters, and the platform default encoding is not UTF-8 everywhere.
    # NOTE(review): the heritage forms are read from "kien_custodians.json";
    # confirm that this is the intended source file.
    with open(data_dir / "kien_custodians.json", encoding="utf-8") as f:
        heritage_forms = json.load(f)

    with open(data_dir / "custodian_profiles.json", encoding="utf-8") as f:
        custodians = json.load(f)

    return heritage_forms, custodians
def _first_by_name(records, name_key):
    """Yield ``(name, record)`` for the first record carrying each non-empty name."""
    seen = set()
    for record in records:
        name = record.get(name_key, "")
        if not name or name in seen:
            continue
        seen.add(name)
        yield name, record


def create_crossref_file(heritage_forms=None, custodians=None, *,
                         heritage_mappings=None, custodian_mappings=None):
    """Build the KIEN <-> Wikidata cross-reference structure.

    Every distinct, non-empty heritage form / custodian name is reported
    exactly once (its first occurrence wins): either in the ``*_mappings``
    list, enriched with the known Wikidata data, or in the ``unmapped_*``
    list.

    Args:
        heritage_forms: Iterable of heritage-form records (dicts read via the
            ``"heritage_form"`` and ``"url"`` keys). Loaded with
            ``load_kien_data()`` when omitted.
        custodians: Dict whose ``"custodians"`` key holds custodian records
            (dicts read via ``"name"``, ``"kien_url"`` and ``"website"``).
            Loaded with ``load_kien_data()`` when omitted.
        heritage_mappings: Name -> Wikidata info table for heritage forms.
            Defaults to ``HERITAGE_WIKIDATA_MAPPINGS``.
        custodian_mappings: Name -> Wikidata info table for custodians.
            Defaults to ``CUSTODIAN_WIKIDATA_MAPPINGS``.

    Returns:
        A JSON-serializable dict with generation metadata, a ``statistics``
        section, the two mapping lists and the two unmapped lists.
    """
    if heritage_forms is None or custodians is None:
        loaded_forms, loaded_custodians = load_kien_data()
        if heritage_forms is None:
            heritage_forms = loaded_forms
        if custodians is None:
            custodians = loaded_custodians
    if heritage_mappings is None:
        heritage_mappings = HERITAGE_WIKIDATA_MAPPINGS
    if custodian_mappings is None:
        custodian_mappings = CUSTODIAN_WIKIDATA_MAPPINGS

    custodian_records = custodians.get("custodians", [])

    # Heritage forms: a name seen before is skipped in BOTH lists, so a form
    # that occurs in several records is not reported (or counted) repeatedly.
    form_mappings = []
    unmapped_forms = []
    for form_name, form in _first_by_name(heritage_forms, "heritage_form"):
        if form_name in heritage_mappings:
            # Build a new dict so the shared mapping table is never mutated.
            form_mappings.append({
                **heritage_mappings[form_name],
                "kien_name": form_name,
                "kien_url": form.get("url"),
            })
        else:
            unmapped_forms.append({"name": form_name, "url": form.get("url")})

    # Custodians: same first-occurrence rule as for heritage forms.
    mapped_custodians = []
    unmapped_custodians = []
    for name, custodian in _first_by_name(custodian_records, "name"):
        if name in custodian_mappings:
            mapped_custodians.append({
                **custodian_mappings[name],
                "kien_name": name,
                "kien_url": custodian.get("kien_url"),
                "website": custodian.get("website"),
            })
        else:
            unmapped_custodians.append({
                "name": name,
                "kien_url": custodian.get("kien_url"),
                "website": custodian.get("website"),
            })

    return {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "source": "https://www.immaterieelerfgoed.nl",
        "description": "Cross-reference mapping between KIEN Dutch Intangible Heritage Inventory and Wikidata",
        "statistics": {
            # Totals count input records; the *_with_wikidata figures count
            # the entries actually matched above.
            "total_heritage_forms": len(heritage_forms),
            "total_custodians": len(custodian_records),
            "heritage_forms_with_wikidata": len(form_mappings),
            "custodians_with_wikidata": len(mapped_custodians),
        },
        "heritage_form_mappings": form_mappings,
        "custodian_mappings": mapped_custodians,
        "unmapped_heritage_forms": unmapped_forms,
        "unmapped_custodians": unmapped_custodians,
    }
def main():
    """Generate the cross-reference file and print a summary.

    Builds the mapping with ``create_crossref_file()``, writes it as UTF-8
    JSON to ``wikidata_crossref.json`` in the intangible-heritage data
    directory, then prints the statistics and every mapped heritage form
    and custodian.
    """
    print("=" * 60)
    print("KIEN-Wikidata Cross-Reference Generator")
    print("=" * 60)
    print()

    output = create_crossref_file()
    stats = output["statistics"]

    # Save to file
    output_dir = Path("/Users/kempersc/apps/glam/data/intangible_heritage")
    output_file = output_dir / "wikidata_crossref.json"
    with open(output_file, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps the Dutch names readable in the JSON file.
        json.dump(output, f, ensure_ascii=False, indent=2)

    print("Statistics:")
    print(f" Total heritage forms: {stats['total_heritage_forms']}")
    print(f" Heritage forms with Wikidata: {stats['heritage_forms_with_wikidata']}")
    print(f" Total custodians: {stats['total_custodians']}")
    print(f" Custodians with Wikidata: {stats['custodians_with_wikidata']}")
    print()
    print(f"Saved to: {output_file}")

    sections = (
        ("Heritage forms with Wikidata mappings:", "heritage_form_mappings"),
        ("Custodians with Wikidata mappings:", "custodian_mappings"),
    )
    for heading, key in sections:
        print()
        print(heading)
        for mapping in output[key]:
            # Separate the KIEN name from the Wikidata ID; printing them
            # back-to-back made the two impossible to tell apart.
            print(f" {mapping['kien_name']} -> {mapping['wikidata_id']} ({mapping['label_en']})")


if __name__ == "__main__":
    main()