glam/scripts/add_temporal_extent_defunct.py
2025-12-01 23:55:55 +01:00

190 lines
7.5 KiB
Python

#!/usr/bin/env python3
"""
Add temporal_extent to defunct municipal archives.
These archives have been merged into regional archives following Dutch municipal
reorganizations. The custodian_name is correct (historical name), but we need
to add temporal_extent to indicate when they ceased to exist as independent entities.
Uses CIDOC-CRM E52_Time-Span pattern:
- begin_of_the_begin: Earliest possible founding date
- end_of_the_begin: Latest possible founding date
- begin_of_the_end: Earliest possible dissolution date
- end_of_the_end: Latest possible dissolution date
For municipal archives, the dissolution is precise (municipal merger date),
but founding dates are often unknown.
"""
import yaml
from pathlib import Path
from datetime import datetime, timezone
# Known Dutch municipal mergers and their archive dissolution dates
# Table of defunct municipal archives, keyed by entry file name.
# Each row: (entry file, dissolution date, successor archive,
#            successor Wikidata Q-number or None if unknown,
#            municipality formed by / absorbing the merger).
# Row order is the processing order used when iterating the mapping.
MUNICIPAL_MERGERS = {
    entry_file: {
        "dissolution_date": dissolved_on,
        "successor": successor_name,
        "successor_wikidata": successor_qid,
        "merger_municipality": new_municipality,
    }
    for entry_file, dissolved_on, successor_name, successor_qid, new_municipality in (
        # --- Noardeast-Fryslân mergers (2019-01-01) ---
        # Gemeentearchief Ferwerderadiel (TODO: Find Q-number of successor)
        ("0095_Q81181251.yaml", "2019-01-01",
         "Streekargyf Noardeast-Fryslân", None, "Noardeast-Fryslân"),
        # Gemeentearchief Kollumerland
        ("0101_Q81181296.yaml", "2019-01-01",
         "Streekargyf Noardeast-Fryslân", None, "Noardeast-Fryslân"),
        # --- Eemsdelta mergers (2021-01-01) ---
        # Gemeentearchief Appingedam
        ("0309_Q81181191.yaml", "2021-01-01",
         "Groninger Archieven", "Q2341044", "Eemsdelta"),
        # Gemeentearchief Delfzijl
        ("0313_Q81181241.yaml", "2021-01-01",
         "Groninger Archieven", "Q2341044", "Eemsdelta"),
        # --- Westerwolde mergers (2018-01-01) ---
        # Gemeentearchief Bellingwedde
        ("0311_Q81181383.yaml", "2018-01-01",
         "Groninger Archieven", "Q2341044", "Westerwolde"),
        # Gemeentearchief Vlagtwedde
        ("0328_Q81181347.yaml", "2018-01-01",
         "Groninger Archieven", "Q2341044", "Westerwolde"),
        # --- Midden-Groningen mergers (2018-01-01) ---
        # Gemeentearchief Hoogezand-Sappemeer
        ("0317_Q81181273.yaml", "2018-01-01",
         "Historisch Archief Midden-Groningen", None, "Midden-Groningen"),
        # Gemeentearchief Menterwolde
        ("0321_Q81181324.yaml", "2018-01-01",
         "Historisch Archief Midden-Groningen", None, "Midden-Groningen"),
        # --- Ten Boer (merged into Groningen 2019-01-01) ---
        # Gemeentearchief Ten Boer
        ("0326_Q81181363.yaml", "2019-01-01",
         "Groninger Archieven", "Q2341044", "Groningen"),
    )
}
def _iso_day(value):
    """Return the ``YYYY-MM-DD`` part of a date-like value, or None for None."""
    if value is None:
        return None
    # An unquoted YAML date is loaded as a date/datetime object, not a string.
    if hasattr(value, "isoformat"):
        value = value.isoformat()
    return str(value)[:10]


def add_temporal_extent(entry: dict, merger_info: dict, filename: str) -> dict:
    """Add temporal_extent, a merger event and defunct markers to ``entry``.

    ``entry`` is modified in place and also returned.

    Args:
        entry: Parsed record of one archive (a mapping).
        merger_info: Merger details; must provide ``dissolution_date``
            (``YYYY-MM-DD`` string), ``successor``, ``successor_wikidata``
            and ``merger_municipality``.
        filename: Name of the entry file. Not used by this function; kept so
            existing callers keep working.

    Returns:
        The same ``entry`` object, with ``temporal_extent``,
        ``organizational_change_events``, ``is_defunct`` and
        ``defunct_reason`` set.

    Raises:
        KeyError: If ``merger_info`` lacks one of the required keys.
    """
    dissolution_date = merger_info["dissolution_date"]
    municipality = merger_info["merger_municipality"]
    successor = merger_info["successor"]

    # CIDOC-CRM E52_Time-Span pattern. The founding bounds stay unknown; the
    # dissolution is a single precise instant (the municipal merger date), so
    # both "end" bounds carry the same timestamp.
    dissolution_instant = f"{dissolution_date}T00:00:00Z"
    temporal_extent = {
        "begin_of_the_begin": None,  # Unknown earliest founding
        "end_of_the_begin": None,  # Unknown latest founding
        "begin_of_the_end": dissolution_instant,
        "end_of_the_end": dissolution_instant,
    }

    # Look for an ISIL assignment date: first in the enrichment record, then
    # (if that yields nothing) in the identifier list. ``or`` guards against
    # keys that are present but null in the YAML.
    isil_date = (entry.get("nan_isil_enrichment") or {}).get("nan_toegekend_op")
    if not isil_date:
        for ident in entry.get("identifiers") or []:
            if ident.get("identifier_scheme") == "ISIL":
                isil_date = ident.get("assigned_date")
                break

    # The ISIL assignment is NOT the founding date: the archive already
    # existed when the code was assigned, so the date is only an upper bound
    # on the founding. It is recorded as a note rather than as a bound.
    if isil_date:
        temporal_extent["_notes"] = f"Archive existed before ISIL assignment ({isil_date})"
    entry["temporal_extent"] = temporal_extent

    # Ensure the event list exists (a null value is treated like a missing key).
    events = entry.get("organizational_change_events")
    if events is None:
        events = entry["organizational_change_events"] = []

    # Skip the append when this merger is already recorded. Dates are
    # normalised so that a date object loaded from YAML matches the string.
    already_recorded = any(
        event.get("event_type") == "MERGER"
        and _iso_day(event.get("event_date")) == dissolution_date
        for event in events
    )
    if not already_recorded:
        events.append({
            "event_type": "MERGER",
            "event_date": dissolution_date,
            "event_description": f"Municipal archive dissolved due to merger of municipality into {municipality}. Archive holdings transferred to {successor}.",
            "successor_organization": successor,
            "successor_wikidata_id": merger_info["successor_wikidata"],
            "provenance": {
                "source": "Dutch municipal reorganization records",
                "extraction_timestamp": datetime.now(timezone.utc).isoformat(),
            },
        })

    # Mark entity as defunct
    entry["is_defunct"] = True
    entry["defunct_reason"] = f"Merged into {municipality} municipality ({dissolution_date})"
    return entry
def main(entries_dir=None):
    """Add temporal_extent to every entry file listed in MUNICIPAL_MERGERS.

    For each listed file: load the YAML, skip it when it is missing, is not a
    mapping, or already has ``temporal_extent.end_of_the_end``; otherwise
    enrich it with ``add_temporal_extent`` and write it back in place.
    Progress and a final summary are printed to stdout.

    Args:
        entries_dir: Directory holding the entry YAML files. Defaults to the
            original hard-coded project location when omitted.
    """
    if entries_dir is None:
        entries_dir = "/Users/kempersc/apps/glam/data/nde/enriched/entries"
    entries_dir = Path(entries_dir)

    updated = 0
    for filename, merger_info in MUNICIPAL_MERGERS.items():
        filepath = entries_dir / filename
        if not filepath.exists():
            print(f"⚠️ File not found: {filename}")
            continue

        with open(filepath, 'r', encoding='utf-8') as f:
            entry = yaml.safe_load(f)

        # An empty file loads as None and a scalar/list file is not a record;
        # neither can be enriched, so report and move on instead of crashing.
        if not isinstance(entry, dict):
            print(f"⚠️ Skipping {filename}: expected a YAML mapping, got {type(entry).__name__}")
            continue

        # Get custodian name for logging (tolerate a null or non-mapping value)
        name_claim = entry.get("custodian_name")
        if isinstance(name_claim, dict):
            custodian_name = name_claim.get("claim_value", "Unknown")
        else:
            custodian_name = "Unknown"

        # Check if already has temporal_extent with end date
        existing_extent = entry.get("temporal_extent")
        if isinstance(existing_extent, dict) and existing_extent.get("end_of_the_end"):
            print(f"⏭️ Already has temporal_extent: {filename} ({custodian_name})")
            continue

        # Add temporal extent and change event
        entry = add_temporal_extent(entry, merger_info, filename)

        # Serialize BEFORE opening the file for writing: opening with 'w'
        # truncates it, so a serialization error must not happen afterwards.
        serialized = yaml.dump(entry, allow_unicode=True, sort_keys=False, default_flow_style=False)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(serialized)

        print(f"✅ Added temporal_extent to: {filename}")
        print(f" Name: {custodian_name}")
        print(f" Dissolved: {merger_info['dissolution_date']}")
        print(f" Successor: {merger_info['successor']}")
        updated += 1

    print(f"\n📊 Summary: Updated {updated}/{len(MUNICIPAL_MERGERS)} defunct municipal archives")


if __name__ == "__main__":
    main()