190 lines
7.5 KiB
Python
190 lines
7.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Add temporal_extent to defunct municipal archives.
|
|
|
|
These archives have been merged into regional archives following Dutch municipal
|
|
reorganizations. The custodian_name is correct (historical name), but we need
|
|
to add temporal_extent to indicate when they ceased to exist as independent entities.
|
|
|
|
Uses CIDOC-CRM E52_Time-Span pattern:
|
|
- begin_of_the_begin: Earliest possible founding date
|
|
- end_of_the_begin: Latest possible founding date
|
|
- begin_of_the_end: Earliest possible dissolution date
|
|
- end_of_the_end: Latest possible dissolution date
|
|
|
|
For municipal archives, the dissolution is precise (municipal merger date),
|
|
but founding dates are often unknown.
|
|
"""
|
|
|
|
import yaml
|
|
from pathlib import Path
|
|
from datetime import datetime, timezone
|
|
|
|
# Known Dutch municipal mergers and their archive dissolution dates
|
|
# Known Dutch municipal mergers, grouped per reorganization. Each group is
#   (merger_municipality, dissolution_date, successor, successor_wikidata,
#    entry filenames of the municipal archives dissolved by that merger)
_MERGER_GROUPS = (
    (
        "Noardeast-Fryslân",
        "2019-01-01",
        "Streekargyf Noardeast-Fryslân",
        None,  # TODO: Find Q-number
        (
            "0095_Q81181251.yaml",  # Gemeentearchief Ferwerderadiel
            "0101_Q81181296.yaml",  # Gemeentearchief Kollumerland
        ),
    ),
    (
        "Eemsdelta",
        "2021-01-01",
        "Groninger Archieven",
        "Q2341044",
        (
            "0309_Q81181191.yaml",  # Gemeentearchief Appingedam
            "0313_Q81181241.yaml",  # Gemeentearchief Delfzijl
        ),
    ),
    (
        "Westerwolde",
        "2018-01-01",
        "Groninger Archieven",
        "Q2341044",
        (
            "0311_Q81181383.yaml",  # Gemeentearchief Bellingwedde
            "0328_Q81181347.yaml",  # Gemeentearchief Vlagtwedde
        ),
    ),
    (
        "Midden-Groningen",
        "2018-01-01",
        "Historisch Archief Midden-Groningen",
        None,
        (
            "0317_Q81181273.yaml",  # Gemeentearchief Hoogezand-Sappemeer
            "0321_Q81181324.yaml",  # Gemeentearchief Menterwolde
        ),
    ),
    (
        # Ten Boer was merged into the municipality of Groningen.
        "Groningen",
        "2019-01-01",
        "Groninger Archieven",
        "Q2341044",
        (
            "0326_Q81181363.yaml",  # Gemeentearchief Ten Boer
        ),
    ),
)

# Maps entry filename -> merger details. Every entry gets its own dict so
# that records are never shared between archives.
MUNICIPAL_MERGERS = {
    entry_filename: {
        "dissolution_date": dissolved_on,
        "successor": successor_name,
        "successor_wikidata": successor_qid,
        "merger_municipality": municipality,
    }
    for (
        municipality,
        dissolved_on,
        successor_name,
        successor_qid,
        entry_filenames,
    ) in _MERGER_GROUPS
    for entry_filename in entry_filenames
}
|
|
|
|
|
|
def _find_isil_assignment_date(entry: dict):
    """Return the ISIL assignment date recorded on *entry*, or None.

    The ``nan_isil_enrichment`` section is consulted first; when it is
    missing or carries no date, the first ISIL item in ``identifiers`` that
    has an ``assigned_date`` is used instead.
    """
    enrichment = entry.get("nan_isil_enrichment")
    if isinstance(enrichment, dict) and enrichment.get("nan_toegekend_op"):
        return enrichment["nan_toegekend_op"]

    # ``identifiers`` may be absent or explicitly null in the YAML.
    for ident in entry.get("identifiers") or []:
        if (
            isinstance(ident, dict)
            and ident.get("identifier_scheme") == "ISIL"
            and ident.get("assigned_date")
        ):
            return ident["assigned_date"]
    return None


def add_temporal_extent(entry: dict, merger_info: dict, filename: str) -> dict:
    """Add temporal_extent and organizational change event to entry.

    Args:
        entry: Parsed archive entry; it is modified in place.
        merger_info: Merger details. ``dissolution_date``, ``successor`` and
            ``merger_municipality`` are required; ``successor_wikidata`` is
            optional and defaults to None.
        filename: Name of the entry file. Not used by this function; kept so
            the signature stays compatible with existing callers.

    Returns:
        The same ``entry`` object, with ``temporal_extent``,
        ``organizational_change_events``, ``is_defunct`` and
        ``defunct_reason`` set.

    Raises:
        KeyError: If ``merger_info`` lacks one of the required keys.
    """
    dissolution_date = merger_info["dissolution_date"]
    dissolution_timestamp = f"{dissolution_date}T00:00:00Z"

    # Keep whatever an earlier enrichment already recorded (e.g. founding
    # bounds) instead of discarding it; only the end bounds are overwritten,
    # because the municipal merger date is precise.
    previous_extent = entry.get("temporal_extent")
    if not isinstance(previous_extent, dict):
        previous_extent = {}

    # CIDOC-CRM E52_Time-Span pattern. Founding bounds stay None (unknown)
    # unless the entry already had them.
    temporal_extent = {
        "begin_of_the_begin": previous_extent.get("begin_of_the_begin"),
        "end_of_the_begin": previous_extent.get("end_of_the_begin"),
        "begin_of_the_end": dissolution_timestamp,
        "end_of_the_end": dissolution_timestamp,
    }
    for key, value in previous_extent.items():
        temporal_extent.setdefault(key, value)

    # ISIL assignment is NOT the founding date; it only shows that the
    # archive already existed at that point, so it is recorded as a note.
    isil_date = _find_isil_assignment_date(entry)
    if isil_date:
        temporal_extent["_notes"] = f"Archive existed before ISIL assignment ({isil_date})"

    entry["temporal_extent"] = temporal_extent

    # Reuse the existing event list when there is one; a missing or null
    # value is replaced by a fresh list.
    events = entry.get("organizational_change_events")
    if not isinstance(events, list):
        events = []
        entry["organizational_change_events"] = events

    # YAML may have parsed an unquoted event_date into a date object, so the
    # comparison is done on the string form to keep this step idempotent.
    merger_already_recorded = any(
        isinstance(event, dict)
        and event.get("event_type") == "MERGER"
        and str(event.get("event_date")) == str(dissolution_date)
        for event in events
    )

    if not merger_already_recorded:
        events.append(
            {
                "event_type": "MERGER",
                "event_date": dissolution_date,
                "event_description": f"Municipal archive dissolved due to merger of municipality into {merger_info['merger_municipality']}. Archive holdings transferred to {merger_info['successor']}.",
                "successor_organization": merger_info["successor"],
                "successor_wikidata_id": merger_info.get("successor_wikidata"),
                "provenance": {
                    "source": "Dutch municipal reorganization records",
                    "extraction_timestamp": datetime.now(timezone.utc).isoformat(),
                },
            }
        )

    # Mark entity as defunct
    entry["is_defunct"] = True
    entry["defunct_reason"] = f"Merged into {merger_info['merger_municipality']} municipality ({dissolution_date})"

    return entry
|
|
|
|
|
|
def main(entries_dir=None):
    """Add temporal_extent to every entry file listed in MUNICIPAL_MERGERS.

    For each listed filename the YAML entry is loaded, enriched through
    ``add_temporal_extent`` and written back in place. Files that are
    missing, that do not contain a YAML mapping, or that already have
    ``temporal_extent.end_of_the_end`` set are skipped with a message.

    Args:
        entries_dir: Directory containing the entry YAML files (str or
            Path). Defaults to the project's enriched-entries directory
            when omitted.
    """
    if entries_dir is None:
        entries_dir = "/Users/kempersc/apps/glam/data/nde/enriched/entries"
    entries_dir = Path(entries_dir)

    updated = 0
    for filename, merger_info in MUNICIPAL_MERGERS.items():
        filepath = entries_dir / filename
        if not filepath.exists():
            print(f"⚠️ File not found: {filename}")
            continue

        with open(filepath, 'r', encoding='utf-8') as f:
            entry = yaml.safe_load(f)

        # An empty file loads as None and a scalar/list document is not an
        # entry either; skip those instead of failing on ``.get``.
        if not isinstance(entry, dict):
            print(f"⚠️ Skipping non-mapping YAML document: {filename}")
            continue

        # Get custodian name for logging
        name_claim = entry.get("custodian_name")
        if isinstance(name_claim, dict):
            custodian_name = name_claim.get("claim_value", "Unknown")
        else:
            custodian_name = "Unknown"

        # Check if already has temporal_extent with end date
        existing_extent = entry.get("temporal_extent")
        if isinstance(existing_extent, dict) and existing_extent.get("end_of_the_end"):
            print(f"⏭️ Already has temporal_extent: {filename} ({custodian_name})")
            continue

        # Add temporal extent and change event
        entry = add_temporal_extent(entry, merger_info, filename)

        # Serialize before opening the file for writing: opening with 'w'
        # truncates it, so a serialization error after that point would
        # destroy the existing entry.
        serialized = yaml.dump(entry, allow_unicode=True, sort_keys=False, default_flow_style=False)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(serialized)

        print(f"✅ Added temporal_extent to: {filename}")
        print(f" Name: {custodian_name}")
        print(f" Dissolved: {merger_info['dissolution_date']}")
        print(f" Successor: {merger_info['successor']}")
        updated += 1

    print(f"\n📊 Summary: Updated {updated}/{len(MUNICIPAL_MERGERS)} defunct municipal archives")
|
|
|
|
|
|
# Run the update only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|