#!/usr/bin/env python3
"""
Add temporal_extent to defunct municipal archives.

These archives have been merged into regional archives following Dutch municipal
reorganizations. The custodian_name is correct (historical name), but we need to
add temporal_extent to indicate when they ceased to exist as independent entities.

Uses CIDOC-CRM E52_Time-Span pattern:
- begin_of_the_begin: Earliest possible founding date
- end_of_the_begin: Latest possible founding date
- begin_of_the_end: Earliest possible dissolution date
- end_of_the_end: Latest possible dissolution date

For municipal archives, the dissolution is precise (municipal merger date),
but founding dates are often unknown.
"""

import yaml
from pathlib import Path
from datetime import datetime, timezone

# Directory holding the enriched entry YAML files that main() processes by default.
DEFAULT_ENTRIES_DIR = Path("/Users/kempersc/apps/glam/data/nde/enriched/entries")

# Known Dutch municipal mergers and their archive dissolution dates.
# Keys are entry file names inside the entries directory.
MUNICIPAL_MERGERS = {
    # Noardeast-Fryslân mergers (2019-01-01)
    "0095_Q81181251.yaml": {  # Gemeentearchief Ferwerderadiel
        "dissolution_date": "2019-01-01",
        "successor": "Streekargyf Noardeast-Fryslân",
        "successor_wikidata": None,  # TODO: Find Q-number
        "merger_municipality": "Noardeast-Fryslân",
    },
    "0101_Q81181296.yaml": {  # Gemeentearchief Kollumerland
        "dissolution_date": "2019-01-01",
        "successor": "Streekargyf Noardeast-Fryslân",
        "successor_wikidata": None,
        "merger_municipality": "Noardeast-Fryslân",
    },
    # Eemsdelta mergers (2021-01-01)
    "0309_Q81181191.yaml": {  # Gemeentearchief Appingedam
        "dissolution_date": "2021-01-01",
        "successor": "Groninger Archieven",
        "successor_wikidata": "Q2341044",
        "merger_municipality": "Eemsdelta",
    },
    "0313_Q81181241.yaml": {  # Gemeentearchief Delfzijl
        "dissolution_date": "2021-01-01",
        "successor": "Groninger Archieven",
        "successor_wikidata": "Q2341044",
        "merger_municipality": "Eemsdelta",
    },
    # Westerwolde mergers (2018-01-01)
    "0311_Q81181383.yaml": {  # Gemeentearchief Bellingwedde
        "dissolution_date": "2018-01-01",
        "successor": "Groninger Archieven",
        "successor_wikidata": "Q2341044",
        "merger_municipality": "Westerwolde",
    },
    "0328_Q81181347.yaml": {  # Gemeentearchief Vlagtwedde
        "dissolution_date": "2018-01-01",
        "successor": "Groninger Archieven",
        "successor_wikidata": "Q2341044",
        "merger_municipality": "Westerwolde",
    },
    # Midden-Groningen mergers (2018-01-01)
    "0317_Q81181273.yaml": {  # Gemeentearchief Hoogezand-Sappemeer
        "dissolution_date": "2018-01-01",
        "successor": "Historisch Archief Midden-Groningen",
        "successor_wikidata": None,
        "merger_municipality": "Midden-Groningen",
    },
    "0321_Q81181324.yaml": {  # Gemeentearchief Menterwolde
        "dissolution_date": "2018-01-01",
        "successor": "Historisch Archief Midden-Groningen",
        "successor_wikidata": None,
        "merger_municipality": "Midden-Groningen",
    },
    # Ten Boer (merged into Groningen 2019-01-01)
    "0326_Q81181363.yaml": {  # Gemeentearchief Ten Boer
        "dissolution_date": "2019-01-01",
        "successor": "Groninger Archieven",
        "successor_wikidata": "Q2341044",
        "merger_municipality": "Groningen",
    },
}


def _find_isil_assignment_date(entry: dict):
    """Return the ISIL assignment date recorded on *entry*, or None.

    The ``nan_isil_enrichment.nan_toegekend_op`` value is preferred. When it
    is missing or empty, the first identifier whose ``identifier_scheme`` is
    ``"ISIL"`` supplies its ``assigned_date``. Keys that are present but null
    are treated the same as absent keys.
    """
    enrichment = entry.get("nan_isil_enrichment") or {}
    isil_date = enrichment.get("nan_toegekend_op")
    if isil_date:
        return isil_date

    for ident in entry.get("identifiers") or []:
        if ident.get("identifier_scheme") == "ISIL":
            # Only the first ISIL identifier is consulted.
            return ident.get("assigned_date")
    return None


def add_temporal_extent(entry: dict, merger_info: dict, filename: str) -> dict:
    """Add temporal_extent and organizational change event to entry.

    Args:
        entry: Parsed entry mapping; it is modified in place.
        merger_info: One value of MUNICIPAL_MERGERS, providing
            ``dissolution_date``, ``successor``, ``successor_wikidata`` and
            ``merger_municipality``.
        filename: Entry file name. Not used by this function; kept so
            existing callers keep working.

    Returns:
        The same ``entry`` object, with ``temporal_extent``,
        ``organizational_change_events``, ``is_defunct`` and
        ``defunct_reason`` set.
    """
    dissolution_date = merger_info["dissolution_date"]
    dissolution_timestamp = f"{dissolution_date}T00:00:00Z"

    # Create temporal_extent following CIDOC-CRM E52_Time-Span pattern.
    # Founding date unknown for most municipal archives, so only set dissolution.
    temporal_extent = {
        "begin_of_the_begin": None,  # Unknown earliest founding
        "end_of_the_begin": None,  # Unknown latest founding
        # Precise dissolution date (municipal merger)
        "begin_of_the_end": dissolution_timestamp,
        "end_of_the_end": dissolution_timestamp,
    }

    # ISIL assignment is NOT the founding date, but the archive existed
    # BEFORE the ISIL was assigned, so it is recorded as a note only.
    isil_date = _find_isil_assignment_date(entry)
    if isil_date:
        temporal_extent["_notes"] = f"Archive existed before ISIL assignment ({isil_date})"

    entry["temporal_extent"] = temporal_extent

    # A key that is present but null is replaced by a fresh list so the
    # membership check and append below cannot fail on None.
    events = entry.get("organizational_change_events")
    if events is None:
        events = []
        entry["organizational_change_events"] = events

    # str() lets the comparison also match dates that the YAML loader turned
    # into datetime.date objects (unquoted 2019-01-01 style scalars).
    already_recorded = any(
        event.get("event_type") == "MERGER"
        and str(event.get("event_date")) == dissolution_date
        for event in events
    )

    if not already_recorded:
        events.append({
            "event_type": "MERGER",
            "event_date": dissolution_date,
            "event_description": (
                f"Municipal archive dissolved due to merger of municipality into "
                f"{merger_info['merger_municipality']}. "
                f"Archive holdings transferred to {merger_info['successor']}."
            ),
            "successor_organization": merger_info["successor"],
            "successor_wikidata_id": merger_info["successor_wikidata"],
            "provenance": {
                "source": "Dutch municipal reorganization records",
                "extraction_timestamp": datetime.now(timezone.utc).isoformat(),
            },
        })

    # Mark entity as defunct
    entry["is_defunct"] = True
    entry["defunct_reason"] = (
        f"Merged into {merger_info['merger_municipality']} municipality ({dissolution_date})"
    )

    return entry


def main(entries_dir: Path = DEFAULT_ENTRIES_DIR) -> None:
    """Apply add_temporal_extent to every file listed in MUNICIPAL_MERGERS.

    Args:
        entries_dir: Directory containing the entry YAML files. Defaults to
            DEFAULT_ENTRIES_DIR; a plain string path is accepted as well.

    Files that are missing, that do not contain a YAML mapping, or that
    already carry a ``temporal_extent.end_of_the_end`` value are reported and
    left untouched. Every other listed file is rewritten in place.
    """
    entries_dir = Path(entries_dir)
    updated = 0

    for filename, merger_info in MUNICIPAL_MERGERS.items():
        filepath = entries_dir / filename

        if not filepath.exists():
            print(f"⚠️ File not found: {filename}")
            continue

        with open(filepath, 'r', encoding='utf-8') as f:
            entry = yaml.safe_load(f)

        # safe_load yields None for an empty document and may yield a list or
        # scalar; neither can be enriched, so skip instead of crashing.
        if not isinstance(entry, dict):
            print(f"⚠️ Skipping empty or non-mapping YAML: {filename}")
            continue

        # Get custodian name for logging ("or {}" tolerates an explicit null).
        custodian_name = (entry.get("custodian_name") or {}).get("claim_value", "Unknown")

        # Check if already has temporal_extent with end date
        existing_extent = entry.get("temporal_extent") or {}
        if existing_extent.get("end_of_the_end"):
            print(f"⏭️ Already has temporal_extent: {filename} ({custodian_name})")
            continue

        # Add temporal extent and change event
        entry = add_temporal_extent(entry, merger_info, filename)

        # Write back
        with open(filepath, 'w', encoding='utf-8') as f:
            yaml.dump(entry, f, allow_unicode=True, sort_keys=False, default_flow_style=False)

        print(f"✅ Added temporal_extent to: {filename}")
        print(f" Name: {custodian_name}")
        print(f" Dissolved: {merger_info['dissolution_date']}")
        print(f" Successor: {merger_info['successor']}")
        updated += 1

    print(f"\n📊 Summary: Updated {updated}/{len(MUNICIPAL_MERGERS)} defunct municipal archives")


if __name__ == "__main__":
    main()