#!/usr/bin/env python3
"""
Fix the WIE is WIE in Overijssel entry (1004_Q110891808.yaml)

Issues identified:
1. Google Maps resolved to "Overijssel" (the province) instead of the actual project
2. Website scraped was www.overijssel.nl instead of www.wieiswieinoverijssel.nl
3. Type should be D (DIGITAL_PLATFORM) not S (Society)
4. Entity dissolved on 2024-10-01 - transferred to Collectie Overijssel
5. Location should be based on Collectie Overijssel (Zwolle) since that's the managing institution

Reference: https://www.wieiswieinoverijssel.nl/colofon
"""

import copy
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

import yaml

# Paths
ENTRY_PATH = Path("/Users/kempersc/apps/glam/data/nde/enriched/entries/1004_Q110891808.yaml")


def load_yaml(path: Path) -> dict:
    """Parse the UTF-8 YAML file at *path* with ``yaml.safe_load``.

    Args:
        path: Location of the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the document. Callers that
        need a mapping must check the result themselves (``fix_entry`` does).
    """
    with open(path, 'r', encoding='utf-8') as f:
        return yaml.safe_load(f)


def save_yaml(path: Path, data: dict):
    """Write *data* to *path* as UTF-8 YAML, overwriting any existing file.

    The dump uses block style, keeps non-ASCII characters unescaped, keeps the
    insertion order of keys and wraps at 120 columns.

    Args:
        path: Destination file.
        data: Structure to serialise.
    """
    with open(path, 'w', encoding='utf-8') as f:
        yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False, width=120)


def _apply_fixes(entry: dict, timestamp: str) -> None:
    """Apply the ten corrections to *entry* in place, printing progress per step.

    Args:
        entry: Parsed entry mapping; must contain an ``original_entry`` mapping.
        timestamp: ISO-8601 string stamped on every annotation added here.

    Raises:
        KeyError: If ``original_entry`` is missing from *entry*.
    """
    # 1. Change type from S to D (DIGITAL_PLATFORM)
    print("1. Changing type from S to D (DIGITAL_PLATFORM)...")
    entry['original_entry']['type'] = ['D']  # Digital platform

    # 2. Add dissolution information (TimeSpan)
    print("2. Adding dissolution date and TimeSpan...")
    entry['temporal_extent'] = {
        'begin_of_the_begin': '2004-01-01T00:00:00Z',  # Project started in 2004
        'end_of_the_begin': '2004-12-31T23:59:59Z',  # Approximate founding
        'begin_of_the_end': '2024-10-01T00:00:00Z',  # Exact dissolution date
        'end_of_the_end': '2024-10-01T00:00:00Z',  # Same - precise date known
        'active': False,
        'dissolution_date': '2024-10-01',
        'dissolution_reason': 'Technical management transferred to Collectie Overijssel. Content remains accessible but will not be updated.',
        'source': 'https://www.wieiswieinoverijssel.nl/colofon'
    }

    # 3. Add successor organization
    print("3. Adding successor organization (Collectie Overijssel)...")
    entry['successor_organization'] = {
        'wikidata_id': 'Q23009897',
        'name': 'Collectie Overijssel',
        'description_nl': 'archiefinstelling in Overijssel',
        'isil': 'NL-ZlCO',
        'viaf': '144502123',
        'website': 'https://www.historischcentrumoverijssel.nl/',
        'relationship': 'technical_management_transferred',
        'transfer_date': '2024-10-01'
    }

    # 4. Fix the Google Maps enrichment - mark as incorrect and add correct reference
    print("4. Marking Google Maps enrichment as incorrect match...")
    if 'google_maps_enrichment' in entry:
        # Snapshot only once, so a re-run does not replace the pristine copy
        # with data that already carries the `_match_error` annotation.
        if 'google_maps_enrichment_original' not in entry:
            # Deep copy: a shallow copy would share nested dicts/lists with the
            # live enrichment, which PyYAML serialises as anchors/aliases
            # (&id001 / *id001) and which would let later nested edits leak
            # into the "original" snapshot.
            entry['google_maps_enrichment_original'] = copy.deepcopy(entry['google_maps_enrichment'])
        entry['google_maps_enrichment']['_match_error'] = {
            'error_type': 'WRONG_ENTITY_MATCHED',
            'expected': 'WIE is WIE in Overijssel (biographical database project)',
            'matched': 'Overijssel (the province)',
            'error_detected': timestamp,
            'notes': 'Google Maps search for "WIE is WIE in Overijssel" incorrectly matched the province. This is a digital-only project with no physical location.'
        }

    # 5. Add correct location based on Collectie Overijssel (managing institution)
    print("5. Adding correct location (Collectie Overijssel in Zwolle)...")
    entry['location'] = {
        'city': 'Zwolle',
        'province': 'Overijssel',
        'country': 'NL',
        'latitude': 52.5125,  # Collectie Overijssel coordinates
        'longitude': 6.0944,
        'address': 'Eikenstraat 20, 8021 WX Zwolle',
        'location_note': 'Location of managing institution (Collectie Overijssel). WIE is WIE in Overijssel is a digital-only project.',
        'source': 'Collectie Overijssel (managing institution since 2024-10-01)'
    }

    # 6. Fix the website reference
    print("6. Fixing website reference...")
    entry['correct_website'] = {
        'url': 'https://www.wieiswieinoverijssel.nl/',
        'verified': True,
        'verification_date': timestamp,
        'note': 'This is the actual project website. The previously scraped www.overijssel.nl was incorrect.'
    }

    # 7. Add correct description
    print("7. Adding correct description...")
    entry['correct_description'] = {
        'nl': 'WIE is WIE in Overijssel is een online biografisch woordenboek van personen die van betekenis zijn geweest voor of in de provincie Overijssel. Het project is gestart in 2004 als samenwerking tussen Athenaeumbibliotheek Deventer, Collectie Overijssel en Rijnbrink. Per 1 oktober 2024 is het technisch beheer overgedragen aan Collectie Overijssel; de informatie blijft toegankelijk maar wordt niet meer aangevuld.',
        'en': 'WIE is WIE in Overijssel (WHO is WHO in Overijssel) is an online biographical dictionary of notable persons from the Dutch province of Overijssel. The project started in 2004 as a collaboration between Athenaeumbibliotheek Deventer, Collectie Overijssel, and Rijnbrink. As of October 1, 2024, technical management was transferred to Collectie Overijssel; the information remains accessible but will no longer be updated.',
        'source': 'https://www.wieiswieinoverijssel.nl/colofon'
    }

    # 8. Mark the web_claims as from wrong source
    print("8. Marking web_claims as from incorrect source...")
    if 'web_claims' in entry:
        # NOTE(review): assumes web_claims is a mapping - confirm against the entry schema.
        entry['web_claims']['_source_error'] = {
            'error_type': 'WRONG_WEBSITE_SCRAPED',
            'scraped_url': 'http://www.overijssel.nl/',
            'correct_url': 'https://www.wieiswieinoverijssel.nl/',
            'error_detected': timestamp,
            'claims_valid': False,
            'notes': 'All claims in this section are from the provincial government website, not the actual biographical project website.'
        }

    # 9. Regenerate GHCID with correct type
    # The GHCID itself is not recomputed here; the entry is only flagged.
    print("9. Adding note about GHCID regeneration needed...")
    if 'ghcid' in entry:
        entry['ghcid']['_regeneration_needed'] = {
            'reason': 'Type changed from S to D, location changed to Zwolle',
            'previous_ghcid': entry['ghcid'].get('ghcid_current'),
            'flagged_for_regeneration': timestamp
        }

    # 10. Update provenance
    print("10. Updating provenance...")
    provenance = entry.get('provenance')
    if provenance is None:
        # Covers both a missing key and an explicit YAML null.
        provenance = {}
        entry['provenance'] = provenance
    notes = provenance.get('notes')
    if notes is None:
        notes = []
    elif not isinstance(notes, list):
        # A single scalar note is kept as the first element instead of
        # crashing on .append().
        notes = [notes]
    provenance['notes'] = notes
    notes.append(f'[{timestamp}] Fixed incorrect Google Maps/website enrichment. Type changed S->D. Dissolution date added (2024-10-01). See fix_wie_is_wie_entry.py for details.')
    provenance['last_manual_fix'] = timestamp
    provenance['fix_script'] = 'scripts/fix_wie_is_wie_entry.py'


def _print_summary() -> None:
    """Print the fixed list of changes made and the manual follow-up steps."""
    print("\n" + "="*60)
    print("SUMMARY OF CHANGES:")
    print("="*60)
    print("1. Type: S -> D (DIGITAL_PLATFORM)")
    print("2. Added temporal_extent with dissolution date 2024-10-01")
    print("3. Added successor_organization (Collectie Overijssel, Q23009897)")
    print("4. Marked Google Maps enrichment as incorrect match")
    print("5. Added correct location (Zwolle, based on Collectie Overijssel)")
    print("6. Added correct website reference (wieiswieinoverijssel.nl)")
    print("7. Added correct bilingual description")
    print("8. Marked web_claims as from wrong source")
    print("9. Flagged GHCID for regeneration")
    print("10. Updated provenance notes")
    print("="*60)
    print("\nNEXT STEPS:")
    print("1. Run website scraper on https://www.wieiswieinoverijssel.nl/")
    print("2. Regenerate GHCID with new type (D) and location (Zwolle)")
    print("3. Re-export to nde_institutions.json")
    print("4. Update frontend to display TimeSpan/dissolution dates")


def fix_entry(entry_path: Optional[Path] = None) -> None:
    """Fix the WIE is WIE entry.

    Loads the entry, applies the corrections in memory, writes the result back
    to the same file and prints a summary. Nothing is written if loading or
    any correction step raises.

    Args:
        entry_path: YAML entry to fix. Defaults to ``ENTRY_PATH`` (resolved at
            call time) so that calling ``fix_entry()`` behaves as before.

    Raises:
        TypeError: If the file does not contain a YAML mapping (e.g. it is empty).
        KeyError: If the entry has no ``original_entry`` section.
    """
    path = ENTRY_PATH if entry_path is None else entry_path

    print(f"Loading {path}...")
    entry = load_yaml(path)
    if not isinstance(entry, dict):
        # Fail with a readable message instead of an opaque
        # "'NoneType' object is not subscriptable" further down.
        raise TypeError(f"Expected a YAML mapping in {path}, got {type(entry).__name__}")

    timestamp = datetime.now(timezone.utc).isoformat()
    _apply_fixes(entry, timestamp)

    # Save the fixed entry
    print(f"\nSaving fixed entry to {path}...")
    save_yaml(path, entry)

    _print_summary()


if __name__ == '__main__':
    fix_entry()