#!/usr/bin/env python3
"""
Add proper legal_status structure to entries where we know the legal entity name.

This separates:
- custodian_name: Public/trade name (used for GHCID)
- legal_status.legal_name: Legal entity name (Stichting X, Vereniging Y, etc.)
"""

import os
from datetime import datetime
from typing import Optional

import yaml

# Define entries with known legal entity names.
# Keys are entry filenames inside ENTRIES_DIR; values carry the public name,
# the structured legal name, the legal form and a free-text note.
LEGAL_STATUS_UPDATES = {
    "0040_museum_contemporary_tibetan_art.yaml": {
        "public_name": "Museum of Contemporary Tibetan Art",
        "legal_name": {
            "full_name": "Stichting Tibet House Holland",
            "name_without_type": "Tibet House Holland",
            "display_name": "Tibet House Holland",
            "language": "nl",
        },
        "legal_form": "stichting",
        "note": "Foundation operating museum of Tibetan contemporary art",
    },
    "0138_Q13447121.yaml": {
        "public_name": "CODA Apeldoorn",
        "legal_name": {
            "full_name": "Stichting CODA",
            "name_without_type": "CODA",
            "display_name": "CODA",
            "language": "nl",
        },
        "legal_form": "stichting",
        "note": "Foundation operating CODA museum (museum, archive, library) in Apeldoorn",
    },
    "0432_Q56460988.yaml": {
        "public_name": "Museum De Domijnen",
        "legal_name": {
            "full_name": "Stichting De Domijnen",
            "name_without_type": "De Domijnen",
            "display_name": "De Domijnen",
            "language": "nl",
        },
        "legal_form": "stichting",
        "note": "Foundation operating Museum De Domijnen in Sittard",
    },
    "1610_eye_film_instituut_nederland.yaml": {
        "public_name": "Eye Filmmuseum",
        "legal_name": {
            "full_name": "Stichting EYE Film Instituut Nederland",
            "name_without_type": "EYE Film Instituut Nederland",
            "display_name": "EYE",
            "language": "nl",
        },
        "legal_form": "stichting",
        "note": "Foundation operating national film museum and archive",
    },
    "1517_rijksmuseum_amsterdam.yaml": {
        "public_name": "Rijksmuseum",
        "legal_name": {
            "full_name": "Stichting het Rijksmuseum",
            "name_without_type": "het Rijksmuseum",
            "display_name": "Rijksmuseum",
            "language": "nl",
        },
        "legal_form": "stichting",
        "note": "Foundation operating national museum of the Netherlands",
    },
}

# Default location of the entry files; update_entry() accepts an override.
ENTRIES_DIR = "/Users/kempersc/apps/glam/data/nde/enriched/entries"


def update_entry(filename: str, updates: dict, entries_dir: Optional[str] = None) -> bool:
    """Update an entry with proper legal_status structure.

    Args:
        filename: Name of the YAML entry file inside the entries directory.
        updates: Mapping with the keys ``public_name``, ``legal_name`` (itself a
            mapping that contains ``full_name``), ``legal_form`` and ``note``.
        entries_dir: Directory holding the entry files. Defaults to
            ``ENTRIES_DIR`` when omitted or ``None``.

    Returns:
        True when the file was rewritten; False when the file is missing or
        its top-level YAML document is not a mapping (a warning is printed and
        the file is left untouched in both cases).
    """
    base_dir = ENTRIES_DIR if entries_dir is None else entries_dir
    filepath = os.path.join(base_dir, filename)

    if not os.path.isfile(filepath):
        print(f" ⚠ File not found: {filename}")
        return False

    with open(filepath, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    # safe_load yields None for an empty file and may yield a list or scalar;
    # none of those can hold the keys written below.
    if not isinstance(data, dict):
        print(f" ⚠ Not a YAML mapping, skipped: {filename}")
        return False

    # Ensure custodian_name uses public name
    if 'custodian_name' in data:
        custodian = data['custodian_name']
        if isinstance(custodian, dict):
            custodian['claim_value'] = updates['public_name']
            custodian['confidence'] = 0.95
            custodian['name_type'] = 'public_name'
            custodian['note'] = 'Public/trade name used for GHCID generation'
        else:
            # A null or scalar custodian_name cannot take sub-keys.
            print(f" ⚠ custodian_name is not a mapping, left unchanged: {filename}")

    # Add legal_status structure. A key that is present but null (or otherwise
    # not a mapping) is replaced so the assignments below cannot fail.
    legal_status = data.get('legal_status')
    if not isinstance(legal_status, dict):
        legal_status = {}
        data['legal_status'] = legal_status

    legal_status['legal_name'] = updates['legal_name']
    legal_status['legal_form'] = updates['legal_form']
    legal_status['legal_form_code'] = updates['legal_form'].upper()[:3]  # STI for stichting
    legal_status['note'] = updates['note']
    # NOTE(review): naive local-time timestamp, kept to match the existing
    # stored format; consider a timezone-aware value if consumers allow it.
    legal_status['update_timestamp'] = datetime.now().isoformat()

    # Update alternative_names to clarify types. A null list and non-mapping
    # items are skipped instead of raising.
    legal_full_name = updates['legal_name']['full_name']
    for alt in data.get('alternative_names') or []:
        if isinstance(alt, dict) and alt.get('name') == legal_full_name:
            alt['name_type'] = 'legal_name'
            alt['note'] = 'Legal entity name (not used for GHCID)'

    # Serialise before opening the file for writing: opening in 'w' mode
    # truncates immediately, so a dump error must surface first.
    serialized = yaml.dump(data, default_flow_style=False, allow_unicode=True, sort_keys=False)
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(serialized)

    return True


def main():
    """Apply every entry in LEGAL_STATUS_UPDATES and print a summary."""
    print("Adding legal_status structure to entries...")
    print("=" * 60)

    updated = 0
    for filename, updates in LEGAL_STATUS_UPDATES.items():
        print(f"Processing: {filename}")
        print(f" Public name: {updates['public_name']}")
        print(f" Legal name: {updates['legal_name']['full_name']}")

        if update_entry(filename, updates):
            print(" ✓ Updated with legal_status")
            updated += 1

    print("=" * 60)
    print(f"Updated {updated}/{len(LEGAL_STATUS_UPDATES)} entries")
    print()
    print("NOTE: Public name (custodian_name) is used for GHCID generation")
    print(" Legal name is stored in legal_status.legal_name")


if __name__ == "__main__":
    main()