glam/scripts/add_legal_status.py
2025-12-01 23:55:55 +01:00

133 lines
4.8 KiB
Python

#!/usr/bin/env python3
"""
Add proper legal_status structure to entries where we know the legal entity name.
This separates:
- custodian_name: Public/trade name (used for GHCID)
- legal_status.legal_name: Legal entity name (Stichting X, Vereniging Y, etc.)
"""
import yaml
import os
from datetime import datetime
# Hand-curated legal entity data, keyed by entry filename (relative to
# ENTRIES_DIR). Each value provides:
#   public_name - public/trade name written to custodian_name
#   legal_name  - structured legal entity name stored in legal_status
#   legal_form  - legal form of the entity (all current entries: stichting)
#   note        - free-text description stored alongside the legal status
LEGAL_STATUS_UPDATES = {
    # Museum of Contemporary Tibetan Art
    "0040_museum_contemporary_tibetan_art.yaml": {
        "public_name": "Museum of Contemporary Tibetan Art",
        "legal_name": {
            "full_name": "Stichting Tibet House Holland",
            "name_without_type": "Tibet House Holland",
            "display_name": "Tibet House Holland",
            "language": "nl",
        },
        "legal_form": "stichting",
        "note": "Foundation operating museum of Tibetan contemporary art",
    },
    # CODA Apeldoorn
    "0138_Q13447121.yaml": {
        "public_name": "CODA Apeldoorn",
        "legal_name": {
            "full_name": "Stichting CODA",
            "name_without_type": "CODA",
            "display_name": "CODA",
            "language": "nl",
        },
        "legal_form": "stichting",
        "note": "Foundation operating CODA museum (museum, archive, library) in Apeldoorn",
    },
    # Museum De Domijnen
    "0432_Q56460988.yaml": {
        "public_name": "Museum De Domijnen",
        "legal_name": {
            "full_name": "Stichting De Domijnen",
            "name_without_type": "De Domijnen",
            "display_name": "De Domijnen",
            "language": "nl",
        },
        "legal_form": "stichting",
        "note": "Foundation operating Museum De Domijnen in Sittard",
    },
    # Eye Filmmuseum
    "1610_eye_film_instituut_nederland.yaml": {
        "public_name": "Eye Filmmuseum",
        "legal_name": {
            "full_name": "Stichting EYE Film Instituut Nederland",
            "name_without_type": "EYE Film Instituut Nederland",
            "display_name": "EYE",
            "language": "nl",
        },
        "legal_form": "stichting",
        "note": "Foundation operating national film museum and archive",
    },
    # Rijksmuseum
    "1517_rijksmuseum_amsterdam.yaml": {
        "public_name": "Rijksmuseum",
        "legal_name": {
            "full_name": "Stichting het Rijksmuseum",
            "name_without_type": "het Rijksmuseum",
            "display_name": "Rijksmuseum",
            "language": "nl",
        },
        "legal_form": "stichting",
        "note": "Foundation operating national museum of the Netherlands",
    },
}

# Directory holding the enriched entry YAML files that get rewritten in place.
ENTRIES_DIR = "/Users/kempersc/apps/glam/data/nde/enriched/entries"
def update_entry(filename: str, updates: dict) -> bool:
    """Write the public name and legal_status structure into one entry file.

    The YAML file ``filename`` inside ``ENTRIES_DIR`` is loaded, modified in
    memory and written back in place.

    Args:
        filename: Name of the entry file, relative to ``ENTRIES_DIR``.
        updates: Mapping with the keys ``public_name``, ``legal_name`` (a
            mapping containing at least ``full_name``), ``legal_form`` and
            ``note``.

    Returns:
        True if the file was rewritten. False (after printing a warning) if
        the file does not exist, its top level is not a mapping, or its
        existing ``legal_status`` value is neither a mapping nor null; the
        file is left untouched in those cases.
    """
    filepath = os.path.join(ENTRIES_DIR, filename)
    if not os.path.exists(filepath):
        print(f" ⚠ File not found: {filename}")
        return False

    with open(filepath, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    # safe_load returns None for an empty file and a list/scalar for
    # non-mapping documents; none of those can take the keys set below.
    if not isinstance(data, dict):
        print(f" ⚠ Unexpected YAML structure (expected a mapping): {filename}")
        return False

    # Validate legal_status before mutating anything. A missing key and an
    # explicit null are both treated as "start from an empty mapping".
    legal_status = data.get('legal_status')
    if legal_status is None:
        legal_status = {}
    elif not isinstance(legal_status, dict):
        print(f" ⚠ legal_status is not a mapping: {filename}")
        return False

    # Ensure custodian_name uses public name
    custodian = data.get('custodian_name')
    if isinstance(custodian, dict):
        custodian['claim_value'] = updates['public_name']
        custodian['confidence'] = 0.95
        custodian['name_type'] = 'public_name'
        custodian['note'] = 'Public/trade name used for GHCID generation'
    elif 'custodian_name' in data:
        # Present but null/scalar: there is no claim structure to update.
        print(f" ⚠ custodian_name is not a mapping, left unchanged: {filename}")

    # Add legal_status structure. The legal_name mapping is copied so the
    # loaded document never aliases the caller's ``updates`` object.
    legal_status['legal_name'] = dict(updates['legal_name'])
    legal_status['legal_form'] = updates['legal_form']
    legal_status['legal_form_code'] = updates['legal_form'].upper()[:3]  # STI for stichting
    legal_status['note'] = updates['note']
    # NOTE: naive local time, without a UTC offset.
    legal_status['update_timestamp'] = datetime.now().isoformat()
    data['legal_status'] = legal_status

    # Update alternative_names to clarify types. Tolerates a null list and
    # entries that are plain strings rather than mappings.
    legal_full_name = updates['legal_name']['full_name']
    for alt in data.get('alternative_names') or []:
        if isinstance(alt, dict) and alt.get('name') == legal_full_name:
            alt['name_type'] = 'legal_name'
            alt['note'] = 'Legal entity name (not used for GHCID)'

    # Serialize before opening the file for writing: mode 'w' truncates
    # immediately, so a serialization error while streaming straight into the
    # file would leave a truncated entry behind.
    serialized = yaml.dump(data, default_flow_style=False, allow_unicode=True, sort_keys=False)
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(serialized)
    return True
def main():
    """Apply every entry of LEGAL_STATUS_UPDATES and print a progress report.

    For each configured file the public and legal names are printed, the
    entry is passed to update_entry, and successful updates are counted for
    the summary line printed at the end.
    """
    separator = "=" * 60

    print("Adding legal_status structure to entries...")
    print(separator)

    success_count = 0
    for entry_file, entry_updates in LEGAL_STATUS_UPDATES.items():
        print(f"Processing: {entry_file}")
        print(f" Public name: {entry_updates['public_name']}")
        print(f" Legal name: {entry_updates['legal_name']['full_name']}")

        # update_entry reports its own warning when it cannot update a file.
        if not update_entry(entry_file, entry_updates):
            continue

        print(" ✓ Updated with legal_status")
        success_count += 1

    print(separator)
    print(f"Updated {success_count}/{len(LEGAL_STATUS_UPDATES)} entries")
    print()
    print("NOTE: Public name (custodian_name) is used for GHCID generation")
    print(" Legal name is stored in legal_status.legal_name")


if __name__ == "__main__":
    main()