133 lines
4.8 KiB
Python
133 lines
4.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
Add proper legal_status structure to entries where we know the legal entity name.

This separates:
- custodian_name: Public/trade name (used for GHCID)
- legal_status.legal_name: Legal entity name (Stichting X, Vereniging Y, etc.)
"""
|
|
import yaml
|
|
import os
|
|
from datetime import datetime
|
|
|
|
def _stichting(public_name, bare_name, display_name, note):
    """Build one update record for a Dutch foundation ("stichting").

    The legal full name is always "Stichting " followed by *bare_name*; the
    key order below is the order in which the keys are written to YAML.
    """
    return {
        "public_name": public_name,
        "legal_name": {
            "full_name": f"Stichting {bare_name}",
            "name_without_type": bare_name,
            "display_name": display_name,
            "language": "nl",
        },
        "legal_form": "stichting",
        "note": note,
    }


# Entries whose legal entity name is known, keyed by entry file name.
LEGAL_STATUS_UPDATES = {
    "0040_museum_contemporary_tibetan_art.yaml": _stichting(
        "Museum of Contemporary Tibetan Art",
        "Tibet House Holland",
        "Tibet House Holland",
        "Foundation operating museum of Tibetan contemporary art",
    ),
    "0138_Q13447121.yaml": _stichting(
        "CODA Apeldoorn",
        "CODA",
        "CODA",
        "Foundation operating CODA museum (museum, archive, library) in Apeldoorn",
    ),
    "0432_Q56460988.yaml": _stichting(
        "Museum De Domijnen",
        "De Domijnen",
        "De Domijnen",
        "Foundation operating Museum De Domijnen in Sittard",
    ),
    "1610_eye_film_instituut_nederland.yaml": _stichting(
        "Eye Filmmuseum",
        "EYE Film Instituut Nederland",
        "EYE",
        "Foundation operating national film museum and archive",
    ),
    "1517_rijksmuseum_amsterdam.yaml": _stichting(
        "Rijksmuseum",
        "het Rijksmuseum",
        "Rijksmuseum",
        "Foundation operating national museum of the Netherlands",
    ),
}
|
|
|
|
# Directory that holds the enriched entry YAML files.  The default is the
# path the script was originally written against; set the GLAM_ENTRIES_DIR
# environment variable to run against another checkout without editing
# this file.  An empty variable is ignored and falls back to the default.
ENTRIES_DIR = (
    os.environ.get("GLAM_ENTRIES_DIR")
    or "/Users/kempersc/apps/glam/data/nde/enriched/entries"
)
|
|
|
|
def update_entry(filename: str, updates: dict) -> bool:
    """Write the public name and legal_status data from *updates* into one entry.

    The YAML file ``filename`` inside ``ENTRIES_DIR`` is loaded, modified in
    memory and written back:

    * if the entry has a ``custodian_name`` mapping, its ``claim_value``,
      ``confidence``, ``name_type`` and ``note`` are overwritten so that it
      carries the public name;
    * ``legal_status`` is created when missing or empty and receives
      ``legal_name``, ``legal_form``, ``legal_form_code``, ``note`` and
      ``update_timestamp``;
    * every ``alternative_names`` item whose ``name`` equals the legal full
      name is tagged with ``name_type: legal_name``.

    Args:
        filename: Name of the entry file, relative to ``ENTRIES_DIR``.
        updates: Mapping with the keys ``public_name``, ``legal_name`` (a
            mapping that contains ``full_name``), ``legal_form`` and ``note``.

    Returns:
        ``True`` when the file was rewritten; ``False`` when the file does
        not exist or its top-level YAML value is not a mapping.
    """
    filepath = os.path.join(ENTRIES_DIR, filename)

    # Open directly instead of testing os.path.exists() first, so the file
    # cannot disappear between the check and the read.
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)
    except FileNotFoundError:
        print(f" ⚠ File not found: {filename}")
        return False

    # An empty file loads as None, and a list/scalar document is not an
    # entry; both would fail on the key lookups below.
    if not isinstance(data, dict):
        print(f" ⚠ Unexpected YAML structure (expected a mapping): {filename}")
        return False

    # Ensure custodian_name uses public name
    if 'custodian_name' in data:
        custodian = data['custodian_name']
        if isinstance(custodian, dict):
            custodian['claim_value'] = updates['public_name']
            custodian['confidence'] = 0.95
            custodian['name_type'] = 'public_name'
            custodian['note'] = 'Public/trade name used for GHCID generation'
        else:
            print(f" ⚠ custodian_name is not a mapping; left unchanged: {filename}")

    # Add legal_status structure.  `is None` covers both a missing key and an
    # empty `legal_status:` key, which YAML loads as None.
    legal_status = data.get('legal_status')
    if legal_status is None:
        legal_status = data['legal_status'] = {}

    legal_form = updates['legal_form']
    legal_status['legal_name'] = updates['legal_name']
    legal_status['legal_form'] = legal_form
    legal_status['legal_form_code'] = legal_form.upper()[:3]  # STI for stichting
    legal_status['note'] = updates['note']
    # NOTE(review): naive local time, written without a UTC offset; kept
    # as-is so the stored format does not change.
    legal_status['update_timestamp'] = datetime.now().isoformat()

    # Update alternative_names to clarify types.  `or []` tolerates an empty
    # key; non-mapping items (e.g. bare strings) are skipped.
    legal_full_name = updates['legal_name']['full_name']
    for alt in data.get('alternative_names') or []:
        if isinstance(alt, dict) and alt.get('name') == legal_full_name:
            alt['name_type'] = 'legal_name'
            alt['note'] = 'Legal entity name (not used for GHCID)'

    # Write to a sibling temp file and swap it in, so a failure while dumping
    # cannot leave a truncated entry behind.
    tmp_path = filepath + '.tmp'
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
        os.replace(tmp_path, filepath)
    finally:
        # After a successful replace the temp file no longer exists.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    return True
|
|
|
|
def main():
    """Apply every record in LEGAL_STATUS_UPDATES and print a progress report.

    For each entry file the public and legal names are printed, then
    update_entry() is called; successful updates are counted and the total
    is reported at the end.
    """
    rule = "=" * 60

    print("Adding legal_status structure to entries...")
    print(rule)

    succeeded = 0
    for entry_file in LEGAL_STATUS_UPDATES:
        record = LEGAL_STATUS_UPDATES[entry_file]
        public_name = record['public_name']
        legal_full_name = record['legal_name']['full_name']

        print(f"Processing: {entry_file}")
        print(f" Public name: {public_name}")
        print(f" Legal name: {legal_full_name}")

        # Failed updates are simply not counted.
        if not update_entry(entry_file, record):
            continue
        print(" ✓ Updated with legal_status")
        succeeded += 1

    print(rule)
    print(f"Updated {succeeded}/{len(LEGAL_STATUS_UPDATES)} entries")
    print()
    print("NOTE: Public name (custodian_name) is used for GHCID generation")
    print(" Legal name is stored in legal_status.legal_name")
|
|
|
|
# Run the batch update only when executed as a script, so importing this
# module (e.g. to reuse update_entry) has no side effects.
if __name__ == "__main__":
    main()
|