glam/scripts/migrate_slots.py
2026-02-04 00:24:46 +01:00

171 lines
5.7 KiB
Python

#!/usr/bin/env python3
"""
Migrate old-style slot files to concise naming convention.
Transforms:
- is_or_was_X -> X
- has_or_had_X -> has_X
- contains_or_contained_X -> contains_X
"""
import os
import re
import yaml
from pathlib import Path
from datetime import datetime
# Directory that holds the slot YAML files scanned by this script.
SLOTS_DIR = Path("/Users/kempersc/apps/glam/schemas/20251121/linkml/modules/slots")
# Sub-directories of SLOTS_DIR: one receives the migrated slot files, the
# other is an archive folder.
NEW_SLOTS_DIR = SLOTS_DIR.joinpath("20260202_matang")
ARCHIVE_DIR = SLOTS_DIR.joinpath("archive")

# Ensure both sub-directories are present; ones that already exist are left
# untouched. SLOTS_DIR itself must already exist.
for _output_dir in (NEW_SLOTS_DIR, ARCHIVE_DIR):
    _output_dir.mkdir(exist_ok=True)
def to_camel_case(snake_str: str) -> str:
    """Return *snake_str* rewritten from snake_case to camelCase.

    The text before the first underscore is kept exactly as given; every
    later underscore-separated piece is passed through ``str.title()`` and
    appended without a separator.
    """
    # str.split always yields at least one element, so ``head`` is always bound.
    head, *tail = snake_str.split('_')
    camel = head
    for piece in tail:
        camel += piece.title()
    return camel
def migrate_slot_name(old_name: str) -> str:
    """Convert an old-style slot name to its concise form.

    Only a *leading* marker is rewritten:

    - ``is_or_was_X`` -> ``X``
    - ``has_or_had_X`` -> ``has_X``
    - ``contains_or_contained_X`` -> ``contains_X``

    Args:
        old_name: Slot name to convert.

    Returns:
        The converted name, or ``old_name`` unchanged when it starts with
        none of the old-style prefixes.
    """
    prefix_map = (
        ("is_or_was_", ""),
        ("has_or_had_", "has_"),
        ("contains_or_contained_", "contains_"),
    )
    for old_prefix, new_prefix in prefix_map:
        if old_name.startswith(old_prefix):
            # Slice off the prefix instead of using str.replace, which would
            # also rewrite later occurrences of the same text inside the name.
            return new_prefix + old_name[len(old_prefix):]
    return old_name
def create_new_slot_file(old_file: Path, new_name: str) -> str:
    """Build the YAML text of a migrated slot file from an old slot file.

    The old slot's description, range, multivalued flag, mappings,
    annotations, examples and comments are carried over. The new slot always
    receives an ``hc:`` slot URI derived from ``new_name`` (the old
    ``slot_uri`` is not carried over) and lists the old name under
    ``aliases``.

    Args:
        old_file: Path to the existing slot YAML file; its stem is taken as
            the old slot name.
        new_name: New slot name, used for the schema id, name, title, the
            slot URI and the key under ``slots``.

    Returns:
        The new schema serialized with ``yaml.dump`` in block style, keys in
        insertion order.

    Raises:
        OSError: If ``old_file`` cannot be opened or read.
        yaml.YAMLError: If ``old_file`` does not contain valid YAML.
    """
    # Read as UTF-8 explicitly so non-ASCII text does not depend on the
    # platform's default encoding.
    with open(old_file, 'r', encoding='utf-8') as f:
        # An empty file loads as None; treat it as an empty mapping.
        old_content = yaml.safe_load(f) or {}

    old_name = old_file.stem
    # ``slots:`` with no value loads as None as well.
    old_slots = old_content.get('slots') or {}
    if old_name not in old_slots:
        # Fall back to the first key that starts with the file stem minus a
        # trailing "_slot". Only the suffix is stripped, never an inner match.
        suffix = '_slot'
        base_name = old_name[:-len(suffix)] if old_name.endswith(suffix) else old_name
        for key in old_slots:
            if key.startswith(base_name):
                old_name = key
                break
    # A missing slot, or one declared with a null body, yields an empty dict.
    old_slot = old_slots.get(old_name) or {}

    # An absent or empty description falls back to a generated one rather
    # than rendering as the text "None".
    description = old_slot.get('description') or f'Migrated from {old_name}'
    migrated_on = datetime.now().strftime("%Y-%m-%d")

    # NOTE(review): a slot without an explicit ``range`` / ``multivalued`` is
    # written out as range "string" and multivalued True — confirm these
    # defaults are intended.
    new_slot = {
        'slot_uri': f'hc:{to_camel_case(new_name)}',
        'description': f'{description}\n\nMIGRATED {migrated_on} from {old_name} for conciseness.',
        'range': old_slot.get('range', 'string'),
        'multivalued': old_slot.get('multivalued', True),
        'aliases': [old_name],
    }

    # Mapping lists are copied only when non-empty. Key order matters because
    # the dump below preserves insertion order.
    for mapping_key in (
        'exact_mappings',
        'close_mappings',
        'related_mappings',
        'broad_mappings',
        'narrow_mappings',
    ):
        mapping_value = old_slot.get(mapping_key)
        if mapping_value:
            new_slot[mapping_key] = mapping_value

    # Annotations are always present; default to a wildcard custodian type.
    new_slot['annotations'] = old_slot.get('annotations', {'custodian_types': '["*"]'})

    # Examples and comments are copied only when non-empty.
    for extra_key in ('examples', 'comments'):
        extra_value = old_slot.get(extra_key)
        if extra_value:
            new_slot[extra_key] = extra_value

    # Wrap the slot in a complete schema document.
    new_content = {
        'id': f'https://nde.nl/ontology/hc/slot/{new_name}',
        'name': new_name,
        'title': new_name.replace('_', ' '),
        'prefixes': {
            'linkml': 'https://w3id.org/linkml/',
            'hc': 'https://nde.nl/ontology/hc/',
            'schema': 'http://schema.org/',
            'dcterms': 'http://purl.org/dc/terms/',
            'rico': 'https://www.ica.org/standards/RiC/ontology#',
            'org': 'http://www.w3.org/ns/org#',
            'oa': 'http://www.w3.org/ns/oa#',
        },
        'default_prefix': 'hc',
        'imports': ['linkml:types'],
        'slots': {
            new_name: new_slot,
        },
    }
    return yaml.dump(new_content, default_flow_style=False, allow_unicode=True, sort_keys=False)
def process_slots():
    """Migrate every old-style slot file in SLOTS_DIR into NEW_SLOTS_DIR.

    Files matching ``is_or_was_*.yaml``, ``has_or_had_*.yaml`` and
    ``contains_or_contained_*.yaml`` directly inside SLOTS_DIR are renamed
    with ``migrate_slot_name`` and rewritten with ``create_new_slot_file``.
    A target file that already exists is left alone and counted as skipped.
    A failure on one file is reported and does not stop the run.

    Returns:
        A ``(created, skipped)`` tuple of counts. Failures are printed and
        included in the summary line but are not part of the return value.
    """
    patterns = (
        'is_or_was_*.yaml',
        'has_or_had_*.yaml',
        'contains_or_contained_*.yaml',
    )
    created = 0
    skipped = 0
    errors = 0
    for pattern in patterns:
        # Sorted so output order (and which file wins when two old names map
        # to the same new name) does not depend on filesystem listing order.
        for old_file in sorted(SLOTS_DIR.glob(pattern)):
            if not old_file.is_file():
                continue
            old_name = old_file.stem
            new_name = migrate_slot_name(old_name)
            new_file = NEW_SLOTS_DIR / f'{new_name}.yaml'

            # Never overwrite a file produced by an earlier run or by hand.
            if new_file.exists():
                print(f"SKIP (exists): {old_name} -> {new_name}")
                skipped += 1
                continue

            try:
                new_content = create_new_slot_file(old_file, new_name)
                # The YAML is dumped with allow_unicode=True, so write it as
                # UTF-8 rather than the platform default encoding.
                with open(new_file, 'w', encoding='utf-8') as f:
                    f.write(new_content)
            except Exception as e:
                # Batch boundary: report the failure and continue with the
                # remaining files.
                print(f"ERROR: {old_name} -> {e}")
                errors += 1
            else:
                print(f"CREATED: {old_name} -> {new_name}")
                created += 1
    print(f"\nSummary: Created {created}, Skipped {skipped}, Errors {errors}")
    return created, skipped
# Run the migration only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    process_slots()