glam/scripts/rename_plural_slot.py
kempersc ad74d8379e feat(scripts): improve types-vocab extraction to derive all vocabulary from schema
- Remove hardcoded type mappings, derive dynamically from LinkML
- Extract keywords from annotations, structured_aliases, and comments
- Add rename_plural_slot.py utility for schema slot renaming
2026-01-10 15:37:52 +01:00

201 lines
6.3 KiB
Python

#!/usr/bin/env python3
"""
Rename a plural slot to singular form following Rule 43.
This script:
1. Renames the slot file (identifiers.yaml → identifier.yaml)
2. Updates slot name inside the file
3. Updates all class files that reference the slot
4. Preserves natural language descriptions containing the plural word
Usage:
python scripts/rename_plural_slot.py identifiers identifier --dry-run
python scripts/rename_plural_slot.py identifiers identifier
"""
import argparse
import os
import re
import sys
from pathlib import Path
# Directories holding the LinkML slot and class module files. Both paths are
# relative, so they resolve against the current working directory.
SLOTS_DIR, CLASSES_DIR = (
    Path("schemas/20251121/linkml/modules/slots"),
    Path("schemas/20251121/linkml/modules/classes"),
)
def rename_slot_file(
    old_name: str,
    new_name: str,
    dry_run: bool = False,
    slots_dir: "Path | None" = None,
) -> bool:
    """Rename the slot YAML file and update the slot's name inside it.

    The file ``<old_name>.yaml`` is rewritten as ``<new_name>.yaml`` with
    these line-anchored substitutions applied to its text:

    * ``id: <anything>/slot/<old_name>``       -> ``.../slot/<new_name>``
    * ``name: <old_name>`` / ``name: <old_name>_slot`` -> same with new name
    * an indented YAML key ``<old_name>:`` (any indentation width)

    Other occurrences of the word (e.g. inside description text) are left
    untouched because every pattern is anchored to the start of a line.

    Args:
        old_name: Current slot name; also the stem of the file to rename.
        new_name: Replacement slot name; also the stem of the new file.
        dry_run: When true, print the planned rename and a preview of the
            first 30 lines of the rewritten content without touching disk.
        slots_dir: Directory containing the slot files. Defaults to the
            module-level ``SLOTS_DIR``.

    Returns:
        ``True`` on success (or a successful dry run); ``False`` when the
        source file is missing or the target file already exists.
    """
    base_dir = SLOTS_DIR if slots_dir is None else Path(slots_dir)
    old_path = base_dir / f"{old_name}.yaml"
    new_path = base_dir / f"{new_name}.yaml"

    if not old_path.exists():
        print(f"ERROR: Slot file not found: {old_path}")
        return False
    if new_path.exists():
        print(f"ERROR: Target slot file already exists: {new_path}")
        return False

    # YAML is UTF-8; do not depend on the platform's locale encoding.
    content = old_path.read_text(encoding="utf-8")

    # Escape the name so regex metacharacters in it are matched literally.
    old = re.escape(old_name)

    def swap_name(match):
        # Group 1 is the text before the slot name, group 2 the text after.
        # A callable replacement inserts new_name literally, so backslashes
        # or group references inside it are never interpreted.
        return f"{match.group(1)}{new_name}{match.group(2)}"

    patterns = (
        # id field ending in .../slot/<old_name>
        rf"^(id: .*/slot/){old}($)",
        # name field, with or without the "_slot" suffix
        rf"^(name: ){old}((?:_slot)?)$",
        # The slot key itself: indented by any amount of spaces/tabs and
        # followed by end of line or whitespace, so it is a real YAML key.
        rf"^([ \t]+){old}(:)(?=[ \t]|$)",
    )
    for pattern in patterns:
        content = re.sub(pattern, swap_name, content, flags=re.MULTILINE)

    if dry_run:
        print(f"[DRY-RUN] Would rename: {old_path} -> {new_path}")
        print("[DRY-RUN] Updated content preview:")
        # Show first 30 lines
        for line_no, line in enumerate(content.split("\n")[:30], 1):
            print(f" {line_no:3d}| {line}")
        return True

    # Write the new file before removing the old one so a failed write
    # never leaves the slot without any file on disk.
    new_path.write_text(content, encoding="utf-8")
    old_path.unlink()
    print(f"✓ Renamed slot file: {old_path.name} -> {new_path.name}")
    return True
def update_class_files(
    old_name: str,
    new_name: str,
    dry_run: bool = False,
    classes_dir: "Path | None" = None,
) -> int:
    """Update every class YAML file that references the slot.

    Each ``*.yaml`` file in the classes directory is scanned (in sorted
    order) and these line-anchored occurrences of ``old_name`` are renamed:

    * import entries:      ``- ../slots/<old_name>``
    * indented list items: ``    - <old_name>``
    * indented YAML keys:  ``    <old_name>:`` (bare, or followed by a value)

    Text that merely contains the word, such as
    ``description: External identifiers assigned``, is not touched.

    Args:
        old_name: Current slot name.
        new_name: Replacement slot name.
        dry_run: When true, print a per-line before/after listing for each
            file that would change instead of writing it.
        classes_dir: Directory containing the class files. Defaults to the
            module-level ``CLASSES_DIR``.

    Returns:
        The number of files that were (or, in a dry run, would be) changed.
    """
    base_dir = CLASSES_DIR if classes_dir is None else Path(classes_dir)

    # Escape the name so regex metacharacters in it are matched literally.
    old = re.escape(old_name)

    # Indentation is matched with [ \t] rather than \s: \s also matches
    # newlines, which would let an "indented" pattern match a column-0
    # entry that happens to follow a blank line.
    patterns = (
        # Import statement, at column 0 or indented under "imports:".
        re.compile(rf"^([ \t]*- \.\./slots/){old}($)", re.MULTILINE),
        # Slot reference in a class's "slots:" list.
        re.compile(rf"^([ \t]+- ){old}($)", re.MULTILINE),
        # Slot used as a YAML key; the colon must end the line or be
        # followed by whitespace so that prose is never matched.
        re.compile(rf"^([ \t]+){old}(:)(?=[ \t]|$)", re.MULTILINE),
    )

    def swap_name(match):
        # Group 1 is the text before the slot name, group 2 the text after.
        # A callable replacement inserts new_name literally.
        return f"{match.group(1)}{new_name}{match.group(2)}"

    updated_count = 0
    for class_file in sorted(base_dir.glob("*.yaml")):
        # YAML is UTF-8; do not depend on the platform's locale encoding.
        original_content = class_file.read_text(encoding="utf-8")
        content = original_content
        for pattern in patterns:
            content = pattern.sub(swap_name, content)

        if content == original_content:
            continue

        updated_count += 1
        if dry_run:
            print(f"[DRY-RUN] Would update: {class_file.name}")
            # The substitutions never add or remove lines, so pairing the
            # old and new lines by position gives a simple diff.
            line_pairs = zip(original_content.split("\n"), content.split("\n"))
            for line_no, (before, after) in enumerate(line_pairs, 1):
                if before != after:
                    print(f" Line {line_no}:")
                    print(f" - {before}")
                    print(f" + {after}")
        else:
            class_file.write_text(content, encoding="utf-8")
            print(f"✓ Updated class: {class_file.name}")

    return updated_count
def main():
    """Parse the command line and run the two-step slot rename.

    Step 1 renames the slot file via ``rename_slot_file``; if that fails the
    process exits with status 1 before any class file is touched. Step 2
    updates the class files via ``update_class_files``. With ``--dry-run``
    both steps only report what they would do.
    """
    parser = argparse.ArgumentParser(
        description="Rename a plural slot to singular form (Rule 43)"
    )
    parser.add_argument("old_name", help="Current slot name (e.g., 'identifiers')")
    parser.add_argument("new_name", help="New slot name (e.g., 'identifier')")
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be changed without making changes",
    )
    args = parser.parse_args()

    # Change to repo root so the relative schema paths resolve. resolve()
    # makes the path absolute first: with a relative __file__ (e.g. the
    # script started from inside scripts/ on Python < 3.9) ".parent.parent"
    # would otherwise collapse to the current directory.
    repo_root = Path(__file__).resolve().parent.parent
    os.chdir(repo_root)

    banner = "=" * 60
    print(banner)
    print(f"Renaming slot: {args.old_name} -> {args.new_name}")
    print(f"Mode: {'DRY-RUN' if args.dry_run else 'LIVE'}")
    print(banner)

    # Step 1: Rename slot file
    print("\n[Step 1] Renaming slot file...")
    if not rename_slot_file(args.old_name, args.new_name, args.dry_run):
        sys.exit(1)

    # Step 2: Update class files
    print("\n[Step 2] Updating class files...")
    count = update_class_files(args.old_name, args.new_name, args.dry_run)
    print(f"\n{'Would update' if args.dry_run else 'Updated'} {count} class files")

    if args.dry_run:
        print("\n[DRY-RUN] No changes made. Run without --dry-run to apply changes.")
    else:
        print(f"\n✓ Successfully renamed slot: {args.old_name} -> {args.new_name}")


if __name__ == "__main__":
    main()