feat(scripts): improve types-vocab extraction to derive all vocabulary from schema
- Remove hardcoded type mappings, derive dynamically from LinkML
- Extract keywords from annotations, structured_aliases, and comments
- Add rename_plural_slot.py utility for schema slot renaming
This commit is contained in:
parent
ec18e1810d
commit
ad74d8379e
3 changed files with 14211 additions and 7079 deletions
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
201
scripts/rename_plural_slot.py
Normal file
201
scripts/rename_plural_slot.py
Normal file
|
|
@ -0,0 +1,201 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Rename a plural slot to singular form following Rule 43.
|
||||||
|
|
||||||
|
This script:
|
||||||
|
1. Renames the slot file (identifiers.yaml → identifier.yaml)
|
||||||
|
2. Updates slot name inside the file
|
||||||
|
3. Updates all class files that reference the slot
|
||||||
|
4. Preserves natural language descriptions containing the plural word
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python scripts/rename_plural_slot.py identifiers identifier --dry-run
|
||||||
|
python scripts/rename_plural_slot.py identifiers identifier
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Directory searched for "<slot name>.yaml" slot definition files.
# The path is relative; main() changes into the repository root before use.
SLOTS_DIR = Path("schemas/20251121/linkml/modules/slots")
# Directory whose "*.yaml" class files are scanned for references to the slot.
CLASSES_DIR = Path("schemas/20251121/linkml/modules/classes")
|
||||||
|
|
||||||
|
|
||||||
|
def rename_slot_file(
    old_name: str,
    new_name: str,
    dry_run: bool = False,
    slots_dir: "Path | None" = None,
) -> bool:
    """Rename a slot's YAML file and rewrite the slot name inside it.

    The file ``<old_name>.yaml`` is read, every structural occurrence of the
    slot name is rewritten, and the result is stored as ``<new_name>.yaml``
    (the old file is then removed). Only the positions below are touched, so
    prose that merely mentions the old word (e.g. in a description) is kept:

    * the last path segment of a top-level ``id: .../slot/<old_name>`` line
    * a top-level ``name: <old_name>`` or ``name: <old_name>_slot`` line
    * the ``<old_name>:`` key directly below a ``slots:`` line
    * any indented line consisting solely of the key ``<old_name>:``

    Both names are treated as literal text: ``old_name`` is regex-escaped and
    ``new_name`` is inserted verbatim.

    Args:
        old_name: Current slot name; also the stem of the file to rename.
        new_name: Replacement slot name; also the stem of the new file.
        dry_run: When true, print the planned rename and a preview of the
            first 30 rewritten lines instead of touching the filesystem.
        slots_dir: Directory containing the slot files. Defaults to the
            module-level ``SLOTS_DIR``.

    Returns:
        ``True`` on success (or a completed dry run); ``False`` when the
        source file is missing or the target file already exists.
    """
    base_dir = SLOTS_DIR if slots_dir is None else Path(slots_dir)
    old_path = base_dir / f"{old_name}.yaml"
    new_path = base_dir / f"{new_name}.yaml"

    if not old_path.exists():
        print(f"ERROR: Slot file not found: {old_path}")
        return False

    if new_path.exists():
        print(f"ERROR: Target slot file already exists: {new_path}")
        return False

    # Schema files may contain non-ASCII text; do not depend on the locale.
    content = old_path.read_text(encoding="utf-8")

    # Escape the name so characters such as "." only match themselves.
    old = re.escape(old_name)

    # Every pattern captures the text that precedes the slot name in group 1
    # and checks what follows with a lookahead, so one replacement function
    # (prefix + new name) serves all of them.
    structural_patterns = (
        # id: https://.../slot/<old_name>
        rf"^(id: .*/slot/){old}$",
        # name: <old_name>   or   name: <old_name>_slot
        rf"^(name: ){old}(?=(?:_slot)?$)",
        # The slot key right under "slots:", at any indentation width.
        rf"^(slots:\s*\n[ \t]+){old}(?=:)",
        # The slot key on a line of its own anywhere else in the file.
        rf"^([ \t]+){old}(?=:$)",
    )

    def _swap(match):
        # A function replacement keeps new_name literal: backslashes in it
        # are not interpreted as regex template escapes.
        return match.group(1) + new_name

    for pattern in structural_patterns:
        content = re.sub(pattern, _swap, content, flags=re.MULTILINE)

    if dry_run:
        print(f"[DRY-RUN] Would rename: {old_path} → {new_path}")
        print(f"[DRY-RUN] Updated content preview:")
        # Show first 30 lines
        for i, line in enumerate(content.split('\n')[:30], 1):
            print(f" {i:3d}| {line}")
    else:
        # Write the new file before deleting the old one so a failed write
        # never leaves the slot without a definition file.
        new_path.write_text(content, encoding="utf-8")
        old_path.unlink()
        print(f"✓ Renamed slot file: {old_path.name} → {new_path.name}")

    return True
|
||||||
|
|
||||||
|
|
||||||
|
def update_class_files(
    old_name: str,
    new_name: str,
    dry_run: bool = False,
    classes_dir: "Path | None" = None,
) -> int:
    """Rewrite references to a renamed slot in every class YAML file.

    Each ``*.yaml`` file in the classes directory is scanned (in sorted
    order) and the slot name is replaced only where it appears structurally:

    * an import list item ``- ../slots/<old_name>`` (at any indentation)
    * an indented list item ``- <old_name>`` (e.g. under ``slots:``)
    * an indented YAML key ``<old_name>:``, alone on the line or followed by
      a space and a value

    Free text such as "External identifiers assigned" is left untouched.
    Both names are treated as literal text: ``old_name`` is regex-escaped and
    ``new_name`` is inserted verbatim.

    Args:
        old_name: Slot name currently referenced by the class files.
        new_name: Slot name to write in its place.
        dry_run: When true, print a line-by-line before/after listing for
            each affected file instead of writing it.
        classes_dir: Directory containing the class files. Defaults to the
            module-level ``CLASSES_DIR``.

    Returns:
        Number of class files whose content changed (or would change in a
        dry run).
    """
    base_dir = CLASSES_DIR if classes_dir is None else Path(classes_dir)

    # Escape the name so characters such as "." only match themselves, and
    # compile once here rather than once per file.
    old = re.escape(old_name)
    reference_patterns = [
        re.compile(pattern, re.MULTILINE)
        for pattern in (
            # - ../slots/<old_name>
            rf"^(\s*- \.\./slots/){old}$",
            # slots:
            #   - <old_name>
            rf"^(\s+- ){old}$",
            # <old_name>:            (key alone on the line)
            # <old_name>: value      (key with an inline value)
            rf"^(\s+){old}(?=:(?: |$))",
        )
    ]

    def _swap(match):
        # Group 1 is the text before the slot name; a function replacement
        # keeps new_name literal (no regex template escapes are interpreted).
        return match.group(1) + new_name

    updated_count = 0

    for class_file in sorted(base_dir.glob("*.yaml")):
        original_content = class_file.read_text(encoding="utf-8")
        content = original_content
        for pattern in reference_patterns:
            content = pattern.sub(_swap, content)

        if content == original_content:
            continue

        updated_count += 1
        if dry_run:
            print(f"[DRY-RUN] Would update: {class_file.name}")
            # The substitutions never add or remove line breaks, so the two
            # versions can be compared line by line.
            old_lines = original_content.split('\n')
            new_lines = content.split('\n')
            for i, (before, after) in enumerate(zip(old_lines, new_lines), 1):
                if before != after:
                    print(f" Line {i}:")
                    print(f" - {before}")
                    print(f" + {after}")
        else:
            class_file.write_text(content, encoding="utf-8")
            print(f"✓ Updated class: {class_file.name}")

    return updated_count
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point: rename a slot and update its references.

    Parses ``old_name``, ``new_name`` and the optional ``--dry-run`` flag,
    changes the working directory to the repository root (the parent of the
    directory containing this script), then:

    1. renames the slot file via ``rename_slot_file``; if that reports
       failure the process exits with status 1;
    2. rewrites references in class files via ``update_class_files`` and
       prints how many files were (or would be) updated.
    """
    parser = argparse.ArgumentParser(
        description="Rename a plural slot to singular form (Rule 43)"
    )
    parser.add_argument("old_name", help="Current slot name (e.g., 'identifiers')")
    parser.add_argument("new_name", help="New slot name (e.g., 'identifier')")
    parser.add_argument("--dry-run", action="store_true",
                        help="Show what would be changed without making changes")

    args = parser.parse_args()

    # Change to repo root. Resolve first: __file__ can be a relative path
    # (for example a bare "rename_plural_slot.py" when launched from inside
    # scripts/), and ".parent.parent" of such a path collapses to "." instead
    # of climbing to the repository root.
    repo_root = Path(__file__).resolve().parent.parent
    os.chdir(repo_root)

    print(f"{'='*60}")
    print(f"Renaming slot: {args.old_name} → {args.new_name}")
    print(f"Mode: {'DRY-RUN' if args.dry_run else 'LIVE'}")
    print(f"{'='*60}")

    # Step 1: Rename slot file
    print("\n[Step 1] Renaming slot file...")
    if not rename_slot_file(args.old_name, args.new_name, args.dry_run):
        # Nothing was renamed, so leave the class files alone as well.
        sys.exit(1)

    # Step 2: Update class files
    print("\n[Step 2] Updating class files...")
    count = update_class_files(args.old_name, args.new_name, args.dry_run)
    print(f"\n{'Would update' if args.dry_run else 'Updated'} {count} class files")

    if args.dry_run:
        print("\n[DRY-RUN] No changes made. Run without --dry-run to apply changes.")
    else:
        print(f"\n✓ Successfully renamed slot: {args.old_name} → {args.new_name}")
|
||||||
|
|
||||||
|
|
||||||
|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||||
Loading…
Reference in a new issue