glam/scripts/remove_inline_slots.py

182 lines
5.7 KiB
Python

#!/usr/bin/env python3
"""
Remove inline slots sections from LinkML class files.
Per Rule 38 (AGENTS.md), all LinkML slots MUST be centralized in
schemas/20251121/linkml/modules/slots/, never inline in class files.
This script removes the `slots:` section from class files after
the slots have been extracted to individual files.
Usage:
python scripts/remove_inline_slots.py [--dry-run] [--file PATH]
Options:
--dry-run Show what would be done without making changes
--file PATH Process only a single file
"""
import argparse
import os
import re
import sys
from pathlib import Path
from typing import List, Tuple
# Schema locations, expressed as relative paths (main() changes into the
# project root before they are used).
SCHEMA_ROOT = Path("schemas") / "20251121" / "linkml"
CLASSES_DIR = SCHEMA_ROOT.joinpath("modules", "classes")
def find_slots_section(lines: List[str]) -> Tuple[int, int]:
    """
    Find the start and end line indices of the top-level slots: section.

    The section starts at the first unindented line that is exactly
    ``slots:`` (trailing whitespace ignored). It ends at the next unindented
    entry -- whether that entry is a mapping header (``classes:``) or a scalar
    (``default_range: string``) -- at a YAML document marker (``---`` or
    ``...``), or at the end of the file. Blank lines, indented lines,
    unindented comments and unindented ``-`` sequence items are treated as
    part of the section.

    Args:
        lines: The file content split into lines.

    Returns:
        Tuple of (start_index, end_index) with end_index exclusive,
        or (-1, -1) if not found.
    """
    start_idx = next(
        (i for i, line in enumerate(lines) if line.rstrip() == "slots:"),
        -1,
    )
    if start_idx == -1:
        return -1, -1

    for i in range(start_idx + 1, len(lines)):
        line = lines[i]

        # Document separator / end-of-document marker always closes the section.
        if line.startswith(('---', '...')):
            return start_idx, i

        # Blank, indented and comment lines cannot start a new top-level entry.
        if not line or line[0].isspace() or line[0] == '#':
            continue

        # A sequence written at the parent's indentation still belongs to slots:.
        if line.startswith('- ') or line.rstrip() == '-':
            continue

        # Any other unindented line is the next top-level entry. Requiring a
        # trailing colon here would swallow scalar entries such as
        # "default_range: string" into the section.
        return start_idx, i

    # No terminator found: the section runs to the end of the file.
    return start_idx, len(lines)
def remove_slots_section(file_path: Path, dry_run: bool = False) -> dict:
    """
    Remove the slots: section from a class file.

    The file is read and written as UTF-8. Only blank lines directly around
    the removed section are tidied (at most one is kept between the
    surrounding content); blank lines elsewhere in the file are untouched.
    If the file ended with a newline, the rewritten file does too.

    Args:
        file_path: Path of the class file to rewrite.
        dry_run: When True, only report what would be removed; the file is
            not modified.

    Returns:
        Dict with statistics: {removed: bool, lines_removed: int, error: str or None}.
        In dry-run mode ``removed`` is True when a section would be removed.
    """
    result = {"removed": False, "lines_removed": 0, "error": None}

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeError) as e:
        result["error"] = f"Failed to read {file_path}: {e}"
        return result

    lines = content.split('\n')
    start_idx, end_idx = find_slots_section(lines)
    if start_idx == -1:
        # No slots section found
        return result

    lines_to_remove = end_idx - start_idx
    result["lines_removed"] = lines_to_remove

    if dry_run:
        print(f" Would remove lines {start_idx + 1} to {end_idx} ({lines_to_remove} lines)")
        result["removed"] = True
        return result

    # Cut the section out, then tidy blank lines at the seam only. A global
    # blank-line collapse would also rewrite unrelated parts of the file.
    head = lines[:start_idx]
    tail = lines[end_idx:]
    had_gap = False
    while head and not head[-1].strip():
        head.pop()
        had_gap = True
    while tail and not tail[0].strip():
        tail.pop(0)
        had_gap = True
    if had_gap and head and tail:
        head.append('')

    new_content = '\n'.join(head + tail)
    # Keep the file newline-terminated when the section ran to end of file.
    if content.endswith('\n') and new_content and not new_content.endswith('\n'):
        new_content += '\n'

    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(new_content)
    except (OSError, UnicodeError) as e:
        result["error"] = f"Failed to write {file_path}: {e}"
    else:
        result["removed"] = True
        print(f" Removed {lines_to_remove} lines (slots section)")

    return result
def find_class_files_with_inline_slots() -> List[Path]:
    """
    Find all class files that have inline slots.

    Scans every ``*.yaml`` file directly inside CLASSES_DIR and keeps those
    containing an unindented ``slots:`` line. Files that cannot be read or
    decoded as UTF-8 are skipped with a warning on stderr.

    Returns:
        Sorted list of paths of the matching class files.
    """
    files_with_slots = []
    for yaml_file in CLASSES_DIR.glob("*.yaml"):
        try:
            content = yaml_file.read_text(encoding="utf-8")
        except (OSError, UnicodeError) as exc:
            # Best-effort scan: report and move on, but never swallow
            # KeyboardInterrupt/SystemExit the way a bare except would.
            print(f"Warning: skipping unreadable file {yaml_file}: {exc}", file=sys.stderr)
            continue

        # Check for top-level slots: section
        if re.search(r'^slots:\s*$', content, re.MULTILINE):
            files_with_slots.append(yaml_file)

    return sorted(files_with_slots)
def main():
    """
    Command-line entry point.

    Parses the options, changes into the directory two levels above this
    script, runs remove_slots_section() on either the single --file target or
    every class file reported by find_class_files_with_inline_slots(), and
    prints a summary.

    Returns:
        0 when no errors were recorded, 1 otherwise.
    """
    cli = argparse.ArgumentParser(description="Remove inline slots from LinkML class files")
    cli.add_argument("--dry-run", action="store_true", help="Show what would be done without making changes")
    cli.add_argument("--file", type=str, help="Process only a single file")
    options = cli.parse_args()

    # Work from the project root so the relative schema paths resolve
    os.chdir(Path(__file__).parent.parent)

    targets = [Path(options.file)] if options.file else find_class_files_with_inline_slots()

    print(f"Processing {len(targets)} class file(s) with inline slots...")
    if options.dry_run:
        print("(DRY RUN - no changes will be made)\n")

    processed = 0
    modified = 0
    removed_lines = 0
    failures = []

    for target in targets:
        print(f"\nProcessing: {target.name}")
        processed += 1

        outcome = remove_slots_section(target, dry_run=options.dry_run)
        if outcome["error"]:
            failures.append(outcome["error"])
            continue
        if outcome["removed"]:
            modified += 1
            removed_lines += outcome["lines_removed"]

    # Summary
    rule = "=" * 60
    print("\n" + rule)
    print("SUMMARY")
    print(rule)
    print(f"Files processed: {processed}")
    print(f"Files modified: {modified}")
    print(f"Total lines removed: {removed_lines}")
    print(f"Errors: {len(failures)}")

    if failures:
        print("\nErrors encountered:")
        for message in failures:
            print(f" - {message}")

    return 1 if failures else 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())