#!/usr/bin/env python3
"""
Remove inline slots sections from LinkML class files.

Per Rule 38 (AGENTS.md), all LinkML slots MUST be centralized in
schemas/20251121/linkml/modules/slots/, never inline in class files.

This script removes the `slots:` section from class files after the
slots have been extracted to individual files.

Usage:
    python scripts/remove_inline_slots.py [--dry-run] [--file PATH]

Options:
    --dry-run   Show what would be done without making changes
    --file PATH Process only a single file
"""

import argparse
import os
import re
import sys
from pathlib import Path
from typing import List, Tuple

# Schema paths
SCHEMA_ROOT = Path("schemas/20251121/linkml")
CLASSES_DIR = SCHEMA_ROOT / "modules" / "classes"

# Matches a top-level `slots:` key that has no inline value.
_TOP_LEVEL_SLOTS_RE = re.compile(r'^slots:\s*$', re.MULTILINE)


def _ends_slots_section(line: str) -> bool:
    """
    Decide whether *line* is the first line after a top-level slots block.

    Blank lines, indented lines, column-0 comments and column-0 list items
    are treated as part of the block. YAML document markers and any other
    content starting in column 0 (i.e. the next top-level key, with or
    without an inline value) end it.
    """
    if not line or line[0].isspace():
        return False
    # Document separators must be tested before the list-item check below,
    # because `---` also starts with a dash.
    if line.startswith(('---', '...')):
        return True
    if line.startswith('#'):
        return False
    if line.startswith('- ') or line.rstrip() == '-':
        return False
    return True


def find_slots_section(lines: List[str]) -> Tuple[int, int]:
    """
    Find the start and end line indices of the slots: section.

    The section starts at the first unindented line that is exactly
    `slots:` (trailing whitespace ignored) and runs up to, but not
    including, the next top-level key or YAML document marker.

    Args:
        lines: File content split into lines, without line terminators.

    Returns:
        Tuple of (start_index, end_index) or (-1, -1) if not found.
        end_index is exclusive; it equals len(lines) when the section
        runs to the end of the file.
    """
    start_idx = -1

    for i, line in enumerate(lines):
        if start_idx == -1:
            # Still searching for the top-level `slots:` key.
            if line.rstrip() == "slots:":
                start_idx = i
            continue

        # Any top-level key ends the section, including keys that carry an
        # inline value such as `default_prefix: hc`; those must be kept.
        if _ends_slots_section(line):
            return start_idx, i

    if start_idx == -1:
        return -1, -1

    # Found a start but no following top-level content: section runs to EOF.
    return start_idx, len(lines)


def remove_slots_section(file_path: Path, dry_run: bool = False) -> dict:
    """
    Remove the slots: section from a class file.

    Args:
        file_path: Path of the YAML file to rewrite in place.
        dry_run: When True, only report what would be removed.

    Returns:
        Dict with statistics: {removed: bool, lines_removed: int, error: str or None}
    """
    result = {"removed": False, "lines_removed": 0, "error": None}

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeError) as e:
        result["error"] = f"Failed to read {file_path}: {e}"
        return result

    lines = content.split('\n')
    # split() yields a phantom empty element for the final newline; drop it
    # so line counts are accurate and the newline can be restored on write.
    had_trailing_newline = content.endswith('\n')
    if had_trailing_newline:
        lines.pop()

    start_idx, end_idx = find_slots_section(lines)

    if start_idx == -1:
        # No slots section found
        return result

    lines_to_remove = end_idx - start_idx
    result["lines_removed"] = lines_to_remove

    if dry_run:
        print(f"  Would remove lines {start_idx + 1} to {end_idx} ({lines_to_remove} lines)")
        result["removed"] = True
        return result

    # Remove the slots section
    kept = lines[:start_idx] + lines[end_idx:]

    # When the section ran to EOF, the blank lines that used to separate it
    # from the preceding content are now dangling at the end of the file.
    if end_idx == len(lines):
        while kept and not kept[-1].strip():
            kept.pop()

    new_content = '\n'.join(kept)
    if had_trailing_newline and kept:
        new_content += '\n'

    # Clean up any resulting double blank lines
    new_content = re.sub(r'\n{3,}', '\n\n', new_content)

    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(new_content)
    except (OSError, UnicodeError) as e:
        result["error"] = f"Failed to write {file_path}: {e}"
        return result

    result["removed"] = True
    print(f"  Removed {lines_to_remove} lines (slots section)")
    return result


def find_class_files_with_inline_slots() -> List[Path]:
    """
    Find all class files that have inline slots.

    Scans `*.yaml` directly under CLASSES_DIR (relative to the current
    working directory) for a top-level `slots:` key. Unreadable files are
    skipped with a warning on stderr.

    Returns:
        Sorted list of matching file paths.
    """
    files_with_slots = []

    for yaml_file in CLASSES_DIR.glob("*.yaml"):
        try:
            with open(yaml_file, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeError) as e:
            # Best effort: one unreadable file must not abort the scan.
            print(f"  Warning: skipping unreadable file {yaml_file}: {e}", file=sys.stderr)
            continue

        # Check for top-level slots: section
        if _TOP_LEVEL_SLOTS_RE.search(content):
            files_with_slots.append(yaml_file)

    return sorted(files_with_slots)


def main():
    """
    Command-line entry point.

    Returns:
        Process exit code: 0 on success, 1 if any file produced an error.
    """
    parser = argparse.ArgumentParser(description="Remove inline slots from LinkML class files")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be done without making changes")
    parser.add_argument("--file", type=str, help="Process only a single file")
    args = parser.parse_args()

    # Remember where the user invoked us so a relative --file still resolves
    # after the chdir below.
    invocation_dir = Path.cwd()

    # Change to project root. resolve() guards against a relative __file__,
    # whose parent chain would otherwise collapse to ".".
    project_root = Path(__file__).resolve().parent.parent
    os.chdir(project_root)

    total_stats = {"files_processed": 0, "files_modified": 0, "lines_removed": 0, "errors": []}

    if args.file:
        # Prefer the path as seen from the invocation directory; fall back to
        # interpreting it relative to the project root.
        candidate = invocation_dir / args.file
        files_to_process = [candidate if candidate.exists() else Path(args.file)]
    else:
        files_to_process = find_class_files_with_inline_slots()

    print(f"Processing {len(files_to_process)} class file(s) with inline slots...")
    if args.dry_run:
        print("(DRY RUN - no changes will be made)\n")

    for class_file in files_to_process:
        print(f"\nProcessing: {class_file.name}")
        total_stats["files_processed"] += 1

        result = remove_slots_section(class_file, dry_run=args.dry_run)

        if result["error"]:
            total_stats["errors"].append(result["error"])
        elif result["removed"]:
            total_stats["files_modified"] += 1
            total_stats["lines_removed"] += result["lines_removed"]

    # Summary
    print("\n" + "=" * 60)
    print("SUMMARY")
    print("=" * 60)
    print(f"Files processed: {total_stats['files_processed']}")
    print(f"Files modified: {total_stats['files_modified']}")
    print(f"Total lines removed: {total_stats['lines_removed']}")
    print(f"Errors: {len(total_stats['errors'])}")

    if total_stats["errors"]:
        print("\nErrors encountered:")
        for error in total_stats["errors"]:
            print(f"  - {error}")

    return 0 if not total_stats["errors"] else 1


if __name__ == "__main__":
    sys.exit(main())