182 lines
5.7 KiB
Python
182 lines
5.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Remove inline slots sections from LinkML class files.
|
|
|
|
Per Rule 38 (AGENTS.md), all LinkML slots MUST be centralized in
|
|
schemas/20251121/linkml/modules/slots/, never inline in class files.
|
|
|
|
This script removes the `slots:` section from class files after
|
|
the slots have been extracted to individual files.
|
|
|
|
Usage:
|
|
python scripts/remove_inline_slots.py [--dry-run] [--file PATH]
|
|
|
|
Options:
|
|
--dry-run Show what would be done without making changes
|
|
--file PATH Process only a single file
|
|
"""
|
|
|
|
import argparse
|
|
import os
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import List, Tuple
|
|
|
|
# Locations of the LinkML schema tree, expressed relative to the project root.
SCHEMA_ROOT = Path("schemas/20251121/linkml")
# Directory whose *.yaml class files are scanned for inline `slots:` sections.
CLASSES_DIR = SCHEMA_ROOT.joinpath("modules", "classes")
|
|
|
|
|
|
# A key starting in column 0 followed by ':' and then whitespace or end of
# line, e.g. "classes:" or "default_range: string".  Lines that begin with
# whitespace, '#' (comment) or '-' (sequence item) never match.
_TOP_LEVEL_KEY_RE = re.compile(r"^[^\s#\-][^:#]*?\s*:(?:\s|$)")


def _ends_slots_section(line: str) -> bool:
    """Return True if *line* marks the first line after a slots: section."""
    # Blank and indented lines always belong to the current section.
    if not line or line[0].isspace():
        return False
    # YAML document start / end markers.
    if line.startswith(("---", "...")):
        return True
    # Any unindented line ending in ':' (a nested-mapping key) ends the section.
    if line.rstrip().endswith(":"):
        return True
    # Top-level keys that carry an inline value ("name: Foo") end it as well;
    # without this check they would be swallowed into the section and deleted.
    return _TOP_LEVEL_KEY_RE.match(line) is not None


def find_slots_section(lines: List[str]) -> Tuple[int, int]:
    """
    Find the start and end line indices of the top-level slots: section.

    The section starts at a line that is exactly ``slots:`` (ignoring trailing
    whitespace) and extends up to, but not including, the next top-level key,
    a ``---`` / ``...`` document marker, or the end of the input.

    Args:
        lines: The file content split into lines, without line terminators.

    Returns:
        Tuple of (start_index, end_index) where end_index is exclusive,
        or (-1, -1) if no slots: line is found.
    """
    start_idx = -1

    for i, line in enumerate(lines):
        # Unindented "slots:" opens the section (an indented one never
        # compares equal because only trailing whitespace is stripped).
        if line.rstrip() == "slots:":
            start_idx = i
            continue

        if start_idx != -1 and _ends_slots_section(line):
            return start_idx, i

    if start_idx == -1:
        return -1, -1

    # Section was opened but never closed: it runs to the end of the input.
    return start_idx, len(lines)
|
|
|
|
|
|
def remove_slots_section(file_path: Path, dry_run: bool = False) -> dict:
    """
    Remove the slots: section from a class file.

    The file is read and written as UTF-8 so the result does not depend on
    the platform's locale encoding.

    Args:
        file_path: Path of the YAML class file to rewrite in place.
        dry_run: When True, only report what would be removed; the file is
            not written.

    Returns:
        Dict with statistics: {removed: bool, lines_removed: int, error: str or None}.
        In dry-run mode ``removed`` is True when a section would be removed.
    """
    result = {"removed": False, "lines_removed": 0, "error": None}

    try:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
    except (OSError, ValueError) as e:
        # ValueError covers UnicodeDecodeError and invalid path strings.
        result["error"] = f"Failed to read {file_path}: {e}"
        return result

    lines = content.split('\n')
    start_idx, end_idx = find_slots_section(lines)

    if start_idx == -1:
        # No slots section found
        return result

    lines_to_remove = end_idx - start_idx
    result["lines_removed"] = lines_to_remove

    if dry_run:
        print(f" Would remove lines {start_idx + 1} to {end_idx} ({lines_to_remove} lines)")
        result["removed"] = True
        return result

    # Remove the slots section
    new_content = '\n'.join(lines[:start_idx] + lines[end_idx:])

    # Collapse runs of blank lines left behind by the removal.
    # NOTE: the substitution is applied to the whole file, not only the seam.
    new_content = re.sub(r'\n{3,}', '\n\n', new_content)

    # When the section ran to end of file, the final newline went with it;
    # restore it so the file still ends with a line terminator.
    if new_content and content.endswith('\n') and not new_content.endswith('\n'):
        new_content += '\n'

    try:
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(new_content)
    except (OSError, ValueError) as e:
        result["error"] = f"Failed to write {file_path}: {e}"
        return result

    result["removed"] = True
    print(f" Removed {lines_to_remove} lines (slots section)")
    return result
|
|
|
|
|
|
def find_class_files_with_inline_slots() -> List[Path]:
    """
    Find all class files that have inline slots.

    Scans every ``*.yaml`` file directly inside CLASSES_DIR and keeps those
    containing an unindented ``slots:`` line. Files that cannot be read or
    decoded as UTF-8 are skipped with a warning on stderr.

    Returns:
        Sorted list of matching file paths.
    """
    files_with_slots = []

    for yaml_file in CLASSES_DIR.glob("*.yaml"):
        try:
            content = yaml_file.read_text(encoding="utf-8")
        except (OSError, ValueError) as e:
            # Best-effort scan: report and move on, but do not swallow
            # KeyboardInterrupt / SystemExit as a bare except would.
            print(f"Warning: skipping unreadable file {yaml_file}: {e}", file=sys.stderr)
            continue

        # Check for top-level slots: section
        if re.search(r'^slots:\s*$', content, re.MULTILINE):
            files_with_slots.append(yaml_file)

    return sorted(files_with_slots)
|
|
|
|
|
|
def main() -> int:
    """
    Command-line entry point.

    Parses ``--dry-run`` and ``--file``, changes the working directory to the
    project root (the parent of this script's directory), processes the
    selected class files and prints a summary.

    Returns:
        0 if no errors were recorded, 1 otherwise.
    """
    parser = argparse.ArgumentParser(description="Remove inline slots from LinkML class files")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be done without making changes")
    parser.add_argument("--file", type=str, help="Process only a single file")
    args = parser.parse_args()

    # A relative --file is first looked up against the caller's working
    # directory, before the chdir below changes what relative paths mean.
    # If it does not exist there it is left relative, so it is resolved
    # against the project root as before.
    single_file = None
    if args.file:
        single_file = Path(args.file)
        if not single_file.is_absolute():
            from_cwd = Path.cwd() / single_file
            if from_cwd.exists():
                single_file = from_cwd

    # Change to project root. resolve() makes this correct even when
    # __file__ is a bare relative filename, where .parent.parent would be ".".
    project_root = Path(__file__).resolve().parent.parent
    os.chdir(project_root)

    total_stats = {"files_processed": 0, "files_modified": 0, "lines_removed": 0, "errors": []}

    if single_file is not None:
        files_to_process = [single_file]
    else:
        files_to_process = find_class_files_with_inline_slots()

    print(f"Processing {len(files_to_process)} class file(s) with inline slots...")
    if args.dry_run:
        print("(DRY RUN - no changes will be made)\n")

    for class_file in files_to_process:
        print(f"\nProcessing: {class_file.name}")
        total_stats["files_processed"] += 1

        result = remove_slots_section(class_file, dry_run=args.dry_run)

        if result["error"]:
            total_stats["errors"].append(result["error"])
        elif result["removed"]:
            # In dry-run mode this counts files that would be modified.
            total_stats["files_modified"] += 1
            total_stats["lines_removed"] += result["lines_removed"]

    # Summary
    print("\n" + "=" * 60)
    print("SUMMARY")
    print("=" * 60)
    print(f"Files processed: {total_stats['files_processed']}")
    print(f"Files modified: {total_stats['files_modified']}")
    print(f"Total lines removed: {total_stats['lines_removed']}")
    print(f"Errors: {len(total_stats['errors'])}")

    if total_stats["errors"]:
        print("\nErrors encountered:")
        for error in total_stats["errors"]:
            print(f" - {error}")

    return 0 if not total_stats["errors"] else 1
|
|
|
|
|
|
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|