#!/usr/bin/env python3
"""
Fix YAML examples formatting issues in LinkML slot files.

The common issue is examples like:

    examples:
      - value: "some content..."
      description: Some description

When it should be:

    examples:
      - value: "some content..."
        description: Some description

(description should be indented under the list item, not at the same level
as "- value:")
"""

import re
import sys
from pathlib import Path

# A line consisting solely of an "examples:" key; group 1 is its indentation.
_EXAMPLES_HEADER = re.compile(r'^(\s*)examples:\s*$')

# "- value:" with nothing after the colon, directly followed by a "- ..." line.
_VALUE_THEN_LIST = re.compile(r'(\s+- value:)\n(\s+)(- [^\n]+)')


def _reindent_example_descriptions(content: str) -> str:
    """Indent misplaced ``description:`` keys under their ``- value:`` item.

    Walks the text line by line. Inside an ``examples:`` section, a
    ``description:`` line that sits at the same indentation as the preceding
    list item's dash is pushed two columns to the right so that it becomes a
    key of that list item. All other lines are returned unchanged.
    """
    fixed_lines = []
    in_examples = False
    in_example_item = False
    example_indent = 0
    item_re = None
    desc_re = None

    for line in content.split('\n'):
        # Detect entering an examples section.
        header = _EXAMPLES_HEADER.match(line)
        if header:
            in_examples = True
            # Do not carry a pending item over from an earlier section.
            in_example_item = False
            example_indent = len(header.group(1))
            item_indent = example_indent + 2
            # The quantifier must be produced by a single f-string: "{{" and
            # "}}" collapse to literal braces only inside an f-string, so
            # concatenating a plain raw string containing "}}" yields a
            # pattern with a stray "}" that never matches.
            item_re = re.compile(
                rf'^\s{{{item_indent}}}-\s+(?:value:|description:)'
            )
            desc_re = re.compile(rf'^\s{{{item_indent}}}(description:.*)')
            fixed_lines.append(line)
            continue

        if in_examples:
            stripped = line.strip()

            # A non-blank line that is not indented to list-item depth may
            # end the section: either it is indented less than "examples:",
            # or it looks like a "key:" line rather than a list item.
            if stripped and not line.startswith(' ' * (example_indent + 2)):
                dedented = not line.startswith(' ' * example_indent)
                looks_like_key = (
                    not stripped.startswith('-')
                    and ':' in line.split('#')[0]
                )
                if dedented or looks_like_key:
                    in_examples = False
                    in_example_item = False
                    fixed_lines.append(line)
                    continue

            # A new example item ("- value:" or "- description:").
            if item_re.match(line):
                in_example_item = True
                fixed_lines.append(line)
                continue

            # A description at the dash's column belongs to the item above
            # it: move it two columns right. Only the first such line per
            # item is moved.
            misplaced = desc_re.match(line)
            if misplaced and in_example_item:
                fixed_lines.append(
                    ' ' * (example_indent + 4) + misplaced.group(1)
                )
                in_example_item = False
                continue

        fixed_lines.append(line)

    return '\n'.join(fixed_lines)


def _indent_list_after_empty_value(content: str) -> str:
    """Indent a ``- ...`` line that directly follows an empty ``- value:``.

    The first list line after ``- value:`` is indented by two more spaces
    than it currently has.

    NOTE(review): the substitution is unconditional, so it adds two more
    spaces on every run and only touches the first line of the list --
    confirm that this is the intended behaviour.
    """
    return _VALUE_THEN_LIST.sub(
        lambda m: f'{m.group(1)}\n{" " * (len(m.group(2)) + 2)}{m.group(3)}',
        content,
    )


def fix_yaml_file(filepath: Path) -> bool:
    """Fix YAML formatting issues in a file.

    Applies the description re-indent pass and then the empty-value list
    pass, and rewrites the file only when the text actually changed.

    Args:
        filepath: Path of the YAML file to fix in place (read and written
            as UTF-8).

    Returns:
        True if the file was modified, False otherwise.
    """
    original = filepath.read_text(encoding='utf-8')

    content = _reindent_example_descriptions(original)
    content = _indent_list_after_empty_value(content)

    if content == original:
        return False

    filepath.write_text(content, encoding='utf-8')
    return True


def main():
    """Fix every ``*.yaml`` file in the slots directory and report the count.

    Exits with status 1 if the slots directory does not exist.
    """
    slots_dir = Path('schemas/20251121/linkml/modules/slots')

    if not slots_dir.exists():
        print(f"Directory not found: {slots_dir}")
        sys.exit(1)

    modified = 0
    for yaml_file in sorted(slots_dir.glob('*.yaml')):
        if fix_yaml_file(yaml_file):
            print(f"Fixed: {yaml_file.name}")
            modified += 1

    print(f"\nModified {modified} files")


if __name__ == '__main__':
    main()