#!/usr/bin/env python3
r"""
Fix YAML examples formatting issues in LinkML slot files.

The main issue is examples sections with bad formatting like:

    examples:
      - value: "multi\nline\nstring..."
      description: Some description

When it should be:

    examples:
      - value: |
          multi
          line
          string
        description: Some description
"""

import re
import sys
from pathlib import Path
from typing import List, Optional, Tuple

# A bare ``examples:`` key; group 1 is its leading whitespace.
_EXAMPLES_KEY_RE = re.compile(r'^(\s*)examples:\s*$')
# A list item whose value starts a double-quoted string; group 1 is the
# whitespace before the dash, group 2 is everything after the opening quote.
_QUOTED_VALUE_RE = re.compile(r'^(\s+)- value:\s*"(.*)$')
# Any ``- value:`` list item (quoted or not); group 1 is the dash indent.
_VALUE_ITEM_RE = re.compile(r'^(\s+)- value:')
# An indented ``description:`` key; group 2 is the key plus its value.
_DESCRIPTION_RE = re.compile(r'^(\s+)(description:\s*.*)$')
# Escape sequences that are translated when leaving double-quoted style.
_ESCAPE_RE = re.compile(r'\\([nt"\\])')
_ESCAPE_MAP = {'n': '\n', 't': '\t', '"': '"', '\\': '\\'}


def _is_closed(text: str) -> bool:
    """Return True if *text* ends with an unescaped closing double quote."""
    trimmed = text.rstrip()
    if not trimmed.endswith('"'):
        return False
    body = trimmed[:-1]
    # An odd run of backslashes escapes the quote; an even run is made of
    # escaped backslashes, so the quote really terminates the string.
    trailing_backslashes = len(body) - len(body.rstrip('\\'))
    return trailing_backslashes % 2 == 0


def _unescape(text: str) -> str:
    """Translate ``\\n``, ``\\t``, ``\\"`` and ``\\\\`` in one left-to-right pass.

    A single pass keeps an escaped backslash followed by ``n`` from being
    mistaken for a newline escape. Other escape sequences are left untouched.
    """
    return _ESCAPE_RE.sub(lambda m: _ESCAPE_MAP[m.group(1)], text)


def _collect_quoted_value(lines: List[str], index: int, first: str) -> Tuple[str, int]:
    """Gather a double-quoted value that may span several physical lines.

    Args:
        lines: All lines of the document.
        index: Index of the line holding the opening quote.
        first: Text following the opening quote on that line.

    Returns:
        ``(raw, last)`` where *raw* is the still-escaped text without the
        closing quote and *last* is the index of the last line consumed
        (``len(lines)`` if the quote was never closed).
    """
    text = first
    while not _is_closed(text):
        index += 1
        if index >= len(lines):
            break
        # Fold the line break into a single space (as YAML does for
        # double-quoted scalars) instead of fusing adjacent words.
        text = f'{text.rstrip()} {lines[index].strip()}'
    text = text.rstrip()
    if _is_closed(text):
        text = text[:-1]
    return text, index


def _fix_examples_section(lines: List[str], start: int, key_indent: int,
                          out: List[str]) -> int:
    """Rewrite the body of one ``examples:`` section.

    Args:
        lines: All lines of the document.
        start: Index of the first line after the ``examples:`` key.
        key_indent: Number of leading whitespace characters of that key.
        out: Output list that fixed lines are appended to.

    Returns:
        Index of the first line that is not part of the section.
    """
    item_indent: Optional[str] = None  # whitespace before the current "- value:" dash
    described = False                  # current item already has a description
    i = start
    while i < len(lines):
        line = lines[i]
        stripped = line.strip()

        desc = _DESCRIPTION_RE.match(line)
        if desc and item_indent is not None:
            if desc.group(1) == item_indent and not described:
                # Description aligned with the dash: it belongs inside the
                # list item's mapping, i.e. two columns further in.
                out.append(f'{item_indent}  {desc.group(2)}')
                described = True
                i += 1
                continue
            if desc.group(1) == item_indent + '  ':
                # Already correctly placed; remember it so a later
                # description at dash level is not mistaken for this item's.
                described = True

        if stripped and not stripped.startswith('#'):
            width = len(line) - len(line.lstrip())
            is_item = stripped == '-' or stripped.startswith('- ')
            # The section ends at the first line that is not nested under the
            # key. List items may legally sit at the key's own indent.
            if width < key_indent or (width == key_indent and not is_item):
                break

        quoted = _QUOTED_VALUE_RE.match(line)
        if quoted:
            item_indent = quoted.group(1)
            described = False
            raw, i = _collect_quoted_value(lines, i, quoted.group(2))
            out.append(f'{item_indent}- value: |')
            out.extend(
                f'{item_indent}    {piece}' if piece else ''
                for piece in _unescape(raw).split('\n')
            )
            i += 1
            continue

        if _VALUE_ITEM_RE.match(line):
            item_indent = _VALUE_ITEM_RE.match(line).group(1)
            described = False
        out.append(line)
        i += 1
    return i


def fix_yaml_content(content: str) -> str:
    """Fix YAML content with examples issues.

    Every ``examples:`` section is scanned. Double-quoted ``- value:`` strings
    are rewritten as literal block scalars (``|``) and a ``description:`` that
    is aligned with the list dash is moved two columns in so it becomes part
    of the same example. Lines outside examples sections are left untouched.

    Args:
        content: Full text of a YAML document.

    Returns:
        The rewritten text; identical to *content* when nothing needed fixing.
    """
    lines = content.split('\n')
    result: List[str] = []
    i = 0
    while i < len(lines):
        line = lines[i]
        section = _EXAMPLES_KEY_RE.match(line)
        result.append(line)
        i += 1
        if section:
            i = _fix_examples_section(lines, i, len(section.group(1)), result)
    return '\n'.join(result)


def fix_file(filepath: Path) -> bool:
    """Fix a single file in place. Returns True if modified.

    Read failures are reported on stdout and yield False; write failures
    propagate to the caller.
    """
    try:
        # YAML streams are UTF-8 by default; do not depend on the locale.
        original = filepath.read_text(encoding='utf-8')
    except (OSError, UnicodeError) as e:
        print(f"Error reading {filepath}: {e}")
        return False

    fixed = fix_yaml_content(original)
    if fixed == original:
        return False
    filepath.write_text(fixed, encoding='utf-8')
    return True


def main():
    """Main entry point.

    Tries to repair every slot file that does not parse as YAML and prints a
    summary. Files that already parse are skipped.
    """
    slots_dir = Path('schemas/20251121/linkml/modules/slots')
    if not slots_dir.exists():
        print(f"Directory not found: {slots_dir}")
        sys.exit(1)

    # Imported once, outside the loop and outside any try block whose except
    # clause refers to ``yaml``.
    try:
        import yaml
    except ImportError:
        print("PyYAML is required to validate the slot files")
        sys.exit(1)

    def parses(path: Path) -> bool:
        """Return True if *path* loads as YAML without a parse error."""
        try:
            yaml.safe_load(path.read_text(encoding='utf-8'))
        except yaml.YAMLError:
            return False
        return True

    modified = 0
    errors = 0

    for yaml_file in sorted(slots_dir.glob('*.yaml')):
        if parses(yaml_file):
            continue  # File is valid, skip

        if not fix_file(yaml_file):
            # Broken, but nothing this script knows how to repair; it still
            # counts towards the "still have errors" total.
            print(f"No fix applied: {yaml_file.name}")
            errors += 1
            continue

        # Verify the fix worked
        if parses(yaml_file):
            print(f"Fixed: {yaml_file.name}")
            modified += 1
        else:
            print(f"Still broken after fix: {yaml_file.name}")
            errors += 1

    print(f"\nModified {modified} files, {errors} still have errors")


if __name__ == '__main__':
    main()