#!/usr/bin/env python3
"""
Fix malformed ghcid_history entries in YAML files.

The issue: some files have multiple history entries concatenated on a
single line, e.g.

    - ghcid: XX-XX-XXX-L-OLD
      reason: Migrated from CH to EG namespace - Assiut - ghcid: XX-XX-XXX-L-AUL

The embedded ``- ghcid:`` (or ``- valid_from:``) starts a new history
entry and must be moved onto its own line as a separate list item:

    - ghcid: XX-XX-XXX-L-OLD
      reason: Migrated from CH to EG namespace - Assiut
    - ghcid: XX-XX-XXX-L-AUL

Usage: run from the repository root; pass ``--dry-run`` to only report
which files would be changed.
"""

import os
import re
import sys
from pathlib import Path

# Key whose value swallowed the start of the following list item.
_REASON_KEY = 'reason:'

# Embedded list-item openers, in priority order (first match wins).
_EMBEDDED_MARKERS = (' - ghcid:', ' - valid_from:')

# Captures a line's leading whitespace; always matches (possibly empty).
_INDENT_RE = re.compile(r'^(\s*)')


def _find_embedded_marker(line: str) -> int:
    """Return the index of an embedded list marker after ``reason:``, or -1.

    The marker is only searched for *after* the ``reason:`` key, so a line
    that merely mentions ``reason:`` later on (e.g. in a trailing comment
    of a regular ``- ghcid:`` line) is left untouched.
    """
    reason_at = line.find(_REASON_KEY)
    if reason_at == -1:
        return -1
    search_from = reason_at + len(_REASON_KEY)
    for marker in _EMBEDDED_MARKERS:
        marker_at = line.find(marker, search_from)
        if marker_at != -1:
            return marker_at
    return -1


def _list_item_indent(previous_lines: list, line: str) -> str:
    """Return the indentation of the ``-`` that opened the entry owning *line*.

    If *line* itself starts with a dash, its own indentation is the answer.
    Otherwise walk back to the nearest less-indented, non-blank,
    non-comment line: if that line is a list item, its indentation is
    used. When no enclosing list item can be identified, fall back to
    *line*'s own indentation.
    """
    own_indent = _INDENT_RE.match(line).group(1)
    if line[len(own_indent):].startswith('- '):
        return own_indent

    for previous in reversed(previous_lines):
        stripped = previous.strip()
        if not stripped or stripped.startswith('#'):
            continue
        indent = _INDENT_RE.match(previous).group(1)
        if len(indent) < len(own_indent):
            # First shallower line is the parent: either the entry's dash
            # or some other construct we should not guess about.
            if stripped == '-' or stripped.startswith('- '):
                return indent
            break
    return own_indent


def fix_yaml_content(content: str) -> str:
    """Fix malformed ghcid_history entries.

    Every line of the form ``reason: ... - ghcid: ...`` (or with
    ``- valid_from:``) is split in two: the truncated ``reason`` line and
    a new list item placed at the indentation of the dash that opened the
    current entry. All other lines are returned unchanged.

    Args:
        content: Full text of a YAML file.

    Returns:
        The corrected text; identical to *content* when nothing matched.
    """
    fixed_lines = []
    for line in content.split('\n'):
        marker_at = _find_embedded_marker(line)
        if marker_at == -1:
            fixed_lines.append(line)
            continue

        dash_indent = _list_item_indent(fixed_lines, line)
        # Keep the reason text, dropping the whitespace left before the marker.
        fixed_lines.append(line[:marker_at].rstrip())
        # Skip the marker's leading space so the new item starts at "- ".
        fixed_lines.append(f"{dash_indent}{line[marker_at + 1:]}")

    return '\n'.join(fixed_lines)


def process_file(filepath: Path, dry_run: bool = False) -> bool:
    """Process a single file. Returns True if changes were made.

    Args:
        filepath: YAML file to check and, unless *dry_run*, rewrite in place.
        dry_run: When True, only report the file; nothing is written.

    Returns:
        True if the file needs (dry run) or received a fix, else False.
    """
    original = filepath.read_text(encoding='utf-8')
    fixed = fix_yaml_content(original)

    if fixed == original:
        return False

    if dry_run:
        print(f"Would fix: {filepath.name}")
    else:
        filepath.write_text(fixed, encoding='utf-8')
        print(f"Fixed: {filepath.name}")
    return True


def main():
    """Fix all BE-/EG- custodian YAML files under ``data/custodian``.

    Exits with status 1 if the directory does not exist. With
    ``--dry-run`` on the command line, files are only reported.
    """
    dry_run = '--dry-run' in sys.argv

    custodian_dir = Path('data/custodian')
    if not custodian_dir.exists():
        print("Error: data/custodian directory not found", file=sys.stderr)
        sys.exit(1)

    fixed_count = 0

    # Process files known to have issues (BE and EG prefixes)
    for prefix in ['BE-', 'EG-']:
        for yaml_file in sorted(custodian_dir.glob(f'{prefix}*.yaml')):
            if process_file(yaml_file, dry_run):
                fixed_count += 1

    print(f"\n{'Would fix' if dry_run else 'Fixed'}: {fixed_count} files")


if __name__ == '__main__':
    main()