175 lines
5.6 KiB
Python
175 lines
5.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Fix GHCID mismatches where ghcid_current doesn't match the filename.
|
|
|
|
Per AGENTS.md Rule on PID Stability, the ghcid_current MUST match the filename.
|
|
This script:
|
|
1. Finds all files where ghcid_current != filename
|
|
2. Updates ghcid_current to match filename
|
|
3. Updates ghcid_history to record the correction (or adds a YAML comment when the file has no ghcid_history section)
|
|
4. Logs all changes for audit trail
|
|
"""
|
|
|
|
import os
|
|
import yaml
|
|
import re
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
|
|
def get_ghcid_from_file(filepath: Path) -> str | None:
|
|
"""Extract ghcid_current from a YAML file."""
|
|
with open(filepath, 'r', encoding='utf-8') as f:
|
|
content = f.read()
|
|
|
|
# Use regex to find ghcid_current value
|
|
match = re.search(r'ghcid_current:\s*[\'"]?([^\s\'"]+)[\'"]?', content)
|
|
if match:
|
|
return match.group(1)
|
|
return None
|
|
|
|
|
|
def fix_ghcid_mismatch(filepath: Path, correct_ghcid: str, old_ghcid: str) -> bool:
    """Rewrite ``ghcid_current`` in a YAML file and record the correction.

    The file is edited as text (it is not round-tripped through a YAML
    parser). The first ``ghcid_current: <old_ghcid>`` occurrence is replaced
    with ``correct_ghcid``; single-quoted, double-quoted and bare values are
    tried in that order and the original quoting style is kept.

    The correction is then recorded in one of two ways:

    * If the file has a block-style ``ghcid_history:`` key (nothing but
      whitespace after the colon), a new list item describing the old value
      is inserted directly below it. The item is indented like the first
      existing list item, or two spaces deeper than the key when no item
      follows.
    * Otherwise a ``# GHCID Correction ...`` comment line is inserted right
      after the ``ghcid_current`` line.

    Args:
        filepath: YAML file to modify in place.
        correct_ghcid: Value ``ghcid_current`` should have (the filename stem).
        old_ghcid: Value currently stored in the file.

    Returns:
        True if the file was rewritten, False if no ``ghcid_current`` entry
        holding ``old_ghcid`` was found (a warning is printed and the file is
        left untouched).
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Each variant pairs the search pattern with the quote character to reuse
    # when writing the corrected value.
    escaped_old = re.escape(old_ghcid)
    quoting_variants = (
        (rf"ghcid_current:\s*'{escaped_old}'", "'"),
        (rf'ghcid_current:\s*"{escaped_old}"', '"'),
        (rf'ghcid_current:\s*{escaped_old}(?=\s|$)', ''),
    )

    found = None
    quote = ''
    for pattern, quote in quoting_variants:
        found = re.search(pattern, content)
        if found:
            break

    replacement = f"ghcid_current: {quote}{correct_ghcid}{quote}"
    if found is None or found.group(0) == replacement:
        print(f" WARNING: Could not find ghcid_current to replace in {filepath.name}")
        return False

    # Splice the text by position instead of passing the values to re.sub as
    # a replacement template: a backslash in either GHCID would otherwise be
    # interpreted as a regex escape.
    new_content = content[:found.start()] + replacement + content[found.end():]
    value_end = found.start() + len(replacement)

    timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')

    history = re.search(r'^([ \t]*)ghcid_history:[ \t]*\n', new_content, re.MULTILINE)
    if history:
        key_indent = history.group(1)
        # Look past blank lines for an existing list item so the new entry
        # lines up with its siblings.
        sibling = re.compile(r'(?:[ \t]*\n)*([ \t]*)-(?=[ \t\n])').match(
            new_content, history.end()
        )
        if sibling and len(sibling.group(1)) >= len(key_indent):
            item_indent = sibling.group(1)
        else:
            item_indent = key_indent + '  '

        history_entry = (
            f"{item_indent}- ghcid_value: {old_ghcid}\n"
            f"{item_indent}  valid_from: null\n"
            f"{item_indent}  valid_to: '{timestamp}'\n"
            f'{item_indent}  reason: "Corrected: ghcid_current was incorrectly set to {old_ghcid}, should be {correct_ghcid} (filename mismatch fix)"\n'
        )
        insert_at = history.end()
        new_content = new_content[:insert_at] + history_entry + new_content[insert_at:]
    else:
        # No usable history block: leave an audit comment after the corrected
        # line. Working from the replacement position makes this independent
        # of how the value is quoted.
        correction_comment = f"# GHCID Correction {timestamp}: Changed from {old_ghcid} to {correct_ghcid} (filename mismatch)\n"
        line_end = new_content.find('\n', value_end)
        if line_end == -1:
            # ghcid_current is on the last line and the file lacks a final newline.
            new_content = new_content + '\n' + correction_comment
        else:
            new_content = (
                new_content[:line_end + 1] + correction_comment + new_content[line_end + 1:]
            )

    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(new_content)

    return True
|
|
|
|
|
|
def main(custodian_dir: Path | str | None = None) -> None:
    """Find and repair every GHCID/filename mismatch in a custodian directory.

    Every ``*.yaml`` file directly inside the directory is checked: the
    filename stem is the expected GHCID and is compared with the value
    returned by ``get_ghcid_from_file``. Each mismatching file is passed to
    ``fix_ghcid_mismatch``. Progress is printed to stdout and an audit log
    (``ghcid_mismatch_fix_log.txt``) is written into the same directory,
    overwriting any previous log. No log is written when there are no
    mismatches.

    Args:
        custodian_dir: Directory to scan. Defaults to
            ``/Users/kempersc/apps/glam/data/custodian`` when omitted.
    """
    if custodian_dir is None:
        custodian_dir = '/Users/kempersc/apps/glam/data/custodian'
    custodian_dir = Path(custodian_dir)

    print("=" * 70)
    print("GHCID Mismatch Fixer")
    print("=" * 70)
    print(f"Scanning: {custodian_dir}")
    print()

    # A missing directory would otherwise glob to nothing and be reported as
    # "no mismatches", hiding a wrong path.
    if not custodian_dir.is_dir():
        print(f"ERROR: Directory not found: {custodian_dir}")
        return

    # Collect every file whose stored GHCID differs from its filename stem.
    mismatches = []
    for filepath in sorted(custodian_dir.glob('*.yaml')):
        filename_ghcid = filepath.stem  # filename without .yaml
        file_ghcid = get_ghcid_from_file(filepath)

        if file_ghcid and file_ghcid != filename_ghcid:
            mismatches.append({
                'filepath': filepath,
                'filename_ghcid': filename_ghcid,
                'file_ghcid': file_ghcid,
            })

    print(f"Found {len(mismatches)} files with GHCID mismatches")
    print()

    if not mismatches:
        print("No mismatches to fix!")
        return

    print("Mismatches to fix:")
    print("-" * 70)
    for m in mismatches:
        print(f" {m['filepath'].name}")
        print(f" Current ghcid_current: {m['file_ghcid']}")
        print(f" Should be: {m['filename_ghcid']}")
        print()

    print("Fixing mismatches...")
    print("-" * 70)

    fixed_count = 0
    failed_count = 0

    for m in mismatches:
        print(f"Fixing: {m['filepath'].name}")
        # Keep the outcome on the record so the audit log can report it per file.
        m['fixed'] = fix_ghcid_mismatch(m['filepath'], m['filename_ghcid'], m['file_ghcid'])
        if m['fixed']:
            print(f" OK: {m['file_ghcid']} -> {m['filename_ghcid']}")
            fixed_count += 1
        else:
            print(" FAILED")
            failed_count += 1

    print()
    print("=" * 70)
    print(f"Summary: Fixed {fixed_count} files, Failed {failed_count} files")
    print("=" * 70)

    # Write audit log
    log_path = custodian_dir / 'ghcid_mismatch_fix_log.txt'
    with open(log_path, 'w', encoding='utf-8') as f:
        f.write("GHCID Mismatch Fix Log\n")
        f.write(f"Generated: {datetime.now(timezone.utc).isoformat()}\n")
        f.write("=" * 70 + "\n\n")
        f.write(f"Total mismatches found: {len(mismatches)}\n")
        f.write(f"Fixed: {fixed_count}\n")
        f.write(f"Failed: {failed_count}\n\n")
        f.write("Details:\n")
        f.write("-" * 70 + "\n")
        for m in mismatches:
            status = "FIXED" if m['fixed'] else "FAILED (file left unchanged)"
            f.write(f"File: {m['filepath'].name}\n")
            f.write(f" Old ghcid_current: {m['file_ghcid']}\n")
            f.write(f" New ghcid_current: {m['filename_ghcid']}\n")
            # Without this line a failed file reads as if it had been corrected.
            f.write(f" Status: {status}\n\n")

    print(f"\nAudit log written to: {log_path}")
|
|
|
|
|
|
# Run the fixer only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|