glam/scripts/migrate_web_dirs_to_ghcid.py
2025-12-27 02:15:17 +01:00

195 lines
5.9 KiB
Python
Executable file

#!/usr/bin/env python3
"""
Migrate web directories from index-based naming to GHCID-based naming.

This script reads the _entry_to_ghcid.txt mapping file and renames directories
from numeric indices (e.g., 0002, 0681) to their corresponding GHCID names
(e.g., NL-DR-ASS-A-DA, NL-NH-HEE-S-HKH).

Usage:
    python scripts/migrate_web_dirs_to_ghcid.py [--dry-run] [--verbose]

Options:
    --dry-run    Show what would be done without making changes
    --verbose    Show detailed progress information
"""
import os
import sys
import argparse
from pathlib import Path
from collections import defaultdict
def parse_mapping_file(mapping_path: Path) -> dict[str, str]:
    """Parse the entry-to-GHCID mapping file.

    Each non-blank line is expected to hold an integer index, whitespace,
    and then a GHCID (everything after the first run of whitespace).
    Blank lines are ignored. Lines with fewer than two fields, and lines
    whose index is not a non-negative integer, are reported with a warning
    on stdout and skipped.

    Args:
        mapping_path: Path to the mapping file. It is decoded as UTF-8 and
            a leading byte-order mark, if present, is ignored.

    Returns:
        Dict mapping each index to its GHCID. Every index is stored under
        two keys: unpadded ("2") and zero-padded to four digits ("0002").
        For indices of 1000 and above the two keys are the same string, so
        only one entry exists. For duplicate indices, the first mapping
        encountered wins.
    """
    mapping: dict[str, str] = {}

    # An explicit codec keeps parsing independent of the platform locale;
    # "utf-8-sig" additionally drops a BOM that would otherwise glue itself
    # to the first index and make it unparseable.
    with open(mapping_path, "r", encoding="utf-8-sig") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue

            parts = line.split(maxsplit=1)
            if len(parts) != 2:
                print(f"Warning: Skipping malformed line: {line}")
                continue
            index_str, ghcid = parts

            try:
                index_int = int(index_str)
            except ValueError:
                index_int = None
            # Negative numbers parse fine but can never name an index
            # directory, so they are rejected like any other bad index.
            if index_int is None or index_int < 0:
                print(f"Warning: Invalid index '{index_str}' in line: {line}")
                continue

            index_normalized = str(index_int)
            # First mapping wins; the unpadded key is always written, so it
            # doubles as the "already seen" marker.
            if index_normalized in mapping:
                continue

            mapping[index_normalized] = ghcid
            # Directories may be named with 4-digit zero padding.
            mapping[f"{index_int:04d}"] = ghcid

    return mapping
def find_index_directories(web_dir: Path) -> list[Path]:
    """Find all directories in *web_dir* whose names are purely numeric.

    Zero-padded names such as "0002" count as numeric. Only direct children
    of *web_dir* are examined; plain files are ignored.

    Args:
        web_dir: Directory to scan.

    Returns:
        Matching directory paths sorted by the integer value of their name.
    """
    index_dirs = [
        item
        for item in web_dir.iterdir()
        # isascii() excludes Unicode "digits" such as "²" that satisfy
        # isdigit() but make int() raise in the sort key below.
        if item.name.isascii() and item.name.isdigit() and item.is_dir()
    ]
    # int() already ignores leading zeros, so "0002" sorts as 2.
    return sorted(index_dirs, key=lambda p: int(p.name))
def migrate_directories(
    web_dir: Path,
    mapping: dict[str, str],
    dry_run: bool = False,
    verbose: bool = False
) -> tuple[int, int, int]:
    """Migrate directories from index to GHCID naming.

    Every numeric directory found in *web_dir* is looked up in *mapping*
    (first by its name with leading zeros stripped, then by its exact name)
    and renamed to the mapped GHCID inside *web_dir*.

    Args:
        web_dir: Directory containing the index-named directories.
        mapping: Index string -> GHCID, as built by parse_mapping_file().
        dry_run: If true, only print what would be renamed.
        verbose: If true, also print skipped entries and completed renames.

    Returns:
        (success_count, skip_count, error_count). A directory is skipped
        when it has no mapping or its target already exists; it counts as
        an error when the target name is unsafe or the rename fails.
    """
    success_count = 0
    skip_count = 0
    error_count = 0
    # Targets claimed earlier in a dry run. Nothing is created on disk in
    # that mode, so without this set two directories resolving to the same
    # GHCID would both be reported as renamable, unlike a real run.
    planned_targets: set[str] = set()

    for dir_path in find_index_directories(web_dir):
        dir_name = dir_path.name

        # Try both unpadded and exact (possibly zero-padded) versions.
        index_key = dir_name.lstrip('0') or '0'
        ghcid = mapping.get(index_key) or mapping.get(dir_name)
        if not ghcid:
            if verbose:
                print(f"Skip: No mapping for index {dir_name}")
            skip_count += 1
            continue

        # The GHCID comes from a data file; refuse anything that is not a
        # single path component so a rename can never leave web_dir.
        if ghcid in (".", "..") or Path(ghcid).name != ghcid:
            print(f"Error renaming {dir_name}: unsafe target name {ghcid!r}")
            error_count += 1
            continue

        new_path = web_dir / ghcid
        if new_path.exists() or ghcid in planned_targets:
            if verbose:
                print(f"Skip: Target already exists: {ghcid}")
            skip_count += 1
            continue

        if dry_run:
            planned_targets.add(ghcid)
            print(f"Would rename: {dir_name} -> {ghcid}")
            success_count += 1
            continue

        try:
            dir_path.rename(new_path)
        except OSError as e:
            print(f"Error renaming {dir_name}: {e}")
            error_count += 1
        else:
            if verbose:
                print(f"Renamed: {dir_name} -> {ghcid}")
            success_count += 1

    return success_count, skip_count, error_count
def main() -> int:
    """Command-line entry point.

    Parses --dry-run / --verbose, locates data/custodian/web relative to
    the directory above this script's directory, loads the mapping file
    found there, runs the migration, and prints a summary.

    Returns:
        0 if no rename errors occurred, 1 otherwise. Exits with status 1
        via sys.exit() if the web directory or mapping file is missing.
    """
    parser = argparse.ArgumentParser(
        description="Migrate web directories from index to GHCID naming"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be done without making changes"
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Show detailed progress information"
    )
    args = parser.parse_args()

    # Paths
    project_root = Path(__file__).parent.parent
    web_dir = project_root / "data" / "custodian" / "web"
    mapping_file = web_dir / "_entry_to_ghcid.txt"

    # Validate paths
    if not web_dir.exists():
        print(f"Error: Web directory not found: {web_dir}")
        sys.exit(1)
    if not mapping_file.exists():
        print(f"Error: Mapping file not found: {mapping_file}")
        sys.exit(1)

    # Parse mapping
    print(f"Reading mapping from: {mapping_file}")
    mapping = parse_mapping_file(mapping_file)
    # Each index is stored under a padded and an unpadded key, but those
    # two keys are the same string from 1000 upwards, so halving the key
    # count undercounts. Count distinct integer values instead.
    unique_count = len({int(key) for key in mapping})
    print(f"Found {unique_count} unique index-to-GHCID mappings")

    # Find directories to migrate
    index_dirs = find_index_directories(web_dir)
    print(f"Found {len(index_dirs)} index-based directories")

    if args.dry_run:
        print("\n=== DRY RUN MODE ===\n")

    # Perform migration
    success, skip, error = migrate_directories(
        web_dir, mapping, dry_run=args.dry_run, verbose=args.verbose
    )

    # Summary
    print("\n=== Summary ===")
    print(f"Successfully {'would rename' if args.dry_run else 'renamed'}: {success}")
    print(f"Skipped (no mapping or target exists): {skip}")
    print(f"Errors: {error}")
    if args.dry_run and success > 0:
        print("\nRun without --dry-run to perform actual migration")

    return 0 if error == 0 else 1


if __name__ == "__main__":
    sys.exit(main())