195 lines
5.9 KiB
Python
Executable file
195 lines
5.9 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""
Migrate web directories from index-based naming to GHCID-based naming.

This script reads the _entry_to_ghcid.txt mapping file and renames directories
from numeric indices (e.g., 0002, 0681) to their corresponding GHCID names
(e.g., NL-DR-ASS-A-DA, NL-NH-HEE-S-HKH).

Usage:
    python scripts/migrate_web_dirs_to_ghcid.py [--dry-run] [--verbose]

Options:
    --dry-run   Show what would be done without making changes
    --verbose   Show detailed progress information
"""
|
|
|
|
import os
|
|
import sys
|
|
import argparse
|
|
from pathlib import Path
|
|
from collections import defaultdict
|
|
|
|
|
|
def parse_mapping_file(mapping_path: Path) -> dict[str, str]:
    """Parse the entry-index-to-GHCID mapping file.

    Each non-blank line is expected to hold an integer index, whitespace,
    and a GHCID, e.g. ``0002 NL-DR-ASS-A-DA``.  Lines that do not split into
    exactly two fields, or whose first field is not an integer, are reported
    on stdout and skipped.

    Args:
        mapping_path: Path of the mapping file to read.

    Returns:
        A dict from index string to GHCID.  Every index is stored under two
        keys -- unpadded (``"2"``) and zero-padded to four digits
        (``"0002"``) -- so either directory naming style can be looked up
        directly.  For indices of 1000 and above both spellings coincide, so
        only one key is stored.  When an index occurs more than once, the
        first mapping encountered wins.
    """
    mapping: dict[str, str] = {}

    # "utf-8-sig" reads plain UTF-8 and also drops a leading byte-order mark,
    # which would otherwise be glued to the first index and cause the first
    # entry to be rejected as an invalid index.  An explicit encoding also
    # keeps the result independent of the platform's locale default.
    with open(mapping_path, "r", encoding="utf-8-sig") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue

            parts = line.split(maxsplit=1)
            if len(parts) != 2:
                print(f"Warning: Skipping malformed line: {line}")
                continue

            index_str, ghcid = parts
            try:
                index_int = int(index_str)
            except ValueError:
                print(f"Warning: Invalid index '{index_str}' in line: {line}")
                continue

            index_normalized = str(index_int)
            # The unpadded key is written for every accepted index, so its
            # presence is enough to detect a duplicate; first mapping wins.
            if index_normalized in mapping:
                continue

            mapping[index_normalized] = ghcid
            mapping[f"{index_int:04d}"] = ghcid  # zero-padded spelling

    return mapping
|
|
|
|
|
|
def find_index_directories(web_dir: Path) -> list[Path]:
    """Return the numerically named subdirectories of *web_dir*.

    A directory qualifies when its name consists solely of ASCII digits;
    this covers both unpadded (``"2"``) and zero-padded (``"0002"``) names.

    Args:
        web_dir: Directory whose immediate children are inspected.

    Returns:
        The matching directories sorted by numeric value.  Names with the
        same value (e.g. ``"0002"`` and ``"2"``) are ordered by name so the
        result does not depend on directory iteration order.
    """
    # str.isdigit() alone also accepts characters such as superscript digits
    # that int() cannot parse, which would make the sort below raise
    # ValueError; requiring ASCII keeps the filter and the sort key in
    # agreement.  The cheap name test runs before is_dir() so entries with
    # non-numeric names never hit the filesystem.
    index_dirs = [
        item
        for item in web_dir.iterdir()
        if item.name.isascii() and item.name.isdigit() and item.is_dir()
    ]
    # int() already ignores leading zeros, so no manual stripping is needed.
    return sorted(index_dirs, key=lambda p: (int(p.name), p.name))
|
|
|
|
|
|
def migrate_directories(
    web_dir: Path,
    mapping: dict[str, str],
    dry_run: bool = False,
    verbose: bool = False
) -> tuple[int, int, int]:
    """Rename index-named directories in *web_dir* to their GHCID names.

    Args:
        web_dir: Directory containing the index-named subdirectories.
        mapping: Index string (unpadded and/or zero-padded) to GHCID.
        dry_run: When true, only report what would be renamed.
        verbose: When true, also report skipped and renamed directories.

    Returns:
        ``(success_count, skip_count, error_count)``.  A directory is skipped
        when it has no mapping or its target already exists; it is an error
        when the target name is not a plain directory name or the rename
        fails with ``OSError``.  In a dry run ``success_count`` is the number
        of renames that would be attempted.
    """
    success_count = 0
    skip_count = 0
    error_count = 0

    # Targets already claimed during a dry run.  A real run sees the first
    # rename on disk and skips later directories aiming at the same GHCID
    # (e.g. both "2" and "0002" exist); tracking claims here makes the dry
    # run report the same outcome instead of counting both as renames.
    planned_targets: set[str] = set()

    for dir_path in find_index_directories(web_dir):
        dir_name = dir_path.name
        # Try the unpadded spelling first, then the name exactly as found.
        index_key = dir_name.lstrip('0') or '0'
        ghcid = mapping.get(index_key) or mapping.get(dir_name)

        if not ghcid:
            if verbose:
                print(f"Skip: No mapping for index {dir_name}")
            skip_count += 1
            continue

        # A target containing a path separator or a dot-directory would move
        # the directory somewhere other than directly inside web_dir.
        if ghcid in (".", "..") or "/" in ghcid or "\\" in ghcid:
            print(f"Error renaming {dir_name}: invalid target name {ghcid!r}")
            error_count += 1
            continue

        new_path = web_dir / ghcid

        if new_path.exists() or ghcid in planned_targets:
            if verbose:
                print(f"Skip: Target already exists: {ghcid}")
            skip_count += 1
            continue

        if dry_run:
            print(f"Would rename: {dir_name} -> {ghcid}")
            planned_targets.add(ghcid)
            success_count += 1
            continue

        try:
            dir_path.rename(new_path)
        except OSError as e:
            print(f"Error renaming {dir_name}: {e}")
            error_count += 1
        else:
            if verbose:
                print(f"Renamed: {dir_name} -> {ghcid}")
            success_count += 1

    return success_count, skip_count, error_count
|
|
|
|
|
|
def main() -> int:
    """Run the migration from the command line.

    Parses ``--dry-run`` / ``--verbose``, locates ``data/custodian/web``
    relative to the directory above this script's directory, reads
    ``_entry_to_ghcid.txt`` from it, renames the index-named directories and
    prints a summary.

    Returns:
        0 when no rename failed, 1 otherwise.  Exits with status 1 via
        ``sys.exit`` when the web directory or the mapping file is missing.
    """
    parser = argparse.ArgumentParser(
        description="Migrate web directories from index to GHCID naming"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be done without making changes"
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Show detailed progress information"
    )
    args = parser.parse_args()

    # Paths
    project_root = Path(__file__).parent.parent
    web_dir = project_root / "data" / "custodian" / "web"
    mapping_file = web_dir / "_entry_to_ghcid.txt"

    # Validate paths
    if not web_dir.exists():
        print(f"Error: Web directory not found: {web_dir}")
        sys.exit(1)

    if not mapping_file.exists():
        print(f"Error: Mapping file not found: {mapping_file}")
        sys.exit(1)

    # Parse mapping
    print(f"Reading mapping from: {mapping_file}")
    mapping = parse_mapping_file(mapping_file)
    # The mapping stores each index under its unpadded and its zero-padded
    # spelling, but for indices >= 1000 the two coincide, so halving the key
    # count undercounts.  Counting distinct integer values is exact.
    unique_indices = {int(key) for key in mapping}
    print(f"Found {len(unique_indices)} unique index-to-GHCID mappings")

    # Find directories to migrate
    index_dirs = find_index_directories(web_dir)
    print(f"Found {len(index_dirs)} index-based directories")

    if args.dry_run:
        print("\n=== DRY RUN MODE ===\n")

    # Perform migration
    success, skip, error = migrate_directories(
        web_dir, mapping, dry_run=args.dry_run, verbose=args.verbose
    )

    # Summary
    print("\n=== Summary ===")
    print(f"Successfully {'would rename' if args.dry_run else 'renamed'}: {success}")
    print(f"Skipped (no mapping or target exists): {skip}")
    print(f"Errors: {error}")

    if args.dry_run and success > 0:
        print("\nRun without --dry-run to perform actual migration")

    return 0 if error == 0 else 1
|
|
|
|
|
|
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status.
    raise SystemExit(main())
|