#!/usr/bin/env python3
"""
Migrate class files from importing class_metadata_slots bundle to importing specific slots.

Per Rule 38: All LinkML slots MUST be centralized in modules/slots/ and classes
should import only the specific slots they need, not a bundle.

This script:
1. Finds all class files importing ../slots/class_metadata_slots
2. Analyzes which slots from that bundle are actually used
3. Replaces the bundle import with specific slot/class imports
4. Validates the migrated files

Usage:
    python scripts/migrate_class_metadata_imports.py [--dry-run] [--validate] [--file PATH]
"""

import os
import re
import sys
import yaml
import argparse
import subprocess
from pathlib import Path
from collections import defaultdict
from typing import Set, List, Dict, Optional

# Root directory
SCHEMA_DIR = Path("schemas/20251121/linkml")
CLASSES_DIR = SCHEMA_DIR / "modules" / "classes"
SLOTS_DIR = SCHEMA_DIR / "modules" / "slots"
ENUMS_DIR = SCHEMA_DIR / "modules" / "enums"

# The bundle import this script removes from class files
BUNDLE_IMPORT = "../slots/class_metadata_slots"

# Mapping of slot names to their file paths (relative to classes dir)
SLOT_FILES = {
    # Custodian type slots
    "custodian_types": "../slots/custodian_types",
    "custodian_types_rationale": "../slots/custodian_types_rationale",
    "custodian_types_primary": "../slots/custodian_types_primary",
    # Wikidata slots
    "wikidata_entity_id": "../slots/wikidata_entity_id",
    "wikidata_entity_label": "../slots/wikidata_entity_label",
    "wikidata_mapping_type": "../slots/wikidata_mapping_type",
    "wikidata_mapping_rationale": "../slots/wikidata_mapping_rationale",
    "wikidata_alignment": "../slots/wikidata_alignment",
    # SKOS slots
    "skos_broader": "../slots/skos_broader",
    "skos_broader_label": "../slots/skos_broader_label",
    "skos_narrower": "../slots/skos_narrower",
    "skos_related": "../slots/skos_related",
    # Dual-class slots
    "dual_class_role": "../slots/dual_class_role",
    "linked_class_name": "../slots/linked_class_name",
    "link_rationale": "../slots/link_rationale",
    "dual_class_link": "../slots/dual_class_link",
    # Specificity slots
    "specificity_score": "../slots/specificity_score",
    "specificity_rationale": "../slots/specificity_rationale",
    "specificity_timestamp": "../slots/specificity_timestamp",
    "specificity_agent": "../slots/specificity_agent",
    "specificity_annotation": "../slots/specificity_annotation",
    "template_specificity": "../slots/template_specificity",
    # Per-template score slots
    "archive_search_score": "../slots/archive_search_score",
    "museum_search_score": "../slots/museum_search_score",
    "library_search_score": "../slots/library_search_score",
    "collection_discovery_score": "../slots/collection_discovery_score",
    "person_research_score": "../slots/person_research_score",
    "location_browse_score": "../slots/location_browse_score",
    "identifier_lookup_score": "../slots/identifier_lookup_score",
    "organizational_change_score": "../slots/organizational_change_score",
    "digital_platform_score": "../slots/digital_platform_score",
    "general_heritage_score": "../slots/general_heritage_score",
    # RiC-O slots
    "rico_organizational_principle": "../slots/rico_organizational_principle",
    "rico_organizational_principle_uri": "../slots/rico_organizational_principle_uri",
    "rico_has_or_had_holder": "../slots/rico_has_or_had_holder",
    "rico_has_or_had_holder_note": "../slots/rico_has_or_had_holder_note",
    "rico_note": "../slots/rico_note",
    # Scope slots
    "custodian_only": "../slots/custodian_only",
    "organizational_level": "../slots/organizational_level",
    "geographic_restriction": "../slots/geographic_restriction",
    # Multilingual labels
    "label_de": "../slots/label_de",
    "label_es": "../slots/label_es",
    "label_fr": "../slots/label_fr",
    "label_nl": "../slots/label_nl",
    "label_it": "../slots/label_it",
    "label_pt": "../slots/label_pt",
    # Notes
    "privacy_note": "../slots/privacy_note",
    "preservation_note": "../slots/preservation_note",
    "legal_note": "../slots/legal_note",
}

# Slots that need their class files imported too
SLOT_TO_CLASS = {
    "wikidata_alignment": "./WikidataAlignment",
    "dual_class_link": "./DualClassLink",
    "specificity_annotation": "./SpecificityAnnotation",
    "template_specificity": "./TemplateSpecificityScores",
}

# All metadata slots for detection
ALL_METADATA_SLOTS = set(SLOT_FILES.keys())

# Top-level, block-style ``imports:`` list. Items may be indented (the common
# YAML style) and the final item may end at end-of-file without a newline.
_IMPORTS_BLOCK_RE = re.compile(
    r'^(?P<header>imports:\s*\n)'
    r'(?P<items>(?:[ \t]*-[ \t]+[^\n]*(?:\n|\Z))+)',
    re.MULTILINE,
)

# Outcomes of processing one file (internal; migrate_file() exposes a bool)
_MIGRATED = "migrated"
_SKIPPED = "skipped"
_FAILED = "failed"


def find_used_slots(content: str, data: dict) -> Set[str]:
    """Find which metadata slots are actually used in a class file.

    A slot counts as used when it appears in a class's ``slots`` list or as a
    key of its ``slot_usage`` mapping.

    Args:
        content: Raw file text. Unused; kept for interface compatibility.
        data: Parsed YAML document of the class file.

    Returns:
        The subset of ALL_METADATA_SLOTS referenced by any class in ``data``.
    """
    used: Set[str] = set()
    if not data or 'classes' not in data:
        return used

    # ``classes:`` itself may be null, hence the ``or {}``.
    for class_def in (data.get('classes') or {}).values():
        if not class_def:
            continue

        # ``slots:`` / ``slot_usage:`` may be present but null in the YAML.
        for slot in class_def.get('slots') or []:
            if slot in ALL_METADATA_SLOTS:
                used.add(slot)

        for slot in class_def.get('slot_usage') or {}:
            if slot in ALL_METADATA_SLOTS:
                used.add(slot)

    return used


def generate_new_imports(used_slots: Set[str], existing_imports: List[str]) -> List[str]:
    """Generate the new import list replacing class_metadata_slots with specific imports.

    Args:
        used_slots: Metadata slot names the class file actually uses.
        existing_imports: The file's current import list, in order.

    Returns:
        The existing imports minus the bundle, followed by one import per used
        slot (alphabetical) and, right after it, the class file that slot
        requires (see SLOT_TO_CLASS). Entries already present are not repeated.
    """
    new_imports = [imp for imp in existing_imports if imp != BUNDLE_IMPORT]

    for slot in sorted(used_slots):
        if slot not in SLOT_FILES:
            continue

        slot_import = SLOT_FILES[slot]
        if slot_import not in new_imports:
            new_imports.append(slot_import)

        # Some slots have a class as range; that class file must be imported too.
        class_import = SLOT_TO_CLASS.get(slot)
        if class_import is not None and class_import not in new_imports:
            new_imports.append(class_import)

    return new_imports


def _replace_imports_block(content: str, new_imports: List[str]) -> Optional[str]:
    """Return ``content`` with its top-level imports list replaced, or None if not found.

    Only the first block-style ``imports:`` list is rewritten, and the original
    item indentation is reused so the rest of the file keeps its formatting.
    """
    match = _IMPORTS_BLOCK_RE.search(content)
    if match is None:
        return None

    if new_imports:
        items = match.group('items')
        indent = items[:len(items) - len(items.lstrip(' \t'))]
        block = match.group('header') + ''.join(
            f'{indent}- {imp}\n' for imp in new_imports
        )
    else:
        # A bare "imports:" key would parse as null; keep it an explicit list.
        block = 'imports: []\n'

    return content[:match.start()] + block + content[match.end():]


def _migrate_file_status(file_path: Path, dry_run: bool = False) -> str:
    """Migrate one class file and return _MIGRATED, _SKIPPED or _FAILED."""
    with open(file_path, encoding='utf-8') as f:
        content = f.read()

    # Check if it imports class_metadata_slots
    if BUNDLE_IMPORT not in content:
        return _SKIPPED

    try:
        data = yaml.safe_load(content)
    except yaml.YAMLError as e:
        print(f" ERROR: YAML parse error: {e}")
        return _FAILED

    if not data:
        print(" ERROR: Empty YAML file")
        return _FAILED
    if not isinstance(data, dict):
        print(" ERROR: Top-level YAML is not a mapping")
        return _FAILED

    # Find used slots
    used_slots = find_used_slots(content, data)
    if not used_slots:
        # File imports bundle but doesn't use any slots - just remove the import
        print(" No metadata slots used - removing import")
    else:
        print(f" Uses slots: {sorted(used_slots)}")

    # ``imports:`` may be present but null, hence ``or []``.
    existing_imports = data.get('imports') or []
    new_imports = generate_new_imports(used_slots, existing_imports)

    # Rewrite only the imports block textually (instead of dumping YAML) so
    # comments and formatting elsewhere in the file are preserved.
    new_content = _replace_imports_block(content, new_imports)
    if new_content is None:
        print(" ERROR: Could not locate a block-style 'imports:' list to rewrite")
        return _FAILED

    if dry_run:
        print(f" Would update imports to: {new_imports}")
        return _MIGRATED

    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(new_content)

    return _MIGRATED


def migrate_file(file_path: Path, dry_run: bool = False) -> bool:
    """Migrate a single class file.

    Args:
        file_path: Path of the class YAML file.
        dry_run: When True, report the new import list without writing.

    Returns:
        True if the file was (or, in dry-run mode, would be) migrated; False
        if it does not reference the bundle or could not be processed.
    """
    return _migrate_file_status(file_path, dry_run=dry_run) == _MIGRATED


def validate_file(file_path: Path) -> bool:
    """Validate a migrated file using linkml-validate.

    Returns:
        True when the command exits with status 0; False otherwise, including
        when the command cannot be started.
    """
    try:
        result = subprocess.run(
            ['linkml-validate', '--schema', str(file_path)],
            capture_output=True,
            text=True,
        )
    except OSError as e:
        # Typically: linkml-validate is not installed / not on PATH.
        print(f" VALIDATION ERROR: {e}")
        return False

    if result.returncode != 0:
        # Fall back to stdout so diagnostics are not lost when stderr is empty.
        print(f" VALIDATION ERROR: {result.stderr or result.stdout}")
        return False
    return True


def main():
    """Parse the command line, migrate the selected class files, print a summary."""
    parser = argparse.ArgumentParser(description='Migrate class files to use specific slot imports')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be changed without making changes')
    parser.add_argument('--validate', action='store_true', help='Validate files after migration')
    parser.add_argument('--file', type=str, help='Migrate a specific file only')
    args = parser.parse_args()

    # Remember where we were invoked from so a relative --file still resolves,
    # then move to the repository root that the SCHEMA_DIR paths are relative
    # to. resolve() matters when __file__ is a bare relative filename.
    invocation_dir = Path.cwd()
    os.chdir(Path(__file__).resolve().parent.parent)

    if args.file:
        requested = Path(args.file)
        from_invocation_dir = invocation_dir / requested
        # Prefer the path as the user typed it; fall back to interpreting it
        # relative to the repository root (the previous behaviour).
        files = [from_invocation_dir if from_invocation_dir.exists() else requested]
    else:
        files = sorted(CLASSES_DIR.glob("*.yaml"))

    migrated = 0
    skipped = 0
    errors = 0

    for file_path in files:
        print(f"\nProcessing: {file_path.name}")
        try:
            status = _migrate_file_status(file_path, dry_run=args.dry_run)
            if status == _MIGRATED:
                migrated += 1
                if args.validate and not args.dry_run:
                    if validate_file(file_path):
                        print(" VALIDATED OK")
                    else:
                        errors += 1
            elif status == _SKIPPED:
                skipped += 1
                print(" Skipped (no class_metadata_slots import)")
            else:
                # The failure reason was already printed by the migration step.
                errors += 1
        except Exception as e:
            # Per-file boundary: one unreadable file must not abort the batch.
            errors += 1
            print(f" ERROR: {e}")

    print(f"\n{'='*60}")
    print("Migration complete:")
    print(f" Migrated: {migrated}")
    print(f" Skipped: {skipped}")
    print(f" Errors: {errors}")

    if args.dry_run:
        print("\n(Dry run - no changes made)")


if __name__ == '__main__':
    main()