#!/usr/bin/env python3
"""
Migrate class files from importing class_metadata_slots bundle to importing specific slots.

Per Rule 38: All LinkML slots MUST be centralized in modules/slots/ and classes should
import only the specific slots they need, not a bundle.

This script:
1. Finds all class files importing ../slots/class_metadata_slots
2. Analyzes which slots from that bundle are actually used
3. Replaces the bundle import with specific slot/class imports
4. Validates the migrated files

Usage:
    python scripts/migrate_class_metadata_imports.py [--dry-run] [--validate]
"""
|
|
|
|
# Standard-library imports first, then third-party (PyYAML), then typing.
import os
import re
import sys  # NOTE(review): appears unused in this file — confirm before removing
import yaml
import argparse
import subprocess
from pathlib import Path
from collections import defaultdict  # NOTE(review): appears unused here — confirm
from typing import Set, List, Dict, Optional  # NOTE(review): Dict/Optional appear unused
|
|
|
|
# Root directory
# NOTE(review): these paths are relative to the repo root; main() chdirs
# there before they are used. SLOTS_DIR / ENUMS_DIR are not referenced in
# this script's visible code — presumably kept for consistency; confirm.
SCHEMA_DIR = Path("schemas/20251121/linkml")
CLASSES_DIR = SCHEMA_DIR / "modules" / "classes"
SLOTS_DIR = SCHEMA_DIR / "modules" / "slots"
ENUMS_DIR = SCHEMA_DIR / "modules" / "enums"
|
|
|
|
# Mapping of slot names to their file paths (relative to classes dir)
# Each value is the import path written into a class file's `imports:` list
# when the corresponding slot is used (see generate_new_imports).
SLOT_FILES = {
    # Custodian type slots
    "custodian_types": "../slots/custodian_types",
    "custodian_types_rationale": "../slots/custodian_types_rationale",
    "custodian_types_primary": "../slots/custodian_types_primary",
    # Wikidata slots
    "wikidata_entity_id": "../slots/wikidata_entity_id",
    "wikidata_entity_label": "../slots/wikidata_entity_label",
    "wikidata_mapping_type": "../slots/wikidata_mapping_type",
    "wikidata_mapping_rationale": "../slots/wikidata_mapping_rationale",
    "wikidata_alignment": "../slots/wikidata_alignment",
    # SKOS slots
    "skos_broader": "../slots/skos_broader",
    "skos_broader_label": "../slots/skos_broader_label",
    "skos_narrower": "../slots/skos_narrower",
    "skos_related": "../slots/skos_related",
    # Dual-class slots
    "dual_class_role": "../slots/dual_class_role",
    "linked_class_name": "../slots/linked_class_name",
    "link_rationale": "../slots/link_rationale",
    "dual_class_link": "../slots/dual_class_link",
    # Specificity slots
    "specificity_score": "../slots/specificity_score",
    "specificity_rationale": "../slots/specificity_rationale",
    "specificity_timestamp": "../slots/specificity_timestamp",
    "specificity_agent": "../slots/specificity_agent",
    "specificity_annotation": "../slots/specificity_annotation",
    "template_specificity": "../slots/template_specificity",
    # Per-template score slots
    "archive_search_score": "../slots/archive_search_score",
    "museum_search_score": "../slots/museum_search_score",
    "library_search_score": "../slots/library_search_score",
    "collection_discovery_score": "../slots/collection_discovery_score",
    "person_research_score": "../slots/person_research_score",
    "location_browse_score": "../slots/location_browse_score",
    "identifier_lookup_score": "../slots/identifier_lookup_score",
    "organizational_change_score": "../slots/organizational_change_score",
    "digital_platform_score": "../slots/digital_platform_score",
    "general_heritage_score": "../slots/general_heritage_score",
    # RiC-O slots
    "rico_organizational_principle": "../slots/rico_organizational_principle",
    "rico_organizational_principle_uri": "../slots/rico_organizational_principle_uri",
    "rico_has_or_had_holder": "../slots/rico_has_or_had_holder",
    "rico_has_or_had_holder_note": "../slots/rico_has_or_had_holder_note",
    "rico_note": "../slots/rico_note",
    # Scope slots
    "custodian_only": "../slots/custodian_only",
    "organizational_level": "../slots/organizational_level",
    "geographic_restriction": "../slots/geographic_restriction",
    # Multilingual labels
    "label_de": "../slots/label_de",
    "label_es": "../slots/label_es",
    "label_fr": "../slots/label_fr",
    "label_nl": "../slots/label_nl",
    "label_it": "../slots/label_it",
    "label_pt": "../slots/label_pt",
    # Notes
    "privacy_note": "../slots/privacy_note",
    "preservation_note": "../slots/preservation_note",
    "legal_note": "../slots/legal_note",
}
|
|
|
|
# Slots that need their class files imported too
# NOTE(review): presumably these slots' range is a class defined in a
# sibling class file, hence the extra "./ClassName" import — confirm.
SLOT_TO_CLASS = {
    "wikidata_alignment": "./WikidataAlignment",
    "dual_class_link": "./DualClassLink",
    "specificity_annotation": "./SpecificityAnnotation",
    "template_specificity": "./TemplateSpecificityScores",
}
|
|
|
|
# All metadata slots for detection
# (full name set of the class_metadata_slots bundle; used for membership
# tests in find_used_slots).
ALL_METADATA_SLOTS = set(SLOT_FILES.keys())
|
|
|
|
|
|
def find_used_slots(content: str, data: dict) -> Set[str]:
    """Return the metadata slots actually referenced by classes in *data*.

    Scans each class definition's ``slots`` list and ``slot_usage`` keys,
    keeping only names that belong to the centralized metadata bundle
    (ALL_METADATA_SLOTS). ``content`` (the raw file text) is accepted for
    interface parity but not consulted here.
    """
    # No parsed data or no classes section: nothing can be referenced.
    if not data or 'classes' not in data:
        return set()

    used: Set[str] = set()
    for class_def in data.get('classes', {}).values():
        if not class_def:
            continue

        # Names declared directly on the class (list may be None/null).
        declared = class_def.get('slots', []) or []
        used |= ALL_METADATA_SLOTS.intersection(declared)

        # Names that appear only as slot_usage refinements (may be None).
        refinements = class_def.get('slot_usage')
        if refinements:
            used |= ALL_METADATA_SLOTS.intersection(refinements)

    return used
|
|
|
|
|
|
def generate_new_imports(used_slots: Set[str], existing_imports: List[str]) -> List[str]:
    """Build the replacement import list for a migrated class file.

    Drops the ``class_metadata_slots`` bundle import, keeps every other
    existing import in its original order, then appends one specific slot
    import per used slot (alphabetically) plus the companion class import
    when the slot has one. Duplicates are never emitted.
    """
    bundle = "../slots/class_metadata_slots"
    result = [imp for imp in existing_imports if imp != bundle]

    for slot_name in sorted(used_slots):
        # Unknown slots have no file mapping and contribute nothing.
        if slot_name not in SLOT_FILES:
            continue
        # Slot file first, then its class file (if any), matching the
        # original emission order.
        for candidate in (SLOT_FILES[slot_name], SLOT_TO_CLASS.get(slot_name)):
            if candidate is not None and candidate not in result:
                result.append(candidate)

    return result
|
|
|
|
|
|
def migrate_file(file_path: Path, dry_run: bool = False) -> bool:
    """Migrate a single class file away from the class_metadata_slots bundle.

    Returns True when the file imported the bundle and was (or, with
    ``dry_run``, would be) rewritten; False when there is nothing to do,
    the YAML cannot be parsed, or the imports block cannot be located.
    """
    with open(file_path) as f:
        content = f.read()

    # Fast path: file does not import the bundle, nothing to migrate.
    if "../slots/class_metadata_slots" not in content:
        return False

    try:
        data = yaml.safe_load(content)
    except yaml.YAMLError as e:
        print(f" ERROR: YAML parse error: {e}")
        return False

    if not data:
        print(" ERROR: Empty YAML file")
        return False

    # Work out which bundle slots this file actually references.
    used_slots = find_used_slots(content, data)

    if not used_slots:
        # File imports bundle but doesn't use any slots - just remove the import
        print(f" No metadata slots used - removing import")
    else:
        print(f" Uses slots: {sorted(used_slots)}")

    existing_imports = data.get('imports', [])
    new_imports = generate_new_imports(used_slots, existing_imports)
    data['imports'] = new_imports

    if dry_run:
        print(f" Would update imports to: {new_imports}")
        return True

    # Rewrite only the imports block via regex so comments and formatting
    # elsewhere in the file are preserved. The item pattern tolerates
    # leading indentation, which YAML list items commonly have (the old
    # pattern only matched column-0 items and silently skipped indented
    # lists while still reporting success).
    imports_pattern = r'(imports:[ \t]*\n)((?:[ \t]*- [^\n]+\n)+)'

    def replace_imports(match):
        prefix = match.group(1)
        # Reuse the indentation of the first existing list item so the
        # rewritten block matches the file's style.
        indent = re.match(r'[ \t]*', match.group(2)).group(0)
        if not new_imports:
            # No imports remain: drop the whole key, since a bare
            # "imports:" with no items is not a valid list.
            return ''
        return prefix + ''.join(f'{indent}- {imp}\n' for imp in new_imports)

    # Replace only the FIRST imports block; a stray later match (e.g. in a
    # commented example) must not be touched. Detect a failed match rather
    # than silently writing the file back unchanged.
    new_content, n_subs = re.subn(imports_pattern, replace_imports, content, count=1)
    if n_subs == 0:
        print(" ERROR: could not locate the imports block to rewrite")
        return False

    with open(file_path, 'w') as f:
        f.write(new_content)

    return True
|
|
|
|
|
|
def validate_file(file_path: Path) -> bool:
    """Validate a migrated file using the ``linkml-validate`` CLI.

    Returns True when the validator exits 0; False on a non-zero exit,
    when the executable is missing from PATH, or on any other
    subprocess-level failure (printed, never raised).
    """
    try:
        result = subprocess.run(
            ['linkml-validate', '--schema', str(file_path)],
            capture_output=True,
            text=True,
        )
    except (OSError, subprocess.SubprocessError) as e:
        # Narrowed from `except Exception`: these are the launch/execution
        # failures subprocess.run can raise (e.g. linkml-validate not
        # installed). Anything else is a programming error and should
        # propagate.
        print(f" VALIDATION ERROR: {e}")
        return False

    if result.returncode != 0:
        print(f" VALIDATION ERROR: {result.stderr}")
        return False
    return True
|
|
|
|
|
|
def main():
    """CLI entry point: migrate class files and print a summary.

    Flags: --dry-run (report only), --validate (run linkml-validate on
    each migrated file), --file PATH (migrate one file instead of all
    *.yaml under CLASSES_DIR).
    """
    parser = argparse.ArgumentParser(description='Migrate class files to use specific slot imports')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be changed without making changes')
    parser.add_argument('--validate', action='store_true', help='Validate files after migration')
    parser.add_argument('--file', type=str, help='Migrate a specific file only')
    args = parser.parse_args()

    # Resolve a user-supplied path BEFORE changing directory: --file is
    # naturally relative to the caller's cwd, not the repo root. (The old
    # code resolved it after chdir, breaking relative --file arguments.)
    explicit_file = Path(args.file).resolve() if args.file else None

    # Run from the repo root so the SCHEMA_DIR-relative constants work.
    os.chdir(Path(__file__).parent.parent)

    if explicit_file is not None:
        files = [explicit_file]
    else:
        files = sorted(CLASSES_DIR.glob("*.yaml"))

    migrated = 0
    skipped = 0
    errors = 0

    for file_path in files:
        print(f"\nProcessing: {file_path.name}")

        try:
            if migrate_file(file_path, dry_run=args.dry_run):
                migrated += 1

                if args.validate and not args.dry_run:
                    if validate_file(file_path):
                        print(" VALIDATED OK")
                    else:
                        errors += 1
            else:
                skipped += 1
                print(f" Skipped (no class_metadata_slots import)")
        except Exception as e:
            # Keep going: one broken file must not abort the whole batch.
            errors += 1
            print(f" ERROR: {e}")

    print(f"\n{'='*60}")
    print(f"Migration complete:")
    print(f" Migrated: {migrated}")
    print(f" Skipped: {skipped}")
    print(f" Errors: {errors}")

    if args.dry_run:
        print("\n(Dry run - no changes made)")


if __name__ == '__main__':
    main()
|