#!/usr/bin/env python3
"""
Update LinkML class files to reference renamed slots.

This script updates class files to use the new RiC-O style slot names.

Usage:
    python scripts/update_class_slot_references.py --dry-run  # Preview changes
    python scripts/update_class_slot_references.py            # Apply changes
"""

import argparse
import os
import re
import sys
from pathlib import Path
from typing import Dict, List, Tuple

# Mapping from old slot names to new slot names.
# A few entries map a name to itself; those are skipped when rewriting files.
SLOT_RENAMES: Dict[str, str] = {
    "abbreviation": "has_or_had_abbreviation",
    "about_digital_presence": "is_or_was_about_digital_presence",
    "about_text": "has_or_had_about_text",
    "academic_affiliation": "has_or_had_academic_affiliation",
    "academic_programs": "has_or_had_academic_program",
    "accepts_external_work": "accepts_or_accepted_external_work",
    "accepts_payment_methods": "accepts_or_accepted_payment_method",
    "accepts_visiting_scholars": "accepts_or_accepted_visiting_scholar",
    "access": "has_or_had_access_condition",
    "access_application_url": "has_access_application_url",
    "access_control": "has_or_had_access_control",
    "access_description": "has_or_had_access_description",
    "access_frequency": "has_or_had_access_frequency",
    "access_interface_url": "has_access_interface_url",
    "access_level": "has_or_had_access_level",
    "access_management": "has_or_had_access_management",
    "access_policy": "has_or_had_access_policy",
    "access_policy_ref": "has_access_policy_reference",
    "access_restricted": "is_or_was_access_restricted",
    "access_restriction": "has_or_had_access_restriction",
    "access_restrictions": "has_or_had_access_restriction",
    "access_rights": "has_or_had_access_right",
    "access_trigger_events": "has_or_had_access_trigger_event",
    "accessibility_features": "has_or_had_accessibility_feature",
    "accession_date": "has_accession_date",
    "accession_number": "has_accession_number",
    "account_id": "has_account_identifier",
    "account_name": "has_or_had_account_name",
    "account_status": "has_or_had_account_status",
    "accreditation": "has_or_had_accreditation",
    "accreditation_body": "has_or_had_accreditation_body",
    "accumulation_date_end": "has_accumulation_end_date",
    "accumulation_date_start": "has_accumulation_start_date",
    "accuracy_meters": "has_accuracy_in_meters",
    "acquisition_budget": "has_or_had_acquisition_budget",
    "acquisition_date": "has_acquisition_date",
    "acquisition_history": "has_acquisition_history",
    "acquisition_method": "has_acquisition_method",
    "acquisition_source": "has_acquisition_source",
    "active_since": "has_active_since_date",
    "activities_societies": "has_or_had_activity_or_society_membership",
    "activity_description": "has_activity_description",
    "activity_id": "has_activity_identifier",
    "activity_name": "has_activity_name",
    "activity_timespan": "has_activity_timespan",
    "activity_type": "has_activity_type",
    "actual_end": "has_actual_end_date",
    "actual_return_date": "has_actual_return_date",
    "actual_start": "has_actual_start_date",
    "admin_office_description": "has_admin_office_description",
    "admin_office_id": "has_admin_office_identifier",
    "admin_office_name": "has_admin_office_name",
    "admin_staff_count": "has_or_had_admin_staff_count",
    "administration_description": "has_administration_description",
    "administration_name": "has_administration_name",
    "administrative_expenses": "has_or_had_administrative_expense",
    "administrative_functions": "has_or_had_administrative_function",
    "administrative_level": "has_administrative_level",
    "admission_fee": "has_or_had_admission_fee",
    "adoption_context": "has_adoption_context",
    "affected_by_event": "is_or_was_affected_by_event",
    "affected_territory": "has_or_had_affected_territory",
    "affected_units": "has_or_had_affected_unit",
    "affects_organization": "affects_or_affected_organization",
    "affiliated_universities": "has_or_had_affiliated_university",
    "affiliation": "has_or_had_affiliation",
    "age": "has_age",
    "agenda_description": "has_agenda_description",
    "agenda_document_url": "has_agenda_document_url",
    "agenda_id": "has_agenda_identifier",
    "agenda_short_name": "has_agenda_short_name",
    "agenda_title": "has_agenda_title",
    "agenda_url": "has_agenda_url",
    "agent_name": "has_agent_name",
    "agent_type": "has_agent_type",
    "aggregated_by": "is_or_was_aggregated_by",
    "aggregates_from": "aggregates_or_aggregated_from",
    "agreement_signed_date": "has_agreement_signed_date",
    "air_changes_per_hour": "has_air_changes_per_hour",
    "all_data_real": "has_all_data_real_flag",
    "all_links": "has_link",
    "allocated_by": "is_or_was_allocated_by",
    "allocates": "allocates_or_allocated",
    "allocation_date": "has_allocation_date",
    "allows_laptops": "allows_or_allowed_laptop",
    "allows_photography": "allows_or_allowed_photography",
    "alpha_2": "has_alpha_2_code",
    "alpha_3": "has_alpha_3_code",
    "also_allocation_agency": "is_or_was_also_allocation_agency",
    "also_identifies_name": "also_identifies_name",
    "alternative_names": "has_or_had_alternative_name",
    "alternative_observed_names": "has_or_had_alternative_observed_name",
    "altitude": "has_altitude",
    "amendment_history": "has_amendment_history",
    "animal_species_count": "has_or_had_animal_species_count",
    "annex_description": "has_annex_description",
    "annex_id": "has_annex_identifier",
    "annex_name": "has_annex_name",
    "annex_reason": "has_annex_reason",
    "annotation_motivation": "has_annotation_motivation",
    "annotation_segments": "has_annotation_segment",
    "annotation_type": "has_annotation_type",
    "annotations_by": "has_annotation_by",
    "annual_participants": "has_or_had_annual_participant_count",
    "annual_revenue": "has_or_had_annual_revenue",
    "api_available": "has_api_available_flag",
    "api_documentation": "has_api_documentation_url",
    "api_endpoint": "has_api_endpoint",
    "api_version": "has_api_version",
    "appellation_language": "has_appellation_language",
    "appellation_type": "has_appellation_type",
    "appellation_value": "has_appellation_value",
    "appellations": "has_or_had_appellation",
    "applicable_countries": "has_applicable_country",
    "application_deadline": "has_application_deadline",
    "application_opening_date": "has_application_opening_date",
    "applies_to_call": "applies_to_call",
    "appointment_required": "has_appointment_required_flag",
    "appraisal_notes": "has_appraisal_note",
    "appraisal_policy": "has_or_had_appraisal_policy",
    "approval_date": "has_approval_date",
    "approved_by": "was_approved_by",
    "approximate": "is_approximate",
    "archdiocese_name": "has_archdiocese_name",
    "architect": "has_or_had_architect",
    "architectural_style": "has_architectural_style",
    "archival_reference": "has_archival_reference",
    "archival_status": "has_or_had_archival_status",
    "archive_branches": "has_or_had_archive_branch",
    "archive_department_of": "is_or_was_archive_department_of",
    "archive_description": "has_archive_description",
    "archive_memento_uri": "has_archive_memento_uri",
    "archive_name": "has_archive_name",
    "archive_path": "has_archive_path",
    "archive_scope": "has_or_had_archive_scope",
    "archive_search_score": "has_archive_search_score",
    "archive_series": "is_or_was_part_of_archive_series",
    "archive_subtype": "has_archive_subtype",
    "archived_at": "was_archived_at",
    "archived_in": "is_or_was_archived_in",
    "area_hectares": "has_area_in_hectares",
    "area_served": "has_or_had_area_served",
    "arrangement": "has_arrangement",
    "arrangement_level": "has_arrangement_level",
    "arrangement_notes": "has_arrangement_note",
    "arrangement_system": "has_or_had_arrangement_system",
    "articles_archival_stage": "has_articles_archival_stage",
    "articles_document_format": "has_articles_document_format",
    "articles_document_url": "has_articles_document_url",
    "artist_representation": "has_or_had_artist_representation",
    "artwork_count": "has_or_had_artwork_count",
    "aspect_ratio": "has_aspect_ratio",
    "asserted_by": "was_asserted_by",
    "assertion_date": "has_assertion_date",
    "assertion_id": "has_assertion_identifier",
    "assertion_rationale": "has_assertion_rationale",
    "assertion_value": "has_assertion_value",
    "assessment_category": "has_assessment_category",
    "assessment_date": "has_assessment_date",
    "assigned_processor": "has_or_had_assigned_processor",
    "associated_auxiliary_platform": "has_or_had_associated_auxiliary_platform",
    "associated_custodian": "has_or_had_associated_custodian",
    "associated_digital_platform": "has_or_had_associated_digital_platform",
    "associated_encompassing_bodies": "has_or_had_associated_encompassing_body",
    "associated_taxa": "has_associated_taxon",
    "auction_house": "has_auction_house",
    "auction_sale_name": "has_auction_sale_name",
    "audience_size": "has_or_had_audience_size",
    "audience_type": "has_audience_type",
    "audio_event_segments": "has_audio_event_segment",
    "audio_quality_score": "has_audio_quality_score",
    "audit_date": "has_audit_date",
    "audit_opinion": "has_audit_opinion",
    "audit_status": "has_or_had_audit_status",
    "auditor_name": "has_auditor_name",
    "authentication_required": "has_authentication_required_flag",
    "authority_file_abbreviation": "has_authority_file_abbreviation",
    "authority_file_name": "has_authority_file_name",
    "authority_file_url": "has_authority_file_url",
    "authors": "has_author",
    "auto_generated": "is_auto_generated",
    "auxiliary_place_id": "has_auxiliary_place_identifier",
    "auxiliary_place_type": "has_auxiliary_place_type",
    "auxiliary_places": "has_auxiliary_place",
    "auxiliary_platform_id": "has_auxiliary_platform_identifier",
    "auxiliary_platform_type": "has_auxiliary_platform_type",
    "auxiliary_platforms": "has_auxiliary_platform",
    "availability_timespan": "has_availability_timespan",
    "available_caption_languages": "has_available_caption_language",
    "average_entry_duration_seconds": "has_average_entry_duration_seconds",
    "average_scene_duration_seconds": "has_average_scene_duration_seconds",
}


def find_class_files(classes_dir: Path) -> List[Path]:
    """Return every ``*.yaml`` path found recursively under *classes_dir*."""
    return list(classes_dir.glob("**/*.yaml"))


def update_file_content(content: str, renames: Dict[str, str]) -> Tuple[str, List[str]]:
    """Rename slot keys in *content* according to *renames*.

    Only lines that consist of horizontal indentation, a slot name, a colon
    and optional trailing blanks (for example ``    abbreviation:``) are
    rewritten. Unindented keys, keys followed by an inline value and list
    items such as ``- abbreviation`` are left untouched.

    Args:
        content: Text of a class file.
        renames: Mapping of old slot name to new slot name. Entries that map
            a name to itself are ignored.

    Returns:
        A ``(updated_content, changes)`` tuple. ``changes`` holds one
        ``"old -> new"`` string per slot name that was renamed at least once,
        in the iteration order of *renames*.
    """
    # Identity mappings would otherwise be reported as changes and cause the
    # file to be rewritten with identical content.
    effective = {old: new for old, new in renames.items() if old != new}
    if not effective:
        return content, []

    # One alternation over all names lets the text be rewritten in a single
    # pass, so the output of one rename is never fed into another rename.
    # Names are escaped so they are always matched literally.
    alternation = "|".join(
        re.escape(name) for name in sorted(effective, key=len, reverse=True)
    )
    # [ \t] rather than \s keeps each match on a single line; the optional \r
    # tolerates CRLF text passed in directly.
    key_pattern = re.compile(
        rf"^([ \t]+)({alternation}):([ \t]*\r?)$", re.MULTILINE
    )

    renamed = set()

    def _rename(match):
        old_name = match.group(2)
        renamed.add(old_name)
        # Built with an f-string rather than a replacement template, so the
        # new name is inserted verbatim.
        return f"{match.group(1)}{effective[old_name]}:{match.group(3)}"

    updated_content = key_pattern.sub(_rename, content)
    changes = [
        f"{old} -> {new}" for old, new in effective.items() if old in renamed
    ]
    return updated_content, changes


def process_file(file_path: Path, renames: Dict[str, str], dry_run: bool = False) -> Tuple[bool, List[str]]:
    """Apply *renames* to a single class file.

    Args:
        file_path: File to read and, unless *dry_run* is set, rewrite.
        renames: Mapping of old slot name to new slot name.
        dry_run: When true, compute the changes but do not write the file.

    Returns:
        ``(True, changes)`` on success, where ``changes`` is empty if nothing
        needed renaming, or ``(False, [error_message])`` if the file could
        not be read or written.
    """
    # Read and write explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding.
    try:
        content = file_path.read_text(encoding="utf-8")
    except (OSError, UnicodeError) as e:
        return False, [f"Error reading {file_path}: {e}"]

    updated_content, changes = update_file_content(content, renames)
    if not changes:
        return True, []

    if not dry_run:
        try:
            file_path.write_text(updated_content, encoding="utf-8")
        except (OSError, UnicodeError) as e:
            return False, [f"Error writing {file_path}: {e}"]

    return True, changes


def main() -> int:
    """Parse command-line arguments and update every class file.

    Returns:
        ``0`` when all files were processed successfully, ``1`` when the
        classes directory does not exist or any file failed to be read or
        written.
    """
    parser = argparse.ArgumentParser(description="Update class files with new slot names")
    parser.add_argument("--dry-run", action="store_true", help="Preview changes without writing files")
    parser.add_argument("--classes-dir", default="schemas/20251121/linkml/modules/classes", help="Path to classes directory")
    args = parser.parse_args()

    classes_dir = Path(args.classes_dir)
    if not classes_dir.exists():
        print(f"Classes directory not found: {classes_dir}")
        return 1

    class_files = find_class_files(classes_dir)
    print(f"Found {len(class_files)} class files")
    print(f"Checking for {len(SLOT_RENAMES)} slot renames")
    print(f"Dry run: {args.dry_run}")
    print()

    files_updated = 0
    total_changes = 0
    files_failed = 0
    action = "Would update" if args.dry_run else "Updated"

    for file_path in sorted(class_files):
        success, messages = process_file(file_path, SLOT_RENAMES, args.dry_run)

        # On failure the returned list carries error text, not renames, so it
        # must not be counted or printed as an update.
        if not success:
            files_failed += 1
            for message in messages:
                print(f"✗ {message}", file=sys.stderr)
            continue

        if not messages:
            continue

        files_updated += 1
        total_changes += len(messages)
        rel_path = file_path.relative_to(classes_dir)
        print(f"✓ {action} {rel_path}:")
        for change in messages:
            print(f"    {change}")

    print()
    print(f"Files updated: {files_updated}")
    print(f"Total slot renames: {total_changes}")

    if files_failed:
        print(f"Files failed: {files_failed}", file=sys.stderr)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())