glam/scripts/rename_ontology_prefixed_slots.py

198 lines
5.9 KiB
Python
Executable file

#!/usr/bin/env python3
"""
Rename slots that have ontology prefixes in their names.

Rule 42: No Ontology Prefixes in Slot Names

This script:
1. Renames slot files from ontology-prefixed names to clean names
2. Updates the slot definitions inside the files
3. Updates all class file references to use the new names
4. Updates the slot import paths in the main schema file
"""
import os
import re
from pathlib import Path
# Mapping of old (ontology-prefixed) slot names to new clean names.
# Keys are the names being retired, values are their replacements. Each name
# is also the stem of the slot's "<name>.yaml" file, so an entry drives both
# the file rename and the in-file reference rewrites.
SLOT_RENAMES = {
    # RiC-O prefixed slots ("rico_")
    "rico_equivalent": "record_equivalent",
    "rico_has_or_had_holder": "record_holder",
    "rico_has_or_had_holder_note": "record_holder_note",
    "rico_note": "record_note",
    "rico_organizational_principle": "organizational_principle",
    "rico_organizational_principle_uri": "organizational_principle_uri",
    "rico_record_set_type": "record_set_type",
    # SKOS prefixed slots ("skos_")
    "skos_broader": "broader_concept",
    "skos_broader_label": "broader_concept_label",
    "skos_narrower": "narrower_concept",
    "skos_related": "related_concept",
    # BIBFRAME prefixed slots ("bf_")
    "bf_equivalent": "bibframe_equivalent",
}
def rename_slot_file(slots_dir: Path, old_name: str, new_name: str) -> bool:
    """Rename a slot file and rewrite the slot's own name inside it.

    The file ``<old_name>.yaml`` in *slots_dir* is read, its slot key, ``id``,
    ``name`` and ``title`` fields are rewritten to use *new_name*, the result
    is written to ``<new_name>.yaml`` and the old file is removed.

    Args:
        slots_dir: Directory containing the slot YAML files.
        old_name: Current slot name (also the stem of the existing file).
        new_name: Replacement slot name (also the stem of the new file).

    Returns:
        True if the file was renamed; False if the source file is missing or
        the destination file already exists (nothing is modified then).
    """
    old_file = slots_dir / f"{old_name}.yaml"
    new_file = slots_dir / f"{new_name}.yaml"

    if not old_file.exists():
        print(f" WARNING: {old_file} does not exist")
        return False

    if new_file.exists():
        # Never overwrite an existing slot definition.
        print(f" WARNING: {new_file} already exists, skipping")
        return False

    # Escape the name so any regex metacharacter in it is matched literally.
    escaped = re.escape(old_name)
    # The match must end where the identifier ends; otherwise a longer name
    # that merely starts with old_name would be rewritten as well.
    name_end = r"(?![A-Za-z0-9_])"

    # YAML is UTF-8; do not depend on the platform's default encoding.
    content = old_file.read_text(encoding="utf-8")

    # Slot key: "slots:\n  old_name:" -> "slots:\n  new_name:".
    # Callable replacements keep new_name from being parsed as a template.
    content = re.sub(
        rf"^(\s*){escaped}:",
        lambda m: f"{m.group(1)}{new_name}:",
        content,
        flags=re.MULTILINE,
    )

    # id field whose URL ends in ".../slot/old_name".
    content = re.sub(
        rf"id: (https://[^/]+/[^/]+/[^/]+/slot/){escaped}",
        lambda m: f"id: {m.group(1)}{new_name}",
        content,
    )

    # name field; an optional "_slot" suffix on the old name is dropped.
    content = re.sub(
        rf"name: {escaped}(_slot)?{name_end}",
        lambda m: f"name: {new_name}",
        content,
    )

    # title field: any title line mentioning the old name (words separated by
    # spaces or underscores, any case) is replaced by "<New Name> Slot".
    title_pattern = escaped.replace("_", "[ _]")
    new_title = new_name.replace("_", " ").title()
    content = re.sub(
        rf"title: .*{title_pattern}.*",
        lambda m: f"title: {new_title} Slot",
        content,
        flags=re.IGNORECASE,
    )

    # Write the new file first so the content is never lost, then drop the old.
    new_file.write_text(content, encoding="utf-8")
    old_file.unlink()

    print(f" Renamed: {old_name}.yaml -> {new_name}.yaml")
    return True
def update_class_references(classes_dir: Path, old_name: str, new_name: str) -> int:
    """Rewrite references to a renamed slot in every class YAML file.

    Three kinds of reference are rewritten in each ``*.yaml`` file directly
    inside *classes_dir*:

    * list items consisting solely of the slot name (``- old_name``),
    * mapping keys (``old_name:``), as used under ``slot_usage``,
    * import paths of the form ``../slots/old_name``.

    Other occurrences of the name (for example inside free text) are left
    untouched, as are longer identifiers that merely start with *old_name*.

    Args:
        classes_dir: Directory containing the class YAML files.
        old_name: Slot name to replace.
        new_name: Replacement slot name.

    Returns:
        The number of references actually rewritten across all files.
    """
    # Escape the name so any regex metacharacter in it is matched literally.
    escaped = re.escape(old_name)
    list_item = re.compile(rf"^(\s*- ){escaped}$", re.MULTILINE)
    usage_key = re.compile(rf"^(\s*){escaped}:", re.MULTILINE)
    # The lookahead stops "../slots/old_name_suffix" from being rewritten.
    import_path = re.compile(rf"(\.\./slots/){escaped}(?![A-Za-z0-9_])")

    count = 0
    for yaml_file in classes_dir.glob("*.yaml"):
        # YAML is UTF-8; do not depend on the platform's default encoding.
        content = yaml_file.read_text(encoding="utf-8")

        # Cheap substring pre-filter before running the regexes.
        if old_name not in content:
            continue

        # subn reports how many substitutions were made, so the tally reflects
        # real rewrites rather than raw substring occurrences.
        new_content, n_items = list_item.subn(
            lambda m: f"{m.group(1)}{new_name}", content
        )
        new_content, n_keys = usage_key.subn(
            lambda m: f"{m.group(1)}{new_name}:", new_content
        )
        new_content, n_imports = import_path.subn(
            lambda m: f"{m.group(1)}{new_name}", new_content
        )

        if new_content != content:
            replaced = n_items + n_keys + n_imports
            yaml_file.write_text(new_content, encoding="utf-8")
            count += replaced
            print(f" Updated {yaml_file.name}: {replaced} references")

    return count
def update_main_schema(schema_file: Path, old_name: str, new_name: str) -> int:
    """Rewrite a renamed slot's import path in the main schema file.

    Only paths of the form ``modules/slots/old_name`` are rewritten; longer
    slot names that merely start with *old_name* are left untouched.

    Args:
        schema_file: Path to the main schema YAML file.
        old_name: Slot name to replace.
        new_name: Replacement slot name.

    Returns:
        The number of import paths actually rewritten; 0 if the file does not
        exist or contains no matching import.
    """
    if not schema_file.exists():
        return 0

    # YAML is UTF-8; do not depend on the platform's default encoding.
    content = schema_file.read_text(encoding="utf-8")

    # Cheap substring pre-filter before running the regex.
    if old_name not in content:
        return 0

    # Escaped so the name is matched literally; the lookahead requires the
    # identifier to end here so "old_name_suffix" imports are not touched.
    pattern = rf"(modules/slots/){re.escape(old_name)}(?![A-Za-z0-9_])"

    # subn reports the number of substitutions, so the returned count reflects
    # real rewrites rather than raw substring occurrences.
    new_content, count = re.subn(
        pattern,
        lambda m: f"{m.group(1)}{new_name}",
        content,
    )

    if new_content != content:
        schema_file.write_text(new_content, encoding="utf-8")
        print(f" Updated main schema: {count} references")
        return count

    return 0
def main():
    """Apply every rename in SLOT_RENAMES and print a summary of the work done."""
    # Locations of the LinkML schema tree this script operates on.
    schema_dir = Path("/Users/kempersc/apps/glam/schemas/20251121/linkml")
    modules_dir = schema_dir / "modules"
    main_schema = schema_dir / "01_custodian_name_modular.yaml"
    slots_dir = modules_dir / "slots"
    classes_dir = modules_dir / "classes"

    print("=== Renaming slots with ontology prefixes (Rule 42) ===\n")

    renamed_files = 0
    updated_refs = 0

    for old_name in SLOT_RENAMES:
        new_name = SLOT_RENAMES[old_name]
        print(f"\n{old_name} -> {new_name}")

        # Step 1: the slot's own file (True counts as one renamed file).
        renamed_files += int(rename_slot_file(slots_dir, old_name, new_name))

        # Step 2: references from class files, then step 3: main schema imports.
        class_refs = update_class_references(classes_dir, old_name, new_name)
        schema_refs = update_main_schema(main_schema, old_name, new_name)
        updated_refs += class_refs + schema_refs

    print(f"\n=== Summary ===")
    print(f"Slot files renamed: {renamed_files}")
    print(f"Class references updated: {updated_refs}")


# Run the renames only when executed as a script, not when imported.
if __name__ == "__main__":
    main()