glam/scripts/migrate_staff_count.py
kempersc 7cf10084b4 Implement scripts for schema modifications and ontology verification
- Added `fix_dual_class_link.py` to remove dual class link references from specified YAML files.
- Created `fix_specific_ghosts.py` to apply specific replacements in YAML files based on defined mappings.
- Introduced `migrate_staff_count.py` to migrate staff count references to a new structure in specified YAML files.
- Developed `migrate_type_slots.py` to replace type-related slots with new identifiers across YAML files.
- Implemented `scan_ghost_references.py` to identify and report ghost references to archived slots and classes in YAML files.
- Added `verify_ontology_terms.py` to verify the presence of ontology terms in specified ontology files against schema definitions.
2026-01-29 17:10:25 +01:00

102 lines
4.3 KiB
Python

import os
import yaml
import re
# Absolute paths of the YAML class modules this script processes. Every entry
# lives in the same directory, so the list is assembled from a shared prefix
# and the individual class names.
_CLASSES_DIR = "/Users/kempersc/apps/glam/schemas/20251121/linkml/modules/classes"
_CLASS_NAMES = (
    "ConservationLab",
    "OrganizationBranch",
    "OrganizationalStructure",
    "ResearchCenter",
    "GiftShop",
)
FILES = [f"{_CLASSES_DIR}/{class_name}.yaml" for class_name in _CLASS_NAMES]
# An example value such as "    staff_count: 12": indented key, integer value.
_STAFF_COUNT_EXAMPLE_RE = re.compile(r"([ \t]+)staff_count:\s+(\d+)\s*$")
# A bare "staff_count:" mapping key (optionally followed by a comment), i.e.
# the header of a nested block such as a slot_usage entry.
_STAFF_COUNT_HEADER_RE = re.compile(r"([ \t]*)staff_count:\s*(#.*)?$")


def _quantity_slot_usage(indent):
    """Return the ``has_or_had_quantity`` slot_usage lines, keyed at ``indent``."""
    return [
        f"{indent}has_or_had_quantity:\n",
        f"{indent}  range: Quantity\n",
        f"{indent}  inlined: true\n",
        f"{indent}  description: Quantity of staff members. MIGRATED from staff_count (2026-01-28).\n",
    ]


def _quantity_example(indent, count):
    """Return the lines replacing an example ``staff_count: <count>`` entry."""
    return [
        f"{indent}has_or_had_quantity:\n",
        f"{indent}- numeric_value: {count}\n",
        # Children of the list item are nested below it so the result is a
        # single mapping: {numeric_value, has_or_had_unit: {unit_text}}.
        f"{indent}  has_or_had_unit:\n",
        f"{indent}    unit_text: staff_members\n",
    ]


def _migrate_lines(lines):
    """Return a rewritten copy of ``lines`` with staff_count references migrated.

    Four kinds of lines are rewritten; everything else is copied verbatim:

    1. the import entry ``- ../slots/staff_count`` is replaced by the
       ``has_or_had_quantity`` slot import and the ``Quantity`` class import
       (each only if not already imported), at the original entry's indent;
    2. the slot list entry ``- staff_count`` is renamed;
    3. an example value ``staff_count: <int>`` becomes a quantity entry;
    4. a bare ``staff_count:`` key and everything nested under it is replaced
       by a ``has_or_had_quantity`` block at the same indent.
    """
    # Only actual import paths count as "already imported"; a mere mention of
    # the slot name elsewhere in the file must not suppress the new import.
    has_slot_import = any("../slots/has_or_had_quantity" in ln for ln in lines)
    has_class_import = any("../classes/Quantity" in ln for ln in lines)

    out = []
    block_indent = None  # leading whitespace of the staff_count: block being skipped
    pending_blanks = []  # blank lines seen in the block, not yet known to belong to it

    for line in lines:
        if block_indent is not None:
            if not line.strip():
                # A blank line does not tell us whether the block has ended.
                pending_blanks.append(line)
                continue
            if len(line) - len(line.lstrip()) > len(block_indent):
                # Still inside the old block: drop it, including inner blanks.
                pending_blanks.clear()
                continue
            # Dedent: the block is over. Emit the replacement, restore any
            # trailing blank lines, then handle this line by the normal rules.
            out.extend(_quantity_slot_usage(block_indent))
            out.extend(pending_blanks)
            pending_blanks = []
            block_indent = None

        stripped = line.strip()
        lead = line[: len(line) - len(line.lstrip())]

        # 1. Imports
        if stripped == "- ../slots/staff_count":
            if not has_slot_import:
                out.append(f"{lead}- ../slots/has_or_had_quantity\n")
                has_slot_import = True
            if not has_class_import:
                out.append(f"{lead}- ../classes/Quantity\n")
                has_class_import = True
            continue

        # 2. Slots list
        if stripped == "- staff_count":
            out.append(line.replace("staff_count", "has_or_had_quantity"))
            continue

        # 3. Examples. Checked before the block header so a one-line value is
        # never mistaken for the start of a nested block.
        example = _STAFF_COUNT_EXAMPLE_RE.match(line)
        if example:
            out.extend(_quantity_example(example.group(1), example.group(2)))
            continue

        # 4. Nested block header (e.g. under slot_usage)
        header = _STAFF_COUNT_HEADER_RE.match(line)
        if header:
            block_indent = header.group(1)
            continue

        out.append(line)

    # The block may run to the end of the file; still emit its replacement.
    if block_indent is not None:
        out.extend(_quantity_slot_usage(block_indent))
        out.extend(pending_blanks)

    return out


def migrate_staff_count(filepath):
    """Rewrite the YAML file at ``filepath`` in place, migrating staff_count.

    The file is processed line by line (it is not parsed as YAML), so
    comments and formatting of untouched lines are preserved.

    Args:
        filepath: Path of the YAML file to rewrite.

    Returns:
        None. Progress and I/O errors are reported on stdout; read/write and
        decoding failures are caught so a batch run continues with the next
        file.
    """
    print(f"Processing {filepath}...")
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            lines = f.readlines()
        new_lines = _migrate_lines(lines)
        with open(filepath, "w", encoding="utf-8") as f:
            f.writelines(new_lines)
        print(f"Updated {filepath}")
    except (OSError, UnicodeError) as e:
        print(f"Error processing {filepath}: {e}")
# Script entry point: migrate every configured file that exists on disk and
# report the ones that are missing.
if __name__ == "__main__":
    for target in FILES:
        if not os.path.exists(target):
            print(f"File not found: {target}")
            continue
        migrate_staff_count(target)