# glam/update_specificity.py

import os
import re

# Matches a YAML list item that is exactly the has_or_had_score slot,
# optionally followed by a trailing comment.
_SCORE_ITEM = re.compile(r'-\s+has_or_had_score(\s+#.*)?')


def _indent_of(line):
    """Return the number of leading whitespace characters of ``line``."""
    return len(line) - len(line.lstrip())


def _list_already_has_score(lines, index):
    """Tell whether the list containing ``lines[index]`` lists has_or_had_score.

    The "list" is the run of ``-`` items around ``index`` that share its
    indentation; blank lines and ``#`` comment lines inside the run are
    skipped. Any other line ends the run, so an item belonging to a
    different list elsewhere in the file is never taken into account.
    """
    indent = _indent_of(lines[index])
    for step in (-1, 1):  # scan upwards first, then downwards
        cursor = index + step
        while 0 <= cursor < len(lines):
            text = lines[cursor].strip()
            if not text or text.startswith("#"):
                cursor += step
                continue
            if not text.startswith("-") or _indent_of(lines[cursor]) != indent:
                break
            if _SCORE_ITEM.fullmatch(text):
                return True
            cursor += step
    return False


def _rename_specificity(content):
    """Return ``content`` with the specificity names migrated.

    * ``SpecificityAnnotation`` becomes ``SpecificityScore`` everywhere.
    * A line containing ``slots/specificity_annotation`` is rewritten to
      point at ``has_or_had_score``; it is dropped instead only when a
      ``slots/has_or_had_score`` path is already present in the text.
    * A ``- specificity_annotation`` list item is renamed, or dropped when
      its own list already contains ``- has_or_had_score``.
    * A ``specificity_annotation:`` key line is renamed.
    """
    content = content.replace("SpecificityAnnotation", "SpecificityScore")

    # Look for the import path itself rather than the bare slot name: the
    # bare name also occurs in comments and slot_usage, and dropping the old
    # import on that basis would leave the slot without any import.
    score_import_present = "slots/has_or_had_score" in content

    lines = content.split('\n')
    renamed = []
    for index, line in enumerate(lines):
        stripped = line.strip()

        if "slots/specificity_annotation" in line:
            if score_import_present:
                continue
            line = line.replace("specificity_annotation", "has_or_had_score")
        elif stripped == "- specificity_annotation":
            if _list_already_has_score(lines, index):
                continue
            line = line.replace("specificity_annotation", "has_or_had_score")
        elif stripped == "specificity_annotation:":
            line = line.replace("specificity_annotation:", "has_or_had_score:")

        renamed.append(line)

    return '\n'.join(renamed)


def _dedupe_imports(content):
    """Return ``content`` with repeated items under ``imports:`` removed.

    Items are compared after stripping surrounding whitespace and the first
    occurrence wins. An imports block ends at the first line that does not
    start with ``-``.
    """
    kept = []
    seen_imports = set()
    in_imports = False

    for line in content.split('\n'):
        stripped = line.strip()
        if stripped == "imports:":
            in_imports = True
        elif in_imports:
            if not stripped.startswith("-"):
                in_imports = False
            elif stripped in seen_imports:
                continue
            else:
                seen_imports.add(stripped)
        kept.append(line)

    return '\n'.join(kept)


def update_file(path):
    """Migrate one schema YAML file from specificity_annotation to has_or_had_score.

    The file is read as UTF-8, transformed as plain text (the YAML is not
    parsed), and written back only when the result differs from what was
    read; in that case ``Updating <path>`` is printed first.

    Args:
        path: Path of the YAML file to rewrite in place.
    """
    with open(path, 'r', encoding='utf-8') as f:
        original_content = f.read()

    final_content = _dedupe_imports(_rename_specificity(original_content))

    if final_content != original_content:
        print(f"Updating {path}")
        with open(path, 'w', encoding='utf-8') as f:
            f.write(final_content)

def process_directory(directory):
    """Apply update_file to every ``.yaml`` file below ``directory``.

    The tree is walked recursively with ``os.walk``; files whose name does
    not end in ``.yaml`` are left alone.
    """
    for parent, _subdirs, names in os.walk(directory):
        yaml_names = [name for name in names if name.endswith(".yaml")]
        for name in yaml_names:
            update_file(os.path.join(parent, name))

# Migrate the class modules first, then the slot modules. Both paths are
# relative to the current working directory.
for module_dir in (
    "schemas/20251121/linkml/modules/classes",
    "schemas/20251121/linkml/modules/slots",
):
    process_directory(module_dir)