"""Report class files that still reference archived LinkML slots.

Slot names are derived from the YAML files in the slot archive directory;
every class YAML file is then scanned line by line for imports, slot-list
entries and mapping keys that mention one of those names.
"""

import os
import re
import glob

ARCHIVE_DIR = "/Users/kempersc/apps/glam/schemas/20251121/linkml/modules/slots/archive/"
CLASSES_DIR = "/Users/kempersc/apps/glam/schemas/20251121/linkml/modules/classes/"

# Metadata keys that mimic slot names but are valid LinkML structure.
# "Usage as key" findings are suppressed for these names.
SAFE_METADATA_KEYS = frozenset({
    "title", "description", "name", "id", "status", "notes",
    "comments", "examples", "todos", "see_also", "range", "slot_usage",
    "required", "multivalued", "inlined", "identifier", "value", "unit",
    "prefixes", "imports", "classes", "slots", "attributes",
    "exact_mappings", "close_mappings", "related_mappings",
})

_YAML_SUFFIX = ".yaml"
_ARCHIVE_MARKER = "_archived_"


def get_archived_slot_names(archive_dir=None) -> set:
    """Return the set of slot names found in the archive directory.

    Args:
        archive_dir: Directory to list. Defaults to ``ARCHIVE_DIR``
            (resolved at call time).

    Returns:
        A set of slot-name strings. File names follow either ``name.yaml``
        or ``name_archived_YYYYMMDD.yaml``; the extension and everything
        from ``_archived_`` onwards are removed. Non-YAML files are ignored.

    Raises:
        OSError: If the directory cannot be listed.
    """
    if archive_dir is None:
        archive_dir = ARCHIVE_DIR

    slots = set()
    for filename in os.listdir(archive_dir):
        if not filename.endswith(_YAML_SUFFIX):
            continue
        name = filename[: -len(_YAML_SUFFIX)].split(_ARCHIVE_MARKER)[0]
        # A file such as ".yaml" yields an empty name, which would match
        # every import line in the scan; it is not a slot, so skip it.
        if name:
            slots.add(name)
    return slots


def find_references(archived_slots, classes_dir=None) -> dict:
    """Find lines in class YAML files that mention an archived slot.

    Three textual patterns are recognised on each stripped, non-comment line:

    * ``Import`` - the line contains ``../slots/<slot>`` and the slot name is
      not followed by another name character, so ``has_type`` does not match
      an import of ``has_type_code``.
    * ``Usage in slots list`` - the line is exactly ``- <slot>``.
    * ``Usage as key`` - the line starts with ``<slot>:`` and the slot is not
      one of ``SAFE_METADATA_KEYS``. This also flags ``slot_usage`` entries,
      which is intended: an archived slot should not be refined.

    At most one finding is recorded per slot per line, checked in the order
    listed above.

    Args:
        archived_slots: Iterable of archived slot names.
        classes_dir: Directory whose ``*.yaml`` files are scanned. Defaults
            to ``CLASSES_DIR`` (resolved at call time).

    Returns:
        A dict mapping each referenced slot name to a list of strings of the
        form ``"<path> (line <n>): <kind>"``. Slots without references are
        absent. Files and slots are processed in sorted order so the result
        is reproducible between runs.

    Raises:
        OSError: If a class file cannot be opened.
        UnicodeDecodeError: If a class file is not valid UTF-8.
    """
    if classes_dir is None:
        classes_dir = CLASSES_DIR

    # Everything that depends only on the slot name is built once here
    # instead of once per slot per line.
    matchers = [
        (
            slot,
            re.compile(r"\.\./slots/" + re.escape(slot) + r"(?![\w-])"),
            f"- {slot}",
            f"{slot}:",
        )
        for slot in sorted(archived_slots)
    ]

    references = {}  # {slot_name: [locations]}
    class_files = sorted(glob.glob(os.path.join(classes_dir, "*.yaml")))
    for cls_file in class_files:
        with open(cls_file, "r", encoding="utf-8") as f:
            for line_no, line in enumerate(f, start=1):
                stripped = line.strip()
                if not stripped or stripped.startswith("#"):
                    continue

                # Cheap pre-check so the regexes only run on candidate lines.
                has_slot_path = "../slots/" in stripped

                for slot, import_re, list_item, key_prefix in matchers:
                    if has_slot_path and import_re.search(stripped):
                        kind = "Import"
                    elif stripped == list_item:
                        kind = "Usage in slots list"
                    elif (
                        stripped.startswith(key_prefix)
                        and slot not in SAFE_METADATA_KEYS
                    ):
                        kind = "Usage as key"
                    else:
                        continue
                    references.setdefault(slot, []).append(
                        f"{cls_file} (line {line_no}): {kind}"
                    )
    return references


def main():
    """Print every archived slot that is still referenced by a class file."""
    print("Identifying archived slots...")
    slots = get_archived_slot_names()
    print(f"Found {len(slots)} archived slots.")

    # Every archived name is checked. Names that double as LinkML metadata
    # keys (e.g. 'description') are only exempt from the "Usage as key"
    # pattern; if such a slot is archived, classes should use its replacement
    # (e.g. has_or_had_description) in imports and slot lists.
    print("Scanning class files for references...")
    refs = find_references(slots)

    if not refs:
        print("No dead links found.")
        return

    print(f"Found {len(refs)} archived slots still referenced in classes:")
    for slot in sorted(refs):
        print(f"\nSLOT: {slot}")
        for loc in refs[slot]:
            print(f" {loc}")


if __name__ == "__main__":
    main()