"""Report class files that still reference archived LinkML slots.

The archive directory holds one YAML file per archived slot. Every class
YAML file is scanned line by line for three kinds of reference to such a
slot: an import (``../slots/<name>``), a list item (``- <name>``) and a
mapping key (``<name>:``).
"""

import glob
import os
import re
from typing import Dict, Iterable, List, Optional, Set

ARCHIVE_DIR = "/Users/kempersc/apps/glam/schemas/20251121/linkml/modules/slots/archive/"
CLASSES_DIR = "/Users/kempersc/apps/glam/schemas/20251121/linkml/modules/classes/"

_YAML_SUFFIX = ".yaml"
_ARCHIVE_MARKER = "_archived_"

# Captures the path component that follows "../slots/" up to the next
# whitespace, quote, comment marker, comma or closing bracket.
_IMPORT_TARGET_RE = re.compile(r"\.\./slots/([^\s'\"#,\]]+)")


def get_archived_slot_names(archive_dir: Optional[str] = None) -> Set[str]:
    """Return the slot names found in the archive directory.

    File names follow ``name.yaml`` or ``name_archived_YYYYMMDD.yaml``; the
    extension and everything from the first ``_archived_`` marker onwards are
    removed. Entries that do not end in ``.yaml`` are ignored.

    Args:
        archive_dir: Directory to list. Defaults to ``ARCHIVE_DIR``.

    Returns:
        The set of archived slot names.

    Raises:
        OSError: If the directory cannot be listed.
    """
    if archive_dir is None:
        archive_dir = ARCHIVE_DIR

    slots = set()
    for entry in os.listdir(archive_dir):
        if not entry.endswith(_YAML_SUFFIX):
            continue
        name = entry[: -len(_YAML_SUFFIX)].split(_ARCHIVE_MARKER)[0]
        # A file called ".yaml" or "_archived_X.yaml" would give an empty
        # name, which would then match every import and every ":" line.
        if name:
            slots.add(name)
    return slots


def _match_line(stripped: str, archived_slots: Set[str]) -> Dict[str, str]:
    """Map each archived slot referenced on one stripped line to its kind.

    A slot is reported at most once per line; when several checks hit the
    same slot the priority is import, then list item, then mapping key.
    """
    matches = {}  # type: Dict[str, str]

    # Import check: "- ../slots/slotname". The target is compared exactly, so
    # archived slot "has" does not match "../slots/has_or_had_description".
    if not stripped.startswith("#"):
        for target in _IMPORT_TARGET_RE.findall(stripped):
            if target.endswith(_YAML_SUFFIX):
                target = target[: -len(_YAML_SUFFIX)]
            if target in archived_slots:
                matches.setdefault(target, "Import")

    # List item check: "- slotname". This is deliberately loose: it fires for
    # any list in the file, not only for a "slots:" list.
    if stripped.startswith("- "):
        item = stripped[2:]
        if item in archived_slots:
            matches.setdefault(item, "Usage in slots list")

    # Key check: "slotname:". Also loose: a common word such as
    # "description" is reported wherever it appears as a key.
    key, colon, _ = stripped.partition(":")
    if colon and key in archived_slots:
        matches.setdefault(key, "Usage as key")

    return matches


def find_references(
    archived_slots: Iterable[str], classes_dir: Optional[str] = None
) -> Dict[str, List[str]]:
    """Find references to archived slots in the class YAML files.

    Args:
        archived_slots: Names of the archived slots to look for.
        classes_dir: Directory whose ``*.yaml`` files are scanned. Defaults
            to ``CLASSES_DIR``.

    Returns:
        A dict mapping each referenced slot name to a list of location
        strings of the form ``"<path> (line <n>): <kind>"``, where kind is
        ``Import``, ``Usage in slots list`` or ``Usage as key``. Slots with
        no references are absent from the dict.
    """
    if classes_dir is None:
        classes_dir = CLASSES_DIR

    slots = {slot for slot in archived_slots if slot}
    references = {}  # type: Dict[str, List[str]]

    # Sorted so that the report is stable from run to run.
    for cls_file in sorted(glob.glob(os.path.join(classes_dir, "*.yaml"))):
        with open(cls_file, "r", encoding="utf-8") as handle:
            for line_no, line in enumerate(handle, start=1):
                found = _match_line(line.strip(), slots)
                for slot, kind in found.items():
                    references.setdefault(slot, []).append(
                        f"{cls_file} (line {line_no}): {kind}"
                    )
    return references


def main():
    """Print every archived slot that class files still reference."""
    print("Identifying archived slots...")
    slots = get_archived_slot_names()
    print(f"Found {len(slots)} archived slots.")

    # Nothing is filtered out: a common name such as 'description' that has
    # been archived as a slot should be replaced (has_or_had_description), so
    # its hits are worth reviewing even if some are false positives.
    print("Scanning class files for references...")
    refs = find_references(slots)

    if refs:
        print(f"Found {len(refs)} archived slots still referenced in classes:")
        for slot in sorted(refs):
            print(f"\nSLOT: {slot}")
            for loc in refs[slot]:
                print(f"  {loc}")
    else:
        print("No dead links found.")


if __name__ == "__main__":
    main()