"""Report "ghost" references to archived slots and classes in schema YAML files.

The names of archived entities are read from two plain-text listings
(``ARCHIVED_SLOTS_FILE`` and ``ARCHIVED_CLASSES_FILE``), one archived file name
per line.  Every ``.yaml`` file under ``CLASSES_DIR`` and ``SLOTS_DIR`` is then
scanned line by line for imports, slot-list entries, slot-usage keys and range
declarations that still mention one of those names.
"""

import os
import re
from typing import Iterable, List, Pattern, Set, Tuple

# Paths
SCHEMA_ROOT = "/Users/kempersc/apps/glam/schemas/20251121/linkml/modules"
CLASSES_DIR = os.path.join(SCHEMA_ROOT, "classes")
SLOTS_DIR = os.path.join(SCHEMA_ROOT, "slots")
# Relative paths: these listings are resolved against the current working
# directory when the script runs.
ARCHIVED_SLOTS_FILE = "archived_slots.txt"
ARCHIVED_CLASSES_FILE = "archived_classes.txt"

# "<name>_archived_<digits>.yaml" -> group(1) is "<name>".
_ARCHIVED_NAME_RE = re.compile(r"(.+)_archived_\d+\.yaml")

# A matched name must not continue into a longer identifier, otherwise
# "has_name" would be reported for "has_name_suffix".
_NAME_END = r"(?![\w-])"


def parse_archived_name(filename: str) -> str:
    """Return the entity name encoded in an archived file name.

    ``<name>_archived_<digits>.yaml`` yields ``<name>``.  Any other name
    ending in ``.yaml`` only loses that extension, and anything else is
    returned unchanged.
    """
    match = _ARCHIVED_NAME_RE.match(filename)
    if match:
        return match.group(1)
    # Fallback if there is no "_archived_<digits>" part.
    if filename.endswith(".yaml"):
        return filename[: -len(".yaml")]
    return filename


def load_archived_entities(filepath: str) -> Set[str]:
    """Read a listing of archived file names and return the entity names.

    Blank lines are ignored, as are entries whose parsed name is empty (for
    example a bare ``.yaml`` line): an empty name would match every line
    during scanning.  A missing listing prints a warning and yields an empty
    set instead of raising.
    """
    entities: Set[str] = set()
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            for line in f:
                filename = line.strip()
                if not filename:
                    continue
                entity_name = parse_archived_name(filename)
                if entity_name:
                    entities.add(entity_name)
    except FileNotFoundError:
        print(f"Warning: {filepath} not found.")
    return entities


def _compile_reference_patterns(
    names: Iterable[str], prefixes: Tuple[str, ...]
) -> List[Tuple[str, Pattern[str]]]:
    """Build one ``<prefix><name>`` pattern per name, sorted by name.

    Each pattern matches any of ``prefixes`` immediately followed by the name,
    provided the name is not continued by a word character or a hyphen.
    Sorting makes the order of reported issues reproducible between runs.
    """
    prefix_alt = "|".join(re.escape(prefix) for prefix in prefixes)
    return [
        (name, re.compile("(?:" + prefix_alt + ")" + re.escape(name) + _NAME_END))
        for name in sorted(names)
        if name
    ]


def _line_issues(
    stripped: str,
    slot_patterns: List[Tuple[str, Pattern[str]]],
    class_patterns: List[Tuple[str, Pattern[str]]],
) -> List[str]:
    """Return the issue descriptions (without location) for one stripped line."""
    found: List[str] = []
    for slot, import_re in slot_patterns:
        # Every slot check below needs the name to occur in the line, so a
        # plain substring test rules out most slots cheaply.
        if slot not in stripped:
            continue
        # Import: "../slots/<slot>" or "./<slot>" as a whole path component.
        if import_re.search(stripped):
            found.append(f"Import of archived slot '{slot}'")
        # Usage: an item "- <slot>" in a slots list (simple heuristic).
        if stripped == f"- {slot}":
            found.append(f"Usage of archived slot '{slot}'")
        # Slot usage keys: "<slot>:" at the start of the line.
        if stripped.startswith(f"{slot}:"):
            found.append(f"Slot usage key for archived slot '{slot}'")
    for cls, reference_re in class_patterns:
        # Import path ("/<Class>") or range declaration ("range: <Class>").
        if cls in stripped and reference_re.search(stripped):
            found.append(f"Reference to archived class '{cls}'")
    return found


def scan_files(
    directory: str,
    archived_slots: Iterable[str],
    archived_classes: Iterable[str],
) -> List[str]:
    """Scan the ``.yaml`` files under ``directory`` for archived-entity references.

    Args:
        directory: Root of the tree to walk.
        archived_slots: Names of archived slots to look for.
        archived_classes: Names of archived classes to look for.

    Returns:
        One ``"<path>:<line>: <description>"`` string per finding, ordered by
        directory walk (sorted names), line number, then entity name.

    Sub-directories whose name contains ``"archive"`` are not descended into.
    Only names below ``directory`` are tested, so a scan root that itself
    lives under an "archive" path is still scanned.  Files that cannot be read
    or decoded as UTF-8 are reported on stdout and skipped.
    """
    print(f"Scanning {directory}...")
    slot_patterns = _compile_reference_patterns(archived_slots, ("../slots/", "./"))
    class_patterns = _compile_reference_patterns(archived_classes, ("/", "range: "))

    issues: List[str] = []
    for root, dirs, files in os.walk(directory):
        # Prune archive directories in place so os.walk never enters them;
        # sorting keeps the traversal (and thus the report) deterministic.
        dirs[:] = sorted(d for d in dirs if "archive" not in d)
        for file in sorted(files):
            if not file.endswith(".yaml"):
                continue
            filepath = os.path.join(root, file)
            # Only the read can legitimately fail here; matching errors should
            # surface as real exceptions rather than as "Error reading".
            try:
                with open(filepath, "r", encoding="utf-8") as f:
                    content = f.read()
            except (OSError, UnicodeDecodeError) as e:
                print(f"Error reading {filepath}: {e}")
                continue
            for lineno, line in enumerate(content.split("\n"), start=1):
                stripped = line.strip()
                if not stripped:
                    continue
                for message in _line_issues(stripped, slot_patterns, class_patterns):
                    issues.append(f"{filepath}:{lineno}: {message}")
    return issues


def main() -> None:
    """Load both archived-entity listings, scan the schema tree, print findings."""
    archived_slots = load_archived_entities(ARCHIVED_SLOTS_FILE)
    archived_classes = load_archived_entities(ARCHIVED_CLASSES_FILE)

    # Remove common metadata fields that collide with archived slot names.
    archived_slots.discard("description")
    archived_slots.discard("title")
    archived_slots.discard("name")  # "name" might be a slot too, but also metadata

    print(f"Loaded {len(archived_slots)} archived slots.")
    print(f"Loaded {len(archived_classes)} archived classes.")

    issues = scan_files(CLASSES_DIR, archived_slots, archived_classes)
    # Also scan slots dir, though less likely to have cross-slot dependencies that rot
    issues.extend(scan_files(SLOTS_DIR, archived_slots, archived_classes))

    if issues:
        print("\nFound potential ghost references:")
        for issue in issues:
            print(issue)
    else:
        print("\nNo obvious ghost references found.")


if __name__ == "__main__":
    main()