"""Report structural YAML keys that appear twice inside the same mapping.

The check is a line-based heuristic, not a YAML parse: it compares the
indentation of ``key:`` lines and flags a key that re-appears at the same
indentation before any shallower line has closed the enclosing mapping.
"""

import os
import re

try:
    # The scan below is purely line-based and never calls PyYAML.  The import
    # is kept so the name stays available, but a missing package must not
    # prevent the check from running.
    import yaml  # noqa: F401
except ImportError:
    yaml = None

# Only these structural keys are checked; everything else is ignored to
# avoid noise from free-form content.
TARGET_KEYS = frozenset({
    "related_mappings",
    "close_mappings",
    "exact_mappings",
    "broad_mappings",
    "narrow_mappings",
    "slots",
    "slot_usage",
    "attributes",
    "annotations",
    "description",
    "class_uri",
    "id",
    "name",
    "title",
    "imports",
    "prefixes",
    "default_prefix",
    "default_range",
    "classes",
    "types",
    "enums",
    "subsets",
})

# A line whose value is a block-scalar header: "key: |", "key: >-", "key: |2".
_BLOCK_SCALAR_RE = re.compile(r":\s+[|>][-+0-9]*\s*(?:#.*)?$")


def _find_duplicate_keys(lines, target_keys=TARGET_KEYS):
    """Return ``(key, line_no, previous_line_no)`` for each repeated key.

    Line numbers are 1-based.  A key counts as repeated when it occurs twice
    at the same indentation with no less-indented, non-blank, non-comment
    line in between.

    Known limitations of the heuristic: keys on a ``- key: value`` list-item
    line are not examined themselves, and multi-line quoted or flow values
    are read as ordinary lines.
    """
    duplicates = []
    seen = {}  # {indent: {key: 0-based index of its latest occurrence}}
    scalar_indent = None  # key column of the block scalar being skipped

    for index, line in enumerate(lines):
        stripped = line.strip()
        if not stripped or stripped.startswith('#'):
            continue
        indent = len(line) - len(line.lstrip())

        # Text belonging to a block scalar is data, not structure.
        if scalar_indent is not None:
            if indent > scalar_indent:
                continue
            scalar_indent = None

        # A line at this indent closes every mapping nested deeper than it,
        # so keys remembered for those levels can no longer be duplicated.
        for level in [lvl for lvl in seen if lvl > indent]:
            del seen[level]

        is_list_item = stripped.startswith('-')
        if _BLOCK_SCALAR_RE.search(stripped):
            # For "- key: |" the scalar content is indented relative to the
            # key, not the dash, so measure the key's own column.
            key_text = stripped.lstrip('- ') if is_list_item else stripped
            scalar_indent = indent + len(stripped) - len(key_text)

        if is_list_item or ':' not in stripped:
            continue
        key = stripped.partition(':')[0].strip()
        if key not in target_keys:
            continue

        same_level = seen.setdefault(indent, {})
        if key in same_level:
            duplicates.append((key, index + 1, same_level[key] + 1))
        same_level[key] = index

    return duplicates


def check_dir(directory):
    """Scan every ``.yaml`` file under *directory* for duplicated keys.

    Prints one ``DUPLICATE KEY`` line per finding and returns the findings as
    a list of ``(path, key, line_no, previous_line_no)`` tuples with 1-based
    line numbers.  A missing directory produces no findings.
    """
    print(f"Checking directory: {directory}")
    findings = []
    for root, dirs, files in os.walk(directory):
        dirs.sort()  # in-place sort makes the traversal order deterministic
        for file_name in sorted(files):
            if not file_name.endswith(".yaml"):
                continue
            path = os.path.join(root, file_name)
            # YAML is UTF-8; tolerate a BOM and stray bytes rather than
            # depending on the locale or aborting the whole scan.
            with open(path, encoding="utf-8-sig", errors="replace") as handle:
                lines = handle.readlines()
            for key, line_no, previous_no in _find_duplicate_keys(lines):
                print(f"DUPLICATE KEY '{key}' in {path} at line {line_no} (previous at {previous_no})")
                findings.append((path, key, line_no, previous_no))
    return findings


check_dir("schemas/20251121/linkml/modules/classes")
check_dir("schemas/20251121/linkml/modules/slots")