import os

# Mapping keys whose repeated occurrences inside one YAML mapping are removed.
_KEYS_TO_CHECK = frozenset(
    ("broad_mappings", "close_mappings", "related_mappings", "exact_mappings")
)


def remove_duplicates_in_file(path):
    """Drop repeated ``*_mappings`` keys from the YAML file at *path*, in place.

    The file is scanned line by line (it is not parsed as YAML). For every
    indentation level the function remembers which of the tracked keys have
    already been seen; a second occurrence at the same level is removed
    together with the lines that belong to it, i.e. more deeply indented
    lines and block-sequence entries (``- item``) written at the key's own
    indentation. The first occurrence always wins.

    The file is read and written as UTF-8 with its original line endings,
    and it is only rewritten when at least one duplicate was removed. A
    message is printed for each removed key.

    Args:
        path: Path of the YAML file to clean up.
    """
    # newline='' keeps the original line terminators (LF / CRLF) untouched.
    with open(path, 'r', encoding='utf-8', newline='') as f:
        lines = f.readlines()

    new_lines = []
    # {indent: set(keys)} -- tracked keys already seen at each indent level.
    # Deeper levels are discarded whenever the indentation decreases, so the
    # same key under a different parent is not mistaken for a duplicate.
    keys_at_indent = {}
    prev_indent = 0

    # While skipping, lines that belong to the removed duplicate are dropped.
    skip_mode = False
    skip_indent = -1
    removed = 0

    for line_no, line in enumerate(lines, start=1):
        stripped = line.strip()

        # Blank lines carry no indentation information; keep them verbatim.
        if not stripped:
            new_lines.append(line)
            continue

        indent = len(line) - len(line.lstrip())

        if skip_mode:
            # A block sequence may sit at the same column as its key
            # ("key:\n- a\n- b"); those entries belong to the removed key
            # too. '---' (document start) is deliberately not matched.
            is_seq_entry = stripped == '-' or stripped.startswith('- ')
            if indent > skip_indent or (indent == skip_indent and is_seq_entry):
                continue
            # Block of the removed key has ended.
            skip_mode = False
            skip_indent = -1

        if indent < prev_indent:
            # Leaving a nested block: forget keys recorded for deeper levels.
            for level in [k for k in keys_at_indent if k > indent]:
                del keys_at_indent[level]

        seen = keys_at_indent.setdefault(indent, set())

        # Only plain "key: ..." lines are candidates, not list items/comments.
        if ':' in stripped and not stripped.startswith(('-', '#')):
            key = stripped.split(':', 1)[0].strip()
            if key in _KEYS_TO_CHECK:
                if key in seen:
                    print(f"Removing duplicate key '{key}' in {path} at line {line_no}")
                    skip_mode = True
                    skip_indent = indent
                    removed += 1
                    continue
                seen.add(key)

        new_lines.append(line)
        prev_indent = indent

    # Leave untouched files alone (no mtime change, no newline rewriting).
    if removed:
        with open(path, 'w', encoding='utf-8', newline='') as f:
            f.writelines(new_lines)


def process_directory(directory):
    """Run :func:`remove_duplicates_in_file` on every ``.yaml`` file under *directory*.

    The tree is walked recursively with ``os.walk``; files with other
    extensions are ignored.

    Args:
        directory: Root directory to search.
    """
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".yaml"):
                remove_duplicates_in_file(os.path.join(root, file))


# NOTE: runs on import as well as when executed as a script; kept as-is so
# existing usage of this file does not change.
process_directory("schemas/20251121/linkml/modules/classes")