glam/check_duplicates.py

43 lines
No EOL
2.1 KiB
Python

import yaml
import os
def _key_column(line, key):
    """Return the column at which ``key:`` starts on ``line``, or ``None``.

    The key must be the first token on the line (optionally preceded by
    block-sequence markers such as ``- ``) and must be followed by a colon and
    then whitespace or end of line. Commented-out text and longer keys that
    merely contain ``key`` therefore do not match.
    """
    content = line.lstrip()
    column = len(line) - len(content)
    # Step over "- " markers so that "- key:" is attributed to the column of
    # the key itself, which is where its sibling keys are aligned.
    while content.startswith("- "):
        rest = content[1:].lstrip()
        column += len(content) - len(rest)
        content = rest
    if not content.startswith(key):
        return None
    tail = content[len(key):]
    if tail[:1] == ":" and (tail[1:2] == "" or tail[1:2].isspace()):
        return column
    return None


def _find_duplicate_key_lines(lines, key):
    """Return ``(first, second)`` 1-based line pairs where ``key`` repeats.

    This is an indentation heuristic, not a YAML parser: two occurrences are
    reported when they start at the same column and no non-blank, non-comment
    line with a smaller indentation appears between them.
    """
    pairs = []
    # column -> line number of the latest ``key`` in a block that is still open
    open_keys = {}
    for number, line in enumerate(lines, start=1):
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue
        indent = len(line) - len(line.lstrip())
        # A line indented less than a recorded key closes that key's mapping,
        # so a later occurrence at that column belongs to a different parent.
        for closed in [col for col in open_keys if col > indent]:
            del open_keys[closed]
        column = _key_column(line, key)
        if column is None:
            continue
        if column in open_keys:
            pairs.append((open_keys[column], number))
        open_keys[column] = number
    return pairs


def check_dir(directory, key="related_mappings"):
    """Report ``.yaml`` files under ``directory`` that repeat ``key`` in one mapping.

    The directory tree is walked recursively in sorted order. Each file ending
    in ``.yaml`` is scanned line by line with an indentation-based heuristic
    (the files are not parsed as YAML), and one message is printed per
    suspected duplicate.

    Args:
        directory: Root directory to scan.
        key: Mapping key to look for; defaults to ``"related_mappings"``.

    Returns:
        A list of ``(path, first_line, second_line)`` tuples, one per printed
        message, with 1-based line numbers.
    """
    findings = []
    for root, dirs, files in os.walk(directory):
        # Sorting in place makes os.walk descend in a stable order.
        dirs.sort()
        for name in sorted(files):
            if not name.endswith(".yaml"):
                continue
            path = os.path.join(root, name)
            # Decode explicitly so results do not depend on the platform
            # default; undecodable bytes are replaced because only the line
            # structure matters for this check.
            with open(path, "r", encoding="utf-8", errors="replace") as f:
                lines = f.readlines()
            for first, second in _find_duplicate_key_lines(lines, key):
                print(f"Potential duplicate {key} in {path} at lines {first} and {second}")
                findings.append((path, first, second))
    return findings
# Script entry: scan the hard-coded schema directory (path is relative to the
# current working directory) as soon as this module is executed.
check_dir(directory="schemas/20251121/linkml/modules/classes")