- Removed obsolete slots: `has_or_had_custodian_observation`, `provider`, and `specificity_annotation`.
- Updated the `has_or_had_score` slot to use the `SpecificityScore` class and modified its description and examples.
- Added new slots: `end_seconds`, `end_time`, `has_archive_path`, `has_or_had_custodian_name`, `protocol_name`, and `protocol_version`.
- Introduced a script `check_annotation_types.py` to validate the presence and structure of `custodian_types` in YAML files.
- Added a script `update_specificity.py` to automate updates from `SpecificityAnnotation` to `SpecificityScore`.
78 lines
2.5 KiB
Python
78 lines
2.5 KiB
Python
import os
|
|
|
|
def remove_duplicates_in_file(path):
    """Strip repeated ``*_mappings`` keys from a YAML file, in place.

    The file is scanned line by line; it is *not* parsed as YAML. For every
    indentation level the function remembers which of the keys
    ``broad_mappings``, ``close_mappings``, ``related_mappings`` and
    ``exact_mappings`` it has already seen. A second occurrence at the same
    level is dropped together with every following non-blank line that is
    indented deeper than it, so the first occurrence always wins.

    Blank lines are always kept. Comment lines never influence the
    indentation tracking; they are dropped only when they sit inside the
    block of a removed key (indented deeper than that key).

    Args:
        path: Path of the YAML file to clean. Read and written as UTF-8.

    Returns:
        None. The file is rewritten only when at least one duplicate key was
        removed; one message per removed key is printed to stdout.
    """
    keys_to_check = {"broad_mappings", "close_mappings", "related_mappings", "exact_mappings"}

    # Explicit UTF-8 so the result does not depend on the platform's locale.
    with open(path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    new_lines = []
    # indent -> set of tracked keys already seen in the mapping currently
    # open at that indent.
    keys_at_indent = {}
    # Indent of the duplicate key whose block is being dropped, or None when
    # no block is being dropped.
    skip_indent = None
    removed = 0

    for lineno, line in enumerate(lines, start=1):
        stripped = line.strip()

        # Blank lines carry no structure: keep them untouched.
        if not stripped:
            new_lines.append(line)
            continue

        indent = len(line) - len(line.lstrip())

        # Anything nested below a removed key belongs to that key.
        if skip_indent is not None and indent > skip_indent:
            continue

        # Comments have no structural meaning in YAML, so their indentation
        # must neither reset the per-level key sets nor end a skipped block.
        if stripped.startswith('#'):
            new_lines.append(line)
            continue

        # A structural line at or above the removed key's level ends the skip.
        skip_indent = None

        # Mappings nested deeper than this line are closed now; forget them so
        # a sibling mapping may legitimately reuse the same key names.
        for level in [lvl for lvl in keys_at_indent if lvl > indent]:
            del keys_at_indent[level]
        seen = keys_at_indent.setdefault(indent, set())

        # List items ("- ...") are never treated as keys.
        if ':' in stripped and not stripped.startswith('-'):
            key = stripped.partition(':')[0].strip()
            if key in keys_to_check:
                if key in seen:
                    print(f"Removing duplicate key '{key}' in {path} at line {lineno}")
                    skip_indent = indent
                    removed += 1
                    continue
                seen.add(key)

        new_lines.append(line)

    # Leave files without duplicates completely untouched.
    if removed:
        with open(path, 'w', encoding='utf-8') as f:
            f.writelines(new_lines)
|
|
|
|
def process_directory(directory):
    """Apply remove_duplicates_in_file to each ``.yaml`` file below *directory*.

    The tree is traversed recursively with ``os.walk``; files whose name does
    not end in ``.yaml`` are ignored.
    """
    for current_dir, _subdirs, filenames in os.walk(directory):
        yaml_names = [name for name in filenames if name.endswith(".yaml")]
        for name in yaml_names:
            yaml_path = os.path.join(current_dir, name)
            remove_duplicates_in_file(yaml_path)
|
|
|
|
# Directory holding the LinkML class modules to clean. The call below sits at
# module level, so it runs whenever this file is executed or imported.
CLASSES_DIR = "schemas/20251121/linkml/modules/classes"
process_directory(CLASSES_DIR)
|