- Removed obsolete slots: `has_or_had_custodian_observation`, `provider`, and `specificity_annotation`. - Updated the `has_or_had_score` slot to use the `SpecificityScore` class and revised its description and examples. - Added new slots: `end_seconds`, `end_time`, `has_archive_path`, `has_or_had_custodian_name`, `protocol_name`, and `protocol_version`. - Introduced a script, `check_annotation_types.py`, to validate the presence and structure of `custodian_types` in YAML files. - Added a script, `update_specificity.py`, to automate the migration from `SpecificityAnnotation` to `SpecificityScore`.
67 lines
No EOL
3.3 KiB
Python
67 lines
No EOL
3.3 KiB
Python
import yaml
|
|
import os
|
|
|
|
def _find_duplicate_keys(lines, target_keys):
    """Yield ``(key, index, previous_index)`` for each repeated structural key.

    This is a line-based heuristic, not a YAML parser. A key counts as a
    duplicate when the same key already appeared at the same indentation and
    no non-blank, non-comment line with a smaller indentation lies between
    the two occurrences (i.e. both seem to belong to the same mapping).

    Args:
        lines: The lines of one file, in order.
        target_keys: Container of key names worth reporting; any other key is
            ignored to keep the output free of noise.

    Yields:
        Tuples of the key text, the 0-based index of the repeated line and
        the 0-based index of the most recent earlier occurrence.
    """
    last_seen = {}  # {indent: {key: index of latest occurrence}}

    for index, line in enumerate(lines):
        stripped = line.strip()
        if not stripped or stripped.startswith('#'):
            continue

        indent = len(line) - len(line.lstrip())

        # Any real line closes every block that is indented deeper than it,
        # so keys remembered for those deeper levels can no longer collide
        # with later ones. List-item lines take part in this pruning too.
        for level in [lvl for lvl in last_seen if lvl > indent]:
            del last_seen[level]

        # List items are never treated as key candidates themselves.
        if stripped.startswith('-') or ':' not in stripped:
            continue

        key = stripped.split(':', 1)[0].strip()
        if key not in target_keys:
            continue

        seen_here = last_seen.setdefault(indent, {})
        if key in seen_here:
            yield key, index, seen_here[key]
        # Always remember the latest occurrence so a third repeat is
        # reported against the second one, not the first.
        seen_here[key] = index


def check_dir(directory):
    """Report duplicated structural keys in the ``.yaml`` files under *directory*.

    Walks *directory* recursively, scans every file whose name ends in
    ``.yaml`` and prints one ``DUPLICATE KEY`` line (with 1-based line
    numbers) for each repeated key found by the indentation heuristic.

    Args:
        directory: Root directory to walk. A directory that does not exist
            simply yields no files.

    Returns:
        None. Results are written to standard output only.
    """
    print(f"Checking directory: {directory}")
    target_keys = frozenset((
        "related_mappings", "close_mappings", "exact_mappings",
        "broad_mappings", "narrow_mappings", "slots", "slot_usage",
        "attributes", "annotations", "description", "class_uri", "id",
        "name", "title", "imports", "prefixes", "default_prefix",
        "default_range", "classes", "types", "enums", "subsets",
    ))

    for root, _dirs, files in os.walk(directory):
        for file in files:
            if not file.endswith(".yaml"):
                continue

            path = os.path.join(root, file)

            # Decode explicitly as UTF-8 so results do not depend on the
            # platform's default encoding. Undecodable bytes are replaced
            # rather than aborting the scan: only the ASCII key names matter.
            with open(path, 'r', encoding='utf-8', errors='replace') as f:
                lines = f.readlines()

            for key, i, prev_line in _find_duplicate_keys(lines, target_keys):
                print(f"DUPLICATE KEY '{key}' in {path} at line {i+1} (previous at {prev_line+1})")
|
|
|
|
# Scan the class modules first, then the slot modules.
for schema_dir in (
    "schemas/20251121/linkml/modules/classes",
    "schemas/20251121/linkml/modules/slots",
):
    check_dir(schema_dir)