glam/fix_dangling.py
kempersc fc405445c6 Refactor and update schema definitions
- Removed obsolete slots: `has_or_had_custodian_observation`, `provider`, and `specificity_annotation`.
- Updated `has_or_had_score` slot to use `SpecificityScore` class and modified its description and examples.
- Added new slots: `end_seconds`, `end_time`, `has_archive_path`, `has_or_had_custodian_name`, `protocol_name`, and `protocol_version`.
- Introduced a script `check_annotation_types.py` to validate the presence and structure of `custodian_types` in YAML files.
- Added a script `update_specificity.py` to automate updates related to `SpecificityAnnotation` to `SpecificityScore`.
2026-02-01 19:55:38 +01:00

59 lines
2 KiB
Python

import os
import re
# Keys whose value is a YAML list. A "- item" line at 4-space indentation is
# only kept when the most recent 4-space key is one of these.
_LIST_KEYS = frozenset({
    "slots",
    "exact_mappings", "close_mappings", "broad_mappings", "related_mappings",
    "narrow_mappings",
    "examples", "comments", "see_also", "keywords", "structured_aliases",
    "subsets", "mixins", "apply_to", "union_of", "values",
    "equals_expression", "equals_string_in",
    "aliases", "local_names", "defines",
})


def fix_dangling_in_file(path):
    """Remove dangling list items at 4-space indentation from a YAML file.

    The file is scanned line by line (no YAML parsing). A line indented by
    exactly four spaces that starts with ``-`` is treated as dangling, and
    dropped, when the most recent key seen at four spaces is not one of the
    list-valued keys in ``_LIST_KEYS`` (or when no such key is active because
    the indentation dropped below four in between).

    Lines nested under a dropped item (indentation greater than four) are
    dropped with it, so that a removed multi-line item does not leave
    orphaned children behind. Blank lines and comment lines are always kept.

    Every removed line is reported on stdout. The file is rewritten in place,
    and only when at least one line was removed.

    Args:
        path: Path of the YAML file to fix in place.

    Raises:
        OSError: If the file cannot be read or written.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # newline="" disables newline translation so the file's own line endings
    # (LF or CRLF) survive the round trip unchanged.
    with open(path, 'r', encoding='utf-8', newline='') as f:
        lines = f.readlines()

    new_lines = []
    last_key_at_4 = None
    # True while we are inside the body of a list item that was just removed.
    dropping_children = False

    for i, line in enumerate(lines):
        stripped = line.strip()
        # Blank lines and comments never affect the key tracking.
        if not stripped or stripped.startswith('#'):
            new_lines.append(line)
            continue

        indent = len(line) - len(line.lstrip())

        if dropping_children:
            if indent > 4:
                # Deeper-indented lines right after a removed "- item" belong
                # to that item; keeping them would corrupt the document.
                print(f"Removing continuation of dangling list item in {path} at line {i+1}: {stripped}")
                continue
            dropping_children = False

        is_list_item = stripped.startswith('-')

        # A key at 4 spaces: remember it so following list items can be judged.
        if indent == 4 and not is_list_item and ':' in stripped:
            last_key_at_4 = stripped.split(':')[0].strip()
            new_lines.append(line)
            continue

        # A list item at 4 spaces is only legitimate under a list-valued key.
        if indent == 4 and is_list_item and last_key_at_4 not in _LIST_KEYS:
            print(f"Removing dangling list item in {path} at line {i+1} (after {last_key_at_4}): {stripped}")
            dropping_children = True
            continue

        # Leaving the 4-space level invalidates the remembered key.
        if indent < 4:
            last_key_at_4 = None
        new_lines.append(line)

    # Leave untouched files alone instead of rewriting identical content.
    if new_lines != lines:
        with open(path, 'w', encoding='utf-8', newline='') as f:
            f.writelines(new_lines)
def process_directory(directory):
    """Apply ``fix_dangling_in_file`` to every ``.yaml`` file under *directory*.

    The tree is walked recursively with ``os.walk``; files whose name does not
    end in ``.yaml`` are ignored.
    """
    for folder, _subdirs, filenames in os.walk(directory):
        for filename in filenames:
            if not filename.endswith(".yaml"):
                continue
            yaml_path = os.path.join(folder, filename)
            fix_dangling_in_file(yaml_path)
# Script entry point: fix every .yaml file under the LinkML class-module
# directory. The path is relative, so it is resolved against the current
# working directory. NOTE: there is no __main__ guard, so this call also
# runs when the module is imported.
process_directory("schemas/20251121/linkml/modules/classes")