glam/update_specificity.py
kempersc fc405445c6 Refactor and update schema definitions
- Removed obsolete slots: `has_or_had_custodian_observation`, `provider`, and `specificity_annotation`.
- Updated `has_or_had_score` slot to use `SpecificityScore` class and modified its description and examples.
- Added new slots: `end_seconds`, `end_time`, `has_archive_path`, `has_or_had_custodian_name`, `protocol_name`, and `protocol_version`.
- Introduced a script `check_annotation_types.py` to validate the presence and structure of `custodian_types` in YAML files.
- Added a script `update_specificity.py` to automate updates related to `SpecificityAnnotation` to `SpecificityScore`.
2026-02-01 19:55:38 +01:00

101 lines
3.4 KiB
Python

import os
import re
def _dedupe_imports(lines: list) -> list:
    """Return *lines* with repeated ``imports:`` list entries removed.

    The first occurrence of each entry is kept.  Blank lines and comment
    lines inside the list do not terminate it; any other non-item line does.
    """
    kept = []
    seen = set()
    in_imports = False
    for line in lines:
        stripped = line.strip()
        if stripped == "imports:":
            in_imports = True
        elif in_imports:
            if stripped.startswith("- "):
                if stripped in seen:
                    continue
                seen.add(stripped)
            elif stripped and not stripped.startswith("#"):
                # A real, non-item line (e.g. the next top-level key)
                # closes the imports list.
                in_imports = False
        kept.append(line)
    return kept


def _dedupe_score_slot(lines: list) -> list:
    """Return *lines* with repeated ``- has_or_had_score`` items removed.

    De-duplication is scoped to one contiguous list (consecutive items at
    the same indentation), so a slot list belonging to a different class
    in the same file keeps its own entry.
    """
    score_item = "- has_or_had_score"
    kept = []
    run_indent = None  # indentation of the list currently being scanned
    seen_in_run = False
    for line in lines:
        stripped = line.strip()
        if stripped.startswith("- "):
            indent = len(line) - len(line.lstrip())
            if indent != run_indent:
                run_indent = indent
                seen_in_run = False
            if stripped == score_item:
                if seen_in_run:
                    continue
                seen_in_run = True
        elif stripped and not stripped.startswith("#"):
            # Any real non-item line ends the current list.
            run_indent = None
            seen_in_run = False
        kept.append(line)
    return kept


def _rewrite_specificity_refs(content: str) -> str:
    """Return *content* with SpecificityAnnotation references migrated.

    Renames the class ``SpecificityAnnotation`` to ``SpecificityScore``
    everywhere, and the slot ``specificity_annotation`` to
    ``has_or_had_score`` in import lines, slot-list items and
    ``slot_usage``-style keys.  Duplicates created by the rename are
    removed afterwards.
    """
    content = content.replace("SpecificityAnnotation", "SpecificityScore")

    rewritten = []
    for line in content.split("\n"):
        stripped = line.strip()
        if (
            "slots/specificity_annotation" in line
            or stripped == "- specificity_annotation"
            or stripped == "specificity_annotation:"
        ):
            # Always rename, then de-duplicate below.  Deciding up front
            # from a substring check would drop the import (or the slot)
            # whenever "has_or_had_score" merely appears somewhere else in
            # the file, e.g. in a description.
            line = line.replace("specificity_annotation", "has_or_had_score")
        rewritten.append(line)

    return "\n".join(_dedupe_imports(_dedupe_score_slot(rewritten)))


def update_file(path: str) -> None:
    """Migrate one schema file from SpecificityAnnotation to SpecificityScore.

    The file is rewritten in place, and "Updating <path>" is printed, only
    when the migration changes its text.  The rewrite is line-based text
    substitution, not a YAML parse.

    NOTE(review): if a mapping already holds both a ``specificity_annotation:``
    and a ``has_or_had_score:`` key, the rename yields duplicate keys; such
    files need a manual merge.

    Args:
        path: Path of the YAML file to update.
    """
    # newline="" keeps the file's own line endings on both read and write.
    with open(path, "r", encoding="utf-8", newline="") as f:
        original_content = f.read()

    final_content = _rewrite_specificity_refs(original_content)
    if final_content == original_content:
        return

    print(f"Updating {path}")
    with open(path, "w", encoding="utf-8", newline="") as f:
        f.write(final_content)
def process_directory(directory):
    """Apply ``update_file`` to every ``.yaml`` file under *directory*.

    The tree is walked recursively with ``os.walk``; files whose names do
    not end in ``.yaml`` are ignored.
    """
    for dirpath, _subdirs, filenames in os.walk(directory):
        yaml_names = [name for name in filenames if name.endswith(".yaml")]
        for name in yaml_names:
            update_file(os.path.join(dirpath, name))
# Migrate the class modules first, then the slot modules.  Both paths are
# relative, so they resolve against the current working directory.
for _module_dir in (
    "schemas/20251121/linkml/modules/classes",
    "schemas/20251121/linkml/modules/slots",
):
    process_directory(_module_dir)