# glam/schemas/20251121/linkml/scripts/auto_mark_processed.py
#
# 96 lines
# 3.3 KiB
# Python

import yaml
import os
import datetime
from urllib.parse import urlparse
def get_slot_name(url):
    """Return the text after the final '/' in *url*.

    A value without any '/' is returned unchanged; a value that ends in '/'
    yields an empty string.
    """
    _head, _sep, last_segment = url.rpartition('/')
    return last_segment
# All paths are relative, so they resolve against the current working
# directory of the process running this script.

# YAML document holding the slot fixes (read for a top-level 'fixes' key).
SLOT_FIXES_PATH = 'modules/slots/slot_fixes.yaml'
# Directory holding archived slot files (one '<slot_name>.yaml' per slot).
ARCHIVE_PATH = 'archive/slots'
# Directory of class files -- presumably one '<ClassName>.yaml' per class;
# verify against the repository layout.
CLASSES_PATH = 'modules/classes'
def _yaml_stems(directory):
    """Return the names of the '.yaml' files in *directory*, extension removed.

    Only the trailing '.yaml' is stripped, so a name that happens to contain
    '.yaml' elsewhere is left intact. A missing directory yields an empty set.
    """
    if not os.path.isdir(directory):
        return set()
    suffix = '.yaml'
    return {
        name[:-len(suffix)]
        for name in os.listdir(directory)
        if name.endswith(suffix)
    }


def run():
    """Mark slot fixes as processed when their slot file is in the archive.

    Loads SLOT_FIXES_PATH, and for every entry under 'fixes' that is not
    already flagged ``processed.status: true`` and whose 'original_slot_id'
    resolves (via get_slot_name) to a file name found in ARCHIVE_PATH, sets
    ``processed`` to a mapping with status, today's date and an explanatory
    note. The file is rewritten only if at least one entry changed.

    The existence of a revision's target class is deliberately NOT treated as
    evidence of migration: it does not prove that this particular slot usage
    was migrated, so only presence in the archive triggers marking.

    NOTE: the file is round-tripped through yaml.safe_load / yaml.dump, which
    does not preserve YAML comments or original formatting.

    Returns:
        None. Progress and results are reported on stdout.
    """
    print(f"Reading {SLOT_FIXES_PATH}...")
    with open(SLOT_FIXES_PATH, 'r', encoding='utf-8') as f:
        try:
            data = yaml.safe_load(f)
        except yaml.YAMLError as e:
            print(f"Error parsing YAML: {e}")
            return

    # Guard against an empty document or a non-mapping root (e.g. a list),
    # either of which would fail on the data['fixes'] lookup below.
    if not isinstance(data, dict) or 'fixes' not in data:
        print("No 'fixes' key found in YAML.")
        return

    # Names of archived slot files, as a set for fast membership tests.
    archived_slots = _yaml_stems(ARCHIVE_PATH)
    print(f"Found {len(archived_slots)} archived slots.")

    # Computed once so every entry marked in this run carries the same date.
    today = datetime.date.today().isoformat()
    updated_count = 0

    # An empty 'fixes:' key loads as None; treat it as "no entries".
    for fix in data['fixes'] or []:
        if not isinstance(fix, dict):
            # Malformed entry; nothing sensible to inspect or update.
            continue

        # Skip if already processed. 'processed' may be absent, null or a
        # scalar in hand-edited files, so check its type before using .get().
        processed = fix.get('processed')
        if isinstance(processed, dict) and processed.get('status') is True:
            continue

        slot_id = fix.get('original_slot_id')
        if not slot_id:
            continue

        slot_name = get_slot_name(slot_id)
        if slot_name not in archived_slots:
            continue

        print(f"Marking {slot_name} as processed (found in archive).")
        fix['processed'] = {
            'status': True,
            'date': today,
            'notes': 'Auto-marked: Slot file found in archive/slots/, indicating migration complete.'
        }
        updated_count += 1

    print(f"Updated {updated_count} entries.")
    if updated_count > 0:
        # Serialise before opening the file for writing, so a failure inside
        # yaml.dump cannot leave slot_fixes.yaml truncated.
        serialized = yaml.dump(data, sort_keys=False, width=1000)
        with open(SLOT_FIXES_PATH, 'w', encoding='utf-8') as f:
            f.write(serialized)
        print("Saved updates to slot_fixes.yaml")
# Script entry point: perform the update only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    run()