# glam/scripts/auto_mark_processed_v3.py
#
# 121 lines
# 3.9 KiB
# Python

import yaml
import os
import datetime
# Target the AUTHORITATIVE file: the slot-fix registry that run() loads,
# updates in memory, and then rewrites in place.
# NOTE(review): this is a machine-specific absolute path, unlike the relative
# paths below — confirm this is intentional before running elsewhere.
SLOT_FIXES_PATH = '/Users/kempersc/apps/glam/data/fixes/slot_fixes.yaml'
# Paths to check for existence of migrated files. run() lists the *.yaml files
# in each directory and compares their base names against the fix entries.
# These are relative paths, so they resolve against the current working
# directory at the time the script runs.
# Directory whose *.yaml files count as "archived" (old) slots.
ARCHIVE_PATH = 'schemas/20251121/linkml/archive/slots'
# Directory whose *.yaml files count as existing slots (revision targets of type 'slot').
SLOTS_PATH = 'schemas/20251121/linkml/modules/slots'
# Directory whose *.yaml files count as existing classes (revision targets of type 'class').
CLASSES_PATH = 'schemas/20251121/linkml/modules/classes'
def get_slot_name(url):
    """Return the last path segment of a slot identifier URL.

    Args:
        url: Slot identifier such as ``https://example.org/slot/has_name``.
            May be ``None`` or empty.

    Returns:
        The text after the final ``/`` (the whole string when it contains no
        ``/``), or ``None`` when *url* is falsy.
    """
    if not url:
        return None
    # Drop trailing slashes first; otherwise "…/has_name/" would yield an
    # empty name instead of "has_name".
    return url.rstrip('/').rpartition('/')[2]
def _yaml_stems(directory):
    """Return the base names (without the ``.yaml`` suffix) of YAML files in *directory*.

    A path that is missing or not a directory yields an empty set; a warning
    is printed so that running from the wrong working directory is noticeable.
    """
    if not os.path.isdir(directory):
        print(f"Warning: {directory} is not a directory; treating it as empty.")
        return set()
    suffix = '.yaml'
    # Slice off only the trailing suffix; str.replace would also remove any
    # '.yaml' occurring in the middle of a file name.
    return {
        entry[:-len(suffix)]
        for entry in os.listdir(directory)
        if entry.endswith(suffix)
    }


def _migration_notes(fix, slot_name, archived_slots, existing_slots, existing_classes):
    """Return the evidence notes showing that *fix* has been migrated.

    An empty list means no evidence was found and the entry must not be marked.
    """
    notes = []

    # 1. Is the old slot archived?
    if slot_name in archived_slots:
        notes.append("Old slot found in archive.")

    # 2. Is there a revision and do its targets exist?
    rev_details = []
    targets_exist = True
    # `revision:` may be absent or empty (None) in the YAML.
    for rev in fix.get('revision') or []:
        if not isinstance(rev, dict):
            continue  # malformed revision entry; nothing to check
        label = rev.get('label')
        type_ = rev.get('type')
        if type_ == 'slot':
            # A missing slot target is deliberately not disqualifying
            # (it may be a generic slot).
            if label in existing_slots:
                rev_details.append(f"slot {label} exists")
        elif type_ == 'class':
            if label in existing_classes:
                rev_details.append(f"class {label} exists")
            else:
                # If a class is missing, the migration is probably not done.
                targets_exist = False

    # If targets exist, we assume migration is done even if the old slot
    # isn't archived (it may have been defined inline).
    if rev_details and targets_exist:
        notes.append(f"Targets exist: {', '.join(rev_details)}")
    return notes


def run():
    """Mark entries in the slot-fixes YAML as processed when they look migrated.

    Loads ``SLOT_FIXES_PATH``, and for every entry under ``fixes`` that is not
    already marked processed, checks whether:

    * the old slot's file name appears in ``ARCHIVE_PATH``, or
    * at least one revision target exists in ``SLOTS_PATH`` / ``CLASSES_PATH``
      and no revision target of type ``class`` is missing.

    Matching entries receive a ``processed`` mapping (status, date, notes).
    The file is rewritten only when at least one entry changed.

    NOTE: the file is round-tripped through PyYAML, which does not preserve
    comments or the original formatting.

    Returns:
        None. Progress and errors are reported via ``print``.
    """
    print(f"Reading {SLOT_FIXES_PATH}...")
    if not os.path.exists(SLOT_FIXES_PATH):
        print(f"Error: {SLOT_FIXES_PATH} not found.")
        return

    with open(SLOT_FIXES_PATH, 'r', encoding='utf-8') as f:
        try:
            data = yaml.safe_load(f)
        except yaml.YAMLError as e:
            print(f"Error parsing YAML: {e}")
            return

    # The root must be a mapping; an empty file or a top-level list/scalar
    # cannot contain a 'fixes' key.
    if not isinstance(data, dict) or 'fixes' not in data:
        print("No 'fixes' key found in YAML.")
        return

    archived_slots = _yaml_stems(ARCHIVE_PATH)
    existing_slots = _yaml_stems(SLOTS_PATH)
    existing_classes = _yaml_stems(CLASSES_PATH)

    today = datetime.date.today().isoformat()
    updated_count = 0

    # `fixes:` with no items parses to None.
    for fix in data['fixes'] or []:
        if not isinstance(fix, dict):
            continue  # malformed entry

        # Skip if already processed (tolerating `processed: null`).
        processed = fix.get('processed')
        if isinstance(processed, dict) and processed.get('status') is True:
            continue

        # 'orignal_slot_id' covers a misspelled key present in the data.
        slot_id = fix.get('original_slot_id') or fix.get('orignal_slot_id')
        if not slot_id or not isinstance(slot_id, str):
            continue

        slot_name = get_slot_name(slot_id)
        notes = _migration_notes(
            fix, slot_name, archived_slots, existing_slots, existing_classes
        )
        if not notes:
            continue

        print(f"Marking {slot_name} as processed.")
        fix['processed'] = {
            'status': True,
            'date': today,
            'notes': f"Auto-marked: {' '.join(notes)}",
        }
        updated_count += 1

    print(f"Updated {updated_count} entries.")

    if updated_count > 0:
        # Serialize before opening the file for writing: opening with 'w'
        # truncates it, so a serialization error must not happen afterwards.
        serialized = yaml.dump(data, sort_keys=False, width=1000)
        with open(SLOT_FIXES_PATH, 'w', encoding='utf-8') as f:
            f.write(serialized)
        print("Saved updates to slot_fixes.yaml")
# Script entry point: perform the auto-marking pass only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    run()