"""Backfill default annotations on LinkML class and slot module YAML files."""
import os
|
|
import yaml
|
|
import glob
|
|
|
|
# Directories that are scanned for ``*.yaml`` module files. The paths are
# relative, so they resolve against the current working directory.
CLASSES_DIR = "modules/classes"
SLOTS_DIR = "modules/slots"

# Fallback annotation values written into a module when the field is missing.
DEFAULT_SPECIFICITY_SCORE = 0.1
DEFAULT_SPECIFICITY_RATIONALE = "Generic utility class/slot created during migration"
DEFAULT_CUSTODIAN_TYPES = ["*"]
DEFAULT_CUSTODIAN_RATIONALE = "Universal utility concept"
|
|
|
|
def _locate_entity(content, is_class):
    """Return the mapping that defines the file's entity, or ``None``.

    Two layouts are recognised:

    * nested -- the definition sits under a top-level ``classes:`` (or
      ``slots:``) key, keyed by the entity name;
    * flat (slots only) -- the top-level mapping *is* the slot definition
      (``id``, ``name``, ``slot_uri``, ``range``, ...).

    Only the first entry of a nested section is returned, because these
    modules are assumed to hold one entity per file.

    Args:
        content: The top-level mapping loaded from the YAML file.
        is_class: True to look under ``classes:``, False for ``slots:``.

    Returns:
        The entity's definition dict, or ``None`` when none can be found.
    """
    section_key = 'classes' if is_class else 'slots'
    if section_key not in content:
        # Class files are always expected to nest; only slots may be flat.
        return None if is_class else content

    section = content[section_key]
    if not isinstance(section, dict) or not section:
        # ``classes:`` / ``slots:`` is present but empty or not a mapping.
        return None

    name = next(iter(section))
    if section[name] is None:
        # ``Name:`` with no body loads as None; give it a body to annotate.
        section[name] = {}
    entity = section[name]
    return entity if isinstance(entity, dict) else None


def fix_yaml_file(filepath, is_class=True):
    """Add missing default annotations to one class or slot module file.

    The file is loaded, its single entity definition is located, and any
    missing ``specificity_score`` (classes only) or ``custodian_types``
    annotation is filled in from the module-level defaults. A missing
    ``slot_uri`` / ``class_uri`` is only reported, never invented. The file
    is rewritten only when something was actually added.

    Note that the rewrite goes through ``yaml.safe_load`` / ``yaml.dump``,
    so comments and original formatting in the file are not preserved.

    Args:
        filepath: Path of the YAML module file to inspect.
        is_class: True when the file defines a class, False for a slot.

    Returns:
        True if the file was modified and written back; False if it could
        not be read, had no recognisable entity, or needed no changes.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = yaml.safe_load(f)
    except Exception as e:
        # Deliberately broad: this is the per-file boundary of a batch run,
        # so one unreadable or malformed file is reported and skipped
        # instead of aborting the whole scan.
        print(f"Error reading {filepath}: {e}")
        return False

    if not content:
        return False

    # A document whose root is a list or scalar cannot hold an entity.
    entity_dict = _locate_entity(content, is_class) if isinstance(content, dict) else None
    if entity_dict is None:
        print(f"Could not locate entity definition in {filepath}")
        return False

    modified = False

    # Check/Add annotations (a bare ``annotations:`` key loads as None).
    annotations = entity_dict.get('annotations')
    if annotations is None:
        annotations = entity_dict['annotations'] = {}
        modified = True
    elif not isinstance(annotations, dict):
        print(f"Could not update annotations in {filepath}: expected a mapping")
        return False

    # Rule 37: Specificity Score (Classes only)
    if is_class and 'specificity_score' not in annotations:
        annotations['specificity_score'] = DEFAULT_SPECIFICITY_SCORE
        annotations['specificity_rationale'] = DEFAULT_SPECIFICITY_RATIONALE
        modified = True

    # Rule 13: Custodian Types (Classes and Slots)
    if 'custodian_types' not in annotations:
        # Copy so the shared module-level default list is never aliased
        # into (and possibly mutated through) a loaded document.
        annotations['custodian_types'] = list(DEFAULT_CUSTODIAN_TYPES)
        annotations['custodian_types_rationale'] = DEFAULT_CUSTODIAN_RATIONALE
        modified = True

    # Rule 38: Slot URI (Slots only). Rule 50: Class URI (Classes only).
    # Neither can be auto-fixed without domain knowledge, so only flag them.
    if is_class:
        if 'class_uri' not in entity_dict:
            print(f"WARNING: Class {filepath} missing class_uri")
    elif 'slot_uri' not in entity_dict:
        print(f"WARNING: Slot {filepath} missing slot_uri")

    if not modified:
        return False

    # Serialise before opening for writing, so a failure while dumping
    # cannot leave the original file truncated.
    serialized = yaml.dump(content, sort_keys=False, width=1000)
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(serialized)
    print(f"Fixed {filepath}")
    return True
|
|
|
|
def run():
    """Scan the class directory, then the slot directory, fixing each file.

    Every ``*.yaml`` file directly inside ``CLASSES_DIR`` is passed to
    ``fix_yaml_file`` as a class module, and every one inside ``SLOTS_DIR``
    as a slot module. A banner line is printed before each scan.
    """
    scan_plan = (
        ("Scanning Classes...", CLASSES_DIR, True),
        ("Scanning Slots...", SLOTS_DIR, False),
    )
    for banner, directory, treat_as_class in scan_plan:
        print(banner)
        pattern = os.path.join(directory, "*.yaml")
        for path in glob.glob(pattern):
            fix_yaml_file(path, is_class=treat_as_class)
|
|
|
|
# Script entry point: run the scan only when executed directly, not on import.
if __name__ == "__main__":
    run()
|