"""Normalize the structure of LinkML slot and class module files.

Every ``*.yaml`` file in ``SLOTS_DIR`` and ``CLASSES_DIR`` is loaded and,
where needed, rewritten so that it:

1. wraps a flat entity definition in a schema with a ``slots`` / ``classes``
   mapping,
2. declares all ``STANDARD_PREFIXES``,
3. imports ``linkml:types``,
4. carries a ``custodian_types`` annotation on every entity, and
5. has ``exact_mappings`` when ``slot_uri`` is a CURIE and no exact/close
   mappings are present.

Files are round-tripped through PyYAML, so YAML comments and the original
formatting are not preserved in files that get rewritten.
"""
import os
import glob
import yaml
import re

SLOTS_DIR = "schemas/20251121/linkml/modules/slots"
CLASSES_DIR = "schemas/20251121/linkml/modules/classes"

STANDARD_PREFIXES = {
    "linkml": "https://w3id.org/linkml/",
    "hc": "https://nde.nl/ontology/hc/",
    "schema": "http://schema.org/",
    "dcterms": "http://purl.org/dc/terms/",
    "prov": "http://www.w3.org/ns/prov#",
    "crm": "http://www.cidoc-crm.org/cidoc-crm/",
    "skos": "http://www.w3.org/2004/02/skos/core#",
    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
    "org": "http://www.w3.org/ns/org#",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
}

# Import that every module file is expected to declare.
_LINKML_TYPES_IMPORT = "linkml:types"


def _detached_copy(value, default):
    """Return a shallow copy of a dict/list *value*, or *default* if it is None.

    Copying keeps the schema-level value distinct from the one that stays in
    the inner definition; a shared object would be written by PyYAML as an
    anchor/alias pair and later in-place edits would leak into the inner copy.
    """
    if value is None:
        return default
    if isinstance(value, (dict, list)):
        return value.copy()
    return value


def _nest_flat_definition(content, main_key, is_class):
    """Wrap a flat entity definition in a schema-level mapping.

    Args:
        content: Parsed YAML mapping that lacks *main_key*.
        main_key: ``'classes'`` or ``'slots'``.
        is_class: Selects ``class`` or ``slot`` in the generated default id.

    Returns:
        The new schema mapping, or ``None`` when *content* has no ``name``.
    """
    if "name" not in content:
        return None

    name = content["name"]
    kind = "class" if is_class else "slot"
    # The original definition is kept intact as the inner entity (including
    # any id/prefixes/imports it carried); the schema level only gains its
    # own copies of them.
    # NOTE(review): schema-level keys left inside the entity may not be valid
    # for a slot/class definition - confirm whether they should be stripped.
    return {
        "id": content.get("id", f"https://nde.nl/ontology/hc/{kind}/{name}"),
        "name": name,
        "title": content.get("title", name),
        "prefixes": _detached_copy(
            content.get("prefixes"), dict(STANDARD_PREFIXES)
        ),
        "default_prefix": "hc",
        "imports": _detached_copy(
            content.get("imports"), [_LINKML_TYPES_IMPORT]
        ),
        main_key: {name: content},
    }


def _ensure_standard_prefixes(content):
    """Add missing ``STANDARD_PREFIXES`` entries; return True if changed."""
    prefixes = content.get("prefixes")
    if prefixes is None:
        # Copy so the module-level constant is never shared with a document.
        content["prefixes"] = dict(STANDARD_PREFIXES)
        return True

    missing = {
        key: uri for key, uri in STANDARD_PREFIXES.items() if key not in prefixes
    }
    prefixes.update(missing)
    return bool(missing)


def _ensure_types_import(content):
    """Make sure ``linkml:types`` is imported; return True if changed."""
    imports = content.get("imports")
    if imports is None:
        content["imports"] = [_LINKML_TYPES_IMPORT]
        return True
    if _LINKML_TYPES_IMPORT not in imports:
        imports.append(_LINKML_TYPES_IMPORT)
        return True
    return False


def _ensure_entity_metadata(entities):
    """Add default annotations and mappings to each entity definition.

    Args:
        entities: Mapping of entity name to definition (the ``slots`` or
            ``classes`` section). Definitions are updated in place.

    Returns:
        True if any definition was changed.
    """
    modified = False
    for entity_name, entity_def in entities.items():
        if entity_def is None:
            # An entity declared with an empty body parses as None; treat it
            # as an empty definition so it still receives the annotations.
            entity_def = entities[entity_name] = {}
            modified = True
        elif not isinstance(entity_def, dict):
            continue

        annotations = entity_def.get("annotations")
        if annotations is None:
            annotations = entity_def["annotations"] = {}
            modified = True

        if "custodian_types" not in annotations:
            annotations["custodian_types"] = ["*"]
            annotations["custodian_types_rationale"] = "Universal utility concept"
            modified = True

        # Mirror a CURIE slot_uri into exact_mappings, but only when the
        # entity has no exact/close mappings yet and the value is not a
        # full http(s) URI.
        uri = entity_def.get("slot_uri")
        has_mappings = (
            "exact_mappings" in entity_def or "close_mappings" in entity_def
        )
        if (
            isinstance(uri, str)
            and not has_mappings
            and ":" in uri
            and not uri.startswith("http")
        ):
            entity_def["exact_mappings"] = [uri]
            modified = True

    return modified


def fix_file_structure(filepath, is_class=False):
    """Normalize one module file in place, rewriting it only if it changed.

    Args:
        filepath: Path of the YAML file to process.
        is_class: True for a class module (``classes`` section), False for a
            slot module (``slots`` section).

    Returns:
        None. Progress, skip and error messages are printed; unreadable,
        empty, non-mapping or un-nestable files are left untouched.
    """
    print(f"Processing {filepath}...")
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            content = yaml.safe_load(f)
    except (OSError, UnicodeDecodeError, yaml.YAMLError) as e:
        print(f"Error reading {filepath}: {e}")
        return

    if not content:
        return
    if not isinstance(content, dict):
        print(f"Skipping {filepath}: Top-level YAML node is not a mapping.")
        return

    modified = False
    main_key = "classes" if is_class else "slots"

    # 1. Fix nesting
    if main_key not in content:
        nested = _nest_flat_definition(content, main_key, is_class)
        if nested is None:
            print(f"Skipping {filepath}: Cannot determine entity name for nesting.")
            return
        content = nested
        modified = True

    # 2. Fix prefixes
    if _ensure_standard_prefixes(content):
        modified = True

    # 3. Fix imports
    if _ensure_types_import(content):
        modified = True

    # 4./5. Fix annotations and mappings on the inner entities
    entities = content[main_key]
    if isinstance(entities, dict) and _ensure_entity_metadata(entities):
        modified = True

    if modified:
        with open(filepath, "w", encoding="utf-8") as f:
            # allow_unicode keeps non-ASCII text readable instead of escaped.
            yaml.dump(
                content, f, sort_keys=False, width=1000, allow_unicode=True
            )
        print(f"Fixed {filepath}")


def run():
    """Apply ``fix_file_structure`` to every slot and class module file."""
    # Every file is passed through the generic fixer; it detects by itself
    # whether nesting or any other change is needed.
    for path in sorted(glob.glob(os.path.join(SLOTS_DIR, "*.yaml"))):
        fix_file_structure(path, is_class=False)

    # Classes are less likely to be flat, but are checked the same way.
    for path in sorted(glob.glob(os.path.join(CLASSES_DIR, "*.yaml"))):
        fix_file_structure(path, is_class=True)


if __name__ == "__main__":
    run()