125 lines
4.4 KiB
Python
125 lines
4.4 KiB
Python
import os
|
|
import glob
|
|
import yaml
|
|
import re
|
|
|
|
# Directories (relative to the working directory) that hold one YAML module
# per LinkML slot / class definition.
SLOTS_DIR = "schemas/20251121/linkml/modules/slots"
CLASSES_DIR = "schemas/20251121/linkml/modules/classes"

# Prefix -> namespace URI map that every module file is expected to declare.
# Insertion order is significant: files are dumped with sort_keys=False, so
# this is the order in which freshly added prefixes appear in the output.
STANDARD_PREFIXES = {
    "linkml": "https://w3id.org/linkml/",
    "hc": "https://nde.nl/ontology/hc/",
    "schema": "http://schema.org/",
    "dcterms": "http://purl.org/dc/terms/",
    "prov": "http://www.w3.org/ns/prov#",
    "crm": "http://www.cidoc-crm.org/cidoc-crm/",
    "skos": "http://www.w3.org/2004/02/skos/core#",
    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
    "org": "http://www.w3.org/ns/org#",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
}
|
|
|
|
def _clone_yaml(node):
    """Return a deep copy of a plain YAML tree (dicts, lists, scalars)."""
    if isinstance(node, dict):
        return {key: _clone_yaml(value) for key, value in node.items()}
    if isinstance(node, list):
        return [_clone_yaml(item) for item in node]
    return node


def _nest_flat_definition(content, name, main_key, is_class):
    """Wrap a flat entity definition in a schema-level document."""
    kind = 'class' if is_class else 'slot'
    return {
        'id': content.get('id', f"https://nde.nl/ontology/hc/{kind}/{name}"),
        'name': name,
        'title': content.get('title', name),
        # Clone so the top-level value is not the same object as the one
        # still held by the inner entity: PyYAML serialises a repeated
        # dict/list as an anchor plus alias (&id001 / *id001).
        'prefixes': _clone_yaml(content.get('prefixes', STANDARD_PREFIXES)),
        'default_prefix': 'hc',
        'imports': _clone_yaml(content.get('imports', ['linkml:types'])),
        # The inner entity deliberately keeps its original keys.
        main_key: {name: content},
    }


def _ensure_prefixes(content, filepath):
    """Add any missing standard prefixes; return True if content changed."""
    prefixes = content.get('prefixes')
    if prefixes is None:
        # Copy: never hand the module-level constant to a document.
        content['prefixes'] = dict(STANDARD_PREFIXES)
        return True
    if not isinstance(prefixes, dict):
        print(f"Warning: {filepath}: 'prefixes' is not a mapping; left unchanged.")
        return False

    changed = False
    for prefix, uri in STANDARD_PREFIXES.items():
        if prefix not in prefixes:
            prefixes[prefix] = uri
            changed = True
    return changed


def _ensure_imports(content, filepath):
    """Make sure 'linkml:types' is imported; return True if content changed."""
    imports = content.get('imports')
    if imports is None:
        content['imports'] = ['linkml:types']
        return True
    if isinstance(imports, str):
        # A single scalar import; only rewritten as a list if we must append.
        imports = [imports]
    if not isinstance(imports, list):
        print(f"Warning: {filepath}: 'imports' is not a list; left unchanged.")
        return False
    if 'linkml:types' in imports:
        return False
    content['imports'] = imports + ['linkml:types']
    return True


def _ensure_entity_metadata(entities, filepath):
    """Add default annotations and mappings to each entity; return True if changed."""
    changed = False
    for entity_name, entity_def in entities.items():
        if entity_def is None:
            # An entity written as a bare key, treated as an empty definition.
            # Re-assigning an existing key is safe while iterating.
            entity_def = entities[entity_name] = {}
        if not isinstance(entity_def, dict):
            print(f"Warning: {filepath}: definition of {entity_name!r} is not a mapping; skipped.")
            continue

        annotations = entity_def.get('annotations')
        if annotations is None:
            annotations = entity_def['annotations'] = {}
            changed = True
        if isinstance(annotations, dict) and 'custodian_types' not in annotations:
            annotations['custodian_types'] = ['*']
            annotations['custodian_types_rationale'] = 'Universal utility concept'
            changed = True

        # If slot_uri is a CURIE and no mappings exist yet, record it as an
        # exact mapping. Full http(s) URIs are left alone.
        uri = entity_def.get('slot_uri')
        has_mappings = 'exact_mappings' in entity_def or 'close_mappings' in entity_def
        if (isinstance(uri, str) and not has_mappings
                and ':' in uri and not uri.startswith('http')):
            entity_def['exact_mappings'] = [uri]
            changed = True
    return changed


def fix_file_structure(filepath, is_class=False):
    """Normalise one LinkML module file in place.

    Steps, in order:

    1. If the file has no ``slots``/``classes`` section but has a ``name``,
       wrap the flat definition in a schema-level document.
    2. Ensure every entry of ``STANDARD_PREFIXES`` is declared.
    3. Ensure ``linkml:types`` is imported.
    4. Give every entity default ``custodian_types`` annotations.
    5. For entities with a CURIE ``slot_uri`` and no exact/close mappings,
       add ``exact_mappings: [slot_uri]``.

    The file is rewritten only if a step changed something. It is re-emitted
    with ``yaml.dump``, so YAML comments and original formatting are not
    preserved.

    Args:
        filepath: Path of the YAML file to fix.
        is_class: True to treat the file as a class module (``classes``
            section), False for a slot module (``slots`` section).

    Returns:
        None. Progress and problems are reported on stdout. Unreadable,
        empty, or unrecognised files are skipped without raising.
    """
    print(f"Processing {filepath}...")
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = yaml.safe_load(f)
    except Exception as e:
        # Deliberately broad: this is a best-effort batch tool, and one bad
        # file (I/O, decoding, YAML syntax or construction error) must not
        # abort the whole run.
        print(f"Error reading {filepath}: {e}")
        return

    if not content:
        return
    if not isinstance(content, dict):
        print(f"Skipping {filepath}: top-level YAML node is not a mapping.")
        return

    modified = False

    # 1. Fix nesting
    main_key = 'classes' if is_class else 'slots'
    if main_key not in content:
        name = content.get('name')
        if not name:
            print(f"Skipping {filepath}: Cannot determine entity name for nesting.")
            return
        content = _nest_flat_definition(content, name, main_key, is_class)
        modified = True

    # 2. Fix prefixes
    if _ensure_prefixes(content, filepath):
        modified = True

    # 3. Fix imports
    if _ensure_imports(content, filepath):
        modified = True

    # 4./5. Fix annotations and mappings on the inner entities
    entities = content.get(main_key)
    if isinstance(entities, dict):
        if _ensure_entity_metadata(entities, filepath):
            modified = True
    elif entities is not None:
        print(f"Warning: {filepath}: '{main_key}' is not a mapping; entities left unchanged.")

    if modified:
        with open(filepath, 'w', encoding='utf-8') as f:
            # allow_unicode keeps non-ASCII text readable instead of
            # escaping it to \x / \u sequences.
            yaml.dump(content, f, sort_keys=False, width=1000, allow_unicode=True)
        print(f"Fixed {filepath}")
|
|
|
|
def run():
    """Run fix_file_structure over every slot module, then every class module.

    Every ``*.yaml`` file directly inside SLOTS_DIR and CLASSES_DIR is
    passed through the generic fixer, which itself decides whether a file
    needs nesting or any other change.
    """
    # Slots go first. Class files are less likely to be flat, but are
    # checked as well.
    targets = (
        (SLOTS_DIR, False),
        (CLASSES_DIR, True),
    )
    for directory, treat_as_class in targets:
        pattern = os.path.join(directory, "*.yaml")
        for path in glob.glob(pattern):
            fix_file_structure(path, is_class=treat_as_class)
|
|
|
|
# Script entry point: run the batch fix only when this file is executed
# directly, never as a side effect of importing it.
if __name__ == "__main__":
    run()
|