# glam/schemas/20251121/linkml/scripts/check_schema_quality.py
#
# 74 lines
# 1.8 KiB
# Python

import os
import glob
import yaml
import time
# Directories holding the per-slot and per-class LinkML module files.
# Both paths are relative, so they resolve against the current working
# directory of the process running this script.
SLOTS_DIR = "schemas/20251121/linkml/modules/slots"
CLASSES_DIR = "schemas/20251121/linkml/modules/classes"
# Keys whose presence on a slot/class definition counts as "has mappings".
_MAPPING_KEYS = (
    'mappings',
    'exact_mappings',
    'close_mappings',
    'related_mappings',
    'broad_mappings',
    'narrow_mappings',
)


def _first_definition(section):
    """Return the first value of a ``slots``/``classes`` mapping, or None.

    None is returned when the section is empty, null, or not a mapping, so
    the caller can skip the file instead of crashing on malformed input.
    """
    if isinstance(section, dict) and section:
        return next(iter(section.values()))
    return None


def check_file(filepath, max_age_seconds=86400):
    """Report schema-quality issues for one recently modified YAML file.

    Args:
        filepath: Path of the YAML file to inspect.
        max_age_seconds: Only files modified within this many seconds are
            inspected. Defaults to 86400 (24 hours).

    Returns:
        None when the file is skipped: it is older than ``max_age_seconds``,
        cannot be stat'ed, read or parsed, is empty, or its first
        slot/class definition is empty or malformed. Otherwise a list of
        issue strings (possibly empty) drawn from
        "Not nested under slots/classes", "No mappings" and "No prefixes".
    """
    # The file may vanish between globbing and this call; treat that the
    # same way as any other unreadable file.
    try:
        mtime = os.path.getmtime(filepath)
    except OSError:
        return None
    if time.time() - mtime > max_age_seconds:
        return None

    # Best-effort parse. Catch only I/O, decoding and YAML errors so that
    # KeyboardInterrupt / SystemExit still propagate. ValueError covers
    # UnicodeDecodeError and unwrapped scalar-construction errors.
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = yaml.safe_load(f)
    except (OSError, ValueError, yaml.YAMLError):
        return None

    if not content:
        return None

    # A scalar or list document has no keys to inspect: every check fails.
    if not isinstance(content, dict):
        return ["Not nested under slots/classes", "No mappings", "No prefixes"]

    issues = []

    # Locate the definition to inspect: the first entry under ``slots`` or
    # ``classes`` when nested, otherwise the document itself (flat layout).
    if 'slots' in content:
        entity = _first_definition(content['slots'])
    elif 'classes' in content:
        entity = _first_definition(content['classes'])
    else:
        issues.append("Not nested under slots/classes")
        entity = content

    if not entity:
        return None

    # A non-mapping entity (e.g. a bare string) cannot carry mapping keys;
    # guard against accidental substring matches.
    has_mappings = isinstance(entity, dict) and any(
        key in entity for key in _MAPPING_KEYS
    )
    if not has_mappings:
        issues.append("No mappings")

    if 'prefixes' not in content:
        issues.append("No prefixes")

    return issues
def run():
    """Scan the slot and class module directories and print any issues.

    Every ``*.yaml`` file directly inside ``SLOTS_DIR`` and ``CLASSES_DIR``
    is passed to ``check_file``. Files for which it returns a non-empty
    issue list are printed as ``<path>: <issue>, <issue>``, followed by a
    final count of such files.
    """
    print("Checking recently modified files...")

    # Slots first, then classes, matching the order of concatenation.
    candidates = []
    for directory in (SLOTS_DIR, CLASSES_DIR):
        candidates.extend(glob.glob(os.path.join(directory, "*.yaml")))

    flagged = 0
    for path in candidates:
        problems = check_file(path)
        # None (skipped) and [] (clean) are both uninteresting here.
        if not problems:
            continue
        print(f"{path}: {', '.join(problems)}")
        flagged += 1

    print(f"Found {flagged} files with issues.")


# Run the scan only when executed as a script, not when imported.
if __name__ == "__main__":
    run()