- Added `id`, `name`, `title`, and `description` fields to multiple LinkML class YAML files. - Standardized prefixes across all class definitions. - Introduced a new script `fix_linkml_metadata.py` to automate the addition of metadata to class files. - Updated existing class files to ensure compliance with the new metadata structure.
111 lines
3.8 KiB
Python
111 lines
3.8 KiB
Python
import os
|
|
import re
|
|
|
|
# Directory that is scanned for LinkML class YAML files.
directory = "schemas/20251121/linkml/modules/classes/"

# Prefix declarations prepended to each class file. The entries must be
# indented beneath ``prefixes:``; at column 0 they would become unrelated
# top-level keys and ``prefixes`` itself would be empty.
prefixes_block = """prefixes:
  linkml: https://w3id.org/linkml/
  schema: http://schema.org/
  skos: http://www.w3.org/2004/02/skos/core#
  rico: https://www.ica.org/standards/RiC/ontology#
  wd: http://www.wikidata.org/entity/
"""

# Imports section added only to files that do not already declare one.
imports_block = """imports:
  - linkml:types
"""
|
|
|
|
def split_camel_case(name):
    """Return *name* with spaces inserted at CamelCase word boundaries.

    Two kinds of boundary are recognised:

    * a lowercase letter or digit followed by an uppercase letter
      (``"CustodianName"`` -> ``"Custodian Name"``);
    * a run of uppercase letters followed by a capitalised word
      (``"XMLParser"`` -> ``"XML Parser"``).

    Strings without such boundaries (including the empty string) are
    returned unchanged.
    """
    # lower/digit -> upper boundary, e.g. "fooBar" -> "foo Bar".
    spaced = re.sub(r'([a-z0-9])([A-Z])', r'\1 \2', name)
    # Acronym -> word boundary; the last capital of the run belongs to the
    # following word, e.g. "HTTPServer" -> "HTTP Server".
    return re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', spaced)
|
|
|
|
# Prepend schema metadata (id, name, title, description, prefixes and, when
# missing, imports) to every class YAML file in ``directory`` that does not
# already declare a top-level ``id:``.

# An indented mapping key such as "  ClassName:"; captures (indent, key).
# The indent width is captured, not hard-coded, so any consistent YAML
# indentation is accepted.
_CLASS_KEY_RE = re.compile(r"^( +)([A-Za-z0-9_]+):")
# An indented "description: value" entry; captures the raw value.
_DESCRIPTION_RE = re.compile(r"^ +description:\s+(.*)")
# How many lines after "classes:" are inspected for the class key.
_CLASS_KEY_WINDOW = 4
# How many lines after the class key are inspected for its description.
_DESCRIPTION_WINDOW = 14


def _find_description(lines, start, class_indent):
    """Return the single-line description of the class whose body starts at
    ``lines[start]``, or ``None`` when no usable one is found.

    Only keys that are direct children of the class (same indent as its
    first child) are considered. Block scalars (``>`` / ``|``) and quoted
    scalars that do not end on the same line yield ``None`` so the caller
    falls back to a generated description. Quoted values are returned
    verbatim, quotes included, so that text containing ``: `` or ``#``
    remains valid YAML when written back out.
    """
    child_indent = None
    for line in lines[start:start + _DESCRIPTION_WINDOW]:
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue
        indent = len(line) - len(line.lstrip(" "))
        if indent <= class_indent:
            # Left the class body (next class or a top-level key).
            return None
        if child_indent is None:
            child_indent = indent
        if indent != child_indent:
            # Nested content, e.g. a description inside slot_usage.
            continue
        found = _DESCRIPTION_RE.match(line)
        if not found:
            continue
        value = found.group(1).strip()
        if value.startswith((">", "|")):
            return None
        if value[:1] in ('"', "'"):
            terminated = len(value) >= 2 and value.endswith(value[0])
            return value if terminated else None
        return value
    return None


def _find_class_and_description(lines):
    """Return ``(class_name, description)`` for the first class declared
    under the first ``classes:`` line, or ``(None, None)`` if none is found
    within the inspected window.
    """
    for i, line in enumerate(lines):
        if not line.strip().startswith("classes:"):
            continue
        for j in range(i + 1, min(i + 1 + _CLASS_KEY_WINDOW, len(lines))):
            key = _CLASS_KEY_RE.match(lines[j])
            if key:
                class_indent = len(key.group(1))
                return key.group(2), _find_description(lines, j + 1, class_indent)
        # Only the first "classes:" line is examined.
        break
    return None, None


def _build_header(class_name, title, description, include_imports):
    """Return the metadata header text to prepend to a class file."""
    parts = [
        f"id: https://nde.nl/ontology/hc/class/{class_name}\n",
        f"name: {class_name}\n",
        f"title: {title}\n",
        f"description: {description}\n",
        prefixes_block,
    ]
    if include_imports:
        parts.append(imports_block)
    return "".join(parts)


count = 0

# Sorted so that warnings and updates appear in a reproducible order.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".yaml"):
        continue

    filepath = os.path.join(directory, filename)
    with open(filepath, 'r', encoding="utf-8") as f:
        content = f.read()

    # Already has metadata. Checked on any line so that a leading comment or
    # blank line does not cause a second header to be prepended on re-runs.
    if re.search(r"^id:", content, re.MULTILINE):
        continue

    # Check if imports already exist in the file (even if unstructured)
    has_imports = re.search(r"^imports:", content, re.MULTILINE)

    # The filename (minus its ".yaml" suffix) is the authoritative class name.
    filename_class = filename[:-len(".yaml")]

    class_name, description = _find_class_and_description(content.splitlines())

    # Ensure class name matches filename (convention)
    if class_name and class_name != filename_class:
        print(f"Warning: Class name '{class_name}' in content differs from filename '{filename_class}'. Using filename.")
    class_name = filename_class

    title = split_camel_case(class_name)
    if not description:
        description = f"LinkML class definition for {title}"

    new_content = _build_header(class_name, title, description, not has_imports) + content

    with open(filepath, 'w', encoding="utf-8") as f:
        f.write(new_content)

    count += 1

print(f"Total files updated: {count}")
|