"""Prepend LinkML schema metadata to class module YAML files.

Every ``*.yaml`` file in ``directory`` that has no top-level ``id:`` key gets
a header with ``id``, ``name``, ``title`` and ``description`` prepended,
followed by the ``prefixes:`` and ``imports:`` blocks unless the file already
declares those keys itself.  Files are rewritten in place.
"""
import os
import re
from typing import List, Optional, Tuple

directory = "schemas/20251121/linkml/modules/classes/"

# YAML fragments appended to the generated header (two-space indentation).
prefixes_block = """prefixes:
  linkml: https://w3id.org/linkml/
  schema: http://schema.org/
  skos: http://www.w3.org/2004/02/skos/core#
  rico: https://www.ica.org/standards/RiC/ontology#
  wd: http://www.wikidata.org/entity/
"""

imports_block = """imports:
  - linkml:types
"""

# How many lines after ``classes:`` are searched for the class key, and how
# many lines after the class key are searched for its description.
CLASS_SEARCH_WINDOW = 4
DESCRIPTION_SEARCH_WINDOW = 14

_CLASS_KEY_RE = re.compile(r"^( +)([a-zA-Z0-9_]+):")
_DESCRIPTION_RE = re.compile(r"^ +description:\s+(.*)")
_TOP_LEVEL_ID_RE = re.compile(r"^id:", re.MULTILINE)
_TOP_LEVEL_PREFIXES_RE = re.compile(r"^prefixes:", re.MULTILINE)
_TOP_LEVEL_IMPORTS_RE = re.compile(r"^imports:", re.MULTILINE)


def split_camel_case(name):
    """Insert a space at every lowercase-or-digit to uppercase boundary.

    ``"CustodianName"`` becomes ``"Custodian Name"``.  Runs of capitals are
    left untouched because only a lowercase letter or digit followed by an
    uppercase letter counts as a boundary.
    """
    return re.sub('([a-z0-9])([A-Z])', r'\1 \2', name)


def _indent_of(line: str) -> int:
    """Return the number of leading spaces of *line*."""
    return len(line) - len(line.lstrip(" "))


def _find_description(
    lines: List[str], class_index: int, class_indent: int
) -> Optional[str]:
    """Return the single-line description scalar of the class, if any.

    Only keys that are direct children of the class (same indentation as the
    first key below it) are considered, and the scan stops as soon as the
    indentation drops back to the class level, so a sibling class's
    description is never picked up.

    The scalar is returned exactly as written, including surrounding quotes,
    so it can be re-emitted without changing its YAML meaning.  ``None`` is
    returned when no description is found, when it is a block scalar
    (``>`` / ``|``), or when it opens a quote that is not closed on the same
    line.
    """
    child_indent = None
    start = class_index + 1
    for raw in lines[start:start + DESCRIPTION_SEARCH_WINDOW]:
        stripped = raw.strip()
        if not stripped or stripped.startswith("#"):
            continue
        indent = _indent_of(raw)
        if indent <= class_indent:
            break  # left the class body
        if child_indent is None:
            child_indent = indent
        if indent != child_indent:
            continue  # nested key or continuation line
        match = _DESCRIPTION_RE.match(raw)
        if not match:
            continue
        value = match.group(1).strip()
        if not value or value[0] in ">|":
            return None
        if value[0] in "\"'" and (len(value) < 2 or value[-1] != value[0]):
            # Quoted scalar continues on the next line; copying only this
            # line would leave an unterminated string.
            return None
        return value
    return None


def extract_class_info(lines: List[str]) -> Tuple[Optional[str], Optional[str]]:
    """Return ``(class_name, description)`` for the first class in *lines*.

    Only the first line starting with ``classes:`` is inspected.  The class
    key must appear within ``CLASS_SEARCH_WINDOW`` lines after it, at any
    space indentation.  Either element is ``None`` when it cannot be found.
    """
    for i, line in enumerate(lines):
        if not line.strip().startswith("classes:"):
            continue
        stop = min(i + 1 + CLASS_SEARCH_WINDOW, len(lines))
        for j in range(i + 1, stop):
            match = _CLASS_KEY_RE.match(lines[j])
            if match:
                indent = len(match.group(1))
                return match.group(2), _find_description(lines, j, indent)
        break
    return None, None


def build_header(
    class_name: str,
    description: Optional[str] = None,
    include_prefixes: bool = True,
    include_imports: bool = True,
) -> str:
    """Build the metadata header text for *class_name*.

    *description* is written verbatim after ``description: ``; when it is
    empty or ``None`` a default sentence based on the title is used.
    """
    title = split_camel_case(class_name)
    if not description:
        description = f"LinkML class definition for {title}"
    parts = [
        f"id: https://nde.nl/ontology/hc/class/{class_name}\n",
        f"name: {class_name}\n",
        f"title: {title}\n",
        f"description: {description}\n",
    ]
    if include_prefixes:
        parts.append(prefixes_block)
    if include_imports:
        parts.append(imports_block)
    return "".join(parts)


def add_metadata(content: str, filename_class: str) -> Optional[str]:
    """Return *content* with a metadata header prepended.

    Returns ``None`` when *content* already has a top-level ``id:`` key.
    The class name is always taken from *filename_class*; a warning is
    printed when the class key found in the content differs from it.
    """
    if _TOP_LEVEL_ID_RE.search(content):
        return None  # Already has metadata

    class_name, description = extract_class_info(content.splitlines())
    if class_name and class_name != filename_class:
        print(f"Warning: Class name '{class_name}' in content differs from filename '{filename_class}'. Using filename.")

    header = build_header(
        filename_class,
        description,
        # Emitting a key the file already declares would duplicate it.
        include_prefixes=not _TOP_LEVEL_PREFIXES_RE.search(content),
        include_imports=not _TOP_LEVEL_IMPORTS_RE.search(content),
    )
    return header + content


def main() -> None:
    """Add metadata headers to the YAML files in ``directory`` in place."""
    count = 0
    # Sorted so warnings appear in a stable order between runs.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".yaml"):
            continue

        filepath = os.path.join(directory, filename)
        with open(filepath, 'r', encoding="utf-8") as f:
            content = f.read()

        new_content = add_metadata(content, os.path.splitext(filename)[0])
        if new_content is None:
            continue

        with open(filepath, 'w', encoding="utf-8") as f:
            f.write(new_content)
        count += 1

    print(f"Total files updated: {count}")


if __name__ == "__main__":
    main()