#!/usr/bin/env python3
"""
Generate Mermaid ER diagrams from modular LinkML schemas.

This script works around the bug in gen-erdiagram that fails to resolve
linkml:types imports in modular schemas by using SchemaView.

Usage:
    python3 generate_mermaid_modular.py schema.yaml [output.mmd] [--no-enums]
"""

import sys
from pathlib import Path

from linkml_runtime.utils.schemaview import SchemaView

# Classes to exclude from diagrams (technical artifacts with no semantic significance)
EXCLUDED_CLASSES = {
    "Container",  # LinkML tree_root for validation only, not part of ontology
}

# Maximum number of enum values to show in diagram (for readability)
MAX_ENUM_VALUES_IN_DIAGRAM = 10

# Command-line switch that suppresses enum entities and enum relationships.
_NO_ENUMS_FLAG = "--no-enums"


def _cardinality(multivalued, required) -> str:
    """Return the Mermaid relationship marker for a slot's cardinality."""
    if multivalued:
        # One-to-many: required ("}|") or optional ("}o")
        return "||--}|" if required else "||--}o"
    # One-to-one: required ("||") or optional ("|o")
    return "||--||" if required else "||--|o"


def _class_entity_lines(sv, class_name) -> list:
    """Return the entity box for one class, listing every slot as an attribute."""
    entity = [f"{class_name} {{"]
    for slot_name in sv.class_slots(class_name):
        # induced_slot merges the base slot with the class's slot_usage, so the
        # range is correct even when slot_usage does not override it.
        slot = sv.induced_slot(slot_name, class_name)
        if not slot:
            continue
        slot_range = slot.range if slot.range else "string"
        # Slots pointing at an excluded class are dropped from the box.
        if slot_range in EXCLUDED_CLASSES:
            continue
        # Format: type attribute_name; "List" suffix marks multivalued slots,
        # "PK" marks required slots.
        multivalued_marker = "List" if slot.multivalued else ""
        required_marker = " PK" if slot.required else ""
        entity.append(f" {slot_range}{multivalued_marker} {slot_name}{required_marker}")
    entity.append("}")
    return entity


def _enum_entity_lines(sv, enum_name) -> list:
    """Return the entity box for one enum, or [] if it has no permissible values."""
    enum_def = sv.get_enum(enum_name)
    if not (enum_def and enum_def.permissible_values):
        return []
    values = list(enum_def.permissible_values.keys())
    entity = [f"{enum_name} {{", " string enum_type PK"]
    # Only the first MAX_ENUM_VALUES_IN_DIAGRAM values are listed for readability.
    entity.extend(
        f" string {value_name}"
        for value_name in values[:MAX_ENUM_VALUES_IN_DIAGRAM]
    )
    remaining = len(values) - MAX_ENUM_VALUES_IN_DIAGRAM
    if remaining > 0:
        entity.append(f" string _and_{remaining}_more")
    entity.append("}")
    return entity


def _relationship_lines(sv, class_name, class_names, enum_names) -> list:
    """Return the inheritance and association lines that originate at one class.

    ``class_names`` and ``enum_names`` are the sets of entities present in the
    diagram; a slot only yields a relationship when its range is in one of them.
    """
    relations = []
    cls = sv.get_class(class_name)

    # Inheritance is rendered as a one-to-one relationship labelled "inherits";
    # it is skipped when the parent is an excluded class.
    if cls.is_a and cls.is_a not in EXCLUDED_CLASSES:
        relations.append(f'{class_name} ||--|| {cls.is_a} : "inherits"')

    for slot_name in sv.class_slots(class_name):
        slot = sv.induced_slot(slot_name, class_name)
        if not (slot and slot.range):
            continue
        if slot.range in class_names:
            cardinality = _cardinality(slot.multivalued, slot.required)
        elif slot.range in enum_names:
            # Enum relationships are always drawn as optional.
            cardinality = _cardinality(slot.multivalued, False)
        else:
            continue
        relations.append(f'{class_name} {cardinality} {slot.range} : "{slot_name}"')
    return relations


def generate_mermaid_from_schemaview(sv: SchemaView, include_enums: bool = True) -> str:
    """
    Generate Mermaid ER diagram from SchemaView.

    This manually constructs the Mermaid syntax instead of using the buggy
    ErdiagramGenerator class.

    IMPORTANT: ALL slots are shown as properties in the class box, including
    those with class ranges. Class-typed slots are ALSO shown as relationship
    lines between classes.

    Args:
        sv: SchemaView instance
        include_enums: If True, include enum entities with their values

    Returns:
        The diagram as a single string wrapped in a ```mermaid fenced block.
    """
    lines = ["```mermaid", "erDiagram"]

    # Get all classes except excluded ones
    all_classes = [c for c in sv.all_classes() if c not in EXCLUDED_CLASSES]

    # Get all enums (empty when enums are disabled, which also disables the
    # enum section and enum relationships below)
    all_enums = list(sv.all_enums()) if include_enums else []

    # Generate class entities
    for class_name in all_classes:
        lines.extend(_class_entity_lines(sv, class_name))

    # Generate enum entities (shown as special entities with values)
    if all_enums:
        lines.append("")
        lines.append(" %% Enumerations")
        for enum_name in all_enums:
            lines.extend(_enum_entity_lines(sv, enum_name))

    lines.append("")

    # Generate relationships; sets make the per-slot range lookups O(1)
    class_names = set(all_classes)
    enum_names = set(all_enums)
    for class_name in all_classes:
        lines.extend(_relationship_lines(sv, class_name, class_names, enum_names))

    lines.extend(["", "```", ""])
    return '\n'.join(lines)


def main():
    """Command-line entry point: load a schema and emit its Mermaid ER diagram.

    The first positional argument is the schema path and the optional second
    one is the output file; without an output file the diagram is printed to
    stdout. ``--no-enums`` may appear anywhere on the command line. Progress
    messages go to stderr. Exits with status 1 when no schema argument is
    given or the schema file does not exist.
    """
    cli_args = sys.argv[1:]
    include_enums = _NO_ENUMS_FLAG not in cli_args
    # Strip the flag first so it is never mistaken for the schema or output path.
    positional = [arg for arg in cli_args if arg != _NO_ENUMS_FLAG]

    if not positional:
        print("Usage: generate_mermaid_modular.py schema.yaml [output.mmd] [--no-enums]")
        print("\nGenerates Mermaid ER diagrams from modular LinkML schemas.")
        print("Works around gen-erdiagram bug with linkml:types imports.")
        print("\nOptions:")
        print(" --no-enums Exclude enum entities from the diagram")
        sys.exit(1)

    schema_path = Path(positional[0])
    output_path = Path(positional[1]) if len(positional) > 1 else None

    if not schema_path.exists():
        print(f"❌ Error: Schema file not found: {schema_path}", file=sys.stderr)
        sys.exit(1)

    # Load schema with SchemaView (handles modular imports)
    print(f"Loading schema: {schema_path}", file=sys.stderr)
    sv = SchemaView(str(schema_path))

    print(f"✅ Loaded schema: {sv.schema.name}", file=sys.stderr)
    print(f" Classes: {len(list(sv.all_classes()))}", file=sys.stderr)
    print(f" Enums: {len(list(sv.all_enums()))}", file=sys.stderr)

    # Generate Mermaid
    print(f"Generating Mermaid ER diagram (include_enums={include_enums})...", file=sys.stderr)
    mermaid = generate_mermaid_from_schemaview(sv, include_enums=include_enums)

    # Output
    if output_path:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        # Explicit UTF-8 so non-ASCII schema names do not depend on the locale.
        output_path.write_text(mermaid, encoding="utf-8")
        print(f"✅ Generated: {output_path}", file=sys.stderr)
        # Report the real on-disk size rather than the character count.
        print(f" Size: {output_path.stat().st_size} bytes", file=sys.stderr)
    else:
        print(mermaid)


if __name__ == '__main__':
    main()