glam/scripts/generate_mermaid_modular.py
2025-12-01 16:06:34 +01:00

191 lines
7.5 KiB
Python
Executable file

#!/usr/bin/env python3
"""
Generate Mermaid ER diagrams from modular LinkML schemas.
This script works around the bug in gen-erdiagram that fails to resolve
linkml:types imports in modular schemas by using SchemaView.
Usage:
python3 generate_mermaid_modular.py schema.yaml [output.mmd] [--no-enums]
"""
import sys
from pathlib import Path
from linkml_runtime.utils.schemaview import SchemaView
# Names of classes that must never appear in the generated diagram
# (technical artifacts with no semantic significance). The generator uses
# this set to drop the class entity itself, any attribute whose range is one
# of these classes, and any inheritance edge pointing at one of them.
EXCLUDED_CLASSES = {
    "Container",  # LinkML tree_root for validation only, not part of ontology
}
# Maximum number of permissible values listed inside an enum entity (for
# readability). When an enum has more, the surplus is summarised by a single
# "_and_<N>_more" placeholder attribute.
MAX_ENUM_VALUES_IN_DIAGRAM = 10
def _relationship_cardinality(multivalued, required) -> str:
    """Return the Mermaid relationship marker for a slot's cardinality.

    The left side is always "exactly one" (``||``); the right side encodes
    whether the slot is multivalued and whether it is required.

    NOTE(review): Mermaid's documentation lists the right-hand markers as
    ``o|``, ``o{`` and ``|{``. The strings below are kept exactly as this
    script has always emitted them so existing output does not change;
    confirm against the renderer before altering them.
    """
    if multivalued:
        # One-to-many: required vs. optional.
        return "||--}|" if required else "||--}o"
    # One-to-one: required vs. optional.
    return "||--||" if required else "||--|o"


def generate_mermaid_from_schemaview(sv: SchemaView, include_enums: bool = True) -> str:
    """
    Generate a Mermaid ER diagram from a SchemaView.

    The Mermaid syntax is constructed manually instead of going through the
    buggy ErdiagramGenerator class.

    IMPORTANT: ALL slots are shown as properties in the class box, including
    those with class ranges. Class-typed slots are ALSO shown as relationship
    lines between classes.

    Output layout, in order:
      1. an opening ```mermaid fence and the ``erDiagram`` header;
      2. one entity per class not listed in EXCLUDED_CLASSES, with one
         attribute line per induced slot (range defaults to "string",
         "List" suffix for multivalued slots, " PK" suffix for required
         slots; slots whose range is an excluded class are omitted);
      3. if ``include_enums`` is true, one entity per enum that has
         permissible values, listing at most MAX_ENUM_VALUES_IN_DIAGRAM of
         them plus an ``_and_<N>_more`` placeholder for the rest;
      4. relationship lines: ``is_a`` inheritance (labelled "inherits"),
         class-ranged slots, and (when enabled) enum-ranged slots, which are
         always drawn as optional;
      5. the closing fence followed by a trailing newline.

    Args:
        sv: SchemaView instance
        include_enums: If True, include enum entities with their values

    Returns:
        The complete diagram as a single newline-joined string.
    """
    class_names = [name for name in sv.all_classes() if name not in EXCLUDED_CLASSES]
    enum_names = list(sv.all_enums()) if include_enums else []

    # Sets give O(1) membership tests in the relationship pass; the lists
    # above are kept because they define the output order.
    class_lookup = set(class_names)
    enum_lookup = set(enum_names)

    # Resolve every slot once per class. induced_slot merges the base slot
    # with slot_usage, so the range is correct even when slot_usage does not
    # override it. Both the attribute pass and the relationship pass reuse
    # these results.
    slots_by_class = {}
    for class_name in class_names:
        resolved = []
        for slot_name in sv.class_slots(class_name):
            slot = sv.induced_slot(slot_name, class_name)
            if slot:
                resolved.append((slot_name, slot))
        slots_by_class[class_name] = resolved

    lines = ["```mermaid", "erDiagram"]

    # Class entities.
    for class_name in class_names:
        lines.append(f"{class_name} {{")
        for slot_name, slot in slots_by_class[class_name]:
            slot_range = slot.range if slot.range else "string"
            # Attributes typed by an excluded class are left out entirely.
            if slot_range in EXCLUDED_CLASSES:
                continue
            # Format: type attribute_name, with a List suffix for
            # multivalued slots and PK as the "required" indicator.
            multivalued_marker = "List" if slot.multivalued else ""
            required_marker = " PK" if slot.required else ""
            lines.append(f" {slot_range}{multivalued_marker} {slot_name}{required_marker}")
        lines.append("}")

    # Enum entities (shown as special entities with their values).
    if include_enums and enum_names:
        lines.append("")
        lines.append(" %% Enumerations")
        for enum_name in enum_names:
            enum_def = sv.get_enum(enum_name)
            if not (enum_def and enum_def.permissible_values):
                continue
            lines.append(f"{enum_name} {{")
            lines.append(" string enum_type PK")
            values = list(enum_def.permissible_values.keys())
            lines.extend(
                f" string {value_name}"
                for value_name in values[:MAX_ENUM_VALUES_IN_DIAGRAM]
            )
            # Summarise whatever did not fit under the display limit.
            remaining = len(values) - MAX_ENUM_VALUES_IN_DIAGRAM
            if remaining > 0:
                lines.append(f" string _and_{remaining}_more")
            lines.append("}")

    lines.append("")

    # Relationships.
    for class_name in class_names:
        cls = sv.get_class(class_name)
        # Inheritance is rendered as an "inherits" relationship; edges to
        # excluded parents are skipped.
        if cls.is_a and cls.is_a not in EXCLUDED_CLASSES:
            lines.append(f'{class_name} ||--|| {cls.is_a} : "inherits"')

        for slot_name, slot in slots_by_class[class_name]:
            if not slot.range:
                continue
            if slot.range in class_lookup:
                cardinality = _relationship_cardinality(slot.multivalued, slot.required)
            elif include_enums and slot.range in enum_lookup:
                # Enum relationships are always drawn as optional.
                cardinality = _relationship_cardinality(slot.multivalued, False)
            else:
                # Ranges that are neither a diagrammed class nor an enum
                # (e.g. plain types) get no relationship line.
                continue
            lines.append(f'{class_name} {cardinality} {slot.range} : "{slot_name}"')

    lines.append("")
    lines.append("```")
    lines.append("")
    return '\n'.join(lines)
def main():
    """
    Command-line entry point: load a LinkML schema and emit a Mermaid diagram.

    Arguments are read from ``sys.argv``:
        <schema.yaml>   path to the schema (required, first positional)
        [output.mmd]    optional output file (second positional); when
                        omitted the diagram is printed to stdout
        --no-enums      optional flag, accepted at any position, that
                        excludes enum entities from the diagram

    Progress and error messages go to stderr so stdout carries only the
    diagram. Exits with status 1 when no schema path is given or the schema
    file does not exist.
    """
    cli_args = sys.argv[1:]
    include_enums = "--no-enums" not in cli_args
    # Strip the flag before reading positionals so that
    # `script.py --no-enums schema.yaml` works like `script.py schema.yaml --no-enums`.
    positional = [arg for arg in cli_args if arg != "--no-enums"]

    if not positional:
        # Usage goes to stderr: stdout is reserved for the diagram itself.
        print("Usage: generate_mermaid_modular.py <schema.yaml> [output.mmd] [--no-enums]", file=sys.stderr)
        print("\nGenerates Mermaid ER diagrams from modular LinkML schemas.", file=sys.stderr)
        print("Works around gen-erdiagram bug with linkml:types imports.", file=sys.stderr)
        print("\nOptions:", file=sys.stderr)
        print(" --no-enums Exclude enum entities from the diagram", file=sys.stderr)
        sys.exit(1)

    schema_path = Path(positional[0])
    output_path = Path(positional[1]) if len(positional) > 1 else None

    if not schema_path.exists():
        print(f"❌ Error: Schema file not found: {schema_path}", file=sys.stderr)
        sys.exit(1)

    # Load schema with SchemaView (handles modular imports)
    print(f"Loading schema: {schema_path}", file=sys.stderr)
    sv = SchemaView(str(schema_path))
    print(f"✅ Loaded schema: {sv.schema.name}", file=sys.stderr)
    print(f" Classes: {len(list(sv.all_classes()))}", file=sys.stderr)
    print(f" Enums: {len(list(sv.all_enums()))}", file=sys.stderr)

    # Generate Mermaid
    print(f"Generating Mermaid ER diagram (include_enums={include_enums})...", file=sys.stderr)
    mermaid = generate_mermaid_from_schemaview(sv, include_enums=include_enums)

    # Output
    if output_path:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        # Encode explicitly so the file does not depend on the platform's
        # default encoding, and report the real on-disk size in bytes.
        encoded = mermaid.encode("utf-8")
        output_path.write_bytes(encoded)
        print(f"✅ Generated: {output_path}", file=sys.stderr)
        print(f" Size: {len(encoded)} bytes", file=sys.stderr)
    else:
        print(mermaid)
# Run the command-line entry point only when this file is executed as a
# script; importing the module must not trigger it.
if __name__ == "__main__":
    main()