glam/scripts/generate_complete_mermaid_diagram.py

164 lines
6.1 KiB
Python
Executable file

#!/usr/bin/env python3
"""
Generate a complete Mermaid class diagram for the entire Heritage Custodian schema.

This script walks the LinkML schema with SchemaView and assembles, by hand, a single
comprehensive diagram showing all classes and their relationships, rather than
individual per-class diagrams.

Usage:
    python3 scripts/generate_complete_mermaid_diagram.py

Output:
    schemas/20251121/uml/mermaid/complete_schema_YYYYMMDD_HHMMSS.mmd
"""
import sys
from pathlib import Path
from datetime import datetime
from linkml_runtime.utils.schemaview import SchemaView
from linkml_renderer.renderers.mermaid_renderer import MermaidRenderer
# Configuration
# Both paths are relative, so they are resolved against the current working
# directory at run time.
# LinkML schema file that is loaded and rendered.
SCHEMA_PATH = "schemas/20251121/linkml/01_custodian_name_modular.yaml"
# Directory that receives the timestamped .mmd output file.
OUTPUT_DIR = "schemas/20251121/uml/mermaid"
# Classes to exclude from diagrams (technical artifacts with no semantic significance).
# Excluded names are dropped as class boxes and as inheritance, mixin and
# slot-range targets.
EXCLUDED_CLASSES = {
    "Container",  # LinkML tree_root for validation only, not part of ontology
}
def _class_definition_lines(schemaview, class_name, max_slots_per_class):
    """Return the Mermaid lines declaring one class and its first attributes."""
    cls = schemaview.get_class(class_name)
    lines = [f" class {class_name}"]
    for slot_name in schemaview.class_slots(class_name)[:max_slots_per_class]:
        # induced_slot() applies this class's slot_usage overrides, so the
        # range/required shown are the ones that actually hold for the class.
        slot = schemaview.induced_slot(slot_name, class_name)
        slot_type = slot.range if slot.range else "string"
        required = "*" if slot.required else ""
        lines.append(f" {class_name} : {required}{slot_name} {slot_type}")
    # Mermaid expects the annotation on its own line, after the class exists.
    if cls.abstract:
        lines.append(f" <<abstract>> {class_name}")
    lines.append("")
    return lines


def _relationship_lines(schemaview, class_name, diagram_classes):
    """Return the Mermaid edges (is_a, mixins, class-ranged slots) of one class."""
    cls = schemaview.get_class(class_name)
    lines = []
    # Inheritance (is_a) - skipped when the parent is excluded.
    if cls.is_a and cls.is_a not in EXCLUDED_CLASSES:
        lines.append(f" {cls.is_a} <|-- {class_name} : inherits")
    # Mixins - skipped when the mixin is excluded.
    for mixin in cls.mixins or []:
        if mixin not in EXCLUDED_CLASSES:
            lines.append(f" {mixin} <|.. {class_name} : mixin")
    # Slots whose range is a class that appears in the diagram become associations.
    for slot_name in schemaview.class_slots(class_name):
        slot = schemaview.induced_slot(slot_name, class_name)
        if slot.range and slot.range in diagram_classes:
            relationship_type = "\"1..*\"" if slot.multivalued else "\"1\""
            lines.append(f" {class_name} --> {relationship_type} {slot.range} : {slot_name}")
    return lines


def generate_complete_diagram(schema_path: str, output_path: str, max_slots_per_class: int = 10) -> str:
    """
    Generate a complete Mermaid class diagram for all classes in schema.

    Every class except those in EXCLUDED_CLASSES is declared with up to
    ``max_slots_per_class`` attributes, followed by its inheritance, mixin and
    class-ranged slot relationships. Progress and a summary are printed to
    stdout.

    Args:
        schema_path: Path to the LinkML schema YAML file
        output_path: Path to write the generated Mermaid diagram; missing
            parent directories are created
        max_slots_per_class: Maximum number of attributes listed per class,
            to keep large diagrams readable (relationships are never truncated)

    Returns:
        ``output_path``, unchanged.

    Raises:
        ValueError: If ``max_slots_per_class`` is negative.
    """
    if max_slots_per_class < 0:
        raise ValueError("max_slots_per_class must be >= 0")

    print(f"Loading schema from: {schema_path}")
    # Load schema using SchemaView
    schemaview = SchemaView(schema_path)
    schema = schemaview.schema
    print(f"Schema loaded: {schema.name}")
    print(f"Found {len(schemaview.all_classes())} classes")
    print(f"Found {len(schemaview.all_slots())} slots")
    print(f"Found {len(schemaview.all_enums())} enums")

    # Filter out excluded classes; sort once so both passes emit a stable order.
    all_classes = sorted(c for c in schemaview.all_classes() if c not in EXCLUDED_CLASSES)
    # Set copy gives O(1) "is this range a diagram class?" checks in the slot loop.
    diagram_classes = set(all_classes)
    print(f"After filtering: {len(all_classes)} classes (excluded: {EXCLUDED_CLASSES})")

    # The version attribute can exist but be unset; fall back to N/A in both cases.
    version = getattr(schema, "version", None) or "N/A"
    mermaid_lines = [
        "classDiagram",
        "",
        " %% Heritage Custodian Complete Schema",
        f" %% Generated: {datetime.now().isoformat()}",
        f" %% Schema: {schema.name}",
        f" %% Version: {version}",
        # Sorted so the header does not depend on set iteration order.
        f" %% Excluded classes: {', '.join(sorted(EXCLUDED_CLASSES))}",
        "",
    ]

    # First pass: Define all classes with their attributes
    print("\nGenerating class definitions...")
    for class_name in all_classes:
        mermaid_lines.extend(_class_definition_lines(schemaview, class_name, max_slots_per_class))

    # Second pass: Define relationships
    print("\nGenerating relationships...")
    relationship_lines = []
    for class_name in all_classes:
        relationship_lines.extend(_relationship_lines(schemaview, class_name, diagram_classes))
    mermaid_lines.extend(relationship_lines)
    mermaid_lines.append("")

    # Write to file
    mermaid_content = "\n".join(mermaid_lines)
    print(f"\nWriting diagram to: {output_path}")
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(mermaid_content)

    print("\n✅ Complete diagram generated successfully!")
    print(f" - {len(all_classes)} classes")
    # Counted directly so mixin (<|..) edges are included in the total.
    print(f" - {len(relationship_lines)} relationships")
    print(f" - {len(mermaid_lines)} total lines")
    return output_path
def main():
    """Write a timestamped complete-schema diagram into OUTPUT_DIR.

    The output directory is created when missing. Any exception raised while
    generating the diagram is reported on stderr together with its traceback,
    and the process exits with status 1.
    """
    # Make sure the destination exists before anything is generated.
    target_dir = Path(OUTPUT_DIR)
    target_dir.mkdir(parents=True, exist_ok=True)

    # A second-resolution stamp in the filename keeps earlier runs from being overwritten.
    stamp = f"{datetime.now():%Y%m%d_%H%M%S}"
    output_file = target_dir / f"complete_schema_{stamp}.mmd"

    try:
        generate_complete_diagram(SCHEMA_PATH, str(output_file))
        print(f"\n📊 View diagram at: {output_file}")
        print("\n💡 Tip: Copy contents to https://mermaid.live/ for interactive visualization")
    except Exception as e:
        # Top-level boundary: report the failure in full, then signal it via the exit code.
        print(f"\n❌ Error generating diagram: {e}", file=sys.stderr)
        import traceback
        traceback.print_exc()
        raise SystemExit(1)
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()