glam/scripts/generate_uml_diagrams.py
kempersc 3ff0e33bf9 Add UML diagrams and scripts for custodian schema
- Created PlantUML diagrams for custodian types, full schema, legal status, and organizational structure.
- Implemented a script to generate GraphViz DOT diagrams from OWL/RDF ontology files.
- Developed a script to generate UML diagrams from modular LinkML schema, supporting both Mermaid and PlantUML formats.
- Enhanced class definitions and relationships in UML diagrams to reflect the latest schema updates.
2025-11-23 23:05:33 +01:00

358 lines
11 KiB
Python
Executable file

#!/usr/bin/env python3
"""
Generate UML diagrams from modular LinkML schema.
This script manually creates Mermaid and PlantUML class diagrams from the modular
LinkML schema, bypassing compatibility issues with gen-mermaid-class-diagram.
"""
import yaml
from pathlib import Path
from datetime import datetime
from collections import defaultdict
def load_schema_modules(schema_path: Path):
    """Load a modular LinkML schema and merge its directly imported modules.

    The main schema file is parsed first; every entry of its ``imports`` list
    is then resolved relative to the main schema's directory and parsed too.
    Entries starting with ``linkml:`` are skipped.  Imports declared inside
    the modules themselves are not followed.

    Args:
        schema_path: Path to the main schema YAML file.

    Returns:
        A dict holding ``id``, ``name``, ``title`` and ``version`` copied from
        the main schema, plus merged ``classes``, ``slots``, ``enums`` and
        ``types`` mappings.  Modules are merged in import order and the main
        schema's own definitions are merged last, so later definitions
        replace earlier ones with the same name.

    Raises:
        OSError: If the main schema file cannot be opened.
        yaml.YAMLError: If the main schema file is not valid YAML.
        ValueError: If the main schema file is empty or is not a mapping.
    """
    with open(schema_path, encoding='utf-8') as f:
        main_schema = yaml.safe_load(f)
    if not isinstance(main_schema, dict):
        raise ValueError(f"Schema file is empty or not a mapping: {schema_path}")

    schema_dir = schema_path.parent
    sections = ('classes', 'slots', 'enums', 'types')

    merged = {
        'id': main_schema.get('id'),
        'name': main_schema.get('name'),
        'title': main_schema.get('title'),
        'version': main_schema.get('version'),
        'classes': {},
        'slots': {},
        'enums': {},
        'types': {},
    }

    documents = []
    for import_path in main_schema.get('imports') or []:
        if import_path.startswith('linkml:'):
            continue  # Skip LinkML core types

        # Imports normally omit the extension; a 'modules/...' entry may also
        # name the file literally.  is_file() keeps a directory that shares
        # the import's name from being mistaken for the module.
        module_file = schema_dir / f"{import_path}.yaml"
        if import_path.startswith('modules/'):
            literal_file = schema_dir / import_path
            if literal_file.is_file():
                module_file = literal_file

        if not module_file.is_file():
            print(f"Warning: Module not found for import '{import_path}': {module_file}")
            continue

        try:
            with open(module_file, encoding='utf-8') as f:
                documents.append(yaml.safe_load(f))
        except (OSError, UnicodeDecodeError, yaml.YAMLError) as e:
            # Best effort: one unreadable module must not abort the whole run.
            print(f"Warning: Could not load {module_file}: {e}")

    # Definitions written directly in the main schema file belong to the
    # schema as well; merge them last.
    documents.append(main_schema)

    for document in documents:
        if not isinstance(document, dict):
            continue  # Empty file or a non-mapping document
        for section in sections:
            entries = document.get(section)
            # A bare 'classes:' key loads as None; ignore anything but a mapping.
            if isinstance(entries, dict):
                merged[section].update(entries)

    return merged
def generate_mermaid_class_diagram(schema, output_file: Path, focus_classes=None):
    """Write a Mermaid ``classDiagram`` describing the schema's classes.

    The diagram has three parts: one ``class`` entry per class (with at most
    its first 10 slots listed as attributes), ``<|--`` inheritance edges taken
    from ``is_a``, and ``-->`` edges for slots whose range is another drawn
    class.  Attribute rows use the global slot definitions; relationship
    edges additionally honour the class's ``slot_usage`` overrides of
    ``range`` and ``multivalued``.

    Args:
        schema: Mapping with optional ``classes`` and ``slots`` sections.
        output_file: Destination path; missing parent directories are created.
        focus_classes: Optional collection of class names.  When given and
            non-empty, only these classes are drawn.

    Returns:
        ``output_file``.
    """
    classes = schema.get('classes') or {}
    slots = schema.get('slots') or {}

    # Filter classes if focus list provided
    if focus_classes:
        wanted = set(focus_classes)
        classes = {name: body for name, body in classes.items() if name in wanted}

    # A class declared as a bare 'Name:' key loads as None; treat it as empty.
    classes = {name: (body or {}) for name, body in classes.items()}

    lines = ["classDiagram", ""]

    # Class definitions
    for class_name, class_def in classes.items():
        lines.append(f" class {class_name}")
        if class_def.get('abstract'):
            lines.append(f" <<abstract>> {class_name}")

        for slot_name in (class_def.get('slots') or [])[:10]:  # Limit to first 10 slots
            slot_def = slots.get(slot_name) or {}
            slot_range = slot_def.get('range', 'string')
            prefix = "*" if slot_def.get('required', False) else "+"
            suffix = "[]" if slot_def.get('multivalued', False) else ""
            lines.append(f" {class_name} : {prefix}{slot_name} {slot_range}{suffix}")
        lines.append("")

    # Inheritance relationships
    for class_name, class_def in classes.items():
        parent = class_def.get('is_a')
        if parent:
            lines.append(f" {parent} <|-- {class_name}")
    lines.append("")

    # Relationships for slots whose range is one of the drawn classes
    for class_name, class_def in classes.items():
        slot_usage = class_def.get('slot_usage') or {}
        for slot_name in class_def.get('slots') or []:
            # Apply the class-level override BEFORE testing the range, so an
            # override that introduces (or removes) a class range is honoured.
            effective = {**(slots.get(slot_name) or {}), **(slot_usage.get(slot_name) or {})}
            slot_range = effective.get('range')
            if isinstance(slot_range, str) and slot_range in classes:
                cardinality = '"1..*"' if effective.get('multivalued', False) else '"0..1"'
                lines.append(f" {class_name} --> {cardinality} {slot_range} : {slot_name}")

    # Write output
    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))

    print(f"✓ Generated Mermaid diagram: {output_file}")
    return output_file
def generate_plantuml_diagram(schema, output_file: Path, focus_classes=None):
    """Write a PlantUML class diagram describing the schema's classes.

    The output is wrapped in ``@startuml`` / ``@enduml`` and contains one
    ``class`` (or ``abstract class``) body per class with at most its first
    15 slots, ``<|--`` inheritance edges taken from ``is_a``, and ``-->``
    associations for slots whose range is another drawn class.  Attribute
    rows use the global slot definitions; associations additionally honour
    the class's ``slot_usage`` overrides of ``range`` and ``multivalued``.

    Args:
        schema: Mapping with optional ``classes`` and ``slots`` sections.
        output_file: Destination path; missing parent directories are created.
        focus_classes: Optional collection of class names.  When given and
            non-empty, only these classes are drawn.

    Returns:
        ``output_file``.
    """
    classes = schema.get('classes') or {}
    slots = schema.get('slots') or {}

    # Filter classes
    if focus_classes:
        wanted = set(focus_classes)
        classes = {name: body for name, body in classes.items() if name in wanted}

    # A class declared as a bare 'Name:' key loads as None; treat it as empty.
    classes = {name: (body or {}) for name, body in classes.items()}

    lines = [
        "@startuml",
        "",
        "!define ABSTRACT_CLASS abstract class",
        "",
        "skinparam classAttributeIconSize 0",
        "skinparam classFontSize 12",
        "skinparam packageStyle rectangle",
        "",
    ]

    # Class definitions
    for class_name, class_def in classes.items():
        keyword = "abstract class" if class_def.get('abstract') else "class"
        lines.append(f"{keyword} {class_name} {{")

        for slot_name in (class_def.get('slots') or [])[:15]:  # Limit to 15 slots
            slot_def = slots.get(slot_name) or {}
            slot_range = slot_def.get('range', 'string')
            prefix = "+" if slot_def.get('required', False) else "-"
            suffix = "[]" if slot_def.get('multivalued', False) else ""
            lines.append(f" {prefix} {slot_name}: {slot_range}{suffix}")

        lines.append("}")
        lines.append("")

    # Inheritance
    for class_name, class_def in classes.items():
        parent = class_def.get('is_a')
        if parent:
            lines.append(f"{parent} <|-- {class_name}")
    lines.append("")

    # Associations for slots whose range is one of the drawn classes
    for class_name, class_def in classes.items():
        slot_usage = class_def.get('slot_usage') or {}
        for slot_name in class_def.get('slots') or []:
            # Class-level slot_usage overrides take precedence over the
            # global slot definition when deciding the target and cardinality.
            effective = {**(slots.get(slot_name) or {}), **(slot_usage.get(slot_name) or {})}
            slot_range = effective.get('range')
            if isinstance(slot_range, str) and slot_range in classes:
                cardinality = '"1..*"' if effective.get('multivalued', False) else '"0..1"'
                lines.append(f"{class_name} --> {cardinality} {slot_range} : {slot_name}")

    lines.append("")
    lines.append("@enduml")

    # Write output
    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))

    print(f"✓ Generated PlantUML diagram: {output_file}")
    return output_file
def main():
    """Load the custodian schema and emit every Mermaid and PlantUML diagram.

    Paths are resolved relative to the directory two levels above this file.
    All output file names share one timestamp taken at the start of the run.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    project_dir = Path(__file__).parent.parent
    schema_file = project_dir / "schemas/20251121/linkml/01_custodian_name_modular.yaml"
    mermaid_dir = project_dir / "schemas/20251121/uml/mermaid"
    plantuml_dir = project_dir / "schemas/20251121/uml/plantuml"

    print(f"Loading schema from: {schema_file}")
    schema = load_schema_modules(schema_file)

    print(f"\nFound {len(schema['classes'])} classes")
    print(f"Found {len(schema['slots'])} slots")
    print(f"Found {len(schema['enums'])} enums")

    # One row per diagram set: (banner, output file stem, classes to draw).
    # A focus of None draws every class in the schema.
    diagram_sets = (
        (
            "\n=== Generating Full Schema Diagrams ===",
            "full_schema",
            None,
        ),
        (
            "\n=== Generating Core Classes Diagram ===",
            "core_classes",
            [
                'Custodian',
                'CustodianObservation',
                'CustodianName',
                'CustodianType',
                'CustodianLegalStatus',
                'CustodianPlace',
                'CustodianCollection',
                'ReconstructionActivity',
            ],
        ),
        (
            "\n=== Generating CustodianType Diagram ===",
            "custodian_type",
            [
                'CustodianType',
                'Custodian',
            ],
        ),
        (
            "\n=== Generating Legal Status Diagram ===",
            "legal_status",
            [
                'Custodian',
                'CustodianLegalStatus',
                'LegalEntityType',
                'LegalForm',
                'LegalName',
                'RegistrationInfo',
            ],
        ),
        (
            "\n=== Generating Organizational Structure Diagram ===",
            "organizational_structure",
            [
                'Custodian',
                'OrganizationalStructure',
                'OrganizationalChangeEvent',
                'PersonObservation',
            ],
        ),
    )

    for banner, stem, focus in diagram_sets:
        print(banner)
        generate_mermaid_class_diagram(
            schema,
            mermaid_dir / f"{stem}_{timestamp}.mmd",
            focus_classes=focus,
        )
        generate_plantuml_diagram(
            schema,
            plantuml_dir / f"{stem}_{timestamp}.puml",
            focus_classes=focus,
        )

    print(f"\n✅ All UML diagrams generated with timestamp: {timestamp}")
    print(f"\nMermaid diagrams: {mermaid_dir}")
    print(f"PlantUML diagrams: {plantuml_dir}")
# Script entry point: generate the diagrams only when run directly,
# not when this module is imported.
if __name__ == "__main__":
    main()