#!/usr/bin/env python3
"""
Generate UML diagrams from modular LinkML schema.

This script manually creates Mermaid and PlantUML class diagrams from the
modular LinkML schema, bypassing compatibility issues with
gen-mermaid-class-diagram.
"""

from collections import defaultdict  # noqa: F401  (kept from the original import block)
from datetime import datetime
from pathlib import Path

try:
    import yaml
except ImportError:  # pragma: no cover - depends on the environment
    # PyYAML is only needed by load_schema_modules(); the diagram generators
    # work on plain dicts and stay importable without it.
    yaml = None

# Top-level schema sections that are merged across imported modules.
_SCHEMA_SECTIONS = ('classes', 'slots', 'enums', 'types')

# Maximum number of slots listed per class box in each output format.
_MERMAID_SLOT_LIMIT = 10
_PLANTUML_SLOT_LIMIT = 15


def _as_dict(value):
    """Return *value* when it is a dict, otherwise an empty dict.

    YAML maps with an empty body (``Foo:``) load as ``None``; normalising
    here lets callers use ``.get`` without special cases.
    """
    return value if isinstance(value, dict) else {}


def _resolve_module_file(schema_dir: Path, import_path: str):
    """Return the existing file for *import_path*, or ``None`` if not found.

    Imports starting with ``modules/`` are tried verbatim first; every import
    is then tried with a ``.yaml`` suffix appended.
    """
    candidates = []
    if import_path.startswith('modules/'):
        candidates.append(schema_dir / import_path)
    candidates.append(schema_dir / f"{import_path}.yaml")
    for candidate in candidates:
        if candidate.exists():
            return candidate
    return None


def load_schema_modules(schema_path: Path):
    """Load the main schema and merge the sections of its imported modules.

    Only the direct ``imports`` of the main schema are read; ``linkml:``
    imports are skipped. Classes, slots, enums and types defined in the main
    file itself are not merged, and imports of imported modules are not
    followed.

    Args:
        schema_path: Path to the main LinkML YAML file.

    Returns:
        A dict with ``id``, ``name``, ``title`` and ``version`` copied from
        the main schema, plus merged ``classes``, ``slots``, ``enums`` and
        ``types`` dicts (later modules override earlier ones on key clashes).

    Raises:
        ImportError: If PyYAML is not installed.
        OSError: If the main schema file cannot be opened.
    """
    if yaml is None:
        raise ImportError("PyYAML is required to load LinkML schema files")

    with open(schema_path, encoding='utf-8') as f:
        main_schema = _as_dict(yaml.safe_load(f))

    schema_dir = schema_path.parent

    merged = {
        'id': main_schema.get('id'),
        'name': main_schema.get('name'),
        'title': main_schema.get('title'),
        'version': main_schema.get('version'),
        'classes': {},
        'slots': {},
        'enums': {},
        'types': {},
    }

    for import_path in main_schema.get('imports') or []:
        if import_path.startswith('linkml:'):
            continue  # Skip LinkML core types

        module_file = _resolve_module_file(schema_dir, import_path)
        if module_file is None:
            print(f"Warning: Could not find module for import: {import_path}")
            continue

        try:
            with open(module_file, encoding='utf-8') as f:
                module = yaml.safe_load(f)
        except (OSError, UnicodeDecodeError, yaml.YAMLError) as e:
            print(f"Warning: Could not load {module_file}: {e}")
            continue

        module = _as_dict(module)
        for section in _SCHEMA_SECTIONS:
            merged[section].update(_as_dict(module.get(section)))

    return merged


def _select_classes(schema, focus_classes):
    """Return the schema's classes, restricted to *focus_classes* if given.

    Class definitions with an empty body are normalised to ``{}``.
    """
    classes = _as_dict(schema.get('classes'))
    if focus_classes:
        wanted = set(focus_classes)
        classes = {name: cls for name, cls in classes.items() if name in wanted}
    return {name: _as_dict(cls) for name, cls in classes.items()}


def _effective_slot(slot_name, class_def, slots):
    """Return the slot definition with the class's ``slot_usage`` applied."""
    slot_def = dict(_as_dict(slots.get(slot_name)))
    usage = _as_dict(class_def.get('slot_usage'))
    slot_def.update(_as_dict(usage.get(slot_name)))
    return slot_def


def _iter_class_slots(class_def, slots, limit=None):
    """Yield ``(slot_name, effective_slot_def)`` for the slots of a class.

    At most *limit* slots are yielded when *limit* is given.
    """
    slot_names = class_def.get('slots') or []
    if limit is not None:
        slot_names = slot_names[:limit]
    for slot_name in slot_names:
        yield slot_name, _effective_slot(slot_name, class_def, slots)


def _inheritance_lines(classes, indent=""):
    """Return one ``Parent <|-- Child`` line per class that has ``is_a``."""
    return [
        f"{indent}{class_def['is_a']} <|-- {class_name}"
        for class_name, class_def in classes.items()
        if 'is_a' in class_def
    ]


def _association_lines(classes, slots, indent=""):
    """Return one arrow line per slot whose effective range is a drawn class.

    ``slot_usage`` is applied before the range is tested, so a slot narrowed
    to a class inside one class still produces an edge.
    """
    lines = []
    for class_name, class_def in classes.items():
        for slot_name, slot_def in _iter_class_slots(class_def, slots):
            target = slot_def.get('range')
            if target and target in classes:
                multivalued = slot_def.get('multivalued', False)
                cardinality = '"1..*"' if multivalued else '"0..1"'
                lines.append(
                    f"{indent}{class_name} --> {cardinality} {target} : {slot_name}"
                )
    return lines


def _write_lines(output_file: Path, lines):
    """Write *lines* joined by newlines to *output_file*, creating parents."""
    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))


def generate_mermaid_class_diagram(schema, output_file: Path, focus_classes=None):
    """Generate a Mermaid class diagram from a merged schema dict.

    Args:
        schema: Dict with ``classes`` and ``slots`` mappings, as returned by
            ``load_schema_modules``.
        output_file: Destination ``.mmd`` path; parent directories are created.
        focus_classes: Optional iterable of class names; when given and
            non-empty, only those classes are drawn.

    Returns:
        ``output_file``.
    """
    slots = _as_dict(schema.get('slots'))
    classes = _select_classes(schema, focus_classes)

    lines = ["classDiagram", ""]

    # Class boxes with (at most) the first few slots as attributes.
    for class_name, class_def in classes.items():
        lines.append(f" class {class_name}")
        if class_def.get('abstract'):
            # Mermaid annotation syntax is "<<text>> ClassName".
            lines.append(f" <<abstract>> {class_name}")

        for slot_name, slot_def in _iter_class_slots(
            class_def, slots, limit=_MERMAID_SLOT_LIMIT
        ):
            slot_range = slot_def.get('range') or 'string'
            prefix = "*" if slot_def.get('required', False) else "+"
            suffix = "[]" if slot_def.get('multivalued', False) else ""
            lines.append(
                f" {class_name} : {prefix}{slot_name} {slot_range}{suffix}"
            )

        lines.append("")

    # Inheritance relationships
    lines.extend(_inheritance_lines(classes, indent=" "))
    lines.append("")

    # Composition relationships (slots whose range is another drawn class)
    lines.extend(_association_lines(classes, slots, indent=" "))

    _write_lines(output_file, lines)
    print(f"✓ Generated Mermaid diagram: {output_file}")
    return output_file


def generate_plantuml_diagram(schema, output_file: Path, focus_classes=None):
    """Generate a PlantUML class diagram from a merged schema dict.

    Args:
        schema: Dict with ``classes`` and ``slots`` mappings, as returned by
            ``load_schema_modules``.
        output_file: Destination ``.puml`` path; parent directories are created.
        focus_classes: Optional iterable of class names; when given and
            non-empty, only those classes are drawn.

    Returns:
        ``output_file``.
    """
    slots = _as_dict(schema.get('slots'))
    classes = _select_classes(schema, focus_classes)

    lines = [
        "@startuml",
        "",
        "!define ABSTRACT_CLASS abstract class",
        "",
        "skinparam classAttributeIconSize 0",
        "skinparam classFontSize 12",
        "skinparam packageStyle rectangle",
        "",
    ]

    # Class boxes with (at most) the first few slots as attributes.
    for class_name, class_def in classes.items():
        keyword = "abstract class" if class_def.get('abstract') else "class"
        lines.append(f"{keyword} {class_name} {{")

        for slot_name, slot_def in _iter_class_slots(
            class_def, slots, limit=_PLANTUML_SLOT_LIMIT
        ):
            slot_range = slot_def.get('range') or 'string'
            prefix = "+" if slot_def.get('required', False) else "-"
            suffix = "[]" if slot_def.get('multivalued', False) else ""
            lines.append(f" {prefix} {slot_name}: {slot_range}{suffix}")

        lines.append("}")
        lines.append("")

    # Inheritance
    lines.extend(_inheritance_lines(classes))
    lines.append("")

    # Associations (same slot_usage-aware rule as the Mermaid generator)
    lines.extend(_association_lines(classes, slots))
    lines.append("")
    lines.append("@enduml")

    _write_lines(output_file, lines)
    print(f"✓ Generated PlantUML diagram: {output_file}")
    return output_file


def main():
    """Generate all UML diagrams (full schema plus focused views)."""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    base_dir = Path(__file__).parent.parent
    schema_file = base_dir / "schemas/20251121/linkml/01_custodian_name_modular.yaml"

    print(f"Loading schema from: {schema_file}")
    schema = load_schema_modules(schema_file)

    print(f"\nFound {len(schema['classes'])} classes")
    print(f"Found {len(schema['slots'])} slots")
    print(f"Found {len(schema['enums'])} enums")

    mermaid_dir = base_dir / "schemas/20251121/uml/mermaid"
    plantuml_dir = base_dir / "schemas/20251121/uml/plantuml"

    # (section title, output file stem, classes to draw or None for all)
    diagram_sets = [
        ("Full Schema Diagrams", "full_schema", None),
        (
            "Core Classes Diagram",
            "core_classes",
            [
                'Custodian',
                'CustodianObservation',
                'CustodianName',
                'CustodianType',
                'CustodianLegalStatus',
                'CustodianPlace',
                'CustodianCollection',
                'ReconstructionActivity',
            ],
        ),
        (
            "CustodianType Diagram",
            "custodian_type",
            ['CustodianType', 'Custodian'],
        ),
        (
            "Legal Status Diagram",
            "legal_status",
            [
                'Custodian',
                'CustodianLegalStatus',
                'LegalEntityType',
                'LegalForm',
                'LegalName',
                'RegistrationInfo',
            ],
        ),
        (
            "Organizational Structure Diagram",
            "organizational_structure",
            [
                'Custodian',
                'OrganizationalStructure',
                'OrganizationalChangeEvent',
                'PersonObservation',
            ],
        ),
    ]

    for title, stem, focus in diagram_sets:
        print(f"\n=== Generating {title} ===")
        generate_mermaid_class_diagram(
            schema,
            mermaid_dir / f"{stem}_{timestamp}.mmd",
            focus_classes=focus,
        )
        generate_plantuml_diagram(
            schema,
            plantuml_dir / f"{stem}_{timestamp}.puml",
            focus_classes=focus,
        )

    print(f"\n✅ All UML diagrams generated with timestamp: {timestamp}")
    print(f"\nMermaid diagrams: {mermaid_dir}")
    print(f"PlantUML diagrams: {plantuml_dir}")


if __name__ == '__main__':
    main()