glam/scripts/owl_to_plantuml.py
kempersc 2761857b0d Add scripts for converting OWL/Turtle ontology to Mermaid and PlantUML diagrams
- Implemented `owl_to_mermaid.py` to convert OWL/Turtle files into Mermaid class diagrams.
- Implemented `owl_to_plantuml.py` to convert OWL/Turtle files into PlantUML class diagrams.
- Added two new PlantUML files for custodian multi-aspect diagrams.
2025-11-22 23:01:13 +01:00

172 lines
5.3 KiB
Python

#!/usr/bin/env python3
"""
Convert OWL/Turtle ontology to PlantUML class diagram.
Usage:
python owl_to_plantuml.py input.owl.ttl output.puml
"""
import sys
from pathlib import Path
from rdflib import Graph, Namespace, RDF, RDFS, OWL
from collections import defaultdict
# Namespaces
# Base URIs wrapped as rdflib Namespace objects.
# LINKML and SCHEMA are declared here but are not referenced in the visible
# part of this file.
LINKML = Namespace("https://w3id.org/linkml/")
# Namespace of the ontology's own terms; parse_owl_to_plantuml keeps only
# classes whose URI starts with this prefix.
HC = Namespace("https://nde.nl/ontology/hc/")
SCHEMA = Namespace("https://schema.org/")
# Used by get_label to look up skos:prefLabel when rdfs:label is absent.
SKOS = Namespace("http://www.w3.org/2004/02/skos/core#")
def simplify_uri(uri, graph):
    """Convert a URI to a short form using the graph's prefixes.

    ``graph.qname(uri)`` is asked for the ``prefix:localname`` form. When the
    prefix is one of ``hc``, ``hc_class``, ``hc_slot`` or ``hc_enum`` the
    prefix is dropped and only the local name is returned; any other qname is
    returned unchanged.

    Args:
        uri: The URI to shorten.
        graph: Object exposing ``qname(uri)`` (e.g. an rdflib ``Graph``).

    Returns:
        The shortened name as a string. If ``graph.qname`` raises, the last
        non-empty ``/``- or ``#``-separated segment of the URI is returned,
        or the full URI text when no such segment exists.
    """
    own_prefixes = ('hc_class', 'hc_slot', 'hc_enum', 'hc')

    try:
        qname = graph.qname(uri)
    except Exception:
        # Deliberately broad: this is a best-effort display helper and any
        # failure to compute a qname should degrade to the URI's tail.
        text = str(uri)
        # Strip trailing separators first so "http://x/things/" yields
        # "things" instead of an empty string.
        tail = text.rstrip('/#').split('/')[-1].split('#')[-1]
        return tail or text

    prefix, separator, local = qname.partition(':')
    if separator and prefix in own_prefixes:
        # For LinkML generated URIs, just use the local name.
        return local
    return qname
def get_label(graph, uri):
    """Return a display name for ``uri``.

    The first truthy value among ``rdfs:label`` and ``skos:prefLabel`` (in
    that order) is returned as a string; when neither is present the result
    of ``simplify_uri(uri, graph)`` is returned instead.
    """
    # Predicates are tried in priority order; the first hit wins.
    for label_predicate in (RDFS.label, SKOS.prefLabel):
        candidate = graph.value(uri, label_predicate)
        if candidate:
            return str(candidate)

    # No explicit label: fall back to the shortened URI.
    return simplify_uri(uri, graph)
def _puml_class_name(label):
    """Return ``label`` in a form usable as a single PlantUML class name."""
    if label.isidentifier():
        return label
    # Names with spaces or punctuation must be quoted to be read as one name.
    # NOTE(review): quoted names are documented for class declarations and
    # relations; confirm the `note ... of` form accepts them in the PlantUML
    # version in use.
    return '"' + label.replace('"', "'") + '"'


def _puml_note_text(description, limit=150):
    """Return ``description`` shortened to ``limit`` characters for a note."""
    text = str(description)
    # Truncate before escaping so a "\n" escape is never cut in half.
    shortened = text[:limit].replace('\n', '\\n')
    if len(text) > limit:
        shortened += "..."
    return shortened


def parse_owl_to_plantuml(owl_file, output_file):
    """Parse OWL/Turtle and generate a PlantUML class diagram.

    Loads ``owl_file`` as Turtle, collects every ``owl:Class`` / ``rdfs:Class``
    whose URI starts with the ``HC`` namespace, attaches the properties that
    declare the class as their ``rdfs:domain``, and writes one PlantUML class
    per HC class plus an inheritance arrow for every ``rdfs:subClassOf`` link
    between two HC classes.

    Args:
        owl_file: Path of the Turtle file to read.
        output_file: Path of the ``.puml`` file to write (UTF-8).

    Returns:
        None. Progress messages are printed to stdout.

    Raises:
        Nothing is caught here: errors from ``graph.parse`` or from opening
        and writing ``output_file`` propagate to the caller.
    """
    print(f"Loading OWL file: {owl_file}")
    graph = Graph()
    graph.parse(owl_file, format='turtle')
    print(f"Loaded {len(graph)} triples")

    # Find all classes, keeping only those in the HC namespace (our ontology).
    # A set gives O(1) membership tests for the inheritance scan below.
    hc_prefix = str(HC)
    hc_classes = {
        cls
        for class_type in (OWL.Class, RDFS.Class)
        for cls in graph.subjects(RDF.type, class_type)
        if str(cls).startswith(hc_prefix)
    }
    print(f"Found {len(hc_classes)} HC classes")

    # Resolve each class label once; it is reused for sorting and output.
    labels = {cls: get_label(graph, cls) for cls in hc_classes}

    # Find properties via rdfs:domain: class -> [property description dict]
    properties = defaultdict(list)
    for cls in hc_classes:
        for prop in graph.subjects(RDFS.domain, cls):
            prop_range = graph.value(prop, RDFS.range)
            properties[cls].append({
                'uri': prop,
                'label': get_label(graph, prop),
                'range': get_label(graph, prop_range) if prop_range else 'string',
                'type': 'property'
            })

    # Find inheritance relationships. Every rdfs:subClassOf object is
    # inspected: a class may list several superclasses, and reading a single
    # arbitrary one can miss the HC parent.
    inheritance = sorted({
        (labels[child], labels[parent])
        for child in hc_classes
        for parent in graph.objects(child, RDFS.subClassOf)
        if parent in hc_classes and parent != child
    })

    # Generate PlantUML
    lines = [
        "@startuml",
        "!theme plain",
        "skinparam classAttributeIconSize 0",
        "skinparam linetype ortho",
        "skinparam class {",
        " BackgroundColor white",
        " BorderColor black",
        " ArrowColor black",
        "}",
        "",
        "' Heritage Custodian Ontology - Class Diagram",
        f"' Generated from: {Path(owl_file).name}",
        "",
    ]

    # Sort classes by label (URI as tie-breaker) for consistent output.
    sorted_classes = sorted(hc_classes, key=lambda c: (labels[c], str(c)))

    # Output classes
    for cls in sorted_classes:
        cls_name = _puml_class_name(labels[cls])
        description = graph.value(cls, RDFS.comment)

        lines.append(f"class {cls_name} {{")
        for prop in sorted(properties.get(cls, ()), key=lambda p: p['label']):
            prop_name = prop['label']
            prop_range = prop['range']
            lines.append(f" {prop_name}: {prop_range}")
        lines.append("}")

        # Add note if there's a description
        if description:
            desc_short = _puml_note_text(description)
            lines.append(f"note right of {cls_name}")
            lines.append(f" {desc_short}")
            lines.append("end note")
        lines.append("")

    # Output inheritance relationships
    lines.append("' Inheritance relationships")
    for child_label, parent_label in inheritance:
        child_name = _puml_class_name(child_label)
        parent_name = _puml_class_name(parent_label)
        lines.append(f"{parent_name} <|-- {child_name}")
    lines.append("")
    lines.append("@enduml")

    # Write output. Explicit UTF-8 so non-ASCII labels and comments do not
    # depend on the platform's default encoding.
    print(f"Writing PlantUML to: {output_file}")
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))

    print(f"Generated PlantUML diagram with {len(sorted_classes)} classes")
    print(f"To render: plantuml {output_file}")
if __name__ == '__main__':
    # Command-line entry point: expects exactly an input and an output path.
    # Diagnostics go through sys.exit(message), which writes the message to
    # stderr and exits with status 1, keeping stdout for progress output only.
    if len(sys.argv) != 3:
        sys.exit("Usage: python owl_to_plantuml.py input.owl.ttl output.puml")

    owl_file = Path(sys.argv[1])
    output_file = Path(sys.argv[2])

    if not owl_file.exists():
        sys.exit(f"Error: Input file not found: {owl_file}")

    parse_owl_to_plantuml(owl_file, output_file)