glam/scripts/owl_to_mermaid.py
kempersc 2761857b0d Add scripts for converting OWL/Turtle ontology to Mermaid and PlantUML diagrams
- Implemented `owl_to_mermaid.py` to convert OWL/Turtle files into Mermaid class diagrams.
- Implemented `owl_to_plantuml.py` to convert OWL/Turtle files into PlantUML class diagrams.
- Added two new PlantUML files for custodian multi-aspect diagrams.
2025-11-22 23:01:13 +01:00

152 lines
4.8 KiB
Python

#!/usr/bin/env python3
"""
Convert OWL/Turtle ontology to Mermaid class diagram.
Usage:
python owl_to_mermaid.py input.owl.ttl output.mmd
"""
import sys
from pathlib import Path
from rdflib import Graph, Namespace, RDF, RDFS, OWL
from collections import defaultdict
# Namespaces
# LinkML namespace (not referenced by the code visible in this file).
LINKML = Namespace("https://w3id.org/linkml/")
# Heritage Custodian namespace; only classes whose URI starts with it are drawn.
HC = Namespace("https://nde.nl/ontology/hc/")
# schema.org namespace (not referenced by the code visible in this file).
SCHEMA = Namespace("https://schema.org/")
# SKOS core; skos:prefLabel is the fallback label source in get_label().
SKOS = Namespace("http://www.w3.org/2004/02/skos/core#")
def simplify_uri(uri, graph):
    """Shorten *uri* to a prefixed name, dropping the prefix for HC terms.

    The graph's prefix bindings are used to build ``prefix:local``. For the
    ``hc_class``, ``hc_slot``, ``hc_enum`` and ``hc`` prefixes only the local
    part is returned. If the graph cannot produce a qname, the text after the
    last ``/`` and then the last ``#`` of the URI is returned instead.
    """
    try:
        short = graph.qname(uri)
        # Split on the first colon only; ``sep`` is empty when there is none.
        prefix, sep, local = short.partition(':')
        if sep and prefix in ('hc_class', 'hc_slot', 'hc_enum', 'hc'):
            return local
        return short
    except Exception:
        # Best-effort fallback: keep only the trailing path/fragment segment.
        tail = str(uri).rsplit('/', 1)[-1]
        return tail.rsplit('#', 1)[-1]
def get_label(graph, uri):
    """Return a display label for *uri*.

    ``rdfs:label`` is preferred, then ``skos:prefLabel``; when neither yields
    a truthy value the shortened URI from ``simplify_uri`` is used.
    """
    for predicate in (RDFS.label, SKOS.prefLabel):
        found = graph.value(uri, predicate)
        if found:
            return str(found)
    return simplify_uri(uri, graph)
def sanitize_mermaid_id(name):
    """Return *name* as an identifier that is safe to use in Mermaid.

    Every character that is not alphanumeric (Unicode letters and digits
    included) is replaced by an underscore. Spaces, hyphens and dots map to
    ``_`` exactly as before; other punctuation such as ``:``, ``(``, ``)`` or
    ``/`` -- which can appear in labels and prefixed names -- is now replaced
    too instead of leaking into the diagram source.

    Args:
        name: Label or short name to convert.

    Returns:
        The sanitized string, the same length as *name*.
    """
    # ``_`` itself is not alphanumeric, so it maps to ``_`` and is preserved.
    return ''.join(ch if ch.isalnum() else '_' for ch in name)
def parse_owl_to_mermaid(owl_file, output_file):
    """Parse an OWL/Turtle ontology and write it as a Mermaid class diagram.

    Only classes whose URI starts with the HC namespace are drawn. Each class
    lists up to eight of the properties that name it as ``rdfs:domain``
    (sorted by label), and every ``rdfs:subClassOf`` link between two drawn
    classes is emitted as an inheritance arrow.

    Args:
        owl_file: Path of the Turtle file to read.
        output_file: Path of the Mermaid file to write; it is overwritten.

    Errors raised while parsing the input or writing the output propagate to
    the caller.
    """
    max_props = 8  # Cap on members listed per class, for readability.

    print(f"Loading OWL file: {owl_file}")
    graph = Graph()
    graph.parse(owl_file, format='turtle')
    print(f"Loaded {len(graph)} triples")

    # Find all classes (owl:Class and rdfs:Class), then keep the HC ones.
    classes = set(graph.subjects(RDF.type, OWL.Class))
    classes.update(graph.subjects(RDF.type, RDFS.Class))
    hc_prefix = str(HC)
    # A set gives O(1) membership tests when filtering parents below.
    hc_classes = {c for c in classes if str(c).startswith(hc_prefix)}
    print(f"Found {len(hc_classes)} HC classes")

    # Resolve each class label once; it is needed for sorting and output.
    labels = {cls: get_label(graph, cls) for cls in hc_classes}

    # Find properties: every subject that declares the class as its domain.
    properties = defaultdict(list)
    for cls in hc_classes:
        for prop in graph.subjects(RDFS.domain, cls):
            prop_range = graph.value(prop, RDFS.range)
            properties[cls].append({
                'label': get_label(graph, prop),
                'range': get_label(graph, prop_range) if prop_range else 'string',
            })

    # Find inheritance. A class may have several superclasses, so inspect all
    # rdfs:subClassOf objects rather than a single arbitrary one; otherwise an
    # HC parent can be missed when a non-HC parent is also declared.
    inheritance = [
        (child, parent)
        for child in hc_classes
        for parent in graph.objects(child, RDFS.subClassOf)
        if parent in hc_classes
    ]
    inheritance.sort(key=lambda pair: (labels[pair[0]], labels[pair[1]]))

    # Generate Mermaid
    lines = [
        "```mermaid",
        "classDiagram",
        " %% Heritage Custodian Ontology - Class Diagram",
        f" %% Generated from: {Path(owl_file).name}",
        "",
    ]

    sorted_classes = sorted(hc_classes, key=labels.__getitem__)

    # Output classes
    for cls in sorted_classes:
        cls_id = sanitize_mermaid_id(labels[cls])
        lines.append(f" class {cls_id} {{")
        cls_props = properties.get(cls, [])
        for prop in sorted(cls_props, key=lambda p: p['label'])[:max_props]:
            # Sanitize property names for Mermaid
            prop_name_safe = prop['label'].replace('-', '_').replace(' ', '_')
            lines.append(f" +{prop['range']} {prop_name_safe}")
        if len(cls_props) > max_props:
            lines.append(f" +... ({len(cls_props) - max_props} more properties)")
        lines.append(" }")
        lines.append("")

    # Output inheritance relationships
    lines.append(" %% Inheritance relationships")
    for child, parent in inheritance:
        child_label = sanitize_mermaid_id(labels[child])
        parent_label = sanitize_mermaid_id(labels[parent])
        lines.append(f" {parent_label} <|-- {child_label}")
    lines.append("```")

    # Write output as UTF-8 so non-ASCII labels do not depend on the
    # platform's default encoding.
    print(f"Writing Mermaid to: {output_file}")
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))
    print(f"Generated Mermaid diagram with {len(sorted_classes)} classes")
if __name__ == '__main__':
    # Exactly two positional arguments are expected: input and output path.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python owl_to_mermaid.py input.owl.ttl output.mmd")
        sys.exit(1)

    owl_file, output_file = (Path(arg) for arg in cli_args)

    # Fail early with a readable message instead of a parser traceback.
    if not owl_file.exists():
        print(f"Error: Input file not found: {owl_file}")
        sys.exit(1)

    parse_owl_to_mermaid(owl_file, output_file)