glam/scripts/generate_graphviz_from_owl.py
kempersc 3ff0e33bf9 Add UML diagrams and scripts for custodian schema
- Created PlantUML diagrams for custodian types, full schema, legal status, and organizational structure.
- Implemented a script to generate GraphViz DOT diagrams from OWL/RDF ontology files.
- Developed a script to generate UML diagrams from modular LinkML schema, supporting both Mermaid and PlantUML formats.
- Enhanced class definitions and relationships in UML diagrams to reflect the latest schema updates.
2025-11-23 23:05:33 +01:00

208 lines
7.3 KiB
Python
Executable file

#!/usr/bin/env python3
"""
Generate GraphViz DOT diagram from OWL/RDF ontology file.
Extracts classes and their relationships (inheritance, properties) from
an OWL ontology and generates a GraphViz DOT file for visualization.
Usage:
python scripts/generate_graphviz_from_owl.py \
schemas/20251121/rdf/custodian_multi_aspect_20251122_155319.owl.ttl \
schemas/20251121/uml/graphviz/custodian_multi_aspect_20251122_155319.dot
"""
import sys
from pathlib import Path
from typing import Set, Dict, List, Tuple
import re
def extract_classes_from_ttl(ttl_content: str) -> Set[str]:
    """Collect class names found in Turtle/OWL text.

    Two sources are merged into one set:

    * subjects of the form ``<.../hc/class/Name/Name> a owl:Class``
      (both path segments must be identical), and
    * every ``rdfs:label "..."`` value whose first character is uppercase.
      Labels starting with a lowercase character are treated as property
      labels and ignored.

    Args:
        ttl_content: Raw text of the Turtle file.

    Returns:
        The set of class names; empty when nothing matches.
    """
    # Names taken from explicit owl:Class declarations.
    declared = {
        hit.group(1)
        for hit in re.finditer(
            r'<https://nde\.nl/ontology/hc/class/([^/]+)/\1>\s+a\s+owl:Class',
            ttl_content,
        )
    }

    # Names taken from labels; the uppercase-first-letter test is the only
    # thing separating class labels from property labels here.
    labelled = {
        text
        for text in re.findall(r'rdfs:label\s+"([^"]+)"', ttl_content)
        if text and text[0].isupper()
    }

    return declared | labelled
def extract_enums_from_ttl(ttl_content: str) -> Set[str]:
    """Collect enum names found in Turtle/OWL text.

    A name is picked up when it ends in ``Enum`` and appears either as an
    IRI of the form ``<.../hc/enum/NameEnum/NameEnum>`` (both path segments
    identical) or as an ``rdfs:label "NameEnum"`` value.

    Args:
        ttl_content: Raw text of the Turtle file.

    Returns:
        The set of enum names; empty when nothing matches.
    """
    # Each pattern has exactly one capture group, so findall yields the names.
    name_patterns = (
        r'<https://nde\.nl/ontology/hc/enum/([^/]+Enum)/\1>',
        r'rdfs:label\s+"([^"]+Enum)"',
    )

    found = set()
    for pattern in name_patterns:
        found.update(re.findall(pattern, ttl_content))
    return found
def extract_inheritance_from_ttl(ttl_content: str) -> List[Tuple[str, str]]:
    """Extract inheritance relationships as ``(subclass, superclass)`` pairs.

    This is a line-based heuristic, not a Turtle parser. The most recent
    ``<.../hc/class/Name/Name> a owl:Class`` declaration is remembered as the
    current class. Every class IRI that follows ``rdfs:subClassOf`` on a line
    is then recorded as a superclass of that class.

    Only the text *after* ``rdfs:subClassOf`` is scanned, so a declaration
    and its ``rdfs:subClassOf`` may share one line, and a comma-separated
    list of superclasses on one line yields one pair per superclass.

    Known limitation: superclasses listed on continuation lines that do not
    themselves contain ``rdfs:subClassOf`` are not detected.

    Args:
        ttl_content: Raw text of the Turtle file.

    Returns:
        Pairs in order of appearance; self-references are omitted.
    """
    class_decl = re.compile(
        r'<https://nde\.nl/ontology/hc/class/([^/]+)/\1>\s+a\s+owl:Class'
    )
    class_ref = re.compile(r'<https://nde\.nl/ontology/hc/class/([^/]+)/')
    marker = 'rdfs:subClassOf'

    relationships = []
    current_class = None

    for line in ttl_content.split('\n'):
        # A new class declaration changes the subject for following lines.
        declaration = class_decl.search(line)
        if declaration:
            current_class = declaration.group(1)

        if current_class is None or marker not in line:
            continue

        # Restrict the search to the object position: anything before the
        # predicate (e.g. the subject IRI on the same line) is not a superclass.
        _, _, objects = line.partition(marker)
        for superclass in class_ref.findall(objects):
            if superclass != current_class:  # Avoid self-references
                relationships.append((current_class, superclass))

    return relationships
def extract_properties_from_ttl(ttl_content: str) -> Dict[str, List[str]]:
    """Map each declared class to the slot names referenced beneath it.

    The text is scanned line by line. A line containing
    ``<.../hc/class/Name/Name> a owl:Class`` makes ``Name`` the current class
    and gives it an entry in the result. Each later line containing
    ``owl:onProperty`` contributes the first ``<.../hc/slot/<slot>/`` name on
    that line to the current class.

    Args:
        ttl_content: Raw text of the Turtle file.

    Returns:
        Class name -> slot names in first-seen order, without duplicates.
        A class with no matching ``owl:onProperty`` lines maps to ``[]``.
    """
    slots_by_class = {}
    owner = None

    for row in ttl_content.split('\n'):
        declared = re.search(
            r'<https://nde\.nl/ontology/hc/class/([^/]+)/\1>\s+a\s+owl:Class',
            row,
        )
        if declared:
            owner = declared.group(1)
            slots_by_class.setdefault(owner, [])

        # Restriction lines seen before any class declaration have no owner.
        if not owner or 'owl:onProperty' not in row:
            continue

        slot = re.search(r'<https://nde\.nl/ontology/hc/slot/([^/]+)/', row)
        if slot is None:
            continue

        bucket = slots_by_class[owner]
        slot_name = slot.group(1)
        if slot_name not in bucket:
            bucket.append(slot_name)

    return slots_by_class
def generate_dot_file(classes: Set[str],
                      enums: Set[str],
                      inheritance: List[Tuple[str, str]],
                      properties: Dict[str, List[str]],
                      output_path: Path) -> None:
    """Write a GraphViz DOT file describing classes, enums and inheritance.

    Names may come from free-text ``rdfs:label`` values, so node IDs are
    quoted whenever they are not plain DOT identifiers, and label text is
    escaped. For names that are already plain identifiers the output is
    unchanged.

    Args:
        classes: Class names; each becomes a record node, written sorted.
        enums: Enum names; each becomes a light-yellow node, written sorted.
        inheritance: ``(subclass, superclass)`` pairs. A pair is written as
            an edge only when both names are in ``classes``.
        properties: Class name -> property names. At most the first five are
            shown per class, followed by a total count when there are more.
        output_path: Destination file; written as UTF-8, overwriting any
            existing file.
    """
    lines = [
        'digraph HeritageCustodianOntology {\n',
        ' // Graph settings\n',
        ' rankdir=TB;\n',
        ' node [shape=record, style=filled, fillcolor=lightblue];\n',
        ' edge [dir=back, arrowtail=empty];\n',
        ' \n',
        ' // Classes\n',
    ]

    for cls in sorted(classes):
        props = properties.get(cls, [])
        label = _dot_record_text(cls)
        if props:
            # Record node: class name, then a left-justified property list.
            prop_list = '\\l'.join(_dot_record_text(p) for p in props[:5])
            if len(props) > 5:
                prop_list += f'\\l... ({len(props)} total)'
            else:
                prop_list += '\\l'
            label = f'{label}|{prop_list}'
        lines.append(f' {_dot_id(cls)} [label="{{{label}}}"];\n')

    lines.append(' \n')
    lines.append(' // Enums\n')
    for enum in sorted(enums):
        # Enum nodes inherit shape=record from the node defaults, so their
        # labels need record escaping too.
        lines.append(
            f' {_dot_id(enum)} [label="{_dot_record_text(enum)}", '
            f'fillcolor=lightyellow];\n'
        )

    lines.append(' \n')
    lines.append(' // Inheritance relationships\n')
    for subclass, superclass in inheritance:
        if subclass in classes and superclass in classes:
            lines.append(
                f' {_dot_id(subclass)} -> {_dot_id(superclass)} '
                f'[label="inherits"];\n'
            )
    lines.append('}\n')

    with open(output_path, 'w', encoding='utf-8') as f:
        f.writelines(lines)

    print(f"✅ Generated GraphViz DOT file: {output_path}")
    print(f" - Classes: {len(classes)}")
    print(f" - Enums: {len(enums)}")
    # Counts every extracted pair, including those skipped above because an
    # endpoint is not a known class.
    print(f" - Inheritance relationships: {len(inheritance)}")


def _dot_id(name: str) -> str:
    """Return *name* as a DOT node ID, double-quoting it only when required."""
    keywords = {'node', 'edge', 'graph', 'digraph', 'subgraph', 'strict'}
    is_plain = re.fullmatch(r'[A-Za-z_][A-Za-z0-9_]*', name) is not None
    if is_plain and name.lower() not in keywords:
        return name
    # Inside a quoted DOT string the only character needing an escape is '"'.
    return '"' + name.replace('"', '\\"') + '"'


def _dot_record_text(text: str) -> str:
    """Escape characters that are structural inside a quoted record label."""
    escaped = text.replace('\\', '\\\\')
    for char in '{}|<>"':
        escaped = escaped.replace(char, '\\' + char)
    return escaped
def main():
    """Command-line entry point: convert an OWL/Turtle file to a DOT file.

    Expects exactly two command-line arguments, the input Turtle path and
    the output DOT path. Prints a usage or error message and exits with
    status 1 when the argument count is wrong or the input path does not
    exist. Otherwise reads the input as UTF-8, runs the four extraction
    passes, creates the output directory if needed and writes the DOT file.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python generate_graphviz_from_owl.py <input.owl.ttl> <output.dot>")
        sys.exit(1)

    source_path, target_path = (Path(arg) for arg in cli_args)
    if not source_path.exists():
        print(f"❌ Error: Input file not found: {source_path}")
        sys.exit(1)

    print(f"📖 Reading OWL file: {source_path}")
    ttl_text = source_path.read_text(encoding='utf-8')

    # Each pass announces itself and then scans the same text independently.
    passes = (
        ("🔍 Extracting classes...", extract_classes_from_ttl),
        ("🔍 Extracting enums...", extract_enums_from_ttl),
        ("🔍 Extracting inheritance relationships...", extract_inheritance_from_ttl),
        ("🔍 Extracting properties...", extract_properties_from_ttl),
    )
    extracted = []
    for banner, extractor in passes:
        print(banner)
        extracted.append(extractor(ttl_text))
    classes, enums, inheritance, properties = extracted

    print("📝 Generating DOT file...")
    target_path.parent.mkdir(parents=True, exist_ok=True)
    generate_dot_file(classes, enums, inheritance, properties, target_path)

    print("\n✨ Done! You can now visualize with:")
    print(f" dot -Tpng {target_path} -o {target_path.stem}.png")
    print(" or load in the UML Viewer frontend")
# Run the converter only when this file is executed as a script; importing
# the module must not trigger argument parsing or file I/O.
if __name__ == "__main__":
    main()