- Created PlantUML diagrams for custodian types, full schema, legal status, and organizational structure. - Implemented a script to generate GraphViz DOT diagrams from OWL/RDF ontology files. - Developed a script to generate UML diagrams from modular LinkML schema, supporting both Mermaid and PlantUML formats. - Enhanced class definitions and relationships in UML diagrams to reflect the latest schema updates.
208 lines
7.3 KiB
Python
Executable file
208 lines
7.3 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""
|
|
Generate GraphViz DOT diagram from OWL/RDF ontology file.
|
|
|
|
Extracts classes and their relationships (inheritance, properties) from
|
|
an OWL ontology and generates a GraphViz DOT file for visualization.
|
|
|
|
Usage:
|
|
python scripts/generate_graphviz_from_owl.py \
|
|
schemas/20251121/rdf/custodian_multi_aspect_20251122_155319.owl.ttl \
|
|
schemas/20251121/uml/graphviz/custodian_multi_aspect_20251122_155319.dot
|
|
"""
|
|
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Set, Dict, List, Tuple
|
|
import re
|
|
|
|
|
|
def extract_classes_from_ttl(ttl_content: str) -> Set[str]:
    """Collect candidate class names from Turtle/OWL text.

    Two sources are merged into one set:

    * subjects of the form
      ``<https://nde.nl/ontology/hc/class/Name/Name> a owl:Class``
      (the last two path segments must be identical), and
    * every ``rdfs:label "..."`` literal whose first character is
      upper-case.  This is a heuristic: labels starting with a lower-case
      character are assumed to belong to properties and are skipped.

    Args:
        ttl_content: Full text of the Turtle file.

    Returns:
        The set of names found by either rule.
    """
    # Names taken from explicit owl:Class declarations.
    declared = {
        hit.group(1)
        for hit in re.finditer(
            r'<https://nde\.nl/ontology/hc/class/([^/]+)/\1>\s+a\s+owl:Class',
            ttl_content,
        )
    }

    # Names taken from labels; only capitalised labels are kept.
    labelled = {
        text
        for text in re.findall(r'rdfs:label\s+"([^"]+)"', ttl_content)
        if text[:1].isupper()
    }

    return declared | labelled
|
|
|
|
|
|
def extract_enums_from_ttl(ttl_content: str) -> Set[str]:
    """Collect enum names from Turtle/OWL text.

    A name is reported when it ends in ``Enum`` and appears either

    * in an IRI of the form
      ``<https://nde.nl/ontology/hc/enum/NameEnum/NameEnum>``, or
    * as an ``rdfs:label "NameEnum"`` literal.

    Args:
        ttl_content: Full text of the Turtle file.

    Returns:
        The set of enum names found by either rule.
    """
    enum_patterns = (
        # Enum IRIs: the last two path segments repeat the enum name.
        r'<https://nde\.nl/ontology/hc/enum/([^/]+Enum)/\1>',
        # Labels whose text ends in "Enum".
        r'rdfs:label\s+"([^"]+Enum)"',
    )

    found: Set[str] = set()
    for pattern in enum_patterns:
        found.update(re.findall(pattern, ttl_content))
    return found
|
|
|
|
|
|
def extract_inheritance_from_ttl(ttl_content: str) -> List[Tuple[str, str]]:
    """Extract inheritance relationships as ``(subclass, superclass)`` pairs.

    This is a line-based heuristic, not a Turtle parser.  The most recent
    ``<https://nde.nl/ontology/hc/class/Name/Name> a owl:Class`` declaration
    sets the "current" class; every later line that contains
    ``rdfs:subClassOf`` contributes the class IRIs listed as objects of that
    predicate on the same line.

    Only the text *after* ``rdfs:subClassOf`` is inspected, so:

    * a declaration and its ``rdfs:subClassOf`` may share one line,
    * several comma-separated superclasses on one line are all reported,
    * class IRIs inside an inline ``[ ... ]`` blank node (for example the
      ``owl:allValuesFrom`` target of a restriction) are not mistaken for
      superclasses.

    Superclasses that appear on continuation lines without the predicate are
    not detected.  The current class is never reset, so a ``rdfs:subClassOf``
    line belonging to a later non-class subject is attributed to the last
    declared class.

    Args:
        ttl_content: Full text of the Turtle file.

    Returns:
        Pairs in order of appearance; self-references are omitted and
        duplicates are kept.
    """
    class_decl = re.compile(
        r'<https://nde\.nl/ontology/hc/class/([^/]+)/\1>\s+a\s+owl:Class'
    )
    class_ref = re.compile(r'<https://nde\.nl/ontology/hc/class/([^/>\s]+)/')
    inline_bnode = re.compile(r'\[[^\[\]]*\]')

    relationships: List[Tuple[str, str]] = []
    current_class = None

    for line in ttl_content.split('\n'):
        decl = class_decl.search(line)
        if decl:
            current_class = decl.group(1)

        if current_class is None or 'rdfs:subClassOf' not in line:
            continue

        # Look only at the objects of the predicate, never at the subject IRI.
        objects = line.split('rdfs:subClassOf', 1)[1]

        # Remove balanced inline blank nodes, innermost first.
        previous = None
        while previous != objects:
            previous = objects
            objects = inline_bnode.sub(' ', objects)

        # A '[' left over opens a blank node that continues on later lines;
        # whatever follows it on this line is inside that node.
        objects = objects.split('[', 1)[0]

        for ref in class_ref.finditer(objects):
            superclass = ref.group(1)
            if superclass != current_class:  # Avoid self-references
                relationships.append((current_class, superclass))

    return relationships
|
|
|
|
|
|
def extract_properties_from_ttl(ttl_content: str) -> Dict[str, List[str]]:
    """Extract the restricted properties of each declared class.

    This is a line-based heuristic, not a Turtle parser.  The most recent
    ``<https://nde.nl/ontology/hc/class/Name/Name> a owl:Class`` declaration
    sets the "current" class.  Every
    ``owl:onProperty <https://nde.nl/ontology/hc/slot/Name/...>`` occurrence
    on that line or any later line, up to the next class declaration, is
    recorded for it.  All occurrences on a line are collected, so several
    inline restrictions written on one line are not lost.

    Args:
        ttl_content: Full text of the Turtle file.

    Returns:
        Mapping from class name to its property names, in order of first
        appearance and without duplicates.  Declared classes without any
        ``owl:onProperty`` map to an empty list; ``owl:onProperty`` lines
        seen before the first class declaration are ignored.
    """
    class_decl = re.compile(
        r'<https://nde\.nl/ontology/hc/class/([^/]+)/\1>\s+a\s+owl:Class'
    )
    on_property = re.compile(
        r'owl:onProperty\s+<https://nde\.nl/ontology/hc/slot/([^/>\s]+)/'
    )

    properties: Dict[str, List[str]] = {}
    current_class = None

    for line in ttl_content.split('\n'):
        decl = class_decl.search(line)
        if decl:
            current_class = decl.group(1)
            # Keep properties already collected if a class is declared twice.
            properties.setdefault(current_class, [])

        if current_class is None:
            continue

        class_props = properties[current_class]
        for found in on_property.finditer(line):
            prop_name = found.group(1)
            if prop_name not in class_props:
                class_props.append(prop_name)

    return properties
|
|
|
|
|
|
# Names matching this pattern may be written unquoted in DOT.
_DOT_PLAIN_ID = re.compile(r'[A-Za-z_][A-Za-z0-9_]*\Z')
# Reserved words of the DOT language (case-insensitive); they must be quoted
# when used as node names.
_DOT_KEYWORDS = frozenset(
    {'node', 'edge', 'graph', 'digraph', 'subgraph', 'strict'}
)
# Characters with structural meaning inside a record-shaped node label.
_DOT_RECORD_SPECIALS = re.compile(r'([\\"{}|<>])')


def _dot_id(name: str) -> str:
    """Return *name* as a DOT node ID, quoting it only when required."""
    if _DOT_PLAIN_ID.match(name) and name.lower() not in _DOT_KEYWORDS:
        return name
    return '"' + name.replace('"', '\\"') + '"'


def _dot_record_text(text: str) -> str:
    """Backslash-escape characters that are special in a record label."""
    return _DOT_RECORD_SPECIALS.sub(r'\\\1', text)


def generate_dot_file(classes: Set[str],
                      enums: Set[str],
                      inheritance: List[Tuple[str, str]],
                      properties: Dict[str, List[str]],
                      output_path: Path) -> None:
    """Write a GraphViz DOT file describing classes, enums and inheritance.

    Each class becomes a record-shaped node showing its name and at most the
    first five properties (followed by a ``... (N total)`` line when there
    are more).  Each enum becomes a light-yellow node.  An edge labelled
    ``inherits`` is written for every ``(subclass, superclass)`` pair whose
    two ends are both in *classes*.

    Names that are not plain DOT identifiers (spaces, punctuation, DOT
    keywords such as ``Edge``) are quoted, and record-label text is escaped,
    so the output stays valid DOT.  Plain identifiers are written exactly as
    given.

    Args:
        classes: Class names; written in sorted order.
        enums: Enum names; written in sorted order.
        inheritance: ``(subclass, superclass)`` pairs.
        properties: Mapping from class name to its property names.
        output_path: File to create or overwrite (UTF-8).

    Side effects:
        Writes *output_path* and prints a short summary to stdout.  The
        summary reports ``len(inheritance)``, which can exceed the number of
        edges actually written.
    """
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write('digraph HeritageCustodianOntology {\n')
        f.write(' // Graph settings\n')
        f.write(' rankdir=TB;\n')
        f.write(' node [shape=record, style=filled, fillcolor=lightblue];\n')
        # NOTE(review): with dir=back the hollow arrow is drawn at the tail
        # (the subclass) of "subclass -> superclass" edges. Left unchanged
        # because consumers may rely on the edge direction; confirm intent.
        f.write(' edge [dir=back, arrowtail=empty];\n')
        f.write(' \n')

        f.write(' // Classes\n')
        for cls in sorted(classes):
            props = [_dot_record_text(p) for p in properties.get(cls, [])]
            label = _dot_record_text(cls)

            if props:
                # "\l" left-justifies each property line within the record.
                prop_list = '\\l'.join(props[:5])  # Limit to first 5 properties
                if len(props) > 5:
                    prop_list += f'\\l... ({len(props)} total)'
                else:
                    prop_list += '\\l'
                label = f'{label}|{prop_list}'

            f.write(f' {_dot_id(cls)} [label="{{{label}}}"];\n')

        f.write(' \n')
        f.write(' // Enums\n')
        for enum in sorted(enums):
            # Enum nodes inherit shape=record, so their label is escaped too.
            f.write(
                f' {_dot_id(enum)} [label="{_dot_record_text(enum)}", '
                'fillcolor=lightyellow];\n'
            )

        f.write(' \n')
        f.write(' // Inheritance relationships\n')
        for subclass, superclass in inheritance:
            # Skip edges whose ends were not emitted as class nodes.
            if subclass in classes and superclass in classes:
                f.write(
                    f' {_dot_id(subclass)} -> {_dot_id(superclass)} '
                    '[label="inherits"];\n'
                )

        f.write('}\n')

    print(f"✅ Generated GraphViz DOT file: {output_path}")
    print(f" - Classes: {len(classes)}")
    print(f" - Enums: {len(enums)}")
    print(f" - Inheritance relationships: {len(inheritance)}")
|
|
|
|
|
|
def main():
    """Command-line entry point: convert an OWL/Turtle file to a DOT file.

    Expects exactly two command-line arguments, the input ``.owl.ttl`` path
    and the output ``.dot`` path.  Prints a usage message and exits with
    status 1 when the argument count is wrong, and prints an error and exits
    with status 1 when the input file does not exist.  Otherwise reads the
    input as UTF-8, runs the four extraction passes in order, creates the
    output directory if needed and writes the DOT file.
    """
    if len(sys.argv) != 3:
        print("Usage: python generate_graphviz_from_owl.py <input.owl.ttl> <output.dot>")
        sys.exit(1)

    _, source_arg, target_arg = sys.argv
    input_file = Path(source_arg)
    output_file = Path(target_arg)

    if not input_file.exists():
        print(f"❌ Error: Input file not found: {input_file}")
        sys.exit(1)

    print(f"📖 Reading OWL file: {input_file}")
    ttl_content = input_file.read_text(encoding='utf-8')

    # Each pass announces itself and then scans the complete file text.
    passes = (
        ("🔍 Extracting classes...", extract_classes_from_ttl),
        ("🔍 Extracting enums...", extract_enums_from_ttl),
        ("🔍 Extracting inheritance relationships...", extract_inheritance_from_ttl),
        ("🔍 Extracting properties...", extract_properties_from_ttl),
    )
    extracted = []
    for banner, extractor in passes:
        print(banner)
        extracted.append(extractor(ttl_content))
    classes, enums, inheritance, properties = extracted

    print("📝 Generating DOT file...")
    # Make sure the destination directory exists before writing.
    output_file.parent.mkdir(parents=True, exist_ok=True)
    generate_dot_file(classes, enums, inheritance, properties, output_file)

    print("\n✨ Done! You can now visualize with:")
    print(f" dot -Tpng {output_file} -o {output_file.stem}.png")
    print(" or load in the UML Viewer frontend")
|
|
|
|
|
|
# Run the converter only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|