222 lines
6.5 KiB
Python
222 lines
6.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Validate HC Storage ontology example files against SHACL shapes.

This script validates all the institution-specific example TTL files
against the HC Storage SHACL shapes to ensure data quality.
|
|
"""
|
|
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
try:
|
|
from pyshacl import validate
|
|
from rdflib import Graph, Namespace
|
|
from rdflib.namespace import RDF, RDFS, XSD
|
|
except ImportError:
|
|
print("ERROR: Required packages not installed.")
|
|
print("Run: pip install pyshacl rdflib")
|
|
sys.exit(1)
|
|
|
|
# Directory layout, resolved relative to this script: BASE_DIR is the parent
# of the directory that contains this file.
BASE_DIR = Path(__file__).parent.parent
ONTOLOGY_DIR = BASE_DIR.joinpath("frontend", "public", "ontology")
EXAMPLES_DIR = ONTOLOGY_DIR.joinpath("examples")

# Inputs shared by every validation run.
SHAPES_FILE = ONTOLOGY_DIR.joinpath("hc-storage-shapes.ttl")
ONTOLOGY_FILE = ONTOLOGY_DIR.joinpath("hc-storage.ttl")

# Example data sets, validated in the order listed here.
_EXAMPLE_FILE_NAMES = (
    "hc-storage-examples.ttl",  # Original archive examples
    "hc-storage-museum-examples.ttl",  # Art museum
    "hc-storage-library-examples.ttl",  # Rare books library
    "hc-storage-naturalhistory-examples.ttl",  # Natural history
    "hc-storage-archaeology-examples.ttl",  # Archaeological depot
    "hc-storage-all-examples.ttl",  # Combined all domains
)
EXAMPLE_FILES = [EXAMPLES_DIR / file_name for file_name in _EXAMPLE_FILE_NAMES]
|
|
|
|
|
|
def load_graph(filepath: Path, format: str = "turtle") -> Graph:
    """Parse the RDF document at *filepath* into a new graph.

    Args:
        filepath: Location of the RDF file to read.
        format: Serialization name passed to ``Graph.parse``; "turtle" by default.

    Returns:
        Graph: A newly created graph populated from the file.
    """
    loaded = Graph()
    loaded.parse(filepath, format=format)
    return loaded
|
|
|
|
|
|
def count_instances(graph: Graph, ontology_ns: str = "https://nde.nl/ontology/hc/") -> dict:
    """Tally ``rdf:type`` statements for a fixed set of HC storage classes.

    A statement is attributed to a class when the string form of its type IRI
    starts with the class IRI built from ``ontology_ns``. This is a textual
    prefix test, not an ``rdfs:subClassOf`` lookup, so any type whose IRI merely
    extends a tracked class IRI is counted under that class as well.

    Args:
        graph: Graph whose ``rdf:type`` triples are inspected.
        ontology_ns: Namespace IRI that the tracked class names are appended to.

    Returns:
        dict: Class local name -> number of matching ``rdf:type`` triples, with
        an entry (possibly 0) for every tracked class.
    """
    namespace = Namespace(ontology_ns)

    # Key classes to count
    tracked_classes = (
        "StorageFacility",
        "StorageUnit",
        "StorageLocation",
        "StorageAssignment",
        "EnvironmentalRequirement",
        "EnvironmentalObservation",
        "StorageAssessment",
        "HeldItem",
    )

    # String form of the object of every rdf:type triple, gathered in one pass.
    type_iris = [str(obj) for _, _, obj in graph.triples((None, RDF.type, None))]

    class_counts = {}
    for cls_name in tracked_classes:
        class_iri = str(namespace[cls_name])
        # An exact match is also a prefix match, so startswith covers both cases.
        class_counts[cls_name] = sum(1 for iri in type_iris if iri.startswith(class_iri))
    return class_counts
|
|
|
|
|
|
def validate_example_file(
    example_file: Path,
    shapes_graph: Graph,
    ontology_graph: Graph,
    verbose: bool = True
) -> tuple[bool, str, dict]:
    """
    Check one Turtle example file against the SHACL shapes.

    The example data is parsed, merged with the ontology graph, and passed to
    ``validate`` with RDFS inference enabled. A missing file, a parse error,
    or an exception raised by the validation call is reported through the
    return value instead of being raised.

    Args:
        example_file: Turtle file holding the example data.
        shapes_graph: Graph containing the SHACL shapes.
        ontology_graph: Ontology graph merged into the data before validation.
        verbose: Accepted for interface compatibility; not read by this function.

    Returns:
        tuple: (conforms: bool, results_text: str, stats: dict). ``stats``
        holds ``"triples"`` and ``"instances"`` computed from the example data
        alone, and is empty when the file is missing or cannot be parsed.
    """
    file_missing = not example_file.exists()
    if file_missing:
        return False, f"File not found: {example_file}", {}

    # Load example data
    example_graph = Graph()
    try:
        example_graph.parse(example_file, format="turtle")
    except Exception as e:
        return False, f"Parse error: {e}", {}

    # Validation runs on data + ontology so inference can see the class hierarchy.
    merged_graph = example_graph + ontology_graph

    # Statistics describe the example data only, not the merged graph.
    stats = {}
    stats["triples"] = len(example_graph)
    stats["instances"] = count_instances(example_graph)

    # Run SHACL validation
    try:
        outcome = validate(
            merged_graph,
            shacl_graph=shapes_graph,
            inference='rdfs',  # Enable RDFS inference for subclass reasoning
            abort_on_first=False,  # Report all violations
        )
        # The middle element (the results graph) is not used by callers.
        conforms, _report_graph, results_text = outcome
    except Exception as e:
        return False, f"Validation error: {e}", stats

    return conforms, results_text, stats
|
|
|
|
|
|
def main():
    """Validate every configured example file and exit with a status code.

    Prints a banner, loads the SHACL shapes and the ontology once, validates
    each entry of ``EXAMPLE_FILES`` (files that do not exist are reported as
    skipped), and prints a summary. The process exits with status 1 when the
    shapes or ontology file is missing or when at least one file is invalid;
    otherwise it exits with status 0.
    """
    heavy_rule = "=" * 70
    light_rule = "-" * 70

    print(heavy_rule)
    print("HC Storage Ontology - SHACL Validation")
    print(heavy_rule)
    print()

    # Both inputs are required; stop at the first one that is absent.
    if not SHAPES_FILE.exists():
        print(f"ERROR: Shapes file not found: {SHAPES_FILE}")
        sys.exit(1)

    if not ONTOLOGY_FILE.exists():
        print(f"ERROR: Ontology file not found: {ONTOLOGY_FILE}")
        sys.exit(1)

    # Shapes and ontology are parsed a single time and shared by every file.
    print(f"Loading SHACL shapes: {SHAPES_FILE.name}")
    shapes_graph = load_graph(SHAPES_FILE)
    print(f" → {len(shapes_graph)} triples")

    print(f"Loading ontology: {ONTOLOGY_FILE.name}")
    ontology_graph = load_graph(ONTOLOGY_FILE)
    print(f" → {len(ontology_graph)} triples")
    print()

    # One (name, conforms, results_text, stats) tuple per example file;
    # conforms is None for files that were skipped.
    results = []
    for example_file in EXAMPLE_FILES:
        print(light_rule)
        print(f"Validating: {example_file.name}")
        print(light_rule)

        if example_file.exists():
            conforms, results_text, stats = validate_example_file(
                example_file, shapes_graph, ontology_graph
            )
            _print_file_report(conforms, results_text, stats)
            results.append((example_file.name, conforms, results_text, stats))
            print()
        else:
            print(f" ⚠ SKIPPED: File not found")
            results.append((example_file.name, None, "File not found", {}))

    total_invalid = _print_summary(results)

    # Exit code
    sys.exit(1 if total_invalid > 0 else 0)


def _print_file_report(conforms, results_text: str, stats: dict) -> None:
    """Print the statistics and the validation verdict for one example file."""
    print(f" Triples: {stats.get('triples', 0)}")
    instances = stats.get('instances')
    if instances:
        print(" Instances:")
        for cls_name, count in instances.items():
            if count > 0:
                print(f" - {cls_name}: {count}")

    if conforms:
        print(f" ✓ VALID - No SHACL violations")
        return

    print(f" ✗ INVALID - SHACL violations found:")
    # Only the first 50 lines of the report are shown; blank lines are dropped.
    for line in results_text.split('\n')[:50]:
        if line.strip():
            print(f" {line}")
    if results_text.count('\n') > 50:
        print(f" ... ({results_text.count('Constraint Violation')} total violations)")


def _print_summary(results: list) -> int:
    """Print the per-file summary table and totals; return the invalid count."""
    print("=" * 70)
    print("VALIDATION SUMMARY")
    print("=" * 70)

    total_valid = total_invalid = total_skipped = 0

    for filename, conforms, _, stats in results:
        if conforms is None:
            status = "⚠ SKIPPED"
            total_skipped += 1
        elif conforms:
            status = "✓ VALID"
            total_valid += 1
        else:
            status = "✗ INVALID"
            total_invalid += 1

        triples = stats.get('triples', '-')
        print(f" {status:12} {filename:50} ({triples} triples)")

    print()
    print(f"Total: {total_valid} valid, {total_invalid} invalid, {total_skipped} skipped")
    return total_invalid
|
|
|
|
|
|
# Run the validation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|