#!/usr/bin/env python3
"""
SHACL Validation Script for Heritage Custodian Ontology

Uses the pyshacl library to validate RDF data against SHACL shapes.

Usage:
    python scripts/validate_with_shacl.py DATA_FILE
    python scripts/validate_with_shacl.py DATA_FILE --shapes SHAPES_FILE
    python scripts/validate_with_shacl.py DATA_FILE --format jsonld
    python scripts/validate_with_shacl.py DATA_FILE --output report.ttl

Exit codes (CLI):
    0 = Validation passed (no violations)
    1 = Validation failed (violations found)
    2 = Error during validation (file not found, parse error, etc.)

Author: Heritage Custodian Ontology Project
Date: 2025-11-22
Schema Version: v0.7.0 (Phase 7: SHACL Validation)
"""

import sys
import argparse
import traceback
from pathlib import Path
from typing import Optional

try:
    from pyshacl import validate
    from rdflib import Graph
except ImportError:
    print("ERROR: Required libraries not installed.")
    print("Install with: pip install pyshacl rdflib")
    sys.exit(1)


# ============================================================================
# Constants
# ============================================================================

DEFAULT_SHAPES_FILE = "schemas/20251121/shacl/custodian_validation_shapes.ttl"
SUPPORTED_FORMATS = ["turtle", "ttl", "xml", "n3", "nt", "jsonld", "json-ld"]

# Short spellings accepted from users, mapped to the format name handed to
# rdflib's parser.
_FORMAT_ALIASES = {"ttl": "turtle", "jsonld": "json-ld"}

# Horizontal rule used by the console report.
_RULE = "=" * 80


class _MissingInputError(Exception):
    """Raised when the data file or the SHACL shapes file does not exist."""


# ============================================================================
# Validation Functions
# ============================================================================

def _normalize_format(data_format: str) -> str:
    """Map a format alias ("ttl", "jsonld") to its canonical name.

    Names that are not known aliases are returned unchanged so that any other
    format string is still passed through to the parser as given.
    """
    return _FORMAT_ALIASES.get(data_format.strip().lower(), data_format)


def _report_error(exc: Exception) -> None:
    """Print an error raised by `_run_validation` in the script's format.

    Missing input files get a one-line message; anything else is reported
    together with its traceback.
    """
    if isinstance(exc, _MissingInputError):
        print(f"ERROR: {exc}")
        return
    print(f"\nERROR during validation: {exc}")
    traceback.print_exception(type(exc), exc, exc.__traceback__)


def _run_validation(
    data_file: Path,
    shapes_file: Optional[Path],
    data_format: str,
    output_file: Optional[Path],
    verbose: bool,
) -> bool:
    """Load the graphs, run pyshacl and print the report.

    Unlike `validate_rdf_data`, this helper does not swallow errors, so
    callers can tell "data does not conform" (returns False) apart from
    "validation could not be performed" (raises).

    Returns:
        The `conforms` flag reported by pyshacl.

    Raises:
        _MissingInputError: If the data file or shapes file does not exist.
        Exception: Whatever parsing, validation or report serialization raise.
    """
    data_file = Path(data_file)
    shapes_file = Path(DEFAULT_SHAPES_FILE) if shapes_file is None else Path(shapes_file)
    data_format = _normalize_format(data_format)

    # Check files exist before printing anything else
    if not data_file.exists():
        raise _MissingInputError(f"Data file not found: {data_file}")
    if not shapes_file.exists():
        raise _MissingInputError(f"SHACL shapes file not found: {shapes_file}")

    print(f"\n{_RULE}")
    print("SHACL VALIDATION")
    print(_RULE)
    print(f"Data file: {data_file}")
    print(f"Shapes file: {shapes_file}")
    print(f"Data format: {data_format}")
    print(f"{_RULE}\n")

    # Load data graph
    if verbose:
        print("Loading data graph...")
    data_graph = Graph()
    data_graph.parse(str(data_file), format=data_format)
    if verbose:
        print(f" Loaded {len(data_graph)} triples")

    # Load shapes graph (shapes are always read as Turtle)
    if verbose:
        print("Loading SHACL shapes...")
    shapes_graph = Graph()
    shapes_graph.parse(str(shapes_file), format="turtle")
    if verbose:
        print(f" Loaded {len(shapes_graph)} shape triples")
        print("\nExecuting SHACL validation...")

    # Run SHACL validation
    conforms, results_graph, results_text = validate(
        data_graph,
        shacl_graph=shapes_graph,
        inference='rdfs',       # Use RDFS inference
        abort_on_first=False,   # Check all violations
        meta_shacl=False,       # Don't validate shapes themselves
        advanced=True,          # Enable SHACL-AF features
        js=False,               # Disable SHACL-JS (not needed)
    )

    # Print results
    print(f"\n{_RULE}")
    print("VALIDATION RESULTS")
    print(_RULE)
    if conforms:
        print("✅ VALIDATION PASSED")
        print("No constraint violations found.")
    else:
        print("❌ VALIDATION FAILED")
        print("\nConstraint Violations:")
        print("-" * 80)
        print(results_text)
    print(f"{_RULE}\n")

    # Write validation report if requested
    if output_file:
        print(f"Writing validation report to: {output_file}")
        results_graph.serialize(destination=str(output_file), format="turtle")
        print("Report written successfully.\n")

    # Print statistics
    if verbose:
        print("\nValidation Statistics:")
        print(f" Triples validated: {len(data_graph)}")
        print(f" Shapes applied: {count_shapes(shapes_graph)}")
        print(f" Violations found: {count_violations(results_graph)}")

    return conforms


def validate_rdf_data(
    data_file: Path,
    shapes_file: Optional[Path] = None,
    data_format: str = "turtle",
    output_file: Optional[Path] = None,
    verbose: bool = False
) -> bool:
    """
    Validate RDF data against SHACL shapes.

    Errors (missing files, parse failures, ...) are printed and reported as
    False; they are never raised, so a False result means either "does not
    conform" or "could not be validated".

    Args:
        data_file: Path to RDF data file to validate
        shapes_file: Path to SHACL shapes file
            (default: schemas/20251121/shacl/custodian_validation_shapes.ttl)
        data_format: RDF format (turtle, xml, n3, nt, jsonld); the aliases
            "ttl" and "jsonld" are normalized before parsing
        output_file: Optional path to write validation report
        verbose: Print detailed validation report

    Returns:
        True if validation passes, False otherwise
    """
    try:
        return _run_validation(
            data_file, shapes_file, data_format, output_file, verbose
        )
    except Exception as exc:
        _report_error(exc)
        return False


def count_shapes(shapes_graph: Graph) -> int:
    """Count the distinct shapes in the graph that declare sh:targetClass."""
    from rdflib import SH

    # A shape with several sh:targetClass values yields one match per value;
    # the set keeps each shape from being counted more than once.
    return len(set(shapes_graph.subjects(predicate=SH.targetClass, object=None)))


def count_violations(results_graph: Graph) -> int:
    """Count the distinct result nodes in the graph that carry sh:resultSeverity.

    Every severity is counted, not only sh:Violation.
    """
    from rdflib import SH

    return len(set(results_graph.subjects(predicate=SH.resultSeverity, object=None)))


# ============================================================================
# CLI Interface
# ============================================================================

def main():
    """Main entry point for CLI.

    Exits with 0 when the data conforms, 1 when violations were found and 2
    when validation could not be carried out or was interrupted.
    """
    parser = argparse.ArgumentParser(
        description="Validate RDF data against Heritage Custodian SHACL shapes",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Validate Turtle file with default shapes
  python scripts/validate_with_shacl.py data.ttl

  # Validate JSON-LD file with custom shapes
  python scripts/validate_with_shacl.py data.jsonld --shapes custom_shapes.ttl --format jsonld

  # Validate and save report
  python scripts/validate_with_shacl.py data.ttl --output validation_report.ttl

  # Verbose output
  python scripts/validate_with_shacl.py data.ttl --verbose

Exit Codes:
  0 = Validation passed (no violations)
  1 = Validation failed (violations found)
  2 = Error during validation (file not found, parse error, etc.)
"""
    )
    parser.add_argument(
        "data_file",
        type=Path,
        help="RDF data file to validate (Turtle, JSON-LD, N-Triples, etc.)"
    )
    parser.add_argument(
        "-s", "--shapes",
        type=Path,
        default=None,
        help=f"SHACL shapes file (default: {DEFAULT_SHAPES_FILE})"
    )
    parser.add_argument(
        "-f", "--format",
        type=str,
        default="turtle",
        choices=SUPPORTED_FORMATS,
        help="RDF format of data file (default: turtle)"
    )
    parser.add_argument(
        "-o", "--output",
        type=Path,
        default=None,
        help="Write validation report to file (Turtle format)"
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="Print detailed validation information"
    )
    args = parser.parse_args()

    # Call the raising helper rather than validate_rdf_data so that errors
    # can be told apart from constraint violations and mapped to exit code 2.
    try:
        conforms = _run_validation(
            data_file=args.data_file,
            shapes_file=args.shapes,
            data_format=args.format,
            output_file=args.output,
            verbose=args.verbose,
        )
    except KeyboardInterrupt:
        print("\n\nValidation interrupted by user.")
        sys.exit(2)
    except Exception as exc:
        _report_error(exc)
        sys.exit(2)

    # Exit with appropriate code
    sys.exit(0 if conforms else 1)


# ============================================================================
# Library Interface
# ============================================================================

def validate_file(
    data_file: str,
    shapes_file: Optional[str] = None,
    data_format: str = "turtle",
) -> bool:
    """
    Library interface for programmatic validation.

    Args:
        data_file: Path to RDF data file
        shapes_file: Optional path to SHACL shapes file
        data_format: RDF format of the data file (default: turtle)

    Returns:
        True if validation passes, False otherwise

    Example:
        from scripts.validate_with_shacl import validate_file

        if validate_file("data.ttl"):
            print("Valid!")
        else:
            print("Invalid!")
    """
    return validate_rdf_data(
        data_file=Path(data_file),
        shapes_file=Path(shapes_file) if shapes_file else None,
        data_format=data_format,
        verbose=False
    )


# ============================================================================
# Entry Point
# ============================================================================

if __name__ == "__main__":
    main()