#!/usr/bin/env python3
"""
Export HC Storage ontology examples to multiple RDF formats.

Converts Turtle (.ttl) files to:
- N-Triples (.nt)
- JSON-LD (.jsonld)
- RDF/XML (.rdf)

Usage:
    python scripts/export_hc_storage_rdf.py [--all] [--file FILENAME]

Options:
    --all       Export all example files
    --file      Export specific file (e.g., hc-storage-all-examples.ttl)
    --formats   Comma-separated formats: nt,jsonld,rdf (default: all)
    --output    Output directory (default: same as input)
    --quiet     Suppress progress messages

Exit status is 0 when every requested export succeeded and 1 otherwise
(including when an input file is missing or cannot be parsed).
"""

import argparse
import sys
from pathlib import Path
from typing import List, Optional

try:
    from rdflib import Graph
    from rdflib.namespace import RDF, RDFS, XSD, OWL
except ImportError:
    print("ERROR: rdflib not installed.")
    print("Run: pip install rdflib")
    sys.exit(1)

# Paths
BASE_DIR = Path(__file__).parent.parent
ONTOLOGY_DIR = BASE_DIR / "frontend" / "public" / "ontology"
EXAMPLES_DIR = ONTOLOGY_DIR / "examples"

# JSON-LD context for heritage custodian storage ontology
JSONLD_CONTEXT = {
    "@context": {
        "hc": "https://nde.nl/ontology/hc/",
        "crm": "http://www.cidoc-crm.org/cidoc-crm/",
        "sosa": "http://www.w3.org/ns/sosa/",
        "ssn": "http://www.w3.org/ns/ssn/",
        "schema": "http://schema.org/",
        "aat": "http://vocab.getty.edu/aat/",
        "wd": "http://www.wikidata.org/entity/",
        "xsd": "http://www.w3.org/2001/XMLSchema#",
        "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
        "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
        "owl": "http://www.w3.org/2002/07/owl#",
        "dcterms": "http://purl.org/dc/terms/",
        # Domain-specific example namespaces
        "ex": "http://example.org/archive/",
        "mus": "http://example.org/museum/",
        "lib": "http://example.org/library/",
        "nat": "http://example.org/nathistory/",
        "arch": "http://example.org/archaeology/",
        # Common property mappings
        "label": "rdfs:label",
        "comment": "rdfs:comment",
        "type": "@type",
        "id": "@id",
    }
}

# Format mappings: user-facing alias -> (rdflib serializer name, display name)
FORMAT_MAP = {
    "nt": ("nt", "N-Triples"),
    "ntriples": ("nt", "N-Triples"),
    "jsonld": ("json-ld", "JSON-LD"),
    "json-ld": ("json-ld", "JSON-LD"),
    "rdf": ("xml", "RDF/XML"),
    "xml": ("xml", "RDF/XML"),
    "rdfxml": ("xml", "RDF/XML"),
}

# rdflib serializer name -> output file extension
EXTENSION_MAP = {
    "nt": ".nt",
    "json-ld": ".jsonld",
    "xml": ".rdf",
}


def load_turtle(filepath: Path) -> Graph:
    """Load a Turtle file into an RDF graph.

    Args:
        filepath: Path to the .ttl file to parse

    Returns:
        A new Graph containing the parsed triples

    Raises:
        Whatever ``Graph.parse`` raises for unreadable or malformed input;
        callers are expected to handle it.
    """
    g = Graph()
    g.parse(filepath, format="turtle")
    return g


def export_graph(
    graph: Graph,
    output_path: Path,
    format_key: str,
    verbose: bool = True
) -> bool:
    """
    Export an RDF graph to a specific format.

    Args:
        graph: The RDF graph to export
        output_path: Path for the output file
        format_key: Format identifier (nt, json-ld, xml)
        verbose: Print progress messages

    Returns:
        True if export succeeded, False otherwise
    """
    try:
        if format_key == "json-ld":
            # Pass the inner term map (not the {"@context": ...} wrapper) so
            # the serializer compacts IRIs with the project prefixes and
            # embeds the map once as the document's "@context".
            serialized = graph.serialize(
                format="json-ld",
                context=JSONLD_CONTEXT["@context"],
                indent=2,
            )
        else:
            # For other formats, use standard serialization
            serialized = graph.serialize(format=format_key)

        # Depending on the rdflib version, serialize() may return bytes or
        # str; handle both for every format. Text is always written as UTF-8
        # so output does not depend on the platform's default encoding.
        if isinstance(serialized, bytes):
            output_path.write_bytes(serialized)
        else:
            output_path.write_text(serialized, encoding="utf-8")

        if verbose:
            size_kb = output_path.stat().st_size / 1024
            print(f"    ✓ {output_path.name} ({size_kb:.1f} KB)")

        return True

    except Exception as e:
        # Deliberate catch-all: one failed format must not abort the others.
        # The failure is reported through the return value.
        if verbose:
            print(f"    ✗ {output_path.name}: {e}")
        return False


def export_file(
    input_path: Path,
    output_dir: Optional[Path] = None,
    formats: Optional[List[str]] = None,
    verbose: bool = True
) -> dict:
    """
    Export a Turtle file to multiple formats.

    Args:
        input_path: Path to the input .ttl file
        output_dir: Directory for output files (default: same as input).
            Created if it does not exist.
        formats: List of format keys to export (default: all)
        verbose: Print progress messages

    Returns:
        Dict mapping each recognised requested format key to its success
        status. Empty when the input is missing, cannot be parsed, the
        output directory cannot be created, or no requested format is
        recognised.
    """
    if not input_path.exists():
        if verbose:
            print(f"ERROR: File not found: {input_path}")
        return {}

    if output_dir is None:
        output_dir = input_path.parent

    if formats is None:
        formats = ["nt", "json-ld", "xml"]

    # Load the Turtle file
    if verbose:
        print(f"\nProcessing: {input_path.name}")

    try:
        graph = load_turtle(input_path)
        if verbose:
            print(f"  Loaded {len(graph)} triples")
    except Exception as e:
        if verbose:
            print(f"  ERROR loading file: {e}")
        return {}

    # Make sure the destination exists; otherwise every write would fail.
    try:
        output_dir.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        if verbose:
            print(f"  ERROR creating output directory {output_dir}: {e}")
        return {}

    # Export to each format
    results = {}
    # Aliases such as "rdf" and "xml" map to the same serializer and the
    # same output file; remember the outcome so each file is written once.
    outcome_by_serializer = {}
    base_name = input_path.stem

    if verbose:
        print("  Exporting:")

    for fmt in formats:
        if fmt not in FORMAT_MAP:
            if verbose:
                print(f"    ⚠ Unknown format: {fmt}")
            continue

        rdflib_format = FORMAT_MAP[fmt][0]

        if rdflib_format not in outcome_by_serializer:
            extension = EXTENSION_MAP[rdflib_format]
            output_path = output_dir / f"{base_name}{extension}"
            outcome_by_serializer[rdflib_format] = export_graph(
                graph, output_path, rdflib_format, verbose
            )

        results[fmt] = outcome_by_serializer[rdflib_format]

    return results


def find_example_files() -> List[Path]:
    """Find all HC Storage example files.

    Returns:
        Sorted list of ``hc-storage*.ttl`` paths directly inside
        EXAMPLES_DIR, or an empty list when that directory does not exist.
    """
    if not EXAMPLES_DIR.exists():
        return []

    return sorted(EXAMPLES_DIR.glob("hc-storage*.ttl"))


def main():
    """Parse command-line arguments, run the exports, and exit.

    Exits with status 0 when every export succeeded, 1 when any export
    failed, any input file produced no results, or no input was found.
    """
    parser = argparse.ArgumentParser(
        description="Export HC Storage ontology examples to multiple RDF formats."
    )
    parser.add_argument(
        "--all", "-a",
        action="store_true",
        help="Export all example files"
    )
    parser.add_argument(
        "--file", "-f",
        type=str,
        help="Specific file to export (filename or full path)"
    )
    parser.add_argument(
        "--formats",
        type=str,
        default="nt,jsonld,rdf",
        help="Comma-separated formats: nt,jsonld,rdf (default: all)"
    )
    parser.add_argument(
        "--output", "-o",
        type=str,
        help="Output directory (default: same as input)"
    )
    parser.add_argument(
        "--quiet", "-q",
        action="store_true",
        help="Suppress progress messages"
    )

    args = parser.parse_args()
    verbose = not args.quiet

    # Parse formats; drop empty tokens produced by stray commas ("nt,").
    formats = [
        token.strip().lower()
        for token in args.formats.split(",")
        if token.strip()
    ]

    # Determine output directory
    output_dir = Path(args.output) if args.output else None

    if verbose:
        print("=" * 70)
        print("HC Storage Ontology - RDF Export")
        print("=" * 70)
        print(f"Formats: {', '.join(formats)}")

    # Collect files to process
    files_to_process = []

    if args.file:
        # Specific file
        file_path = Path(args.file)
        if not file_path.is_absolute():
            # Try relative to the examples dir, then the ontology dir;
            # otherwise leave it relative to the current working directory.
            if (EXAMPLES_DIR / args.file).exists():
                file_path = EXAMPLES_DIR / args.file
            elif (ONTOLOGY_DIR / args.file).exists():
                file_path = ONTOLOGY_DIR / args.file
        files_to_process.append(file_path)

    elif args.all:
        # All example files
        files_to_process = find_example_files()
        if not files_to_process:
            print("ERROR: No example files found in", EXAMPLES_DIR)
            sys.exit(1)

    else:
        # Default: just the combined file
        combined = EXAMPLES_DIR / "hc-storage-all-examples.ttl"
        if combined.exists():
            files_to_process.append(combined)
        else:
            print("ERROR: Combined examples file not found.")
            print("Use --all to export all files, or --file to specify a file.")
            sys.exit(1)

    # Process each file
    total_success = 0
    total_failed = 0
    files_failed = 0

    for file_path in files_to_process:
        results = export_file(file_path, output_dir, formats, verbose)
        if not results:
            # An empty result means the file could not be processed at all
            # (missing, unparsable, or no usable format); that must not be
            # reported as success.
            files_failed += 1
            continue
        total_success += sum(1 for ok in results.values() if ok)
        total_failed += sum(1 for ok in results.values() if not ok)

    # Summary
    if verbose:
        print("\n" + "=" * 70)
        print("EXPORT SUMMARY")
        print("=" * 70)
        print(f"  Files processed: {len(files_to_process)}")
        print(f"  Files failed: {files_failed}")
        print(f"  Exports succeeded: {total_success}")
        print(f"  Exports failed: {total_failed}")

    sys.exit(0 if total_failed == 0 and files_failed == 0 else 1)


if __name__ == "__main__":
    main()