glam/scripts/export_hc_storage_rdf.py
2026-01-04 13:12:32 +01:00

309 lines
8.7 KiB
Python
Executable file

#!/usr/bin/env python3
"""
Export HC Storage ontology examples to multiple RDF formats.
Converts Turtle (.ttl) files to:
- N-Triples (.nt)
- JSON-LD (.jsonld)
- RDF/XML (.rdf)
Usage:
python scripts/export_hc_storage_rdf.py [--all] [--file FILENAME]
Options:
--all Export all example files
--file Export specific file (e.g., hc-storage-all-examples.ttl)
--formats Comma-separated formats: nt,jsonld,rdf (default: all)
--output Output directory (default: same as input)
"""
import argparse
import sys
from pathlib import Path
from typing import List, Optional
# rdflib is the only third-party dependency; bail out with an install hint
# instead of a traceback when it is missing.
try:
    from rdflib import Graph
    from rdflib.namespace import RDF, RDFS, XSD, OWL
except ImportError:
    print("ERROR: rdflib not installed.")
    print("Run: pip install rdflib")
    sys.exit(1)
# Filesystem layout: this script sits one directory below the project root,
# and the ontology files are served from the frontend's public folder.
BASE_DIR = Path(__file__).parent.parent
ONTOLOGY_DIR = BASE_DIR.joinpath("frontend", "public", "ontology")
EXAMPLES_DIR = ONTOLOGY_DIR.joinpath("examples")
# JSON-LD context for heritage custodian storage ontology.
# The context is assembled from three groups so each can be read on its own;
# the merge order below fixes the key order of the resulting mapping.

# Prefixes of the ontologies and vocabularies used by the examples
_VOCABULARY_PREFIXES = {
    "hc": "https://nde.nl/ontology/hc/",
    "crm": "http://www.cidoc-crm.org/cidoc-crm/",
    "sosa": "http://www.w3.org/ns/sosa/",
    "ssn": "http://www.w3.org/ns/ssn/",
    "schema": "http://schema.org/",
    "aat": "http://vocab.getty.edu/aat/",
    "wd": "http://www.wikidata.org/entity/",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "owl": "http://www.w3.org/2002/07/owl#",
    "dcterms": "http://purl.org/dc/terms/",
}

# Domain-specific example namespaces
_EXAMPLE_PREFIXES = {
    "ex": "http://example.org/archive/",
    "mus": "http://example.org/museum/",
    "lib": "http://example.org/library/",
    "nat": "http://example.org/nathistory/",
    "arch": "http://example.org/archaeology/",
}

# Common property mappings (short term -> IRI or JSON-LD keyword)
_TERM_ALIASES = {
    "label": "rdfs:label",
    "comment": "rdfs:comment",
    "type": "@type",
    "id": "@id",
}

JSONLD_CONTEXT = {
    "@context": {
        **_VOCABULARY_PREFIXES,
        **_EXAMPLE_PREFIXES,
        **_TERM_ALIASES,
    }
}
# Format mappings.
# Each row: (names accepted on the command line, rdflib serializer name,
# human-readable label).
_FORMAT_ALIASES = (
    (("nt", "ntriples"), "nt", "N-Triples"),
    (("jsonld", "json-ld"), "json-ld", "JSON-LD"),
    (("rdf", "xml", "rdfxml"), "xml", "RDF/XML"),
)

# User-facing format name -> (rdflib serializer name, label)
FORMAT_MAP = {
    alias: (serializer, label)
    for aliases, serializer, label in _FORMAT_ALIASES
    for alias in aliases
}

# rdflib serializer name -> output file extension
EXTENSION_MAP = dict(
    [
        ("nt", ".nt"),
        ("json-ld", ".jsonld"),
        ("xml", ".rdf"),
    ]
)
def load_turtle(filepath: Path) -> Graph:
    """
    Parse a Turtle file into a fresh RDF graph.

    Args:
        filepath: Path of the .ttl file to read

    Returns:
        A new Graph holding the parsed content; parse errors propagate
        to the caller
    """
    graph = Graph()
    graph.parse(filepath, format="turtle")
    return graph
def export_graph(
    graph: Graph,
    output_path: Path,
    format_key: str,
    verbose: bool = True
) -> bool:
    """
    Export an RDF graph to a specific format.

    Text output is always written as UTF-8 (independent of the platform's
    default encoding), and missing parent directories of ``output_path``
    are created before writing.

    Args:
        graph: The RDF graph to export
        output_path: Path for the output file
        format_key: rdflib serializer name (nt, json-ld, xml)
        verbose: Print progress messages

    Returns:
        True if export succeeded, False otherwise. Any exception raised
        while serializing or writing is caught and, when verbose, printed.
    """
    # JSON-LD is pretty-printed; other serializers run with their defaults.
    # NOTE(review): the module-level JSONLD_CONTEXT is not handed to the
    # serializer, so the output is not compacted against it -- confirm
    # whether that is intended before wiring it in.
    serialize_kwargs = {"format": format_key}
    if format_key == "json-ld":
        serialize_kwargs["indent"] = 2

    try:
        serialized = graph.serialize(**serialize_kwargs)

        # A custom --output directory may not exist yet.
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # serialize() may hand back bytes or str depending on the rdflib
        # version; handle both for every format, not only the non-JSON ones.
        if isinstance(serialized, bytes):
            output_path.write_bytes(serialized)
        else:
            output_path.write_text(serialized, encoding="utf-8")

        if verbose:
            size_kb = output_path.stat().st_size / 1024
            print(f"{output_path.name} ({size_kb:.1f} KB)")
        return True
    except Exception as e:
        # Boundary of a best-effort batch export: report and keep going.
        if verbose:
            print(f"{output_path.name}: {e}")
        return False
def export_file(
    input_path: Path,
    output_dir: Optional[Path] = None,
    formats: Optional[List[str]] = None,
    verbose: bool = True
) -> dict:
    """
    Convert one Turtle file into each requested RDF format.

    Output files share the input's stem and get the extension that
    belongs to the target format.

    Args:
        input_path: Path to the input .ttl file
        output_dir: Directory for output files (default: same as input)
        formats: List of format keys to export (default: all)
        verbose: Print progress messages

    Returns:
        Dict mapping each recognised requested format to its success
        status. Empty when the input is missing or cannot be loaded;
        unrecognised format names are skipped and get no entry.
    """
    # Guard: nothing to do without an input file
    if not input_path.exists():
        if verbose:
            print(f"ERROR: File not found: {input_path}")
        return {}

    # Fill in defaults
    target_dir = input_path.parent if output_dir is None else output_dir
    requested = ["nt", "json-ld", "xml"] if formats is None else formats

    # Parse the source once; every export reuses the same graph
    if verbose:
        print(f"\nProcessing: {input_path.name}")
    try:
        graph = load_turtle(input_path)
        if verbose:
            print(f" Loaded {len(graph)} triples")
    except Exception as e:
        if verbose:
            print(f" ERROR loading file: {e}")
        return {}

    outcome = {}
    stem = input_path.stem
    if verbose:
        print(" Exporting:")

    for requested_format in requested:
        mapping = FORMAT_MAP.get(requested_format)
        if mapping is None:
            if verbose:
                print(f" ⚠ Unknown format: {requested_format}")
            continue

        serializer_name = mapping[0]
        target = target_dir / (stem + EXTENSION_MAP[serializer_name])
        outcome[requested_format] = export_graph(
            graph, target, serializer_name, verbose
        )

    return outcome
def find_example_files() -> List[Path]:
    """
    List the HC Storage example files in the examples directory.

    Returns:
        Sorted paths matching ``hc-storage*.ttl`` in EXAMPLES_DIR, or an
        empty list when that directory does not exist
    """
    if EXAMPLES_DIR.exists():
        return sorted(EXAMPLES_DIR.glob("hc-storage*.ttl"))
    return []
def main():
    """
    Command-line entry point.

    Parses the options, works out which Turtle files to convert, exports
    each of them and prints a summary (unless --quiet is given).

    Exits with status 0 only when every selected file produced results and
    none of its exports failed. A file that is missing, cannot be parsed,
    or yields no export at all counts as a failure, so the exit status is
    reliable even with --quiet.
    """
    parser = argparse.ArgumentParser(
        description="Export HC Storage ontology examples to multiple RDF formats."
    )
    parser.add_argument(
        "--all", "-a",
        action="store_true",
        help="Export all example files"
    )
    parser.add_argument(
        "--file", "-f",
        type=str,
        help="Specific file to export (filename or full path)"
    )
    parser.add_argument(
        "--formats",
        type=str,
        default="nt,jsonld,rdf",
        help="Comma-separated formats: nt,jsonld,rdf (default: all)"
    )
    parser.add_argument(
        "--output", "-o",
        type=str,
        help="Output directory (default: same as input)"
    )
    parser.add_argument(
        "--quiet", "-q",
        action="store_true",
        help="Suppress progress messages"
    )
    args = parser.parse_args()
    verbose = not args.quiet

    # Parse formats
    formats = [f.strip().lower() for f in args.formats.split(",")]

    # Determine output directory
    output_dir = Path(args.output) if args.output else None

    if verbose:
        print("=" * 70)
        print("HC Storage Ontology - RDF Export")
        print("=" * 70)
        print(f"Formats: {', '.join(formats)}")

    # Collect files to process (--file takes precedence over --all)
    files_to_process = []
    if args.file:
        # Specific file
        file_path = Path(args.file)
        if not file_path.is_absolute():
            # Bare names are looked up in the examples dir, then the
            # ontology dir; otherwise the path is used as given.
            if (EXAMPLES_DIR / args.file).exists():
                file_path = EXAMPLES_DIR / args.file
            elif (ONTOLOGY_DIR / args.file).exists():
                file_path = ONTOLOGY_DIR / args.file
        files_to_process.append(file_path)
    elif args.all:
        # All example files
        files_to_process = find_example_files()
        if not files_to_process:
            print("ERROR: No example files found in", EXAMPLES_DIR)
            sys.exit(1)
    else:
        # Default: just the combined file
        combined = EXAMPLES_DIR / "hc-storage-all-examples.ttl"
        if combined.exists():
            files_to_process.append(combined)
        else:
            print("ERROR: Combined examples file not found.")
            print("Use --all to export all files, or --file to specify a file.")
            sys.exit(1)

    # Process each file
    total_success = 0
    total_failed = 0
    files_failed = 0
    for file_path in files_to_process:
        results = export_file(file_path, output_dir, formats, verbose)
        if not results:
            # export_file returns an empty dict when the input is missing
            # or unparseable (or no requested format was recognised); that
            # must not be mistaken for "nothing went wrong".
            files_failed += 1
            continue
        succeeded = sum(1 for ok in results.values() if ok)
        total_success += succeeded
        total_failed += len(results) - succeeded

    # Summary
    if verbose:
        print("\n" + "=" * 70)
        print("EXPORT SUMMARY")
        print("=" * 70)
        print(f" Files processed: {len(files_to_process)}")
        print(f" Files without output: {files_failed}")
        print(f" Exports succeeded: {total_success}")
        print(f" Exports failed: {total_failed}")

    sys.exit(0 if total_failed == 0 and files_failed == 0 else 1)


if __name__ == "__main__":
    main()