309 lines
8.7 KiB
Python
Executable file
309 lines
8.7 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""
|
|
Export HC Storage ontology examples to multiple RDF formats.
|
|
|
|
Converts Turtle (.ttl) files to:
|
|
- N-Triples (.nt)
|
|
- JSON-LD (.jsonld)
|
|
- RDF/XML (.rdf)
|
|
|
|
Usage:
|
|
python scripts/export_hc_storage_rdf.py [--all] [--file FILENAME]
|
|
|
|
Options:
|
|
--all Export all example files
|
|
--file Export specific file (e.g., hc-storage-all-examples.ttl)
|
|
--formats Comma-separated formats: nt,jsonld,rdf (default: all)
|
|
--output Output directory (default: same as input)
--quiet Suppress progress messages
|
|
"""
|
|
|
|
import argparse
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import List, Optional
|
|
|
|
try:
|
|
from rdflib import Graph
|
|
from rdflib.namespace import RDF, RDFS, XSD, OWL
|
|
except ImportError:
|
|
print("ERROR: rdflib not installed.")
|
|
print("Run: pip install rdflib")
|
|
sys.exit(1)
|
|
|
|
# Filesystem layout, anchored on the location of this script.
_SCRIPT_DIR = Path(__file__).parent
# The project root is the parent of the directory that holds this script.
BASE_DIR = _SCRIPT_DIR.parent
ONTOLOGY_DIR = BASE_DIR.joinpath("frontend", "public", "ontology")
EXAMPLES_DIR = ONTOLOGY_DIR.joinpath("examples")
|
|
|
|
# JSON-LD context for heritage custodian storage ontology.
# The context is assembled from three groups so that each kind of entry can be
# read (and extended) on its own; the merge order below fixes the key order.

# Vocabulary prefixes
_VOCABULARY_PREFIXES = {
    "hc": "https://nde.nl/ontology/hc/",
    "crm": "http://www.cidoc-crm.org/cidoc-crm/",
    "sosa": "http://www.w3.org/ns/sosa/",
    "ssn": "http://www.w3.org/ns/ssn/",
    "schema": "http://schema.org/",
    "aat": "http://vocab.getty.edu/aat/",
    "wd": "http://www.wikidata.org/entity/",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "owl": "http://www.w3.org/2002/07/owl#",
    "dcterms": "http://purl.org/dc/terms/",
}

# Domain-specific example namespaces
_EXAMPLE_NAMESPACES = {
    "ex": "http://example.org/archive/",
    "mus": "http://example.org/museum/",
    "lib": "http://example.org/library/",
    "nat": "http://example.org/nathistory/",
    "arch": "http://example.org/archaeology/",
}

# Common property mappings
_TERM_ALIASES = {
    "label": "rdfs:label",
    "comment": "rdfs:comment",
    "type": "@type",
    "id": "@id",
}

JSONLD_CONTEXT = {
    "@context": {
        **_VOCABULARY_PREFIXES,
        **_EXAMPLE_NAMESPACES,
        **_TERM_ALIASES,
    }
}
|
|
|
|
# Format mappings: every accepted spelling of a format name points at the
# (rdflib serializer name, human-readable label) pair it stands for.
_FORMAT_ALIASES = (
    (("nt", "ntriples"), ("nt", "N-Triples")),
    (("jsonld", "json-ld"), ("json-ld", "JSON-LD")),
    (("rdf", "xml", "rdfxml"), ("xml", "RDF/XML")),
)

FORMAT_MAP = {
    alias: target
    for aliases, target in _FORMAT_ALIASES
    for alias in aliases
}
|
|
|
|
# Output file suffix for each rdflib serializer name (the first element of the
# tuples stored in FORMAT_MAP).
_SERIALIZER_NAMES = ("nt", "json-ld", "xml")
_OUTPUT_SUFFIXES = (".nt", ".jsonld", ".rdf")

EXTENSION_MAP = dict(zip(_SERIALIZER_NAMES, _OUTPUT_SUFFIXES))
|
|
|
|
|
|
def load_turtle(filepath: Path) -> Graph:
    """Parse the Turtle document at ``filepath`` and return it as a new graph.

    Any error raised by the rdflib parser propagates to the caller.
    """
    turtle_graph = Graph()
    turtle_graph.parse(filepath, format="turtle")
    return turtle_graph
|
|
|
|
|
|
def export_graph(
    graph: Graph,
    output_path: Path,
    format_key: str,
    verbose: bool = True
) -> bool:
    """
    Export an RDF graph to a specific format.

    JSON-LD output is compacted against the module-level ``JSONLD_CONTEXT`` so
    the prefixes and term aliases declared there appear in the written file.
    Text output is always written as UTF-8, independent of the platform's
    default encoding.

    Args:
        graph: The RDF graph to export
        output_path: Path for the output file
        format_key: rdflib serializer name (nt, json-ld, xml)
        verbose: Print progress messages

    Returns:
        True if export succeeded, False otherwise
    """
    try:
        if format_key == "json-ld":
            # Hand the shared context to the serializer; without it the
            # JSONLD_CONTEXT constant would have no effect on the output.
            serialized = graph.serialize(
                format="json-ld",
                context=JSONLD_CONTEXT["@context"],
                indent=2,
            )
        else:
            # For other formats, use standard serialization
            serialized = graph.serialize(format=format_key)

        # serialize() may hand back str or bytes; handle both for every
        # format rather than only for the non-JSON-LD branch.
        if isinstance(serialized, bytes):
            output_path.write_bytes(serialized)
        else:
            # Explicit UTF-8: the locale default could otherwise mangle or
            # reject non-ASCII literals.
            output_path.write_text(serialized, encoding="utf-8")

        if verbose:
            size_kb = output_path.stat().st_size / 1024
            print(f" ✓ {output_path.name} ({size_kb:.1f} KB)")

        return True

    except Exception as e:
        # Best-effort boundary: one failing format must not abort the others;
        # the failure is reported through the return value.
        if verbose:
            print(f" ✗ {output_path.name}: {e}")
        return False
|
|
|
|
|
|
def export_file(
    input_path: Path,
    output_dir: Optional[Path] = None,
    formats: Optional[List[str]] = None,
    verbose: bool = True
) -> dict:
    """
    Export a Turtle file to multiple formats.

    The output directory is created (including parents) when it does not
    exist yet, so a fresh ``--output`` location works without manual setup.

    Args:
        input_path: Path to the input .ttl file
        output_dir: Directory for output files (default: same as input)
        formats: List of format keys to export (default: all)
        verbose: Print progress messages

    Returns:
        Dict mapping each recognised requested format key to its success
        status. The dict is empty when the input file is missing or cannot
        be loaded. Unknown format keys are skipped and do not appear.
    """
    if not input_path.exists():
        if verbose:
            print(f"ERROR: File not found: {input_path}")
        return {}

    if output_dir is None:
        output_dir = input_path.parent

    if formats is None:
        formats = ["nt", "json-ld", "xml"]

    # Load the Turtle file
    if verbose:
        print(f"\nProcessing: {input_path.name}")

    try:
        graph = load_turtle(input_path)
    except Exception as e:
        if verbose:
            print(f" ERROR loading file: {e}")
        return {}
    else:
        if verbose:
            print(f" Loaded {len(graph)} triples")

    # Make sure the destination exists before any serializer tries to write
    # into it; done only after a successful load so a bad input leaves no
    # stray directory behind.
    try:
        output_dir.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        if verbose:
            print(f" ERROR creating output directory {output_dir}: {e}")
        # Nothing can be written: report every recognised format as failed.
        return {fmt: False for fmt in formats if fmt in FORMAT_MAP}

    # Export to each format
    results = {}
    base_name = input_path.stem

    if verbose:
        print(" Exporting:")

    for fmt in formats:
        if fmt not in FORMAT_MAP:
            if verbose:
                print(f" ⚠ Unknown format: {fmt}")
            continue

        rdflib_format, format_name = FORMAT_MAP[fmt]
        extension = EXTENSION_MAP[rdflib_format]
        output_path = output_dir / f"{base_name}{extension}"

        success = export_graph(graph, output_path, rdflib_format, verbose)
        results[fmt] = success

    return results
|
|
|
|
|
|
def find_example_files() -> List[Path]:
    """Return the sorted ``hc-storage*.ttl`` paths found directly in EXAMPLES_DIR.

    An empty list is returned when the examples directory does not exist.
    """
    candidates = EXAMPLES_DIR.glob("hc-storage*.ttl") if EXAMPLES_DIR.exists() else ()
    return sorted(candidates)
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Parses the arguments, selects the Turtle files to convert (``--file``,
    ``--all``, or by default the combined examples file), exports each one via
    ``export_file`` and prints a summary unless ``--quiet`` is given.

    Exits with status 0 only when every selected file produced at least one
    export and no export failed; otherwise exits with status 1. An empty
    ``--formats`` value is rejected as a usage error.
    """
    parser = argparse.ArgumentParser(
        description="Export HC Storage ontology examples to multiple RDF formats."
    )
    parser.add_argument(
        "--all", "-a",
        action="store_true",
        help="Export all example files"
    )
    parser.add_argument(
        "--file", "-f",
        type=str,
        help="Specific file to export (filename or full path)"
    )
    parser.add_argument(
        "--formats",
        type=str,
        default="nt,jsonld,rdf",
        help="Comma-separated formats: nt,jsonld,rdf (default: all)"
    )
    parser.add_argument(
        "--output", "-o",
        type=str,
        help="Output directory (default: same as input)"
    )
    parser.add_argument(
        "--quiet", "-q",
        action="store_true",
        help="Suppress progress messages"
    )

    args = parser.parse_args()
    verbose = not args.quiet

    # Parse formats, dropping empty tokens left behind by stray commas
    # (e.g. "nt,,rdf" or a trailing ",").
    formats = [
        token.strip().lower()
        for token in args.formats.split(",")
        if token.strip()
    ]
    if not formats:
        parser.error("--formats must name at least one format")

    # Determine output directory
    output_dir = Path(args.output) if args.output else None

    if verbose:
        print("=" * 70)
        print("HC Storage Ontology - RDF Export")
        print("=" * 70)
        print(f"Formats: {', '.join(formats)}")

    # Collect files to process
    files_to_process = []

    if args.file:
        # Specific file
        file_path = Path(args.file)
        if not file_path.is_absolute():
            # Try relative to examples dir, then the ontology dir; otherwise
            # the path is used as given (relative to the working directory).
            if (EXAMPLES_DIR / args.file).exists():
                file_path = EXAMPLES_DIR / args.file
            elif (ONTOLOGY_DIR / args.file).exists():
                file_path = ONTOLOGY_DIR / args.file
        files_to_process.append(file_path)

    elif args.all:
        # All example files
        files_to_process = find_example_files()
        if not files_to_process:
            print("ERROR: No example files found in", EXAMPLES_DIR)
            sys.exit(1)

    else:
        # Default: just the combined file
        combined = EXAMPLES_DIR / "hc-storage-all-examples.ttl"
        if combined.exists():
            files_to_process.append(combined)
        else:
            print("ERROR: Combined examples file not found.")
            print("Use --all to export all files, or --file to specify a file.")
            sys.exit(1)

    # Process each file
    total_success = 0
    total_failed = 0
    files_without_exports = 0

    for file_path in files_to_process:
        results = export_file(file_path, output_dir, formats, verbose)
        if not results:
            # An empty result means nothing was exported for this file (for
            # example the input was missing or could not be loaded). Count it
            # so the run does not exit with a success status.
            files_without_exports += 1
            continue
        total_success += sum(1 for v in results.values() if v)
        total_failed += sum(1 for v in results.values() if not v)

    # Summary
    if verbose:
        print("\n" + "=" * 70)
        print("EXPORT SUMMARY")
        print("=" * 70)
        print(f" Files processed: {len(files_to_process)}")
        print(f" Exports succeeded: {total_success}")
        print(f" Exports failed: {total_failed}")
        if files_without_exports:
            print(f" Files with no exports: {files_without_exports}")

    all_ok = total_failed == 0 and files_without_exports == 0
    sys.exit(0 if all_ok else 1)
|
|
|
|
|
|
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|