#!/usr/bin/env python3
"""Generate a combined static Turtle file for frontend visualization fallback.

This creates a smaller subset of the full RDF data, optimized for
Force-directed graph visualization when the SPARQL endpoint is not
available.

Output: frontend/public/data/nde_heritage_custodians.ttl
"""

import os
import sys
from pathlib import Path

from rdflib import Graph, Namespace, URIRef, Literal
from rdflib.namespace import RDF, SKOS, XSD

# Namespaces used throughout the NDE heritage-custodian data set.
CRM = Namespace("http://www.cidoc-crm.org/cidoc-crm/")
HC = Namespace("https://nde.nl/ontology/hc/")
HCC = Namespace("https://nde.nl/ontology/hc/class/")
SCHEMA = Namespace("http://schema.org/")


def _copy_custodian_triples(src: Graph, dst: Graph) -> None:
    """Copy visualization-essential triples for every E39_Actor in *src* into *dst*.

    Only the type assertion, prefLabel, custodian_type, location link, and
    the linked place's latitude/longitude/addressLocality are carried over;
    everything else in *src* is deliberately dropped to keep the static
    fallback file small.
    """
    for custodian in src.subjects(RDF.type, CRM.E39_Actor):
        # Type triple
        dst.add((custodian, RDF.type, CRM.E39_Actor))
        # Label (prefLabel)
        for label in src.objects(custodian, SKOS.prefLabel):
            dst.add((custodian, SKOS.prefLabel, label))
        # Custodian type
        for ctype in src.objects(custodian, HC.custodian_type):
            dst.add((custodian, HC.custodian_type, ctype))
        # Location and coordinates
        for place in src.objects(custodian, CRM.P53_has_former_or_current_location):
            dst.add((custodian, CRM.P53_has_former_or_current_location, place))
            for lat in src.objects(place, SCHEMA.latitude):
                dst.add((place, SCHEMA.latitude, lat))
            for lon in src.objects(place, SCHEMA.longitude):
                dst.add((place, SCHEMA.longitude, lon))
            for city in src.objects(place, SCHEMA.addressLocality):
                dst.add((place, SCHEMA.addressLocality, city))


def main():
    """Combine essential RDF data from all TTL files into a single visualization file.

    Reads every ``*.ttl`` file under ``<repo>/data/nde/rdf``, extracts the
    visualization-essential triples, and writes them as one combined Turtle
    file to ``<repo>/frontend/public/data/nde_heritage_custodians.ttl``.
    Exits with status 1 if the input directory is missing.
    """
    rdf_dir = Path(__file__).parent.parent / "data" / "nde" / "rdf"
    output_path = (
        Path(__file__).parent.parent
        / "frontend" / "public" / "data" / "nde_heritage_custodians.ttl"
    )

    if not rdf_dir.exists():
        print(f"Error: RDF directory not found: {rdf_dir}")
        sys.exit(1)

    # Create combined graph with just visualization-essential triples
    combined = Graph()
    combined.bind("crm", CRM)
    combined.bind("hc", HC)
    combined.bind("hcc", HCC)
    combined.bind("skos", SKOS)
    combined.bind("schema", SCHEMA)

    ttl_files = list(rdf_dir.glob("*.ttl"))
    print(f"Found {len(ttl_files)} TTL files to process...")

    processed = 0
    for ttl_file in ttl_files:
        try:
            g = Graph()
            g.parse(ttl_file, format="turtle")
            _copy_custodian_triples(g, combined)
            processed += 1
            if processed % 100 == 0:
                print(f"  Processed {processed}/{len(ttl_files)} files...")
        except Exception as e:
            # Best effort: one malformed file should not abort the whole run.
            print(f"  Warning: Could not parse {ttl_file.name}: {e}")

    # Add header comment
    header = f"""# NDE Heritage Custodians - Static RDF for Frontend Visualization
# Generated from {len(ttl_files)} individual TTL files
# Contains essential visualization data: labels, types, locations
#
# Full data available via SPARQL endpoint at http://91.98.224.44/query
#
"""

    # Serialize
    output_path.parent.mkdir(parents=True, exist_ok=True)
    turtle_content = combined.serialize(format="turtle")

    # FIX: Turtle is defined as UTF-8 and the labels contain non-ASCII
    # (Dutch) characters; without an explicit encoding, platforms whose
    # default is not UTF-8 (e.g. cp1252 on Windows) raise UnicodeEncodeError.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(header)
        f.write(turtle_content)

    # Stats
    custodian_count = len(list(combined.subjects(RDF.type, CRM.E39_Actor)))
    triple_count = len(combined)
    file_size = output_path.stat().st_size / 1024  # KB

    print(f"\n✅ Generated static RDF file:")
    print(f"   Path: {output_path}")
    print(f"   Custodians: {custodian_count}")
    print(f"   Triples: {triple_count}")
    print(f"   Size: {file_size:.1f} KB")


if __name__ == "__main__":
    main()