#!/usr/bin/env python3
"""Generate a combined static Turtle file for frontend visualization fallback.

This creates a smaller subset of the full RDF data, optimized for
Force-directed graph visualization when the SPARQL endpoint is not
available.

Output: frontend/public/data/nde_heritage_custodians.ttl
"""

import os
import sys
from pathlib import Path

from rdflib import Graph, Namespace, URIRef, Literal
from rdflib.namespace import RDF, SKOS, XSD

# Namespaces used throughout the NDE heritage-custodian data set.
CRM = Namespace("http://www.cidoc-crm.org/cidoc-crm/")
HC = Namespace("https://nde.nl/ontology/hc/")
HCC = Namespace("https://nde.nl/ontology/hc/class/")
SCHEMA = Namespace("http://schema.org/")


def _copy_custodian_triples(src: Graph, dst: Graph) -> None:
    """Copy visualization-essential triples for every E39_Actor in *src* into *dst*.

    Only the type assertion, prefLabel, custodian_type, location link, and
    the linked place's latitude/longitude/addressLocality are carried over;
    everything else in *src* is deliberately dropped to keep the static
    fallback file small.
    """
    for custodian in src.subjects(RDF.type, CRM.E39_Actor):
        # Type triple
        dst.add((custodian, RDF.type, CRM.E39_Actor))
        # Label (prefLabel)
        for label in src.objects(custodian, SKOS.prefLabel):
            dst.add((custodian, SKOS.prefLabel, label))
        # Custodian type
        for ctype in src.objects(custodian, HC.custodian_type):
            dst.add((custodian, HC.custodian_type, ctype))
        # Location and coordinates
        for place in src.objects(custodian, CRM.P53_has_former_or_current_location):
            dst.add((custodian, CRM.P53_has_former_or_current_location, place))
            for lat in src.objects(place, SCHEMA.latitude):
                dst.add((place, SCHEMA.latitude, lat))
            for lon in src.objects(place, SCHEMA.longitude):
                dst.add((place, SCHEMA.longitude, lon))
            for city in src.objects(place, SCHEMA.addressLocality):
                dst.add((place, SCHEMA.addressLocality, city))


def main():
    """Combine essential RDF data from all TTL files into a single visualization file.

    Reads every ``*.ttl`` file under ``<repo>/data/nde/rdf``, extracts the
    visualization-essential triples, and writes them as one combined Turtle
    file to ``<repo>/frontend/public/data/nde_heritage_custodians.ttl``.
    Exits with status 1 if the input directory is missing.
    """
    rdf_dir = Path(__file__).parent.parent / "data" / "nde" / "rdf"
    output_path = (
        Path(__file__).parent.parent
        / "frontend" / "public" / "data" / "nde_heritage_custodians.ttl"
    )

    if not rdf_dir.exists():
        print(f"Error: RDF directory not found: {rdf_dir}")
        sys.exit(1)

    # Create combined graph with just visualization-essential triples
    combined = Graph()
    combined.bind("crm", CRM)
    combined.bind("hc", HC)
    combined.bind("hcc", HCC)
    combined.bind("skos", SKOS)
    combined.bind("schema", SCHEMA)

    ttl_files = list(rdf_dir.glob("*.ttl"))
    print(f"Found {len(ttl_files)} TTL files to process...")

    processed = 0
    for ttl_file in ttl_files:
        try:
            g = Graph()
            g.parse(ttl_file, format="turtle")
            _copy_custodian_triples(g, combined)
            processed += 1
            if processed % 100 == 0:
                print(f"  Processed {processed}/{len(ttl_files)} files...")
        except Exception as e:
            # Best effort: one malformed file should not abort the whole run.
            print(f"  Warning: Could not parse {ttl_file.name}: {e}")

    # Add header comment
    header = f"""# NDE Heritage Custodians - Static RDF for Frontend Visualization
# Generated from {len(ttl_files)} individual TTL files
# Contains essential visualization data: labels, types, locations
#
# Full data available via SPARQL endpoint at http://91.98.224.44/query
#
"""

    # Serialize
    output_path.parent.mkdir(parents=True, exist_ok=True)
    turtle_content = combined.serialize(format="turtle")

    # FIX: Turtle is defined as UTF-8 and the labels contain non-ASCII
    # (Dutch) characters; without an explicit encoding, platforms whose
    # default is not UTF-8 (e.g. cp1252 on Windows) raise UnicodeEncodeError.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(header)
        f.write(turtle_content)

    # Stats
    custodian_count = len(list(combined.subjects(RDF.type, CRM.E39_Actor)))
    triple_count = len(combined)
    file_size = output_path.stat().st_size / 1024  # KB

    print(f"\n✅ Generated static RDF file:")
    print(f"   Path: {output_path}")
    print(f"   Custodians: {custodian_count}")
    print(f"   Triples: {triple_count}")
    print(f"   Size: {file_size:.1f} KB")


if __name__ == "__main__":
    main()