#cloud-config
# Cloud-init configuration for the GLAM SPARQL server.
# Runs on first boot and provisions: Oxigraph (SPARQL store), the GLAM FastAPI
# backend, Qdrant (via Docker) and Caddy as the public reverse proxy.
#
# NOTE(review): this file appears to be rendered by a template engine before
# cloud-init sees it. Placeholders for domain, admin_email, volume_device and
# oxigraph_version are substituted at render time, and a doubled dollar sign or
# doubled percent sign in front of a brace is the escape for a literal one.
# Keep those escapes intact when editing the embedded shell scripts.

package_update: true
package_upgrade: true

packages:
  - curl
  - wget
  - unzip
  - htop
  - vim
  - git
  - jq
  - gnupg                     # provides gpg, used below to dearmor the Caddy key
  - apt-transport-https
  - ca-certificates
  - debian-keyring
  - debian-archive-keyring

# Create users and groups
groups:
  - glam

users:
  - name: glam
    # The group is created above, so it must be named explicitly as the primary
    # group; otherwise useradd tries to create a same-named user group and
    # fails because it already exists.
    primary_group: glam
    groups: glam
    shell: /bin/bash
    system: true
    home: /var/lib/glam

# Write configuration files
write_files:
  # Oxigraph systemd service
  - path: /etc/systemd/system/oxigraph.service
    content: |
      [Unit]
      Description=Oxigraph SPARQL Server
      # Requires= alone does not order units; After= makes sure the data
      # volume is mounted before the store is opened.
      After=network.target mnt-data.mount
      Requires=mnt-data.mount

      [Service]
      Type=simple
      User=glam
      Group=glam
      ExecStart=/usr/local/bin/oxigraph serve \
        --location /mnt/data/oxigraph \
        --bind 127.0.0.1:7878
      Restart=always
      RestartSec=10
      StandardOutput=journal
      StandardError=journal

      # Security hardening
      NoNewPrivileges=true
      ProtectSystem=strict
      ProtectHome=true
      ReadWritePaths=/mnt/data/oxigraph
      PrivateTmp=true

      [Install]
      WantedBy=multi-user.target

  # GLAM API (FastAPI) systemd service
  - path: /etc/systemd/system/glam-api.service
    content: |
      [Unit]
      Description=GLAM FastAPI Backend (DSPy SPARQL Generation)
      After=network.target oxigraph.service
      Wants=oxigraph.service

      [Service]
      Type=simple
      User=glam
      Group=glam
      WorkingDirectory=/var/lib/glam/api
      Environment="PATH=/var/lib/glam/api/venv/bin:/usr/local/bin:/usr/bin:/bin"
      Environment="SPARQL_ENDPOINT=http://127.0.0.1:7878/query"
      # Leading "-" makes the env file optional.
      EnvironmentFile=-/var/lib/glam/.env
      ExecStart=/var/lib/glam/api/venv/bin/uvicorn main:app --host 127.0.0.1 --port 8000
      Restart=always
      RestartSec=10
      StandardOutput=journal
      StandardError=journal

      # Security hardening
      NoNewPrivileges=true
      ProtectSystem=strict
      ProtectHome=true
      ReadWritePaths=/var/lib/glam
      PrivateTmp=true

      [Install]
      WantedBy=multi-user.target

  # Caddy configuration for reverse proxy
  - path: /etc/caddy/Caddyfile
    content: |
      ${domain} {
        # TLS with automatic HTTPS
        tls ${admin_email}

        # SPARQL Query endpoint - use exact path matcher to avoid matching /query-builder
        @sparqlQuery {
          path /query
        }
        handle @sparqlQuery {
          reverse_proxy localhost:7878
        }

        # NOTE(review): no authentication is configured here for the write
        # endpoints (/update, /store); confirm that public write access is intended.

        # SPARQL Update endpoint
        handle /update* {
          reverse_proxy localhost:7878
        }

        # Graph Store Protocol
        handle /store* {
          reverse_proxy localhost:7878
        }

        # FastAPI Backend (DSPy SPARQL Generation)
        handle /api/dspy/* {
          reverse_proxy localhost:8000
        }

        # Valkey Semantic Cache API
        handle /api/cache/* {
          uri strip_prefix /api/cache
          reverse_proxy localhost:8090
        }

        # Other API routes
        handle /api/* {
          reverse_proxy localhost:8000
        }

        # Qdrant Vector Database REST API
        handle /qdrant/* {
          uri strip_prefix /qdrant
          reverse_proxy localhost:6333
        }

        # Static ontology files
        handle /ontology/* {
          root * /mnt/data
          file_server browse
        }

        # Static LinkML files
        handle /linkml/* {
          root * /mnt/data
          file_server browse
        }

        # Static UML files
        handle /uml/* {
          root * /mnt/data
          file_server browse
        }

        # Health check endpoint
        handle /health {
          respond "OK" 200
        }

        # Frontend SPA (React app) - catch-all, kept after every specific route.
        # This handles all frontend routes including /visualize, /map, /query-builder, etc.
        handle {
          root * /var/www/glam-frontend
          try_files {path} /index.html
          file_server
        }

        # CORS headers for SPARQL endpoints
        header Access-Control-Allow-Origin "*"
        header Access-Control-Allow-Methods "GET, POST, OPTIONS"
        header Access-Control-Allow-Headers "Content-Type, Accept"

        # Compression
        encode gzip zstd

        # Logging
        log {
          output file /var/log/caddy/access.log {
            roll_size 100mb
            roll_keep 5
          }
        }
      }

  # Script to load ontologies into Oxigraph
  - path: /var/lib/glam/scripts/load-ontologies.sh
    permissions: '0755'
    content: |
      #!/bin/bash
      # Loads every Turtle, RDF/XML and N-Triples file found in ONTOLOGY_DIR
      # into the default graph of the local Oxigraph server, then prints the
      # resulting triple count.
      set -e

      OXIGRAPH_ENDPOINT="http://127.0.0.1:7878"
      ONTOLOGY_DIR="/mnt/data/ontologies"
      # Upper bound for the readiness wait below (attempts are 2 seconds apart).
      MAX_WAIT_ATTEMPTS=150

      # POST each existing file to the default graph.
      # Arguments: $1 - Content-Type to send; remaining - candidate file paths.
      # An unmatched glob is passed through literally and skipped by the -f test.
      load_files() {
        local content_type=$1
        shift
        local file
        for file in "$@"; do
          [ -f "$file" ] || continue
          echo "Loading: $(basename "$file")"
          curl -X POST \
            -H "Content-Type: $content_type" \
            --data-binary "@$file" \
            "$OXIGRAPH_ENDPOINT/store?default" \
            -w " HTTP %%{http_code}\n" \
            -o /dev/null -s
        done
      }

      echo "Loading ontologies into Oxigraph..."

      # Wait for Oxigraph to be ready, but give up eventually instead of
      # hanging forever when the service never comes up.
      attempts=0
      until curl -s "$OXIGRAPH_ENDPOINT/" > /dev/null 2>&1; do
        attempts=$((attempts + 1))
        if [ "$attempts" -ge "$MAX_WAIT_ATTEMPTS" ]; then
          echo "Oxigraph did not become ready in time; giving up." >&2
          exit 1
        fi
        echo "Waiting for Oxigraph to start..."
        sleep 2
      done

      # Load Turtle files
      load_files 'text/turtle' "$ONTOLOGY_DIR"/*.ttl

      # Load RDF/XML files
      load_files 'application/rdf+xml' "$ONTOLOGY_DIR"/*.rdf "$ONTOLOGY_DIR"/*.owl

      # Load N-Triples files
      load_files 'application/n-triples' "$ONTOLOGY_DIR"/*.nt

      echo "Done loading ontologies!"

      # Show triple count
      echo ""
      echo "Triple count:"
      curl -s -X POST \
        -H 'Content-Type: application/sparql-query' \
        -H 'Accept: application/sparql-results+json' \
        --data 'SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }' \
        "$OXIGRAPH_ENDPOINT/query" | jq -r '.results.bindings[0].count.value'

  # Qdrant Vector Database systemd service (Docker-based)
  # NOTE(review): the image tag is "latest", so the running version can change
  # whenever the image is re-pulled; consider pinning a release tag.
  - path: /etc/systemd/system/qdrant.service
    content: |
      [Unit]
      Description=Qdrant Vector Database
      # Ordered after the data mount as well as required by it, so the bind
      # mounts below never land on the unmounted directory.
      After=network.target docker.service mnt-data.mount
      Requires=docker.service mnt-data.mount

      [Service]
      Type=simple
      Restart=always
      RestartSec=10
      # Leading "-" ignores failures: the container may not exist yet.
      ExecStartPre=-/usr/bin/docker stop qdrant
      ExecStartPre=-/usr/bin/docker rm qdrant
      ExecStart=/usr/bin/docker run --name qdrant \
        -p 127.0.0.1:6333:6333 \
        -p 127.0.0.1:6334:6334 \
        -v /mnt/data/qdrant/storage:/qdrant/storage:z \
        -v /mnt/data/qdrant/snapshots:/qdrant/snapshots:z \
        --memory=2g \
        --cpus=2 \
        qdrant/qdrant:latest
      ExecStop=/usr/bin/docker stop qdrant

      [Install]
      WantedBy=multi-user.target

  # Backup script
  - path: /var/lib/glam/scripts/backup.sh
    permissions: '0755'
    content: |
      #!/bin/bash
      # Creates a timestamped tarball of the Oxigraph data directory and keeps
      # only the 7 most recent backups.
      set -e

      BACKUP_DIR="/mnt/data/backups"
      DATE=$(date +%Y%m%d_%H%M%S)

      mkdir -p "$BACKUP_DIR"

      echo "Creating Oxigraph backup..."

      # Stop Oxigraph for a consistent backup. The EXIT trap guarantees the
      # service is started again even when tar fails and set -e aborts the
      # script; without it a failed backup would leave the store down.
      trap 'systemctl start oxigraph' EXIT
      systemctl stop oxigraph

      # Create tarball of Oxigraph data
      tar -czf "$BACKUP_DIR/oxigraph_$DATE.tar.gz" -C /mnt/data oxigraph

      # Restart Oxigraph right away (not only at exit) and disarm the trap
      systemctl start oxigraph
      trap - EXIT

      # Keep only last 7 backups
      ls -t "$BACKUP_DIR"/oxigraph_*.tar.gz | tail -n +8 | xargs -r rm

      echo "Backup complete: $BACKUP_DIR/oxigraph_$DATE.tar.gz"

  # Logrotate configuration
  # NOTE(review): the Caddyfile above also rolls access.log itself
  # (roll_size / roll_keep); confirm that rotating the same files from both
  # Caddy and logrotate is intended.
  - path: /etc/logrotate.d/glam
    content: |
      /var/log/caddy/*.log {
        daily
        missingok
        rotate 14
        compress
        delaycompress
        notifempty
        create 0640 caddy caddy
        sharedscripts
        postrotate
          systemctl reload caddy
        endscript
      }

runcmd:
  # Mount data volume
  - mkdir -p /mnt/data
  - |
    VOLUME_DEVICE="${volume_device}"
    if [ -e "$VOLUME_DEVICE" ]; then
      # Format only when blkid finds no filesystem at all. Testing for "ext4"
      # alone would reformat (and wipe) a volume that carries any other
      # filesystem.
      FS_TYPE=$(blkid -o value -s TYPE "$VOLUME_DEVICE" || true)
      if [ -z "$FS_TYPE" ]; then
        mkfs.ext4 "$VOLUME_DEVICE"
        FS_TYPE=ext4
      fi
      if ! mountpoint -q /mnt/data; then
        mount "$VOLUME_DEVICE" /mnt/data
      fi
      # Add the fstab entry only once. nofail keeps the machine bootable when
      # the volume is detached; services that need it declare the mount
      # dependency in their unit files.
      if ! grep -qF -- "$VOLUME_DEVICE /mnt/data " /etc/fstab; then
        echo "$VOLUME_DEVICE /mnt/data $FS_TYPE defaults,nofail 0 2" >> /etc/fstab
      fi
    fi

  # Create directory structure
  - mkdir -p /mnt/data/oxigraph
  - mkdir -p /mnt/data/ontologies
  - mkdir -p /mnt/data/linkml
  - mkdir -p /mnt/data/uml
  - mkdir -p /mnt/data/backups
  - mkdir -p /mnt/data/qdrant/storage
  - mkdir -p /mnt/data/qdrant/snapshots
  - mkdir -p /var/www/glam-frontend
  - mkdir -p /var/lib/glam/api
  - mkdir -p /var/log/caddy
  - chown -R glam:glam /mnt/data
  - chown -R glam:glam /var/lib/glam

  # Install Caddy
  - curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/gpg.key' | gpg --dearmor -o /usr/share/keyrings/caddy-stable-archive-keyring.gpg
  - curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/debian.deb.txt' | tee /etc/apt/sources.list.d/caddy-stable.list
  - apt-get update
  # /etc/caddy/Caddyfile already exists (written by write_files), so dpkg must
  # be told to keep it instead of asking at a conffile prompt with no terminal.
  - DEBIAN_FRONTEND=noninteractive apt-get install -y -o Dpkg::Options::=--force-confdef -o Dpkg::Options::=--force-confold caddy
  # The log directory was created by root before the caddy user existed; hand
  # it over so Caddy can write its access log.
  - chown -R caddy:caddy /var/log/caddy

  # Install Python 3.11+ and pip for FastAPI backend
  - apt-get install -y python3 python3-pip python3-venv

  # Install Docker for Qdrant
  - curl -fsSL https://get.docker.com | sh
  - systemctl enable docker
  - systemctl start docker

  # Install Oxigraph
  # Errors are reported but do not abort: all runcmd entries share one shell
  # script, so an exit here would skip every later step.
  - |
    OXIGRAPH_VERSION="${oxigraph_version}"
    ARCH=$(uname -m)
    case "$ARCH" in
      x86_64|aarch64)
        OXIGRAPH_URL="https://github.com/oxigraph/oxigraph/releases/download/v$OXIGRAPH_VERSION/oxigraph_v$${OXIGRAPH_VERSION}_$${ARCH}_linux_gnu"
        # Download to a scratch name first so a failed download never leaves
        # an empty, executable /usr/local/bin/oxigraph behind.
        if wget -q "$OXIGRAPH_URL" -O /usr/local/bin/oxigraph.download; then
          mv /usr/local/bin/oxigraph.download /usr/local/bin/oxigraph
          chmod +x /usr/local/bin/oxigraph
        else
          rm -f /usr/local/bin/oxigraph.download
          echo "ERROR: failed to download Oxigraph from $OXIGRAPH_URL" >&2
        fi
        ;;
      *)
        echo "ERROR: unsupported architecture for Oxigraph: $ARCH" >&2
        ;;
    esac

  # Enable and start services
  - systemctl daemon-reload
  - systemctl enable oxigraph
  - systemctl start oxigraph
  - systemctl enable glam-api
  # Note: glam-api will start after first --api deployment with source code
  - systemctl enable qdrant
  - systemctl start qdrant
  - systemctl enable caddy
  # restart (not start): the package may already have started Caddy before the
  # log directory ownership was fixed above.
  - systemctl restart caddy

  # Setup automatic backups (daily at 3 AM)
  # NOTE: "crontab -" replaces the whole crontab of the invoking user.
  - echo "0 3 * * * /var/lib/glam/scripts/backup.sh >> /var/log/glam-backup.log 2>&1" | crontab -

  # Notify that setup is complete
  - echo "GLAM SPARQL server setup complete!" > /var/lib/glam/setup-complete