#!/bin/bash
# Automated deployment script for GLAM SPARQL server
# Uses Hetzner API to discover server IP and deploys via SSH
#
# Usage: ./deploy.sh [options]
# Options:
#   --infra      Deploy infrastructure changes (Terraform)
#   --data       Deploy ontology/schema data
#   --frontend   Build and deploy frontend
#   --api        Deploy FastAPI backend (DSPy SPARQL generation)
#   --ducklake   Deploy DuckLake API backend
#   --reload     Reload data into Oxigraph
#   --all        Deploy everything
#   --status     Check server status only
#
# Environment (taken from the process environment or from $PROJECT_ROOT/.env;
# values in .env win):
#   HETZNER_HC_API_TOKEN         Hetzner Cloud API token (required)
#   GLAM_DOMAIN                  Host name printed in the final summary
#   GLAM_SSH_HOST_KEY_CHECKING   Value for ssh's StrictHostKeyChecking option
#                                (default: "no", as this script always used)
#
# NOTE(review): the copy of this script that was reviewed is corrupted in the
# middle: the body of the terraform.tfvars here-document, everything after it
# in the infrastructure step, and the beginning of the data step are missing.
# Those parts were NOT reconstructed. --infra and --data therefore abort with
# an explicit error at the point where the text breaks off; restore the
# missing code from version control.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
readonly SCRIPT_DIR PROJECT_ROOT

# Colors for output (real escape bytes, so no `echo -e` is needed to print them)
readonly RED=$'\033[0;31m'
readonly GREEN=$'\033[0;32m'
readonly YELLOW=$'\033[1;33m'
readonly BLUE=$'\033[0;34m'
readonly NC=$'\033[0m' # No Color

readonly RULE_DOUBLE='════════════════════════════════════════════════════════════════'
readonly RULE_HEAVY='━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'

# Configuration
readonly SERVER_NAME="glam-sparql"
readonly SERVER_USER="root"
# Not read by any code that survives in this copy; presumably used by the
# missing data step. Kept so restoring that step needs no other change.
# shellcheck disable=SC2034
readonly REMOTE_DATA_DIR="/mnt/data"

# Host key policy for every ssh/rsync call. "no" disables host key
# verification; that is the historical behaviour of this script and convenient
# when Terraform recreates the server, but it allows man-in-the-middle attacks.
# Set GLAM_SSH_HOST_KEY_CHECKING=accept-new (or yes) for a stricter policy.
SSH_HOST_KEY_CHECKING="${GLAM_SSH_HOST_KEY_CHECKING:-no}"
SSH_OPTS=(-o "StrictHostKeyChecking=${SSH_HOST_KEY_CHECKING}")
RSYNC_SSH_CMD="ssh -o StrictHostKeyChecking=${SSH_HOST_KEY_CHECKING}"
readonly SSH_HOST_KEY_CHECKING SSH_OPTS RSYNC_SSH_CMD

# Filled in by main() / discover_server().
HCLOUD_TOKEN=""
SERVER_IP=""

# What to do; set by parse_args().
DEPLOY_INFRA=false
DEPLOY_DATA=false
DEPLOY_FRONTEND=false
DEPLOY_API=false
DEPLOY_DUCKLAKE=false
RELOAD_OXIGRAPH=false
STATUS_ONLY=false

# ---------------------------------------------------------------------------
# Output helpers
# ---------------------------------------------------------------------------

info()    { printf '%s%s%s\n' "$BLUE" "$*" "$NC"; }
notice()  { printf '%s%s%s\n' "$YELLOW" "$*" "$NC"; }
success() { printf '%s%s%s\n' "$GREEN" "$*" "$NC"; }
error()   { printf '%s%s%s\n' "$RED" "$*" "$NC" >&2; }

# Print an error in red on stderr and terminate the script with status 1.
die() {
  error "$@"
  exit 1
}

# Print the header that introduces one deployment step.
# Arguments: $1 - step title
section() {
  echo ""
  info "$RULE_HEAVY"
  info " $1"
  info "$RULE_HEAVY"
}

usage() {
  echo "Usage: $0 [--infra] [--data] [--frontend] [--api] [--ducklake] [--reload] [--all] [--status]" >&2
}

# Abort at a point where the reviewed copy of this script is missing code.
# Arguments: $1 - human-readable name of the incomplete part
abort_truncated() {
  die "Error: the $1 part of this script is missing from this copy; restore it from version control."
}

# ---------------------------------------------------------------------------
# Setup helpers
# ---------------------------------------------------------------------------

# Abort unless every named command can be found in PATH.
require_commands() {
  local cmd
  for cmd in "$@"; do
    command -v "$cmd" >/dev/null 2>&1 \
      || die "Error: required command '$cmd' not found in PATH"
  done
}

# Export the KEY=VALUE pairs of a dotenv-style file.
#
# The file is parsed line by line and never executed. Blank lines and lines
# starting with '#' are skipped, an optional leading "export " is accepted and
# one pair of matching surrounding quotes is removed from the value. Values
# are taken literally, so spaces and glob characters are safe (the previous
# `export $(grep ... | xargs)` split such values into separate words).
# Trailing "# comment" text is NOT stripped and becomes part of the value.
#
# Arguments: $1 - path to the file; a missing file is not an error
load_env_file() {
  local env_file=$1
  local line key value
  local -r comment_re='^[[:space:]]*#'
  local -r assignment_re='^[[:space:]]*(export[[:space:]]+)?([A-Za-z_][A-Za-z0-9_]*)=(.*)$'
  local -r double_quoted_re='^"(.*)"$'
  local -r single_quoted_re="^'(.*)'\$"

  if [[ ! -f "$env_file" ]]; then
    return 0
  fi

  # `|| [[ -n $line ]]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    line=${line%$'\r'} # tolerate CRLF files
    if [[ -z "${line//[[:space:]]/}" || "$line" =~ $comment_re ]]; then
      continue
    fi
    if [[ "$line" =~ $assignment_re ]]; then
      key=${BASH_REMATCH[2]}
      value=${BASH_REMATCH[3]}
      if [[ "$value" =~ $double_quoted_re || "$value" =~ $single_quoted_re ]]; then
        value=${BASH_REMATCH[1]}
      fi
      export "$key=$value"
    else
      printf 'Warning: ignoring unparsable line in %s\n' "$env_file" >&2
    fi
  done < "$env_file"
}

# Translate the command line into the DEPLOY_* / RELOAD_* / STATUS_ONLY flags.
# Exits with status 1 when no option or an unknown option is given.
parse_args() {
  local arg

  if (( $# == 0 )); then
    usage
    exit 1
  fi

  for arg in "$@"; do
    case "$arg" in
      --infra)
        DEPLOY_INFRA=true
        ;;
      --data)
        DEPLOY_DATA=true
        ;;
      --frontend)
        DEPLOY_FRONTEND=true
        ;;
      --api)
        DEPLOY_API=true
        ;;
      --ducklake)
        DEPLOY_DUCKLAKE=true
        ;;
      --reload)
        RELOAD_OXIGRAPH=true
        ;;
      --all)
        DEPLOY_INFRA=true
        DEPLOY_DATA=true
        DEPLOY_FRONTEND=true
        DEPLOY_API=true
        DEPLOY_DUCKLAKE=true
        RELOAD_OXIGRAPH=true
        ;;
      --status)
        STATUS_ONLY=true
        ;;
      *)
        echo "Unknown option: $arg" >&2
        exit 1
        ;;
    esac
  done
}

# ---------------------------------------------------------------------------
# Hetzner API
# ---------------------------------------------------------------------------

# Print the Hetzner "list servers" response filtered by $SERVER_NAME.
# Returns non-zero on transport errors and on HTTP status >= 400 (-f), so that
# callers can tell "API unavailable" apart from "no such server".
# The Authorization header is passed on stdin (`-H @-`, curl >= 7.55) to keep
# the token out of the process list.
hetzner_servers_json() {
  curl -fsS -H @- \
    "https://api.hetzner.cloud/v1/servers?name=${SERVER_NAME}" \
    <<<"Authorization: Bearer ${HCLOUD_TOKEN}"
}

# Function to get server IP from Hetzner API
# Outputs: the public IPv4 address, or nothing when no server matches
# Returns: non-zero when the API call or the JSON parsing fails
get_server_ip() {
  local response
  response=$(hetzner_servers_json) || return 1
  # `// empty` turns a missing/null address into no output at all.
  jq -r '.servers[0].public_net.ipv4.ip // empty' <<<"$response"
}

# Function to check server status
# Outputs: name, status, IP, type and location of the first matching server,
#          or a "not found" line when the API returned no server
# Returns: non-zero when the API call or the JSON parsing fails
check_server_status() {
  local response summary
  response=$(hetzner_servers_json) || return 1
  summary=$(jq -r '.servers[0] // empty
    | "Server: \(.name)\nStatus: \(.status)\nIP: \(.public_net.ipv4.ip)\nType: \(.server_type.name)\nLocation: \(.datacenter.name)"' \
    <<<"$response") || return 1

  if [[ -n "$summary" ]]; then
    printf '%s\n' "$summary"
  else
    echo "Server '$SERVER_NAME' not found"
  fi
}

# Look the server up and store its address in SERVER_IP.
# A missing server is only acceptable when --infra was requested.
discover_server() {
  notice "Discovering server..."
  if ! SERVER_IP=$(get_server_ip); then
    die "Error: could not query the Hetzner Cloud API for server '$SERVER_NAME'"
  fi

  if [[ -z "$SERVER_IP" ]]; then
    if [[ "$DEPLOY_INFRA" == true ]]; then
      notice "Server not found. Will be created by Terraform."
    else
      error "Error: Server '$SERVER_NAME' not found in Hetzner Cloud"
      echo "Run with --infra to create the server first." >&2
      exit 1
    fi
  else
    success "Found server: $SERVER_IP"
  fi
}

# ---------------------------------------------------------------------------
# SSH / rsync helpers
# ---------------------------------------------------------------------------

# Abort when no server address is known; every remote step needs one.
require_server_ip() {
  [[ -n "$SERVER_IP" ]] \
    || die "Error: no IP address known for server '$SERVER_NAME'; cannot run remote steps"
}

# Run a command on the server. Arguments are passed to ssh after the host;
# stdin is forwarded, so `remote 'bash -s' <<'EOF'` runs a script remotely.
remote() {
  require_server_ip
  ssh "${SSH_OPTS[@]}" "${SERVER_USER}@${SERVER_IP}" "$@"
}

# rsync with the options shared by all uploads.
# Arguments: extra rsync options, then source and destination.
sync_to_server() {
  require_server_ip
  rsync -avz --progress -e "$RSYNC_SSH_CMD" "$@"
}

# Function to wait for SSH
# Polls up to 30 times, 10 seconds apart.
# NOTE(review): no caller survives in this copy; presumably the missing part
# of the infrastructure step invoked it after `terraform apply` — confirm.
# Arguments: $1 - IP address to probe
# Returns:   0 once a connection succeeds, 1 after the last failed attempt
wait_for_ssh() {
  local ip=$1
  local -r max_attempts=30
  local attempt

  info "Waiting for SSH to be available on $ip..."

  for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
    # stderr is discarded on purpose: refused connections are expected here.
    if ssh -o ConnectTimeout=5 "${SSH_OPTS[@]}" -o BatchMode=yes \
      "${SERVER_USER}@${ip}" "echo 'connected'" 2>/dev/null; then
      success "SSH connection established"
      return 0
    fi
    echo " Attempt $attempt/$max_attempts..."
    sleep 10
  done

  error "Failed to establish SSH connection"
  return 1
}

# ---------------------------------------------------------------------------
# Steps
# ---------------------------------------------------------------------------

# Status only mode: server facts from the API, then service state and the
# triple count from the server itself.
show_status() {
  echo ""
  info "Server Status:"
  check_server_status || die "Error: could not read the server status from the Hetzner Cloud API"

  if [[ -n "$SERVER_IP" ]]; then
    echo ""
    info "Service Status:"
    remote 'bash -s' <<'ENDSSH'
report() {
  if systemctl is-active --quiet "$1"; then
    echo "$2: Running"
  else
    echo "$2: Stopped"
  fi
}
report oxigraph 'Oxigraph'
report glam-api 'GLAM API'
report ducklake 'DuckLake API'
report caddy 'Caddy'
ENDSSH

    echo ""
    info "SPARQL Triple Count:"
    remote 'bash -s' <<'ENDSSH'
curl -s -X POST -H 'Content-Type: application/sparql-query' \
  -H 'Accept: application/sparql-results+json' \
  --data 'SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }' \
  http://localhost:7878/query \
  | jq -r '.results.bindings[0].count.value // "0"' \
  | xargs -I {} echo '{} triples'
ENDSSH
  fi
}

# Deploy infrastructure
deploy_infra() {
  section "Deploying Infrastructure"
  require_commands terraform

  cd "$PROJECT_ROOT/infrastructure/terraform"

  # Initialize Terraform
  notice "Initializing Terraform..."
  terraform init -upgrade

  # Create terraform.tfvars if it doesn't exist
  if [[ ! -f terraform.tfvars ]]; then
    notice "Creating terraform.tfvars from environment..."
    # NOTE(review): the reviewed text breaks off inside the here-document that
    # wrote this file. All that survives of the following lines is:
    #     cat > terraform.tfvars </dev/null | wc -l) files synced${NC}"
    # The variable names and values that belong in terraform.tfvars are
    # unknown, so nothing is written.
    abort_truncated "terraform.tfvars generation"
  fi

  # NOTE(review): whatever followed (presumably planning/applying and
  # re-reading the server address — confirm) is missing as well.
  abort_truncated "infrastructure deployment"
}

# Deploy ontology/schema data
deploy_data() {
  # NOTE(review): only the tail of this step survives in the reviewed text:
  # a message ending in "... | wc -l) files synced", the closing `fi` of an
  # inner conditional, and the final "Data deployment complete" message.
  # What is synced, and where to, cannot be recovered from it.
  abort_truncated "data deployment"
}

# Deploy frontend
deploy_frontend() {
  section "Building & Deploying Frontend"
  require_commands npm rsync

  cd "$PROJECT_ROOT/frontend"

  # Build frontend
  notice "Building frontend..."
  npm ci
  npm run build

  # Deploy to server; --delete removes remote files that left the build.
  notice "Deploying frontend to server..."
  sync_to_server --delete \
    dist/ \
    "${SERVER_USER}@${SERVER_IP}:/var/www/glam-frontend/"

  cd "$PROJECT_ROOT"

  success "Frontend deployment complete"
}

# Deploy API
deploy_api() {
  section "Deploying FastAPI Backend (DSPy SPARQL Generation)"
  require_commands rsync

  # Ensure remote directories exist with proper Python package structure
  notice "Setting up API directory on server..."
  remote "mkdir -p /var/lib/glam/api/src/glam_extractor/api && \
    touch /var/lib/glam/api/src/__init__.py && \
    touch /var/lib/glam/api/src/glam_extractor/__init__.py"

  # Sync API source code (preserving package structure)
  notice "Syncing API source code..."
  sync_to_server \
    --exclude="__pycache__" \
    --exclude="*.pyc" \
    --exclude=".pytest_cache" \
    "$PROJECT_ROOT/src/glam_extractor/api/" \
    "${SERVER_USER}@${SERVER_IP}:/var/lib/glam/api/src/glam_extractor/api/"

  # Sync requirements file
  notice "Syncing API requirements..."
  sync_to_server \
    "$PROJECT_ROOT/infrastructure/api-requirements.txt" \
    "${SERVER_USER}@${SERVER_IP}:/var/lib/glam/api/requirements.txt"

  # Install/update Python dependencies and restart service.
  # The script is run by an explicit bash because it uses `source`.
  notice "Installing Python dependencies and restarting API service..."
  remote 'bash -s' <<'ENDSSH'
set -e

# Create virtual environment if it doesn't exist
if [ ! -d /var/lib/glam/api/venv ]; then
  echo "Creating Python virtual environment..."
  python3 -m venv /var/lib/glam/api/venv
fi

# Activate and install dependencies
source /var/lib/glam/api/venv/bin/activate
pip install --upgrade pip
pip install -r /var/lib/glam/api/requirements.txt

# Restart the API service
if systemctl is-active --quiet glam-api; then
  echo "Restarting glam-api service..."
  systemctl restart glam-api
else
  echo "Starting glam-api service..."
  systemctl start glam-api
fi

# Wait for service to be ready
sleep 2

# Check service status (a failed start is reported, not treated as fatal)
if systemctl is-active --quiet glam-api; then
  echo "glam-api service is running"
else
  echo "Warning: glam-api service failed to start"
  journalctl -u glam-api --no-pager -n 20
fi
ENDSSH

  success "API deployment complete"
}

# Deploy DuckLake API
deploy_ducklake() {
  section "Deploying DuckLake API Backend (Time Travel & Schema Evolution)"
  require_commands rsync

  # Ensure remote directories exist. Both paths are spelled out because brace
  # expansion is not available in every remote login shell.
  notice "Setting up DuckLake API directory on server..."
  remote "mkdir -p /var/lib/glam/ducklake/data /var/lib/glam/ducklake/catalog"

  # Sync DuckLake API source code; local databases and data stay local.
  notice "Syncing DuckLake API source code..."
  sync_to_server \
    --exclude="__pycache__" \
    --exclude="*.pyc" \
    --exclude=".pytest_cache" \
    --exclude="*.db" \
    --exclude="data/" \
    --exclude="catalog/" \
    "$PROJECT_ROOT/backend/ducklake/" \
    "${SERVER_USER}@${SERVER_IP}:/var/lib/glam/ducklake/"

  # Install/update Python dependencies and set up service.
  # The script is run by an explicit bash because it uses `source`.
  notice "Installing Python dependencies and setting up DuckLake service..."
  remote 'bash -s' <<'ENDSSH'
set -e

# Create virtual environment if it doesn't exist
if [ ! -d /var/lib/glam/ducklake/venv ]; then
  echo "Creating Python virtual environment..."
  python3 -m venv /var/lib/glam/ducklake/venv
fi

# Activate and install dependencies
source /var/lib/glam/ducklake/venv/bin/activate
pip install --upgrade pip
pip install -r /var/lib/glam/ducklake/requirements.txt

# Create systemd service if it doesn't exist.
# An existing unit file is left untouched, so later changes to the template
# below do not reach servers that already have the service.
if [ ! -f /etc/systemd/system/ducklake.service ]; then
  echo "Creating DuckLake systemd service..."
  cat > /etc/systemd/system/ducklake.service << 'EOF'
[Unit]
Description=DuckLake API Server
After=network.target

[Service]
Type=simple
User=root
WorkingDirectory=/var/lib/glam/ducklake
Environment="PATH=/var/lib/glam/ducklake/venv/bin"
ExecStart=/var/lib/glam/ducklake/venv/bin/uvicorn main:app --host 0.0.0.0 --port 8765
Restart=always
RestartSec=3

[Install]
WantedBy=multi-user.target
EOF
  systemctl daemon-reload
  systemctl enable ducklake
fi

# Restart the DuckLake service
if systemctl is-active --quiet ducklake; then
  echo "Restarting ducklake service..."
  systemctl restart ducklake
else
  echo "Starting ducklake service..."
  systemctl start ducklake
fi

# Wait for service to be ready
sleep 2

# Check service status (a failed start is reported, not treated as fatal)
if systemctl is-active --quiet ducklake; then
  echo "ducklake service is running on port 8765"
else
  echo "Warning: ducklake service failed to start"
  journalctl -u ducklake --no-pager -n 20
fi
ENDSSH

  success "DuckLake API deployment complete"
}

# Reload Oxigraph
reload_oxigraph() {
  section "Reloading Oxigraph Data"

  remote "/var/lib/glam/scripts/load-ontologies.sh"

  success "Oxigraph reload complete"
}

# Final status
print_summary() {
  local domain="${GLAM_DOMAIN:-sparql.glam-ontology.org}"

  echo ""
  info "$RULE_DOUBLE"
  success " Deployment Complete!"
  info "$RULE_DOUBLE"
  echo ""
  echo "Server IP: $SERVER_IP"
  echo "SPARQL Query: https://${domain}/query"
  echo "Frontend: https://${domain}/"
  echo "API: https://${domain}/api/"
  echo "DuckLake: https://${domain}/ducklake/ (port 8765)"
  echo ""
  echo "Check status with: $0 --status"
}

# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------

main() {
  # Validate the command line first so that a typo is reported before any
  # credentials are required.
  parse_args "$@"

  # Load environment variables
  load_env_file "$PROJECT_ROOT/.env"
  HCLOUD_TOKEN="${HETZNER_HC_API_TOKEN:-}"

  # Check for required token
  if [[ -z "$HCLOUD_TOKEN" ]]; then
    error "Error: HETZNER_HC_API_TOKEN not found in .env"
    echo "Please add your Hetzner API token to .env:" >&2
    echo " HETZNER_HC_API_TOKEN=your_token_here" >&2
    exit 1
  fi

  require_commands curl jq ssh

  info "$RULE_DOUBLE"
  info " GLAM Infrastructure Deployment"
  info "$RULE_DOUBLE"
  echo ""

  # Get server IP
  discover_server

  if [[ "$STATUS_ONLY" == true ]]; then
    show_status
    exit 0
  fi

  if [[ "$DEPLOY_INFRA" == true ]]; then
    deploy_infra
  fi
  if [[ "$DEPLOY_DATA" == true ]]; then
    deploy_data
  fi
  if [[ "$DEPLOY_FRONTEND" == true ]]; then
    deploy_frontend
  fi
  if [[ "$DEPLOY_API" == true ]]; then
    deploy_api
  fi
  if [[ "$DEPLOY_DUCKLAKE" == true ]]; then
    deploy_ducklake
  fi
  if [[ "$RELOAD_OXIGRAPH" == true ]]; then
    reload_oxigraph
  fi

  print_summary
}

main "$@"