#!/bin/bash # Automated deployment script for GLAM SPARQL server # Uses Hetzner API to discover server IP and deploys via SSH # # Usage: ./deploy.sh [options] # Options: # --infra Deploy infrastructure changes (Terraform) # --data Deploy ontologies directly to Oxigraph (no intermediate storage) # --frontend Build locally and deploy frontend (bronhouder.nl) - SLOW on low-RAM machines # --frontend-server Build on server and deploy frontend (RECOMMENDED) # --archief Build and deploy archief-assistent (archief.support) # --api Deploy FastAPI backend (DSPy SPARQL generation) # --ducklake Deploy DuckLake API backend # --qdrant Deploy/restart Qdrant vector database # --valkey Deploy Valkey semantic cache (Redis-compatible) # --rag Deploy RAG API via Podman container # --all Deploy everything # --status Check server status only # --clear Clear Oxigraph store before loading (use with --data) # --sync-reviews Sync entity resolution review data FROM server to local set -e SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" # Colors for output RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[1;33m' BLUE='\033[0;34m' NC='\033[0m' # No Color # Load environment variables if [ -f "$PROJECT_ROOT/.env" ]; then # Only export lines that are valid VAR=value assignments (no comments, no empty lines) while IFS='=' read -r key value; do # Skip empty lines, comments, and lines without '=' [[ -z "$key" || "$key" =~ ^# ]] && continue # Remove inline comments from value and trim whitespace value="${value%%#*}" value="${value%"${value##*[![:space:]]}"}" # Export if we have both key and value [[ -n "$key" && -n "$value" ]] && export "$key=$value" done < "$PROJECT_ROOT/.env" fi # Configuration HCLOUD_TOKEN="${HETZNER_HC_API_TOKEN:-}" SERVER_NAME="glam-sparql" SERVER_USER="root" REMOTE_DATA_DIR="/mnt/data" # Used for LinkML/UML static files (if needed in future) # Check for required token if [ -z "$HCLOUD_TOKEN" ]; then echo -e "${RED}Error: HETZNER_HC_API_TOKEN not found in .env${NC}" echo "Please add your Hetzner API token to .env:" echo " HETZNER_HC_API_TOKEN=your_token_here" exit 1 fi # Parse arguments DEPLOY_INFRA=false DEPLOY_DATA=false DEPLOY_FRONTEND=false DEPLOY_ARCHIEF=false DEPLOY_API=false DEPLOY_DUCKLAKE=false DEPLOY_QDRANT=false DEPLOY_VALKEY=false DEPLOY_RAG=false DEPLOY_FRONTEND_SERVER=false CLEAR_OXIGRAPH=false STATUS_ONLY=false SYNC_REVIEWS=false if [ $# -eq 0 ]; then echo "Usage: $0 [--infra] [--data] [--frontend] [--frontend-server] [--archief] [--api] [--ducklake] [--qdrant] [--valkey] [--rag] [--all] [--status] [--clear] [--sync-reviews]" exit 1 fi for arg in "$@"; do case $arg in --infra) DEPLOY_INFRA=true ;; --data) DEPLOY_DATA=true ;; --frontend) DEPLOY_FRONTEND=true ;; --frontend-server) DEPLOY_FRONTEND_SERVER=true ;; --archief) DEPLOY_ARCHIEF=true ;; --api) DEPLOY_API=true ;; --ducklake) DEPLOY_DUCKLAKE=true ;; --qdrant) DEPLOY_QDRANT=true ;; --valkey) DEPLOY_VALKEY=true ;; --rag) DEPLOY_RAG=true ;; --clear) CLEAR_OXIGRAPH=true ;; --all) DEPLOY_INFRA=true 
DEPLOY_DATA=true DEPLOY_FRONTEND_SERVER=true # Use server build (faster, recommended) DEPLOY_ARCHIEF=true DEPLOY_API=true DEPLOY_DUCKLAKE=true DEPLOY_QDRANT=true DEPLOY_VALKEY=true DEPLOY_RAG=true ;; --status) STATUS_ONLY=true ;; --sync-reviews) SYNC_REVIEWS=true ;; *) echo "Unknown option: $arg" exit 1 ;; esac done # Function to get server IP from Hetzner API get_server_ip() { local response=$(curl -s -H "Authorization: Bearer $HCLOUD_TOKEN" \ "https://api.hetzner.cloud/v1/servers?name=$SERVER_NAME") local ip=$(echo "$response" | jq -r '.servers[0].public_net.ipv4.ip // empty') if [ -z "$ip" ] || [ "$ip" = "null" ]; then echo "" else echo "$ip" fi } # Function to check server status check_server_status() { local response=$(curl -s -H "Authorization: Bearer $HCLOUD_TOKEN" \ "https://api.hetzner.cloud/v1/servers?name=$SERVER_NAME") echo "$response" | jq -r '.servers[0] | "Server: \(.name)\nStatus: \(.status)\nIP: \(.public_net.ipv4.ip)\nType: \(.server_type.name)\nLocation: \(.datacenter.name)"' } # Function to wait for SSH wait_for_ssh() { local ip=$1 local max_attempts=30 echo -e "${BLUE}Waiting for SSH to be available on $ip...${NC}" for i in $(seq 1 $max_attempts); do if ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no -o BatchMode=yes \ "$SERVER_USER@$ip" "echo 'connected'" 2>/dev/null; then echo -e "${GREEN}SSH connection established${NC}" return 0 fi echo " Attempt $i/$max_attempts..." sleep 10 done echo -e "${RED}Failed to establish SSH connection${NC}" return 1 } echo -e "${BLUE}════════════════════════════════════════════════════════════════${NC}" echo -e "${BLUE} GLAM Infrastructure Deployment${NC}" echo -e "${BLUE}════════════════════════════════════════════════════════════════${NC}" echo "" # Get server IP echo -e "${YELLOW}Discovering server...${NC}" SERVER_IP=$(get_server_ip) if [ -z "$SERVER_IP" ]; then if [ "$DEPLOY_INFRA" = true ]; then echo -e "${YELLOW}Server not found. 
Will be created by Terraform.${NC}" else echo -e "${RED}Error: Server '$SERVER_NAME' not found in Hetzner Cloud${NC}" echo "Run with --infra to create the server first." exit 1 fi else echo -e "${GREEN}Found server: $SERVER_IP${NC}" fi # Status only mode if [ "$STATUS_ONLY" = true ]; then echo "" echo -e "${BLUE}Server Status:${NC}" check_server_status if [ -n "$SERVER_IP" ]; then echo "" echo -e "${BLUE}Service Status:${NC}" ssh -o StrictHostKeyChecking=no "$SERVER_USER@$SERVER_IP" \ "systemctl is-active oxigraph && echo 'Oxigraph: Running' || echo 'Oxigraph: Stopped'; \ systemctl is-active glam-api && echo 'GLAM API: Running' || echo 'GLAM API: Stopped'; \ systemctl is-active ducklake && echo 'DuckLake API: Running' || echo 'DuckLake API: Stopped'; \ systemctl is-active qdrant && echo 'Qdrant: Running' || echo 'Qdrant: Stopped'; \ systemctl is-active glam-rag-api && echo 'RAG API: Running' || echo 'RAG API: Stopped'; \ cd /var/lib/glam/valkey && docker-compose ps | grep -q 'Up' && echo 'Valkey Cache: Running' || echo 'Valkey Cache: Stopped'; \ systemctl is-active caddy && echo 'Caddy: Running' || echo 'Caddy: Stopped'" echo "" echo -e "${BLUE}SPARQL Triple Count:${NC}" ssh -o StrictHostKeyChecking=no "$SERVER_USER@$SERVER_IP" \ "curl -s -X POST -H 'Content-Type: application/sparql-query' \ -H 'Accept: application/sparql-results+json' \ --data 'SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }' \ http://localhost:7878/query | jq -r '.results.bindings[0].count.value // \"0\"' | xargs -I {} echo '{} triples'" fi exit 0 fi # Deploy infrastructure if [ "$DEPLOY_INFRA" = true ]; then echo "" echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" echo -e "${BLUE} Deploying Infrastructure${NC}" echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" cd "$PROJECT_ROOT/infrastructure/terraform" # Initialize Terraform echo -e "${YELLOW}Initializing Terraform...${NC}" terraform init -upgrade # Create 
# NOTE(review): this physical line is extraction-damaged and is left
# byte-identical. Its original line breaks are gone, and the text between
# `cat > terraform.tfvars <` and `/dev/null 2>&1"; then` appears to be missing
# (presumably the heredoc body, the rest of the Terraform section and the start
# of the data section, where SSH_OPTS and OXIGRAPH_ENDPOINT would be defined -
# confirm against the upstream file before editing).
# Visible content, in order: creation of terraform.tfvars (truncated); the
# error path taken when Oxigraph does not answer; INITIAL_COUNT read through a
# SPARQL COUNT query run over SSH; an optional `CLEAR ALL` update when
# CLEAR_OXIGRAPH is true; load_file(), which streams a local file over SSH to
# `$OXIGRAPH_ENDPOINT/store?default`, prints a check or cross with the file
# name and returns 0 for HTTP 200/201/204 and 1 otherwise; and the opening of
# load_file_silent(), which starts the same upload.
terraform.tfvars if it doesn't exist if [ ! -f terraform.tfvars ]; then echo -e "${YELLOW}Creating terraform.tfvars from environment...${NC}" cat > terraform.tfvars < /dev/null 2>&1"; then echo -e "${RED}Error: Oxigraph not responding at $OXIGRAPH_ENDPOINT on server${NC}" echo "Ensure Oxigraph is running: systemctl status oxigraph" exit 1 fi echo -e "${GREEN}Oxigraph is running${NC}" # Get initial triple count INITIAL_COUNT=$(ssh $SSH_OPTS "$SERVER_USER@$SERVER_IP" "curl -s -X POST \ -H 'Content-Type: application/sparql-query' \ -H 'Accept: application/sparql-results+json' \ --data 'SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }' \ $OXIGRAPH_ENDPOINT/query" | jq -r ".results.bindings[0].count.value // \"0\"") echo -e "${BLUE}Initial triple count: $INITIAL_COUNT${NC}" # Clear store if requested if [ "$CLEAR_OXIGRAPH" = true ]; then echo -e "${YELLOW}Clearing Oxigraph store...${NC}" ssh $SSH_OPTS "$SERVER_USER@$SERVER_IP" "curl -s -X POST \ -H 'Content-Type: application/sparql-update' \ --data 'CLEAR ALL' \ $OXIGRAPH_ENDPOINT/update" > /dev/null echo -e "${GREEN}Store cleared${NC}" fi # Function to load a file to Oxigraph via SSH load_file() { local file="$1" local content_type="$2" local filename=$(basename "$file") # Stream file content via SSH and load to Oxigraph local http_code=$(cat "$file" | ssh $SSH_OPTS "$SERVER_USER@$SERVER_IP" \ "curl -s -X POST \ -H 'Content-Type: $content_type' \ --data-binary @- \ -w '%{http_code}' \ -o /dev/null \ '$OXIGRAPH_ENDPOINT/store?default'" 2>/dev/null) # HTTP 200, 201, 204 are all success codes if [ "$http_code" = "200" ] || [ "$http_code" = "201" ] || [ "$http_code" = "204" ]; then echo -e " ${GREEN}✓${NC} $filename" return 0 else echo -e " ${RED}✗${NC} $filename (HTTP $http_code)" return 1 fi } # Function to load a file silently (for batch operations) load_file_silent() { local file="$1" local content_type="$2" local http_code=$(cat "$file" | ssh $SSH_OPTS "$SERVER_USER@$SERVER_IP" \ "curl -s -X POST \ -H 'Content-Type: 
$content_type' \
      --data-binary @- \
      -w '%{http_code}' \
      -o /dev/null \
      '$OXIGRAPH_ENDPOINT/store?default'" 2>/dev/null)

    # HTTP 200, 201 and 204 all mean the upload was accepted
    case "$http_code" in
      200 | 201 | 204) return 0 ;;
      *) return 1 ;;
    esac
  }

  # Track statistics
  LOADED=0
  FAILED=0

  # Upload one file with load_file and record the outcome.
  # Globals:   LOADED, FAILED (written)
  # Arguments: $1 - local file path, $2 - Content-Type to send
  load_and_count() {
    if load_file "$1" "$2"; then
      LOADED=$((LOADED + 1))
    else
      FAILED=$((FAILED + 1))
    fi
  }

  # Load base ontologies from data/ontology/
  echo ""
  echo -e "${YELLOW}Loading base ontologies...${NC}"

  # Turtle files (the -f test also skips the literal pattern of an unmatched glob)
  for file in "$PROJECT_ROOT/data/ontology"/*.ttl; do
    if [ -f "$file" ]; then
      load_and_count "$file" "text/turtle"
    fi
  done

  # RDF/XML files (.rdf and .owl)
  for file in "$PROJECT_ROOT/data/ontology"/*.rdf "$PROJECT_ROOT/data/ontology"/*.owl; do
    if [ -f "$file" ]; then
      load_and_count "$file" "application/rdf+xml"
    fi
  done

  # JSON-LD files (convert to Turtle first - Oxigraph /store doesn't accept JSON-LD)
  echo ""
  echo -e "${YELLOW}Converting and loading JSON-LD files...${NC}"
  for file in "$PROJECT_ROOT/data/ontology"/*.jsonld; do
    if [ -f "$file" ]; then
      filename=$(basename "$file")
      # Convert JSON-LD to Turtle using rdfpipe
      if command -v rdfpipe &> /dev/null; then
        # mktemp creates the file it names; appending ".ttl" produces a second
        # path, so both are tracked and both are removed afterwards.
        TEMP_BASE=$(mktemp)
        TEMP_TTL="$TEMP_BASE.ttl"
        if rdfpipe -i json-ld -o turtle "$file" > "$TEMP_TTL" 2>/dev/null; then
          load_and_count "$TEMP_TTL" "text/turtle"
        else
          echo -e " ${RED}✗${NC} $filename (JSON-LD conversion failed)"
          FAILED=$((FAILED + 1))
        fi
        rm -f "$TEMP_TTL" "$TEMP_BASE"
      else
        echo -e " ${YELLOW}⚠${NC} $filename (skipped - rdfpipe not installed locally)"
        FAILED=$((FAILED + 1))
      fi
    fi
  done

  # Load generated RDF schemas
  echo ""
  echo -e "${YELLOW}Loading generated RDF schemas...${NC}"

  for file in "$PROJECT_ROOT/schemas/20251121/rdf"/*.ttl; do
    if [ -f "$file" ]; then
      # Skip archived files
      if [[ "$file" == *"archive_"* ]]; then
        continue
      fi
      load_and_count "$file" "text/turtle"
    fi
  done

  # Load NDE Heritage Custodian RDF files
  # Strategy:
Transfer files to server, then load locally in parallel (much faster) if [ -d "$PROJECT_ROOT/data/nde/rdf" ]; then echo "" echo -e "${YELLOW}Loading NDE Heritage Custodian data...${NC}" NDE_TOTAL=$(ls -1 "$PROJECT_ROOT/data/nde/rdf"/*.ttl 2>/dev/null | wc -l | tr -d ' ') echo " Total files: $NDE_TOTAL" # Step 1: Sync NDE files to server echo " Step 1: Syncing files to server..." rsync -az --progress \ -e "ssh -o StrictHostKeyChecking=no" \ "$PROJECT_ROOT/data/nde/rdf/" \ "$SERVER_USER@$SERVER_IP:/tmp/nde_rdf/" # Step 2: Load files on server using parallel processing echo " Step 2: Loading files to Oxigraph (parallel)..." NDE_RESULT=$(ssh $SSH_OPTS "$SERVER_USER@$SERVER_IP" 'bash -s' << 'REMOTE_SCRIPT' cd /tmp/nde_rdf LOADED=0 FAILED=0 FAILED_FILES="" for file in *.ttl; do if [ -f "$file" ]; then http_code=$(curl -s -X POST \ -H "Content-Type: text/turtle" \ --data-binary "@$file" \ -w "%{http_code}" \ -o /dev/null \ "http://127.0.0.1:7878/store?default") if [ "$http_code" = "200" ] || [ "$http_code" = "201" ] || [ "$http_code" = "204" ]; then LOADED=$((LOADED + 1)) else FAILED=$((FAILED + 1)) FAILED_FILES="$FAILED_FILES $file" fi # Progress every 200 files TOTAL=$((LOADED + FAILED)) if [ $((TOTAL % 200)) -eq 0 ]; then echo " Progress: $TOTAL files processed" >&2 fi fi done echo "LOADED=$LOADED" echo "FAILED=$FAILED" echo "FAILED_FILES=$FAILED_FILES" # Cleanup rm -rf /tmp/nde_rdf REMOTE_SCRIPT ) # Parse results NDE_LOADED=$(echo "$NDE_RESULT" | grep "^LOADED=" | cut -d= -f2) NDE_FAILED=$(echo "$NDE_RESULT" | grep "^FAILED=" | cut -d= -f2) NDE_FAILED_FILES=$(echo "$NDE_RESULT" | grep "^FAILED_FILES=" | cut -d= -f2-) LOADED=$((LOADED + ${NDE_LOADED:-0})) FAILED=$((FAILED + ${NDE_FAILED:-0})) echo -e " ${GREEN}Loaded: ${NDE_LOADED:-0} NDE heritage custodian files${NC}" if [ "${NDE_FAILED:-0}" -gt 0 ]; then echo -e " ${RED}Failed: $NDE_FAILED files${NC}" echo -e " ${RED}Failed files:${NC}$NDE_FAILED_FILES" fi fi # Get final triple count echo "" FINAL_COUNT=$(ssh 
$SSH_OPTS "$SERVER_USER@$SERVER_IP" "curl -s -X POST \ -H 'Content-Type: application/sparql-query' \ -H 'Accept: application/sparql-results+json' \ --data 'SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }' \ $OXIGRAPH_ENDPOINT/query" | jq -r ".results.bindings[0].count.value // \"0\"") ADDED=$((FINAL_COUNT - INITIAL_COUNT)) echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" echo -e "${GREEN}Data deployment complete${NC}" echo -e " Files loaded: ${GREEN}$LOADED${NC}" echo -e " Files failed: ${RED}$FAILED${NC}" echo -e " Triples added: ${BLUE}$ADDED${NC}" echo -e " Total triples: ${BLUE}$FINAL_COUNT${NC}" fi # Deploy frontend if [ "$DEPLOY_FRONTEND" = true ]; then echo "" echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" echo -e "${BLUE} Building & Deploying Frontend${NC}" echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" cd "$PROJECT_ROOT/frontend" # Build frontend (uses pnpm for workspace support) echo -e "${YELLOW}Building frontend...${NC}" pnpm install pnpm run build # Deploy to server echo -e "${YELLOW}Deploying frontend to server...${NC}" rsync -avz --progress --delete \ -e "ssh -o StrictHostKeyChecking=no" \ dist/ \ "$SERVER_USER@$SERVER_IP:/var/www/glam-frontend/" cd "$PROJECT_ROOT" echo -e "${GREEN}Frontend deployment complete${NC}" fi # Deploy frontend (server-side build - recommended for low-RAM machines) if [ "$DEPLOY_FRONTEND_SERVER" = true ]; then echo "" echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" echo -e "${BLUE} Building Frontend on Server (Recommended)${NC}" echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" # Push local changes first echo -e "${YELLOW}Pushing local changes to git...${NC}" cd "$PROJECT_ROOT" git push origin master 2>&1 || echo " (Nothing to push or push failed - continuing anyway)" # Build on server echo -e "${YELLOW}Building on server (pulling, 
installing, building, deploying)...${NC}" ssh -o StrictHostKeyChecking=no "$SERVER_USER@$SERVER_IP" << 'ENDSSH' set -e cd /opt/glam # Pull latest changes echo "Pulling latest changes..." git pull origin master # Install dependencies if needed echo "Installing dependencies..." pnpm install --frozen-lockfile 2>/dev/null || pnpm install # Build frontend echo "Building frontend..." cd frontend pnpm build # Deploy to production echo "Deploying to /var/www/glam-frontend/..." rsync -av --delete dist/ /var/www/glam-frontend/ echo "Build and deployment complete!" ENDSSH cd "$PROJECT_ROOT" echo -e "${GREEN}Server-side frontend deployment complete${NC}" fi # Deploy archief-assistent (archief.support) if [ "$DEPLOY_ARCHIEF" = true ]; then echo "" echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" echo -e "${BLUE} Building & Deploying ArchiefAssistent (archief.support)${NC}" echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" cd "$PROJECT_ROOT/apps/archief-assistent" # Build archief-assistent echo -e "${YELLOW}Building archief-assistent...${NC}" pnpm install pnpm run build # Ensure remote directory exists echo -e "${YELLOW}Setting up archief-assistent directory on server...${NC}" ssh -o StrictHostKeyChecking=no "$SERVER_USER@$SERVER_IP" \ "mkdir -p /var/www/archief-assistent" # Deploy to server echo -e "${YELLOW}Deploying archief-assistent to server...${NC}" rsync -avz --progress --delete \ -e "ssh -o StrictHostKeyChecking=no" \ dist/ \ "$SERVER_USER@$SERVER_IP:/var/www/archief-assistent/" cd "$PROJECT_ROOT" echo -e "${GREEN}ArchiefAssistent deployment complete${NC}" fi # Deploy API if [ "$DEPLOY_API" = true ]; then echo "" echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" echo -e "${BLUE} Deploying FastAPI Backend (DSPy SPARQL Generation)${NC}" echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" # CRITICAL: Backup review data before 
# deployment to prevent data loss
  # The remote command separates "no backup script installed yet" (reported,
  # deployment continues) from "backup script ran and failed" (deployment is
  # aborted so review data is not overwritten without a backup).
  echo -e "${YELLOW}Creating backup of review data before deployment...${NC}"
  if ! ssh -o StrictHostKeyChecking=no "$SERVER_USER@$SERVER_IP" \
    "if [ -x /var/lib/glam/api/backup-reviews.sh ]; then \
       /var/lib/glam/api/backup-reviews.sh; \
     else \
       echo 'No backup script yet - will be created'; \
     fi"; then
    echo -e "${RED}Error: review data backup failed - aborting API deployment${NC}" >&2
    exit 1
  fi

  # Ensure remote directories exist with proper Python package structure
  echo -e "${YELLOW}Setting up API directory on server...${NC}"
  ssh -o StrictHostKeyChecking=no "$SERVER_USER@$SERVER_IP" \
    "mkdir -p /var/lib/glam/api/src/glam_extractor/api && \
     mkdir -p /var/lib/glam/api/src/glam_extractor/geocoding && \
     mkdir -p /var/lib/glam/api/backend/rag/optimized_models && \
     touch /var/lib/glam/api/src/__init__.py && \
     touch /var/lib/glam/api/src/glam_extractor/__init__.py && \
     touch /var/lib/glam/api/src/glam_extractor/geocoding/__init__.py && \
     touch /var/lib/glam/api/backend/__init__.py && \
     touch /var/lib/glam/api/backend/rag/__init__.py"

  # Sync API source code (preserving package structure)
  echo -e "${YELLOW}Syncing API source code...${NC}"
  rsync -avz --progress \
    -e "ssh -o StrictHostKeyChecking=no" \
    --exclude="__pycache__" \
    --exclude="*.pyc" \
    --exclude=".pytest_cache" \
    "$PROJECT_ROOT/src/glam_extractor/api/" \
    "$SERVER_USER@$SERVER_IP:/var/lib/glam/api/src/glam_extractor/api/"

  # Sync geocoding module (required by hybrid_retriever for polygon filtering)
  echo -e "${YELLOW}Syncing geocoding module...${NC}"
  rsync -avz --progress \
    -e "ssh -o StrictHostKeyChecking=no" \
    --exclude="__pycache__" \
    --exclude="*.pyc" \
    "$PROJECT_ROOT/src/glam_extractor/geocoding/" \
    "$SERVER_USER@$SERVER_IP:/var/lib/glam/api/src/glam_extractor/geocoding/"

  # Sync DSPy RAG backend code
  echo -e "${YELLOW}Syncing DSPy Heritage RAG backend...${NC}"
  rsync -avz --progress \
    -e "ssh -o StrictHostKeyChecking=no" \
    --exclude="__pycache__" \
    --exclude="*.pyc" \
    --exclude=".pytest_cache" \
    --exclude="*.log" \
    "$PROJECT_ROOT/backend/rag/" \
    "$SERVER_USER@$SERVER_IP:/var/lib/glam/api/backend/rag/"

  # Sync LinkML schemas (required by
ontology_mapping for enum definitions) echo -e "${YELLOW}Syncing LinkML schemas...${NC}" ssh -o StrictHostKeyChecking=no "$SERVER_USER@$SERVER_IP" \ "mkdir -p /var/lib/glam/api/schemas/20251121/linkml/modules/enums && \ mkdir -p /var/lib/glam/api/schemas/20251121/linkml/modules/classes" rsync -avz --progress \ -e "ssh -o StrictHostKeyChecking=no" \ "$PROJECT_ROOT/schemas/20251121/linkml/" \ "$SERVER_USER@$SERVER_IP:/var/lib/glam/api/schemas/20251121/linkml/" # Sync requirements file echo -e "${YELLOW}Syncing API requirements...${NC}" rsync -avz --progress \ -e "ssh -o StrictHostKeyChecking=no" \ "$PROJECT_ROOT/infrastructure/api-requirements.txt" \ "$SERVER_USER@$SERVER_IP:/var/lib/glam/api/requirements.txt" # Install/update Python dependencies and restart service echo -e "${YELLOW}Installing Python dependencies and restarting API service...${NC}" ssh -o StrictHostKeyChecking=no "$SERVER_USER@$SERVER_IP" << 'ENDSSH' set -e # Create virtual environment if it doesn't exist if [ ! -d /var/lib/glam/api/venv ]; then echo "Creating Python virtual environment..." python3 -m venv /var/lib/glam/api/venv fi # Activate and install dependencies source /var/lib/glam/api/venv/bin/activate pip install --upgrade pip pip install -r /var/lib/glam/api/requirements.txt # Ensure PYTHONPATH is set in systemd service for glam_extractor imports if ! grep -q "PYTHONPATH" /etc/systemd/system/glam-api.service 2>/dev/null; then echo "Adding PYTHONPATH to glam-api.service..." sed -i '/^Environment=PATH/a Environment=PYTHONPATH=/var/lib/glam/api/src' /etc/systemd/system/glam-api.service systemctl daemon-reload fi # Restart the API service if systemctl is-active --quiet glam-api; then echo "Restarting glam-api service..." systemctl restart glam-api else echo "Starting glam-api service..." 
systemctl start glam-api fi # Wait for service to be ready sleep 2 # Check service status if systemctl is-active --quiet glam-api; then echo "glam-api service is running" else echo "Warning: glam-api service failed to start" journalctl -u glam-api --no-pager -n 20 fi ENDSSH echo -e "${GREEN}API deployment complete${NC}" fi # Deploy DuckLake API if [ "$DEPLOY_DUCKLAKE" = true ]; then echo "" echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" echo -e "${BLUE} Deploying DuckLake API Backend (Time Travel & Schema Evolution)${NC}" echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" # Ensure remote directories exist echo -e "${YELLOW}Setting up DuckLake API directory on server...${NC}" ssh -o StrictHostKeyChecking=no "$SERVER_USER@$SERVER_IP" \ "mkdir -p /var/lib/glam/ducklake/{data,catalog}" # Sync DuckLake API source code echo -e "${YELLOW}Syncing DuckLake API source code...${NC}" rsync -avz --progress \ -e "ssh -o StrictHostKeyChecking=no" \ --exclude="__pycache__" \ --exclude="*.pyc" \ --exclude=".pytest_cache" \ --exclude="*.db" \ --exclude="data/" \ --exclude="catalog/" \ "$PROJECT_ROOT/backend/ducklake/" \ "$SERVER_USER@$SERVER_IP:/var/lib/glam/ducklake/" # Install/update Python dependencies and set up service echo -e "${YELLOW}Installing Python dependencies and setting up DuckLake service...${NC}" ssh -o StrictHostKeyChecking=no "$SERVER_USER@$SERVER_IP" << 'ENDSSH' set -e # Create virtual environment if it doesn't exist if [ ! -d /var/lib/glam/ducklake/venv ]; then echo "Creating Python virtual environment..." python3 -m venv /var/lib/glam/ducklake/venv fi # Activate and install dependencies source /var/lib/glam/ducklake/venv/bin/activate pip install --upgrade pip pip install -r /var/lib/glam/ducklake/requirements.txt # Create systemd service if it doesn't exist if [ ! -f /etc/systemd/system/ducklake.service ]; then echo "Creating DuckLake systemd service..." 
cat > /etc/systemd/system/ducklake.service << 'EOF' [Unit] Description=DuckLake API Server After=network.target [Service] Type=simple User=root WorkingDirectory=/var/lib/glam/ducklake Environment="PATH=/var/lib/glam/ducklake/venv/bin" ExecStart=/var/lib/glam/ducklake/venv/bin/uvicorn main:app --host 0.0.0.0 --port 8765 Restart=always RestartSec=3 [Install] WantedBy=multi-user.target EOF systemctl daemon-reload systemctl enable ducklake fi # Restart the DuckLake service if systemctl is-active --quiet ducklake; then echo "Restarting ducklake service..." systemctl restart ducklake else echo "Starting ducklake service..." systemctl start ducklake fi # Wait for service to be ready sleep 2 # Check service status if systemctl is-active --quiet ducklake; then echo "ducklake service is running on port 8765" else echo "Warning: ducklake service failed to start" journalctl -u ducklake --no-pager -n 20 fi ENDSSH echo -e "${GREEN}DuckLake API deployment complete${NC}" fi # Deploy Valkey Semantic Cache if [ "$DEPLOY_VALKEY" = true ]; then echo "" echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" echo -e "${BLUE} Deploying Valkey Semantic Cache${NC}" echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" # Ensure remote directories exist echo -e "${YELLOW}Setting up Valkey directories on server...${NC}" ssh -o StrictHostKeyChecking=no "$SERVER_USER@$SERVER_IP" \ "mkdir -p /var/lib/glam/valkey && mkdir -p /mnt/data/valkey && chown -R glam:glam /mnt/data/valkey" # Sync Valkey backend source code echo -e "${YELLOW}Syncing Valkey API source code...${NC}" rsync -avz --progress \ -e "ssh -o StrictHostKeyChecking=no" \ --exclude="__pycache__" \ --exclude="*.pyc" \ --exclude=".pytest_cache" \ "$PROJECT_ROOT/backend/valkey/" \ "$SERVER_USER@$SERVER_IP:/var/lib/glam/valkey/" echo -e "${YELLOW}Building and starting Valkey services...${NC}" ssh -o StrictHostKeyChecking=no "$SERVER_USER@$SERVER_IP" << 'ENDSSH' set -e # Ensure Docker 
is installed if ! command -v docker &> /dev/null; then echo "Installing Docker..." curl -fsSL https://get.docker.com | sh systemctl enable docker systemctl start docker fi # Ensure docker-compose is installed if ! command -v docker-compose &> /dev/null; then echo "Installing docker-compose..." curl -L "https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose chmod +x /usr/local/bin/docker-compose fi # Build and start Valkey services echo "Building and starting Valkey services..." cd /var/lib/glam/valkey docker-compose down || true docker-compose build --no-cache docker-compose up -d # Wait for services to be ready echo "Waiting for Valkey API to be ready..." sleep 10 # Check service status if docker-compose ps | grep -q "Up"; then echo "Valkey services are running" echo "Valkey: localhost:6379 (internal)" echo "Valkey API: localhost:8090 (internal)" # Test the health endpoint if curl -s http://localhost:8090/health | grep -q "healthy"; then echo "Valkey API health check: OK" else echo "Valkey API health check: waiting..." sleep 5 curl -s http://localhost:8090/health || echo "Still starting up..." 
fi else echo "Warning: Valkey services failed to start" docker-compose logs fi ENDSSH echo -e "${GREEN}Valkey deployment complete${NC}" echo -e " Valkey: localhost:6379 (internal)" echo -e " Valkey API: localhost:8090 (internal)" echo -e " External: https://${GLAM_DOMAIN:-sparql.glam-ontology.org}/api/cache/" fi # Deploy Qdrant if [ "$DEPLOY_QDRANT" = true ]; then echo "" echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" echo -e "${BLUE} Deploying Qdrant Vector Database${NC}" echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" # Ensure remote directories exist and Docker is installed echo -e "${YELLOW}Setting up Qdrant on server...${NC}" ssh -o StrictHostKeyChecking=no "$SERVER_USER@$SERVER_IP" << 'ENDSSH' set -e # Create data directories mkdir -p /mnt/data/qdrant/storage mkdir -p /mnt/data/qdrant/snapshots chown -R glam:glam /mnt/data/qdrant # Ensure Docker is installed if ! command -v docker &> /dev/null; then echo "Installing Docker..." curl -fsSL https://get.docker.com | sh systemctl enable docker systemctl start docker fi # Pull latest Qdrant image echo "Pulling Qdrant image..." docker pull qdrant/qdrant:latest # Create systemd service if it doesn't exist if [ ! -f /etc/systemd/system/qdrant.service ]; then echo "Creating Qdrant systemd service..." 
cat > /etc/systemd/system/qdrant.service << 'EOF' [Unit] Description=Qdrant Vector Database After=network.target docker.service Requires=docker.service [Service] Type=simple Restart=always RestartSec=10 ExecStartPre=-/usr/bin/docker stop qdrant ExecStartPre=-/usr/bin/docker rm qdrant ExecStart=/usr/bin/docker run --name qdrant \ -p 127.0.0.1:6333:6333 \ -p 127.0.0.1:6334:6334 \ -v /mnt/data/qdrant/storage:/qdrant/storage:z \ -v /mnt/data/qdrant/snapshots:/qdrant/snapshots:z \ --memory=2g \ --cpus=2 \ qdrant/qdrant:latest ExecStop=/usr/bin/docker stop qdrant [Install] WantedBy=multi-user.target EOF systemctl daemon-reload systemctl enable qdrant fi # Restart the Qdrant service if systemctl is-active --quiet qdrant; then echo "Restarting qdrant service..." systemctl restart qdrant else echo "Starting qdrant service..." systemctl start qdrant fi # Wait for service to be ready echo "Waiting for Qdrant to be ready..." sleep 5 # Check service status if systemctl is-active --quiet qdrant; then echo "Qdrant service is running" # Check health if curl -s http://127.0.0.1:6333/health | grep -q "ok"; then echo "Qdrant health check: OK" else echo "Qdrant health check: waiting..." sleep 5 curl -s http://127.0.0.1:6333/health || echo "Still starting up..." 
# NOTE(review): everything up to the first ENDSSH is the tail of the Qdrant remote script; its start is outside this chunk, so it is kept unchanged.
        fi
    else
        echo "Warning: qdrant service failed to start"
        journalctl -u qdrant --no-pager -n 20
    fi

    # Show Qdrant info
    echo ""
    echo "Qdrant collections:"
    curl -s http://127.0.0.1:6333/collections | jq -r '.result.collections[] | " - \(.name)"' 2>/dev/null || echo " (none yet)"
ENDSSH

    echo -e "${GREEN}Qdrant deployment complete${NC}"
    echo -e " REST API: http://localhost:6333 (internal)"
    echo -e " gRPC API: http://localhost:6334 (internal)"
    echo -e " External: https://${GLAM_DOMAIN:-sparql.glam-ontology.org}/qdrant/"
fi

# ---------------------------------------------------------------------------
# Deploy RAG API via Podman
#
# Steps: create the remote directory, rsync backend/rag to the server, then run
# a remote script that installs Podman if missing, builds the image, writes the
# systemd unit and restarts the service.
# Reads: DEPLOY_RAG, SERVER_USER, SERVER_IP, PROJECT_ROOT and the colour vars.
# ---------------------------------------------------------------------------
if [ "$DEPLOY_RAG" = true ]; then
    echo ""
    echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
    echo -e "${BLUE} Deploying RAG API via Podman Container${NC}"
    echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"

    # Ensure remote directories exist
    echo -e "${YELLOW}Setting up RAG API directory on server...${NC}"
    ssh -o StrictHostKeyChecking=no "$SERVER_USER@$SERVER_IP" \
        "mkdir -p /opt/glam-backend/rag"

    # Sync RAG backend source code
    echo -e "${YELLOW}Syncing RAG API source code...${NC}"
    rsync -avz --progress \
        -e "ssh -o StrictHostKeyChecking=no" \
        --exclude="__pycache__" \
        --exclude="*.pyc" \
        --exclude=".pytest_cache" \
        --exclude="*.log" \
        --exclude="benchmark_results" \
        "$PROJECT_ROOT/backend/rag/" \
        "$SERVER_USER@$SERVER_IP:/opt/glam-backend/rag/"

    # Install Podman, build image, and start service.
    # The heredoc delimiter is quoted, so nothing below is expanded locally;
    # every $(...) runs on the server.
    echo -e "${YELLOW}Building and starting RAG API container...${NC}"
    ssh -o StrictHostKeyChecking=no "$SERVER_USER@$SERVER_IP" << 'ENDSSH'
    set -e

    # Install Podman if not present
    if ! command -v podman &> /dev/null; then
        echo "Installing Podman..."
        apt-get update
        apt-get install -y podman
    fi

    cd /opt/glam-backend/rag

    # Stop existing container if running.
    # Stop the systemd unit first: it has Restart=always, so stopping only the
    # container would let systemd respawn the old one while the image builds.
    # Errors are ignored on purpose (first deploy: no unit, no container).
    echo "Stopping existing RAG container (if any)..."
    systemctl stop glam-rag-api 2>/dev/null || true
    podman stop glam-rag-api 2>/dev/null || true
    podman rm glam-rag-api 2>/dev/null || true

    # Build the container image
    echo "Building RAG API container image..."
    podman build -t glam-rag-api:latest \
        --build-arg BUILD_DATE="$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \
        --build-arg VCS_REF="$(git rev-parse --short HEAD 2>/dev/null || echo 'unknown')" \
        .

    # Create systemd service for Podman container
    echo "Creating systemd service for RAG API..."
    cat > /etc/systemd/system/glam-rag-api.service << 'EOF'
[Unit]
Description=GLAM Heritage RAG API (Podman)
After=network.target qdrant.service
Wants=qdrant.service

[Service]
Type=simple
Restart=always
RestartSec=10

# Load environment from .env file
EnvironmentFile=/var/lib/glam/.env

# Run podman container with host networking
ExecStart=/usr/bin/podman run --rm --name glam-rag-api \
    --network host \
    -e OPENAI_API_KEY \
    -e ZAI_API_TOKEN \
    -e QDRANT_HOST=localhost \
    -e QDRANT_PORT=6333 \
    -e QDRANT_COLLECTION=heritage_custodians_minilm \
    -e EMBEDDING_MODEL=all-MiniLM-L6-v2 \
    -e EMBEDDING_DIM=384 \
    -e TYPEDB_HOST=localhost \
    -e TYPEDB_PORT=1729 \
    -e TYPEDB_DATABASE=glam \
    -e SPARQL_ENDPOINT=http://localhost:7878/query \
    -e VALKEY_CACHE_URL=http://localhost:8090 \
    -e POSTGIS_HOST=localhost \
    -e POSTGIS_PORT=5432 \
    -e POSTGIS_DATABASE=glam \
    -e LLM_PROVIDER=openai \
    -e LLM_MODEL=gpt-4.1-mini \
    -v glam-rag-optimized-models:/app/optimized_models:z \
    glam-rag-api:latest

ExecStop=/usr/bin/podman stop glam-rag-api

[Install]
WantedBy=multi-user.target
EOF

    # Reload systemd and start service
    systemctl daemon-reload
    systemctl enable glam-rag-api
    systemctl restart glam-rag-api

    # Wait for service to be ready
    echo "Waiting for RAG API to be ready..."
    sleep 10

    # Check service status
    if systemctl is-active --quiet glam-rag-api; then
        echo "RAG API service is running"
        # Test health endpoint (up to 5 attempts, 5 seconds apart).
        # NOTE(review): the pattern also matches the substring "unhealthy";
        # confirm the /health response format before relying on this check.
        rag_healthy=false
        for i in 1 2 3 4 5; do
            if curl -s http://localhost:8010/health | grep -q "healthy\|ok"; then
                echo "RAG API health check: OK"
                rag_healthy=true
                break
            fi
            echo " Waiting for health check ($i/5)..."
            sleep 5
        done
        # Previously a never-healthy service was not reported at all.
        if [ "$rag_healthy" != true ]; then
            echo "Warning: RAG API health check did not pass after 5 attempts"
        fi
    else
        echo "Warning: RAG API service failed to start"
        journalctl -u glam-rag-api --no-pager -n 30
    fi
ENDSSH

    echo -e "${GREEN}RAG API deployment complete${NC}"
    echo -e " RAG API: http://localhost:8010 (internal)"
    echo -e " External: https://bronhouder.nl/api/rag/"
fi

# ---------------------------------------------------------------------------
# Sync review data from server
#
# Downloads entity_resolution_candidates.json from the server over the local
# copy, after saving a timestamped backup of the local file. Review statistics
# are printed before and after; they are informational only, so a failure to
# compute them prints a warning instead of aborting the sync.
# Reads: SYNC_REVIEWS, SERVER_USER, SERVER_IP, PROJECT_ROOT and the colour vars.
# ---------------------------------------------------------------------------
if [ "$SYNC_REVIEWS" = true ]; then
    echo ""
    echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
    echo -e "${BLUE} Syncing Entity Resolution Review Data FROM Server${NC}"
    echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"

    LOCAL_ER_DIR="$PROJECT_ROOT/data/entity_resolution"
    SERVER_ER_DIR="/var/lib/glam/api/data/entity_resolution"
    BACKUP_DIR="$LOCAL_ER_DIR/backups"

    # Create backup directory if needed
    mkdir -p "$BACKUP_DIR"

    # Check if server file exists
    echo -e "${YELLOW}Checking server data...${NC}"
    SERVER_FILE_EXISTS=$(ssh -o StrictHostKeyChecking=no "$SERVER_USER@$SERVER_IP" \
        "test -f '$SERVER_ER_DIR/entity_resolution_candidates.json' && echo 'yes' || echo 'no'")

    if [ "$SERVER_FILE_EXISTS" = "yes" ]; then
        # Get stats from server.
        # The Python script is streamed over ssh stdin ("python3 -") and gets
        # the file path as argv[1]. Values are computed into locals first so no
        # f-string reuses its own quote type inside a replacement field, which
        # is a SyntaxError before Python 3.12.
        echo -e "${YELLOW}Server review stats:${NC}"
        ssh -o StrictHostKeyChecking=no "$SERVER_USER@$SERVER_IP" \
            "python3 - '$SERVER_ER_DIR/entity_resolution_candidates.json'" << 'PYEOF' || echo -e "${YELLOW}Warning: could not compute review stats on server (continuing)${NC}"
import json, sys

with open(sys.argv[1], encoding="utf-8") as fh:
    d = json.load(fh)

candidates = d.get("candidates", [])
reviewed = [c for c in candidates if c.get("reviewed")]
matches = [c for c in reviewed if c.get("review_decision") == "match"]
last_review = d.get("metadata", {}).get("last_review_at", "N/A")

print(f" Total candidates: {len(candidates)}")
print(f" Reviewed: {len(reviewed)} ({len(matches)} matches)")
print(f" Last review: {last_review}")
PYEOF

        # Backup local file
        if [ -f "$LOCAL_ER_DIR/entity_resolution_candidates.json" ]; then
            BACKUP_FILE="$BACKUP_DIR/entity_resolution_candidates_local_$(date +%Y%m%d_%H%M%S).json"
            echo -e "${YELLOW}Backing up local file to: $BACKUP_FILE${NC}"
            cp "$LOCAL_ER_DIR/entity_resolution_candidates.json" "$BACKUP_FILE"
        fi

        # Download server file
        echo -e "${YELLOW}Downloading server data...${NC}"
        rsync -avz --progress \
            -e "ssh -o StrictHostKeyChecking=no" \
            "$SERVER_USER@$SERVER_IP:$SERVER_ER_DIR/entity_resolution_candidates.json" \
            "$LOCAL_ER_DIR/entity_resolution_candidates.json"

        echo -e "${GREEN}Review data synced successfully${NC}"

        # Show local stats (best-effort; the download has already succeeded)
        echo -e "${YELLOW}Local file now contains:${NC}"
        python3 -c '
import json,sys
d=json.load(sys.stdin)
reviewed=[c for c in d.get("candidates",[]) if c.get("reviewed")]
matches=[c for c in reviewed if c.get("review_decision")=="match"]
print(f" Reviewed: {len(reviewed)} ({len(matches)} matches)")
' < "$LOCAL_ER_DIR/entity_resolution_candidates.json" \
            || echo -e "${YELLOW}Warning: could not compute local review stats${NC}"
    else
        echo -e "${RED}No entity resolution data found on server${NC}"
        echo "Server path: $SERVER_ER_DIR/entity_resolution_candidates.json"
    fi
fi

# Final status: summary of the public endpoints
echo ""
echo -e "${BLUE}════════════════════════════════════════════════════════════════${NC}"
echo -e "${GREEN} Deployment Complete!${NC}"
echo -e "${BLUE}════════════════════════════════════════════════════════════════${NC}"
echo ""
echo "Server IP: $SERVER_IP"
echo "SPARQL Query: https://${GLAM_DOMAIN:-sparql.glam-ontology.org}/query"
echo "Frontend (bronhouder.nl): https://bronhouder.nl/"
echo "ArchiefAssistent (archief.support): https://archief.support/"
echo "API: https://${GLAM_DOMAIN:-sparql.glam-ontology.org}/api/"
echo "DuckLake: https://${GLAM_DOMAIN:-sparql.glam-ontology.org}/ducklake/ (port 8765)"
echo "Qdrant: https://${GLAM_DOMAIN:-sparql.glam-ontology.org}/qdrant/ (REST API)"
echo "Valkey Cache: https://${GLAM_DOMAIN:-sparql.glam-ontology.org}/api/cache/ (Semantic Cache API)"
echo "RAG API: https://bronhouder.nl/api/rag/ (Heritage Knowledge Assistant)"
echo ""
echo "Check status with: $0 --status"