glam/infrastructure/terraform/cloud-init.yaml
2025-11-30 23:30:29 +01:00

303 lines
7.7 KiB
YAML

#cloud-config
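# Cloud-init configuration for the GLAM SPARQL server.
# Applied on first boot to provision the server. The file appears to be rendered
# as a Terraform template, so any literal dollar-brace or percent-brace sequence
# (shell variables, curl write-out formats) must have its first character doubled.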
# Refresh the package index and apply pending upgrades before installing anything.
package_update: true
package_upgrade: true

# Base tooling. gnupg is listed explicitly because the Caddy repository key is
# converted with `gpg --dearmor` during runcmd and minimal images may not ship it.
packages:
- curl
- wget
- unzip
- htop
- vim
- git
- jq
- gnupg
- apt-transport-https
- ca-certificates
- debian-keyring
- debian-archive-keyring
# Create users and groups
groups:
- glam

# Service account that owns the data volume and runs Oxigraph.
# The glam group is created above, so it is assigned as the primary group
# explicitly; without this, useradd would try to create a second group named
# after the user and is expected to refuse because the group already exists.
users:
- name: glam
  primary_group: glam
  shell: /bin/bash
  system: true
  home: /var/lib/glam
# Write configuration files
write_files:
# Oxigraph systemd service.
# Runs the server as the glam user, bound to loopback only, with its store on
# the data volume. RequiresMountsFor adds both the requirement and the ordering
# on whatever mount backs /mnt/data, and does not fail when /mnt/data is a plain
# directory (the volume is optional in runcmd).
- path: /etc/systemd/system/oxigraph.service
  content: |
    [Unit]
    Description=Oxigraph SPARQL Server
    After=network.target
    RequiresMountsFor=/mnt/data

    [Service]
    Type=simple
    User=glam
    Group=glam
    ExecStart=/usr/local/bin/oxigraph serve \
        --location /mnt/data/oxigraph \
        --bind 127.0.0.1:7878
    Restart=always
    RestartSec=10
    StandardOutput=journal
    StandardError=journal

    # Security hardening
    NoNewPrivileges=true
    ProtectSystem=strict
    ProtectHome=true
    ReadWritePaths=/mnt/data/oxigraph
    PrivateTmp=true

    [Install]
    WantedBy=multi-user.target
# Caddy configuration for reverse proxy.
# Terminates TLS for the site, proxies the SPARQL endpoints to Oxigraph on
# localhost:7878 and serves the frontend plus static schema files.
# NOTE(review): /update* and /store* are proxied with no authentication
# directive in this site block, so any client that can reach the domain can
# presumably modify the store - confirm this is intended or add access control.
- path: /etc/caddy/Caddyfile
  content: |
    ${domain} {
        # TLS with automatic HTTPS
        tls ${admin_email}

        # SPARQL Query endpoint
        handle /query* {
            reverse_proxy localhost:7878
        }

        # SPARQL Update endpoint
        handle /update* {
            reverse_proxy localhost:7878
        }

        # Graph Store Protocol
        handle /store* {
            reverse_proxy localhost:7878
        }

        # Static ontology files. The files live in /mnt/data/ontologies, so the
        # /ontology URL prefix is stripped before resolving against that root.
        handle /ontology/* {
            uri strip_prefix /ontology
            root * /mnt/data/ontologies
            file_server browse
        }

        # Static LinkML files
        handle /linkml/* {
            root * /mnt/data
            file_server browse
        }

        # Static UML files
        handle /uml/* {
            root * /mnt/data
            file_server browse
        }

        # Health check endpoint
        handle /health {
            respond "OK" 200
        }

        # YASGUI SPARQL Editor: fallback for every request not matched above,
        # so frontend assets are served and unknown paths fall back to index.html
        handle {
            root * /var/www/glam-frontend
            try_files {path} /index.html
            file_server
        }

        # CORS headers for SPARQL endpoints
        header Access-Control-Allow-Origin "*"
        header Access-Control-Allow-Methods "GET, POST, OPTIONS"
        header Access-Control-Allow-Headers "Content-Type, Accept"

        # Compression
        encode gzip zstd

        # Logging
        log {
            output file /var/log/caddy/access.log {
                roll_size 100mb
                roll_keep 5
            }
        }
    }
# Script to load ontologies into Oxigraph.
# Posts every Turtle, RDF/XML and N-Triples file found in the ontology directory
# to the default graph of the local Graph Store endpoint, then prints the triple
# count. The curl write-out format keeps its doubled percent sign because this
# file contains template escapes.
- path: /var/lib/glam/scripts/load-ontologies.sh
  permissions: '0755'
  content: |
    #!/bin/bash
    set -e

    OXIGRAPH_ENDPOINT="http://127.0.0.1:7878"
    ONTOLOGY_DIR="/mnt/data/ontologies"
    # Give up after this many 2-second polls instead of waiting forever
    MAX_WAIT_ATTEMPTS=60

    # Usage: load_files CONTENT_TYPE FILE...
    # POSTs each existing file to the default graph and prints the HTTP status.
    load_files() {
      local content_type="$1"
      shift
      local file
      for file in "$@"; do
        # An unmatched glob is passed through literally; skip anything that is not a file
        if [ -f "$file" ]; then
          echo "Loading: $(basename "$file")"
          curl -X POST \
            -H "Content-Type: $content_type" \
            --data-binary "@$file" \
            "$OXIGRAPH_ENDPOINT/store?default" \
            -w " HTTP %%{http_code}\n" \
            -o /dev/null -s
        fi
      done
    }

    echo "Loading ontologies into Oxigraph..."

    # Wait for Oxigraph to be ready
    attempts=0
    until curl -s "$OXIGRAPH_ENDPOINT/" > /dev/null 2>&1; do
      attempts=$((attempts + 1))
      if [ "$attempts" -ge "$MAX_WAIT_ATTEMPTS" ]; then
        echo "ERROR: Oxigraph did not become ready at $OXIGRAPH_ENDPOINT" >&2
        exit 1
      fi
      echo "Waiting for Oxigraph to start..."
      sleep 2
    done

    # Load Turtle files
    load_files 'text/turtle' "$ONTOLOGY_DIR"/*.ttl

    # Load RDF/XML files
    load_files 'application/rdf+xml' "$ONTOLOGY_DIR"/*.rdf "$ONTOLOGY_DIR"/*.owl

    # Load N-Triples files
    load_files 'application/n-triples' "$ONTOLOGY_DIR"/*.nt

    echo "Done loading ontologies!"

    # Show triple count
    echo ""
    echo "Triple count:"
    curl -s -X POST \
      -H 'Content-Type: application/sparql-query' \
      -H 'Accept: application/sparql-results+json' \
      --data 'SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }' \
      "$OXIGRAPH_ENDPOINT/query" | jq -r '.results.bindings[0].count.value'
# Backup script.
# Stops Oxigraph, archives /mnt/data/oxigraph into a timestamped tarball under
# /mnt/data/backups, restarts the service and keeps the seven newest archives.
- path: /var/lib/glam/scripts/backup.sh
  permissions: '0755'
  content: |
    #!/bin/bash
    set -e

    BACKUP_DIR="/mnt/data/backups"
    DATE=$(date +%Y%m%d_%H%M%S)
    ARCHIVE="$BACKUP_DIR/oxigraph_$DATE.tar.gz"

    mkdir -p "$BACKUP_DIR"

    echo "Creating Oxigraph backup..."

    # If anything below fails, set -e aborts the script; make sure the service is
    # started again and no half-written archive is left behind in that case.
    trap 'rm -f "$ARCHIVE.partial"; systemctl start oxigraph' EXIT

    # Stop Oxigraph for consistent backup
    systemctl stop oxigraph

    # Create tarball of Oxigraph data under a temporary name, so a truncated
    # archive never matches the oxigraph_*.tar.gz pattern used for pruning
    tar -czf "$ARCHIVE.partial" -C /mnt/data oxigraph
    mv "$ARCHIVE.partial" "$ARCHIVE"

    # Restart Oxigraph
    systemctl start oxigraph
    trap - EXIT

    # Keep only last 7 backups
    ls -t "$BACKUP_DIR"/oxigraph_*.tar.gz | tail -n +8 | xargs -r rm

    echo "Backup complete: $ARCHIVE"
# Logrotate policy for the Caddy log directory: daily rotation, 14 generations,
# compression delayed by one cycle, and a Caddy reload after each rotation.
# Owner and mode are spelled out; they match the write_files defaults.
- path: /etc/logrotate.d/glam
  owner: root:root
  permissions: '0644'
  content: |
    /var/log/caddy/*.log {
        daily
        missingok
        rotate 14
        compress
        delaycompress
        notifempty
        create 0640 caddy caddy
        sharedscripts
        postrotate
            systemctl reload caddy
        endscript
    }
runcmd:
# Mount data volume.
# The volume is optional: when the device node is absent, /mnt/data stays a
# plain directory on the root disk.
- mkdir -p /mnt/data
- |
  VOLUME_DEVICE="${volume_device}"
  if [ -e "$VOLUME_DEVICE" ]; then
    # Format only a device that carries no filesystem at all; a volume that is
    # already formatted (ext4 or anything else) must never be wiped.
    FS_TYPE=$(blkid -s TYPE -o value "$VOLUME_DEVICE")
    if [ -z "$FS_TYPE" ]; then
      mkfs.ext4 "$VOLUME_DEVICE"
      FS_TYPE=ext4
    fi
    mount "$VOLUME_DEVICE" /mnt/data
    # Register the mount once, using the filesystem type actually present
    if ! grep -qF "$VOLUME_DEVICE /mnt/data " /etc/fstab; then
      echo "$VOLUME_DEVICE /mnt/data $FS_TYPE defaults 0 2" >> /etc/fstab
    fi
  fi
# Lay out the data, web root and log directories in one pass (same paths, same
# order), then hand the data volume and the glam home over to the glam user.
- |
  mkdir -p \
    /mnt/data/oxigraph \
    /mnt/data/ontologies \
    /mnt/data/linkml \
    /mnt/data/uml \
    /mnt/data/backups \
    /var/www/glam-frontend \
    /var/log/caddy
- |
  for owned_tree in /mnt/data /var/lib/glam; do
    chown -R glam:glam "$owned_tree"
  done
# Install Caddy from the upstream apt repository.
- curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/gpg.key' | gpg --dearmor -o /usr/share/keyrings/caddy-stable-archive-keyring.gpg
- curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/debian.deb.txt' | tee /etc/apt/sources.list.d/caddy-stable.list
- apt-get update
# /etc/caddy/Caddyfile is written before the package is installed; keep that
# file and never stop at a dpkg configuration-file prompt (there is no terminal).
- DEBIAN_FRONTEND=noninteractive apt-get install -y -o Dpkg::Options::=--force-confdef -o Dpkg::Options::=--force-confold caddy
# The access log is written by the caddy user, so the log directory must belong
# to it (the caddy account only exists once the package is installed).
- install -d -o caddy -g caddy /var/log/caddy
# Install Oxigraph.
# Downloads the release binary matching the machine architecture to a temporary
# file and installs it only when the download succeeded, so a failed download
# never leaves an empty executable at /usr/local/bin/oxigraph. Errors are
# reported on stderr without exiting, so the remaining setup steps still run.
- |
  OXIGRAPH_VERSION="${oxigraph_version}"
  ARCH=$(uname -m)
  case "$ARCH" in
    x86_64|aarch64)
      OXIGRAPH_URL="https://github.com/oxigraph/oxigraph/releases/download/v$OXIGRAPH_VERSION/oxigraph_v$${OXIGRAPH_VERSION}_$${ARCH}_linux_gnu"
      OXIGRAPH_TMP=$(mktemp)
      if wget -q "$OXIGRAPH_URL" -O "$OXIGRAPH_TMP"; then
        install -m 0755 "$OXIGRAPH_TMP" /usr/local/bin/oxigraph
      else
        echo "ERROR: failed to download Oxigraph from $OXIGRAPH_URL" >&2
      fi
      rm -f "$OXIGRAPH_TMP"
      ;;
    *)
      echo "ERROR: unsupported architecture for Oxigraph: $ARCH" >&2
      ;;
  esac
# Pick up the unit file written above, then enable and start Oxigraph first
# and Caddy second.
- systemctl daemon-reload
- |
  for unit in oxigraph caddy; do
    systemctl enable "$unit"
    systemctl start "$unit"
  done
# Install the crontab that runs the backup script daily at 03:00 and appends its
# output to /var/log/glam-backup.log (this replaces the invoking user's crontab).
- |
  echo "0 3 * * * /var/lib/glam/scripts/backup.sh >> /var/log/glam-backup.log 2>&1" | crontab -
# Drop a marker file once every step above has been issued.
- |
  echo "GLAM SPARQL server setup complete!" > /var/lib/glam/setup-complete