#cloud-config
# Cloud-init configuration for the GLAM SPARQL server.
# Runs on first boot to configure the machine.
# The "#cloud-config" marker must remain the very first line of this file.

# Refresh the package index and apply pending upgrades before installing.
package_update: true
package_upgrade: true

# Base tooling plus the prerequisites for adding the Caddy apt repository.
packages:
- curl
- wget
- unzip
- htop
- vim
- git
- jq
- apt-transport-https
- ca-certificates
- debian-keyring
- debian-archive-keyring
# Provides `gpg`, which runcmd uses to de-armor the Caddy repository key.
- gnupg

# Create users and groups
groups:
- glam

# Service account that runs Oxigraph.
users:
- name: glam
  # Use the pre-created "glam" group as the primary group. Listing it only as
  # a supplementary group makes useradd try to create a second group with the
  # same name as the user, which fails because the group already exists.
  primary_group: glam
  shell: /bin/bash
  system: true
  home: /var/lib/glam

# Write configuration files
write_files:
# Oxigraph systemd service: runs the SPARQL server as the glam user, bound to
# loopback only.
- path: /etc/systemd/system/oxigraph.service
  content: |
    [Unit]
    Description=Oxigraph SPARQL Server
    After=network.target
    # Depend on, and start after, whatever mount backs the data directory.
    # A bare Requires=mnt-data.mount gives no ordering and makes the service
    # fail to start when the optional data volume was never attached.
    RequiresMountsFor=/mnt/data/oxigraph

    [Service]
    Type=simple
    User=glam
    Group=glam
    ExecStart=/usr/local/bin/oxigraph serve \
      --location /mnt/data/oxigraph \
      --bind 127.0.0.1:7878
    Restart=always
    RestartSec=10
    StandardOutput=journal
    StandardError=journal

    # Security hardening: read-only system, no home access, private /tmp;
    # only the store directory is writable.
    NoNewPrivileges=true
    ProtectSystem=strict
    ProtectHome=true
    ReadWritePaths=/mnt/data/oxigraph
    PrivateTmp=true

    [Install]
    WantedBy=multi-user.target

# Caddy configuration for reverse proxy
# NOTE(review): /update and /store are proxied without authentication and the
# CORS policy allows any origin - confirm that public write access is intended.
- path: /etc/caddy/Caddyfile
  content: |
    ${domain} {
      # TLS with automatic HTTPS
      tls ${admin_email}

      # SPARQL Query endpoint
      handle /query* {
        reverse_proxy localhost:7878
      }

      # SPARQL Update endpoint
      handle /update* {
        reverse_proxy localhost:7878
      }

      # Graph Store Protocol
      handle /store* {
        reverse_proxy localhost:7878
      }

      # Static ontology files. The files live in /mnt/data/ontologies (the
      # directory the loader script reads), so strip the /ontology URL prefix
      # and serve from that directory.
      handle_path /ontology/* {
        root * /mnt/data/ontologies
        file_server browse
      }

      # Static LinkML files (served from /mnt/data/linkml)
      handle /linkml/* {
        root * /mnt/data
        file_server browse
      }

      # Static UML files (served from /mnt/data/uml)
      handle /uml/* {
        root * /mnt/data
        file_server browse
      }

      # Health check endpoint
      handle /health {
        respond "OK" 200
      }

      # YASGUI SPARQL Editor: fallback for every path not matched above.
      # A bare `handle` is required here; `handle /` matches only the exact
      # root path, so the frontend's assets and the index.html fallback would
      # never be served.
      handle {
        root * /var/www/glam-frontend
        try_files {path} /index.html
        file_server
      }

      # CORS headers for SPARQL endpoints
      header Access-Control-Allow-Origin "*"
      header Access-Control-Allow-Methods "GET, POST, OPTIONS"
      header Access-Control-Allow-Headers "Content-Type, Accept"

      # Compression
      encode gzip zstd

      # Logging (Caddy rolls this file itself at 100 MB, keeping 5 files)
      log {
        output file /var/log/caddy/access.log {
          roll_size 100mb
          roll_keep 5
        }
      }
    }

# Script to load ontologies into Oxigraph
# Waits for the local endpoint, POSTs every Turtle, RDF/XML and N-Triples file
# from the ontology directory into the default graph, then prints the total
# triple count.
- path: /var/lib/glam/scripts/load-ontologies.sh
  permissions: '0755'
  content: |
    #!/bin/bash
    set -e

    OXIGRAPH_ENDPOINT="http://127.0.0.1:7878"
    ONTOLOGY_DIR="/mnt/data/ontologies"

    # load_files CONTENT_TYPE FILE...
    # POST each existing regular file to the default graph and print the
    # HTTP status. Unmatched glob patterns arrive as literal strings and are
    # skipped by the -f test.
    load_files() {
      local content_type="$1"
      shift
      local rdf_file name
      for rdf_file in "$@"; do
        [ -f "$rdf_file" ] || continue
        name=$(basename "$rdf_file")
        echo "Loading: $name"
        curl -X POST \
          -H "Content-Type: $content_type" \
          --data-binary "@$rdf_file" \
          "$OXIGRAPH_ENDPOINT/store?default" \
          -w " HTTP %%{http_code}\n" \
          -o /dev/null -s
      done
    }

    echo "Loading ontologies into Oxigraph..."

    # Wait for Oxigraph to be ready
    while ! curl -s "$OXIGRAPH_ENDPOINT/" > /dev/null 2>&1; do
      echo "Waiting for Oxigraph to start..."
      sleep 2
    done

    # Turtle, then RDF/XML (.rdf and .owl), then N-Triples
    load_files 'text/turtle' "$ONTOLOGY_DIR"/*.ttl
    load_files 'application/rdf+xml' "$ONTOLOGY_DIR"/*.rdf "$ONTOLOGY_DIR"/*.owl
    load_files 'application/n-triples' "$ONTOLOGY_DIR"/*.nt

    echo "Done loading ontologies!"

    # Show triple count
    echo ""
    echo "Triple count:"
    curl -s -X POST \
      -H 'Content-Type: application/sparql-query' \
      -H 'Accept: application/sparql-results+json' \
      --data 'SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }' \
      "$OXIGRAPH_ENDPOINT/query" | jq -r '.results.bindings[0].count.value'

# Backup script
# Stops Oxigraph, archives /mnt/data/oxigraph into /mnt/data/backups, restarts
# the service and keeps the seven most recent archives.
- path: /var/lib/glam/scripts/backup.sh
  permissions: '0755'
  content: |
    #!/bin/bash
    set -e

    BACKUP_DIR="/mnt/data/backups"
    DATE=$(date +%Y%m%d_%H%M%S)
    ARCHIVE="$BACKUP_DIR/oxigraph_$DATE.tar.gz"

    mkdir -p "$BACKUP_DIR"

    echo "Creating Oxigraph backup..."

    # With `set -e` a failing tar would otherwise exit while the service is
    # still stopped. This handler runs on any exit while armed: it removes a
    # partial archive after a failure and always starts Oxigraph again.
    restore_service() {
      status=$?
      if [ "$status" -ne 0 ]; then
        rm -f "$ARCHIVE"
        echo "Backup failed (exit $status); restarting Oxigraph" >&2
      fi
      systemctl start oxigraph
    }
    trap restore_service EXIT

    # Stop Oxigraph for consistent backup
    systemctl stop oxigraph

    # Create tarball of Oxigraph data
    tar -czf "$ARCHIVE" -C /mnt/data oxigraph

    # Restart Oxigraph (disarm the handler first so it is not started twice)
    trap - EXIT
    systemctl start oxigraph

    # Keep only last 7 backups
    ls -t "$BACKUP_DIR"/oxigraph_*.tar.gz | tail -n +8 | xargs -r rm

    echo "Backup complete: $ARCHIVE"

# Logrotate configuration
# Daily rotation of the Caddy logs, 14 compressed generations; Caddy is
# reloaded once per run after rotation.
- path: /etc/logrotate.d/glam
  # Owner and mode are spelled out; both equal the cloud-init defaults.
  owner: 'root:root'
  permissions: '0644'
  content: |
    /var/log/caddy/*.log {
      daily
      missingok
      rotate 14
      compress
      delaycompress
      notifempty
      create 0640 caddy caddy
      sharedscripts
      postrotate
        systemctl reload caddy
      endscript
    }

runcmd:
# Mount data volume
# If the volume device exists: format it as ext4 when it is not already ext4,
# mount it on /mnt/data and persist the mount in /etc/fstab. Without the
# device, /mnt/data stays a plain directory on the root disk.
- mkdir -p /mnt/data
- |
  VOLUME_DEVICE="${volume_device}"
  if [ -e "$VOLUME_DEVICE" ]; then
    # Check if already formatted
    if ! blkid "$VOLUME_DEVICE" | grep -q ext4; then
      mkfs.ext4 "$VOLUME_DEVICE"
    fi
    # Do not mount a second time if something is already mounted there.
    if ! mountpoint -q /mnt/data; then
      mount "$VOLUME_DEVICE" /mnt/data
    fi
    # Add the fstab entry only once. `nofail` lets the machine finish booting
    # if the volume is later detached.
    if ! grep -qsE '[[:space:]]/mnt/data[[:space:]]' /etc/fstab; then
      echo "$VOLUME_DEVICE /mnt/data ext4 defaults,nofail 0 2" >> /etc/fstab
    fi
  fi

# Create directory structure
# Data directories live on /mnt/data; the frontend and Caddy log directories
# live on the root filesystem.
- mkdir -p /mnt/data/oxigraph
- mkdir -p /mnt/data/ontologies
- mkdir -p /mnt/data/linkml
- mkdir -p /mnt/data/uml
- mkdir -p /mnt/data/backups
- mkdir -p /var/www/glam-frontend
- mkdir -p /var/log/caddy
- chown -R glam:glam /mnt/data
- chown -R glam:glam /var/lib/glam
# backup.sh is executed by root's crontab. Keep the scripts root-owned so the
# glam service account cannot modify code that root runs. They stay
# world-readable and executable (0755).
- chown -R root:root /var/lib/glam/scripts

# Install Caddy
# Register the Cloudsmith apt repository (signing key and source list), then
# install the package.
- curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/gpg.key' | gpg --dearmor -o /usr/share/keyrings/caddy-stable-archive-keyring.gpg
- curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/debian.deb.txt' | tee /etc/apt/sources.list.d/caddy-stable.list
- apt-get update
- apt-get install -y caddy
# The Caddyfile writes its access log to /var/log/caddy. That directory is
# created by root before the caddy user exists, so hand it to caddy now;
# otherwise Caddy cannot open the log file.
- install -d -o caddy -g caddy /var/log/caddy

# Install Oxigraph
# Download the release binary for this machine's architecture into a temp
# file and install it only after the download succeeded, so a failed download
# never leaves an empty executable at /usr/local/bin/oxigraph.
# The subshell keeps `set -e` and the EXIT trap local to this entry, since
# cloud-init runs all runcmd entries as one shell script.
- |
  (
    set -e
    OXIGRAPH_VERSION="${oxigraph_version}"
    ARCH=$(uname -m)
    case "$ARCH" in
      x86_64|aarch64) ;;
      *)
        echo "Unsupported architecture for Oxigraph: $ARCH" >&2
        exit 1
        ;;
    esac
    TMP_BIN=$(mktemp)
    trap 'rm -f "$TMP_BIN"' EXIT
    wget -q "https://github.com/oxigraph/oxigraph/releases/download/v$OXIGRAPH_VERSION/oxigraph_v$${OXIGRAPH_VERSION}_$${ARCH}_linux_gnu" -O "$TMP_BIN"
    install -m 0755 "$TMP_BIN" /usr/local/bin/oxigraph
  )

# Enable and start services
# Reload systemd so it sees the unit written by write_files, then enable and
# start Oxigraph first and Caddy second.
- systemctl daemon-reload
- systemctl enable --now oxigraph
- systemctl enable --now caddy

# Setup automatic backups (daily at 3 AM)
# Installs the line as the crontab of the user running runcmd, replacing any
# existing crontab for that user.
- |
  printf '%s\n' "0 3 * * * /var/lib/glam/scripts/backup.sh >> /var/log/glam-backup.log 2>&1" | crontab -

# Notify that setup is complete
- |
  echo "GLAM SPARQL server setup complete!" > /var/lib/glam/setup-complete