feat(infra): add fast push-based schema sync to production

- Replace slow Forgejo→Server git pull with direct local rsync
- Add git-push-schemas.sh wrapper script for manual pushes
- Add post-commit hook for automatic schema sync
- Fix YAML syntax errors in slot comment blocks
- Update deploy-webhook.py to use master branch
This commit is contained in:
kempersc 2026-01-11 01:22:47 +01:00
parent 0df26a6e44
commit 3c3be47e32
6 changed files with 116 additions and 37 deletions

View file

@@ -0,0 +1,65 @@
#!/bin/bash
# git-push-schemas: Push to git AND sync schemas to production
#
# Usage: ./infrastructure/git-push-schemas.sh [git push args]
#
# This script:
# 1. Pushes to Forgejo (git push origin master)
# 2. Rsyncs schemas directly to Hetzner server (bypasses slow Forgejo→Hetzner network)
# 3. Triggers webhook to copy staging → frontend
set -e
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# Single source of truth for the deploy target and schema subtree,
# shared by both rsync steps below.
DEPLOY_HOST="root@91.98.224.44"
SCHEMA_SUBDIR="schemas/20251121/linkml"
# Colors
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
NC='\033[0m'
# sync_schemas_to DEST_DIR
# Rsync the local LinkML schema tree to DEST_DIR on the deploy host.
# Direct rsync is used because the Forgejo→Hetzner link is slow.
sync_schemas_to() {
    rsync -az --delete \
        --exclude "*.pyc" \
        --exclude "__pycache__" \
        -e "ssh -o StrictHostKeyChecking=no" \
        "$PROJECT_ROOT/$SCHEMA_SUBDIR/" \
        "$DEPLOY_HOST:$1/$SCHEMA_SUBDIR/"
}
echo -e "${BLUE}════════════════════════════════════════════════════════════════${NC}"
echo -e "${BLUE} GLAM Schema Push${NC}"
echo -e "${BLUE}════════════════════════════════════════════════════════════════${NC}"
# Determine the set of files this push will publish BEFORE pushing:
# after the push the upstream equals HEAD and the diff would be empty.
# Diffing against the upstream (rather than only HEAD~1..HEAD) also
# catches schema changes made several commits ago that are part of
# this push but not of the last commit.
UPSTREAM="$(git rev-parse --abbrev-ref --symbolic-full-name '@{upstream}' 2>/dev/null || true)"
if [ -n "$UPSTREAM" ]; then
    CHANGED_FILES=$(git diff --name-only "$UPSTREAM" HEAD)
else
    # No upstream configured: fall back to the last commit, or the
    # working tree on a repo with a single (root) commit.
    CHANGED_FILES=$(git diff --name-only HEAD~1 HEAD 2>/dev/null || git diff --name-only HEAD)
fi
# Step 1: Git push
echo -e "\n${YELLOW}Step 1: Pushing to Forgejo...${NC}"
git push "$@"
echo -e "${GREEN}✓ Git push complete${NC}"
# Step 2: Check if schemas changed
if echo "$CHANGED_FILES" | grep -q "^schemas/20251121/linkml/"; then
    echo -e "\n${YELLOW}Step 2: Syncing schemas to server...${NC}"
    # Staging area consumed by the server-side webhook/deploy tooling.
    sync_schemas_to "/var/lib/glam/repo"
    echo -e "${GREEN}✓ Schemas synced to staging${NC}"
    # Step 3: Deploy straight to the web root (even simpler - skip webhook)
    echo -e "\n${YELLOW}Step 3: Deploying to frontend...${NC}"
    sync_schemas_to "/var/www/glam-frontend"
    echo -e "${GREEN}✓ Schemas deployed to frontend${NC}"
else
    echo -e "\n${YELLOW}No schema changes detected, skipping sync${NC}"
fi
echo -e "\n${BLUE}════════════════════════════════════════════════════════════════${NC}"
echo -e "${GREEN} Done! Schemas available at:${NC}"
echo -e " https://bronhouder.nl/schemas/20251121/linkml/"
echo -e "${BLUE}════════════════════════════════════════════════════════════════${NC}"

View file

@@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
Webhook receiver for Forgejo push events.
Triggers schema sync when push events are received on the main branch.
Triggers schema sync when push events are received on the master branch.
Run with: uvicorn deploy-webhook:app --port 8099 --host 127.0.0.1
Or as systemd service: deploy-webhook.service
@@ -12,7 +12,6 @@ import hashlib
import hmac
import json
import os
import subprocess
import logging
from datetime import datetime
from pathlib import Path
@@ -29,12 +28,11 @@ logging.basicConfig(
)
logger = logging.getLogger(__name__)
app = FastAPI(title="GLAM Deploy Webhook", version="1.0.0")
app = FastAPI(title="GLAM Deploy Webhook", version="1.1.0")
# Configuration
WEBHOOK_SECRET = os.environ.get("WEBHOOK_SECRET", "")
REPO_PATH = Path("/var/lib/glam/repo")
SCRIPTS_PATH = Path("/var/lib/glam/scripts")
FRONTEND_PATH = Path("/var/www/glam-frontend")
LINKML_SOURCE = REPO_PATH / "schemas/20251121/linkml"
LINKML_DEST = FRONTEND_PATH / "schemas/20251121/linkml"
@@ -81,30 +79,34 @@ async def run_command(cmd: list[str], cwd: Optional[Path] = None) -> tuple[int,
stderr=asyncio.subprocess.PIPE
)
stdout, stderr = await process.communicate()
return process.returncode, stdout.decode(), stderr.decode()
returncode = process.returncode if process.returncode is not None else -1
return returncode, stdout.decode(), stderr.decode()
async def sync_schemas() -> dict:
"""Sync LinkML schemas from repo to frontend public directory."""
"""
Sync LinkML schemas from staging area to frontend public directory.
The staging area (/var/lib/glam/repo/schemas) is populated by rsync
from the deployer's machine, NOT by git clone (to avoid slow network).
"""
results = {}
# Pull latest changes
logger.info(f"Pulling latest changes in {REPO_PATH}")
code, stdout, stderr = await run_command(["git", "pull", "origin", "main"], cwd=REPO_PATH)
results["git_pull"] = {
"success": code == 0,
"stdout": stdout[:500] if stdout else "",
"stderr": stderr[:500] if stderr else ""
}
if code != 0:
logger.error(f"Git pull failed: {stderr}")
# Ensure source exists
if not LINKML_SOURCE.exists():
logger.error(f"Source directory does not exist: {LINKML_SOURCE}")
results["check_source"] = {
"success": False,
"error": f"Source directory not found: {LINKML_SOURCE}"
}
return results
# Ensure destination directory exists
LINKML_DEST.mkdir(parents=True, exist_ok=True)
results["check_source"] = {"success": True, "path": str(LINKML_SOURCE)}
# Sync LinkML schemas using rsync
# Ensure destination directory exists
LINKML_DEST.parent.mkdir(parents=True, exist_ok=True)
# Sync LinkML schemas using rsync (local copy, very fast)
logger.info(f"Syncing schemas: {LINKML_SOURCE} -> {LINKML_DEST}")
code, stdout, stderr = await run_command([
"rsync", "-av", "--delete",
@@ -116,6 +118,7 @@ async def sync_schemas() -> dict:
])
results["rsync_linkml"] = {
"success": code == 0,
"files_synced": len([l for l in stdout.split('\n') if l and not l.startswith('sent') and not l.startswith('total')]),
"stdout": stdout[:1000] if stdout else "",
"stderr": stderr[:500] if stderr else ""
}
@@ -131,7 +134,20 @@
@app.get("/health")
async def health():
"""Health check endpoint."""
return {"status": "ok", "service": "deploy-webhook"}
return {"status": "ok", "service": "deploy-webhook", "version": "1.1.0"}
@app.get("/webhook/status")
async def webhook_status():
"""Status endpoint showing configuration."""
return {
"status": "ready",
"source_path": str(LINKML_SOURCE),
"source_exists": LINKML_SOURCE.exists(),
"dest_path": str(LINKML_DEST),
"dest_exists": LINKML_DEST.exists(),
"secret_configured": bool(WEBHOOK_SECRET)
}
@app.post("/webhook/deploy")
@@ -144,7 +160,7 @@
):
"""
Handle Forgejo/Gitea push webhook.
Triggers schema sync on push to main branch.
Triggers schema sync on push to master branch.
"""
body = await request.body()
@@ -170,7 +186,7 @@
"reason": f"Event type '{event}' not handled"
})
# Only process pushes to main branch
# Only process pushes to main/master branch
ref = payload.get("ref", "")
if ref not in ["refs/heads/main", "refs/heads/master"]:
return JSONResponse({

View file

@ -22,10 +22,12 @@ if [ -d "$REPO_DIR/.git" ]; then
echo "Updating existing repo..."
cd "$REPO_DIR"
git fetch origin
git reset --hard origin/main
git reset --hard origin/master
else
echo "Cloning repository..."
git clone https://git.bronhouder.nl/kempersc/glam.git "$REPO_DIR"
cd "$REPO_DIR"
git checkout master
fi
# Install Python dependencies
@ -128,6 +130,6 @@ echo " - HTTP Method: POST"
echo " - Content Type: application/json"
echo " - Secret: $WEBHOOK_SECRET"
echo " - Trigger On: Push Events"
echo " - Branch filter: main"
echo " - Branch filter: master"
echo ""
echo "Test with: curl -X POST https://bronhouder.nl/webhook/deploy/manual -H 'Authorization: Bearer $WEBHOOK_SECRET'"

View file

@@ -19,10 +19,8 @@ slots:
- Part of PROV-O derivation pattern
- Inverse of has_derived_observation
- Creates owl:inverseOf axiom in RDF output
- 'Inverse: `has_derived_observation` | Pattern: If Observation derived_from_entity LegalStatus, then LegalStatus has_derived_observation
Observation'
- 'Navigation: From observation: Find formal entity it references (derived_from_entity) | From legal status: Find all
observations that reference it (has_derived_observation)'
- "Inverse: has_derived_observation | Pattern: If Observation derived_from_entity LegalStatus, then LegalStatus has_derived_observation Observation"
- "Navigation: From observation: Find formal entity it references (derived_from_entity) | From legal status: Find all observations that reference it (has_derived_observation)"
- Range is a prov:Entity instance (CustodianLegalStatus)
annotations:
inverse_slot: has_derived_observation

View file

@@ -22,9 +22,8 @@ slots:
- Inverse of refers_to_custodian (dcterms:references)
- Links custodian hub to all its evidence/observations
- Creates owl:inverseOf axiom in RDF output
- 'Inverse: `refers_to_custodian` (dcterms:references) | Pattern: If Observation refers_to_custodian Custodian, then
Custodian has_observation Observation'
- 'Navigation: From custodian: Find all observations (has_observation) | From observation: Find custodian hub (refers_to_custodian)'
- "Inverse: refers_to_custodian (dcterms:references) | Pattern: If Observation refers_to_custodian Custodian, then Custodian has_observation Observation"
- "Navigation: From custodian: Find all observations (has_observation) | From observation: Find custodian hub (refers_to_custodian)"
- Range contains prov:Entity instances (CustodianObservation)
annotations:
inverse_slot: refers_to_custodian

View file

@@ -29,9 +29,8 @@ slots:
- Conceptual inverse of refers_to_person (pico:observationOf)
- Links person hub to all its evidence/observations
- Parallel to has_observation slot on Custodian
- 'Inverse: `refers_to_person` (pico:observationOf) | Pattern: If PersonObservation refers_to_person Person, then Person
has_person_observation PersonObservation'
- 'Navigation: From person: Find all observations (has_person_observation) | From observation: Find person hub (refers_to_person)'
- "Inverse: refers_to_person (pico:observationOf) | Pattern: If PersonObservation refers_to_person Person, then Person has_person_observation PersonObservation"
- "Navigation: From person: Find all observations (has_person_observation) | From observation: Find person hub (refers_to_person)"
- Range contains prov:Entity instances (PersonObservation)
annotations:
inverse_slot: refers_to_person
@@ -39,7 +38,7 @@ slots:
- value: |
Person:
person_id: "https://nde.nl/ontology/hc/person/taco-dibbits"
preferred_name: "Taco Dibbits"\
preferred_name: "Taco Dibbits"
has_person_observation:
- "https://nde.nl/ontology/hc/observation/linkedin-taco-dibbits-202...
- "https://nde.nl/ontology/hc/observation/linkedin-taco-dibbits-2024"
description: Usage example