From 3c3be47e325b7a7732213463b346c56f7575f30f Mon Sep 17 00:00:00 2001 From: kempersc Date: Sun, 11 Jan 2026 01:22:47 +0100 Subject: [PATCH] feat(infra): add fast push-based schema sync to production MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace slow Forgejo→Server git pull with direct local rsync - Add git-push-schemas.sh wrapper script for manual pushes - Add post-commit hook for automatic schema sync - Fix YAML syntax errors in slot comment blocks - Update deploy-webhook.py to use master branch --- infrastructure/git-push-schemas.sh | 65 +++++++++++++++++++ infrastructure/scripts/deploy-webhook.py | 62 +++++++++++------- .../scripts/setup-deploy-webhook.sh | 6 +- .../modules/slots/derived_from_entity.yaml | 6 +- .../linkml/modules/slots/has_observation.yaml | 5 +- .../modules/slots/has_person_observation.yaml | 9 ++- 6 files changed, 116 insertions(+), 37 deletions(-) create mode 100755 infrastructure/git-push-schemas.sh diff --git a/infrastructure/git-push-schemas.sh b/infrastructure/git-push-schemas.sh new file mode 100755 index 0000000000..3f37d990c9 --- /dev/null +++ b/infrastructure/git-push-schemas.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# git-push-schemas: Push to git AND sync schemas to production +# +# Usage: ./infrastructure/git-push-schemas.sh [git push args] +# +# This script: +# 1. Pushes to Forgejo (runs git push, forwarding any arguments given to this script) +# 2. Rsyncs schemas directly to Hetzner server (bypasses slow Forgejo→Hetzner network) +# 3. Rsyncs the same schemas directly to the frontend directory (no webhook is called) + +set -e + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" + +# Colors +GREEN='\033[0;32m' +BLUE='\033[0;34m' +YELLOW='\033[1;33m' +NC='\033[0m' + +echo -e "${BLUE}════════════════════════════════════════════════════════════════${NC}" +echo -e "${BLUE} GLAM Schema Push${NC}" +echo -e "${BLUE}════════════════════════════════════════════════════════════════${NC}" + +# Step 1: Git push +echo -e "\n${YELLOW}Step 1: Pushing to Forgejo...${NC}" +git push "$@" +echo -e "${GREEN}✓ Git push complete${NC}" + +# Step 2: Check if schemas changed +CHANGED_FILES=$(git diff --name-only HEAD~1 HEAD 2>/dev/null || git diff --name-only HEAD) +if echo "$CHANGED_FILES" | grep -q "^schemas/20251121/linkml/"; then + echo -e "\n${YELLOW}Step 2: Syncing schemas to server...${NC}" + + # Direct rsync to server (fast: bypasses Forgejo→Hetzner slow link) + rsync -az --delete \ + --exclude "*.pyc" \ + --exclude "__pycache__" \ + -e "ssh -o StrictHostKeyChecking=no" \ + "$PROJECT_ROOT/schemas/20251121/linkml/" \ + "root@91.98.224.44:/var/lib/glam/repo/schemas/20251121/linkml/" + + echo -e "${GREEN}✓ Schemas synced to staging${NC}" + + # Step 3: Deploy to frontend with a second rsync from this machine (no server-side copy) + echo -e "\n${YELLOW}Step 3: Deploying to frontend...${NC}" + + # Direct rsync to frontend (even simpler - skip webhook) + rsync -az --delete \ + --exclude "*.pyc" \ + --exclude "__pycache__" \ + -e "ssh -o StrictHostKeyChecking=no" \ + "$PROJECT_ROOT/schemas/20251121/linkml/" \ + "root@91.98.224.44:/var/www/glam-frontend/schemas/20251121/linkml/" + + echo -e "${GREEN}✓ Schemas deployed to frontend${NC}" +else + echo -e "\n${YELLOW}No schema changes detected, skipping sync${NC}" +fi + +echo -e "\n${BLUE}════════════════════════════════════════════════════════════════${NC}" +echo -e "${GREEN} Done! 
Schemas available at:${NC}" +echo -e " https://bronhouder.nl/schemas/20251121/linkml/" +echo -e "${BLUE}════════════════════════════════════════════════════════════════${NC}" diff --git a/infrastructure/scripts/deploy-webhook.py b/infrastructure/scripts/deploy-webhook.py index b97ad6d740..9d7946d9ca 100644 --- a/infrastructure/scripts/deploy-webhook.py +++ b/infrastructure/scripts/deploy-webhook.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """ Webhook receiver for Forgejo push events. -Triggers schema sync when push events are received on the main branch. +Triggers schema sync when push events are received on the master branch. Run with: uvicorn deploy-webhook:app --port 8099 --host 127.0.0.1 Or as systemd service: deploy-webhook.service @@ -12,7 +12,6 @@ import hashlib import hmac import json import os -import subprocess import logging from datetime import datetime from pathlib import Path @@ -29,12 +28,11 @@ logging.basicConfig( ) logger = logging.getLogger(__name__) -app = FastAPI(title="GLAM Deploy Webhook", version="1.0.0") +app = FastAPI(title="GLAM Deploy Webhook", version="1.1.0") # Configuration WEBHOOK_SECRET = os.environ.get("WEBHOOK_SECRET", "") REPO_PATH = Path("/var/lib/glam/repo") -SCRIPTS_PATH = Path("/var/lib/glam/scripts") FRONTEND_PATH = Path("/var/www/glam-frontend") LINKML_SOURCE = REPO_PATH / "schemas/20251121/linkml" LINKML_DEST = FRONTEND_PATH / "schemas/20251121/linkml" @@ -81,30 +79,34 @@ async def run_command(cmd: list[str], cwd: Optional[Path] = None) -> tuple[int, stderr=asyncio.subprocess.PIPE ) stdout, stderr = await process.communicate() - return process.returncode, stdout.decode(), stderr.decode() + returncode = process.returncode if process.returncode is not None else -1 + return returncode, stdout.decode(), stderr.decode() async def sync_schemas() -> dict: - """Sync LinkML schemas from repo to frontend public directory.""" + """ + Sync LinkML schemas from staging area to frontend public directory. 
+ + The staging area (/var/lib/glam/repo/schemas) is populated by rsync + from the deployer's machine, NOT by git clone (to avoid slow network). + """ results = {} - # Pull latest changes - logger.info(f"Pulling latest changes in {REPO_PATH}") - code, stdout, stderr = await run_command(["git", "pull", "origin", "main"], cwd=REPO_PATH) - results["git_pull"] = { - "success": code == 0, - "stdout": stdout[:500] if stdout else "", - "stderr": stderr[:500] if stderr else "" - } - - if code != 0: - logger.error(f"Git pull failed: {stderr}") + # Ensure source exists + if not LINKML_SOURCE.exists(): + logger.error(f"Source directory does not exist: {LINKML_SOURCE}") + results["check_source"] = { + "success": False, + "error": f"Source directory not found: {LINKML_SOURCE}" + } return results - # Ensure destination directory exists - LINKML_DEST.mkdir(parents=True, exist_ok=True) + results["check_source"] = {"success": True, "path": str(LINKML_SOURCE)} - # Sync LinkML schemas using rsync + # Ensure destination directory exists + LINKML_DEST.parent.mkdir(parents=True, exist_ok=True) + + # Sync LinkML schemas using rsync (local copy, very fast) logger.info(f"Syncing schemas: {LINKML_SOURCE} -> {LINKML_DEST}") code, stdout, stderr = await run_command([ "rsync", "-av", "--delete", @@ -116,6 +118,7 @@ async def sync_schemas() -> dict: ]) results["rsync_linkml"] = { "success": code == 0, + "files_synced": len([l for l in stdout.split('\n') if l and not l.startswith('sent') and not l.startswith('total')]), "stdout": stdout[:1000] if stdout else "", "stderr": stderr[:500] if stderr else "" } @@ -131,7 +134,20 @@ async def sync_schemas() -> dict: @app.get("/health") async def health(): """Health check endpoint.""" - return {"status": "ok", "service": "deploy-webhook"} + return {"status": "ok", "service": "deploy-webhook", "version": "1.1.0"} + + +@app.get("/webhook/status") +async def webhook_status(): + """Status endpoint showing configuration.""" + return { + "status": "ready", + 
"source_path": str(LINKML_SOURCE), + "source_exists": LINKML_SOURCE.exists(), + "dest_path": str(LINKML_DEST), + "dest_exists": LINKML_DEST.exists(), + "secret_configured": bool(WEBHOOK_SECRET) + } @app.post("/webhook/deploy") @@ -144,7 +160,7 @@ async def deploy_webhook( ): """ Handle Forgejo/Gitea push webhook. - Triggers schema sync on push to main branch. + Triggers schema sync on push to master branch. """ body = await request.body() @@ -170,7 +186,7 @@ async def deploy_webhook( "reason": f"Event type '{event}' not handled" }) - # Only process pushes to main branch + # Only process pushes to main/master branch ref = payload.get("ref", "") if ref not in ["refs/heads/main", "refs/heads/master"]: return JSONResponse({ diff --git a/infrastructure/scripts/setup-deploy-webhook.sh b/infrastructure/scripts/setup-deploy-webhook.sh index 8c4c130d4b..0d1b6a2b55 100755 --- a/infrastructure/scripts/setup-deploy-webhook.sh +++ b/infrastructure/scripts/setup-deploy-webhook.sh @@ -22,10 +22,12 @@ if [ -d "$REPO_DIR/.git" ]; then echo "Updating existing repo..." cd "$REPO_DIR" git fetch origin - git reset --hard origin/main + git reset --hard origin/master else echo "Cloning repository..." 
git clone https://git.bronhouder.nl/kempersc/glam.git "$REPO_DIR" + cd "$REPO_DIR" + git checkout master fi # Install Python dependencies @@ -128,6 +130,6 @@ echo " - HTTP Method: POST" echo " - Content Type: application/json" echo " - Secret: $WEBHOOK_SECRET" echo " - Trigger On: Push Events" -echo " - Branch filter: main" +echo " - Branch filter: master" echo "" echo "Test with: curl -X POST https://bronhouder.nl/webhook/deploy/manual -H 'Authorization: Bearer $WEBHOOK_SECRET'" diff --git a/schemas/20251121/linkml/modules/slots/derived_from_entity.yaml b/schemas/20251121/linkml/modules/slots/derived_from_entity.yaml index d038526b2e..571faae3bb 100644 --- a/schemas/20251121/linkml/modules/slots/derived_from_entity.yaml +++ b/schemas/20251121/linkml/modules/slots/derived_from_entity.yaml @@ -19,10 +19,8 @@ slots: - Part of PROV-O derivation pattern - Inverse of has_derived_observation - Creates owl:inverseOf axiom in RDF output - - 'Inverse: `has_derived_observation` | Pattern: If Observation derived_from_entity LegalStatus, then LegalStatus has_derived_observation - Observation' - - 'Navigation: From observation: Find formal entity it references (derived_from_entity) | From legal status: Find all - observations that reference it (has_derived_observation)' + - "Inverse: has_derived_observation | Pattern: If Observation derived_from_entity LegalStatus, then LegalStatus has_derived_observation Observation" + - "Navigation: From observation: Find formal entity it references (derived_from_entity) | From legal status: Find all observations that reference it (has_derived_observation)" - Range is a prov:Entity instance (CustodianLegalStatus) annotations: inverse_slot: has_derived_observation diff --git a/schemas/20251121/linkml/modules/slots/has_observation.yaml b/schemas/20251121/linkml/modules/slots/has_observation.yaml index 6c927371fe..fc6a87f01c 100644 --- a/schemas/20251121/linkml/modules/slots/has_observation.yaml +++ 
b/schemas/20251121/linkml/modules/slots/has_observation.yaml @@ -22,9 +22,8 @@ slots: - Inverse of refers_to_custodian (dcterms:references) - Links custodian hub to all its evidence/observations - Creates owl:inverseOf axiom in RDF output - - 'Inverse: `refers_to_custodian` (dcterms:references) | Pattern: If Observation refers_to_custodian Custodian, then - Custodian has_observation Observation' - - 'Navigation: From custodian: Find all observations (has_observation) | From observation: Find custodian hub (refers_to_custodian)' + - "Inverse: refers_to_custodian (dcterms:references) | Pattern: If Observation refers_to_custodian Custodian, then Custodian has_observation Observation" + - "Navigation: From custodian: Find all observations (has_observation) | From observation: Find custodian hub (refers_to_custodian)" - Range contains prov:Entity instances (CustodianObservation) annotations: inverse_slot: refers_to_custodian diff --git a/schemas/20251121/linkml/modules/slots/has_person_observation.yaml b/schemas/20251121/linkml/modules/slots/has_person_observation.yaml index d6ba3ac34b..619a57cb82 100644 --- a/schemas/20251121/linkml/modules/slots/has_person_observation.yaml +++ b/schemas/20251121/linkml/modules/slots/has_person_observation.yaml @@ -29,9 +29,8 @@ slots: - Conceptual inverse of refers_to_person (pico:observationOf) - Links person hub to all its evidence/observations - Parallel to has_observation slot on Custodian - - 'Inverse: `refers_to_person` (pico:observationOf) | Pattern: If PersonObservation refers_to_person Person, then Person - has_person_observation PersonObservation' - - 'Navigation: From person: Find all observations (has_person_observation) | From observation: Find person hub (refers_to_person)' + - "Inverse: refers_to_person (pico:observationOf) | Pattern: If PersonObservation refers_to_person Person, then Person has_person_observation PersonObservation" + - "Navigation: From person: Find all observations (has_person_observation) | From 
observation: Find person hub (refers_to_person)" - Range contains prov:Entity instances (PersonObservation) annotations: inverse_slot: refers_to_person @@ -39,7 +38,7 @@ slots: - value: | Person: person_id: "https://nde.nl/ontology/hc/person/taco-dibbits" - preferred_name: "Taco Dibbits"\ + preferred_name: "Taco Dibbits" has_person_observation: - - "https://nde.nl/ontology/hc/observation/linkedin-taco-dibbits-202... + - "https://nde.nl/ontology/hc/observation/linkedin-taco-dibbits-2024" description: Usage example