feat(infra): add webhook-based schema deployment pipeline

- Add FastAPI webhook receiver for Forgejo push events
- Add setup script for server deployment
- Add Caddy snippet for webhook endpoint
- Add local sync-schemas.sh helper script
- Sync frontend schemas with source (archived deprecated slots)

Infrastructure scripts staged for optional webhook deployment.
Current deployment uses: ./infrastructure/deploy.sh --frontend
This commit is contained in:
kempersc 2026-01-10 21:45:02 +01:00
parent f02cffe1e8
commit a4184cb805
18 changed files with 2368 additions and 2076 deletions

File diff suppressed because it is too large Load diff

View file

@ -14,7 +14,7 @@ imports:
- ../metadata
- ./SpecificityAnnotation
- ./TemplateSpecificityScores
- ../slots/access
- ../slots/has_or_had_access_condition
- ../slots/full_name
- ../slots/isil
- ../slots/location
@ -34,7 +34,7 @@ classes:
Used for key_archives (main archives for a topic) and related_archives
(external archives with related holdings).
slots:
- access
- has_or_had_access_condition
- full_name
- isil
- location

View file

@ -24,7 +24,7 @@ imports:
- ./FindingAid
- ./ExhibitedObject
- ./CurationActivity
- ../slots/access_policy_ref
- ../slots/has_or_had_access_policy_reference
- ../slots/has_acquisition_date
- ../slots/has_acquisition_method
- ../slots/has_acquisition_source
@ -166,7 +166,7 @@ classes:
- rico:Record
- bf:Item
slots:
- access_policy_ref
- has_or_had_access_policy_reference
- has_acquisition_date
- has_acquisition_method
- has_acquisition_source
@ -408,7 +408,7 @@ classes:
description: Source of VOC archives transfer
- value: Estate of Anna Drucker-Fraser
description: Source of bequest
has_access_policy_reference:
has_or_had_access_policy_reference:
slot_uri: premis:hasRightsDeclaration
description: |
Access policy governing this collection.
@ -679,7 +679,7 @@ classes:
acquisition_method: TRANSFER
acquisition_date: '1856-01-01'
acquisition_source: Ministry of Colonies
access_policy_ref: https://nde.nl/ontology/hc/access-policy/open-access
has_or_had_access_policy_reference: https://nde.nl/ontology/hc/access-policy/open-access
arrangement: Organized by provenance, then chronologically
has_or_had_finding_aid:
- finding_aid_id: https://nde.nl/finding-aid/nationaal-archief-voc-inventory
@ -699,5 +699,5 @@ classes:
# NOTE: All slots are defined in centralized modules/slots/ files
# Slots used by this class: collection_id, collection_type_ref, record_set_type,
# extent_items, subject_areas, provenance_statement, custodial_history, acquisition_source,
# access_policy_ref, arrangement, finding_aids, digital_surrogate_url, parent_collection,
# has_or_had_access_policy_reference, arrangement, finding_aids, digital_surrogate_url, parent_collection,
# has_or_had_sub_collection, items, curation_activities, part_of_custodian_collection

View file

@ -6,7 +6,7 @@ imports:
- ./ReconstructedEntity
- ./CustodianObservation
- ./ReconstructionActivity
- ../slots/accepts_external_work
- ../slots/accepts_or_accepted_external_work
- ../slots/has_or_had_accreditation_body
- ../slots/conservation_specialization
- ../slots/equipment_type
@ -126,7 +126,7 @@ classes:
- crm:E14_Condition_Assessment
- schema:ResearchOrganization
slots:
- accepts_external_work
- accepts_or_accepted_external_work
- has_or_had_accreditation_body
- conservation_specialization
- equipment_type
@ -416,7 +416,7 @@ classes:
is_accredited: true
accreditation_body: VeRes
staff_count: 12
accepts_external_work: false
accepts_or_accepted_external_work: false
description: Major museum conservation studio
- value:
lab_id: https://nde.nl/ontology/hc/aux/na-restauratie
@ -437,7 +437,7 @@ classes:
has_fume_hoods: true
has_deacidification_facility: true
staff_count: 6
accepts_external_work: true
accepts_or_accepted_external_work: true
description: Archive paper conservation workshop
slots:
lab_id:

View file

@ -50,7 +50,7 @@ imports:
- ../slots/publisher
- ../slots/publication_date
- ../slots/isbn
- ../slots/access
- ../slots/has_or_had_access_condition
- ../slots/is_or_was_access_restricted
- ../slots/all_links
- ../slots/card_description
@ -676,7 +676,7 @@ classes:
Used for key_archives (main archives for a topic) and related_archives
(external archives with related holdings).
slots:
- access
- has_or_had_access_condition
- full_name
- isil
- location

View file

@ -15,7 +15,7 @@ imports:
- ../slots/managed_by
- ../slots/price_currency
- ./ReconstructedEntity
- ../slots/accepts_payment_methods
- ../slots/accepts_or_accepted_payment_method
- ../slots/has_or_had_annual_revenue
- ../slots/giftshop_price_range
- ../slots/online_shop
@ -65,7 +65,7 @@ classes:
\ materials, replicas for learning\n\n**PHYSICAL vs. DIGITAL PRESENCE**:\n\n\
Gift shops can exist in multiple forms:\n\n1. **Physical shop** (on-site): Located\
\ within museum/archive building\n - Links to AuxiliaryPlace (physical location)\n\
\ - Has opening_hours, accepts_payment_methods\n \n2. **Physical shop**\
\ - Has opening_hours, accepts_or_accepted_payment_method\n \n2. **Physical shop**\
\ (separate): Stand-alone retail location\n - Links to AuxiliaryPlace with\
\ type RETAIL_SPACE\n - May have separate street address, hours\n \n3. **Online\
\ shop** (e-commerce): Web-based retail platform\n - Links to AuxiliaryDigitalPlatform\
@ -121,7 +121,7 @@ classes:
- gr:Offering
- schema:Product
slots:
- accepts_payment_methods
- accepts_or_accepted_payment_method
- has_or_had_annual_revenue
- giftshop_price_range
- managed_by

View file

@ -7,7 +7,7 @@ imports:
- ./CustodianObservation
- ./ReconstructionActivity
- ../enums/ResearchCenterTypeEnum
- ../slots/accepts_visiting_scholars
- ../slots/accepts_or_accepted_visiting_scholar
- ../slots/has_or_had_affiliated_university
- ../slots/has_or_had_custodian_type
- ../slots/fellows_count
@ -130,7 +130,7 @@ classes:
- hc:ConservationLab
- hc:EducationCenter
slots:
- accepts_visiting_scholars
- accepts_or_accepted_visiting_scholar
- has_or_had_affiliated_university
- has_or_had_custodian_type
- fellows_count
@ -369,7 +369,7 @@ classes:
has_publication_series: true
publication_series_name: Rijksmuseum Studies in Art
has_research_library: true
accepts_visiting_scholars: true
accepts_or_accepted_visiting_scholar: true
major_research_project:
- Rembrandt Database
- Operation Night Watch
@ -391,7 +391,7 @@ classes:
- TU Delft
has_fellows_program: true
fellows_count: 4
accepts_visiting_scholars: true
accepts_or_accepted_visiting_scholar: true
staff_count: 8
description: Digital humanities research lab
slots:

View file

@ -1,23 +1,26 @@
id: https://nde.nl/ontology/hc/slot/has_access_policy_reference
name: has_access_policy_reference_slot
title: Has Access Policy Reference Slot
id: https://nde.nl/ontology/hc/slot/has_or_had_access_policy_reference
name: has_or_had_access_policy_reference_slot
title: Has Or Had Access Policy Reference Slot
prefixes:
dcterms: http://purl.org/dc/terms/
hc: https://nde.nl/ontology/hc/
linkml: https://w3id.org/linkml/
schema: http://schema.org/
premis: http://www.loc.gov/premis/rdf/v3/
imports:
- linkml:types
default_prefix: hc
slots:
has_access_policy_reference:
has_or_had_access_policy_reference:
description: >-
Reference (URL or citation) to an access policy document.
Uses temporal naming to indicate that access policies may change over time.
range: uri
slot_uri: dcterms:references
slot_uri: premis:hasRightsDeclaration
exact_mappings:
- dcterms:references
- premis:hasRightsDeclaration
close_mappings:
- dcterms:references
- schema:citation
- dcterms:source
annotations:

View file

@ -0,0 +1,16 @@
# GLAM Deploy Webhook - Add to bronhouder.nl site block
# Insert after the /health handler
#
# This handles webhook callbacks from Forgejo for automatic schema deployment
#
# To add to /etc/caddy/Caddyfile, insert within the bronhouder.nl block:
# Webhook endpoint for Forgejo push events
handle /webhook/deploy* {
reverse_proxy 127.0.0.1:8099 {
transport http {
read_timeout 120s
write_timeout 120s
}
}
}

View file

@ -0,0 +1,262 @@
#!/usr/bin/env python3
"""
Webhook receiver for Forgejo push events.
Triggers schema sync when push events are received on the main branch.
Run with: uvicorn deploy-webhook:app --port 8099 --host 127.0.0.1
Or as systemd service: deploy-webhook.service
"""
import asyncio
import hashlib
import hmac
import json
import os
import subprocess  # NOTE(review): appears unused — asyncio.subprocess is used below; confirm before removing
import logging
from datetime import datetime
from pathlib import Path
from typing import Optional

from fastapi import FastAPI, Request, HTTPException, Header
from fastapi.responses import JSONResponse
from pydantic import BaseModel

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

app = FastAPI(title="GLAM Deploy Webhook", version="1.0.0")

# Configuration
# Shared HMAC secret; when empty, signature verification is skipped (see verify_signature).
WEBHOOK_SECRET = os.environ.get("WEBHOOK_SECRET", "")
# Server-side git checkout that `git pull` updates during a sync.
REPO_PATH = Path("/var/lib/glam/repo")
SCRIPTS_PATH = Path("/var/lib/glam/scripts")
# Web root served to the frontend; schemas are rsynced underneath it.
FRONTEND_PATH = Path("/var/www/glam-frontend")
# Source and destination of the LinkML schema tree for rsync.
LINKML_SOURCE = REPO_PATH / "schemas/20251121/linkml"
LINKML_DEST = FRONTEND_PATH / "schemas/20251121/linkml"

# Lock to prevent concurrent deployments
deploy_lock = asyncio.Lock()
def verify_signature(payload: bytes, signature: str) -> bool:
    """Check a webhook body against its HMAC-SHA256 signature header.

    When no WEBHOOK_SECRET is configured, verification is deliberately
    skipped (open mode) and a warning is logged. An empty signature with
    a configured secret is rejected outright.

    Args:
        payload: raw request body bytes.
        signature: hex digest from the signature header, optionally
            prefixed with ``sha256=`` (Forgejo/Gitea format).

    Returns:
        True when the signature matches (or verification is disabled).
    """
    if not WEBHOOK_SECRET:
        logger.warning("WEBHOOK_SECRET not set, skipping signature verification")
        return True
    if not signature:
        return False
    # Strip the Forgejo "sha256=<hex>" prefix before comparing.
    provided = signature[7:] if signature.startswith("sha256=") else signature
    computed = hmac.new(
        WEBHOOK_SECRET.encode(),
        payload,
        hashlib.sha256
    ).hexdigest()
    # Constant-time comparison to avoid timing side channels.
    return hmac.compare_digest(computed, provided)
class DeployResult(BaseModel):
    """Response model describing the outcome of one deployment run."""
    # True only if every step (git pull, rsync) succeeded.
    success: bool
    # Human-readable outcome summary.
    message: str
    # Per-step results keyed by step name (truncated stdout/stderr snippets).
    details: Optional[dict] = None
    # ISO-8601 timestamp of when the result was produced.
    timestamp: str
async def run_command(cmd: list[str], cwd: Optional[Path] = None) -> tuple[int, str, str]:
    """Execute an external command without blocking the event loop.

    Args:
        cmd: argv-style command list (executed directly, no shell).
        cwd: optional working directory for the child process.

    Returns:
        Tuple of (return code, decoded stdout, decoded stderr).
    """
    proc = await asyncio.create_subprocess_exec(
        *cmd,
        cwd=cwd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    out_bytes, err_bytes = await proc.communicate()
    return proc.returncode, out_bytes.decode(), err_bytes.decode()
async def sync_schemas() -> dict:
    """Sync LinkML schemas from repo to frontend public directory.

    Runs `git pull` in REPO_PATH, then rsyncs LINKML_SOURCE into
    LINKML_DEST (with --delete, so removed files are pruned). Returns a
    dict of per-step results; stops early (without raising) if the pull
    fails, so callers must inspect each step's "success" flag.
    """
    results = {}
    # Pull latest changes
    logger.info(f"Pulling latest changes in {REPO_PATH}")
    code, stdout, stderr = await run_command(["git", "pull", "origin", "main"], cwd=REPO_PATH)
    # Truncate captured output so the webhook response stays small.
    results["git_pull"] = {
        "success": code == 0,
        "stdout": stdout[:500] if stdout else "",
        "stderr": stderr[:500] if stderr else ""
    }
    if code != 0:
        logger.error(f"Git pull failed: {stderr}")
        return results
    # Ensure destination directory exists
    LINKML_DEST.mkdir(parents=True, exist_ok=True)
    # Sync LinkML schemas using rsync
    logger.info(f"Syncing schemas: {LINKML_SOURCE} -> {LINKML_DEST}")
    # Trailing slashes: copy directory *contents*, not the directory itself.
    code, stdout, stderr = await run_command([
        "rsync", "-av", "--delete",
        "--exclude", "*.pyc",
        "--exclude", "__pycache__",
        "--exclude", ".git",
        f"{LINKML_SOURCE}/",
        f"{LINKML_DEST}/"
    ])
    results["rsync_linkml"] = {
        "success": code == 0,
        "stdout": stdout[:1000] if stdout else "",
        "stderr": stderr[:500] if stderr else ""
    }
    if code != 0:
        logger.error(f"rsync failed: {stderr}")
    else:
        logger.info("Schema sync completed successfully")
    return results
@app.get("/health")
async def health():
    """Liveness probe: confirm the webhook service is up."""
    status_payload = {"status": "ok", "service": "deploy-webhook"}
    return status_payload
@app.post("/webhook/deploy")
async def deploy_webhook(
    request: Request,
    x_forgejo_signature: Optional[str] = Header(None, alias="X-Forgejo-Signature"),
    x_forgejo_event: Optional[str] = Header(None, alias="X-Forgejo-Event"),
    x_gitea_signature: Optional[str] = Header(None, alias="X-Gitea-Signature"),
    x_gitea_event: Optional[str] = Header(None, alias="X-Gitea-Event"),
):
    """
    Handle Forgejo/Gitea push webhook.

    Verifies the HMAC signature, then only acts on push events to
    main/master whose commits touch files under schemas/20251121/linkml/.
    A lock ensures at most one deployment runs at a time; concurrent
    requests get a 409 instead of queueing.

    Raises:
        HTTPException: 401 on bad signature, 400 on malformed JSON,
            500 if the sync itself raises.
    """
    # Local import so the module-level import block stays unchanged.
    from datetime import timezone

    body = await request.body()
    # Use Forgejo or Gitea headers (Forgejo is a Gitea fork)
    signature = x_forgejo_signature or x_gitea_signature
    event = x_forgejo_event or x_gitea_event
    # Verify signature before trusting anything in the payload
    if not verify_signature(body, signature or ""):
        logger.warning("Invalid webhook signature")
        raise HTTPException(status_code=401, detail="Invalid signature")
    # Parse payload
    try:
        payload = json.loads(body)
    except json.JSONDecodeError:
        raise HTTPException(status_code=400, detail="Invalid JSON payload")
    # Only process push events
    if event != "push":
        return JSONResponse({
            "status": "ignored",
            "reason": f"Event type '{event}' not handled"
        })
    # Only process pushes to main branch
    ref = payload.get("ref", "")
    if ref not in ["refs/heads/main", "refs/heads/master"]:
        return JSONResponse({
            "status": "ignored",
            "reason": f"Push to non-main branch: {ref}"
        })
    # Deploy only when some commit touched the schema tree.
    commits = payload.get("commits", [])
    schema_changed = any(
        path.startswith("schemas/20251121/linkml/")
        for commit in commits
        for path in (
            commit.get("modified", [])
            + commit.get("added", [])
            + commit.get("removed", [])
        )
    )
    if not schema_changed:
        return JSONResponse({
            "status": "ignored",
            "reason": "No schema changes detected"
        })
    # Fast-fail if a deployment is already in flight rather than queueing.
    if deploy_lock.locked():
        return JSONResponse({
            "status": "busy",
            "message": "Deployment already in progress"
        }, status_code=409)
    async with deploy_lock:
        logger.info(f"Starting deployment for push to {ref}")
        try:
            results = await sync_schemas()
            success = all(r.get("success", False) for r in results.values())
            return DeployResult(
                success=success,
                message="Schema sync completed" if success else "Schema sync failed",
                details=results,
                # Timezone-aware UTC; datetime.utcnow() is deprecated (3.12+).
                timestamp=datetime.now(timezone.utc).isoformat()
            )
        except Exception as e:
            logger.exception("Deployment failed")
            raise HTTPException(status_code=500, detail=str(e))
@app.post("/webhook/deploy/manual")
async def manual_deploy(request: Request):
    """
    Manual deployment trigger (for testing or forced sync).

    Requires ``Authorization: Bearer <WEBHOOK_SECRET>`` when a secret is
    configured. With no secret set the endpoint is open — same trust
    model as the unsigned-webhook path in verify_signature.
    """
    # Local import so the module-level import block stays unchanged.
    from datetime import timezone

    auth = request.headers.get("Authorization", "")
    expected = f"Bearer {WEBHOOK_SECRET}"
    # Constant-time comparison: plain != would leak how many leading
    # bytes of the token matched via response timing.
    if WEBHOOK_SECRET and not hmac.compare_digest(auth, expected):
        raise HTTPException(status_code=401, detail="Unauthorized")
    # Mirror the webhook path: refuse rather than queue concurrent runs.
    if deploy_lock.locked():
        return JSONResponse({
            "status": "busy",
            "message": "Deployment already in progress"
        }, status_code=409)
    async with deploy_lock:
        logger.info("Starting manual deployment")
        try:
            results = await sync_schemas()
            success = all(r.get("success", False) for r in results.values())
            return DeployResult(
                success=success,
                message="Manual schema sync completed" if success else "Manual sync failed",
                details=results,
                # Timezone-aware UTC; datetime.utcnow() is deprecated (3.12+).
                timestamp=datetime.now(timezone.utc).isoformat()
            )
        except Exception as e:
            logger.exception("Manual deployment failed")
            raise HTTPException(status_code=500, detail=str(e))
if __name__ == "__main__":
    # Allow running the receiver directly (python deploy-webhook.py)
    # without the uvicorn CLI / systemd wrapper. Binds to loopback only;
    # Caddy is expected to reverse-proxy /webhook/deploy to this port.
    import uvicorn
    uvicorn.run(app, host="127.0.0.1", port=8099)

View file

@ -0,0 +1,133 @@
#!/bin/bash
# Setup script for GLAM deploy webhook on the server
# Run this on the Hetzner server as root
#
# Usage: setup-webhook.sh [webhook-secret]
#   If no secret is supplied, a random 32-byte hex secret is generated.
set -euo pipefail

echo "=== GLAM Deploy Webhook Setup ==="

# Configuration
WEBHOOK_SECRET="${1:-$(openssl rand -hex 32)}"
GLAM_USER="glam"
SCRIPTS_DIR="/var/lib/glam/scripts"
REPO_DIR="/var/lib/glam/repo"

# Create directories
echo "Creating directories..."
mkdir -p "$SCRIPTS_DIR" "$REPO_DIR"

# Clone/update the repo
if [ -d "$REPO_DIR/.git" ]; then
  echo "Updating existing repo..."
  cd "$REPO_DIR"
  git fetch origin
  git reset --hard origin/main
else
  echo "Cloning repository..."
  git clone https://git.bronhouder.nl/kempersc/glam.git "$REPO_DIR"
fi

# Install Python dependencies
# NOTE: installs system-wide; consider a dedicated virtualenv for isolation.
echo "Installing Python dependencies..."
pip3 install fastapi uvicorn pydantic --quiet

# Copy webhook script
echo "Deploying webhook script..."
cp "$REPO_DIR/infrastructure/scripts/deploy-webhook.py" "$SCRIPTS_DIR/"

# Create systemd service
# (unquoted EOF on purpose: $GLAM_USER, $SCRIPTS_DIR and $WEBHOOK_SECRET
# must expand into the unit file)
echo "Creating systemd service..."
cat > /etc/systemd/system/deploy-webhook.service << EOF
[Unit]
Description=GLAM Deploy Webhook Service
Documentation=https://git.bronhouder.nl/kempersc/glam
After=network.target caddy.service

[Service]
Type=simple
User=$GLAM_USER
Group=$GLAM_USER
WorkingDirectory=$SCRIPTS_DIR
Environment="WEBHOOK_SECRET=$WEBHOOK_SECRET"
ExecStart=/usr/bin/python3 -m uvicorn deploy-webhook:app --host 127.0.0.1 --port 8099
Restart=always
RestartSec=5
StandardOutput=journal
StandardError=journal

# Security
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=/var/lib/glam /var/www/glam-frontend

[Install]
WantedBy=multi-user.target
EOF

# Set ownership
echo "Setting ownership..."
chown -R "$GLAM_USER:$GLAM_USER" "$REPO_DIR"
chown -R "$GLAM_USER:$GLAM_USER" "$SCRIPTS_DIR"

# Add webhook endpoint to Caddy
echo "Checking Caddy configuration..."
if ! grep -q "/webhook/deploy" /etc/caddy/Caddyfile; then
  echo "Adding webhook endpoint to Caddy..."
  # Insert webhook handler after /health in bronhouder.nl block.
  # sed exits 0 even when the address range never matches, so we must
  # re-check the file afterwards instead of relying on sed's exit code.
  sed -i '/bronhouder.nl, www.bronhouder.nl/,/handle \/health/a\\n\t# Webhook endpoint for Forgejo push events\n\thandle /webhook/deploy* {\n\t\treverse_proxy 127.0.0.1:8099 {\n\t\t\ttransport http {\n\t\t\t\tread_timeout 120s\n\t\t\t\twrite_timeout 120s\n\t\t\t}\n\t\t}\n\t}' /etc/caddy/Caddyfile || true
  if ! grep -q "/webhook/deploy" /etc/caddy/Caddyfile; then
    echo "WARNING: Could not auto-add webhook to Caddyfile"
    echo "Please manually add the following to bronhouder.nl block:"
    cat << 'CADDY'
	# Webhook endpoint for Forgejo push events
	handle /webhook/deploy* {
		reverse_proxy 127.0.0.1:8099 {
			transport http {
				read_timeout 120s
				write_timeout 120s
			}
		}
	}
CADDY
  fi
fi

# Reload systemd and start service
echo "Starting services..."
systemctl daemon-reload
systemctl enable deploy-webhook
systemctl restart deploy-webhook

# Reload Caddy only when the config validates; a reload failure is
# reported as such instead of being blamed on validation.
if caddy validate --config /etc/caddy/Caddyfile; then
  systemctl reload caddy || echo "WARNING: Caddy reload failed. Please reload manually."
else
  echo "WARNING: Caddy config validation failed. Please fix manually."
fi

# Initial schema sync
echo "Running initial schema sync..."
cd "$REPO_DIR"
rsync -av --delete \
  --exclude "*.pyc" \
  --exclude "__pycache__" \
  --exclude ".git" \
  "schemas/20251121/linkml/" \
  "/var/www/glam-frontend/schemas/20251121/linkml/"

# Print the secret so the operator can paste it into the Forgejo webhook UI.
echo ""
echo "=== Setup Complete ==="
echo ""
echo "Webhook Secret: $WEBHOOK_SECRET"
echo ""
echo "Next steps:"
echo "1. Go to https://git.bronhouder.nl/kempersc/glam/settings/hooks"
echo "2. Add a new webhook:"
echo "   - Target URL: https://bronhouder.nl/webhook/deploy"
echo "   - HTTP Method: POST"
echo "   - Content Type: application/json"
echo "   - Secret: $WEBHOOK_SECRET"
echo "   - Trigger On: Push Events"
echo "   - Branch filter: main"
echo ""
echo "Test with: curl -X POST https://bronhouder.nl/webhook/deploy/manual -H 'Authorization: Bearer $WEBHOOK_SECRET'"

32
infrastructure/sync-schemas.sh Executable file
View file

@ -0,0 +1,32 @@
#!/bin/bash
# Sync LinkML schemas from source to frontend public directory
# This ensures the frontend serves the latest schemas during development and in production builds
set -euo pipefail

# Resolve paths relative to this script so it works from any cwd.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
SOURCE_DIR="$PROJECT_ROOT/schemas/20251121/linkml"
DEST_DIR="$PROJECT_ROOT/frontend/public/schemas/20251121/linkml"

# Fail early with a clear message instead of a cryptic rsync error.
if [ ! -d "$SOURCE_DIR" ]; then
  echo "ERROR: source directory not found: $SOURCE_DIR" >&2
  exit 1
fi

echo "Syncing LinkML schemas..."
echo "  Source: $SOURCE_DIR"
echo "  Dest: $DEST_DIR"

# Ensure destination directory exists
mkdir -p "$DEST_DIR"

# Rsync with delete to remove old files (trailing slashes: copy contents)
rsync -av --delete \
  --exclude "*.pyc" \
  --exclude "__pycache__" \
  --exclude ".git" \
  "$SOURCE_DIR/" \
  "$DEST_DIR/"

echo "Schema sync complete!"

# Count files synced (pipefail ensures a failing find is not masked by wc)
TOTAL=$(find "$DEST_DIR" -type f -name "*.yaml" | wc -l | tr -d ' ')
echo "  Total YAML files: $TOTAL"

View file

@ -0,0 +1,25 @@
[Unit]
Description=GLAM Deploy Webhook Service
Documentation=https://git.bronhouder.nl/kempersc/glam
After=network.target caddy.service
[Service]
Type=simple
User=glam
Group=glam
WorkingDirectory=/var/lib/glam/scripts
Environment="WEBHOOK_SECRET="
ExecStart=/usr/bin/python3 -m uvicorn deploy-webhook:app --host 127.0.0.1 --port 8099
Restart=always
RestartSec=5
StandardOutput=journal
StandardError=journal
# Security
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=/var/lib/glam /var/www/glam-frontend
[Install]
WantedBy=multi-user.target