# glam/backend/rag/Dockerfile

# Dockerfile for GLAM Heritage RAG API
#
# Multi-source retrieval-augmented generation system for heritage data
# Connects to: Qdrant, Oxigraph SPARQL, TypeDB, PostGIS, Valkey cache
#
# Build:
# podman build -t glam-rag-api .
#
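# Run (with --network host the API is reachable on host port 8010 directly;
# -p port mappings are ignored in host network mode, so none is given):
#   podman run -d --name glam-rag-api \
#     --network host \
#     -e OPENAI_API_KEY=$OPENAI_API_KEY \
#     glam-rag-api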
FROM python:3.11-slim

# Build-time values supplied with --build-arg and stamped into the labels below.
# Passing a different value on each build also acts as a cache buster for the
# steps that follow.
ARG BUILD_DATE
ARG VCS_REF

# OCI image-spec annotations describing this image
LABEL org.opencontainers.image.title="GLAM Heritage RAG API" \
      org.opencontainers.image.description="Heritage Knowledge Assistant with DSPy RAG" \
      org.opencontainers.image.source="https://git.bronhouder.nl/kempersc/glam" \
      org.opencontainers.image.created="${BUILD_DATE}" \
      org.opencontainers.image.revision="${VCS_REF}"
WORKDIR /app

# System packages (kept alphabetical for clean diffs):
#   build-essential  compile native extensions
#   curl             used by the HEALTHCHECK
#   git              some pip packages need git
# The apt package lists are removed in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*
# Unprivileged account for security: UID 1000, with a home directory and bash as login shell
RUN useradd --create-home --uid 1000 --shell /bin/bash glam
# The dependency manifest is copied on its own, ahead of the application code,
# so the install layers below get better layer caching.
COPY requirements.txt .

# CPU-only PyTorch wheels are installed first to avoid the massive CUDA download
# and runtime issues.
RUN pip install --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cpu \
        torch torchvision torchaudio

# Remaining Python dependencies
RUN pip install --no-cache-dir --requirement requirements.txt
# Copy application code, owned by the runtime user from the moment it is copied.
# Structure:
#   /app/
#     main.py
#     provenance.py
#     dspy_heritage_rag.py
#     ... (all .py files)
#     specificity/
#     evaluation/
#     optimized_models/
#     data/
COPY --chown=glam:glam . .

# Create directories for runtime data and hand them, plus /app itself, to glam.
# The chown is deliberately non-recursive: everything copied above is already
# glam-owned via --chown, and a recursive chown over /app would rewrite every
# copied file into a second image layer.
RUN mkdir -p /app/data /app/optimized_models /app/benchmark_results \
    && chown glam:glam /app /app/data /app/optimized_models /app/benchmark_results
# Drop root privileges: the instructions below and the container process run as glam
USER glam

# Python runtime flags (can be overridden at runtime)
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# Service endpoints and retrieval settings.
# Defaults point at localhost; override them at runtime (e.g. in compose).
ENV QDRANT_HOST=localhost \
    QDRANT_PORT=6333 \
    QDRANT_COLLECTION=heritage_custodians_minilm \
    EMBEDDING_MODEL=all-MiniLM-L6-v2 \
    EMBEDDING_DIM=384 \
    TYPEDB_HOST=localhost \
    TYPEDB_PORT=1729 \
    TYPEDB_DATABASE=glam \
    SPARQL_ENDPOINT=http://localhost:7878/query \
    VALKEY_CACHE_URL=http://localhost:8090 \
    POSTGIS_HOST=localhost \
    POSTGIS_PORT=5432 \
    POSTGIS_DATABASE=glam
# RAG API port
EXPOSE 8010

# Health probe: request /health on the API port; a failed request (curl -f)
# makes the check exit 1.
# NOTE: podman builds OCI-format images by default, and that format does not
# keep HEALTHCHECK; build with `--format docker` if the check is needed.
HEALTHCHECK --start-period=60s --interval=30s --timeout=10s --retries=3 \
    CMD curl -f http://localhost:8010/health || exit 1
# Production entrypoint: gunicorn managing multiple workers, each a uvicorn
# worker for async support. Serves main:app on port 8010 on all interfaces;
# "-" sends the access and error logs to the container's standard streams.
CMD [ \
    "gunicorn", "main:app", \
    "--bind", "0.0.0.0:8010", \
    "--workers", "2", \
    "--worker-class", "uvicorn.workers.UvicornWorker", \
    "--timeout", "120", \
    "--graceful-timeout", "30", \
    "--keep-alive", "5", \
    "--access-logfile", "-", \
    "--error-logfile", "-" \
]