# Dockerfile for GLAM Heritage RAG API
#
# Multi-source retrieval-augmented generation system for heritage data
# Connects to: Qdrant, Oxigraph SPARQL, TypeDB, PostGIS, Valkey cache
#
# Build:
#   podman build -t glam-rag-api .
#
# Run:
#   podman run -d --name glam-rag-api -p 8010:8010 \
#     --network host \
#     -e OPENAI_API_KEY=$OPENAI_API_KEY \
#     glam-rag-api

FROM python:3.11-slim

# Build metadata args. They feed the OCI labels below; because the labels
# reference them this early in the file, a changed value invalidates the cache
# for every later layer (the original intent: cache busting).
ARG BUILD_DATE
ARG VCS_REF

# Labels for OCI compliance
LABEL org.opencontainers.image.created="${BUILD_DATE}" \
      org.opencontainers.image.revision="${VCS_REF}" \
      org.opencontainers.image.title="GLAM Heritage RAG API" \
      org.opencontainers.image.description="Heritage Knowledge Assistant with DSPy RAG" \
      org.opencontainers.image.source="https://git.bronhouder.nl/kempersc/glam"

WORKDIR /app

# Install system dependencies
# - build-essential: compile native extensions
# - curl: healthcheck
# - git: some pip packages need git
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user for security
RUN useradd -m -u 1000 -s /bin/bash glam

# Install CPU-only PyTorch first to avoid massive CUDA download and runtime issues.
# This step does not read requirements.txt, so it runs before that file is
# copied: editing requirements.txt no longer forces a fresh torch download.
RUN pip install --no-cache-dir torch torchvision torchaudio \
        --index-url https://download.pytorch.org/whl/cpu

# Install the remaining Python dependencies before copying the application
# source (better layer caching: source edits do not reinstall dependencies).
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
# Structure:
#   /app/
#     main.py
#     provenance.py
#     dspy_heritage_rag.py
#     ... (all .py files)
#     specificity/
#     evaluation/
#     optimized_models/
#     data/
COPY --chown=glam:glam . .

# Create directories for runtime data.
# Everything copied above is already owned by glam via --chown, so only /app
# itself (created by WORKDIR) and the directories made here need their owner
# changed; a recursive chown would rewrite the whole tree into a second layer.
RUN mkdir -p /app/data /app/optimized_models /app/benchmark_results \
    && chown glam:glam /app /app/data /app/optimized_models /app/benchmark_results

# Switch to non-root user
USER glam

# Python runtime behaviour (can be overridden at runtime)
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# Service endpoints (default to localhost, override in compose)
ENV QDRANT_HOST=localhost \
    QDRANT_PORT=6333 \
    QDRANT_COLLECTION=heritage_custodians_minilm \
    EMBEDDING_MODEL=all-MiniLM-L6-v2 \
    EMBEDDING_DIM=384 \
    TYPEDB_HOST=localhost \
    TYPEDB_PORT=1729 \
    TYPEDB_DATABASE=glam \
    SPARQL_ENDPOINT=http://localhost:7878/query \
    VALKEY_CACHE_URL=http://localhost:8090 \
    POSTGIS_HOST=localhost \
    POSTGIS_PORT=5432 \
    POSTGIS_DATABASE=glam

# Expose RAG API port
EXPOSE 8010

# Health check: probes the /health endpoint on the port gunicorn binds below.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:8010/health || exit 1

# Run with gunicorn for production (multiple workers)
# Using uvicorn workers for async support
CMD ["gunicorn", "main:app", \
     "--bind", "0.0.0.0:8010", \
     "--workers", "2", \
     "--worker-class", "uvicorn.workers.UvicornWorker", \
     "--timeout", "120", \
     "--graceful-timeout", "30", \
     "--keep-alive", "5", \
     "--access-logfile", "-", \
     "--error-logfile", "-"]