# glam/backend/rag/podman-compose.yaml

# Podman Compose for GLAM Heritage RAG API
#
# This deploys the RAG API container that connects to external services:
# - Qdrant (vector search) - host network
# - Oxigraph (SPARQL) - host network
# - TypeDB (knowledge graph) - host network
# - PostGIS (geospatial) - host network
# - Valkey (semantic cache) - host network
#
# Usage:
#   podman-compose up -d
#   podman-compose logs -f glam-rag-api
#
# Note: The container runs with host networking (network_mode: host) so it
# can reach services listening on the host's localhost. This is simpler than
# bridge networking here because all backend services run on the same host.

# Compose file format version. Kept quoted so it is read as the string
# "3.8" rather than as a float.
version: '3.8'

services:
  # RAG API container, built from the Dockerfile in this directory.
  glam-rag-api:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        # Compose variable interpolation does not run shell command
        # substitution, so a "$(date ...)" default would never be evaluated.
        # Supply the timestamp from the calling shell instead, e.g.:
        #   BUILD_DATE="$(date -u +%Y-%m-%dT%H:%M:%SZ)" podman-compose build
        # When BUILD_DATE is unset or empty the build arg is "unknown".
        BUILD_DATE: ${BUILD_DATE:-unknown}
        # Revision identifier for the image; "local" when not provided.
        VCS_REF: ${VCS_REF:-local}
    container_name: glam-rag-api
    restart: unless-stopped

    # Use host network to access localhost services (Qdrant, Oxigraph, etc.)
    network_mode: host

    # Environment variables
    # API keys should be passed via env_file or runtime environment
    environment:
      # Service endpoints (localhost since using host network)
      # Qdrant (vector search)
      - QDRANT_HOST=localhost
      - QDRANT_PORT=6333
      - QDRANT_COLLECTION=heritage_custodians_minilm
      # Embedding settings.
      # NOTE(review): EMBEDDING_DIM presumably has to match the output size of
      # EMBEDDING_MODEL and the vector size of QDRANT_COLLECTION - confirm.
      - EMBEDDING_MODEL=all-MiniLM-L6-v2
      - EMBEDDING_DIM=384
      # TypeDB (knowledge graph)
      - TYPEDB_HOST=localhost
      - TYPEDB_PORT=1729
      - TYPEDB_DATABASE=glam
      # Oxigraph (SPARQL)
      - SPARQL_ENDPOINT=http://localhost:7878/query
      # Valkey (semantic cache).
      # NOTE(review): the URL uses http:// on port 8090, so this presumably
      # targets an HTTP front end for the cache - confirm.
      - VALKEY_CACHE_URL=http://localhost:8090
      # PostGIS (geospatial). No credentials are set here.
      # NOTE(review): presumably they come from the env file - confirm.
      - POSTGIS_HOST=localhost
      - POSTGIS_PORT=5432
      - POSTGIS_DATABASE=glam
      # LLM Configuration
      # Both values can be overridden from the invoking environment; the text
      # after ":-" is the fallback.
      - LLM_PROVIDER=${LLM_PROVIDER:-openai}
      - LLM_MODEL=${LLM_MODEL:-gpt-4.1-mini}
      # Rate limiting
      - RAG_MAX_CONCURRENT=2
      - RAG_REQUESTS_PER_MINUTE=30

    # Load API keys from env file
    # This is an absolute path on the deployment host.
    # NOTE(review): presumably the file must exist before `up` - confirm.
    env_file:
      - /var/lib/glam/.env

    # Mounts: two named volumes for data that should outlive the container,
    # plus a read-only mount of the schema directory.
    volumes:
      # DSPy optimized models (named volume, persisted)
      - "rag-optimized-models:/app/optimized_models"
      # LinkML schemas used for ontology mapping; the host path can be
      # overridden through SCHEMAS_DIR
      - "${SCHEMAS_DIR:-/var/lib/glam/schemas}:/app/schemas:ro"
      # Benchmark output (optional)
      - "rag-benchmark-results:/app/benchmark_results"

    # Health probe: request the API's /health endpoint on port 8010 with curl.
    # Declared explicitly here in addition to the Dockerfile's own check.
    healthcheck:
      start_period: 60s
      interval: 30s
      timeout: 10s
      retries: 3
      test:
        - CMD
        - curl
        - "-f"
        - "http://localhost:8010/health"

    # Resource sizing, conservative for a 4-core/8GB server:
    # 1 CPU / 2G reserved, capped at 2 CPUs / 4G.
    deploy:
      resources:
        reservations:
          cpus: "1"
          memory: "2G"
        limits:
          cpus: "2"
          memory: "4G"

# Named volumes for persistence
# Each volume is given an explicit `name`, so the on-host volume name is
# fixed to the value below.
volumes:
  # Backs /app/optimized_models in the glam-rag-api service
  rag-optimized-models:
    name: glam-rag-optimized-models
  # Backs /app/benchmark_results in the glam-rag-api service
  rag-benchmark-results:
    name: glam-rag-benchmark-results