---
# Podman Compose for GLAM Heritage RAG API
#
# This deploys the RAG API container that connects to external services:
#   - Qdrant (vector search)    - host network
#   - Oxigraph (SPARQL)         - host network
#   - TypeDB (knowledge graph)  - host network
#   - PostGIS (geospatial)      - host network
#   - Valkey (semantic cache)   - host network
#
# Usage:
#   podman-compose up -d
#   podman-compose logs -f glam-rag-api
#
# Note: Uses --network=host to connect to localhost services on the host.
# This is simpler than bridge networking for this use case since all
# backend services run on the same host.

version: '3.8'

services:
  glam-rag-api:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        # Compose variable interpolation does not execute shell command
        # substitution, so a `$(date ...)` default would never become a
        # timestamp. Supply the value from the calling shell instead, e.g.:
        #   BUILD_DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ") podman-compose build
        BUILD_DATE: ${BUILD_DATE:-unknown}
        VCS_REF: ${VCS_REF:-local}

    container_name: glam-rag-api
    restart: unless-stopped

    # Use host network to access localhost services (Qdrant, Oxigraph, etc.)
    network_mode: host

    # Environment variables
    # API keys should be passed via env_file or runtime environment
    environment:
      # Service endpoints (localhost since using host network)
      - QDRANT_HOST=localhost
      - QDRANT_PORT=6333
      - QDRANT_COLLECTION=heritage_custodians_minilm
      - EMBEDDING_MODEL=all-MiniLM-L6-v2
      - EMBEDDING_DIM=384
      - TYPEDB_HOST=localhost
      - TYPEDB_PORT=1729
      - TYPEDB_DATABASE=glam
      - SPARQL_ENDPOINT=http://localhost:7878/query
      - VALKEY_CACHE_URL=http://localhost:8090
      - POSTGIS_HOST=localhost
      - POSTGIS_PORT=5432
      - POSTGIS_DATABASE=glam
      # LLM Configuration (overridable from the invoking environment)
      - LLM_PROVIDER=${LLM_PROVIDER:-openai}
      - LLM_MODEL=${LLM_MODEL:-gpt-4.1-mini}
      # Rate limiting
      - RAG_MAX_CONCURRENT=2
      - RAG_REQUESTS_PER_MINUTE=30

    # Load API keys from env file
    env_file:
      - /var/lib/glam/.env

    # Mount optimized models and schemas
    volumes:
      # Optimized DSPy models (persisted)
      - rag-optimized-models:/app/optimized_models
      # LinkML schemas for ontology mapping (read-only bind mount;
      # host path overridable via SCHEMAS_DIR)
      - ${SCHEMAS_DIR:-/var/lib/glam/schemas}:/app/schemas:ro
      # Benchmark results (optional)
      - rag-benchmark-results:/app/benchmark_results

    # Health check (also in Dockerfile, but explicit here)
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8010/health"]
      interval: 30s
      timeout: 10s
      start_period: 60s
      retries: 3

    # Resource limits (conservative for 4-core/8GB server)
    deploy:
      resources:
        limits:
          cpus: '2'
          memory: 4G
        reservations:
          cpus: '1'
          memory: 2G

# Named volumes for persistence
volumes:
  rag-optimized-models:
    name: glam-rag-optimized-models
  rag-benchmark-results:
    name: glam-rag-benchmark-results