glam/backend/rag/podman-compose.yaml
kempersc e3adb4ed60 feat: Introduce Overview, RealnessStatus, and WebLink classes with comprehensive documentation and migration notes
- Added Overview class to represent structured collections of web links, including detailed descriptions, examples, and ontology alignments.
- Introduced RealnessStatus class to classify data as real or synthetic, with rich provenance and temporal semantics.
- Created WebLink class for representing hyperlinks with associated metadata, enhancing structured link representation.
- Established new slots: has_or_had_comprehensive_overview, is_or_was_real, and includes_or_included to support the new classes and improve data modeling.
- Migrated existing slots to new structures, ensuring compliance with RiC-O naming conventions and enhancing specificity.
- Updated annotations and examples across all new classes and slots for clarity and usability.
2026-01-14 09:32:14 +01:00

94 lines
2.8 KiB
YAML

# Podman Compose for GLAM Heritage RAG API
#
# This deploys the RAG API container that connects to external services:
# - Qdrant (vector search) - host network
# - Oxigraph (SPARQL) - host network
# - TypeDB (knowledge graph) - host network
# - PostGIS (geospatial) - host network
# - Valkey (semantic cache) - host network
#
# Usage:
# podman-compose up -d
# podman-compose logs -f glam-rag-api
#
# Note: The service runs with host networking (network_mode: host, the
# Compose equivalent of --network=host) so it can reach the backend services
# listening on the host's localhost. This is simpler than bridge networking
# here because all backend services run on the same host.
# Compose file format version; quoted so it is read as a string, not a float.
version: '3.8'

services:
  # RAG API container. All backend services are reached via localhost because
  # the container shares the host's network namespace (see network_mode).
  glam-rag-api:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        # Compose interpolation does not execute shell command substitution,
        # so a "$(date ...)" default would never produce a timestamp.
        # Export the value when building instead, e.g.:
        #   BUILD_DATE=$(date -u +%Y-%m-%dT%H:%M:%SZ) podman-compose build
        BUILD_DATE: '${BUILD_DATE:-unknown}'
        VCS_REF: '${VCS_REF:-local}'
    container_name: glam-rag-api
    restart: unless-stopped

    # Use host network to access localhost services (Qdrant, Oxigraph, etc.)
    network_mode: host

    # Environment variables
    # API keys should be passed via env_file or runtime environment
    environment:
      # Service endpoints (localhost since using host network)
      - QDRANT_HOST=localhost
      - QDRANT_PORT=6333
      - QDRANT_COLLECTION=heritage_custodians_minilm
      - EMBEDDING_MODEL=all-MiniLM-L6-v2
      - EMBEDDING_DIM=384
      - TYPEDB_HOST=localhost
      - TYPEDB_PORT=1729
      - TYPEDB_DATABASE=glam
      - SPARQL_ENDPOINT=http://localhost:7878/query
      - VALKEY_CACHE_URL=http://localhost:8090
      - POSTGIS_HOST=localhost
      - POSTGIS_PORT=5432
      - POSTGIS_DATABASE=glam
      # LLM configuration (overridable from the invoking environment)
      - LLM_PROVIDER=${LLM_PROVIDER:-openai}
      - LLM_MODEL=${LLM_MODEL:-gpt-4.1-mini}
      # Rate limiting
      - RAG_MAX_CONCURRENT=2
      - RAG_REQUESTS_PER_MINUTE=30

    # Load API keys from env file
    env_file:
      - /var/lib/glam/.env

    # Mount optimized models and schemas
    volumes:
      # Optimized DSPy models (persisted in a named volume)
      - rag-optimized-models:/app/optimized_models
      # LinkML schemas for ontology mapping (read-only bind mount; the host
      # directory can be overridden with SCHEMAS_DIR)
      - '${SCHEMAS_DIR:-/var/lib/glam/schemas}:/app/schemas:ro'
      # Benchmark results (optional)
      - rag-benchmark-results:/app/benchmark_results

    # Health check (also in Dockerfile, but explicit here)
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8010/health"]
      interval: 30s
      timeout: 10s
      start_period: 60s
      retries: 3

    # Resource limits (conservative for 4-core/8GB server)
    deploy:
      resources:
        limits:
          cpus: '2'
          memory: 4G
        reservations:
          cpus: '1'
          memory: 2G
# Named volumes for persistence.
# Each entry sets an explicit `name`, so the volume is created with exactly
# that name instead of one derived from the Compose project name.
volumes:
  rag-optimized-models:
    name: glam-rag-optimized-models
  rag-benchmark-results:
    name: glam-rag-benchmark-results