# glam/src/glam_extractor/api/config.py
# Snapshot metadata: 2025-12-21 22:12:34 +01:00 — 76 lines, 2.7 KiB, Python
"""
API Configuration
Settings management using Pydantic for environment variable handling.
"""
from functools import lru_cache
from typing import Literal

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Values come from the process environment (and a local ``.env`` file,
    if present); field names map to env vars case-insensitively per
    pydantic-settings conventions. Unknown env vars are ignored.
    """

    # Modern pydantic-settings v2 configuration. Replaces the deprecated
    # nested ``class Config`` (v1 style), which emits a DeprecationWarning
    # under pydantic v2. Same semantics: read ``.env`` as UTF-8, ignore
    # extra environment variables.
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # --- API metadata ------------------------------------------------------
    api_title: str = "GLAM Heritage Custodian API"
    api_description: str = "API for heritage custodian data services and SPARQL query generation"
    api_version: str = "0.1.0"
    debug: bool = False

    # --- CORS --------------------------------------------------------------
    # Allowed browser origins: local Vite/CRA dev servers plus production.
    cors_origins: list[str] = Field(
        default=["http://localhost:5173", "http://localhost:3000", "https://bronhouder.nl"]
    )

    # --- LLM configuration for DSPy ---------------------------------------
    # Providers: openai, anthropic, zai (Z.AI Coding Plan - free GLM models), groq (free, fast)
    llm_provider: Literal["openai", "anthropic", "zai", "groq"] = "anthropic"
    openai_api_key: str | None = None
    anthropic_api_key: str | None = None
    zai_api_token: str | None = None  # Z.AI Coding Plan token
    groq_api_key: str | None = None  # Groq API key (free, very fast ~1-2s)
    llm_model: str = "claude-sonnet-4-20250514"  # or "gpt-4o" for OpenAI, "glm-4.6" for Z.AI

    # Fast LLM for routing/extraction (uses cheaper/faster model for intermediate steps).
    # Options: "groq" (FREE, fastest ~1-2s), "openai" (~1-2s, $0.15/1M), "zai" (FREE, ~13s)
    # Default: openai (since we have the key; change to groq if GROQ_API_KEY is set)
    fast_lm_provider: Literal["groq", "openai", "zai", "none"] = "openai"

    # --- SPARQL ------------------------------------------------------------
    sparql_endpoint: str = "http://localhost:7878/query"
    # Ontology context (LinkML schema) consumed during SPARQL generation.
    ontology_context_path: str = "schemas/20251121/linkml/01_custodian_name.yaml"

    # --- Qdrant vector database -------------------------------------------
    qdrant_host: str = "localhost"
    qdrant_port: int = 6333
    qdrant_collection: str = "heritage_custodians"
    qdrant_enabled: bool = True

    # --- Embeddings ---------------------------------------------------------
    embedding_model: str = "text-embedding-3-small"
    embedding_dim: int = 1536

    # --- Redis semantic cache ----------------------------------------------
    redis_url: str = "redis://localhost:6379"
    cache_enabled: bool = True
    cache_distance_threshold: float = 0.10  # Cosine distance for semantic matching
    cache_default_ttl: int = 3600  # 1 hour default TTL
    cache_warmup_on_startup: bool = True
    cache_embedding_model: str = "all-MiniLM-L6-v2"  # Sentence transformer model
    cache_cross_encoder_model: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"
@lru_cache
def get_settings() -> Settings:
    """Return the process-wide :class:`Settings` instance.

    The ``lru_cache`` decorator makes this a lazy singleton: the
    environment is read and validated on the first call only, and every
    subsequent call returns the same cached object.
    """
    return Settings()