glam/backend/rag/metrics.py
2025-12-30 03:43:31 +01:00

313 lines
9.4 KiB
Python

"""
Prometheus Metrics for Heritage RAG API
Exposes metrics for monitoring template-based SPARQL generation,
session management, and overall API performance.
Metrics exposed:
- rag_queries_total: Total queries by type (template/llm), status, endpoint
- rag_template_hits_total: Template SPARQL hits by template_id
- rag_query_duration_seconds: Query latency histogram
- rag_sessions_active: Active sessions gauge
- rag_cache_total: Cache hit/miss counter
Usage:
from backend.rag.metrics import (
record_query, create_metrics_endpoint, PROMETHEUS_AVAILABLE
)
# Record a query
record_query(
endpoint="dspy_query",
template_used=True,
template_id="count_by_province",
cache_hit=False,
status="success",
duration_seconds=1.5
)
"""
from __future__ import annotations

import logging
from functools import lru_cache
from typing import Any

logger = logging.getLogger(__name__)

# ============================================================================
# Prometheus Client Import (Lazy/Optional)
# ============================================================================
# prometheus_client is an optional dependency: when it is absent, the flag
# stays False and every helper in this module degrades to a silent no-op.
PROMETHEUS_AVAILABLE = False
_prometheus_client = None
try:
    import prometheus_client as _prometheus_client
except ImportError:
    logger.warning("prometheus_client not installed - metrics disabled")
else:
    PROMETHEUS_AVAILABLE = True
    logger.info("Prometheus metrics enabled")
# ============================================================================
# Metric Initialization
# ============================================================================
def _init_metrics():
    """Create and register all Prometheus metric objects for this module.

    Returns:
        Dict mapping internal metric keys to prometheus_client objects, or
        an empty dict when prometheus_client is not installed (so callers
        can treat "metrics disabled" and "metric missing" uniformly).
    """
    if not PROMETHEUS_AVAILABLE or _prometheus_client is None:
        return {}

    pc = _prometheus_client
    metrics: dict[str, Any] = {}

    # Counter of all processed queries, split by endpoint/method/status.
    metrics["query_counter"] = pc.Counter(
        "rag_queries_total",
        "Total RAG queries processed",
        labelnames=["endpoint", "method", "status"],
    )
    # Per-template usage counter.
    metrics["template_hit_counter"] = pc.Counter(
        "rag_template_hits_total",
        "Template SPARQL hits by template ID",
        labelnames=["template_id", "intent"],
    )
    # Cache outcome counter (result label is "hit" or "miss").
    metrics["cache_counter"] = pc.Counter(
        "rag_cache_total",
        "Cache hits and misses",
        labelnames=["result"],
    )
    # End-to-end query latency; buckets span sub-second to one minute.
    metrics["query_duration"] = pc.Histogram(
        "rag_query_duration_seconds",
        "Query processing time in seconds",
        labelnames=["endpoint", "method"],
        buckets=(0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0),
    )
    # Template matching is expected to be fast, hence millisecond buckets.
    metrics["template_matching_duration"] = pc.Histogram(
        "rag_template_matching_seconds",
        "Time to match query to template",
        labelnames=["matched"],
        buckets=(0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25),
    )
    # Gauge tracking currently-open conversation sessions.
    metrics["active_sessions_gauge"] = pc.Gauge(
        "rag_sessions_active",
        "Number of active conversation sessions",
    )
    return metrics
# Initialize metrics at module load
# Empty dict when prometheus_client is unavailable; all helpers below
# check this before touching any metric.
_metrics = _init_metrics()
# ============================================================================
# Helper Functions
# ============================================================================
def record_query(
    endpoint: str,
    template_used: bool,
    template_id: str | None,
    cache_hit: bool,
    status: str,
    duration_seconds: float,
    intent: str | None = None,
) -> None:
    """Record metrics for a completed query.

    No-op when prometheus_client is unavailable.

    Args:
        endpoint: API endpoint name (e.g., "dspy_query", "dspy_query_stream")
        template_used: Whether template SPARQL was used vs LLM generation
        template_id: Template ID if template was used
        cache_hit: Whether response was served from cache
        status: Query status ("success", "error", "timeout")
        duration_seconds: Total query duration in seconds
        intent: Query intent classification if available
    """
    if not PROMETHEUS_AVAILABLE or not _metrics:
        return

    method = "template" if template_used else "llm"
    shared_labels = {"endpoint": endpoint, "method": method}

    # Count the query by endpoint/method/status.
    _metrics["query_counter"].labels(status=status, **shared_labels).inc()

    # Attribute the hit to a specific template when one was used.
    if template_used and template_id:
        _metrics["template_hit_counter"].labels(
            template_id=template_id,
            intent=intent or "unknown",
        ).inc()

    # Track cache outcome.
    outcome = "hit" if cache_hit else "miss"
    _metrics["cache_counter"].labels(result=outcome).inc()

    # Record end-to-end latency.
    _metrics["query_duration"].labels(**shared_labels).observe(duration_seconds)
def record_template_matching(matched: bool, duration_seconds: float) -> None:
    """Record template matching attempt metrics.

    No-op when prometheus_client is unavailable.

    Args:
        matched: Whether a template was successfully matched
        duration_seconds: Time taken to attempt template matching
    """
    if PROMETHEUS_AVAILABLE and _metrics:
        label = "true" if matched else "false"
        _metrics["template_matching_duration"].labels(matched=label).observe(
            duration_seconds
        )
def set_active_sessions(count: int) -> None:
    """Update the active sessions gauge.

    No-op when prometheus_client is unavailable.

    Args:
        count: Current number of active sessions
    """
    if PROMETHEUS_AVAILABLE and _metrics:
        _metrics["active_sessions_gauge"].set(count)
def increment_active_sessions() -> None:
    """Increment active sessions by 1 (no-op when metrics are disabled)."""
    if PROMETHEUS_AVAILABLE and _metrics:
        _metrics["active_sessions_gauge"].inc()
def decrement_active_sessions() -> None:
    """Decrement active sessions by 1 (no-op when metrics are disabled)."""
    if PROMETHEUS_AVAILABLE and _metrics:
        _metrics["active_sessions_gauge"].dec()
# ============================================================================
# Metrics Endpoint
# ============================================================================
def _get_metrics_bytes() -> tuple[bytes, str]:
    """Generate the Prometheus exposition payload.

    Returns:
        Tuple of (metrics_bytes, content_type). When prometheus_client is
        not installed, a plain-text placeholder body is returned instead.
    """
    if not PROMETHEUS_AVAILABLE or _prometheus_client is None:
        return b"# Prometheus metrics not available\n", "text/plain"
    return (
        _prometheus_client.generate_latest(_prometheus_client.REGISTRY),
        _prometheus_client.CONTENT_TYPE_LATEST,
    )


def get_metrics_response() -> tuple[bytes, str]:
    """Generate a fresh Prometheus metrics response.

    Fix: the previous implementation decorated _get_metrics_bytes with
    @lru_cache(maxsize=1) and then called cache_clear() on every request,
    so the cache could never serve a hit. The decorator and the clear call
    are removed; metrics are simply generated fresh on each call, which is
    the same observable behavior without the misleading caching layer.

    Returns:
        Tuple of (metrics_bytes, content_type)
    """
    return _get_metrics_bytes()
def create_metrics_endpoint():
    """Create a FastAPI router for the /metrics endpoint.

    Usage:
        from backend.rag.metrics import create_metrics_endpoint
        app.include_router(create_metrics_endpoint())

    Returns:
        FastAPI APIRouter with /metrics endpoint
    """
    # Imported lazily so the module stays usable when FastAPI is absent.
    from fastapi import APIRouter
    from fastapi.responses import Response

    router = APIRouter(tags=["monitoring"])

    @router.get("/metrics")
    async def metrics():
        """Prometheus metrics endpoint for scraping."""
        payload, media_type = get_metrics_response()
        return Response(content=payload, media_type=media_type)

    return router
# ============================================================================
# Metric Summary Helpers (for logging/debugging)
# ============================================================================
def get_template_hit_rate() -> dict[str, Any]:
    """Calculate template hit rate from current metrics.

    Returns:
        Dict with hit rate statistics, or {"available": False} when
        prometheus_client is not installed.
    """
    if not PROMETHEUS_AVAILABLE or not _metrics:
        return {"available": False}

    # Sum counter samples per generation method. Only the base series is
    # counted; auxiliary samples (e.g. *_created) are filtered by name.
    totals = {"template": 0.0, "llm": 0.0}
    for metric in _metrics["query_counter"].collect():
        for sample in metric.samples:
            if sample.name != "rag_queries_total":
                continue
            method = sample.labels.get("method")
            if method in totals:
                totals[method] += sample.value

    template_total = totals["template"]
    overall = template_total + totals["llm"]
    rate = template_total / overall if overall > 0 else 0.0
    return {
        "available": True,
        "total_queries": int(overall),
        "template_queries": int(template_total),
        "llm_queries": int(totals["llm"]),
        "template_hit_rate": round(rate, 4),
        "template_hit_rate_percent": round(rate * 100, 2),
    }
def get_template_breakdown() -> dict[str, int]:
    """Get breakdown of template usage by template_id.

    Returns:
        Dict mapping template_id to hit count; empty when metrics are
        disabled.
    """
    if not PROMETHEUS_AVAILABLE or not _metrics:
        return {}

    counts: dict[str, int] = {}
    for metric in _metrics["template_hit_counter"].collect():
        for sample in metric.samples:
            # Skip auxiliary series such as *_created.
            if sample.name != "rag_template_hits_total":
                continue
            tid = sample.labels.get("template_id", "unknown")
            counts[tid] = int(sample.value)
    return counts