# glam/tests/dspy_gitops/metrics/intent_accuracy.py

"""
Intent Accuracy Metrics

Measures how accurately the system classifies query intent.
"""

from typing import Any


def intent_accuracy(expected: str, predicted: str) -> float:
    """Score an intent prediction by exact match.

    Both labels are lower-cased and then stripped of surrounding whitespace
    before being compared, so differences in case or padding do not count
    as a mismatch.

    Args:
        expected: The reference intent label.
        predicted: The intent label produced by the system.

    Returns:
        1.0 when the normalized labels are equal, 0.0 otherwise.
    """
    reference = expected.lower().strip()
    candidate = predicted.lower().strip()

    if reference == candidate:
        return 1.0
    return 0.0


def intent_accuracy_metric(example: Any, pred: Any, trace: Any = None) -> float:
    """Adapt ``intent_accuracy`` to the (example, pred, trace) metric signature.

    The expected label is read from ``example.expected_intent`` and the
    predicted label from ``pred.intent``. A missing attribute is treated the
    same as an attribute set to ``None``.

    Args:
        example: Object carrying the reference label as ``expected_intent``.
        pred: Object carrying the predicted label as ``intent``.
        trace: Accepted for signature compatibility; not used by this metric.

    Returns:
        0.0 when either label is missing or ``None``; otherwise the result of
        ``intent_accuracy`` for the two labels.
    """
    gold_label = getattr(example, "expected_intent", None)
    guessed_label = getattr(pred, "intent", None)

    # Only score when both sides actually provide a label.
    if gold_label is not None and guessed_label is not None:
        return intent_accuracy(gold_label, guessed_label)

    return 0.0


# Partial-credit scores for pairs of *different* intents.
# Each pair is stored once; intent_similarity_score() checks both orderings,
# so the order of the two labels inside a key does not matter.
INTENT_SIMILARITY = {
    ("statistical", "exploration"): 0.3,
    ("geographic", "exploration"): 0.3,
    ("entity_lookup", "exploration"): 0.5,
    ("temporal", "entity_lookup"): 0.2,
    ("relational", "entity_lookup"): 0.3,
    ("comparative", "statistical"): 0.4,
}


def intent_similarity_score(expected: str, predicted: str) -> float:
    """Calculate intent similarity with partial credit.

    Labels are lower-cased and stripped of surrounding whitespace before
    comparison (the same normalization ``intent_accuracy`` applies), so a
    prediction that only differs by case or padding still gets full credit.

    Args:
        expected: Expected intent
        predicted: Predicted intent

    Returns:
        1.0 for matching labels, the ``INTENT_SIMILARITY`` score when the
        pair is listed there in either order, and 0.0 otherwise.
    """
    # Normalize once up front instead of re-lowering for every comparison.
    want = expected.lower().strip()
    got = predicted.lower().strip()

    if want == got:
        return 1.0

    # The table stores each pair in one orientation only; try the pair as
    # given first, then fall back to the swapped orientation.
    score = INTENT_SIMILARITY.get((want, got))
    if score is not None:
        return score

    return INTENT_SIMILARITY.get((got, want), 0.0)