"""Intent accuracy metrics.

Measures how accurately the system classifies query intent.
"""

from typing import Any


def _normalize_intent(label: str) -> str:
    """Return *label* lower-cased with surrounding whitespace removed.

    Every metric in this module compares labels through this helper so that
    case and padding differences are treated the same way everywhere.
    """
    return label.lower().strip()


def intent_accuracy(expected: str, predicted: str) -> float:
    """Calculate intent accuracy (exact match).

    The comparison ignores letter case and leading/trailing whitespace.

    Args:
        expected: Expected intent classification
        predicted: Predicted intent classification

    Returns:
        1.0 if the normalized labels are equal, 0.0 otherwise
    """
    if _normalize_intent(expected) == _normalize_intent(predicted):
        return 1.0
    return 0.0


def intent_accuracy_metric(example: Any, pred: Any, trace: Any = None) -> float:
    """DSPy-compatible intent accuracy metric.

    Args:
        example: Object exposing the expected label as ``expected_intent``
        pred: Object exposing the predicted label as ``intent``
        trace: Accepted for signature compatibility; not used here

    Returns:
        1.0 if the intents match (see ``intent_accuracy``), 0.0 otherwise.
        A missing or ``None`` attribute on either object scores 0.0.
    """
    expected = getattr(example, "expected_intent", None)
    predicted = getattr(pred, "intent", None)

    # Without both labels there is nothing to compare; score it as a miss.
    if expected is None or predicted is None:
        return 0.0

    return intent_accuracy(expected, predicted)


# Intent similarity mapping for partial credit.
# Each pair is stored once; intent_similarity_score() looks it up in either
# order, so the reversed pair does not need its own entry.
INTENT_SIMILARITY = {
    ("statistical", "exploration"): 0.3,
    ("geographic", "exploration"): 0.3,
    ("entity_lookup", "exploration"): 0.5,
    ("temporal", "entity_lookup"): 0.2,
    ("relational", "entity_lookup"): 0.3,
    ("comparative", "statistical"): 0.4,
}


def intent_similarity_score(expected: str, predicted: str) -> float:
    """Calculate intent similarity with partial credit.

    Labels are normalized exactly as in ``intent_accuracy`` (case-insensitive,
    surrounding whitespace ignored), so the two metrics agree on what counts
    as an exact match.

    Args:
        expected: Expected intent
        predicted: Predicted intent

    Returns:
        1.0 for an exact match, the ``INTENT_SIMILARITY`` score when the pair
        is listed in either order, and 0.0 otherwise
    """
    want = _normalize_intent(expected)
    got = _normalize_intent(predicted)

    if want == got:
        return 1.0

    # Bidirectional lookup: try (expected, predicted) first, then the
    # reversed pair, and fall back to no credit.
    forward = INTENT_SIMILARITY.get((want, got))
    if forward is not None:
        return forward
    return INTENT_SIMILARITY.get((got, want), 0.0)