76 lines
2 KiB
Python
76 lines
2 KiB
Python
"""Intent Accuracy Metrics

Measures how accurately the system classifies query intent.
"""
|
|
|
|
from typing import Any
|
|
|
|
|
|
def intent_accuracy(expected: str, predicted: str) -> float:
    """Score an intent prediction by exact match.

    Both labels are lower-cased and stripped of surrounding whitespace
    before they are compared, so "Statistical " and "statistical" count
    as the same intent.

    Args:
        expected: Expected intent classification
        predicted: Predicted intent classification

    Returns:
        1.0 if the normalized labels are equal, 0.0 otherwise
    """
    wanted = expected.lower().strip()
    actual = predicted.lower().strip()
    if wanted == actual:
        return 1.0
    return 0.0
|
|
|
|
|
|
def intent_accuracy_metric(example: Any, pred: Any, trace: Any = None) -> float:
    """DSPy-compatible wrapper around intent_accuracy.

    Reads ``expected_intent`` from the example and ``intent`` from the
    prediction. If either attribute is missing or None, the prediction is
    scored as a miss instead of raising.

    Args:
        example: DSPy Example carrying an ``expected_intent`` attribute
        pred: Prediction carrying an ``intent`` attribute
        trace: Optional trace for debugging (accepted but not used here)

    Returns:
        1.0 if intent matches, 0.0 otherwise
    """
    gold_intent = getattr(example, "expected_intent", None)
    guessed_intent = getattr(pred, "intent", None)

    # Only delegate to the string comparison when both sides are present.
    if gold_intent is not None and guessed_intent is not None:
        return intent_accuracy(gold_intent, guessed_intent)

    return 0.0
|
|
|
|
|
|
# Partial-credit scores for pairs of distinct intents.
# Keys are (intent, intent) tuples of lower-case labels; each pair appears in
# one orientation only, so a symmetric lookup has to try both orders.
INTENT_SIMILARITY = {
    ("statistical", "exploration"): 0.3,
    ("geographic", "exploration"): 0.3,
    ("entity_lookup", "exploration"): 0.5,
    ("temporal", "entity_lookup"): 0.2,
    ("relational", "entity_lookup"): 0.3,
    ("comparative", "statistical"): 0.4,
}
|
|
|
|
|
|
def intent_similarity_score(expected: str, predicted: str) -> float:
    """Calculate intent similarity with partial credit.

    Both labels are lower-cased and stripped of surrounding whitespace
    before comparison. This is the same normalization intent_accuracy
    uses, so a label with stray padding gets the same treatment from both
    metrics.

    Args:
        expected: Expected intent
        predicted: Predicted intent

    Returns:
        1.0 when the normalized labels are equal, the INTENT_SIMILARITY
        score when the pair is listed there in either order, and 0.0
        otherwise
    """
    expected_norm = expected.lower().strip()
    predicted_norm = predicted.lower().strip()

    if expected_norm == predicted_norm:
        return 1.0

    # The mapping stores each pair in a single orientation, so check the
    # pair as given first and then reversed (bidirectional lookup).
    for pair in (
        (expected_norm, predicted_norm),
        (predicted_norm, expected_norm),
    ):
        if pair in INTENT_SIMILARITY:
            return INTENT_SIMILARITY[pair]

    return 0.0
|