"""
DSPy RAG Evaluation Module

This module provides tools for evaluating the Heritage RAG system:
- Golden dataset loading
- Metric functions (count accuracy, slot extraction)
- Evaluation harness for running tests against the API
"""

from .metrics import (
    count_accuracy,
    slot_extraction_accuracy,
    heritage_rag_metric,
)
from .dataset_loader import load_golden_dataset, GoldenExample
from .run_evaluation import run_evaluation

# Names re-exported at package level; keep in sync with the imports above.
__all__ = [
    "count_accuracy",
    "slot_extraction_accuracy",
    "heritage_rag_metric",
    "load_golden_dataset",
    "GoldenExample",
    "run_evaluation",
]