""" DSPy RAG Evaluation Module This module provides tools for evaluating the Heritage RAG system: - Golden dataset loading - Metric functions (count accuracy, slot extraction) - Evaluation harness for running tests against the API """ from .metrics import ( count_accuracy, slot_extraction_accuracy, heritage_rag_metric, ) from .dataset_loader import load_golden_dataset, GoldenExample from .run_evaluation import run_evaluation __all__ = [ "count_accuracy", "slot_extraction_accuracy", "heritage_rag_metric", "load_golden_dataset", "GoldenExample", "run_evaluation", ]