# DSPy Optimizer Configuration

## Overview

DSPy optimizers automatically tune prompts and few-shot examples to improve metric scores. This document covers:

1. MIPROv2 - Joint instruction + few-shot optimization
2. BootstrapFewShot - Rapid few-shot example generation
3. GEPA - Reflective prompt optimization (already partially implemented)

## MIPROv2 Configuration

MIPROv2 (Multiprompt Instruction PRoposal Optimizer Version 2) uses Bayesian Optimization to find the best combination of instructions and few-shot examples.

### Basic Setup

```python
# backend/rag/evaluation/optimizer.py

import dspy
from dspy.teleprompt import MIPROv2, BootstrapFewShot

from .metrics import heritage_rag_metric
from .dataset_loader import load_golden_dataset


class HeritageRAGOptimizer:
    """Optimizer for Heritage RAG prompts and few-shot examples.

    Wraps the DSPy ``MIPROv2`` and ``BootstrapFewShot`` teleprompters and
    applies the same metric and demo limits to both.
    """

    def __init__(
        self,
        metric=heritage_rag_metric,
        auto: str = "light",  # "light", "medium", or "heavy"
        max_bootstrapped_demos: int = 4,
        max_labeled_demos: int = 4,
    ):
        """
        Store the settings shared by both optimization methods.

        Args:
            metric: Metric callable handed to the DSPy optimizers
            auto: MIPROv2 preset name ("light", "medium", or "heavy")
            max_bootstrapped_demos: Upper bound on bootstrapped demos
            max_labeled_demos: Upper bound on labeled demos
        """
        self.metric = metric
        self.auto = auto
        self.max_bootstrapped_demos = max_bootstrapped_demos
        self.max_labeled_demos = max_labeled_demos

    def optimize_with_miprov2(
        self,
        program: dspy.Module,
        trainset: list[dspy.Example],
        devset: list[dspy.Example],
    ) -> dspy.Module:
        """
        Optimize program using MIPROv2.

        Args:
            program: DSPy program to optimize
            trainset: Training examples (for few-shot bootstrapping)
            devset: Dev examples (for validation during optimization)

        Returns:
            Optimized program with improved prompts/demos
        """
        optimizer = MIPROv2(
            metric=self.metric,
            auto=self.auto,
            max_bootstrapped_demos=self.max_bootstrapped_demos,
            max_labeled_demos=self.max_labeled_demos,
            num_threads=4,
            verbose=True,
            log_dir="logs/miprov2",
            track_stats=True,
        )

        # MIPROv2.compile() receives the validation examples through
        # `valset`; it has no `eval_kwargs` parameter.
        # NOTE(review): some DSPy releases ask for interactive confirmation
        # before a run -- check `requires_permission_to_run` for the pinned
        # version before relying on this in CI.
        return optimizer.compile(
            program,
            trainset=trainset,
            valset=devset,
        )

    def optimize_with_bootstrap(
        self,
        program: dspy.Module,
        trainset: list[dspy.Example],
        teacher_settings: dict | None = None,
    ) -> dspy.Module:
        """
        Quick optimization using BootstrapFewShot.

        Faster than MIPROv2, good for initial few-shot collection.

        Args:
            program: DSPy program to optimize
            trainset: Training examples to bootstrap demos from
            teacher_settings: Optional settings forwarded to the optimizer
                for the teacher program; None or empty means defaults

        Returns:
            Program compiled with the bootstrapped few-shot demos
        """
        optimizer = BootstrapFewShot(
            metric=self.metric,
            # Normalise a falsy value to None so the optimizer applies its
            # own default, as the original "only set when truthy" logic did.
            teacher_settings=teacher_settings or None,
            max_bootstrapped_demos=self.max_bootstrapped_demos,
            max_labeled_demos=self.max_labeled_demos,
            max_rounds=1,
            max_errors=5,
        )
        return optimizer.compile(program, trainset=trainset)
```

### Optimization Presets

```yaml
# config/optimizer_presets.yaml
presets:
  light:
    description: "Quick optimization (~5 min)"
    auto: light
    max_bootstrapped_demos: 2
    max_labeled_demos: 2
    num_candidates: 5
    use_cases:
      - "Quick iteration during development"
      - "Testing new prompt ideas"

  medium:
    description: "Balanced optimization (~30 min)"
    auto: medium
    max_bootstrapped_demos: 4
    max_labeled_demos: 4
    num_candidates: 10
    use_cases:
      - "Weekly optimization runs"
      - "Before major releases"

  heavy:
    description: "Thorough optimization (~2 hours)"
    auto: heavy
    max_bootstrapped_demos: 8
    max_labeled_demos: 8
    num_candidates: 20
    use_cases:
      - "Major version updates"
      - "Comprehensive prompt overhaul"

category_specific:
  count_queries:
    description: "Optimize COUNT query handling"
    target_signatures:
      - HeritageQueryIntent
      - GenerateSPARQL
    focus_metric: count_accuracy

  person_queries:
    description: "Optimize PERSON query handling"
    target_signatures:
      - PersonQueryRouter
      - PersonSPARQL
    focus_metric: person_extraction_accuracy
```

### Running Optimization

```python
#!/usr/bin/env python
"""
scripts/optimize_rag.py

Run DSPy optimization on Heritage RAG prompts.
"""

import argparse
import json
from pathlib import Path
from datetime import datetime

import dspy

from backend.rag.dspy_heritage_rag import HeritageRAGPipeline
from backend.rag.evaluation.optimizer import HeritageRAGOptimizer
from backend.rag.evaluation.dataset_loader import load_golden_dataset, split_dataset


def relative_improvement(baseline: float, optimized: float) -> float | None:
    """Return (optimized - baseline) / baseline, or None for a zero baseline."""
    if baseline == 0:
        # A relative change is undefined against a zero baseline.
        return None
    return (optimized - baseline) / baseline


def main():
    """Parse CLI options, optimize the pipeline, and save program + report."""
    parser = argparse.ArgumentParser(description="Optimize RAG prompts")
    parser.add_argument('--preset', default='light', choices=['light', 'medium', 'heavy'])
    parser.add_argument('--categories', nargs='+', help='Focus on specific categories')
    parser.add_argument('--output-dir', default='optimized_programs')
    parser.add_argument('--model', default='gpt-4o-mini', help='Optimization model')
    args = parser.parse_args()

    # Configure DSPy
    lm = dspy.LM(f"openai/{args.model}")
    dspy.configure(lm=lm)

    # Load dataset
    data = load_golden_dataset('data/rag_eval/golden_dataset.json')
    trainset, devset, _ = split_dataset(data, train=0.7, dev=0.15, test=0.15)

    if args.categories:
        trainset = [e for e in trainset if e.get('category') in args.categories]
        devset = [e for e in devset if e.get('category') in args.categories]

    print(f"Training set: {len(trainset)} examples")
    print(f"Dev set: {len(devset)} examples")

    # Stop early instead of optimizing/evaluating on nothing (e.g. a
    # --categories value that matches no example).
    if not trainset or not devset:
        parser.error("no training/dev examples left after loading and filtering")

    # Create program
    program = HeritageRAGPipeline()

    # Baseline evaluation
    print("\n--- Baseline Evaluation ---")
    baseline_score = evaluate_program(program, devset)
    # DSPy's Evaluate reports the score on a 0-100 scale, so print it as a
    # plain number followed by "%" rather than with the "%" format spec.
    print(f"Baseline score: {baseline_score:.2f}%")

    # Optimize
    print(f"\n--- Running {args.preset.upper()} optimization ---")
    optimizer = HeritageRAGOptimizer(auto=args.preset)
    optimized_program = optimizer.optimize_with_miprov2(
        program=program,
        trainset=trainset,
        devset=devset,
    )

    # Optimized evaluation
    print("\n--- Optimized Evaluation ---")
    optimized_score = evaluate_program(optimized_program, devset)
    print(f"Optimized score: {optimized_score:.2f}%")

    improvement = relative_improvement(baseline_score, optimized_score)
    if improvement is None:
        print("Improvement: n/a (baseline score is 0)")
    else:
        print(f"Improvement: {improvement:.1%}")

    # Save optimized program
    output_dir = Path(args.output_dir)
    # parents=True so nested targets such as optimized_programs/v2 work.
    output_dir.mkdir(parents=True, exist_ok=True)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_path = output_dir / f"heritage_rag_{args.preset}_{timestamp}.json"

    optimized_program.save(str(output_path))
    print(f"\nSaved optimized program to: {output_path}")

    # Save optimization report
    report = {
        "timestamp": timestamp,
        "preset": args.preset,
        "baseline_score": baseline_score,
        "optimized_score": optimized_score,
        "improvement": improvement,  # null in JSON when the baseline is 0
        "trainset_size": len(trainset),
        "devset_size": len(devset),
        "categories": args.categories,
    }

    # Reports live in a reports/ subdirectory (see the layout under
    # "Version Control for Prompts").
    reports_dir = output_dir / "reports"
    reports_dir.mkdir(parents=True, exist_ok=True)
    report_path = reports_dir / f"report_{args.preset}_{timestamp}.json"
    with open(report_path, 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=2)
    print(f"Saved optimization report to: {report_path}")


def evaluate_program(program, devset) -> float:
    """Evaluate program on devset and return the score as a float."""
    from dspy import Evaluate
    from backend.rag.evaluation.metrics import heritage_rag_metric

    evaluator = Evaluate(
        devset=devset,
        metric=heritage_rag_metric,
        num_threads=4,
        display_progress=True,
    )

    result = evaluator(program)
    # Depending on the DSPy release, Evaluate returns either the bare score
    # or a result object exposing it as `.score`; accept both.
    return float(getattr(result, "score", result))


if __name__ == '__main__':
    main()
```

### CLI Usage

```bash
# Quick optimization (light preset)
python scripts/optimize_rag.py --preset light

# Focus on COUNT queries
python scripts/optimize_rag.py --preset medium --categories count

# Heavy optimization for production
python scripts/optimize_rag.py --preset heavy --model gpt-4o

# Output to specific directory
python scripts/optimize_rag.py --output-dir optimized_programs/v2
```

## Loading Optimized Programs

```python
# In production code
from pathlib import Path

from backend.rag.dspy_heritage_rag import HeritageRAGPipeline


def _timestamp_key(path: Path) -> tuple[str, ...]:
    """Return the trailing (YYYYMMDD, HHMMSS) parts of a program file name."""
    return tuple(path.stem.rsplit("_", 2)[-2:])


def get_optimized_pipeline():
    """Load the latest optimized pipeline, or a default one if none is saved."""
    optimized_dir = Path("optimized_programs")
    if not optimized_dir.exists():
        return HeritageRAGPipeline()  # Fallback to default

    # Find latest optimization
    files = list(optimized_dir.glob("heritage_rag_*.json"))
    if not files:
        return HeritageRAGPipeline()

    # Compare by the timestamp suffix only: sorting whole file names would
    # rank by preset name first ("medium" > "light" > "heavy").
    latest = max(files, key=_timestamp_key)
    print(f"Loading optimized program: {latest}")

    program = HeritageRAGPipeline()
    program.load(str(latest))
    return program
```

## Optimization Schedule

### Automated Weekly Optimization

```yaml
# .github/workflows/weekly-optimization.yml
name: Weekly RAG Optimization

on:
  schedule:
    - cron: '0 2 * * 0'  # Sunday 02:00 UTC
  workflow_dispatch:

# The pull-request step pushes a branch and opens a PR.
permissions:
  contents: write
  pull-requests: write

jobs:
  optimize:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: pip install -r requirements.txt

      - name: Run optimization
        run: python scripts/optimize_rag.py --preset medium
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

      - name: Upload optimized program
        uses: actions/upload-artifact@v4
        with:
          name: optimized-program
          path: optimized_programs/

      - name: Create PR with optimized prompts
        uses: peter-evans/create-pull-request@v5
        with:
          title: "chore: Weekly RAG prompt optimization"
          body: |
            Automated weekly optimization of RAG prompts.

            See optimization report in artifacts.
          branch: auto/weekly-optimization
```

## Monitoring Optimization Progress

### Metrics Tracking

```python
# Track optimization metrics over time
import wandb


def log_optimization_metrics(report: dict):
    """Log optimization metrics to W&B.

    Args:
        report: Optimization report; must contain "baseline_score",
            "optimized_score" and "improvement"
    """
    wandb.init(
        project="heritage-rag-optimization",
        config=report,
    )

    metrics = {
        "baseline_score": report["baseline_score"],
        "optimized_score": report["optimized_score"],
        "improvement": report["improvement"],
    }
    # A metric with no value (e.g. improvement against a zero baseline) is
    # skipped rather than logged as None.
    wandb.log({name: value for name, value in metrics.items() if value is not None})

    wandb.finish()
```

### Version Control for Prompts

```
optimized_programs/
├── heritage_rag_light_20250109_150000.json
├── heritage_rag_medium_20250112_020000.json
├── heritage_rag_heavy_20250115_020000.json
├── reports/
│   ├── report_light_20250109_150000.json
│   ├── report_medium_20250112_020000.json
│   └── report_heavy_20250115_020000.json
└── current -> heritage_rag_medium_20250112_020000.json  # symlink
```