# GitHub Actions Workflow for DSPy Evaluations

## Overview

This document describes the GitHub Actions workflow that runs automated DSPy
evaluations on every pull request and on every merge to `main`, plus a nightly
comprehensive evaluation.

## Workflow Configuration

```yaml
# .github/workflows/dspy-eval.yml
name: DSPy RAG Evaluation

on:
  push:
    branches: [main]
    paths:
      - 'backend/rag/**'
      - 'tests/dspy_gitops/**'
      - 'src/glam_extractor/api/**'
  pull_request:
    branches: [main]
    paths:
      - 'backend/rag/**'
      - 'tests/dspy_gitops/**'
      - 'src/glam_extractor/api/**'
  workflow_dispatch:
    inputs:
      evaluation_level:
        description: 'Evaluation depth'
        required: true
        default: 'standard'
        type: choice
        options:
          - smoke
          - standard
          - comprehensive
  schedule:
    # Nightly comprehensive evaluation at 2 AM UTC
    - cron: '0 2 * * *'

# Least privilege by default; jobs that need more declare it explicitly.
permissions:
  contents: read

env:
  PYTHON_VERSION: '3.11'
  OXIGRAPH_URL: 'http://91.98.224.44:7878'  # Production Oxigraph

jobs:
  # ==========================================================================
  # Layer 1: Fast Unit Tests (no LLM calls)
  # ==========================================================================
  unit-tests:
    name: Unit Tests
    runs-on: ubuntu-latest
    timeout-minutes: 5

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'

      - name: Install dependencies
        run: |
          pip install -e ".[dev]"
          pip install pytest pytest-cov rapidfuzz

      - name: Run unit tests
        run: |
          pytest tests/dspy_gitops/test_layer1_unit.py \
            -v --tb=short \
            --cov=backend/rag \
            --cov-report=xml

      - name: Upload coverage
        uses: codecov/codecov-action@v4
        with:
          # codecov-action v4 needs an upload token (see "Required Secrets")
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ./coverage.xml
          flags: dspy-unit

  # ==========================================================================
  # Layer 2: DSPy Module Tests (with LLM)
  # ==========================================================================
  dspy-module-tests:
    name: DSPy Module Tests
    runs-on: ubuntu-latest
    timeout-minutes: 15
    needs: unit-tests

    # Only run on PRs, scheduled runs, or manual dispatch (skipped on push)
    if: >-
      github.event_name == 'pull_request' ||
      github.event_name == 'schedule' ||
      github.event_name == 'workflow_dispatch'

    # The "Comment PR with results" step writes a PR comment
    permissions:
      contents: read
      pull-requests: write

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'

      - name: Install dependencies
        run: |
          pip install -e ".[dev]"
          pip install dspy-ai pytest httpx rapidfuzz

      - name: Run DSPy module tests
        env:
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          OXIGRAPH_ENDPOINT: ${{ env.OXIGRAPH_URL }}
        run: |
          pytest tests/dspy_gitops/test_layer2_dspy.py \
            -v --tb=short \
            --junit-xml=dspy-results.xml

      - name: Upload test results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: dspy-test-results
          path: dspy-results.xml

      - name: Comment PR with results
        # always(): the comment must also be posted when the test step failed,
        # otherwise the FAILED status below could never be reported.
        if: always() && github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const reportPath = 'dspy-results.xml';

            const lines = ['## DSPy Evaluation Results', ''];

            if (!fs.existsSync(reportPath)) {
              // pytest aborted (or an earlier step failed) before writing a report
              lines.push('❌ No JUnit report was produced; see the workflow logs.');
            } else {
              // Parse JUnit XML and create comment
              // (simplified - use proper XML parser in production)
              const xml = fs.readFileSync(reportPath, 'utf8');
              const count = (name) => {
                const match = xml.match(new RegExp('\\b' + name + '="(\\d+)"'));
                return match ? Number(match[1]) : 0;
              };

              // `tests` is the total; passed has to be derived from it
              const total = count('tests');
              const failed = count('failures') + count('errors');
              const skipped = count('skipped');
              const passed = Math.max(total - failed - skipped, 0);

              lines.push(
                '| Metric | Value |',
                '|--------|-------|',
                `| Tests Passed | ${passed} |`,
                `| Tests Failed | ${failed} |`,
                `| Tests Skipped | ${skipped} |`,
                `| Status | ${failed > 0 ? '❌ FAILED' : '✅ PASSED'} |`
              );
            }

            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: lines.join('\n')
            });

  # ==========================================================================
  # Layer 3: Integration Tests (live API)
  # ==========================================================================
  integration-tests:
    name: Integration Tests
    runs-on: ubuntu-latest
    timeout-minutes: 10
    needs: unit-tests

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'

      - name: Install dependencies
        run: |
          pip install -e ".[dev]"
          pip install pytest httpx

      - name: Check API health
        run: |
          # Verify Oxigraph is accessible; --max-time keeps an unreachable
          # host from stalling the job until its timeout.
          curl -sf --max-time 30 "${OXIGRAPH_URL}/query" \
            -H "Accept: application/sparql-results+json" \
            --data-urlencode "query=SELECT (COUNT(*) as ?c) WHERE { ?s ?p ?o } LIMIT 1" \
            || (echo "Oxigraph not accessible" && exit 1)

      - name: Run integration tests
        env:
          OXIGRAPH_ENDPOINT: ${{ env.OXIGRAPH_URL }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        run: |
          pytest tests/dspy_gitops/test_layer3_integration.py \
            -v --tb=short \
            --junit-xml=integration-results.xml

      - name: Upload test results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: integration-test-results
          path: integration-results.xml

  # ==========================================================================
  # Layer 4: Comprehensive Evaluation (nightly only)
  # ==========================================================================
  comprehensive-eval:
    name: Comprehensive Evaluation
    runs-on: ubuntu-latest
    timeout-minutes: 60
    needs: [unit-tests, dspy-module-tests, integration-tests]

    # Only run on schedule or manual trigger with 'comprehensive'
    if: >-
      github.event_name == 'schedule' ||
      (github.event_name == 'workflow_dispatch' &&
      github.event.inputs.evaluation_level == 'comprehensive')

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'

      - name: Install dependencies
        run: |
          pip install -e ".[dev]"
          # pytest-json-report provides the --json-report options used below
          pip install dspy-ai pytest pytest-json-report httpx rapidfuzz pandas

      - name: Run comprehensive evaluation
        env:
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          OXIGRAPH_ENDPOINT: ${{ env.OXIGRAPH_URL }}
        run: |
          pytest tests/dspy_gitops/test_layer4_comprehensive.py \
            -v --tb=short \
            --junit-xml=comprehensive-results.xml \
            --json-report \
            --json-report-file=eval-report.json

      - name: Save evaluation metrics
        # Run even when the evaluation failed: failing runs are the ones
        # whose metrics are most needed.
        if: always()
        env:
          COMMIT_SHA: ${{ github.sha }}
        run: |
          python - <<'PY'
          import json
          import os
          from datetime import datetime, timezone
          from pathlib import Path

          report_path = Path('eval-report.json')
          # pytest may abort before writing the report; fall back to zeros
          report = json.loads(report_path.read_text()) if report_path.exists() else {}
          summary = report.get('summary', {})

          # Extract key metrics
          metrics = {
              'timestamp': datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
              'commit': os.environ['COMMIT_SHA'],
              'total_tests': summary.get('total', 0),
              'passed': summary.get('passed', 0),
              'failed': summary.get('failed', 0),
          }

          Path('metrics.json').write_text(json.dumps(metrics, indent=2))
          PY

      - name: Upload evaluation artifacts
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: comprehensive-eval-results
          path: |
            comprehensive-results.xml
            eval-report.json
            metrics.json

      - name: Post to Slack (optional)
        if: failure()
        uses: slackapi/slack-github-action@v1
        with:
          payload: |
            {
              "text": "🚨 DSPy Comprehensive Evaluation Failed",
              "blocks": [
                {
                  "type": "section",
                  "text": {
                    "type": "mrkdwn",
                    "text": "*DSPy Nightly Evaluation Failed*\n<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Run>"
                  }
                }
              ]
            }
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
          # Required by v1 of the action to send a Block Kit payload to an
          # incoming webhook
          SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK

  # ==========================================================================
  # Quality Gate Check
  # ==========================================================================
  quality-gate:
    name: Quality Gate
    runs-on: ubuntu-latest
    needs: [unit-tests, dspy-module-tests, integration-tests]
    # always(): evaluate the gate even when an upstream job failed or was skipped
    if: always()

    steps:
      - name: Check all tests passed
        env:
          EVENT_NAME: ${{ github.event_name }}
          UNIT_RESULT: ${{ needs.unit-tests.result }}
          DSPY_RESULT: ${{ needs.dspy-module-tests.result }}
          INTEGRATION_RESULT: ${{ needs.integration-tests.result }}
        run: |
          if [[ "$UNIT_RESULT" != "success" ]]; then
            echo "Unit tests failed"
            exit 1
          fi

          # DSPy module tests are required for PRs. On other events they may
          # be skipped (push), but if they did run they must not have failed.
          if [[ "$EVENT_NAME" == "pull_request" ]]; then
            if [[ "$DSPY_RESULT" != "success" ]]; then
              echo "DSPy module tests failed"
              exit 1
            fi
          elif [[ "$DSPY_RESULT" != "success" && "$DSPY_RESULT" != "skipped" ]]; then
            echo "DSPy module tests failed"
            exit 1
          fi

          if [[ "$INTEGRATION_RESULT" != "success" ]]; then
            echo "Integration tests failed"
            exit 1
          fi

          echo "All quality gates passed!"
```

## Required Secrets

Configure these in GitHub repository settings:

| Secret | Description |
|--------|-------------|
| `ANTHROPIC_API_KEY` | API key for Claude LLM |
| `CODECOV_TOKEN` | Upload token for Codecov coverage reports |
| `SLACK_WEBHOOK_URL` | (Optional) Slack notifications |

## Test Markers for Selective Execution

```toml
# pyproject.toml
[tool.pytest.ini_options]
markers = [
    "layer1: fast unit tests without LLM",
    "layer2: DSPy module tests with LLM",
    "layer3: integration tests with live API",
    "layer4: comprehensive evaluation",
    "smoke: quick smoke tests",
    "slow: tests that take > 10 seconds",
    "requires_oxigraph: tests that need Oxigraph connection",
    "requires_llm: tests that need LLM API access",
]
```

## Local Development

Run specific layers locally:

```bash
# Layer 1 only (fast, no LLM)
pytest tests/dspy_gitops/test_layer1_unit.py -v

# Layer 2 only (needs ANTHROPIC_API_KEY)
pytest tests/dspy_gitops/test_layer2_dspy.py -v

# Smoke tests only
pytest tests/dspy_gitops/ -m smoke -v

# All except slow tests
pytest tests/dspy_gitops/ -m "not slow" -v
```