NOTE(review): this patch was recovered from a copy whose line breaks and runs of
whitespace had been collapsed. Hunk line counts were re-verified against every @@
header, but leading indentation (and any multi-space runs inside lines) is
reconstructed from conventional 2-space workflow nesting - confirm against the
target file before applying.

diff --git a/.forgejo/workflows/dspy-eval.yml b/.forgejo/workflows/dspy-eval.yml
index 89d05c0d42..761dad3750 100644
--- a/.forgejo/workflows/dspy-eval.yml
+++ b/.forgejo/workflows/dspy-eval.yml
@@ -1,11 +1,11 @@
 # DSPy RAG Evaluation Workflow
 # Automated testing and evaluation for Heritage RAG system
 #
-# Layers:
+# All layers run on every push/PR:
 # - Layer 1: Fast unit tests (no LLM)
 # - Layer 2: DSPy module tests with LLM
 # - Layer 3: Integration tests (requires SSH tunnel to Oxigraph)
-# - Layer 4: Comprehensive evaluation (nightly)
+# - Layer 4: Comprehensive evaluation
 
 name: DSPy RAG Evaluation
 
@@ -93,9 +93,6 @@ jobs:
     timeout-minutes: 20
     needs: unit-tests
 
-    # Run on PRs, scheduled runs, or manual triggers
-    if: github.event_name == 'pull_request' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
-
     steps:
       - uses: https://github.com/actions/checkout@v4
 
@@ -197,7 +194,7 @@ jobs:
           path: layer3-results.xml
 
   # ==========================================================================
-  # Layer 4: Comprehensive Evaluation (nightly only)
+  # Layer 4: Comprehensive Evaluation
   # ==========================================================================
   comprehensive-eval:
     name: Layer 4 - Comprehensive Evaluation
@@ -205,9 +202,6 @@ jobs:
     timeout-minutes: 60
     needs: [unit-tests, dspy-module-tests, integration-tests]
 
-    # Only run on schedule or manual trigger with 'comprehensive'
-    if: github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.evaluation_level == 'comprehensive')
-
     steps:
       - uses: https://github.com/actions/checkout@v4
 
@@ -295,7 +289,7 @@ jobs:
   quality-gate:
     name: Quality Gate
     runs-on: ubuntu-latest
-    needs: [unit-tests, dspy-module-tests, integration-tests]
+    needs: [unit-tests, dspy-module-tests, integration-tests, comprehensive-eval]
     if: always()
 
     steps:
@@ -303,30 +297,35 @@ jobs:
         run: |
           echo "Checking quality gates..."
 
-          # Layer 1 (unit tests) is always required
+          # Layer 1 (unit tests) is required
           if [[ "${{ needs.unit-tests.result }}" != "success" ]]; then
-            echo "Layer 1 (Unit Tests) failed"
+            echo "❌ Layer 1 (Unit Tests) failed"
             exit 1
           fi
-          echo "Layer 1 (Unit Tests) passed"
+          echo "✅ Layer 1 (Unit Tests) passed"
 
-          # Layer 2 (DSPy module tests) required for PRs
-          if [[ "${{ github.event_name }}" == "pull_request" ]]; then
-            if [[ "${{ needs.dspy-module-tests.result }}" != "success" ]]; then
-              echo "Layer 2 (DSPy Module Tests) failed - required for PRs"
-              exit 1
-            fi
-            echo "Layer 2 (DSPy Module Tests) passed"
+          # Layer 2 (DSPy module tests) is required
+          if [[ "${{ needs.dspy-module-tests.result }}" != "success" ]]; then
+            echo "❌ Layer 2 (DSPy Module Tests) failed"
+            exit 1
           fi
+          echo "✅ Layer 2 (DSPy Module Tests) passed"
 
-          # Layer 3 (integration tests) is warning-only for now
+          # Layer 3 (integration tests) is required
           if [[ "${{ needs.integration-tests.result }}" != "success" ]]; then
-            echo "Warning: Layer 3 (Integration Tests) failed - non-blocking"
-          else
-            echo "Layer 3 (Integration Tests) passed"
+            echo "❌ Layer 3 (Integration Tests) failed"
+            exit 1
           fi
+          echo "✅ Layer 3 (Integration Tests) passed"
+
+          # Layer 4 (comprehensive evaluation) is required
+          if [[ "${{ needs.comprehensive-eval.result }}" != "success" ]]; then
+            echo "❌ Layer 4 (Comprehensive Evaluation) failed"
+            exit 1
+          fi
+          echo "✅ Layer 4 (Comprehensive Evaluation) passed"
 
           echo ""
           echo "============================================"
-          echo " All required quality gates passed!"
+          echo " All quality gates passed!"
           echo "============================================"