From 7e9df1d600046e6116f3783561dceaa64d04a431 Mon Sep 17 00:00:00 2001 From: kempersc Date: Sun, 11 Jan 2026 21:20:05 +0100 Subject: [PATCH] chore(ci): remove GitHub dspy-eval workflow (replaced by Forgejo workflow) --- .github/workflows/dspy-eval.yml | 355 -------------------------------- 1 file changed, 355 deletions(-) delete mode 100644 .github/workflows/dspy-eval.yml diff --git a/.github/workflows/dspy-eval.yml b/.github/workflows/dspy-eval.yml deleted file mode 100644 index 4181621ddf..0000000000 --- a/.github/workflows/dspy-eval.yml +++ /dev/null @@ -1,355 +0,0 @@ -# DSPy RAG Evaluation Workflow -# Automated testing and evaluation for Heritage RAG system -# -# Layers: -# - Layer 1: Fast unit tests (no LLM) -# - Layer 2: DSPy module tests with LLM -# - Layer 3: Integration tests (requires SSH tunnel to Oxigraph) -# - Layer 4: Comprehensive evaluation (nightly) - -name: DSPy RAG Evaluation - -on: - push: - branches: [main] - paths: - - 'backend/rag/**' - - 'tests/dspy_gitops/**' - - 'src/glam_extractor/api/**' - pull_request: - branches: [main] - paths: - - 'backend/rag/**' - - 'tests/dspy_gitops/**' - - 'src/glam_extractor/api/**' - workflow_dispatch: - inputs: - evaluation_level: - description: 'Evaluation depth' - required: true - default: 'standard' - type: choice - options: - - smoke - - standard - - comprehensive - schedule: - # Nightly comprehensive evaluation at 2 AM UTC - - cron: '0 2 * * *' - -env: - PYTHON_VERSION: '3.11' - SERVER_IP: '91.98.224.44' - SERVER_USER: 'root' - -jobs: - # ========================================================================== - # Layer 1: Fast Unit Tests (no LLM calls) - # ========================================================================== - unit-tests: - name: Layer 1 - Unit Tests - runs-on: ubuntu-latest - timeout-minutes: 5 - - steps: - - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ env.PYTHON_VERSION }} - cache: 'pip' - - - name: Install dependencies - run: | - pip install -e ".[dev]" - pip install rapidfuzz - - - name: Run Layer 1 unit tests - run: | - pytest tests/dspy_gitops/test_layer1_unit.py \ - -v --tb=short \ - -m "layer1 or not (layer2 or layer3 or layer4)" \ - --junit-xml=layer1-results.xml - - - name: Upload test results - uses: actions/upload-artifact@v4 - if: always() - with: - name: layer1-test-results - path: layer1-results.xml - - # ========================================================================== - # Layer 2: DSPy Module Tests (with LLM) - # ========================================================================== - dspy-module-tests: - name: Layer 2 - DSPy Module Tests - runs-on: ubuntu-latest - timeout-minutes: 20 - needs: unit-tests - - # Run on PRs, scheduled runs, or manual triggers - if: github.event_name == 'pull_request' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' - - steps: - - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ env.PYTHON_VERSION }} - cache: 'pip' - - - name: Install dependencies - run: | - pip install -e ".[dev]" - pip install dspy-ai httpx rapidfuzz litellm - - - name: Run Layer 2 DSPy tests - env: - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - run: | - pytest tests/dspy_gitops/test_layer2_dspy.py \ - -v --tb=short \ - -m "layer2 or not (layer1 or layer3 or layer4)" \ - --junit-xml=layer2-results.xml - - - name: Upload test results - uses: actions/upload-artifact@v4 - if: always() - with: - name: layer2-test-results - path: layer2-results.xml - - - name: Comment PR with Layer 2 results - if: github.event_name == 'pull_request' - uses: actions/github-script@v7 - with: - script: | - const fs = require('fs'); - try { - const results = fs.readFileSync('layer2-results.xml', 'utf8'); - const testsMatch = results.match(/tests="(\d+)"/); - const failuresMatch = results.match(/failures="(\d+)"/); - const errorsMatch = results.match(/errors="(\d+)"/); - - const tests = testsMatch ? testsMatch[1] : '0'; - const failures = failuresMatch ? failuresMatch[1] : '0'; - const errors = errorsMatch ? errorsMatch[1] : '0'; - const passed = parseInt(tests) - parseInt(failures) - parseInt(errors); - - const body = '## DSPy Layer 2 Evaluation Results\n\n' + - '| Metric | Value |\n' + - '|--------|-------|\n' + - '| Tests Passed | ' + passed + '/' + tests + ' |\n' + - '| Failures | ' + failures + ' |\n' + - '| Errors | ' + errors + ' |\n' + - '| Status | ' + ((parseInt(failures) + parseInt(errors)) > 0 ? '❌ FAILED' : '✅ PASSED') + ' |\n'; - - github.rest.issues.createComment({ - issue_number: context.issue.number, - owner: context.repo.owner, - repo: context.repo.repo, - body: body - }); - } catch (e) { - console.log('Could not parse results:', e); - } - - # ========================================================================== - # Layer 3: Integration Tests (requires SSH tunnel to Oxigraph) - # ========================================================================== - integration-tests: - name: Layer 3 - Integration Tests - runs-on: ubuntu-latest - timeout-minutes: 15 - needs: unit-tests - - steps: - - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ env.PYTHON_VERSION }} - cache: 'pip' - - - name: Install dependencies - run: | - pip install -e ".[dev]" - pip install httpx pytest-asyncio - - - name: Setup SSH for tunnel - run: | - mkdir -p ~/.ssh - echo "${{ secrets.DEPLOY_SSH_PRIVATE_KEY }}" > ~/.ssh/deploy_key - chmod 600 ~/.ssh/deploy_key - ssh-keyscan -H ${{ env.SERVER_IP }} >> ~/.ssh/known_hosts 2>/dev/null || true - - - name: Create SSH tunnel to Oxigraph - run: | - # Create SSH tunnel: local port 7878 -> server localhost:7878 - ssh -f -N -L 7878:127.0.0.1:7878 \ - -i ~/.ssh/deploy_key \ - -o StrictHostKeyChecking=no \ - ${{ env.SERVER_USER }}@${{ env.SERVER_IP }} - - # Wait for tunnel to establish - sleep 3 - - # Verify tunnel is working - curl -sf "http://127.0.0.1:7878/query" \ - -H "Accept: application/sparql-results+json" \ - --data-urlencode "query=SELECT (1 AS ?test) WHERE {}" \ - || (echo "SSH tunnel failed" && exit 1) - - echo "SSH tunnel established successfully" - - - name: Run Layer 3 integration tests - env: - OXIGRAPH_ENDPOINT: "http://127.0.0.1:7878" - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - run: | - pytest tests/dspy_gitops/test_layer3_integration.py \ - -v --tb=short \ - -m "layer3 or not (layer1 or layer2 or layer4)" \ - --junit-xml=layer3-results.xml - - - name: Upload test results - uses: actions/upload-artifact@v4 - if: always() - with: - name: layer3-test-results - path: layer3-results.xml - - # ========================================================================== - # Layer 4: Comprehensive Evaluation (nightly only) - # ========================================================================== - comprehensive-eval: - name: Layer 4 - Comprehensive Evaluation - runs-on: ubuntu-latest - timeout-minutes: 60 - needs: [unit-tests, dspy-module-tests, integration-tests] - - # Only run on schedule or manual trigger with 'comprehensive' - if: github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.evaluation_level == 'comprehensive') - - steps: - - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ env.PYTHON_VERSION }} - cache: 'pip' - - - name: Install dependencies - run: | - pip install -e ".[dev]" - pip install dspy-ai httpx rapidfuzz pandas pytest-json-report litellm - - - name: Setup SSH for tunnel - run: | - mkdir -p ~/.ssh - echo "${{ secrets.DEPLOY_SSH_PRIVATE_KEY }}" > ~/.ssh/deploy_key - chmod 600 ~/.ssh/deploy_key - ssh-keyscan -H ${{ env.SERVER_IP }} >> ~/.ssh/known_hosts 2>/dev/null || true - - - name: Create SSH tunnel to Oxigraph - run: | - ssh -f -N -L 7878:127.0.0.1:7878 \ - -i ~/.ssh/deploy_key \ - -o StrictHostKeyChecking=no \ - ${{ env.SERVER_USER }}@${{ env.SERVER_IP }} - sleep 3 - - - name: Run comprehensive evaluation - env: - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - OXIGRAPH_ENDPOINT: "http://127.0.0.1:7878" - run: | - pytest tests/dspy_gitops/test_layer4_comprehensive.py \ - -v --tb=short \ - -m "layer4 or not (layer1 or layer2 or layer3)" \ - --junit-xml=layer4-results.xml \ - --json-report \ - --json-report-file=eval-report.json - - - name: Generate metrics summary - run: | - python -c " - import json - from datetime import datetime - - try: - with open('eval-report.json') as f: - report = json.load(f) - - metrics = { - 'timestamp': datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ'), - 'commit': '${{ github.sha }}', - 'total_tests': report.get('summary', {}).get('total', 0), - 'passed': report.get('summary', {}).get('passed', 0), - 'failed': report.get('summary', {}).get('failed', 0), - 'duration': report.get('duration', 0), - } - - with open('metrics.json', 'w') as f: - json.dump(metrics, f, indent=2) - - print('Metrics saved to metrics.json') - print(json.dumps(metrics, indent=2)) - except Exception as e: - print(f'Error generating metrics: {e}') - " - - - name: Upload evaluation artifacts - uses: actions/upload-artifact@v4 - with: - name: comprehensive-eval-results - path: | - layer4-results.xml - eval-report.json - metrics.json - - # ========================================================================== - # Quality Gate Check - # ========================================================================== - quality-gate: - name: Quality Gate - runs-on: ubuntu-latest - needs: [unit-tests, dspy-module-tests, integration-tests] - if: always() - - steps: - - name: Check all required tests passed - run: | - echo "Checking quality gates..." - - # Layer 1 (unit tests) is always required - if [[ "${{ needs.unit-tests.result }}" != "success" ]]; then - echo "❌ Layer 1 (Unit Tests) failed" - exit 1 - fi - echo "✅ Layer 1 (Unit Tests) passed" - - # Layer 2 (DSPy module tests) required for PRs - if [[ "${{ github.event_name }}" == "pull_request" ]]; then - if [[ "${{ needs.dspy-module-tests.result }}" != "success" ]]; then - echo "❌ Layer 2 (DSPy Module Tests) failed - required for PRs" - exit 1 - fi - echo "✅ Layer 2 (DSPy Module Tests) passed" - fi - - # Layer 3 (integration tests) is warning-only for now - if [[ "${{ needs.integration-tests.result }}" != "success" ]]; then - echo "⚠️ Layer 3 (Integration Tests) failed - non-blocking" - else - echo "✅ Layer 3 (Integration Tests) passed" - fi - - echo "" - echo "============================================" - echo " All required quality gates passed!" - echo "============================================"