From 7e9df1d600046e6116f3783561dceaa64d04a431 Mon Sep 17 00:00:00 2001
From: kempersc <sckemper@mailfence.com>
Date: Sun, 11 Jan 2026 21:20:05 +0100
Subject: [PATCH] chore(ci): remove GitHub dspy-eval workflow (replaced by
 Forgejo workflow)

---
 .github/workflows/dspy-eval.yml | 355 --------------------------------
 1 file changed, 355 deletions(-)
 delete mode 100644 .github/workflows/dspy-eval.yml

diff --git a/.github/workflows/dspy-eval.yml b/.github/workflows/dspy-eval.yml
deleted file mode 100644
index 4181621ddf..0000000000
--- a/.github/workflows/dspy-eval.yml
+++ /dev/null
@@ -1,355 +0,0 @@
-# DSPy RAG Evaluation Workflow
-# Automated testing and evaluation for Heritage RAG system
-#
-# Layers:
-# - Layer 1: Fast unit tests (no LLM)
-# - Layer 2: DSPy module tests with LLM
-# - Layer 3: Integration tests (requires SSH tunnel to Oxigraph)
-# - Layer 4: Comprehensive evaluation (nightly)
-
-name: DSPy RAG Evaluation
-
-on:
-  push:
-    branches: [main]
-    paths:
-      - 'backend/rag/**'
-      - 'tests/dspy_gitops/**'
-      - 'src/glam_extractor/api/**'
-  pull_request:
-    branches: [main]
-    paths:
-      - 'backend/rag/**'
-      - 'tests/dspy_gitops/**'
-      - 'src/glam_extractor/api/**'
-  workflow_dispatch:
-    inputs:
-      evaluation_level:
-        description: 'Evaluation depth'
-        required: true
-        default: 'standard'
-        type: choice
-        options:
-          - smoke
-          - standard
-          - comprehensive
-  schedule:
-    # Nightly comprehensive evaluation at 2 AM UTC
-    - cron: '0 2 * * *'
-
-env:
-  PYTHON_VERSION: '3.11'
-  SERVER_IP: '91.98.224.44'
-  SERVER_USER: 'root'
-
-jobs:
-  # ==========================================================================
-  # Layer 1: Fast Unit Tests (no LLM calls)
-  # ==========================================================================
-  unit-tests:
-    name: Layer 1 - Unit Tests
-    runs-on: ubuntu-latest
-    timeout-minutes: 5
-    
-    steps:
-      - uses: actions/checkout@v4
-      
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ env.PYTHON_VERSION }}
-          cache: 'pip'
-      
-      - name: Install dependencies
-        run: |
-          pip install -e ".[dev]"
-          pip install rapidfuzz
-      
-      - name: Run Layer 1 unit tests
-        run: |
-          pytest tests/dspy_gitops/test_layer1_unit.py \
-            -v --tb=short \
-            -m "layer1 or not (layer2 or layer3 or layer4)" \
-            --junit-xml=layer1-results.xml
-      
-      - name: Upload test results
-        uses: actions/upload-artifact@v4
-        if: always()
-        with:
-          name: layer1-test-results
-          path: layer1-results.xml
-
-  # ==========================================================================
-  # Layer 2: DSPy Module Tests (with LLM)
-  # ==========================================================================
-  dspy-module-tests:
-    name: Layer 2 - DSPy Module Tests
-    runs-on: ubuntu-latest
-    timeout-minutes: 20
-    needs: unit-tests
-    
-    # Run on PRs, scheduled runs, or manual triggers
-    if: github.event_name == 'pull_request' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
-    
-    steps:
-      - uses: actions/checkout@v4
-      
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ env.PYTHON_VERSION }}
-          cache: 'pip'
-      
-      - name: Install dependencies
-        run: |
-          pip install -e ".[dev]"
-          pip install dspy-ai httpx rapidfuzz litellm
-      
-      - name: Run Layer 2 DSPy tests
-        env:
-          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-        run: |
-          pytest tests/dspy_gitops/test_layer2_dspy.py \
-            -v --tb=short \
-            -m "layer2 or not (layer1 or layer3 or layer4)" \
-            --junit-xml=layer2-results.xml
-      
-      - name: Upload test results
-        uses: actions/upload-artifact@v4
-        if: always()
-        with:
-          name: layer2-test-results
-          path: layer2-results.xml
-      
-      - name: Comment PR with Layer 2 results
-        if: github.event_name == 'pull_request'
-        uses: actions/github-script@v7
-        with:
-          script: |
-            const fs = require('fs');
-            try {
-              const results = fs.readFileSync('layer2-results.xml', 'utf8');
-              const testsMatch = results.match(/tests="(\d+)"/);
-              const failuresMatch = results.match(/failures="(\d+)"/);
-              const errorsMatch = results.match(/errors="(\d+)"/);
-              
-              const tests = testsMatch ? testsMatch[1] : '0';
-              const failures = failuresMatch ? failuresMatch[1] : '0';
-              const errors = errorsMatch ? errorsMatch[1] : '0';
-              const passed = parseInt(tests) - parseInt(failures) - parseInt(errors);
-              
-              const body = '## DSPy Layer 2 Evaluation Results\n\n' +
-                '| Metric | Value |\n' +
-                '|--------|-------|\n' +
-                '| Tests Passed | ' + passed + '/' + tests + ' |\n' +
-                '| Failures | ' + failures + ' |\n' +
-                '| Errors | ' + errors + ' |\n' +
-                '| Status | ' + ((parseInt(failures) + parseInt(errors)) > 0 ? '❌ FAILED' : '✅ PASSED') + ' |\n';
-              
-              github.rest.issues.createComment({
-                issue_number: context.issue.number,
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                body: body
-              });
-            } catch (e) {
-              console.log('Could not parse results:', e);
-            }
-
-  # ==========================================================================
-  # Layer 3: Integration Tests (requires SSH tunnel to Oxigraph)
-  # ==========================================================================
-  integration-tests:
-    name: Layer 3 - Integration Tests
-    runs-on: ubuntu-latest
-    timeout-minutes: 15
-    needs: unit-tests
-    
-    steps:
-      - uses: actions/checkout@v4
-      
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ env.PYTHON_VERSION }}
-          cache: 'pip'
-      
-      - name: Install dependencies
-        run: |
-          pip install -e ".[dev]"
-          pip install httpx pytest-asyncio
-      
-      - name: Setup SSH for tunnel
-        run: |
-          mkdir -p ~/.ssh
-          echo "${{ secrets.DEPLOY_SSH_PRIVATE_KEY }}" > ~/.ssh/deploy_key
-          chmod 600 ~/.ssh/deploy_key
-          ssh-keyscan -H ${{ env.SERVER_IP }} >> ~/.ssh/known_hosts 2>/dev/null || true
-      
-      - name: Create SSH tunnel to Oxigraph
-        run: |
-          # Create SSH tunnel: local port 7878 -> server localhost:7878
-          ssh -f -N -L 7878:127.0.0.1:7878 \
-            -i ~/.ssh/deploy_key \
-            -o StrictHostKeyChecking=no \
-            ${{ env.SERVER_USER }}@${{ env.SERVER_IP }}
-          
-          # Wait for tunnel to establish
-          sleep 3
-          
-          # Verify tunnel is working
-          curl -sf "http://127.0.0.1:7878/query" \
-            -H "Accept: application/sparql-results+json" \
-            --data-urlencode "query=SELECT (1 AS ?test) WHERE {}" \
-            || (echo "SSH tunnel failed" && exit 1)
-          
-          echo "SSH tunnel established successfully"
-      
-      - name: Run Layer 3 integration tests
-        env:
-          OXIGRAPH_ENDPOINT: "http://127.0.0.1:7878"
-          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-        run: |
-          pytest tests/dspy_gitops/test_layer3_integration.py \
-            -v --tb=short \
-            -m "layer3 or not (layer1 or layer2 or layer4)" \
-            --junit-xml=layer3-results.xml
-      
-      - name: Upload test results
-        uses: actions/upload-artifact@v4
-        if: always()
-        with:
-          name: layer3-test-results
-          path: layer3-results.xml
-
-  # ==========================================================================
-  # Layer 4: Comprehensive Evaluation (nightly only)
-  # ==========================================================================
-  comprehensive-eval:
-    name: Layer 4 - Comprehensive Evaluation
-    runs-on: ubuntu-latest
-    timeout-minutes: 60
-    needs: [unit-tests, dspy-module-tests, integration-tests]
-    
-    # Only run on schedule or manual trigger with 'comprehensive'
-    if: github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.evaluation_level == 'comprehensive')
-    
-    steps:
-      - uses: actions/checkout@v4
-      
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ env.PYTHON_VERSION }}
-          cache: 'pip'
-      
-      - name: Install dependencies
-        run: |
-          pip install -e ".[dev]"
-          pip install dspy-ai httpx rapidfuzz pandas pytest-json-report litellm
-      
-      - name: Setup SSH for tunnel
-        run: |
-          mkdir -p ~/.ssh
-          echo "${{ secrets.DEPLOY_SSH_PRIVATE_KEY }}" > ~/.ssh/deploy_key
-          chmod 600 ~/.ssh/deploy_key
-          ssh-keyscan -H ${{ env.SERVER_IP }} >> ~/.ssh/known_hosts 2>/dev/null || true
-      
-      - name: Create SSH tunnel to Oxigraph
-        run: |
-          ssh -f -N -L 7878:127.0.0.1:7878 \
-            -i ~/.ssh/deploy_key \
-            -o StrictHostKeyChecking=no \
-            ${{ env.SERVER_USER }}@${{ env.SERVER_IP }}
-          sleep 3
-      
-      - name: Run comprehensive evaluation
-        env:
-          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-          OXIGRAPH_ENDPOINT: "http://127.0.0.1:7878"
-        run: |
-          pytest tests/dspy_gitops/test_layer4_comprehensive.py \
-            -v --tb=short \
-            -m "layer4 or not (layer1 or layer2 or layer3)" \
-            --junit-xml=layer4-results.xml \
-            --json-report \
-            --json-report-file=eval-report.json
-      
-      - name: Generate metrics summary
-        run: |
-          python -c "
-          import json
-          from datetime import datetime
-          
-          try:
-              with open('eval-report.json') as f:
-                  report = json.load(f)
-              
-              metrics = {
-                  'timestamp': datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ'),
-                  'commit': '${{ github.sha }}',
-                  'total_tests': report.get('summary', {}).get('total', 0),
-                  'passed': report.get('summary', {}).get('passed', 0),
-                  'failed': report.get('summary', {}).get('failed', 0),
-                  'duration': report.get('duration', 0),
-              }
-              
-              with open('metrics.json', 'w') as f:
-                  json.dump(metrics, f, indent=2)
-              
-              print('Metrics saved to metrics.json')
-              print(json.dumps(metrics, indent=2))
-          except Exception as e:
-              print(f'Error generating metrics: {e}')
-          "
-      
-      - name: Upload evaluation artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: comprehensive-eval-results
-          path: |
-            layer4-results.xml
-            eval-report.json
-            metrics.json
-
-  # ==========================================================================
-  # Quality Gate Check
-  # ==========================================================================
-  quality-gate:
-    name: Quality Gate
-    runs-on: ubuntu-latest
-    needs: [unit-tests, dspy-module-tests, integration-tests]
-    if: always()
-    
-    steps:
-      - name: Check all required tests passed
-        run: |
-          echo "Checking quality gates..."
-          
-          # Layer 1 (unit tests) is always required
-          if [[ "${{ needs.unit-tests.result }}" != "success" ]]; then
-            echo "❌ Layer 1 (Unit Tests) failed"
-            exit 1
-          fi
-          echo "✅ Layer 1 (Unit Tests) passed"
-          
-          # Layer 2 (DSPy module tests) required for PRs
-          if [[ "${{ github.event_name }}" == "pull_request" ]]; then
-            if [[ "${{ needs.dspy-module-tests.result }}" != "success" ]]; then
-              echo "❌ Layer 2 (DSPy Module Tests) failed - required for PRs"
-              exit 1
-            fi
-            echo "✅ Layer 2 (DSPy Module Tests) passed"
-          fi
-          
-          # Layer 3 (integration tests) is warning-only for now
-          if [[ "${{ needs.integration-tests.result }}" != "success" ]]; then
-            echo "⚠️ Layer 3 (Integration Tests) failed - non-blocking"
-          else
-            echo "✅ Layer 3 (Integration Tests) passed"
-          fi
-          
-          echo ""
-          echo "============================================"
-          echo "  All required quality gates passed!"
-          echo "============================================"