glam/tests/dspy_gitops/conftest.py
kempersc 8856be1085
Some checks failed
DSPy RAG Evaluation / Layer 1 - Unit Tests (push) Failing after 5m9s
DSPy RAG Evaluation / Layer 3 - Integration Tests (push) Has been skipped
DSPy RAG Evaluation / Layer 2 - DSPy Module Tests (push) Has been skipped
DSPy RAG Evaluation / Layer 4 - Comprehensive Evaluation (push) Has been skipped
DSPy RAG Evaluation / Quality Gate (push) Failing after 2s
chore: trigger DSPy eval workflow
2026-01-11 22:18:17 +01:00

200 lines
5.5 KiB
Python

"""
Pytest fixtures for DSPy GitOps testing.
"""
import json
import os
from pathlib import Path
from typing import Any
from unittest.mock import MagicMock
import pytest
# DSPy is optional: when the import fails, a MagicMock stands in for the
# module so that later references to ``dspy`` still resolve, and
# DSPY_AVAILABLE records which of the two cases applies.
try:
    import dspy
except ImportError:
    dspy = MagicMock()
    DSPY_AVAILABLE = False
else:
    DSPY_AVAILABLE = True
# Directory named "datasets" located next to this file.
DATASETS_DIR = Path(__file__).parent.joinpath("datasets")
# Oxigraph endpoint; the OXIGRAPH_ENDPOINT environment variable overrides
# the built-in default address.
OXIGRAPH_URL = os.getenv("OXIGRAPH_ENDPOINT", "http://91.98.224.44:7878")
# =============================================================================
# Skip markers
# =============================================================================
# Marker that skips a test when the dspy import above did not succeed.
requires_dspy = pytest.mark.skipif(not DSPY_AVAILABLE, reason="DSPy not installed")
# Marker that skips a test unless at least one of the two API-key
# environment variables holds a non-empty value.
requires_llm = pytest.mark.skipif(
    not any(
        os.environ.get(var_name)
        for var_name in ("ANTHROPIC_API_KEY", "CLAUDE_API_KEY")
    ),
    reason="ANTHROPIC_API_KEY or CLAUDE_API_KEY not set",
)
# =============================================================================
# Dataset loading
# =============================================================================
def load_examples_from_json(filename: str) -> list[dict[str, Any]]:
    """Load the ``examples`` list from a JSON file in ``DATASETS_DIR``.

    Args:
        filename: Name of the JSON file, relative to ``DATASETS_DIR``.

    Returns:
        The value stored under the top-level ``"examples"`` key. An empty
        list is returned when the file does not exist or the key is absent.
    """
    filepath = DATASETS_DIR / filename
    if not filepath.exists():
        return []
    # Decode explicitly as UTF-8 so non-ASCII question text is read the same
    # way regardless of the platform's default locale encoding.
    with open(filepath, encoding="utf-8") as f:
        data = json.load(f)
    return data.get("examples", [])
def dict_to_dspy_example(ex: dict[str, Any]) -> Any:
    """Turn an example dict into a ``dspy.Example``.

    When DSPy is not available the dict is handed back untouched.
    ``question``, ``language`` and ``expected_intent`` are required keys;
    the remaining fields fall back to defaults when missing. ``question``
    and ``language`` are marked as the example's inputs.
    """
    if not DSPY_AVAILABLE:
        return ex

    fields = {
        # Required keys: a missing one raises KeyError.
        "question": ex["question"],
        "language": ex["language"],
        "expected_intent": ex["expected_intent"],
        # Optional keys with their defaults.
        "expected_entities": ex.get("expected_entities", []),
        "expected_entity_type": ex.get("expected_entity_type", "institution"),
        "expected_sources": ex.get("expected_sources", []),
        "gold_answer": ex.get("gold_answer"),
    }
    example = dspy.Example(**fields)
    return example.with_inputs("question", "language")
@pytest.fixture
def dev_set() -> list[Any]:
    """Provide the development set read from ``heritage_rag_dev.json``.

    Entries are converted with ``dict_to_dspy_example`` when DSPy is
    available; otherwise the loaded dicts are returned as they are.
    """
    records = load_examples_from_json("heritage_rag_dev.json")
    if not DSPY_AVAILABLE:
        return records
    return list(map(dict_to_dspy_example, records))
@pytest.fixture
def test_set() -> list[Any]:
    """Provide the test set read from ``heritage_rag_test.json``.

    Entries are converted with ``dict_to_dspy_example`` when DSPy is
    available; otherwise the loaded dicts are returned as they are.
    """
    records = load_examples_from_json("heritage_rag_test.json")
    if not DSPY_AVAILABLE:
        return records
    return list(map(dict_to_dspy_example, records))
@pytest.fixture
def golden_tests() -> list[dict]:
    """Load golden test cases from ``golden_queries.yaml`` in ``DATASETS_DIR``.

    Returns:
        The list stored under the top-level ``golden_tests`` key. An empty
        list is returned when the file does not exist, when the file is
        empty, or when the key is absent or null.
    """
    # Imported lazily so that PyYAML is only needed by tests using this fixture.
    import yaml

    filepath = DATASETS_DIR / "golden_queries.yaml"
    if not filepath.exists():
        return []
    # Decode explicitly as UTF-8 rather than relying on the locale default.
    with open(filepath, encoding="utf-8") as f:
        data = yaml.safe_load(f)
    # safe_load yields None for an empty document; treat it like a file
    # without any golden tests instead of failing on ``None.get``.
    if data is None:
        return []
    # ``or []`` also covers a ``golden_tests:`` key whose value is null.
    return data.get("golden_tests") or []
# =============================================================================
# API fixtures
# =============================================================================
@pytest.fixture
def oxigraph_url() -> str:
    """Expose the module-level ``OXIGRAPH_URL`` value to tests."""
    endpoint = OXIGRAPH_URL
    return endpoint
@pytest.fixture
def api_client():
    """Build an ``httpx.AsyncClient`` for API testing.

    The client targets ``http://localhost:8000`` with a 30 second timeout.
    """
    # Imported lazily so httpx is only needed by tests using this fixture.
    import httpx

    # NOTE(review): the client is returned without ever being closed here;
    # closing is left to whoever consumes the fixture.
    client = httpx.AsyncClient(
        base_url="http://localhost:8000",
        timeout=30.0,
    )
    return client
# =============================================================================
# DSPy fixtures
# =============================================================================
@pytest.fixture
def dspy_lm():
    """Create a Claude-backed ``dspy.LM`` and register it via ``dspy.configure``.

    The test is skipped when DSPy is not installed or when neither
    ``ANTHROPIC_API_KEY`` nor ``CLAUDE_API_KEY`` holds a value.
    """
    if not DSPY_AVAILABLE:
        pytest.skip("DSPy not installed")

    # Take the first non-empty key, preferring ANTHROPIC_API_KEY.
    api_key = None
    for var_name in ("ANTHROPIC_API_KEY", "CLAUDE_API_KEY"):
        api_key = os.environ.get(var_name)
        if api_key:
            break
    if not api_key:
        pytest.skip("ANTHROPIC_API_KEY or CLAUDE_API_KEY not set")

    language_model = dspy.LM(
        model="anthropic/claude-sonnet-4-20250514",
        api_key=api_key,
    )
    dspy.configure(lm=language_model)
    return language_model
@pytest.fixture
def heritage_pipeline(dspy_lm):
    """Build the Heritage RAG pipeline with ``use_tools=False``.

    Requesting ``dspy_lm`` ensures that fixture has run first. The test is
    skipped when an ImportError is raised while importing or constructing
    the pipeline.
    """
    try:
        from backend.rag.dspy_heritage_rag import create_heritage_rag_pipeline

        pipeline = create_heritage_rag_pipeline(use_tools=False)
    except ImportError:
        pytest.skip("Heritage RAG pipeline not available")
    return pipeline
@pytest.fixture
def query_router(dspy_lm):
    """Instantiate ``HeritageQueryRouter``.

    Requesting ``dspy_lm`` ensures that fixture has run first. The test is
    skipped when an ImportError is raised while importing or constructing
    the router.
    """
    try:
        from backend.rag.dspy_heritage_rag import HeritageQueryRouter

        router = HeritageQueryRouter()
    except ImportError:
        pytest.skip("Query router not available")
    return router
# =============================================================================
# Sample test data
# =============================================================================
# Small in-file set of example queries (two Dutch, one English). Each entry
# carries the question text, its language code, and the expected intent and
# entities.
SAMPLE_QUERIES: list[dict[str, Any]] = [
    # Dutch, statistical
    {
        "question": "Hoeveel musea zijn er in Amsterdam?",
        "language": "nl",
        "expected_intent": "statistical",
        "expected_entities": ["amsterdam", "musea"],
    },
    # Dutch, entity lookup
    {
        "question": "Waar is het Rijksmuseum gevestigd?",
        "language": "nl",
        "expected_intent": "entity_lookup",
        "expected_entities": ["rijksmuseum"],
    },
    # English, statistical
    {
        "question": "How many libraries are there in the Netherlands?",
        "language": "en",
        "expected_intent": "statistical",
        "expected_entities": ["libraries", "netherlands"],
    },
]
@pytest.fixture
def sample_queries() -> list[dict]:
    """Return the sample test queries.

    Returns:
        A deep copy of ``SAMPLE_QUERIES``, so that a test mutating the list
        or one of its dicts cannot affect the module-level data seen by
        other tests.
    """
    from copy import deepcopy

    return deepcopy(SAMPLE_QUERIES)
@pytest.fixture
def sample_dspy_examples() -> list[Any]:
    """Return the sample queries as DSPy examples.

    Returns:
        One ``dict_to_dspy_example`` result per sample query when DSPy is
        available, otherwise plain dicts. Either way the data is derived
        from a deep copy of ``SAMPLE_QUERIES``, so mutations made by a test
        do not reach the module-level list.
    """
    from copy import deepcopy

    # Copy first: the conversion passes the ``expected_entities`` list
    # through by reference, which would otherwise stay shared across tests.
    queries = deepcopy(SAMPLE_QUERIES)
    if DSPY_AVAILABLE:
        return [dict_to_dspy_example(q) for q in queries]
    return queries
# Trigger test