# glam/tests/dspy_gitops/conftest.py
# 2026-01-11 18:08:40 +01:00
#
# 198 lines
# 5.3 KiB
# Python

"""
Pytest fixtures for DSPy GitOps testing.
"""
import json
import os
from pathlib import Path
from typing import Any
from unittest.mock import MagicMock
import pytest
# DSPy is optional: when the import fails, a MagicMock is bound to the same
# name so the rest of this module can still be imported, and DSPY_AVAILABLE
# records which of the two cases applies.
try:
    import dspy
except ImportError:
    dspy = MagicMock()
    DSPY_AVAILABLE = False
else:
    DSPY_AVAILABLE = True

# Dataset files live in the "datasets" directory next to this file.
DATASETS_DIR = Path(__file__).parent.joinpath("datasets")

# Oxigraph endpoint URL; the OXIGRAPH_ENDPOINT environment variable
# overrides the built-in default.
OXIGRAPH_URL = os.getenv("OXIGRAPH_ENDPOINT", "http://91.98.224.44:7878")
# =============================================================================
# Skip markers
# =============================================================================
# Marks a test to be skipped when the dspy import at the top of this file
# failed.
requires_dspy = pytest.mark.skipif(not DSPY_AVAILABLE, reason="DSPy not installed")

# Marks a test to be skipped when ANTHROPIC_API_KEY is unset or empty.
# The environment is read once, when this module is imported.
requires_llm = pytest.mark.skipif(
    not os.getenv("ANTHROPIC_API_KEY"),
    reason="ANTHROPIC_API_KEY not set",
)
# =============================================================================
# Dataset loading
# =============================================================================
def load_examples_from_json(filename: str) -> list[dict[str, Any]]:
    """Load the ``examples`` list from a JSON file in ``DATASETS_DIR``.

    Args:
        filename: Name of the file, resolved relative to ``DATASETS_DIR``.

    Returns:
        The value stored under the top-level ``"examples"`` key, or an empty
        list when the file does not exist or has no such key.
    """
    filepath = DATASETS_DIR / filename
    if not filepath.exists():
        return []
    # Decode explicitly as UTF-8 rather than with the platform's locale
    # encoding, so non-ASCII question text loads the same on every machine.
    with open(filepath, encoding="utf-8") as f:
        data = json.load(f)
    return data.get("examples", [])
def dict_to_dspy_example(ex: dict[str, Any]) -> Any:
    """Turn one example dict into a ``dspy.Example``.

    When DSPy is not available the dict is returned unchanged. Otherwise
    ``question``, ``language`` and ``expected_intent`` are required keys
    (a missing one raises ``KeyError``); the remaining fields fall back to
    the defaults shown below. ``question`` and ``language`` are marked as
    the example's inputs.
    """
    if DSPY_AVAILABLE:
        fields = {
            "question": ex["question"],
            "language": ex["language"],
            "expected_intent": ex["expected_intent"],
            "expected_entities": ex.get("expected_entities", []),
            "expected_entity_type": ex.get("expected_entity_type", "institution"),
            "expected_sources": ex.get("expected_sources", []),
            "gold_answer": ex.get("gold_answer"),
        }
        return dspy.Example(**fields).with_inputs("question", "language")
    return ex
@pytest.fixture
def dev_set() -> list[Any]:
    """Provide the development examples from ``heritage_rag_dev.json``.

    The entries are converted with ``dict_to_dspy_example`` when DSPy is
    available and returned as plain dicts otherwise.
    """
    raw_examples = load_examples_from_json("heritage_rag_dev.json")
    if not DSPY_AVAILABLE:
        return raw_examples
    return list(map(dict_to_dspy_example, raw_examples))
@pytest.fixture
def test_set() -> list[Any]:
    """Provide the test examples from ``heritage_rag_test.json``.

    The entries are converted with ``dict_to_dspy_example`` when DSPy is
    available and returned as plain dicts otherwise.
    """
    raw_examples = load_examples_from_json("heritage_rag_test.json")
    if not DSPY_AVAILABLE:
        return raw_examples
    converted = []
    for entry in raw_examples:
        converted.append(dict_to_dspy_example(entry))
    return converted
@pytest.fixture
def golden_tests() -> list[dict]:
    """Load the golden test cases from ``golden_queries.yaml``.

    Returns:
        The list stored under the top-level ``golden_tests`` key, or an
        empty list when the file is missing, is empty, or has no (or a null)
        ``golden_tests`` entry.
    """
    import yaml

    filepath = DATASETS_DIR / "golden_queries.yaml"
    if not filepath.exists():
        return []
    # Read as UTF-8 explicitly instead of relying on the platform locale.
    with open(filepath, encoding="utf-8") as f:
        # safe_load yields None for an empty document; treat that the same
        # as a file that defines no golden tests.
        data = yaml.safe_load(f) or {}
    # A bare "golden_tests:" key parses to None; normalise it to a list so
    # the declared return type holds.
    return data.get("golden_tests") or []
# =============================================================================
# API fixtures
# =============================================================================
@pytest.fixture
def oxigraph_url() -> str:
    """Expose the module-level ``OXIGRAPH_URL`` value to tests."""
    endpoint = OXIGRAPH_URL
    return endpoint
@pytest.fixture
def api_client():
    """Build an ``httpx.AsyncClient`` for ``http://localhost:8000``.

    Requests made through the client use a 30 second timeout. The client is
    handed to the test as-is; this fixture does not close it afterwards.
    """
    from httpx import AsyncClient

    client_options = {"base_url": "http://localhost:8000", "timeout": 30.0}
    return AsyncClient(**client_options)
# =============================================================================
# DSPy fixtures
# =============================================================================
@pytest.fixture
def dspy_lm():
    """Create a Claude-backed ``dspy.LM`` and install it via ``dspy.configure``.

    The requesting test is skipped when DSPy is not installed or when
    ``ANTHROPIC_API_KEY`` is unset or empty.
    """
    if not DSPY_AVAILABLE:
        pytest.skip("DSPy not installed")

    anthropic_key = os.environ.get("ANTHROPIC_API_KEY")
    if not anthropic_key:
        pytest.skip("ANTHROPIC_API_KEY not set")

    claude = dspy.LM(
        model="anthropic/claude-sonnet-4-20250514",
        api_key=anthropic_key,
    )
    dspy.configure(lm=claude)
    return claude
@pytest.fixture
def heritage_pipeline(dspy_lm):
    """Build the Heritage RAG pipeline with ``use_tools=False``.

    Requests ``dspy_lm`` first, so that fixture's setup and skip conditions
    apply here too. The test is skipped when an ``ImportError`` is raised
    while importing or calling the pipeline factory.
    """
    try:
        from backend.rag.dspy_heritage_rag import (
            create_heritage_rag_pipeline as build_pipeline,
        )

        pipeline = build_pipeline(use_tools=False)
    except ImportError:
        pytest.skip("Heritage RAG pipeline not available")
    return pipeline
@pytest.fixture
def query_router(dspy_lm):
    """Instantiate ``HeritageQueryRouter``.

    Requests ``dspy_lm`` first, so that fixture's setup and skip conditions
    apply here too. The test is skipped when an ``ImportError`` is raised
    while importing or constructing the router.
    """
    try:
        from backend.rag.dspy_heritage_rag import HeritageQueryRouter as RouterCls

        router = RouterCls()
    except ImportError:
        pytest.skip("Query router not available")
    return router
# =============================================================================
# Sample test data
# =============================================================================
# Small inline set of example queries (two Dutch, one English). Each entry
# carries the keys that dict_to_dspy_example requires (question, language,
# expected_intent) plus expected_entities.
SAMPLE_QUERIES: list[dict[str, Any]] = [
    # Dutch, statistical intent
    {
        "question": "Hoeveel musea zijn er in Amsterdam?",
        "language": "nl",
        "expected_intent": "statistical",
        "expected_entities": ["amsterdam", "musea"],
    },
    # Dutch, entity lookup intent
    {
        "question": "Waar is het Rijksmuseum gevestigd?",
        "language": "nl",
        "expected_intent": "entity_lookup",
        "expected_entities": ["rijksmuseum"],
    },
    # English, statistical intent
    {
        "question": "How many libraries are there in the Netherlands?",
        "language": "en",
        "expected_intent": "statistical",
        "expected_entities": ["libraries", "netherlands"],
    },
]
@pytest.fixture
def sample_queries() -> list[dict]:
    """Return the sample test queries.

    Returns:
        A deep copy of ``SAMPLE_QUERIES``, equal in content to the constant.
    """
    import copy

    # Hand each test its own copy: returning the module-level list itself
    # would let one test's mutation leak into every later test.
    return copy.deepcopy(SAMPLE_QUERIES)
@pytest.fixture
def sample_dspy_examples() -> list[Any]:
    """Return the sample queries as DSPy examples.

    Returns:
        One ``dict_to_dspy_example`` result per sample query when DSPy is
        available; otherwise the sample queries as plain dicts. Either way
        the data is built from a deep copy of ``SAMPLE_QUERIES``.
    """
    import copy

    # Work on a private copy so neither the fallback dicts nor the nested
    # lists passed on to the examples alias the shared module-level constant.
    queries = copy.deepcopy(SAMPLE_QUERIES)
    if DSPY_AVAILABLE:
        return [dict_to_dspy_example(q) for q in queries]
    return queries