# glam/tests/dspy_gitops/conftest.py

"""
Pytest fixtures for DSPy GitOps testing.
"""

import json
import os
from pathlib import Path
from typing import Any
from unittest.mock import MagicMock

import pytest

# dspy is optional: when it cannot be imported, bind the name to a MagicMock
# stand-in so this module still imports, and record the outcome in a flag.
try:
    import dspy
except ImportError:
    dspy = MagicMock()
    DSPY_AVAILABLE = False
else:
    DSPY_AVAILABLE = True


# Dataset files live in a "datasets" directory next to this module.
DATASETS_DIR = Path(__file__).parent.joinpath("datasets")
# Oxigraph endpoint; the OXIGRAPH_ENDPOINT environment variable overrides the
# default. NOTE(review): the default is a hard-coded IP address - confirm it
# is still the intended target.
OXIGRAPH_URL = os.getenv("OXIGRAPH_ENDPOINT", "http://91.98.224.44:7878")


# =============================================================================
# Skip markers
# =============================================================================

# Skip marker for tests that need the real dspy package.
requires_dspy = pytest.mark.skipif(not DSPY_AVAILABLE, reason="DSPy not installed")

# Skip marker for tests that need an LLM API key; either variable name counts.
requires_llm = pytest.mark.skipif(
    not any(
        os.environ.get(key_name)
        for key_name in ("ANTHROPIC_API_KEY", "CLAUDE_API_KEY")
    ),
    reason="ANTHROPIC_API_KEY or CLAUDE_API_KEY not set",
)


# =============================================================================
# Dataset loading
# =============================================================================

def load_examples_from_json(filename: str) -> list[dict[str, Any]]:
    """Load the ``examples`` list from a JSON dataset file.

    Args:
        filename: Name of a file inside ``DATASETS_DIR``.

    Returns:
        The list stored under the top-level ``"examples"`` key, or an empty
        list when the file does not exist or the key is missing or null.
    """
    filepath = DATASETS_DIR / filename
    if not filepath.exists():
        # A missing dataset file is treated as "no examples", not an error.
        return []

    # Decode explicitly as UTF-8: the platform default encoding may differ
    # (e.g. on Windows) and would garble non-ASCII text in the questions.
    with open(filepath, encoding="utf-8") as f:
        data = json.load(f)

    # ``or []`` also covers an explicit ``"examples": null`` entry, so the
    # declared list return type always holds.
    return data.get("examples") or []


def dict_to_dspy_example(ex: dict[str, Any]) -> Any:
    """Build a ``dspy.Example`` from a raw example dict.

    ``question``, ``language`` and ``expected_intent`` are required keys; the
    remaining fields fall back to defaults. ``question`` and ``language`` are
    marked as the example's inputs. When dspy is not installed the dict is
    handed back unchanged.
    """
    if DSPY_AVAILABLE:
        fields = {
            "question": ex["question"],
            "language": ex["language"],
            "expected_intent": ex["expected_intent"],
            "expected_entities": ex.get("expected_entities", []),
            "expected_entity_type": ex.get("expected_entity_type", "institution"),
            "expected_sources": ex.get("expected_sources", []),
            "gold_answer": ex.get("gold_answer"),
        }
        return dspy.Example(**fields).with_inputs("question", "language")

    return ex


@pytest.fixture
def dev_set() -> list[Any]:
    """Provide the development split from ``heritage_rag_dev.json``.

    Entries are dspy examples when dspy is installed, raw dicts otherwise.
    """
    raw_examples = load_examples_from_json("heritage_rag_dev.json")
    if not DSPY_AVAILABLE:
        return raw_examples
    return list(map(dict_to_dspy_example, raw_examples))


@pytest.fixture
def test_set() -> list[Any]:
    """Provide the held-out test split from ``heritage_rag_test.json``.

    Entries are dspy examples when dspy is installed, raw dicts otherwise.
    """
    raw_examples = load_examples_from_json("heritage_rag_test.json")
    if not DSPY_AVAILABLE:
        return raw_examples
    return list(map(dict_to_dspy_example, raw_examples))


@pytest.fixture
def golden_tests() -> list[dict]:
    """Load the golden test cases from ``golden_queries.yaml``.

    Returns:
        The list under the top-level ``golden_tests`` key, or an empty list
        when the file is missing, empty, or has no such key.
    """
    # Imported inside the fixture, so PyYAML is only required when this
    # fixture is actually used.
    import yaml

    filepath = DATASETS_DIR / "golden_queries.yaml"
    if not filepath.exists():
        return []

    # Explicit UTF-8 avoids depending on the platform default encoding.
    with open(filepath, encoding="utf-8") as f:
        # safe_load returns None for an empty document; normalise to a dict
        # so the lookup below cannot raise AttributeError.
        data = yaml.safe_load(f) or {}

    return data.get("golden_tests") or []


# =============================================================================
# API fixtures
# =============================================================================

@pytest.fixture
def oxigraph_url() -> str:
    """Expose the module-level ``OXIGRAPH_URL`` setting to tests."""
    endpoint: str = OXIGRAPH_URL
    return endpoint


@pytest.fixture
def api_client():
    """Build an ``httpx.AsyncClient`` for the API at ``http://localhost:8000``.

    Requests made through the client use a 30 second timeout.

    NOTE(review): the client is returned without being closed by this
    fixture; presumably callers are expected to close it - confirm.
    """
    from httpx import AsyncClient

    client = AsyncClient(base_url="http://localhost:8000", timeout=30.0)
    return client


# =============================================================================
# DSPy fixtures
# =============================================================================

@pytest.fixture
def dspy_lm():
    """Create a Claude-backed ``dspy.LM`` and register it via ``dspy.configure``.

    Skips the requesting test when dspy is not installed, or when neither
    ``ANTHROPIC_API_KEY`` nor ``CLAUDE_API_KEY`` is set.
    """
    if not DSPY_AVAILABLE:
        pytest.skip("DSPy not installed")

    # Take the first non-empty key; ANTHROPIC_API_KEY has priority.
    for var_name in ("ANTHROPIC_API_KEY", "CLAUDE_API_KEY"):
        api_key = os.environ.get(var_name)
        if api_key:
            break
    else:
        # Neither variable held a usable value.
        pytest.skip("ANTHROPIC_API_KEY or CLAUDE_API_KEY not set")

    claude = dspy.LM(model="anthropic/claude-sonnet-4-20250514", api_key=api_key)
    dspy.configure(lm=claude)
    return claude


@pytest.fixture
def heritage_pipeline(dspy_lm):
    """Build the Heritage RAG pipeline with tools disabled.

    Requesting ``dspy_lm`` makes that fixture run first, so the LM is
    configured before the pipeline is created. An ``ImportError`` raised
    while importing or building the pipeline skips the test.
    """
    try:
        from backend.rag.dspy_heritage_rag import (
            create_heritage_rag_pipeline as build_pipeline,
        )
        pipeline = build_pipeline(use_tools=False)
    except ImportError:
        pytest.skip("Heritage RAG pipeline not available")
    return pipeline


@pytest.fixture
def query_router(dspy_lm):
    """Instantiate ``HeritageQueryRouter``.

    Requesting ``dspy_lm`` makes that fixture run first, so the LM is
    configured before the router is created. An ``ImportError`` raised while
    importing or constructing the router skips the test.
    """
    try:
        from backend.rag.dspy_heritage_rag import HeritageQueryRouter as RouterCls
        router = RouterCls()
    except ImportError:
        pytest.skip("Query router not available")
    return router


# =============================================================================
# Sample test data
# =============================================================================

# Small inline query set (two Dutch, one English) served by the sample
# fixtures below. Each entry carries only question, language, expected_intent
# and expected_entities; the other fields read by dict_to_dspy_example are
# omitted, so that function's defaults apply to them.
SAMPLE_QUERIES = [
    {
        "question": "Hoeveel musea zijn er in Amsterdam?",
        "language": "nl",
        "expected_intent": "statistical",
        "expected_entities": ["amsterdam", "musea"],
    },
    {
        "question": "Waar is het Rijksmuseum gevestigd?",
        "language": "nl",
        "expected_intent": "entity_lookup",
        "expected_entities": ["rijksmuseum"],
    },
    {
        "question": "How many libraries are there in the Netherlands?",
        "language": "en",
        "expected_intent": "statistical",
        "expected_entities": ["libraries", "netherlands"],
    },
]


@pytest.fixture
def sample_queries() -> list[dict]:
    """Return a fresh copy of the sample test queries.

    A deep copy is handed out so that a test mutating the result (the list,
    its dicts, or their nested lists) cannot change the module-level
    ``SAMPLE_QUERIES`` seen by later tests.
    """
    import copy

    return copy.deepcopy(SAMPLE_QUERIES)


@pytest.fixture
def sample_dspy_examples() -> list[Any]:
    """Return the sample queries as DSPy examples.

    When dspy is not installed the raw query dicts are returned instead.
    Either way the result is built from a deep copy of ``SAMPLE_QUERIES``, so
    a test mutating it cannot affect the shared module-level data.
    """
    import copy

    queries = copy.deepcopy(SAMPLE_QUERIES)
    if DSPY_AVAILABLE:
        return [dict_to_dspy_example(q) for q in queries]
    return queries
# Trigger test