# glam/tests/dspy_gitops/metrics/sparql_correctness.py

"""
SPARQL Correctness Metrics

Validates SPARQL syntax and query results.
"""

import re
from typing import Any, Optional
import logging

logger = logging.getLogger(__name__)


def _is_properly_nested(text: str, opener: str, closer: str) -> bool:
    """Return True if every ``closer`` in ``text`` follows a matching ``opener``."""
    depth = 0
    for char in text:
        if char == opener:
            depth += 1
        elif char == closer:
            depth -= 1
            if depth < 0:
                # A closer appeared before its opener (e.g. "} ... {").
                return False
    return depth == 0


def validate_sparql_syntax(sparql: str) -> tuple[bool, Optional[str]]:
    """Validate SPARQL syntax using basic heuristic rules.

    This is not a parser. It checks, in order, that the query is non-empty,
    names a query form, has a WHERE keyword when it is a SELECT, has properly
    nested braces and parentheses, and contains no ``??`` sequence. Keywords
    are matched case-insensitively as whole words, so ``ASK`` inside ``task``
    or ``SELECT`` inside ``?selected`` does not count. Delimiters inside string
    literals or IRIs are not treated specially.

    Args:
        sparql: SPARQL query string

    Returns:
        Tuple of (is_valid, error_message); error_message is None when valid
    """
    if not sparql or not sparql.strip():
        return False, "Empty query"

    def has_keyword(*keywords: str) -> bool:
        # Whole-word match avoids false hits inside prefixes and variable names.
        pattern = r"\b(?:" + "|".join(keywords) + r")\b"
        return re.search(pattern, sparql, re.IGNORECASE) is not None

    # Must have SELECT, ASK, CONSTRUCT, or DESCRIBE
    if not has_keyword("SELECT", "ASK", "CONSTRUCT", "DESCRIBE"):
        return False, "Missing query form (SELECT/ASK/CONSTRUCT/DESCRIBE)"

    # SELECT queries are required to spell out the WHERE keyword
    if has_keyword("SELECT") and not has_keyword("WHERE"):
        return False, "Missing WHERE clause"

    # Check braces are balanced and correctly ordered
    if not _is_properly_nested(sparql, "{", "}"):
        return False, "Unbalanced braces"

    # Check parentheses are balanced and correctly ordered
    if not _is_properly_nested(sparql, "(", ")"):
        return False, "Unbalanced parentheses"

    # Check for common errors
    if "??" in sparql:
        return False, "Double question mark in variable"

    return True, None


def check_required_prefixes(sparql: str, required: Optional[list[str]] = None) -> tuple[bool, list[str]]:
    """Check that every required prefix used in the query is also declared.

    A prefix that does not occur in the query at all is not reported; only
    prefixes that are used without a matching ``PREFIX name:`` declaration
    are. Matching is case-insensitive and heuristic (no real parsing): a
    prefix counts as used when ``name:`` appears without being the tail of a
    longer prefix name (so ``ecrm:`` is not a use of ``crm:``), and as
    declared when ``PREFIX``, any whitespace, and exactly ``name:`` appear.

    Args:
        sparql: SPARQL query string
        required: List of required prefixes (e.g., ["hc:", "crm:"]); a
            trailing colon is optional. Defaults to ["hc:", "crm:"].

    Returns:
        Tuple of (has_all, missing_prefixes)
    """
    if required is None:
        required = ["hc:", "crm:"]  # Default heritage prefixes

    sparql_lower = sparql.lower()
    missing = []

    for prefix in required:
        name = re.escape(prefix.lower().rstrip(":"))

        # Lookbehind rejects hits that are the tail of a longer prefix name.
        used = re.search(rf"(?<![\w.\-]){name}:", sparql_lower)
        if used is None:
            continue

        # Requiring the colon right after the name keeps "PREFIX hcx:" from
        # passing as a declaration of "hc:"; \s+ tolerates tabs/newlines.
        declared = re.search(rf"\bprefix\s+{name}:", sparql_lower)
        if declared is None:
            missing.append(prefix)

    return not missing, missing


def sparql_validation_score(sparql: str) -> float:
    """Turn the syntax and prefix checks into a single score.

    A query that fails the syntax check scores 0.0. Otherwise the score
    starts at 1.0 and loses 0.1 for each prefix reported as missing by
    ``check_required_prefixes``, never dropping below 0.0.

    Args:
        sparql: SPARQL query string

    Returns:
        Score 0.0-1.0 based on validity
    """
    syntax_ok, _reason = validate_sparql_syntax(sparql)
    if not syntax_ok:
        return 0.0

    # Each undeclared prefix costs a tenth of a point.
    all_declared, undeclared = check_required_prefixes(sparql)
    penalty = 0.0 if all_declared else 0.1 * len(undeclared)

    return max(0.0, 1.0 - penalty)


async def execute_sparql_query(
    sparql: str,
    endpoint: str = "http://91.98.224.44:7878/query"
) -> tuple[bool, Any]:
    """POST a SPARQL query to an HTTP endpoint and return the outcome.

    The query is sent as the ``query`` form field with an ``Accept`` header
    asking for SPARQL JSON results, using a 30 second client timeout. No
    exception escapes this function: any failure (including ``httpx`` not
    being importable) is reported as ``(False, <message>)``.

    Args:
        sparql: SPARQL query string
        endpoint: SPARQL endpoint URL

    Returns:
        Tuple of (success, results_or_error): the decoded JSON body on
        HTTP 200, otherwise an error string
    """
    try:
        # Imported lazily so a missing httpx is reported like any other failure.
        import httpx

        request_headers = {"Accept": "application/sparql-results+json"}
        form_payload = {"query": sparql}

        async with httpx.AsyncClient(timeout=30.0) as http:
            reply = await http.post(
                endpoint,
                data=form_payload,
                headers=request_headers,
            )

            if reply.status_code != 200:
                # Only the first 200 characters of the body go into the message.
                return False, f"HTTP {reply.status_code}: {reply.text[:200]}"

            return True, reply.json()
    except Exception as exc:
        return False, str(exc)

def sparql_result_score(
    results: dict,
    expected_min: int = 0,
    expected_max: Optional[int] = None,
) -> float:
    """Score SPARQL results based on how many bindings were expected.

    The number of entries under ``results["results"]["bindings"]`` is
    compared with the expected range:

    * inside the range: 1.0
    * below ``expected_min``: ``count / expected_min`` (0.0 for no results)
    * above ``expected_max``: 1.0 minus half the relative overshoot, floored
      at 0.5; a non-positive ``expected_max`` has no meaningful relative
      overshoot, so the 0.5 floor is returned directly

    Args:
        results: SPARQL JSON results
        expected_min: Minimum expected results
        expected_max: Maximum expected results (None = no limit)

    Returns:
        Score 0.0-1.0; 0.0 if ``results`` cannot be interpreted
    """
    try:
        bindings = results.get("results", {}).get("bindings", [])
        count = len(bindings)

        # No results when some expected
        if count == 0 and expected_min > 0:
            return 0.0

        # Below minimum: partial credit (expected_min > count >= 0 here,
        # so the divisor is never zero)
        if count < expected_min:
            return count / expected_min

        # Within expected range
        if expected_max is None or count <= expected_max:
            return 1.0

        # Too many results. Guard the division: with expected_max <= 0 the
        # relative overshoot is undefined, so apply the penalty floor.
        if expected_max <= 0:
            return 0.5

        overshoot = (count - expected_max) / expected_max
        return max(0.5, 1.0 - overshoot * 0.5)

    except Exception as e:
        # Deliberately broad: a malformed result payload should score 0.0
        # rather than abort an evaluation run.
        logger.warning("Error scoring SPARQL results: %s", e)
        return 0.0


def sparql_correctness_metric(example: Any, pred: Any, trace: Any = None) -> float:
    """Score a prediction's SPARQL query with the static validation checks.

    The query is read from ``pred.sparql`` and passed to
    ``sparql_validation_score``; it is never executed. ``example`` and
    ``trace`` are accepted for the DSPy metric signature but are not used.

    Args:
        example: DSPy Example
        pred: Prediction with sparql field
        trace: Optional trace

    Returns:
        Validation score 0.0-1.0 (0.0 when the prediction has no query)
    """
    query = getattr(pred, "sparql", None)

    # A missing or empty query gets no credit and skips validation entirely.
    return sparql_validation_score(query) if query else 0.0


# Common SPARQL patterns for heritage queries.
# Each pattern is matched case-insensitively, and DOTALL lets ".*" span the
# line breaks of a multi-line query.
HERITAGE_SPARQL_PATTERNS = {
    "count_by_type": re.compile(
        r"SELECT.*COUNT.*WHERE.*institutionType",
        re.IGNORECASE | re.DOTALL
    ),
    # The alternation is grouped so that both property names require the
    # leading "SELECT ... WHERE ..."; ungrouped, "addressCountry" alone matched.
    "list_by_location": re.compile(
        r"SELECT.*WHERE.*(?:addressLocality|addressCountry)",
        re.IGNORECASE | re.DOTALL
    ),
    "entity_lookup": re.compile(
        r"SELECT.*WHERE.*prefLabel.*FILTER.*CONTAINS",
        re.IGNORECASE | re.DOTALL
    ),
}


def matches_expected_pattern(sparql: str, intent: str) -> bool:
    """Tell whether a query has the shape expected for a given intent.

    The intents "statistical", "geographic" and "entity_lookup" are each
    tied to one entry of ``HERITAGE_SPARQL_PATTERNS``; the query matches if
    that regex is found anywhere in it. Any other intent has no pattern and
    is accepted unconditionally.

    Args:
        sparql: SPARQL query string
        intent: Query intent

    Returns:
        True if pattern matches
    """
    intent_patterns = {
        intent_name: HERITAGE_SPARQL_PATTERNS[pattern_key]
        for intent_name, pattern_key in (
            ("statistical", "count_by_type"),
            ("geographic", "list_by_location"),
            ("entity_lookup", "entity_lookup"),
        )
    }

    expected = intent_patterns.get(intent)
    if not expected:
        # No pattern defined, assume OK
        return True

    return expected.search(sparql) is not None