glam/tests/rag/test_specificity_dspy_integration.py
kempersc 11983014bb Enhance specificity scoring system integration with existing infrastructure
- Updated documentation to clarify integration points with existing components in the RAG pipeline and DSPy framework.
- Added detailed mapping of SPARQL templates to context templates for improved specificity filtering.
- Implemented wrapper patterns around existing classifiers to extend functionality without duplication.
- Introduced new tests for the SpecificityAwareClassifier and SPARQLToContextMapper to ensure proper integration and functionality.
- Enhanced the CustodianRDFConverter to include ISO country and subregion codes from GHCID for better geospatial data handling.
2026-01-05 17:37:49 +01:00

443 lines
18 KiB
Python

"""Tests for specificity-aware DSPy integration in HeritageRAGPipeline.
This module tests:
1. Signature factory functions with optional context templates
2. Getter functions for cached vs dynamic signatures
3. HeritageRAGPipeline initialization with specificity filtering
"""
import pytest
from unittest.mock import patch, MagicMock
class TestSignatureFactoryFunctions:
    """Exercise the private SPARQL signature factory functions.

    Covers ``_create_schema_aware_sparql_signature`` and
    ``_create_schema_aware_person_sparql_signature``, called both with no
    arguments and with a context template plus threshold. Each test skips
    itself when the schema loader (or, where relevant, the specificity
    helper) is reported as unavailable by the module under test.
    """

    def test_create_sparql_signature_without_context(self):
        """Without a context template the signature carries a large docstring."""
        from backend.rag.dspy_heritage_rag import (
            _create_schema_aware_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
        )

        if not SCHEMA_LOADER_AVAILABLE:
            pytest.skip("Schema loader not available")

        signature = _create_schema_aware_sparql_signature()
        assert signature is not None

        # The unfiltered ontology context is expected to be sizeable, so a
        # docstring longer than 1000 characters is used as the indicator.
        assert signature.__doc__ is not None
        assert len(signature.__doc__) > 1000

    def test_create_sparql_signature_with_context_template(self):
        """A template plus threshold yields a signature with a non-empty docstring."""
        from backend.rag.dspy_heritage_rag import (
            _create_schema_aware_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
        )

        if not SCHEMA_LOADER_AVAILABLE:
            pytest.skip("Schema loader not available")
        if create_specificity_aware_sparql_docstring is None:
            pytest.skip("Specificity functions not available")

        filtered = _create_schema_aware_sparql_signature("archive_search", 0.4)
        assert filtered is not None
        assert filtered.__doc__ is not None

        # The unfiltered signature is built as a reference point. The two
        # docstrings are deliberately not compared against each other because
        # the outcome depends on threshold semantics; each one only has to be
        # present and non-empty.
        unfiltered = _create_schema_aware_sparql_signature()
        for candidate in (filtered, unfiltered):
            assert candidate.__doc__ is not None and len(candidate.__doc__) > 0

    def test_create_sparql_signature_different_templates_produce_different_results(self):
        """Two distinct templates each produce a usable signature."""
        from backend.rag.dspy_heritage_rag import (
            _create_schema_aware_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
        )

        if not SCHEMA_LOADER_AVAILABLE:
            pytest.skip("Schema loader not available")
        if create_specificity_aware_sparql_docstring is None:
            pytest.skip("Specificity functions not available")

        # The templates may or may not yield different contexts; only the
        # validity of each resulting signature is checked here.
        archive_signature = _create_schema_aware_sparql_signature("archive_search", 0.5)
        museum_signature = _create_schema_aware_sparql_signature("museum_search", 0.5)
        built = (archive_signature, museum_signature)

        for signature in built:
            assert signature is not None
        for signature in built:
            assert signature.__doc__ is not None

    def test_create_person_sparql_signature_without_context(self):
        """The person factory works without a context template."""
        from backend.rag.dspy_heritage_rag import (
            _create_schema_aware_person_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
        )

        if not SCHEMA_LOADER_AVAILABLE:
            pytest.skip("Schema loader not available")

        person_signature = _create_schema_aware_person_sparql_signature()
        assert person_signature is not None
        assert person_signature.__doc__ is not None

    def test_create_person_sparql_signature_with_context_template(self):
        """The person factory accepts the ``person_research`` template."""
        from backend.rag.dspy_heritage_rag import (
            _create_schema_aware_person_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
        )

        if not SCHEMA_LOADER_AVAILABLE:
            pytest.skip("Schema loader not available")
        if create_specificity_aware_sparql_docstring is None:
            pytest.skip("Specificity functions not available")

        person_signature = _create_schema_aware_person_sparql_signature(
            "person_research", 0.5
        )
        assert person_signature is not None
        assert person_signature.__doc__ is not None
class TestGetterFunctions:
    """Check caching versus dynamic creation in the public signature getters.

    Covers ``get_schema_aware_sparql_signature`` and
    ``get_schema_aware_person_sparql_signature``. Calls without arguments are
    expected to return the same object; calls with differing template or
    threshold arguments are expected to return distinct objects.
    """

    def test_get_sparql_signature_cached_when_no_context(self):
        """Two argument-less calls return the very same signature object."""
        from backend.rag.dspy_heritage_rag import (
            get_schema_aware_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
        )

        if not SCHEMA_LOADER_AVAILABLE:
            pytest.skip("Schema loader not available")

        first = get_schema_aware_sparql_signature()
        second = get_schema_aware_sparql_signature()

        # Identity (not equality) is what demonstrates the cache hit.
        assert first is second

    def test_get_sparql_signature_dynamic_with_context(self):
        """Calls with different templates return distinct, non-None signatures."""
        from backend.rag.dspy_heritage_rag import (
            get_schema_aware_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
        )

        if not SCHEMA_LOADER_AVAILABLE:
            pytest.skip("Schema loader not available")
        if create_specificity_aware_sparql_docstring is None:
            pytest.skip("Specificity functions not available")

        archive_signature = get_schema_aware_sparql_signature("archive_search", 0.4)
        museum_signature = get_schema_aware_sparql_signature("museum_search", 0.4)

        # Distinct templates must not share one signature object.
        assert archive_signature is not museum_signature

        for signature in (archive_signature, museum_signature):
            assert signature is not None

    def test_get_person_sparql_signature_cached_when_no_context(self):
        """Two argument-less person-getter calls return the same object."""
        from backend.rag.dspy_heritage_rag import (
            get_schema_aware_person_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
        )

        if not SCHEMA_LOADER_AVAILABLE:
            pytest.skip("Schema loader not available")

        first = get_schema_aware_person_sparql_signature()
        second = get_schema_aware_person_sparql_signature()

        # Identity (not equality) is what demonstrates the cache hit.
        assert first is second

    def test_get_person_sparql_signature_dynamic_with_context(self):
        """The same template at different thresholds yields distinct objects."""
        from backend.rag.dspy_heritage_rag import (
            get_schema_aware_person_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
        )

        if not SCHEMA_LOADER_AVAILABLE:
            pytest.skip("Schema loader not available")
        if create_specificity_aware_sparql_docstring is None:
            pytest.skip("Specificity functions not available")

        low_threshold = get_schema_aware_person_sparql_signature("person_research", 0.3)
        high_threshold = get_schema_aware_person_sparql_signature("person_research", 0.7)

        assert low_threshold is not high_threshold
class TestHeritageRAGPipelineSpecificityInit:
    """Inspect ``HeritageRAGPipeline`` for its specificity-filtering options.

    These tests only look at the constructor signature and class docstring;
    no pipeline instance is created.
    """

    def test_pipeline_has_specificity_parameters(self):
        """``__init__`` exposes both specificity parameters with their defaults."""
        import inspect
        from backend.rag.dspy_heritage_rag import HeritageRAGPipeline

        init_params = inspect.signature(HeritageRAGPipeline.__init__).parameters

        # Presence first, so a missing parameter fails with a clear assertion
        # rather than a KeyError in the default checks below.
        for expected_name in ("use_specificity_filtering", "specificity_threshold"):
            assert expected_name in init_params

        # Filtering is opt-in and the threshold defaults to 0.5.
        assert init_params["use_specificity_filtering"].default is False
        assert init_params["specificity_threshold"].default == 0.5

    def test_pipeline_docstring_documents_specificity_params(self):
        """The class docstring mentions the specificity options."""
        from backend.rag.dspy_heritage_rag import HeritageRAGPipeline

        # Argument documentation is looked up on the class docstring.
        class_doc = HeritageRAGPipeline.__doc__
        assert class_doc is not None

        # Either the exact parameter name or any mention of "specificity"
        # (case-insensitive) is accepted.
        assert (
            "use_specificity_filtering" in class_doc
            or "specificity" in class_doc.lower()
        )
class TestContextTemplateValidation:
    """Check how signature creation reacts to known and unknown templates."""

    def test_valid_context_templates(self):
        """Every advertised context template yields a signature."""
        from backend.rag.dspy_heritage_rag import (
            get_schema_aware_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
            get_available_context_templates,
        )

        if not SCHEMA_LOADER_AVAILABLE:
            pytest.skip("Schema loader not available")
        if get_available_context_templates is None:
            pytest.skip("Specificity functions not available")

        # Iterate over whatever the module reports as available so newly
        # added templates are covered without touching this test.
        for template in get_available_context_templates():
            built = get_schema_aware_sparql_signature(template, 0.5)
            assert built is not None, f"Failed for template: {template}"

    def test_invalid_context_template_falls_back_gracefully(self):
        """An unknown template name does not raise and still gives a signature."""
        from backend.rag.dspy_heritage_rag import (
            get_schema_aware_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
        )

        if not SCHEMA_LOADER_AVAILABLE:
            pytest.skip("Schema loader not available")
        if create_specificity_aware_sparql_docstring is None:
            pytest.skip("Specificity functions not available")

        # The call itself is the main check: it must complete without raising.
        # The intended fallback is the general_heritage template, but only the
        # presence of a docstring is verified here.
        fallback_signature = get_schema_aware_sparql_signature(
            "invalid_template_xyz", 0.5
        )
        assert fallback_signature is not None
        assert fallback_signature.__doc__ is not None
class TestSpecificityImportsAvailable:
    """Verify the specificity helpers are exported by ``dspy_heritage_rag``.

    Each helper may be ``None`` (for example when the schema loader is not
    available) but the name itself must always be importable.
    """

    def test_create_specificity_aware_sparql_docstring_importable(self):
        """``create_specificity_aware_sparql_docstring`` is None or callable."""
        from backend.rag.dspy_heritage_rag import create_specificity_aware_sparql_docstring

        helper = create_specificity_aware_sparql_docstring
        assert helper is None or callable(helper)

    def test_format_filtered_ontology_context_importable(self):
        """``format_filtered_ontology_context`` is None or callable."""
        from backend.rag.dspy_heritage_rag import format_filtered_ontology_context

        helper = format_filtered_ontology_context
        assert helper is None or callable(helper)

    def test_get_available_context_templates_importable(self):
        """``get_available_context_templates`` is None or callable."""
        from backend.rag.dspy_heritage_rag import get_available_context_templates

        helper = get_available_context_templates
        assert helper is None or callable(helper)

    def test_specificity_functions_work_when_available(self):
        """With the schema loader present, the helpers exist and return data."""
        from backend.rag.dspy_heritage_rag import (
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
            format_filtered_ontology_context,
            get_available_context_templates,
        )

        if not SCHEMA_LOADER_AVAILABLE:
            pytest.skip("Schema loader not available")

        # With the schema loader present none of the helpers may be None.
        assert create_specificity_aware_sparql_docstring is not None
        assert format_filtered_ontology_context is not None
        assert get_available_context_templates is not None

        # The template listing is a non-empty list that includes the
        # general-purpose template.
        available = get_available_context_templates()
        assert isinstance(available, list)
        assert len(available) > 0
        assert "general_heritage" in available

        # The docstring builder returns non-empty text.
        generated = create_specificity_aware_sparql_docstring("general_heritage", 0.5)
        assert isinstance(generated, str)
        assert len(generated) > 0
class TestThresholdBehavior:
    """Tests for specificity threshold behavior.

    Note: The threshold semantics are "include classes where score <= threshold".
    Higher threshold = more classes included (more permissive filter).
    Lower threshold = fewer classes included (more restrictive filter).
    """

    def test_threshold_variation_produces_different_results(self):
        """A higher threshold yields output at least as long as a lower one."""
        from backend.rag.dspy_heritage_rag import (
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
        )

        if not SCHEMA_LOADER_AVAILABLE:
            pytest.skip("Schema loader not available")
        if create_specificity_aware_sparql_docstring is None:
            pytest.skip("Specificity functions not available")

        # Higher threshold = more permissive = more classes
        docstring_high = create_specificity_aware_sparql_docstring("general_heritage", 0.9)
        # Lower threshold = more restrictive = fewer classes
        docstring_low = create_specificity_aware_sparql_docstring("general_heritage", 0.1)

        # With "include if score <= threshold" semantics the permissive
        # docstring can never be shorter; equality is tolerated because both
        # thresholds may happen to select the same classes.
        assert len(docstring_high) >= len(docstring_low)

    def test_threshold_bounds_validation(self):
        """Thresholds outside 0-1 are either rejected or produce a string.

        Two outcomes are accepted for each out-of-range value: the call
        raises ``ValueError``/``AssertionError`` (rejection), or it returns
        normally, in which case the result must be a ``str``.
        """
        from backend.rag.dspy_heritage_rag import (
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
        )

        if not SCHEMA_LOADER_AVAILABLE:
            pytest.skip("Schema loader not available")
        if create_specificity_aware_sparql_docstring is None:
            pytest.skip("Specificity functions not available")

        for out_of_range in (-0.5, 1.5):
            # Only the call lives inside the ``try``. The type check below
            # must stay outside it: with the assertion inside, the
            # ``except AssertionError`` branch would swallow the test's own
            # failure and a non-string result could never fail the test.
            try:
                result = create_specificity_aware_sparql_docstring(
                    "general_heritage", out_of_range
                )
            except (ValueError, AssertionError):
                # Rejecting the value outright is acceptable behavior.
                # NOTE(review): AssertionError is still accepted in case the
                # implementation validates with ``assert`` - confirm.
                continue

            assert isinstance(result, str), (
                f"threshold {out_of_range} returned {type(result).__name__}, expected str"
            )

    def test_docstring_contains_template_context_info(self):
        """The generated docstring indicates which template was used."""
        from backend.rag.dspy_heritage_rag import (
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
        )

        if not SCHEMA_LOADER_AVAILABLE:
            pytest.skip("Schema loader not available")
        if create_specificity_aware_sparql_docstring is None:
            pytest.skip("Specificity functions not available")

        docstring = create_specificity_aware_sparql_docstring("archive_search", 0.5)

        # Either the template name itself or the word "filtered"
        # (case-insensitive) counts as a sufficient marker.
        assert "archive_search" in docstring or "filtered" in docstring.lower()
class TestSignatureCreationWithContextTemplate:
    """Integration checks for building signatures from a context template."""

    def test_signature_has_context_in_instructions(self):
        """A templated signature's docstring carries a marker of the filtering."""
        from backend.rag.dspy_heritage_rag import (
            _create_schema_aware_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
        )

        if not SCHEMA_LOADER_AVAILABLE:
            pytest.skip("Schema loader not available")
        if create_specificity_aware_sparql_docstring is None:
            pytest.skip("Specificity functions not available")

        templated = _create_schema_aware_sparql_signature("archive_search", 0.5)

        assert templated.__doc__ is not None
        # Either the template name or the word "filtered" (case-insensitive)
        # is accepted as evidence that the filtered context was used.
        assert (
            "archive_search" in templated.__doc__
            or "filtered" in templated.__doc__.lower()
        )

    def test_full_context_signature_vs_filtered_context(self):
        """Unfiltered and templated signatures are distinct, documented objects."""
        from backend.rag.dspy_heritage_rag import (
            _create_schema_aware_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
        )

        if not SCHEMA_LOADER_AVAILABLE:
            pytest.skip("Schema loader not available")
        if create_specificity_aware_sparql_docstring is None:
            pytest.skip("Specificity functions not available")

        # No arguments gives the full context; a template gives the filtered one.
        unfiltered = _create_schema_aware_sparql_signature()
        templated = _create_schema_aware_sparql_signature("archive_search", 0.5)
        pair = (unfiltered, templated)

        for signature in pair:
            assert signature is not None

        # The factory must hand back separate objects for the two variants.
        assert unfiltered is not templated

        for signature in pair:
            assert signature.__doc__ is not None