# Change summary:
# - Updated documentation to clarify integration points with existing components in the RAG pipeline and DSPy framework.
# - Added a detailed mapping of SPARQL templates to context templates for improved specificity filtering.
# - Implemented wrapper patterns around existing classifiers to extend functionality without duplication.
# - Introduced new tests for the SpecificityAwareClassifier and SPARQLToContextMapper to ensure proper integration and functionality.
# - Enhanced the CustodianRDFConverter to include ISO country and subregion codes from GHCID for better geospatial data handling.
443 lines
18 KiB
Python
443 lines
18 KiB
Python
"""Tests for specificity-aware DSPy integration in HeritageRAGPipeline.
|
|
|
|
This module tests:
|
|
1. Signature factory functions with optional context templates
|
|
2. Getter functions for cached vs dynamic signatures
|
|
3. HeritageRAGPipeline initialization with specificity filtering
|
|
"""
|
|
|
|
import pytest
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
|
|
class TestSignatureFactoryFunctions:
    """Checks for the private SPARQL signature factory functions.

    Covers ``_create_schema_aware_sparql_signature`` and
    ``_create_schema_aware_person_sparql_signature``, each called both with
    no arguments and with a (context template, threshold) pair. Tests are
    skipped when the imported ``SCHEMA_LOADER_AVAILABLE`` flag is falsy.
    """

    @staticmethod
    def _require(schema_loader_available, *specificity_functions):
        """Skip the calling test when an optional dependency is missing.

        Args:
            schema_loader_available: The ``SCHEMA_LOADER_AVAILABLE`` value
                imported by the test.
            *specificity_functions: Imported specificity helpers; a value of
                ``None`` is treated as "not available".
        """
        if not schema_loader_available:
            pytest.skip("Schema loader not available")
        if any(fn is None for fn in specificity_functions):
            pytest.skip("Specificity functions not available")

    def test_create_sparql_signature_without_context(self):
        """Factory without arguments yields a signature with a large docstring."""
        from backend.rag.dspy_heritage_rag import (
            _create_schema_aware_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
        )

        self._require(SCHEMA_LOADER_AVAILABLE)

        signature = _create_schema_aware_sparql_signature()
        assert signature is not None

        # A docstring longer than 1000 characters is used as the proxy for
        # "the full ontology context was embedded".
        assert signature.__doc__ is not None
        assert len(signature.__doc__) > 1000

    def test_create_sparql_signature_with_context_template(self):
        """Factory with a context template yields a signature with a docstring."""
        from backend.rag.dspy_heritage_rag import (
            _create_schema_aware_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
        )

        self._require(
            SCHEMA_LOADER_AVAILABLE, create_specificity_aware_sparql_docstring
        )

        filtered = _create_schema_aware_sparql_signature("archive_search", 0.4)
        assert filtered is not None
        assert filtered.__doc__ is not None

        unfiltered = _create_schema_aware_sparql_signature()

        # The two docstrings are deliberately not compared with each other:
        # only non-emptiness is checked because the relative size depends on
        # the threshold semantics.
        for candidate in (filtered, unfiltered):
            assert candidate.__doc__ is not None and len(candidate.__doc__) > 0

    def test_create_sparql_signature_different_templates_produce_different_results(self):
        """Two different templates each yield a valid, documented signature.

        Note: despite the test name, only validity is asserted; the two
        results are not compared with each other.
        """
        from backend.rag.dspy_heritage_rag import (
            _create_schema_aware_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
        )

        self._require(
            SCHEMA_LOADER_AVAILABLE, create_specificity_aware_sparql_docstring
        )

        archive_signature = _create_schema_aware_sparql_signature("archive_search", 0.5)
        museum_signature = _create_schema_aware_sparql_signature("museum_search", 0.5)
        signatures = (archive_signature, museum_signature)

        assert all(signature is not None for signature in signatures)
        assert all(signature.__doc__ is not None for signature in signatures)

    def test_create_person_sparql_signature_without_context(self):
        """Person factory without arguments yields a documented signature."""
        from backend.rag.dspy_heritage_rag import (
            _create_schema_aware_person_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
        )

        self._require(SCHEMA_LOADER_AVAILABLE)

        person_signature = _create_schema_aware_person_sparql_signature()
        assert person_signature is not None
        assert person_signature.__doc__ is not None

    def test_create_person_sparql_signature_with_context_template(self):
        """Person factory with the person_research template yields a documented signature."""
        from backend.rag.dspy_heritage_rag import (
            _create_schema_aware_person_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
        )

        self._require(
            SCHEMA_LOADER_AVAILABLE, create_specificity_aware_sparql_docstring
        )

        person_signature = _create_schema_aware_person_sparql_signature(
            "person_research", 0.5
        )
        assert person_signature is not None
        assert person_signature.__doc__ is not None
|
|
|
|
|
|
class TestGetterFunctions:
    """Checks for the public signature getters and their caching behaviour.

    Covers ``get_schema_aware_sparql_signature`` and
    ``get_schema_aware_person_sparql_signature``: calls without arguments are
    expected to return the same object, calls with differing template or
    threshold arguments are expected to return distinct objects.
    """

    @staticmethod
    def _require(schema_loader_available, *specificity_functions):
        """Skip the calling test when an optional dependency is missing.

        Args:
            schema_loader_available: The ``SCHEMA_LOADER_AVAILABLE`` value
                imported by the test.
            *specificity_functions: Imported specificity helpers; a value of
                ``None`` is treated as "not available".
        """
        if not schema_loader_available:
            pytest.skip("Schema loader not available")
        if any(fn is None for fn in specificity_functions):
            pytest.skip("Specificity functions not available")

    def test_get_sparql_signature_cached_when_no_context(self):
        """Two argument-less getter calls return the very same object."""
        from backend.rag.dspy_heritage_rag import (
            get_schema_aware_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
        )

        self._require(SCHEMA_LOADER_AVAILABLE)

        first_call = get_schema_aware_sparql_signature()
        second_call = get_schema_aware_sparql_signature()

        # Identity (not equality) is what demonstrates caching.
        assert first_call is second_call

    def test_get_sparql_signature_dynamic_with_context(self):
        """Getter calls with different templates return distinct, non-None objects."""
        from backend.rag.dspy_heritage_rag import (
            get_schema_aware_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
        )

        self._require(
            SCHEMA_LOADER_AVAILABLE, create_specificity_aware_sparql_docstring
        )

        archive_signature = get_schema_aware_sparql_signature("archive_search", 0.4)
        museum_signature = get_schema_aware_sparql_signature("museum_search", 0.4)

        assert archive_signature is not museum_signature
        assert archive_signature is not None
        assert museum_signature is not None

    def test_get_person_sparql_signature_cached_when_no_context(self):
        """Two argument-less person getter calls return the very same object."""
        from backend.rag.dspy_heritage_rag import (
            get_schema_aware_person_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
        )

        self._require(SCHEMA_LOADER_AVAILABLE)

        first_call = get_schema_aware_person_sparql_signature()
        second_call = get_schema_aware_person_sparql_signature()

        # Identity (not equality) is what demonstrates caching.
        assert first_call is second_call

    def test_get_person_sparql_signature_dynamic_with_context(self):
        """Person getter calls with different thresholds return distinct objects."""
        from backend.rag.dspy_heritage_rag import (
            get_schema_aware_person_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
        )

        self._require(
            SCHEMA_LOADER_AVAILABLE, create_specificity_aware_sparql_docstring
        )

        low_threshold = get_schema_aware_person_sparql_signature("person_research", 0.3)
        high_threshold = get_schema_aware_person_sparql_signature("person_research", 0.7)

        assert low_threshold is not high_threshold
|
|
|
|
|
|
class TestHeritageRAGPipelineSpecificityInit:
    """Checks the specificity-related constructor surface of HeritageRAGPipeline.

    These tests only introspect the class (signature and docstring); no
    pipeline instance is created.
    """

    def test_pipeline_has_specificity_parameters(self):
        """``__init__`` exposes both specificity parameters with the expected defaults."""
        import inspect
        from backend.rag.dspy_heritage_rag import HeritageRAGPipeline

        init_params = inspect.signature(HeritageRAGPipeline.__init__).parameters

        # Presence first, so a missing parameter fails with a clear assertion
        # rather than a KeyError in the default checks below.
        for expected_name in ("use_specificity_filtering", "specificity_threshold"):
            assert expected_name in init_params

        # Filtering is opt-in and the threshold defaults to 0.5.
        assert init_params["use_specificity_filtering"].default is False
        assert init_params["specificity_threshold"].default == 0.5

    def test_pipeline_docstring_documents_specificity_params(self):
        """The class docstring mentions specificity in some form."""
        from backend.rag.dspy_heritage_rag import HeritageRAGPipeline

        # The class docstring (not ``__init__``'s) is what gets inspected.
        class_doc = HeritageRAGPipeline.__doc__
        assert class_doc is not None

        assert (
            "use_specificity_filtering" in class_doc
            or "specificity" in class_doc.lower()
        )
|
|
|
|
|
|
class TestContextTemplateValidation:
    """Checks how signature creation copes with known and unknown template names."""

    @staticmethod
    def _require(schema_loader_available, *specificity_functions):
        """Skip the calling test when an optional dependency is missing.

        Args:
            schema_loader_available: The ``SCHEMA_LOADER_AVAILABLE`` value
                imported by the test.
            *specificity_functions: Imported specificity helpers; a value of
                ``None`` is treated as "not available".
        """
        if not schema_loader_available:
            pytest.skip("Schema loader not available")
        if any(fn is None for fn in specificity_functions):
            pytest.skip("Specificity functions not available")

    def test_valid_context_templates(self):
        """Every template reported as available produces a non-None signature."""
        from backend.rag.dspy_heritage_rag import (
            get_schema_aware_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
            get_available_context_templates,
        )

        self._require(SCHEMA_LOADER_AVAILABLE, get_available_context_templates)

        for template_name in get_available_context_templates():
            signature = get_schema_aware_sparql_signature(template_name, 0.5)
            assert signature is not None, f"Failed for template: {template_name}"

    def test_invalid_context_template_falls_back_gracefully(self):
        """An unknown template name does not raise and still yields a docstring.

        Note: the test only verifies that a documented signature comes back;
        it does not inspect which fallback template was used or whether a
        warning was emitted.
        """
        from backend.rag.dspy_heritage_rag import (
            get_schema_aware_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
        )

        self._require(
            SCHEMA_LOADER_AVAILABLE, create_specificity_aware_sparql_docstring
        )

        # An exception here would fail the test: the call is expected to
        # recover instead of raising.
        fallback_signature = get_schema_aware_sparql_signature(
            "invalid_template_xyz", 0.5
        )
        assert fallback_signature is not None
        assert fallback_signature.__doc__ is not None
|
|
|
|
|
|
class TestSpecificityImportsAvailable:
    """Checks that the specificity helpers are re-exported by the RAG module.

    Each helper must be importable from ``backend.rag.dspy_heritage_rag``;
    the tests accept either a callable or ``None`` as the exported value.
    """

    def test_create_specificity_aware_sparql_docstring_importable(self):
        """``create_specificity_aware_sparql_docstring`` imports as a callable or None."""
        from backend.rag.dspy_heritage_rag import create_specificity_aware_sparql_docstring

        exported = create_specificity_aware_sparql_docstring
        assert exported is None or callable(exported)

    def test_format_filtered_ontology_context_importable(self):
        """``format_filtered_ontology_context`` imports as a callable or None."""
        from backend.rag.dspy_heritage_rag import format_filtered_ontology_context

        exported = format_filtered_ontology_context
        assert exported is None or callable(exported)

    def test_get_available_context_templates_importable(self):
        """``get_available_context_templates`` imports as a callable or None."""
        from backend.rag.dspy_heritage_rag import get_available_context_templates

        exported = get_available_context_templates
        assert exported is None or callable(exported)

    def test_specificity_functions_work_when_available(self):
        """With the schema loader present, the helpers exist and return sane values."""
        from backend.rag.dspy_heritage_rag import (
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
            format_filtered_ontology_context,
            get_available_context_templates,
        )

        if not SCHEMA_LOADER_AVAILABLE:
            pytest.skip("Schema loader not available")

        # When the loader is available, none of the helpers may be None.
        for helper in (
            create_specificity_aware_sparql_docstring,
            format_filtered_ontology_context,
            get_available_context_templates,
        ):
            assert helper is not None

        # The template listing is a non-empty list that includes the
        # general-purpose template.
        available = get_available_context_templates()
        assert isinstance(available, list)
        assert len(available) > 0
        assert "general_heritage" in available

        # The docstring builder returns a non-empty string.
        built = create_specificity_aware_sparql_docstring("general_heritage", 0.5)
        assert isinstance(built, str)
        assert len(built) > 0
|
|
|
|
|
|
class TestThresholdBehavior:
    """Tests for specificity threshold behavior.

    Note: The threshold semantics are "include classes where score <= threshold".
    Higher threshold = more classes included (more permissive filter).
    Lower threshold = fewer classes included (more restrictive filter).
    """

    @staticmethod
    def _require(schema_loader_available, *specificity_functions):
        """Skip the calling test when an optional dependency is missing.

        Args:
            schema_loader_available: The ``SCHEMA_LOADER_AVAILABLE`` value
                imported by the test.
            *specificity_functions: Imported specificity helpers; a value of
                ``None`` is treated as "not available".
        """
        if not schema_loader_available:
            pytest.skip("Schema loader not available")
        if any(fn is None for fn in specificity_functions):
            pytest.skip("Specificity functions not available")

    def test_threshold_variation_produces_different_results(self):
        """A high threshold yields a docstring at least as long as a low one."""
        from backend.rag.dspy_heritage_rag import (
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
        )

        self._require(
            SCHEMA_LOADER_AVAILABLE, create_specificity_aware_sparql_docstring
        )

        # Higher threshold = more permissive = more classes
        docstring_high = create_specificity_aware_sparql_docstring("general_heritage", 0.9)
        # Lower threshold = more restrictive = fewer classes
        docstring_low = create_specificity_aware_sparql_docstring("general_heritage", 0.1)

        # Output length is the proxy for "number of classes included".
        # ">=" rather than ">" because both thresholds may select the same set.
        assert len(docstring_high) >= len(docstring_low)

    def test_threshold_bounds_validation(self):
        """Out-of-range thresholds are either rejected or still yield a string.

        For each threshold outside the 0-1 range the docstring builder may
        raise (treated as explicit rejection) or return normally; in the
        latter case the return value must be a ``str``.
        """
        from backend.rag.dspy_heritage_rag import (
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
        )

        self._require(
            SCHEMA_LOADER_AVAILABLE, create_specificity_aware_sparql_docstring
        )

        for out_of_range in (-0.5, 1.5):
            # Only the call itself sits inside the try block. Previously the
            # isinstance assertion was inside it as well, so its own
            # AssertionError was swallowed and the test could never fail.
            try:
                result = create_specificity_aware_sparql_docstring(
                    "general_heritage", out_of_range
                )
            except (ValueError, AssertionError):
                # Rejection is acceptable. AssertionError stays tolerated in
                # case the builder validates its input with ``assert``.
                continue

            assert isinstance(result, str), (
                f"Expected str for threshold {out_of_range}, "
                f"got {type(result).__name__}"
            )

    def test_docstring_contains_template_context_info(self):
        """The built docstring names the template or mentions filtering."""
        from backend.rag.dspy_heritage_rag import (
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
        )

        self._require(
            SCHEMA_LOADER_AVAILABLE, create_specificity_aware_sparql_docstring
        )

        docstring = create_specificity_aware_sparql_docstring("archive_search", 0.5)

        # Either the literal template name or the word "filtered" (any case)
        # counts as evidence that the template was applied.
        assert "archive_search" in docstring or "filtered" in docstring.lower()
|
|
|
|
|
|
class TestSignatureCreationWithContextTemplate:
    """Integration-style checks for signatures built from a context template."""

    @staticmethod
    def _require(schema_loader_available, *specificity_functions):
        """Skip the calling test when an optional dependency is missing.

        Args:
            schema_loader_available: The ``SCHEMA_LOADER_AVAILABLE`` value
                imported by the test.
            *specificity_functions: Imported specificity helpers; a value of
                ``None`` is treated as "not available".
        """
        if not schema_loader_available:
            pytest.skip("Schema loader not available")
        if any(fn is None for fn in specificity_functions):
            pytest.skip("Specificity functions not available")

    def test_signature_has_context_in_instructions(self):
        """A templated signature's docstring names the template or mentions filtering."""
        from backend.rag.dspy_heritage_rag import (
            _create_schema_aware_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
        )

        self._require(
            SCHEMA_LOADER_AVAILABLE, create_specificity_aware_sparql_docstring
        )

        templated = _create_schema_aware_sparql_signature("archive_search", 0.5)

        assert templated.__doc__ is not None
        # Either the literal template name or the word "filtered" (any case)
        # counts as evidence that the filtered context was embedded.
        assert (
            "archive_search" in templated.__doc__
            or "filtered" in templated.__doc__.lower()
        )

    def test_full_context_signature_vs_filtered_context(self):
        """Unfiltered and filtered signatures are distinct, documented objects.

        Note: only object identity is compared; the docstring contents of the
        two signatures are not compared with each other.
        """
        from backend.rag.dspy_heritage_rag import (
            _create_schema_aware_sparql_signature,
            SCHEMA_LOADER_AVAILABLE,
            create_specificity_aware_sparql_docstring,
        )

        self._require(
            SCHEMA_LOADER_AVAILABLE, create_specificity_aware_sparql_docstring
        )

        # No template -> full context; template + threshold -> filtered context.
        unfiltered = _create_schema_aware_sparql_signature()
        filtered = _create_schema_aware_sparql_signature("archive_search", 0.5)

        assert unfiltered is not None
        assert filtered is not None

        assert unfiltered is not filtered

        assert unfiltered.__doc__ is not None
        assert filtered.__doc__ is not None
|