"""Tests for specificity-aware DSPy integration in HeritageRAGPipeline.

This module tests:
1. Signature factory functions with optional context templates
2. Getter functions for cached vs dynamic signatures
3. HeritageRAGPipeline initialization with specificity filtering
"""

# NOTE(review): patch/MagicMock are not referenced by the tests below; the
# import is kept so nothing relying on it elsewhere breaks.
from unittest.mock import MagicMock, patch

import pytest


def _skip_unless_available(*specificity_functions):
    """Skip the calling test when the features it exercises are unavailable.

    Centralises the guard every test in this module repeats: skip when
    ``SCHEMA_LOADER_AVAILABLE`` is falsy, then skip when any of the given
    optional objects is ``None``.

    Args:
        *specificity_functions: Objects the test imported from
            ``backend.rag.dspy_heritage_rag`` and needs in order to run.
            A value of ``None`` is treated as "not available". Pass nothing
            to check the schema loader only.

    Raises:
        pytest.skip.Exception: via ``pytest.skip`` when a requirement is
            missing, which marks the calling test as skipped.
    """
    # Imported lazily, like every other project import in this module.
    from backend.rag.dspy_heritage_rag import SCHEMA_LOADER_AVAILABLE

    if not SCHEMA_LOADER_AVAILABLE:
        pytest.skip("Schema loader not available")
    if any(func is None for func in specificity_functions):
        pytest.skip("Specificity functions not available")


class TestSignatureFactoryFunctions:
    """Tests for _create_schema_aware_sparql_signature and related functions."""

    def test_create_sparql_signature_without_context(self):
        """Signature created without context_template uses full ontology."""
        from backend.rag.dspy_heritage_rag import (
            _create_schema_aware_sparql_signature,
        )

        _skip_unless_available()

        sig = _create_schema_aware_sparql_signature()

        assert sig is not None
        # Should have a docstring with full ontology context
        assert sig.__doc__ is not None
        assert len(sig.__doc__) > 1000  # Full context is large

    def test_create_sparql_signature_with_context_template(self):
        """Signature created with context_template uses filtered context."""
        from backend.rag.dspy_heritage_rag import (
            _create_schema_aware_sparql_signature,
            create_specificity_aware_sparql_docstring,
        )

        _skip_unless_available(create_specificity_aware_sparql_docstring)

        sig = _create_schema_aware_sparql_signature("archive_search", 0.4)
        assert sig is not None

        full_sig = _create_schema_aware_sparql_signature()

        # Only verify that both docstrings exist and are non-empty; whether
        # the filtered one differs from the full one depends on threshold
        # semantics, so no comparison is made here.
        assert sig.__doc__
        assert full_sig.__doc__

    def test_create_sparql_signature_different_templates_produce_different_results(self):
        """Different templates produce different filtered contexts."""
        from backend.rag.dspy_heritage_rag import (
            _create_schema_aware_sparql_signature,
            create_specificity_aware_sparql_docstring,
        )

        _skip_unless_available(create_specificity_aware_sparql_docstring)

        # Different templates should potentially produce different contexts
        sig_archive = _create_schema_aware_sparql_signature("archive_search", 0.5)
        sig_museum = _create_schema_aware_sparql_signature("museum_search", 0.5)

        # Both should be valid
        assert sig_archive is not None
        assert sig_museum is not None
        assert sig_archive.__doc__ is not None
        assert sig_museum.__doc__ is not None

    def test_create_person_sparql_signature_without_context(self):
        """Person signature created without context_template uses full ontology."""
        from backend.rag.dspy_heritage_rag import (
            _create_schema_aware_person_sparql_signature,
        )

        _skip_unless_available()

        sig = _create_schema_aware_person_sparql_signature()

        assert sig is not None
        assert sig.__doc__ is not None

    def test_create_person_sparql_signature_with_context_template(self):
        """Person signature with context_template uses person_research template."""
        from backend.rag.dspy_heritage_rag import (
            _create_schema_aware_person_sparql_signature,
            create_specificity_aware_sparql_docstring,
        )

        _skip_unless_available(create_specificity_aware_sparql_docstring)

        sig = _create_schema_aware_person_sparql_signature("person_research", 0.5)

        assert sig is not None
        assert sig.__doc__ is not None


class TestGetterFunctions:
    """Tests for get_schema_aware_sparql_signature and caching behavior."""

    def test_get_sparql_signature_cached_when_no_context(self):
        """Signature is cached when no context_template provided."""
        from backend.rag.dspy_heritage_rag import get_schema_aware_sparql_signature

        _skip_unless_available()

        sig1 = get_schema_aware_sparql_signature()
        sig2 = get_schema_aware_sparql_signature()

        # Same object returned (cached)
        assert sig1 is sig2

    def test_get_sparql_signature_dynamic_with_context(self):
        """Signature is dynamically created when context_template provided."""
        from backend.rag.dspy_heritage_rag import (
            get_schema_aware_sparql_signature,
            create_specificity_aware_sparql_docstring,
        )

        _skip_unless_available(create_specificity_aware_sparql_docstring)

        sig1 = get_schema_aware_sparql_signature("archive_search", 0.4)
        sig2 = get_schema_aware_sparql_signature("museum_search", 0.4)

        # Validity first, so two None results fail here with a clear message
        # instead of tripping the identity check below.
        assert sig1 is not None
        assert sig2 is not None
        # Different templates produce different signatures
        assert sig1 is not sig2

    def test_get_person_sparql_signature_cached_when_no_context(self):
        """Person signature is cached when no context_template provided."""
        from backend.rag.dspy_heritage_rag import (
            get_schema_aware_person_sparql_signature,
        )

        _skip_unless_available()

        sig1 = get_schema_aware_person_sparql_signature()
        sig2 = get_schema_aware_person_sparql_signature()

        # Same object returned (cached)
        assert sig1 is sig2

    def test_get_person_sparql_signature_dynamic_with_context(self):
        """Person signature is dynamically created when context_template provided."""
        from backend.rag.dspy_heritage_rag import (
            get_schema_aware_person_sparql_signature,
            create_specificity_aware_sparql_docstring,
        )

        _skip_unless_available(create_specificity_aware_sparql_docstring)

        sig1 = get_schema_aware_person_sparql_signature("person_research", 0.3)
        sig2 = get_schema_aware_person_sparql_signature("person_research", 0.7)

        # Different thresholds produce different signatures
        assert sig1 is not sig2


class TestHeritageRAGPipelineSpecificityInit:
    """Tests for HeritageRAGPipeline initialization with specificity filtering."""

    def test_pipeline_has_specificity_parameters(self):
        """Pipeline class accepts specificity filtering parameters."""
        import inspect

        from backend.rag.dspy_heritage_rag import HeritageRAGPipeline

        params = inspect.signature(HeritageRAGPipeline.__init__).parameters

        # Verify parameters exist
        assert 'use_specificity_filtering' in params
        assert 'specificity_threshold' in params

        # Verify defaults
        assert params['use_specificity_filtering'].default is False
        assert params['specificity_threshold'].default == 0.5

    def test_pipeline_docstring_documents_specificity_params(self):
        """Pipeline class docstring includes specificity parameter documentation."""
        from backend.rag.dspy_heritage_rag import HeritageRAGPipeline

        # Check the class docstring (where Args are documented)
        docstring = HeritageRAGPipeline.__doc__
        assert docstring is not None

        # Any mention of "specificity" counts; this also covers the exact
        # parameter name 'use_specificity_filtering'.
        assert 'specificity' in docstring.lower()


class TestContextTemplateValidation:
    """Tests for context template validation in signature creation."""

    def test_valid_context_templates(self):
        """All valid context templates work with signature creation."""
        from backend.rag.dspy_heritage_rag import (
            get_schema_aware_sparql_signature,
            get_available_context_templates,
        )

        _skip_unless_available(get_available_context_templates)

        for template in get_available_context_templates():
            sig = get_schema_aware_sparql_signature(template, 0.5)
            assert sig is not None, f"Failed for template: {template}"

    def test_invalid_context_template_falls_back_gracefully(self):
        """Invalid context template falls back to general_heritage with warning."""
        from backend.rag.dspy_heritage_rag import (
            get_schema_aware_sparql_signature,
            create_specificity_aware_sparql_docstring,
        )

        _skip_unless_available(create_specificity_aware_sparql_docstring)

        # Should not raise - falls back to general_heritage
        sig = get_schema_aware_sparql_signature("invalid_template_xyz", 0.5)

        assert sig is not None
        # Docstring should contain general_heritage context (fallback behavior)
        assert sig.__doc__ is not None


class TestSpecificityImportsAvailable:
    """Tests to verify specificity-aware imports are available."""

    def test_create_specificity_aware_sparql_docstring_importable(self):
        """create_specificity_aware_sparql_docstring is importable."""
        from backend.rag.dspy_heritage_rag import (
            create_specificity_aware_sparql_docstring,
        )

        # May be None if schema loader not available, but should be importable
        assert (
            create_specificity_aware_sparql_docstring is None
            or callable(create_specificity_aware_sparql_docstring)
        )

    def test_format_filtered_ontology_context_importable(self):
        """format_filtered_ontology_context is importable."""
        from backend.rag.dspy_heritage_rag import format_filtered_ontology_context

        assert (
            format_filtered_ontology_context is None
            or callable(format_filtered_ontology_context)
        )

    def test_get_available_context_templates_importable(self):
        """get_available_context_templates is importable."""
        from backend.rag.dspy_heritage_rag import get_available_context_templates

        assert (
            get_available_context_templates is None
            or callable(get_available_context_templates)
        )

    def test_specificity_functions_work_when_available(self):
        """Specificity functions return expected results when schema loader available."""
        from backend.rag.dspy_heritage_rag import (
            create_specificity_aware_sparql_docstring,
            format_filtered_ontology_context,
            get_available_context_templates,
        )

        # Only the schema loader is a skip condition here: once it is
        # available, a missing function is a failure, not a skip.
        _skip_unless_available()

        # All functions should be available
        assert create_specificity_aware_sparql_docstring is not None
        assert format_filtered_ontology_context is not None
        assert get_available_context_templates is not None

        # get_available_context_templates should return list of templates
        templates = get_available_context_templates()
        assert isinstance(templates, list)
        assert len(templates) > 0
        assert "general_heritage" in templates

        # create_specificity_aware_sparql_docstring should return string
        docstring = create_specificity_aware_sparql_docstring("general_heritage", 0.5)
        assert isinstance(docstring, str)
        assert len(docstring) > 0


class TestThresholdBehavior:
    """Tests for specificity threshold behavior.

    Note: The threshold semantics are "include classes where score <= threshold".
    Higher threshold = more classes included (more permissive filter).
    Lower threshold = fewer classes included (more restrictive filter).
    """

    def test_threshold_variation_produces_different_results(self):
        """Different thresholds produce different filtered contexts."""
        from backend.rag.dspy_heritage_rag import (
            create_specificity_aware_sparql_docstring,
        )

        _skip_unless_available(create_specificity_aware_sparql_docstring)

        # Higher threshold = more permissive = more classes
        docstring_high = create_specificity_aware_sparql_docstring("general_heritage", 0.9)
        # Lower threshold = more restrictive = fewer classes
        docstring_low = create_specificity_aware_sparql_docstring("general_heritage", 0.1)

        # Higher threshold should include more classes (larger output)
        # Note: The semantics are "include if score <= threshold"
        # So higher threshold includes more classes
        assert len(docstring_high) >= len(docstring_low)

    def test_threshold_bounds_validation(self):
        """Threshold outside 0-1 range should be handled gracefully."""
        from backend.rag.dspy_heritage_rag import (
            create_specificity_aware_sparql_docstring,
        )

        _skip_unless_available(create_specificity_aware_sparql_docstring)

        # Each call should either work (clamping values) or reject the input.
        for out_of_range in (-0.5, 1.5):
            try:
                # Only the call sits inside the try block, so the result
                # check below cannot be swallowed by the handler.
                result = create_specificity_aware_sparql_docstring(
                    "general_heritage", out_of_range
                )
            except (ValueError, AssertionError):
                continue  # Expected behavior: the value was rejected

            # If it doesn't raise, it should return something valid
            assert isinstance(result, str), (
                f"Expected str for threshold {out_of_range}, "
                f"got {type(result).__name__}"
            )

    def test_docstring_contains_template_context_info(self):
        """Docstring should indicate which template was used."""
        from backend.rag.dspy_heritage_rag import (
            create_specificity_aware_sparql_docstring,
        )

        _skip_unless_available(create_specificity_aware_sparql_docstring)

        docstring = create_specificity_aware_sparql_docstring("archive_search", 0.5)

        # Should mention the template being used
        assert "archive_search" in docstring or "filtered" in docstring.lower()


class TestSignatureCreationWithContextTemplate:
    """Integration tests for signature creation with context templates."""

    def test_signature_has_context_in_instructions(self):
        """Signature created with context template includes context in instructions."""
        from backend.rag.dspy_heritage_rag import (
            _create_schema_aware_sparql_signature,
            create_specificity_aware_sparql_docstring,
        )

        _skip_unless_available(create_specificity_aware_sparql_docstring)

        sig = _create_schema_aware_sparql_signature("archive_search", 0.5)

        # Should have docstring with filtered context
        assert sig.__doc__ is not None
        assert "archive_search" in sig.__doc__ or "filtered" in sig.__doc__.lower()

    def test_full_context_signature_vs_filtered_context(self):
        """Full context signature differs from filtered context signature."""
        from backend.rag.dspy_heritage_rag import (
            _create_schema_aware_sparql_signature,
            create_specificity_aware_sparql_docstring,
        )

        _skip_unless_available(create_specificity_aware_sparql_docstring)

        # Full context (no template)
        full_sig = _create_schema_aware_sparql_signature()
        # Filtered context (with template)
        filtered_sig = _create_schema_aware_sparql_signature("archive_search", 0.5)

        # Both should be valid
        assert full_sig is not None
        assert filtered_sig is not None

        # They should be different objects
        assert full_sig is not filtered_sig

        # Both should have docstrings
        assert full_sig.__doc__ is not None
        assert filtered_sig.__doc__ is not None