681 lines
27 KiB
Python
681 lines
27 KiB
Python
"""
|
|
Integration Tests for Ontology-Driven RAG Components
|
|
|
|
Tests the end-to-end flow:
|
|
1. OntologyLoader → SynonymResolver integration
|
|
2. OntologyLoader → SchemaAwareSlotValidator integration
|
|
3. TTL-based caching behavior
|
|
4. Mock KG response handling
|
|
|
|
These tests verify that the RAG system properly relies on the ontology
|
|
and Knowledge Graph rather than hardcoded heuristics.
|
|
|
|
Author: OpenCode
|
|
Created: 2025-01-08
|
|
"""
|
|
|
|
import pytest
|
|
import time
|
|
from unittest.mock import patch, MagicMock
|
|
from typing import Any
|
|
|
|
|
|
# =============================================================================
|
|
# FIXTURES
|
|
# =============================================================================
|
|
|
|
@pytest.fixture
def reset_singletons():
    """Reset the template_sparql singletons before and after a test.

    OntologyLoader and SchemaAwareSlotValidator keep a class-level
    ``_instance`` plus class-level state containers; SynonymResolver is only
    tracked through the module-level ``_synonym_resolver`` (it has no
    class-level ``_instance``). The module-level accessors
    ``_ontology_loader`` and ``_schema_slot_validator`` are cleared as well.

    The same unconditional reset runs on setup and on teardown, so state
    populated by the test cannot leak into tests that do not request this
    fixture.
    """
    import backend.rag.template_sparql as module

    def _reset():
        # Fresh containers are created on every call so that no set/dict
        # object is shared between two tests.
        loader_cls = module.OntologyLoader
        loader_cls._instance = None
        loader_cls._predicates = set()
        loader_cls._external_predicates = set()
        loader_cls._classes = set()
        loader_cls._slot_values = {}
        loader_cls._synonyms = {}
        loader_cls._enums = {}
        loader_cls._institution_type_codes = set()
        loader_cls._institution_type_mappings = {}
        loader_cls._subregion_mappings = {}
        loader_cls._country_mappings = {}
        loader_cls._loaded = False
        loader_cls._kg_cache = {}
        loader_cls._kg_cache_timestamps = {}
        module._ontology_loader = None

        # SynonymResolver: module-level variable only.
        module._synonym_resolver = None

        validator_cls = module.SchemaAwareSlotValidator
        validator_cls._instance = None
        validator_cls._valid_values = {}
        validator_cls._synonym_maps = {}
        validator_cls._loaded = False
        validator_cls._kg_validation_cache = {}
        validator_cls._kg_validation_timestamps = {}
        module._schema_slot_validator = None

    _reset()

    yield

    # Teardown: a full reset, not just the module-level accessors, so the
    # class-level ``_instance`` objects do not survive the test.
    _reset()
|
|
|
|
|
|
|
|
@pytest.fixture
def mock_kg_responses():
    """Provide canned Knowledge Graph value sets, keyed by the kind of value."""
    # One single-letter code per character of the string.
    institution_types = set("MLAGORCUBESFIXPHDNT")
    subregions = {"NL-NH", "NL-ZH", "NL-GE", "NL-NB", "NL-LI"}
    countries = {"NL", "BE", "DE", "FR", "GB"}
    cities = {"Amsterdam", "Rotterdam", "Den Haag", "Utrecht", "Eindhoven"}

    return dict(
        institution_types=institution_types,
        subregions=subregions,
        countries=countries,
        cities=cities,
    )
|
|
|
|
|
|
# =============================================================================
|
|
# ONTOLOGY LOADER TESTS
|
|
# =============================================================================
|
|
|
|
class TestOntologyLoaderIntegration:
    """Exercise OntologyLoader on its own: loading, accessors, singleton reuse."""

    def test_loader_loads_from_validation_rules(self, reset_singletons):
        """After load(), institution type codes are present and include M, L, A."""
        from backend.rag.template_sparql import get_ontology_loader

        ontology = get_ontology_loader()
        ontology.load()

        codes = ontology.get_institution_type_codes()
        assert len(codes) > 0
        # Museum, Library, Archive
        for required_code in ("M", "L", "A"):
            assert required_code in codes

    def test_loader_loads_institution_type_mappings(self, reset_singletons):
        """After load(), institution type mappings exist and mention 'museum'."""
        from backend.rag.template_sparql import get_ontology_loader

        ontology = get_ontology_loader()
        ontology.load()

        type_mappings = ontology.get_institution_type_mappings()
        assert len(type_mappings) > 0

        # Accept the key in either exact casing, or as a substring of any key.
        museum_is_mapped = (
            "museum" in type_mappings
            or "MUSEUM" in type_mappings
            or any("museum" in key.lower() for key in type_mappings)
        )
        assert museum_is_mapped

    def test_loader_loads_subregion_mappings(self, reset_singletons):
        """After load(), subregion mappings are exposed as a dict (possibly empty)."""
        from backend.rag.template_sparql import get_ontology_loader

        ontology = get_ontology_loader()
        ontology.load()

        # The mapping may legitimately be empty; only the type is checked.
        assert isinstance(ontology.get_subregion_mappings(), dict)

    def test_loader_singleton_pattern(self, reset_singletons):
        """Two calls to get_ontology_loader() return the very same object."""
        from backend.rag.template_sparql import get_ontology_loader

        first = get_ontology_loader()
        second = get_ontology_loader()

        assert first is second

    def test_loader_caches_loaded_state(self, reset_singletons):
        """A second load() must not invoke _load_from_validation_rules again."""
        from backend.rag.template_sparql import get_ontology_loader

        ontology = get_ontology_loader()

        # Initial load flips the loaded flag.
        ontology.load()
        assert ontology._loaded is True

        # Repeat load with the inner loader replaced by a spy.
        with patch.object(ontology, '_load_from_validation_rules') as reload_spy:
            ontology.load()
        reload_spy.assert_not_called()
|
|
|
|
|
|
# =============================================================================
|
|
# ONTOLOGY LOADER → SYNONYM RESOLVER INTEGRATION
|
|
# =============================================================================
|
|
|
|
class TestOntologyLoaderSynonymResolverIntegration:
    """Check that SynonymResolver is fed by OntologyLoader."""

    def test_synonym_resolver_uses_ontology_type_codes(self, reset_singletons):
        """Resolver's valid type codes equal the codes reported by the loader."""
        from backend.rag.template_sparql import get_synonym_resolver, get_ontology_loader

        # Ontology first ...
        ontology = get_ontology_loader()
        ontology.load()
        codes_from_ontology = ontology.get_institution_type_codes()

        # ... resolver second.
        synonyms = get_synonym_resolver()
        synonyms.load()

        assert synonyms._valid_type_codes == codes_from_ontology

    def test_synonym_resolver_resolves_museum_to_M(self, reset_singletons):
        """'museum', 'musea' and 'Museum' all resolve to 'M'."""
        from backend.rag.template_sparql import get_synonym_resolver

        synonyms = get_synonym_resolver()
        synonyms.load()

        for term in ("museum", "musea", "Museum"):
            assert synonyms.resolve_institution_type(term) == "M"

    def test_synonym_resolver_resolves_library_to_L(self, reset_singletons):
        """'library' and 'bibliotheek' resolve to 'L'."""
        from backend.rag.template_sparql import get_synonym_resolver

        synonyms = get_synonym_resolver()
        synonyms.load()

        for term in ("library", "bibliotheek"):
            assert synonyms.resolve_institution_type(term) == "L"

    def test_synonym_resolver_resolves_archive_to_A(self, reset_singletons):
        """'archive' and 'archief' resolve to 'A'."""
        from backend.rag.template_sparql import get_synonym_resolver

        synonyms = get_synonym_resolver()
        synonyms.load()

        for term in ("archive", "archief"):
            assert synonyms.resolve_institution_type(term) == "A"

    def test_synonym_resolver_accepts_valid_codes_directly(self, reset_singletons):
        """A valid single-letter code is returned unchanged."""
        from backend.rag.template_sparql import get_synonym_resolver

        synonyms = get_synonym_resolver()
        synonyms.load()

        for code in ("M", "L", "A"):
            assert synonyms.resolve_institution_type(code) == code

    def test_synonym_resolver_uses_ontology_mappings_not_hardcoded(self, reset_singletons):
        """Resolver's code set is a set identical to the loader's, not a literal string."""
        from backend.rag.template_sparql import get_synonym_resolver, get_ontology_loader

        synonyms = get_synonym_resolver()
        synonyms.load()

        # Reference value straight from the loader.
        codes_from_ontology = get_ontology_loader().get_institution_type_codes()

        resolver_codes = synonyms._valid_type_codes
        assert resolver_codes == codes_from_ontology

        # A set from the ontology, rather than the string "MLAGORCUBESFIXPHDNT".
        assert isinstance(resolver_codes, set)
|
|
|
|
|
|
# =============================================================================
|
|
# ONTOLOGY LOADER → SLOT VALIDATOR INTEGRATION
|
|
# =============================================================================
|
|
|
|
class TestOntologyLoaderSlotValidatorIntegration:
    """Check SchemaAwareSlotValidator against ontology-derived mappings."""

    def test_slot_validator_loads_from_synonym_resolver(self, reset_singletons):
        """_load_validation_rules() populates a non-empty institution_type synonym map."""
        from backend.rag.template_sparql import get_schema_slot_validator

        slot_validator = get_schema_slot_validator()
        slot_validator._load_validation_rules()

        synonym_maps = slot_validator._synonym_maps
        assert "institution_type" in synonym_maps
        assert len(synonym_maps["institution_type"]) > 0

    def test_slot_validator_validates_museum(self, reset_singletons):
        """'museum' is valid for institution_type and is corrected to 'M'."""
        from backend.rag.template_sparql import get_schema_slot_validator

        outcome = get_schema_slot_validator().validate_slot("institution_type", "museum")

        assert outcome.valid is True
        assert outcome.corrected_value == "M"

    def test_slot_validator_validates_dutch_terms(self, reset_singletons):
        """Dutch terms 'bibliotheek' and 'archief' validate to 'L' and 'A'."""
        from backend.rag.template_sparql import get_schema_slot_validator

        slot_validator = get_schema_slot_validator()

        dutch_cases = (("bibliotheek", "L"), ("archief", "A"))
        for term, expected_code in dutch_cases:
            outcome = slot_validator.validate_slot("institution_type", term)
            assert outcome.valid is True
            assert outcome.corrected_value == expected_code

    def test_slot_validator_corrects_typos(self, reset_singletons):
        """A typo ('musem') is either corrected to 'M' or rejected with suggestions."""
        from backend.rag.template_sparql import get_schema_slot_validator

        slot_validator = get_schema_slot_validator()

        outcome = slot_validator.validate_slot("institution_type", "musem", auto_correct=True)

        # Both outcomes are acceptable; each has its own expectation.
        if not outcome.valid:
            assert len(outcome.suggestions) > 0
        else:
            assert outcome.corrected_value == "M"

    def test_slot_validator_validate_slots_batch(self, reset_singletons):
        """validate_slots() returns a per-slot result; institution_type becomes 'M'."""
        from backend.rag.template_sparql import get_schema_slot_validator

        slot_validator = get_schema_slot_validator()

        outcomes = slot_validator.validate_slots(
            {
                "institution_type": "museum",
                "city": "Amsterdam",
            }
        )

        assert "institution_type" in outcomes
        assert outcomes["institution_type"].corrected_value == "M"

    def test_get_corrected_slots(self, reset_singletons):
        """get_corrected_slots() maps 'bibliotheek' to 'L'."""
        from backend.rag.template_sparql import get_schema_slot_validator

        slot_validator = get_schema_slot_validator()

        fixed = slot_validator.get_corrected_slots({"institution_type": "bibliotheek"})
        assert fixed["institution_type"] == "L"
|
|
|
|
|
|
# =============================================================================
|
|
# TTL-BASED CACHING TESTS
|
|
# =============================================================================
|
|
|
|
class TestOntologyLoaderCaching:
    """Tests for OntologyLoader TTL-based caching of KG query results."""

    def test_kg_cache_ttl_default(self, reset_singletons):
        """OntologyLoader should have default TTL of 300 seconds."""
        from backend.rag.template_sparql import get_ontology_loader

        loader = get_ontology_loader()
        assert loader.get_kg_cache_ttl() == 300.0

    def test_kg_cache_ttl_setter(self, reset_singletons):
        """Should be able to set KG cache TTL."""
        from backend.rag.template_sparql import get_ontology_loader

        loader = get_ontology_loader()
        loader.set_kg_cache_ttl(60.0)

        assert loader.get_kg_cache_ttl() == 60.0

    def test_clear_kg_cache(self, reset_singletons):
        """clear_kg_cache should empty both the cache and its timestamps."""
        from backend.rag.template_sparql import get_ontology_loader

        loader = get_ontology_loader()
        loader.load()

        # Add a mock cache entry
        loader._kg_cache["test_hash"] = {"value1", "value2"}
        loader._kg_cache_timestamps["test_hash"] = time.time()

        loader.clear_kg_cache()

        assert len(loader._kg_cache) == 0
        assert len(loader._kg_cache_timestamps) == 0

    def test_get_kg_cache_stats(self, reset_singletons):
        """get_kg_cache_stats should return cache statistics."""
        from backend.rag.template_sparql import get_ontology_loader

        loader = get_ontology_loader()
        loader.load()

        stats = loader.get_kg_cache_stats()

        assert "cache_size" in stats
        assert "ttl_seconds" in stats
        assert "entries" in stats

    def test_clear_all_cache(self, reset_singletons):
        """clear_all_cache should reset loader to initial state."""
        from backend.rag.template_sparql import get_ontology_loader

        loader = get_ontology_loader()
        loader.load()

        # Verify data is loaded
        assert loader._loaded is True
        assert len(loader._institution_type_codes) > 0

        loader.clear_all_cache()

        # Verify reset
        assert loader._loaded is False
        assert len(loader._institution_type_codes) == 0
        assert len(loader._kg_cache) == 0

    def test_kg_query_caching_behavior(self, reset_singletons):
        """A fresh cache entry is returned without issuing an HTTP request."""
        from backend.rag.template_sparql import get_ontology_loader
        import hashlib

        loader = get_ontology_loader()
        loader.set_kg_cache_ttl(10.0)

        test_query = "SELECT ?x WHERE { ?x ?y ?z }"
        # NOTE(review): assumes the loader keys its cache by the MD5 hex
        # digest of the query text - confirm against _query_kg_for_values.
        query_hash = hashlib.md5(test_query.encode()).hexdigest()

        # Pre-populate cache with an entry that is well within the TTL
        loader._kg_cache[query_hash] = {"value1", "value2"}
        loader._kg_cache_timestamps[query_hash] = time.time()

        # Block the network: a cache hit must never reach urlopen, and a
        # cache miss must fail fast instead of contacting a real endpoint.
        with patch('urllib.request.urlopen') as mock_urlopen:
            mock_urlopen.side_effect = OSError("network access not expected on cache hit")
            result = loader._query_kg_for_values(test_query, use_cache=True)

        assert result == {"value1", "value2"}
        mock_urlopen.assert_not_called()

    def test_kg_query_cache_expiration(self, reset_singletons):
        """An expired entry is served as stale data when the KG is unreachable."""
        from backend.rag.template_sparql import get_ontology_loader
        import hashlib

        loader = get_ontology_loader()
        loader.set_kg_cache_ttl(0.1)  # Very short TTL

        test_query = "SELECT ?expired WHERE { ?x ?y ?z }"
        query_hash = hashlib.md5(test_query.encode()).hexdigest()

        # The timestamp is already 1 second old, i.e. ten times the TTL, so
        # the entry is expired immediately and no sleep is needed.
        loader._kg_cache[query_hash] = {"old_value"}
        loader._kg_cache_timestamps[query_hash] = time.time() - 1.0

        # Mock HTTP to fail (KG unavailable)
        with patch('urllib.request.urlopen') as mock_urlopen:
            mock_urlopen.side_effect = Exception("KG unavailable")
            result = loader._query_kg_for_values(test_query, use_cache=True)

        # Returns stale cache because the KG query failed
        assert result == {"old_value"}
|
|
|
|
|
|
class TestSlotValidatorCaching:
    """Check the TTL-based KG validation cache of SchemaAwareSlotValidator."""

    def test_kg_validation_ttl_default(self, reset_singletons):
        """A fresh validator reports a TTL of 300.0 seconds."""
        from backend.rag.template_sparql import get_schema_slot_validator

        assert get_schema_slot_validator().get_kg_validation_ttl() == 300.0

    def test_kg_validation_ttl_setter(self, reset_singletons):
        """A TTL written through the setter is read back by the getter."""
        from backend.rag.template_sparql import get_schema_slot_validator

        slot_validator = get_schema_slot_validator()
        slot_validator.set_kg_validation_ttl(120.0)

        assert slot_validator.get_kg_validation_ttl() == 120.0

    def test_clear_kg_validation_cache(self, reset_singletons):
        """clear_kg_validation_cache() empties both cache and timestamps."""
        from backend.rag.template_sparql import get_schema_slot_validator

        slot_validator = get_schema_slot_validator()

        # Seed one entry by hand.
        key = "institution_type:M"
        slot_validator._kg_validation_cache[key] = True
        slot_validator._kg_validation_timestamps[key] = time.time()

        slot_validator.clear_kg_validation_cache()

        assert len(slot_validator._kg_validation_cache) == 0
        assert len(slot_validator._kg_validation_timestamps) == 0

    def test_get_kg_validation_cache_stats(self, reset_singletons):
        """Stats report total size plus valid/invalid counts and the TTL."""
        from backend.rag.template_sparql import get_schema_slot_validator

        slot_validator = get_schema_slot_validator()

        # One valid and one invalid entry, each with a timestamp.
        seeded = {"slot1:value1": True, "slot2:value2": False}
        for key, is_valid in seeded.items():
            slot_validator._kg_validation_cache[key] = is_valid
        for key in seeded:
            slot_validator._kg_validation_timestamps[key] = time.time()

        stats = slot_validator.get_kg_validation_cache_stats()

        assert stats["cache_size"] == 2
        assert stats["valid_entries"] == 1
        assert stats["invalid_entries"] == 1
        assert "ttl_seconds" in stats

    def test_kg_validation_caching_behavior(self, reset_singletons):
        """A cached verdict within TTL is returned without calling get_ontology_loader."""
        from backend.rag.template_sparql import get_schema_slot_validator

        slot_validator = get_schema_slot_validator()
        slot_validator.set_kg_validation_ttl(10.0)

        # Seed a fresh, positive verdict.
        key = "institution_type:TEST"
        slot_validator._kg_validation_cache[key] = True
        slot_validator._kg_validation_timestamps[key] = time.time()

        with patch('backend.rag.template_sparql.get_ontology_loader') as loader_spy:
            verdict = slot_validator.validate_slot_against_kg(
                "institution_type", "TEST", use_cache=True
            )

        # Cached verdict comes back ...
        assert verdict is True
        # ... and the loader accessor was never touched.
        loader_spy.assert_not_called()
|
|
|
|
|
|
# =============================================================================
|
|
# MOCK KG RESPONSE TESTS
|
|
# =============================================================================
|
|
|
|
class TestMockKGResponses:
    """Drive the loader and validator with canned KG data instead of a live KG."""

    def test_ontology_loader_with_mock_kg(self, reset_singletons, mock_kg_responses):
        """The _load_*_from_kg helpers store the canned values in _slot_values."""
        from backend.rag.template_sparql import get_ontology_loader

        ontology = get_ontology_loader()

        # Substring of the query text -> canned result; checked in this order.
        canned_by_marker = {
            "institutionType": mock_kg_responses["institution_types"],
            "subregionCode": mock_kg_responses["subregions"],
            "countryCode": mock_kg_responses["countries"],
            "settlementName": mock_kg_responses["cities"],
        }

        def fake_query(query, use_cache=True):
            for marker, canned in canned_by_marker.items():
                if marker in query:
                    return canned
            return set()

        with patch.object(ontology, '_query_kg_for_values', side_effect=fake_query):
            # Run every KG-backed loading step against the fake.
            ontology._load_institution_types_from_kg()
            ontology._load_subregions_from_kg()
            ontology._load_countries_from_kg()
            ontology._load_cities_from_kg()

        loaded = ontology._slot_values
        assert loaded.get("institution_type") == mock_kg_responses["institution_types"]
        assert loaded.get("subregion") == mock_kg_responses["subregions"]

    def test_slot_validator_with_mock_kg_validation(self, reset_singletons, mock_kg_responses):
        """validate_slot_against_kg() answers from the loader's slot values."""
        from backend.rag.template_sparql import get_schema_slot_validator, get_ontology_loader

        # Hand-feed the loader and mark it as loaded.
        ontology = get_ontology_loader()
        ontology._slot_values["institution_type"] = mock_kg_responses["institution_types"]
        ontology._slot_values["city"] = mock_kg_responses["cities"]
        ontology._loaded = True

        slot_validator = get_schema_slot_validator()

        expectations = (
            ("institution_type", "M", True),
            ("city", "Amsterdam", True),
            ("city", "NonexistentCity", False),
        )
        for slot, value, expected in expectations:
            assert slot_validator.validate_slot_against_kg(slot, value) is expected

    def test_kg_unavailable_fallback(self, reset_singletons):
        """An empty KG result leaves institution_type empty and raises nothing."""
        from backend.rag.template_sparql import get_ontology_loader

        ontology = get_ontology_loader()

        # Every KG query yields an empty set.
        with patch.object(ontology, '_query_kg_for_values', return_value=set()):
            ontology._load_institution_types_from_kg()

        # No data was stored, and reaching this line means no exception.
        assert ontology._slot_values.get("institution_type", set()) == set()

    def test_is_valid_value_with_empty_kg_data(self, reset_singletons):
        """With no slot values at all, is_valid_value() returns True."""
        from backend.rag.template_sparql import get_ontology_loader

        ontology = get_ontology_loader()
        ontology._slot_values = {}  # no KG data
        ontology._loaded = True

        # Absence of data is treated as "assume valid".
        assert ontology.is_valid_value("institution_type", "ANYTHING") is True
|
|
|
|
|
|
# =============================================================================
|
|
# END-TO-END INTEGRATION TESTS
|
|
# =============================================================================
|
|
|
|
class TestEndToEndOntologyFlow:
    """End-to-end tests for the complete ontology-driven flow."""

    def test_full_validation_flow(self, reset_singletons):
        """Test complete flow: OntologyLoader → SynonymResolver → SlotValidator."""
        from backend.rag.template_sparql import (
            get_ontology_loader,
            get_synonym_resolver,
            get_schema_slot_validator,
        )

        # Step 1: Load ontology
        loader = get_ontology_loader()
        loader.load()

        # Step 2: Get synonym resolver (uses ontology)
        resolver = get_synonym_resolver()
        resolver.load()

        # Step 3: Get slot validator (uses resolver)
        validator = get_schema_slot_validator()

        # Step 4: Validate a slot value
        result = validator.validate_slot("institution_type", "museum")

        # Verify the chain worked
        assert result.valid is True
        assert result.corrected_value == "M"

        # The code "M" should be in the ontology's valid codes
        assert "M" in loader.get_institution_type_codes()

    def test_no_hardcoded_mlagorcubesfixphdnt(self, reset_singletons):
        """Verify the system doesn't rely on hardcoded 'MLAGORCUBESFIXPHDNT' string."""
        from backend.rag.template_sparql import get_synonym_resolver

        resolver = get_synonym_resolver()
        resolver.load()

        # The valid type codes should be a set, not derived from a hardcoded string
        assert isinstance(resolver._valid_type_codes, set)

        # All 19 GLAMORCUBESFIXPHDNT codes should be present
        expected_codes = {
            "G", "L", "A", "M", "O", "R", "C", "U", "B", "E",
            "S", "F", "I", "X", "P", "H", "D", "N", "T",
        }
        assert resolver._valid_type_codes == expected_codes

    def test_validation_rules_json_is_source_of_truth(self, reset_singletons):
        """Loader codes must equal HeritageTypeEnum values in the rules JSON.

        The test is skipped, rather than silently passing, when the rules
        file is missing or lists no HeritageTypeEnum values, because in
        those cases there is nothing to compare against.
        """
        from backend.rag.template_sparql import get_ontology_loader, VALIDATION_RULES_PATH
        import json

        loader = get_ontology_loader()
        loader.load()

        if not VALIDATION_RULES_PATH.exists():
            pytest.skip(f"validation rules file not found: {VALIDATION_RULES_PATH}")

        # Explicit encoding: the platform default is locale-dependent.
        with open(VALIDATION_RULES_PATH, encoding="utf-8") as f:
            rules = json.load(f)

        # Check that HeritageTypeEnum values match loader's codes
        heritage_enum = rules.get("enums", {}).get("HeritageTypeEnum", {})
        expected_codes = set(heritage_enum.get("values", []))

        if not expected_codes:
            pytest.skip("validation rules define no HeritageTypeEnum values")

        assert loader.get_institution_type_codes() == expected_codes
|
|
|
|
|
|
if __name__ == "__main__":
    # Propagate pytest's exit status so that running this file as a script
    # reports failure to the shell / CI instead of always exiting with 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
|