"""
Cache Configuration for Heritage RAG Semantic Caching

Configuration settings for the hybrid semantic cache system including:
- Redis/Valkey connection settings
- Distance thresholds for semantic matching
- TTL policies for cache invalidation
- Quality control filters

Based on research from:
- DeepLearning.AI Semantic Caching course
- Banking RAG case study (99% -> 3.8% false positive reduction)
- GPTCache and vCache patterns
"""

from __future__ import annotations

import re
from dataclasses import dataclass, field
from enum import Enum
from functools import lru_cache
from typing import Any

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict


class CacheBackend(str, Enum):
    """Available cache backends."""

    REDIS = "redis"
    VALKEY = "valkey"
    MEMORY = "memory"  # For testing


class DistanceMetric(str, Enum):
    """Distance metrics for semantic similarity."""

    COSINE = "cosine"
    L2 = "l2"
    IP = "ip"  # Inner product


class CacheSettings(BaseSettings):
    """Semantic cache configuration from environment variables.

    Every field can be overridden through an environment variable named
    ``HERITAGE_CACHE_<FIELD_NAME>`` or through a ``.env`` file; unknown
    keys are ignored.
    """

    # `model_config` replaces the nested `class Config`, which Pydantic v2
    # only keeps as a deprecated alias. The values are unchanged.
    model_config = SettingsConfigDict(
        env_prefix="HERITAGE_CACHE_",
        env_file=".env",
        extra="ignore",
    )

    # Redis/Valkey Connection
    cache_backend: CacheBackend = CacheBackend.VALKEY
    redis_url: str = "redis://localhost:6379"
    redis_password: str | None = None
    redis_db: int = 0

    # Cache Index Configuration
    cache_index_name: str = "heritage_rag_cache"
    cache_prefix: str = "heritage:cache:"

    # Embedding Model (matches redis/langcache-embed-v1)
    cache_embedding_model: str = "redis/langcache-embed-v1"
    cache_embedding_dim: int = 768

    # Distance Thresholds (critical for false positive prevention)
    # Lower = stricter matching, fewer false positives
    # 0.1 is very strict, 0.3 allows more semantic similarity
    # For heritage domain, 0.25 balances accuracy with hit rate
    distance_threshold: float = 0.25  # Cosine distance threshold
    distance_metric: DistanceMetric = DistanceMetric.COSINE

    # Quality Control Thresholds
    min_query_length: int = 10  # Skip very short queries
    max_query_length: int = 500  # Skip extremely long queries

    # TTL Policies (seconds)
    ttl_default: int = 86400  # 24 hours
    ttl_statistical: int = 3600  # 1 hour (counts may change)
    ttl_temporal: int = 86400  # 24 hours (historical data stable)
    ttl_geographic: int = 604800  # 7 days (locations very stable)
    ttl_entity: int = 604800  # 7 days (entity details stable)

    # Cache Behavior
    cache_enabled: bool = True
    validation_enabled: bool = True  # Enable cross-encoder validation
    atomic_decomposition_enabled: bool = True  # Enable sub-query caching

    # Warmup Configuration
    warmup_on_startup: bool = True
    warmup_batch_size: int = 50

    # Metrics & Observability
    metrics_enabled: bool = True
    log_cache_hits: bool = True
    log_cache_misses: bool = True


@dataclass
class CacheEntry:
    """A single cache entry with metadata."""

    query: str
    query_hash: str
    response: dict[str, Any]
    intent: str
    language: str
    sources: list[str]

    # Filterable metadata for cache queries
    institution_type: str | None = None
    country_code: str | None = None
    region_code: str | None = None

    # Provenance
    created_at: str = ""
    ttl_seconds: int = 86400
    hit_count: int = 0

    # Quality metrics
    confidence: float = 0.0
    validation_score: float | None = None


@dataclass
class CacheStats:
    """Cache performance statistics."""

    total_queries: int = 0
    cache_hits: int = 0
    cache_misses: int = 0
    validation_passes: int = 0
    validation_failures: int = 0

    # Timing
    avg_hit_latency_ms: float = 0.0
    avg_miss_latency_ms: float = 0.0

    # Quality
    false_positive_rate: float = 0.0
    hit_rate: float = 0.0

    def update_hit_rate(self) -> None:
        """Recalculate hit rate as ``cache_hits / total_queries``.

        With no recorded queries the rate is reset to 0.0 so that a stale
        value cannot outlive a reset of the counters.
        """
        if self.total_queries > 0:
            self.hit_rate = self.cache_hits / self.total_queries
        else:
            self.hit_rate = 0.0

    def update_false_positive_rate(self) -> None:
        """Recalculate false positive rate.

        The rate is ``validation_failures`` divided by the number of
        validated hits (passes + failures); it is reset to 0.0 when no hit
        has been validated.
        """
        total_hits = self.validation_passes + self.validation_failures
        if total_hits > 0:
            self.false_positive_rate = self.validation_failures / total_hits
        else:
            self.false_positive_rate = 0.0


# Heritage-specific skip patterns for cache bypass.
# Each entry is a regex alternation; should_bypass_cache() matches the
# alternatives as whole words, case-insensitively.
# NOTE(review): the `code` alternative also bypasses domain questions such as
# "What is the ISIL code of the Rijksmuseum?" (listed in FAQ_CATEGORIES below)
# - confirm whether that is intended.
CACHE_BYPASS_PATTERNS = [
    # Temporal/dynamic queries (results change frequently)
    r"vandaag|today|gisteren|yesterday|nu|now",
    r"recent|latest|newest|nieuwste|current|actueel|huidige",
    r"dit jaar|this year|vorige week|last week",
    # User-specific queries
    r"mijn|my|ik heb|i have",
    # Code/technical queries (complex, not cacheable)
    r"sparql|query|api|endpoint|code",
    # Highly specific numeric queries
    r"exact|precies|specifically",
]

# Heritage FAQ categories for cache warmup
FAQ_CATEGORIES = {
    "statistical": [
        "Hoeveel musea zijn er in Nederland?",
        "Hoeveel archieven heeft Noord-Holland?",
        "What is the total number of heritage institutions?",
        "How many libraries are there in Amsterdam?",
        "Hoeveel erfgoedinstellingen heeft Limburg?",
    ],
    "geographic": [
        "Where is the Rijksmuseum located?",
        "Welke musea zijn er in Rotterdam?",
        "Find archives in Utrecht province",
        "Show heritage institutions near Amsterdam Centraal",
        "Waar ligt het Nationaal Archief?",
    ],
    "entity_lookup": [
        "What is the ISIL code of the Rijksmuseum?",
        "Tell me about the Nationaal Archief",
        "Information about Eye Filmmuseum",
        "Details van het Zuiderzeemuseum",
        "Wat is het adres van de Koninklijke Bibliotheek?",
    ],
    "relational": [
        "Which institutions merged to form Noord-Hollands Archief?",
        "What museums are part of the Rijkscollectie?",
        "Show relationships between archives in Amsterdam",
        "Welke instellingen behoren tot Collectie Nederland?",
    ],
    "temporal": [
        "When was the Rijksmuseum founded?",
        "Which archives closed in the past decade?",
        "Timeline of museum mergers in the Netherlands",
        "Wanneer is het Stedelijk Museum opgericht?",
    ],
}

# Strategic distractors for cache boundary testing
# These are semantically similar but should NOT match
DISTRACTOR_PAIRS = [
    # Same intent, different entity
    ("Hoeveel musea zijn er in Amsterdam?", "Hoeveel musea zijn er in Rotterdam?"),
    ("Where is the Rijksmuseum?", "Where is the Van Gogh Museum?"),
    # Same entity, different intent
    ("Where is the Nationaal Archief?", "When was the Nationaal Archief founded?"),
    ("How many items in the Rijksmuseum?", "What type is the Rijksmuseum?"),
    # Similar phrasing, different meaning
    ("Archives in Amsterdam", "Archives about Amsterdam"),
    ("Museums with ISIL codes", "Museum ISIL code lookup"),
]


@lru_cache(maxsize=1)
def get_cache_settings() -> CacheSettings:
    """Get cache settings singleton.

    The settings object is built on the first call and reused afterwards, so
    the environment and ``.env`` file are not re-read on every query. Call
    ``get_cache_settings.cache_clear()`` to force a reload (e.g. in tests
    that change environment variables).

    Returns:
        The shared CacheSettings instance.
    """
    return CacheSettings()


def get_ttl_for_intent(intent: str, settings: CacheSettings | None = None) -> int:
    """Get appropriate TTL based on query intent.

    Args:
        intent: Query intent (statistical, geographic, etc.)
        settings: Optional cache settings override

    Returns:
        TTL in seconds; ``settings.ttl_default`` for unknown intents.
    """
    if settings is None:
        settings = get_cache_settings()

    ttl_mapping = {
        "statistical": settings.ttl_statistical,
        "temporal": settings.ttl_temporal,
        "geographic": settings.ttl_geographic,
        "entity_lookup": settings.ttl_entity,
        "relational": settings.ttl_default,
        "comparative": settings.ttl_default,
        "exploration": settings.ttl_default,
    }
    return ttl_mapping.get(intent, settings.ttl_default)


@lru_cache(maxsize=128)
def _compile_bypass_pattern(pattern: str) -> re.Pattern[str]:
    """Compile one bypass alternation as a whole-word, case-insensitive regex."""
    # Without the \b anchors short alternatives match inside unrelated words
    # ("nu" in "number", "my" in "academy", "api" in "capital").
    return re.compile(rf"\b(?:{pattern})\b", re.IGNORECASE)


def should_bypass_cache(query: str, settings: CacheSettings | None = None) -> bool:
    """Check if query should bypass cache.

    Returns True for queries that should not use cached responses:
    - Temporal/dynamic queries (results change frequently)
    - User-specific queries
    - Very short or very long queries
    - Technical/code queries

    Keywords from CACHE_BYPASS_PATTERNS are matched as whole words, ignoring
    case, so a keyword embedded in a longer word does not trigger a bypass.
    Inflected forms (e.g. "recently") only match if listed explicitly.

    Args:
        query: The query string to check
        settings: Optional cache settings override

    Returns:
        True if cache should be bypassed
    """
    if settings is None:
        settings = get_cache_settings()

    # Length checks
    if not settings.min_query_length <= len(query) <= settings.max_query_length:
        return True

    # Pattern checks; the list is read at call time so entries added at
    # runtime are honoured, while compilation is cached per pattern.
    return any(
        _compile_bypass_pattern(pattern).search(query)
        for pattern in CACHE_BYPASS_PATTERNS
    )