# glam/backend/rag/cache_config.py
# Last modified: 2025-12-11 22:32:09 +01:00
# 285 lines, 8.6 KiB, Python
"""
Cache Configuration for Heritage RAG Semantic Caching
Configuration settings for the hybrid semantic cache system including:
- Redis/Valkey connection settings
- Distance thresholds for semantic matching
- TTL policies for cache invalidation
- Quality control filters
Based on research from:
- DeepLearning.AI Semantic Caching course
- Banking RAG case study (99% -> 3.8% false positive reduction)
- GPTCache and vCache patterns
"""
from __future__ import annotations

from dataclasses import dataclass, field
from enum import Enum
from functools import lru_cache
from typing import Any

from pydantic import Field
from pydantic_settings import BaseSettings
class CacheBackend(str, Enum):
    """Supported storage backends for the semantic cache."""

    # Production backends (Valkey is the Redis-compatible fork)
    REDIS = "redis"
    VALKEY = "valkey"
    # In-memory backend, intended for test suites only
    MEMORY = "memory"
class DistanceMetric(str, Enum):
    """Vector-distance metrics usable for semantic similarity matching."""

    COSINE = "cosine"
    L2 = "l2"
    # Inner product (dot-product similarity)
    IP = "ip"
class CacheSettings(BaseSettings):
    """Semantic cache configuration, loaded from HERITAGE_CACHE_* environment variables."""

    # --- Redis / Valkey connection ---
    cache_backend: CacheBackend = CacheBackend.VALKEY
    redis_url: str = "redis://localhost:6379"
    redis_password: str | None = None
    redis_db: int = 0

    # --- Cache index configuration ---
    cache_index_name: str = "heritage_rag_cache"
    cache_prefix: str = "heritage:cache:"

    # --- Embedding model (dimension must match redis/langcache-embed-v1) ---
    cache_embedding_model: str = "redis/langcache-embed-v1"
    cache_embedding_dim: int = 768

    # --- Distance thresholds (critical for false-positive prevention) ---
    # Lower = stricter matching, fewer false positives.
    # 0.1 is very strict; 0.3 allows more semantic similarity.
    # For the heritage domain, 0.25 balances accuracy with hit rate.
    distance_threshold: float = 0.25  # Cosine distance threshold
    distance_metric: DistanceMetric = DistanceMetric.COSINE

    # --- Quality-control thresholds ---
    min_query_length: int = 10  # Skip very short queries
    max_query_length: int = 500  # Skip extremely long queries

    # --- TTL policies (seconds) ---
    ttl_default: int = 86400  # 24 hours
    ttl_statistical: int = 3600  # 1 hour (counts may change)
    ttl_temporal: int = 86400  # 24 hours (historical data stable)
    ttl_geographic: int = 604800  # 7 days (locations very stable)
    ttl_entity: int = 604800  # 7 days (entity details stable)

    # --- Cache behavior toggles ---
    cache_enabled: bool = True
    validation_enabled: bool = True  # Enable cross-encoder validation of hits
    atomic_decomposition_enabled: bool = True  # Enable sub-query caching

    # --- Warmup configuration (see FAQ_CATEGORIES for the seed queries) ---
    warmup_on_startup: bool = True
    warmup_batch_size: int = 50

    # --- Metrics & observability ---
    metrics_enabled: bool = True
    log_cache_hits: bool = True
    log_cache_misses: bool = True

    class Config:
        # Pydantic v1-style settings config: env vars are read with this
        # prefix (e.g. HERITAGE_CACHE_REDIS_URL); unknown keys are ignored.
        env_prefix = "HERITAGE_CACHE_"
        env_file = ".env"
        extra = "ignore"
@dataclass
class CacheEntry:
    """A single cached query/response pair plus its bookkeeping metadata."""

    # Core payload
    query: str
    query_hash: str
    response: dict[str, Any]
    intent: str
    language: str
    sources: list[str]

    # Filterable metadata for narrowing cache lookups
    institution_type: str | None = None
    country_code: str | None = None
    region_code: str | None = None

    # Provenance / lifecycle
    created_at: str = ""  # presumably an ISO timestamp — set by the writer; TODO confirm format
    ttl_seconds: int = 86400  # default 24 h; see get_ttl_for_intent for per-intent values
    hit_count: int = 0

    # Quality metrics
    confidence: float = 0.0
    validation_score: float | None = None  # populated only when validation_enabled
@dataclass
class CacheStats:
    """Running performance counters for the semantic cache."""

    # Raw counters
    total_queries: int = 0
    cache_hits: int = 0
    cache_misses: int = 0
    validation_passes: int = 0
    validation_failures: int = 0

    # Latency tracking (milliseconds)
    avg_hit_latency_ms: float = 0.0
    avg_miss_latency_ms: float = 0.0

    # Derived quality metrics; call the update_* helpers to refresh them
    false_positive_rate: float = 0.0
    hit_rate: float = 0.0

    def update_hit_rate(self) -> None:
        """Refresh ``hit_rate`` from the hit/query counters."""
        total = self.total_queries
        if total > 0:
            self.hit_rate = self.cache_hits / total

    def update_false_positive_rate(self) -> None:
        """Refresh ``false_positive_rate`` from the validation counters."""
        validated = self.validation_passes + self.validation_failures
        if validated > 0:
            self.false_positive_rate = self.validation_failures / validated
# Heritage-specific skip patterns for cache bypass.
# Queries matching any of these should skip the semantic cache entirely
# (see should_bypass_cache()). Each alternation is wrapped in \b word
# boundaries so that e.g. "now" does not fire on "known", "api" does not
# fire on "rapid", and "my" does not fire on "mystery".
CACHE_BYPASS_PATTERNS = [
    # Temporal/dynamic queries (results change frequently)
    r"\b(?:vandaag|today|gisteren|yesterday|nu|now)\b",
    r"\b(?:recent|latest|newest|nieuwste|current|actueel|huidige)\b",
    r"\b(?:dit jaar|this year|vorige week|last week)\b",
    # User-specific queries
    r"\b(?:mijn|my|ik heb|i have)\b",
    # Code/technical queries (complex, not cacheable)
    r"\b(?:sparql|query|api|endpoint|code)\b",
    # Highly specific numeric queries
    r"\b(?:exact|precies|specifically)\b",
]
# Heritage FAQ categories for cache warmup.
# Representative queries per intent category, used to pre-populate the
# semantic cache on startup (see CacheSettings.warmup_on_startup).
# The Dutch/English mix mirrors expected bilingual heritage-portal traffic.
FAQ_CATEGORIES = {
    "statistical": [
        "Hoeveel musea zijn er in Nederland?",
        "Hoeveel archieven heeft Noord-Holland?",
        "What is the total number of heritage institutions?",
        "How many libraries are there in Amsterdam?",
        "Hoeveel erfgoedinstellingen heeft Limburg?",
    ],
    "geographic": [
        "Where is the Rijksmuseum located?",
        "Welke musea zijn er in Rotterdam?",
        "Find archives in Utrecht province",
        "Show heritage institutions near Amsterdam Centraal",
        "Waar ligt het Nationaal Archief?",
    ],
    "entity_lookup": [
        "What is the ISIL code of the Rijksmuseum?",
        "Tell me about the Nationaal Archief",
        "Information about Eye Filmmuseum",
        "Details van het Zuiderzeemuseum",
        "Wat is het adres van de Koninklijke Bibliotheek?",
    ],
    "relational": [
        "Which institutions merged to form Noord-Hollands Archief?",
        "What museums are part of the Rijkscollectie?",
        "Show relationships between archives in Amsterdam",
        "Welke instellingen behoren tot Collectie Nederland?",
    ],
    "temporal": [
        "When was the Rijksmuseum founded?",
        "Which archives closed in the past decade?",
        "Timeline of museum mergers in the Netherlands",
        "Wanneer is het Stedelijk Museum opgericht?",
    ],
}
# Strategic distractors for cache boundary testing.
# Each pair is semantically similar but must NOT match in the cache:
# a semantic-cache hit across a pair would be a false positive. Useful
# for tuning CacheSettings.distance_threshold.
DISTRACTOR_PAIRS = [
    # Same intent, different entity
    ("Hoeveel musea zijn er in Amsterdam?", "Hoeveel musea zijn er in Rotterdam?"),
    ("Where is the Rijksmuseum?", "Where is the Van Gogh Museum?"),
    # Same entity, different intent
    ("Where is the Nationaal Archief?", "When was the Nationaal Archief founded?"),
    ("How many items in the Rijksmuseum?", "What type is the Rijksmuseum?"),
    # Similar phrasing, different meaning
    ("Archives in Amsterdam", "Archives about Amsterdam"),
    ("Museums with ISIL codes", "Museum ISIL code lookup"),
]
@lru_cache(maxsize=1)
def get_cache_settings() -> CacheSettings:
    """Return the process-wide cache settings singleton.

    The settings are read from the environment on first call and memoised
    via ``lru_cache`` so the documented singleton behaviour actually holds;
    previously a fresh ``CacheSettings`` instance was built on every call.

    Returns:
        The shared ``CacheSettings`` instance.
    """
    return CacheSettings()
def get_ttl_for_intent(intent: str, settings: CacheSettings | None = None) -> int:
    """Resolve the cache TTL (in seconds) for a query intent.

    Args:
        intent: Query intent label (statistical, geographic, etc.).
        settings: Optional cache settings override; defaults to the
            process-wide settings.

    Returns:
        TTL in seconds; unknown intents fall back to ``ttl_default``.
    """
    cfg = get_cache_settings() if settings is None else settings
    per_intent = {
        "statistical": cfg.ttl_statistical,
        "temporal": cfg.ttl_temporal,
        "geographic": cfg.ttl_geographic,
        "entity_lookup": cfg.ttl_entity,
        "relational": cfg.ttl_default,
        "comparative": cfg.ttl_default,
        "exploration": cfg.ttl_default,
    }
    return per_intent.get(intent, cfg.ttl_default)
def should_bypass_cache(query: str, settings: CacheSettings | None = None) -> bool:
    """Decide whether a query must skip the semantic cache.

    Bypassed queries are those whose cached answers would be wrong or
    useless: temporal/dynamic queries, user-specific queries,
    technical/code queries, and anything outside the configured
    length bounds.

    Args:
        query: The query string to check.
        settings: Optional cache settings override; defaults to the
            process-wide settings.

    Returns:
        True when the cache should be bypassed.
    """
    import re

    cfg = get_cache_settings() if settings is None else settings

    # Length guard: too short or too long is not worth caching/matching.
    if not (cfg.min_query_length <= len(query) <= cfg.max_query_length):
        return True

    # Any bypass pattern firing means the answer is volatile or personal.
    return any(
        re.search(pattern, query, re.IGNORECASE)
        for pattern in CACHE_BYPASS_PATTERNS
    )