285 lines
8.6 KiB
Python
285 lines
8.6 KiB
Python
"""
|
|
Cache Configuration for Heritage RAG Semantic Caching
|
|
|
|
Configuration settings for the hybrid semantic cache system including:
|
|
- Redis/Valkey connection settings
|
|
- Distance thresholds for semantic matching
|
|
- TTL policies for cache invalidation
|
|
- Quality control filters
|
|
|
|
Based on research from:
|
|
- DeepLearning.AI Semantic Caching course
|
|
- Banking RAG case study (99% -> 3.8% false positive reduction)
|
|
- GPTCache and vCache patterns
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
from enum import Enum
|
|
from typing import Any
|
|
|
|
from pydantic import Field
|
|
from pydantic_settings import BaseSettings
|
|
|
|
|
|
class CacheBackend(str, Enum):
    """Names of the cache backends that can be selected in the settings.

    Members also subclass ``str``, so each one compares equal to its plain
    string value (e.g. ``CacheBackend.REDIS == "redis"``).
    """

    REDIS = "redis"
    VALKEY = "valkey"
    # Intended for testing.
    MEMORY = "memory"
|
|
|
|
|
|
class DistanceMetric(str, Enum):
    """Names of the distance metrics available for semantic similarity.

    Members also subclass ``str``, so each one compares equal to its plain
    string value (e.g. ``DistanceMetric.COSINE == "cosine"``).
    """

    COSINE = "cosine"
    L2 = "l2"
    # Inner product.
    IP = "ip"
|
|
|
|
|
|
class CacheSettings(BaseSettings):
    """Semantic cache configuration loaded from environment variables.

    Field values come from environment variables prefixed with
    ``HERITAGE_CACHE_`` or from a local ``.env`` file; anything not supplied
    falls back to the defaults declared below. Unknown keys are ignored.
    """

    # Pydantic v2 configuration. The nested ``class Config`` form is
    # deprecated in v2 (and ``pydantic_settings`` is v2-only), so the options
    # are declared through ``model_config``. A plain dict is used so that no
    # additional import is required.
    model_config = {
        "env_prefix": "HERITAGE_CACHE_",
        "env_file": ".env",
        "extra": "ignore",
    }

    # Redis/Valkey Connection
    cache_backend: CacheBackend = CacheBackend.VALKEY
    redis_url: str = "redis://localhost:6379"
    redis_password: str | None = None
    redis_db: int = 0

    # Cache Index Configuration
    cache_index_name: str = "heritage_rag_cache"
    cache_prefix: str = "heritage:cache:"

    # Embedding Model (matches redis/langcache-embed-v1)
    cache_embedding_model: str = "redis/langcache-embed-v1"
    cache_embedding_dim: int = 768

    # Distance Thresholds (critical for false positive prevention)
    # Lower = stricter matching, fewer false positives
    # 0.1 is very strict, 0.3 allows more semantic similarity
    # For heritage domain, 0.25 balances accuracy with hit rate
    distance_threshold: float = 0.25  # Cosine distance threshold
    distance_metric: DistanceMetric = DistanceMetric.COSINE

    # Quality Control Thresholds
    min_query_length: int = 10  # Skip very short queries
    max_query_length: int = 500  # Skip extremely long queries

    # TTL Policies (seconds)
    ttl_default: int = 86400  # 24 hours
    ttl_statistical: int = 3600  # 1 hour (counts may change)
    ttl_temporal: int = 86400  # 24 hours (historical data stable)
    ttl_geographic: int = 604800  # 7 days (locations very stable)
    ttl_entity: int = 604800  # 7 days (entity details stable)

    # Cache Behavior
    cache_enabled: bool = True
    validation_enabled: bool = True  # Enable cross-encoder validation
    atomic_decomposition_enabled: bool = True  # Enable sub-query caching

    # Warmup Configuration
    warmup_on_startup: bool = True
    warmup_batch_size: int = 50

    # Metrics & Observability
    metrics_enabled: bool = True
    log_cache_hits: bool = True
    log_cache_misses: bool = True
|
|
|
|
|
|
@dataclass
class CacheEntry:
    """One cached query/response pair together with its metadata.

    The first six fields are required and positional; every other field has
    a default and may be omitted.
    """

    # Required payload
    query: str
    query_hash: str
    response: dict[str, Any]
    intent: str
    language: str
    sources: list[str]

    # Optional metadata that cache queries can filter on
    institution_type: str | None = None
    country_code: str | None = None
    region_code: str | None = None

    # Provenance
    created_at: str = ""
    ttl_seconds: int = 86400  # same value as the 24 hour default TTL
    hit_count: int = 0

    # Quality metrics
    confidence: float = 0.0
    validation_score: float | None = None
|
|
|
|
|
|
@dataclass
class CacheStats:
    """Cache performance statistics.

    The counters are plain integers maintained by the caller; the two derived
    rates are only refreshed when the matching ``update_*`` method is called.
    """

    # Counters
    total_queries: int = 0
    cache_hits: int = 0
    cache_misses: int = 0
    validation_passes: int = 0
    validation_failures: int = 0

    # Timing
    avg_hit_latency_ms: float = 0.0
    avg_miss_latency_ms: float = 0.0

    # Quality (derived values, see the update_* methods)
    false_positive_rate: float = 0.0
    hit_rate: float = 0.0

    def update_hit_rate(self) -> None:
        """Recalculate ``hit_rate`` as ``cache_hits / total_queries``.

        When no queries have been recorded the rate is reset to 0.0, so a
        value computed before the counters were cleared does not linger.
        """
        if self.total_queries > 0:
            self.hit_rate = self.cache_hits / self.total_queries
        else:
            self.hit_rate = 0.0

    def update_false_positive_rate(self) -> None:
        """Recalculate ``false_positive_rate``.

        The rate is ``validation_failures`` divided by the number of
        validated hits (passes plus failures). With no validated hits the
        rate is reset to 0.0 rather than left at a stale value.
        """
        validated_hits = self.validation_passes + self.validation_failures
        if validated_hits > 0:
            self.false_positive_rate = self.validation_failures / validated_hits
        else:
            self.false_positive_rate = 0.0
|
|
|
|
|
|
# Heritage-specific skip patterns for cache bypass.
#
# Each entry is a regular expression that should_bypass_cache() applies with
# re.search() and re.IGNORECASE. Every alternation is wrapped in
# \b(?:...)\b so that a keyword only matches as a whole word: unanchored,
# "nu" matched inside "number"/"Nunspeet", "api" inside "capital", "code"
# inside "postcode" and "my" inside "academy", which forced ordinary
# questions to bypass the cache. Optional suffixes keep the common inflected
# forms ("recently", "currently", "exactly", "codes", "endpoints", "apis")
# matching as they did before.
#
# NOTE(review): "code" still matches "What is the ISIL code of the
# Rijksmuseum?", which is listed as a warm-up FAQ - confirm whether that
# query is really meant to bypass the cache.
CACHE_BYPASS_PATTERNS = [
    # Temporal/dynamic queries (results change frequently)
    r"\b(?:vandaag|today|gisteren|yesterday|nu|now)\b",
    r"\b(?:recent(?:ly)?|latest|newest|nieuwste|current(?:ly)?|actueel|huidige)\b",
    r"\b(?:dit jaar|this year|vorige week|last week)\b",

    # User-specific queries
    r"\b(?:mijn|my|ik heb|i have)\b",

    # Code/technical queries (complex, not cacheable)
    r"\b(?:sparql|query|apis?|endpoints?|codes?)\b",

    # Highly specific numeric queries
    r"\b(?:exact(?:ly)?|precies|specifically)\b",
]
|
|
|
|
# Heritage FAQ categories for cache warmup.
# Maps a category name to its list of example questions (Dutch and English).
FAQ_CATEGORIES: dict[str, list[str]] = {
    "statistical": [
        "Hoeveel musea zijn er in Nederland?",
        "Hoeveel archieven heeft Noord-Holland?",
        "What is the total number of heritage institutions?",
        "How many libraries are there in Amsterdam?",
        "Hoeveel erfgoedinstellingen heeft Limburg?",
    ],
    "geographic": [
        "Where is the Rijksmuseum located?",
        "Welke musea zijn er in Rotterdam?",
        "Find archives in Utrecht province",
        "Show heritage institutions near Amsterdam Centraal",
        "Waar ligt het Nationaal Archief?",
    ],
    "entity_lookup": [
        "What is the ISIL code of the Rijksmuseum?",
        "Tell me about the Nationaal Archief",
        "Information about Eye Filmmuseum",
        "Details van het Zuiderzeemuseum",
        "Wat is het adres van de Koninklijke Bibliotheek?",
    ],
    "relational": [
        "Which institutions merged to form Noord-Hollands Archief?",
        "What museums are part of the Rijkscollectie?",
        "Show relationships between archives in Amsterdam",
        "Welke instellingen behoren tot Collectie Nederland?",
    ],
    "temporal": [
        "When was the Rijksmuseum founded?",
        "Which archives closed in the past decade?",
        "Timeline of museum mergers in the Netherlands",
        "Wanneer is het Stedelijk Museum opgericht?",
    ],
}
|
|
|
|
# Strategic distractors for cache boundary testing.
# The two queries in each pair are semantically similar but should NOT match
# each other in the cache.
DISTRACTOR_PAIRS: list[tuple[str, str]] = [
    # Same intent, different entity
    ("Hoeveel musea zijn er in Amsterdam?", "Hoeveel musea zijn er in Rotterdam?"),
    ("Where is the Rijksmuseum?", "Where is the Van Gogh Museum?"),

    # Same entity, different intent
    ("Where is the Nationaal Archief?", "When was the Nationaal Archief founded?"),
    ("How many items in the Rijksmuseum?", "What type is the Rijksmuseum?"),

    # Similar phrasing, different meaning
    ("Archives in Amsterdam", "Archives about Amsterdam"),
    ("Museums with ISIL codes", "Museum ISIL code lookup"),
]
|
|
|
|
|
|
# Lazily created shared instance returned by get_cache_settings().
_cache_settings_singleton: CacheSettings | None = None


def get_cache_settings() -> CacheSettings:
    """Get the cache settings singleton.

    The first call constructs ``CacheSettings`` (which is when the
    environment is read); every later call returns that same object instead
    of building a new one. Changes made to the environment after the first
    call are therefore not picked up - construct ``CacheSettings()``
    directly when a fresh read is required.

    Returns:
        The shared ``CacheSettings`` instance.
    """
    global _cache_settings_singleton
    if _cache_settings_singleton is None:
        _cache_settings_singleton = CacheSettings()
    return _cache_settings_singleton
|
|
|
|
|
|
def get_ttl_for_intent(intent: str, settings: CacheSettings | None = None) -> int:
    """Return the cache TTL that applies to a query intent.

    Args:
        intent: Query intent (statistical, geographic, etc.)
        settings: Settings to read the TTL values from; when omitted the
            result of ``get_cache_settings()`` is used.

    Returns:
        TTL in seconds. Intents without a dedicated entry fall back to
        ``ttl_default``.
    """
    cfg = get_cache_settings() if settings is None else settings

    # Intent name -> settings field holding its TTL.
    ttl_field_by_intent = {
        "statistical": "ttl_statistical",
        "temporal": "ttl_temporal",
        "geographic": "ttl_geographic",
        "entity_lookup": "ttl_entity",
        "relational": "ttl_default",
        "comparative": "ttl_default",
        "exploration": "ttl_default",
    }
    field_name = ttl_field_by_intent.get(intent, "ttl_default")
    return getattr(cfg, field_name)
|
|
|
|
|
|
def should_bypass_cache(query: str, settings: CacheSettings | None = None) -> bool:
    """Decide whether a query must skip the cache.

    A query bypasses the cache when its length falls outside
    ``[min_query_length, max_query_length]`` or when any regular expression
    in ``CACHE_BYPASS_PATTERNS`` is found in it (searched anywhere in the
    query, case-insensitively). Per the pattern list, that covers:
    - Temporal/dynamic queries (results change frequently)
    - User-specific queries
    - Technical/code queries

    Args:
        query: The query string to check
        settings: Settings supplying the length limits; when omitted the
            result of ``get_cache_settings()`` is used.

    Returns:
        True if cache should be bypassed
    """
    import re

    cfg = get_cache_settings() if settings is None else settings

    # Length guard: too short or too long is never served from the cache.
    if not (cfg.min_query_length <= len(query) <= cfg.max_query_length):
        return True

    # Pattern guard: a single matching bypass pattern is enough.
    return any(
        re.search(bypass_pattern, query, re.IGNORECASE)
        for bypass_pattern in CACHE_BYPASS_PATTERNS
    )
|