glam/apps/archief-assistent/src/lib/semantic-cache.ts
2026-01-10 17:31:02 +01:00

1717 lines
63 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* SemanticCache - Lightweight Semantic Caching for de Aa ArchiefAssistent
*
* Architecture:
* ┌─────────────────────────────────────────────────────────────────┐
* │ Tier 1: IndexedDB (Client-Side) │
* │ - Zero latency lookups (~1ms) │
* │ - Per-user cache │
* │ - Works offline │
* └─────────────────────────────────────────────────────────────────┘
* │
* ▼ (on miss)
* ┌─────────────────────────────────────────────────────────────────┐
* │ Tier 2: Qdrant Vector DB via /api/cache (Server-Side) │
* │ - HNSW index for sub-millisecond similarity search │
* │ - Server-side embeddings (sentence-transformers) │
* │ - Shared across all users │
* └─────────────────────────────────────────────────────────────────┘
*
* Lookup Flow:
* 1. Check IndexedDB (instant, ~1ms)
* 2. If miss, check Qdrant via /api/cache/lookup
* 3. If Qdrant hit, populate IndexedDB for future lookups
* 4. If total miss, query RAG and store in both tiers
*/
// ============================================================================
// Types
// ============================================================================
/**
* Institution type codes from GLAMORCUBESFIXPHDNT taxonomy.
*
* One single-letter code per letter of the taxonomy acronym (19 codes in total).
* The keyword hints in INSTITUTION_PATTERNS show which kind of institution each
* letter stands for (e.g. 'M' = museum, 'A' = archive, 'L' = library).
*/
export type InstitutionTypeCode = 'G' | 'L' | 'A' | 'M' | 'O' | 'R' | 'C' | 'U' | 'B' | 'E' | 'S' | 'F' | 'I' | 'X' | 'P' | 'H' | 'D' | 'N' | 'T';
// ============================================================================
// Epistemic Provenance (Phase 1 - Rule 46 Evolution)
// ============================================================================
// Based on Pavlyshyn's "Context Graphs and Data Traces": Every cached response
// should carry epistemological metadata about WHERE, WHEN, HOW, and WHAT KIND
// of epistemic status the data holds. This enables revalidation and trust.
/**
* Data source types aligned with project's data tier system.
* See: AGENTS.md Rule 22 (Custodian YAML Files Are the Single Source of Truth)
*
* The TIER_n annotations in the trailing comments are informational only; the
* tier recorded for a concrete cache entry is EpistemicProvenance.dataTier.
*/
export type EpistemicDataSource =
| 'ISIL_REGISTRY' // Dutch ISIL codes CSV (TIER_1)
| 'WIKIDATA' // Wikidata SPARQL endpoint (TIER_3)
| 'CUSTODIAN_YAML' // data/custodian/*.yaml files (TIER_1)
| 'GOOGLE_MAPS' // Google Places API (TIER_3)
| 'WEB_SCRAPE' // Website content with XPath provenance (TIER_2)
| 'LLM_INFERENCE' // Generated by LLM without verifiable source (TIER_4)
| 'SPARQL_QUERY' // Direct SPARQL query result (TIER_1-3 depending on source)
| 'RAG_PIPELINE' // Full RAG retrieval + generation (mixed tiers)
| 'USER_PROVIDED' // User-submitted data (TIER_4 until verified)
| 'CACHE_AGGREGATION'; // Computed from other cached entries
/**
* Data quality tiers aligned with AGENTS.md provenance model.
*
* The numbering is inverse to authority: tier 1 is the most authoritative
* source class and tier 4 the least (inferred or unverified data), matching
* the "1=authoritative, 4=inferred" note on EpistemicProvenance.dataTier.
*/
export type DataTier = 1 | 2 | 3 | 4;
/**
* Epistemic provenance tracks the justification chain for cached knowledge.
*
* This transforms the cache from a collection of answers into a *justified*
* knowledge base where each response carries information about:
* - WHERE the information originated
* - WHEN it was captured
* - HOW it was derived
* - WHAT KIND of epistemic status it holds
*
* Only dataSource, dataTier, sourceTimestamp, derivationChain and
* revalidationPolicy are required; the remaining fields may be absent.
*
* @see https://volodymyrpavlyshyn.medium.com/context-graphs-and-data-traces-building-epistemology-layers-for-agentic-memory-64ee876c846f
*/
export interface EpistemicProvenance {
/** Primary source of the cached data */
dataSource: EpistemicDataSource;
/** Data quality tier (1=authoritative, 4=inferred) */
dataTier: DataTier;
/** ISO 8601 timestamp when the source data was captured/queried */
sourceTimestamp: string;
/**
* Derivation chain showing how the answer was produced, in processing order.
* Example: ["SPARQL:Qdrant", "RAG:retrieve", "LLM:generate"]
*/
derivationChain: string[];
/**
* When this cache entry should be revalidated.
* - 'static': Never revalidate (e.g., historical facts)
* - 'daily': Revalidate after 24 hours
* - 'weekly': Revalidate after 7 days
* - 'on_access': Revalidate every time (expensive, for volatile data)
*/
revalidationPolicy: 'static' | 'daily' | 'weekly' | 'on_access';
/** ISO 8601 timestamp of last revalidation (if applicable) */
lastRevalidated?: string;
/** Confidence score (0.0 - 1.0) based on source quality and derivation */
confidenceScore?: number;
/** Source URLs or identifiers for traceability */
sourceReferences?: string[];
/** Notes about any caveats or limitations */
epistemicNotes?: string;
}
/**
* Entities extracted from a query for structured cache key generation.
* Used to prevent geographic false positives (e.g., "Amsterdam" vs "Noord-Holland").
*
* Enhanced with ontology-derived subtypes per Rule 46 (Ontology-Driven Cache Segmentation).
*
* Every field is optional: the extractors only set a field when they detect
* the corresponding entity in the query text.
*/
export interface ExtractedEntities {
/** Single-letter institution type code detected in the query, if any */
institutionType?: InstitutionTypeCode | null;
/** Specific subtype from ontology (e.g., 'MUNICIPAL_ARCHIVE', 'ART_MUSEUM') */
institutionSubtype?: string | null;
/** Record set type for archival queries (e.g., 'CIVIL_REGISTRY', 'COUNCIL_GOVERNANCE') */
recordSetType?: string | null;
/** Wikidata Q-number for the matched type/subtype */
wikidataEntity?: string | null;
/**
* Detected place: a province code from DUTCH_PROVINCES (e.g. 'NH') when
* locationType is 'province', or a lower-cased city name reduced to the
* letters a-z (e.g. 'denhaag') when locationType is 'city'.
*/
location?: string | null;
/** Whether `location` holds a province code or a city name */
locationType?: 'city' | 'province' | null;
/** Question intent detected from keywords: counting, listing, or asking for information */
intent?: 'count' | 'list' | 'info' | null;
/** Method used for entity extraction */
extractionMethod?: 'vocabulary' | 'regex' | 'embedding';
// ============================================================================
// Phase 5: Clarity Trading (Rule 46 Evolution)
// ============================================================================
/**
* Clarity score (0.0 - 1.0) indicating how unambiguous the query is.
* Queries with clarityScore < 0.7 should bypass cache and go to RAG.
*/
clarityScore?: number;
/** Identified ambiguities that reduce clarity */
ambiguities?: string[];
}
// ============================================================================
// Phase 4: Message-Passing Protocol (Smalltalk-Inspired)
// ============================================================================
// Based on Pavlyshyn's "Smalltalk: The Language That Changed Everything":
// Queries should be MESSAGES to holons, not function calls.
/**
* Kinds of message understood by handleCacheMessage:
* - 'LOOKUP': look a query up in the cache
* - 'STORE': store a query/response pair
* - 'INVALIDATE': clear the cache and forget the last decision trace
* - 'EXPLAIN': return the decision trace of the most recent LOOKUP
*/
export type CacheMessageType = 'LOOKUP' | 'STORE' | 'INVALIDATE' | 'EXPLAIN';
/**
* A message sent to the cache through handleCacheMessage.
*
* Which arguments are needed depends on `type`: LOOKUP requires `query`
* (and optionally `embedding`); STORE requires `query` and `response`;
* INVALIDATE and EXPLAIN read no arguments.
*/
export interface CacheMessage {
type: CacheMessageType;
/** Smalltalk-style message selector (e.g., "count:archives:municipal:GE") */
selector: string;
arguments: {
/** Raw user query text (LOOKUP, STORE) */
query?: string;
/** Embedding for the query, or null/omitted when none is available */
embedding?: number[] | null;
/** Response payload to cache (STORE) */
response?: CachedResponse;
/** Pre-extracted entities. NOTE(review): handleCacheMessage does not read this field — confirm intended use */
entities?: ExtractedEntities;
};
/** Timestamp when message was created */
timestamp: number;
}
/**
* Explanation record for a single LOOKUP decision.
*
* handleCacheMessage builds one of these for every LOOKUP (hit, miss, or
* low-clarity bypass) and keeps the most recent one for explainLastDecision().
*/
export interface CacheDecisionTrace {
/** The original query */
query: string;
/** Extracted entities */
entities: ExtractedEntities;
/** Structured cache key used */
structuredKey: string;
/** Whether cache hit occurred */
hit: boolean;
/** Tier where hit occurred (if any) */
tier?: 'local' | 'shared';
/** Match method used ('none' when nothing matched or the cache was bypassed) */
method: 'semantic' | 'fuzzy' | 'exact' | 'structured' | 'none';
/** Similarity score (0 when the cache was bypassed) */
similarity: number;
/** Topological distance (if applicable) */
topologicalDistance?: number;
/** Why this decision was made (human-readable sentence) */
reasoning: string;
/** Epistemic provenance of the cached entry (if hit) */
provenance?: EpistemicProvenance;
/** Time taken for lookup, in milliseconds */
lookupTimeMs: number;
}
/**
* Reply produced by handleCacheMessage.
*
* `success` is false when a required argument is missing, when EXPLAIN is sent
* before any LOOKUP, or when handling threw; `error` then holds the reason.
*/
export interface CacheMessageResponse {
success: boolean;
/** Lookup-shaped result (returned for LOOKUP and STORE) */
result?: CacheLookupResult;
/** Decision trace for explainability */
trace?: CacheDecisionTrace;
/** Error description when `success` is false */
error?: string;
}
/**
* One cached query together with its response and bookkeeping metadata.
*/
export interface CachedQuery {
/** Unique entry identifier (presumably produced by generateCacheId — confirm against the store path) */
id: string;
/** Query text as entered */
query: string;
/** Normalized form of the query (presumably normalizeQuery(query) — confirm) */
queryNormalized: string;
/** Query embedding, or null when none was available */
embedding: number[] | null;
/** The cached answer payload */
response: CachedResponse;
/** Numeric timestamp of the entry (presumably creation time in epoch ms — confirm) */
timestamp: number;
/** Hit counter for this entry */
hitCount: number;
/** Numeric timestamp of the last access (presumably epoch ms — confirm) */
lastAccessed: number;
/** Query language */
language: 'nl' | 'en';
/** Identifier of the LLM provider associated with the response */
llmProvider: string;
/** Cache tier the entry came from */
source?: 'local' | 'shared';
/** Extracted entities for structured cache matching (prevents geographic false positives) */
entities?: ExtractedEntities;
/** Structured cache key derived from entities (e.g., "count:M:amsterdam") */
structuredKey?: string;
/** Epistemic provenance for trust and revalidation (Phase 1 - Rule 46 Evolution) */
epistemicProvenance?: EpistemicProvenance;
// ============================================================================
// Holonic Cache Properties (Phase 3 - Rule 46 Evolution)
// ============================================================================
/** Holon level: micro (single entity), meso (type+location), macro (aggregate) */
holonLevel?: 'micro' | 'meso' | 'macro';
/** Cache keys this entry participates in (upward links in holarchy) */
participatesIn?: string[];
/** Cache keys this entry aggregates (downward links in holarchy) */
aggregates?: string[];
}
/**
* Answer payload stored in the cache for a query.
*/
export interface CachedResponse {
/** Answer text shown to the user */
answer: string;
/** Sources backing the answer, each identified by database and name */
sources: Array<{ database: string; name: string }>;
/** Institutions referenced by the answer; only `name` is required */
institutions: Array<{
name: string;
type?: string;
city?: string;
country?: string;
description?: string;
website?: string;
score?: number;
}>;
sparqlQuery?: string; // SPARQL query used for knowledge graph search
sparqlResults?: Array<Record<string, string | number | boolean | null>>; // Pre-executed SPARQL results
retrievedResults?: Array<Record<string, unknown>>; // Full RAG results for Knowledge Graph visualization
}
/**
* Tunable settings for the semantic cache. The defaults quoted below are the
* values set in DEFAULT_CONFIG.
*/
export interface CacheConfig {
similarityThreshold: number; // 0.0 - 1.0, default 0.97 (high on purpose: geographic look-alike queries score ~0.93)
ttlMs: number; // Time-to-live in ms, default 24 hours
maxEntries: number; // Max cached entries, default 200
enableFuzzyMatch: boolean; // Fallback to text similarity if no embeddings
fuzzyThreshold: number; // Text similarity threshold, default 0.90
enableSharedCache: boolean; // Enable the shared server-side tier (Qdrant-backed /api/cache; formerly described as Valkey)
sharedCacheUrl: string; // Shared cache API URL, default '/api/cache'
embedApiUrl: string; // Embedding API URL, default '/api/embed'
/** Enable Phase 5 clarity trading - bypass cache for ambiguous queries */
useClarityCheck: boolean;
/** Minimum clarity score (0.0 - 1.0) required to use cache. Default 0.7 */
clarityThreshold: number;
}
/**
* Aggregate cache counters.
*/
export interface CacheStats {
/** Number of entries currently cached */
totalEntries: number;
/** Number of lookups that hit */
totalHits: number;
/** Number of lookups that missed */
totalMisses: number;
/** Hit ratio (presumably totalHits / (totalHits + totalMisses) — confirm against the code that fills this in) */
hitRate: number;
/** Hits served by the local tier */
localHits: number;
/** Hits served by the shared tier */
sharedHits: number;
}
/**
* Outcome of a cache lookup.
*/
export interface CacheLookupResult {
/** True when a cached entry matched */
found: boolean;
/** The matching entry (if found) */
entry?: CachedQuery;
/** Similarity score of the match */
similarity: number;
/** 'structured' = entity-aware match (location/type/intent), prevents geographic false positives */
method: 'semantic' | 'fuzzy' | 'exact' | 'structured' | 'none';
/** Time taken for the lookup, in milliseconds */
lookupTimeMs: number;
/** Tier that served the match (if any) */
tier?: 'local' | 'shared';
}
// ============================================================================
// Constants
// ============================================================================
// Storage identifiers (presumably the IndexedDB database name, schema version
// and object-store name for the client-side tier — confirm where they are used).
const DB_NAME = 'DeAa_SemanticCache';
const DB_VERSION = 1;
const STORE_NAME = 'cached_queries';
/** Default settings applied when the caller does not override them. */
const DEFAULT_CONFIG: CacheConfig = {
// CRITICAL: Geographic queries like "musea in Amsterdam" vs "musea in Noord-Holland"
// have ~93% embedding similarity. A 0.85 threshold causes false cache hits.
// Must be ≥0.97 to avoid geographic false positives (matching BACKEND_DSPy threshold).
similarityThreshold: 0.97, // Raised from 0.85 to prevent geographic false positives
ttlMs: 24 * 60 * 60 * 1000, // 24 hours
maxEntries: 200,
enableFuzzyMatch: true,
fuzzyThreshold: 0.90, // Raised from 0.85 for stricter text matching
enableSharedCache: true,
sharedCacheUrl: '/api/cache', // Qdrant-backed cache on archief.support
embedApiUrl: '/api/embed',
// Phase 5: Clarity Trading (Rule 46 Evolution)
useClarityCheck: true, // Enable by default to bypass cache for ambiguous queries
clarityThreshold: 0.7, // Based on Pavlyshyn's "Clarity Traders" research
};
// ============================================================================
// Utility Functions
// ============================================================================
/**
 * Cosine similarity of two numeric vectors.
 *
 * @param a - First vector
 * @param b - Second vector
 * @returns The cosine of the angle between `a` and `b`; 0 when the vectors
 *          differ in length, are empty, or either one has zero magnitude.
 */
export function cosineSimilarity(a: number[], b: number[]): number {
  const dimensions = a.length;
  if (dimensions === 0 || dimensions !== b.length) {
    return 0;
  }

  let dot = 0;
  let squaredA = 0;
  let squaredB = 0;
  let index = 0;
  while (index < dimensions) {
    const x = a[index];
    const y = b[index];
    dot += x * y;
    squaredA += x * x;
    squaredB += y * y;
    index += 1;
  }

  const denominator = Math.sqrt(squaredA) * Math.sqrt(squaredB);
  if (denominator === 0) {
    return 0;
  }
  return dot / denominator;
}
/**
 * Normalize query text for comparison.
 *
 * Steps: Unicode NFC composition, lower-casing, replacing every character that
 * is not a letter, digit, underscore or whitespace with a space, collapsing
 * whitespace runs, and trimming.
 *
 * Letters and digits are matched with Unicode property escapes rather than the
 * ASCII-only `\w`, so accented Dutch/Frisian characters (ë, é, â, ...) stay
 * inside their word instead of being turned into spaces ("fryslân" must not
 * become "frysl n"). Output for pure-ASCII input is unchanged.
 *
 * @param query - Raw query text
 * @returns Lower-case, punctuation-free, single-spaced text
 */
export function normalizeQuery(query: string): string {
  return query
    .normalize('NFC') // compose "e" + combining diaeresis into "ë" so both spellings compare equal
    .toLowerCase()
    .replace(/[^\p{L}\p{N}_\s]/gu, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}
/**
 * Word-level Jaccard similarity of two texts.
 *
 * Both inputs are passed through normalizeQuery and split on whitespace; the
 * result is |shared words| / |all distinct words|.
 *
 * @param a - First text
 * @param b - Second text
 * @returns Ratio in the range 0-1 (0 if the union of words is empty)
 */
function jaccardSimilarity(a: string, b: string): number {
  const wordsA = new Set(normalizeQuery(a).split(/\s+/));
  const wordsB = new Set(normalizeQuery(b).split(/\s+/));

  let shared = 0;
  wordsA.forEach(word => {
    if (wordsB.has(word)) {
      shared += 1;
    }
  });

  // |A ∪ B| = |A| + |B| - |A ∩ B|
  const distinct = wordsA.size + wordsB.size - shared;
  if (distinct === 0) {
    return 0;
  }
  return shared / distinct;
}
/**
 * Character-level similarity derived from the Levenshtein edit distance.
 *
 * Both inputs are normalized with normalizeQuery first. The result is
 * `1 - distance / length of the longer normalized text`, so identical texts
 * score 1 and texts sharing nothing score 0.
 *
 * @param a - First text
 * @param b - Second text
 * @returns Similarity in the range 0-1
 */
function levenshteinSimilarity(a: string, b: string): number {
  const source = normalizeQuery(a);
  const target = normalizeQuery(b);
  if (source === target) return 1;

  const longest = Math.max(source.length, target.length);
  if (longest === 0) return 1;

  // Rolling-row dynamic programme: `previousRow[j]` is the distance between the
  // first (i - 1) characters of `target` and the first j characters of `source`.
  let previousRow: number[] = [];
  for (let j = 0; j <= source.length; j++) {
    previousRow.push(j);
  }

  for (let i = 1; i <= target.length; i++) {
    const currentRow: number[] = [i];
    for (let j = 1; j <= source.length; j++) {
      if (target.charAt(i - 1) === source.charAt(j - 1)) {
        currentRow.push(previousRow[j - 1]);
      } else {
        const substitution = previousRow[j - 1] + 1;
        const insertion = currentRow[j - 1] + 1;
        const deletion = previousRow[j] + 1;
        currentRow.push(Math.min(substitution, insertion, deletion));
      }
    }
    previousRow = currentRow;
  }

  const distance = previousRow[source.length];
  return 1 - (distance / longest);
}
/**
 * Build an identifier for a new cache entry.
 *
 * Format: `cache_<epoch ms>_<up to 9 base-36 characters>`. The random suffix
 * only reduces the chance of collisions between ids created in the same
 * millisecond; it is not cryptographically strong.
 *
 * @returns A new identifier string
 */
function generateCacheId(): string {
  // slice(2, 11) skips the leading "0." and keeps at most 9 characters; it
  // replaces the deprecated String.prototype.substr(2, 9) with the same result.
  const randomSuffix = Math.random().toString(36).slice(2, 11);
  return `cache_${Date.now()}_${randomSuffix}`;
}
// ============================================================================
// Type Hierarchy DAG (Phase 2 - Topological Distance)
// ============================================================================
// Based on Pavlyshyn's "The Shape of Knowledge": Embeddings lose structural
// information. The SHAPE of the query's relationship to the ontology matters.
/**
* GLAMORCUBESFIXPHDNT type hierarchy as a DAG.
* Each type maps to its parent types (toward HeritageCustodian root).
*
* Keys are upper-case: '*' is the root, single letters are the base types and
* dotted keys ('M.ART') are subtypes of the base type before the dot. Every
* entry currently lists exactly one parent, so the structure is a tree.
*/
const TYPE_HIERARCHY: Record<string, string[]> = {
// Root
'*': [], // HeritageCustodian (universal root)
// Tier 1: Base types (direct children of root)
'G': ['*'], 'L': ['*'], 'A': ['*'], 'M': ['*'], 'O': ['*'],
'R': ['*'], 'C': ['*'], 'U': ['*'], 'B': ['*'], 'E': ['*'],
'S': ['*'], 'F': ['*'], 'I': ['*'], 'X': ['*'], 'P': ['*'],
'H': ['*'], 'D': ['*'], 'N': ['*'], 'T': ['*'],
// Tier 2: Subtypes (examples - extend as needed from schema)
'M.ART': ['M'], 'M.HISTORY': ['M'], 'M.SCIENCE': ['M'], 'M.NATURAL': ['M'],
'A.MUNICIPAL': ['A'], 'A.STATE': ['A'], 'A.CORPORATE': ['A'], 'A.REGIONAL': ['A'],
'L.PUBLIC': ['L'], 'L.ACADEMIC': ['L'], 'L.SPECIAL': ['L'],
'E.UNIVERSITY': ['E'], 'E.SCHOOL': ['E'],
'H.CHURCH': ['H'], 'H.MOSQUE': ['H'], 'H.SYNAGOGUE': ['H'],
};
/**
* Depth cache for topological distance calculation.
* Maps a type code to its depth below the root; filled lazily by getTypeDepth.
*/
const depthCache: Map<string, number> = new Map();
/**
 * Depth of a type code below the hierarchy root, memoized in depthCache.
 *
 * The root '*' has depth 0. A code that is missing from TYPE_HIERARCHY (or has
 * no parents) is treated as a direct child of the root (depth 1). Otherwise
 * the depth is one more than the shallowest parent.
 *
 * @param typeCode - Type code to measure
 * @returns Number of edges between `typeCode` and the root
 */
function getTypeDepth(typeCode: string): number {
  const remembered = depthCache.get(typeCode);
  if (remembered !== undefined) {
    return remembered;
  }

  let depth: number;
  if (typeCode === '*') {
    depth = 0;
  } else {
    const parents = TYPE_HIERARCHY[typeCode];
    if (!parents || parents.length === 0) {
      // Unknown type: assume it hangs directly under the root.
      depth = 1;
    } else {
      let shallowestParent = Infinity;
      for (const parent of parents) {
        shallowestParent = Math.min(shallowestParent, getTypeDepth(parent));
      }
      depth = 1 + shallowestParent;
    }
  }

  depthCache.set(typeCode, depth);
  return depth;
}
/**
 * Lowest common ancestor of two type codes in TYPE_HIERARCHY.
 *
 * Only the first listed parent of each type is followed, and a code without an
 * entry is treated as a child of the root '*'.
 *
 * @param type1 - First type code
 * @param type2 - Second type code
 * @returns The deepest code that is an ancestor of (or equal to) both inputs;
 *          '*' when they only share the root
 */
function findLCA(type1: string, type2: string): string {
  if (type1 === type2) return type1;
  if (type1 === '*' || type2 === '*') return '*';

  const firstParentOf = (code: string): string => TYPE_HIERARCHY[code]?.[0] || '*';

  // Collect type1 and all of its ancestors; the root is always included.
  const lineage = new Set<string>(['*']);
  for (let node = type1; node && node !== '*'; node = firstParentOf(node)) {
    lineage.add(node);
  }

  // Climb from type2 until a member of that lineage is reached. The climb
  // always ends at '*' at the latest, because the root is in the lineage.
  for (let node = type2; node; node = firstParentOf(node)) {
    if (lineage.has(node)) {
      return node;
    }
  }

  return '*'; // Only reachable for an empty type2; the root is common to everything
}
/**
 * Topological distance between two institution types.
 *
 * The distance is the number of hierarchy edges on the path between the two
 * types through their lowest common ancestor, divided by 4 (twice the current
 * maximum depth of 2) and capped at 1.0. Identical inputs give 0.0. Codes are
 * upper-cased before lookup.
 *
 * @param type1 - First type code (e.g. 'M' or 'M.ART')
 * @param type2 - Second type code
 * @returns Distance in the range 0.0-1.0
 *
 * @example
 * topologicalDistance('M', 'M')               // 0.0  (identical)
 * topologicalDistance('M', 'M.ART')           // 0.25 (parent and child)
 * topologicalDistance('M.ART', 'M.HISTORY')   // 0.5  (siblings under M)
 * topologicalDistance('M', 'A')               // 0.5  (two base types)
 * topologicalDistance('M.ART', 'A.MUNICIPAL') // 1.0  (subtypes of different base types)
 */
export function topologicalDistance(type1: string, type2: string): number {
  if (type1 === type2) {
    return 0;
  }

  const first = type1.toUpperCase();
  const second = type2.toUpperCase();

  const ancestorDepth = getTypeDepth(findLCA(first, second));
  const hopsFromFirst = getTypeDepth(first) - ancestorDepth;
  const hopsFromSecond = getTypeDepth(second) - ancestorDepth;

  // Deepest level of the hierarchy at present: root -> base type -> subtype.
  const deepestLevel = 2;
  const longestPossiblePath = 2 * deepestLevel;

  return Math.min((hopsFromFirst + hopsFromSecond) / longestPossiblePath, 1.0);
}
/**
 * Blend embedding similarity with structural (type-hierarchy) similarity.
 *
 * When both type codes are present the result is
 * `embeddingWeight * embeddingSimilarity + (1 - embeddingWeight) * (1 - topologicalDistance)`.
 * When either type is missing the embedding similarity is returned unchanged.
 *
 * @param embeddingSimilarity - Cosine similarity of embeddings (0-1)
 * @param queryType - Institution type from query
 * @param cachedType - Institution type from cached entry
 * @param embeddingWeight - Weight for embedding similarity (default 0.7)
 * @returns Combined similarity score (0-1)
 */
export function combinedSimilarity(
  embeddingSimilarity: number,
  queryType: string | undefined,
  cachedType: string | undefined,
  embeddingWeight: number = 0.7
): number {
  if (queryType && cachedType) {
    const structuralSimilarity = 1 - topologicalDistance(queryType, cachedType);
    return embeddingWeight * embeddingSimilarity + (1 - embeddingWeight) * structuralSimilarity;
  }
  // Without both types there is nothing structural to compare.
  return embeddingSimilarity;
}
// ============================================================================
// Phase 5: Clarity Trading (Rule 46 Evolution)
// ============================================================================
// Based on Pavlyshyn's "Clarity Traders: Beyond Vibe Coding":
// The real work is bringing clarity to ambiguity, not generating code/cache keys.
/**
 * Patterns that indicate ambiguity in queries.
 *
 * Each rule that matches a query subtracts `penalty` from its clarity score
 * and contributes `type` to the list of reported ambiguities. The patterns
 * carry no `g` flag, so `test()` is stateless and the rules can be reused.
 */
const AMBIGUITY_PATTERNS: Array<{ pattern: RegExp; type: string; penalty: number }> = [
  // Temporal ambiguity: "old", "recent", "historical" without dates
  { pattern: /\b(oude?|old|recent|historical|historisch)\b(?!.*\d{4})/i, type: 'temporal_vague', penalty: 0.15 },
  // Size ambiguity: "large", "small", "big" without metrics.
  // Dutch forms are spelled out: the former `grote?` matched "grot" (cave) but
  // never "groot", and `klein` never matched the inflected "kleine".
  { pattern: /\b(groot|grote|small|large|big|klein|kleine)\b/i, type: 'size_vague', penalty: 0.10 },
  // Quality ambiguity: "best", "good", "important"
  { pattern: /\b(beste?|good|best|important|belangrijk)\b/i, type: 'quality_vague', penalty: 0.10 },
  // Vague quantifiers: "some", "many", "few"
  { pattern: /\b(sommige|some|many|veel|few|weinig)\b/i, type: 'quantity_vague', penalty: 0.05 },
  // Pronouns without antecedents: "it", "they", "this"
  { pattern: /^(het|it|they|this|dat|die)\b/i, type: 'pronoun_start', penalty: 0.20 },
  // Very short queries (likely incomplete)
  { pattern: /^.{1,10}$/i, type: 'too_short', penalty: 0.25 },
];
/**
* Patterns that indicate high clarity.
*
* Each rule that matches a query adds `bonus` to its clarity score; `type` is
* a descriptive label that calculateClarity does not read.
*/
const CLARITY_PATTERNS: Array<{ pattern: RegExp; type: string; bonus: number }> = [
// Specific location mentioned
{ pattern: /\b(amsterdam|rotterdam|utrecht|den haag|groningen)\b/i, type: 'specific_city', bonus: 0.10 },
// Specific type mentioned
{ pattern: /\b(museum|archief|bibliotheek|archive|library)\b/i, type: 'specific_type', bonus: 0.10 },
// Specific intent
{ pattern: /\b(hoeveel|welke|waar|count|list|how many)\b/i, type: 'clear_intent', bonus: 0.10 },
// ISO codes or identifiers
{ pattern: /\b(ISIL|Q\d+|NL-[A-Z]{2,})\b/i, type: 'identifier', bonus: 0.15 },
// Date ranges (two four-digit years joined by an ASCII hyphen)
{ pattern: /\b\d{4}\s*[-]\s*\d{4}\b/i, type: 'date_range', bonus: 0.10 },
];
/**
 * Score how unambiguous a query is (Phase 5 - Clarity Trading).
 *
 * Scoring starts at 0.7. Every matching AMBIGUITY_PATTERNS rule subtracts its
 * penalty, every matching CLARITY_PATTERNS rule adds its bonus, and each of the
 * extracted `institutionType`, `location`, `intent` and `institutionSubtype`
 * adds 0.05 when present. The total is clamped to the range 0-1.
 *
 * High clarity (≥0.7): safe to answer from cache.
 * Low clarity (<0.7): the query should bypass the cache and go to RAG.
 *
 * @param query - The user's query text
 * @param entities - Already-extracted entities (to avoid re-extraction)
 * @returns Clarity score (0.0 - 1.0) and the `type` labels of all matched ambiguity rules
 */
export function calculateClarity(
  query: string,
  entities?: ExtractedEntities
): { clarityScore: number; ambiguities: string[] } {
  const ENTITY_BONUS = 0.05;

  // Penalties first, in table order.
  const matchedAmbiguities = AMBIGUITY_PATTERNS.filter(rule => rule.pattern.test(query));
  const ambiguities = matchedAmbiguities.map(rule => rule.type);

  let total = 0.7; // Start at threshold
  for (const rule of matchedAmbiguities) {
    total -= rule.penalty;
  }

  // Then bonuses for explicit, specific wording.
  for (const rule of CLARITY_PATTERNS) {
    if (rule.pattern.test(query)) {
      total += rule.bonus;
    }
  }

  // Finally, a small bonus per structured entity that was extracted.
  if (entities) {
    const signals = [
      entities.institutionType,
      entities.location,
      entities.intent,
      entities.institutionSubtype, // Very specific
    ];
    for (const signal of signals) {
      if (signal) {
        total += ENTITY_BONUS;
      }
    }
  }

  return {
    clarityScore: Math.max(0, Math.min(1, total)),
    ambiguities,
  };
}
/**
 * Entity extraction plus clarity assessment (Phase 5 - Clarity Trading).
 *
 * Runs extractEntitiesFast, scores the query with calculateClarity, and
 * returns the entities extended with `clarityScore` and `ambiguities`
 * (`undefined` when no ambiguity rule matched).
 * Queries with clarityScore < 0.7 should bypass cache.
 *
 * @param query - The user's query text
 * @returns Extracted entities with clarity score and ambiguities
 */
export function extractEntitiesWithClarity(query: string): ExtractedEntities {
  const baseEntities = extractEntitiesFast(query);
  const assessment = calculateClarity(query, baseEntities);
  const hasAmbiguities = assessment.ambiguities.length > 0;

  return {
    ...baseEntities,
    clarityScore: assessment.clarityScore,
    ambiguities: hasAmbiguities ? assessment.ambiguities : undefined,
  };
}
// ============================================================================
// Phase 4: Message Handler (Smalltalk-Inspired Introspection)
// ============================================================================
/**
* Last decision trace for explainability.
*
* Module-level state shared by every caller: each LOOKUP handled by
* handleCacheMessage overwrites it, and INVALIDATE resets it to null.
*/
let lastDecisionTrace: CacheDecisionTrace | null = null;
/**
 * Dispatch a cache message and record an explainable decision trace.
 *
 * Behaviour per message type:
 * - LOOKUP: requires `arguments.query`. Entities and a clarity score are
 *   extracted first; when the clarity score is below 0.7 the cache is bypassed
 *   and a miss is reported. Otherwise `cache.lookup` is called. Either way the
 *   decision is stored as the last decision trace and returned.
 * - EXPLAIN: returns the last decision trace, or an error if there is none.
 * - STORE: requires `arguments.query` and `arguments.response`; forwards them
 *   (and the embedding, or null) to `cache.store`.
 * - INVALIDATE: calls `cache.clear` and forgets the last decision trace.
 *
 * Anything thrown while handling is converted into `{ success: false, error }`.
 *
 * NOTE(review): the clarity cut-off is the literal 0.7 rather than the
 * configurable `clarityThreshold` — confirm whether that is intended.
 *
 * @param message - The cache message to handle
 * @param cache - The SemanticCache instance
 * @returns Response with optional decision trace
 */
export async function handleCacheMessage(
  message: CacheMessage,
  cache: SemanticCache
): Promise<CacheMessageResponse> {
  const startedAt = performance.now();

  try {
    const kind = message.type;

    if (kind === 'LOOKUP') {
      const query = message.arguments.query;
      if (!query) {
        return { success: false, error: 'LOOKUP requires query argument' };
      }
      const embedding = message.arguments.embedding;
      const entities = extractEntitiesWithClarity(query);
      const structuredKey = generateStructuredCacheKey(entities);

      // Phase 5: ambiguous queries never reach the cache.
      const clarity = entities.clarityScore;
      if (clarity !== undefined && clarity < 0.7) {
        const bypassTrace: CacheDecisionTrace = {
          query,
          entities,
          structuredKey,
          hit: false,
          method: 'none',
          similarity: 0,
          reasoning: `Query clarity too low (${clarity.toFixed(2)}). Ambiguities: ${entities.ambiguities?.join(', ')}. Bypassing cache.`,
          lookupTimeMs: performance.now() - startedAt,
        };
        lastDecisionTrace = bypassTrace;
        return {
          success: true,
          result: { found: false, similarity: 0, method: 'none', lookupTimeMs: bypassTrace.lookupTimeMs },
          trace: bypassTrace,
        };
      }

      const result = await cache.lookup(query, embedding);
      const reasoning = result.found
        ? `Cache hit via ${result.method} matching (similarity: ${result.similarity.toFixed(3)}) from ${result.tier} tier.`
        : `Cache miss. No entry matched with sufficient similarity.`;
      const lookupTrace: CacheDecisionTrace = {
        query,
        entities,
        structuredKey,
        hit: result.found,
        tier: result.tier,
        method: result.method,
        similarity: result.similarity,
        reasoning,
        provenance: result.entry?.epistemicProvenance,
        lookupTimeMs: result.lookupTimeMs,
      };
      lastDecisionTrace = lookupTrace;
      return { success: true, result, trace: lookupTrace };
    }

    if (kind === 'EXPLAIN') {
      if (!lastDecisionTrace) {
        return { success: false, error: 'No decision to explain. Perform a LOOKUP first.' };
      }
      return { success: true, trace: lastDecisionTrace };
    }

    if (kind === 'STORE') {
      if (!message.arguments.query || !message.arguments.response) {
        return { success: false, error: 'STORE requires query and response arguments' };
      }
      await cache.store(
        message.arguments.query,
        message.arguments.embedding || null,
        message.arguments.response
      );
      return { success: true, result: { found: true, similarity: 1, method: 'exact', lookupTimeMs: 0 } };
    }

    if (kind === 'INVALIDATE') {
      await cache.clear();
      lastDecisionTrace = null;
      return { success: true };
    }

    return { success: false, error: `Unknown message type: ${message.type}` };
  } catch (error) {
    return { success: false, error: String(error) };
  }
}
/**
* Get the last decision trace for explainability.
* Implements Smalltalk-style introspection.
*
* @returns The trace recorded by the most recent LOOKUP handled by
* handleCacheMessage, or null when no LOOKUP has been handled yet or the
* cache was invalidated since.
*/
export function explainLastDecision(): CacheDecisionTrace | null {
return lastDecisionTrace;
}
// ============================================================================
// Entity Extraction (Ontology-Driven per Rule 46)
// ============================================================================
// Uses vocabulary extracted from LinkML schema files for entity detection.
// Prevents geographic false positives by extracting structured entities from queries.
// "musea in Amsterdam" and "musea in Noord-Holland" have ~93% embedding similarity
// but completely different answers. Entity extraction ensures they get different cache keys.
import { lookupTermLog } from './types-vocabulary';
/**
 * Institution type patterns (Dutch + English) - FALLBACK only when vocabulary unavailable.
 *
 * Patterns are anchored at the start of a word only, so plural and inflected
 * forms that extend a listed stem still match.
 */
const INSTITUTION_PATTERNS: Record<InstitutionTypeCode, RegExp> = {
  // Accepts English "gallery/galleries" and Dutch "galerie/galerij" (single l) plus the double-l misspelling.
  G: /\b(gall?er(y|ies|ie|ij)|kunstgall?eri)/i,
  // "bibliothe(ek|ke)" covers singular "bibliotheek" and plural "bibliotheken";
  // the former "bibliothe[ek]en?" could never match the singular.
  L: /\b(librar(y|ies)|bibliothe(ek|ke)|bieb)/i,
  A: /\b(archie[fv]en?|archives?|archief)/i,
  M: /\b(muse(um|a|ums?)|musea)/i,
  O: /\b(overheid|government|offici[eë]le?)/i,
  R: /\b(onderzoek|research|kenniscentr[ua]m?)/i,
  C: /\b(bedrijf|corporat|company)/i,
  U: /\b(onbekend|unknown)/i,
  B: /\b(botanisch|zoo|dierentuin|aquarium)/i,
  E: /\b(universiteit|school|onderwijs|education|university|hogeschool)/i,
  S: /\b(vereniging|genootschap|society|stichting)/i,
  F: /\b(monument|standbeeld|landmark|feature)/i,
  I: /\b(immateri[eë]el|intangible|erfgoed)/i,
  X: /\b(gemengd|mixed|combinatie)/i,
  P: /\b(priv[eé]|particulier|personal|collection)/i,
  H: /\b(kerk|church|moskee|synagoge|temple|holy|religieus)/i,
  D: /\b(digitaal|digital|online|platform)/i,
  N: /\b(ngo|non-profit|goede doelen)/i,
  T: /\b(culinair|parfum|smaak|smell|taste)/i,
};

/** Dutch provinces (ISO 3166-2:NL codes for backend compatibility) */
const DUTCH_PROVINCES: Array<{ name: string; variants: string[]; code: string }> = [
  { name: 'noord-holland', variants: ['noord holland', 'nh', 'north holland'], code: 'NH' },
  { name: 'zuid-holland', variants: ['zuid holland', 'zh', 'south holland'], code: 'ZH' },
  { name: 'utrecht', variants: ['ut'], code: 'UT' },
  { name: 'gelderland', variants: ['gld', 'guelders'], code: 'GE' },
  { name: 'noord-brabant', variants: ['noord brabant', 'nb', 'brabant'], code: 'NB' },
  { name: 'limburg', variants: ['lb'], code: 'LI' },
  { name: 'overijssel', variants: ['ov'], code: 'OV' },
  { name: 'friesland', variants: ['fryslân', 'frisia', 'fr'], code: 'FR' },
  { name: 'groningen', variants: ['gr'], code: 'GR' },
  { name: 'drenthe', variants: ['dr'], code: 'DR' },
  { name: 'zeeland', variants: ['zl', 'zealand'], code: 'ZE' },
  { name: 'flevoland', variants: ['fl'], code: 'FL' },
];

/** Major Dutch cities (top 50+ by population and heritage significance) */
const DUTCH_CITIES: string[] = [
  'amsterdam', 'rotterdam', 'den haag', "'s-gravenhage", 'the hague',
  'utrecht', 'eindhoven', 'groningen', 'tilburg', 'almere',
  'breda', 'nijmegen', 'arnhem', 'haarlem', 'enschede',
  'amersfoort', 'apeldoorn', 'zaanstad', 'haarlemmermeer', "'s-hertogenbosch",
  'den bosch', 'hertogenbosch', 'zwolle', 'zoetermeer', 'leiden',
  'maastricht', 'dordrecht', 'ede', 'alphen aan den rijn', 'alkmaar',
  'emmen', 'delft', 'deventer', 'venlo', 'sittard',
  'leeuwarden', 'hilversum', 'heerlen', 'amstelveen', 'oss',
  'schiedam', 'spijkenisse', 'helmond', 'purmerend', 'roosendaal',
  'vlaardingen', 'gouda', 'hoorn', 'middelburg', 'lelystad',
  // Heritage-significant smaller cities
  'naarden', 'muiden', 'enkhuizen', 'edam', 'volendam',
  'zaanse schans', 'kinderdijk', 'giethoorn', 'valkenburg',
];

/** Order in which INSTITUTION_PATTERNS are tried; the first matching type wins. */
const INSTITUTION_TYPE_PRIORITY: readonly InstitutionTypeCode[] = [
  'M', 'A', 'L', 'G', 'E', 'S', 'H', 'B', 'R', 'D', 'F', 'I', 'N', 'C', 'P', 'T', 'O', 'X', 'U',
];

/** A single letter or digit in any script; used to find word edges around place names. */
const PLACE_WORD_CHAR = /[\p{L}\p{N}]/u;

/** Dutch adjectival ending ("Amsterdams", "Amsterdamse") directly followed by a word edge. */
const DUTCH_PLACE_ADJECTIVE_SUFFIX = /^se?(?![\p{L}\p{N}])/u;

/** Place terms shorter than this (abbreviations, "ede", "oss") must match exactly, without a suffix. */
const MIN_INFLECTED_PLACE_LENGTH = 4;

const COUNT_INTENT_PATTERN = /\b(hoeveel|aantal|count|how many|tel|totaal|som)\b/i;
const LIST_INTENT_PATTERN = /\b(welke|lijst|list|toon|show|geef|overzicht|alle)\b/i;
const INFO_INTENT_PATTERN = /\b(wat is|who is|info|informatie|details|over)\b/i;

/**
 * Whether `term` occurs in `text` as a whole word (or whole phrase).
 *
 * Plain substring matching is not good enough for place names: the province
 * abbreviation "nh" occurs inside "arnhem", "ov" inside "eindhoven" and
 * "overzicht", "lb" inside "tilburg", and the city "ede" inside "nederland".
 * A match therefore has to start and end at a word edge. Terms of at least
 * MIN_INFLECTED_PLACE_LENGTH characters may additionally carry the Dutch
 * adjectival ending -s/-se so that "Amsterdamse musea" still resolves.
 */
function matchesPlaceTerm(text: string, term: string): boolean {
  if (term.length === 0) return false;
  const allowAdjective = term.length >= MIN_INFLECTED_PLACE_LENGTH;

  for (let at = text.indexOf(term); at !== -1; at = text.indexOf(term, at + 1)) {
    if (at > 0 && PLACE_WORD_CHAR.test(text.charAt(at - 1))) {
      continue; // starts in the middle of a longer word
    }
    let tail = text.slice(at + term.length);
    if (allowAdjective) {
      tail = tail.replace(DUTCH_PLACE_ADJECTIVE_SUFFIX, '');
    }
    if (tail === '' || !PLACE_WORD_CHAR.test(tail.charAt(0))) {
      return true;
    }
  }
  return false;
}

/** First institution type (in priority order) whose fallback pattern matches, if any. */
function detectInstitutionTypeByPattern(normalized: string): InstitutionTypeCode | undefined {
  return INSTITUTION_TYPE_PRIORITY.find(code => INSTITUTION_PATTERNS[code].test(normalized));
}

/**
 * Province or city mentioned in the text, if any.
 *
 * Provinces are checked before cities, so a name shared by both (Utrecht,
 * Groningen) resolves to the province.
 */
function detectLocation(
  normalized: string
): { location: string; locationType: 'city' | 'province' } | null {
  for (const province of DUTCH_PROVINCES) {
    const terms = [province.name, ...province.variants];
    if (terms.some(term => matchesPlaceTerm(normalized, term))) {
      // Use ISO code for backend compatibility
      return { location: province.code, locationType: 'province' };
    }
  }
  for (const city of DUTCH_CITIES) {
    if (matchesPlaceTerm(normalized, city)) {
      // Normalize city name for cache key (lowercase, no special chars)
      return { location: city.replace(/[^a-z]/g, ''), locationType: 'city' };
    }
  }
  return null;
}

/** Query intent (count, list or info) from keywords; checked in that order. */
function detectIntent(normalized: string): 'count' | 'list' | 'info' | null {
  if (COUNT_INTENT_PATTERN.test(normalized)) return 'count';
  if (LIST_INTENT_PATTERN.test(normalized)) return 'list';
  if (INFO_INTENT_PATTERN.test(normalized)) return 'info';
  return null;
}

/** Add location and intent to `entities`; fields are only set when something was detected. */
function applyLocationAndIntent(entities: ExtractedEntities, normalized: string): void {
  const place = detectLocation(normalized);
  if (place) {
    entities.location = place.location;
    entities.locationType = place.locationType;
  }
  const intent = detectIntent(normalized);
  if (intent) {
    entities.intent = intent;
  }
}

/**
 * Synchronous entity extraction for structured cache key generation.
 *
 * Detects the institution type with the fallback regex patterns, then the
 * location (province first, then city) and the intent. No LLM calls.
 *
 * The vocabulary-based extractor is also started in the background, but its
 * result is only logged; it never changes the returned object.
 * NOTE(review): that background call therefore has no effect on the result —
 * confirm whether it is kept on purpose (e.g. to warm the vocabulary).
 *
 * @param query - The user's query text
 * @returns Extracted entities (institution type, location, intent); fields
 *          for which nothing was detected are left unset
 */
export function extractEntitiesFast(query: string): ExtractedEntities {
  const normalized = query.toLowerCase().trim();
  const entities: ExtractedEntities = {};

  // Fire-and-forget vocabulary enrichment (logging only).
  void extractEntitiesWithVocabulary(query)
    .then(vocabEntities => {
      if (vocabEntities.institutionSubtype || vocabEntities.recordSetType) {
        console.log(`[SemanticCache] Vocabulary enrichment: ${JSON.stringify(vocabEntities)}`);
      }
    })
    .catch(() => {
      // Vocabulary unavailable; the regex-based result below stands on its own.
    });

  // 1. Institution type detection via regex (sync fallback)
  const typeCode = detectInstitutionTypeByPattern(normalized);
  if (typeCode) {
    entities.institutionType = typeCode;
    entities.extractionMethod = 'regex';
  }

  // 2. Location and intent
  applyLocationAndIntent(entities, normalized);
  return entities;
}

/**
 * Async version of entity extraction using vocabulary lookup.
 * Provides richer results including subtypes and record set types.
 *
 * The lower-cased, trimmed query is passed to lookupTermLog; when that yields
 * a match its type, subtype, record set type and Wikidata entity are used.
 * Otherwise the institution type falls back to the regex patterns. Location
 * and intent are detected exactly as in extractEntitiesFast.
 *
 * @param query - The user's query text
 * @returns Extracted entities with ontology-derived subtypes
 */
export async function extractEntitiesWithVocabulary(query: string): Promise<ExtractedEntities> {
  const normalized = query.toLowerCase().trim();
  const entities: ExtractedEntities = {};

  // 1. Try vocabulary-based type/subtype detection
  const vocabMatch = await lookupTermLog(normalized);
  if (vocabMatch) {
    entities.institutionType = vocabMatch.typeCode;
    entities.institutionSubtype = vocabMatch.subtypeName;
    entities.recordSetType = vocabMatch.recordSetType;
    entities.wikidataEntity = vocabMatch.wikidata;
    entities.extractionMethod = 'vocabulary';
  } else {
    // Fall back to regex patterns
    const typeCode = detectInstitutionTypeByPattern(normalized);
    if (typeCode) {
      entities.institutionType = typeCode;
      entities.extractionMethod = 'regex';
    }
  }

  // 2. Location and intent
  applyLocationAndIntent(entities, normalized);
  return entities;
}
/**
 * Build the structured cache key for a set of extracted entities.
 *
 * Layout (segments joined with ":" and the whole key lower-cased):
 *   "{intent}:{institutionType}[.{subtype}][:{recordSetType}]:{location}"
 *
 * Defaults for missing entities: intent → "query", institution type → "any",
 * location → "nl". Subtype and record set type are lower-cased and every run of
 * characters outside a-z / 0-9 is collapsed to a single "_".
 *
 * Examples:
 *  - {intent: count, type: M, location: amsterdam}          → "count:m:amsterdam"
 *  - same plus subtype ART_MUSEUM                           → "count:m.art_museum:amsterdam"
 *  - {intent: list, type: A, subtype: "Municipal Archive",
 *     recordSetType: CIVIL_REGISTRY, location: NH}          → "list:a.municipal_archive:civil_registry:nh"
 *  - {} (nothing extracted)                                 → "query:any:nl"
 *
 * Because subtype and record set type are part of the key, a "kunstmuseum"
 * query and a plain "museum" query end up in different cache segments.
 *
 * @param entities - Entities extracted from the query
 * @returns Structured cache key string
 */
export function generateStructuredCacheKey(entities: ExtractedEntities): string {
  // Shared normaliser for the free-text segments (subtype, record set type)
  const toSegment = (raw: string): string => raw.toLowerCase().replace(/[^a-z0-9]+/g, '_');

  const baseType = entities.institutionType || 'any';
  const typeSegment = entities.institutionSubtype
    ? `${baseType}.${toSegment(entities.institutionSubtype)}`
    : baseType;

  const segments: string[] = [entities.intent || 'query', typeSegment];

  // Record set type is optional and only appears for archival-style queries
  if (entities.recordSetType) {
    segments.push(toSegment(entities.recordSetType));
  }

  // Location always closes the key
  segments.push(entities.location || 'nl');

  return segments.join(':').toLowerCase();
}
/**
 * Decide whether a cached entry's entities are compatible with a query's entities.
 *
 * Rule, applied independently to location, institution type, institution
 * subtype and record set type:
 *  - the query specifies the field → the cached entry must carry the SAME value;
 *    a different value, or no value at all, is a mismatch;
 *  - the query does not specify the field → the field imposes no constraint.
 *
 * Consequences:
 *  - "musea in Amsterdam" never matches a cached "musea in Rotterdam" (location differs).
 *  - "kunstmusea in Amsterdam" never matches a cached generic "musea in Amsterdam"
 *    (the query is more specific than the cached entry).
 *  - "burgerlijke stand archieven" never matches a cached "archieven in Amsterdam"
 *    (record set type missing on the cached side).
 *  - The check is asymmetric: a generic query CAN match a more specific cached
 *    entry (e.g. a "museum" query against a cached "kunstmuseum" answer).
 *
 * Intent is deliberately not compared: a "count" query matching a "list" entry
 * is treated as tolerable, unlike a location or type mix-up.
 *
 * @param queryEntities - Entities extracted from the incoming query
 * @param cachedEntities - Entities stored with (or re-extracted from) the cached entry
 * @returns `true` when every field the query specifies is matched exactly
 */
export function entitiesMatch(queryEntities: ExtractedEntities, cachedEntities: ExtractedEntities): boolean {
  // Fields that must agree whenever the query specifies them
  const constrainedFields = ['location', 'institutionType', 'institutionSubtype', 'recordSetType'] as const;

  return constrainedFields.every(field => {
    const requested = queryEntities[field];
    // Unspecified on the query side → no constraint; otherwise require equality
    // (an absent cached value can never equal a specified query value).
    return !requested || requested === cachedEntities[field];
  });
}
// ============================================================================
// Embedding Service
// ============================================================================
/**
 * Request an embedding vector for `text` from the embedding service.
 *
 * Sends `POST apiUrl` with JSON body `{ text }` and expects a JSON reply
 * containing an `embedding` array of numbers.
 *
 * This function never rejects. Every failure mode is logged with
 * `console.warn` and reported as `null`: network error, non-2xx status,
 * unparsable body, or a payload without a numeric `embedding` array.
 *
 * @param text - Text to embed
 * @param apiUrl - Full URL of the embedding endpoint
 * @returns The embedding vector, or `null` when none could be obtained
 */
export async function generateEmbedding(text: string, apiUrl: string): Promise<number[] | null> {
  try {
    const response = await fetch(apiUrl, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ text }),
    });

    if (!response.ok) {
      console.warn(`[SemanticCache] Embedding service returned HTTP ${response.status}`);
      return null;
    }

    // The body is untrusted JSON: validate before handing it out as number[]
    const data: unknown = await response.json();
    const embedding = (data as { embedding?: unknown } | null)?.embedding;
    if (Array.isArray(embedding) && embedding.every(value => typeof value === 'number')) {
      return embedding as number[];
    }

    console.warn('[SemanticCache] Embedding service returned a malformed payload');
    return null;
  } catch (error) {
    console.warn('[SemanticCache] Embedding service unavailable:', error);
    return null;
  }
}
// ============================================================================
// SemanticCache Class
// ============================================================================
/**
 * Two-tier query/response cache.
 *
 *  - Local tier: an IndexedDB object store (`STORE_NAME` in `DB_NAME`) owned by
 *    this browser profile.
 *  - Shared tier (optional, `config.enableSharedCache`): an HTTP service at
 *    `config.sharedCacheUrl` exposing `/lookup`, `/store` and `/clear`.
 *
 * Hits from either tier are validated with `entitiesMatch` whenever the query
 * names a location or institution type, so that semantically similar but
 * factually different queries (e.g. two different cities) are not conflated.
 */
export class SemanticCache {
  /** Open IndexedDB handle; `null` until `initialize()` has succeeded. */
  private db: IDBDatabase | null = null;
  /** In-flight open request, shared so concurrent `initialize()` calls open the database once. */
  private initPromise: Promise<void> | null = null;
  private config: CacheConfig;
  /** In-memory counters for this instance; reset by `clear()`. */
  private stats = {
    localHits: 0,
    sharedHits: 0,
    misses: 0,
    lookupCount: 0,
  };

  /**
   * @param config - Overrides merged on top of `DEFAULT_CONFIG`
   */
  constructor(config: Partial<CacheConfig> = {}) {
    this.config = { ...DEFAULT_CONFIG, ...config };
  }

  // ==========================================================================
  // Database Operations
  // ==========================================================================

  /**
   * Open the IndexedDB database if it is not open yet.
   *
   * Safe to call repeatedly and concurrently: callers that arrive while an open
   * request is pending share that request instead of opening a second
   * connection. A failed open is not cached, so a later call retries.
   *
   * @throws Rejects with the IndexedDB error when the database cannot be opened
   */
  async initialize(): Promise<void> {
    if (this.db) return;

    if (!this.initPromise) {
      this.initPromise = this.openDatabase();
    }

    try {
      await this.initPromise;
    } finally {
      // Success is tracked through `this.db`; dropping the settled promise
      // keeps a failure retryable.
      this.initPromise = null;
    }
  }

  /** Open the database, creating the object store and its indexes on first use/upgrade. */
  private openDatabase(): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      const request = indexedDB.open(DB_NAME, DB_VERSION);

      request.onerror = () => {
        console.error('[SemanticCache] IndexedDB error:', request.error);
        reject(request.error);
      };

      request.onsuccess = () => {
        this.db = request.result;
        console.log('[SemanticCache] Initialized');
        resolve();
      };

      request.onupgradeneeded = (event) => {
        const db = (event.target as IDBOpenDBRequest).result;
        if (!db.objectStoreNames.contains(STORE_NAME)) {
          const store = db.createObjectStore(STORE_NAME, { keyPath: 'id' });
          store.createIndex('queryNormalized', 'queryNormalized', { unique: false });
          store.createIndex('timestamp', 'timestamp', { unique: false });
        }
      };
    });
  }

  // ==========================================================================
  // Cache Operations
  // ==========================================================================

  /**
   * Look up a query in both cache tiers with ENTITY-AWARE matching.
   *
   * Flow:
   *  1. Extract entities from the query (with clarity scoring when
   *     `config.useClarityCheck` is on).
   *  2. With the clarity check on, a query whose `clarityScore` is below
   *     `config.clarityThreshold` bypasses the cache and is counted as a miss.
   *  3. Shared tier (when enabled): a hit is accepted only if the query has no
   *     location/institution type, or `entitiesMatch` approves the cached entry.
   *     Accepted hits are copied into the local store (best effort).
   *  4. Local tier: exact, then semantic (needs `embedding`), then fuzzy matching,
   *     restricted to entity-compatible entries.
   *
   * NOTE(review): the shared tier is consulted before the local one, whereas the
   * file header describes the opposite order — confirm which is intended.
   *
   * @param query - The user's query text
   * @param embedding - Optional query embedding; only used for local semantic matching
   * @returns Lookup result; `lookupTimeMs` covers the whole call
   */
  async lookup(
    query: string,
    embedding?: number[] | null
  ): Promise<CacheLookupResult> {
    const startTime = performance.now();
    await this.initialize();

    // STEP 1: Entity extraction with optional clarity scoring (Phase 5)
    const queryEntities = this.config.useClarityCheck
      ? extractEntitiesWithClarity(query)
      : extractEntitiesFast(query);
    const structuredKey = generateStructuredCacheKey(queryEntities);
    const hasSpecificEntities = !!(queryEntities.location || queryEntities.institutionType);

    // STEP 2 (Phase 5): Bypass cache for ambiguous queries
    if (this.config.useClarityCheck &&
        queryEntities.clarityScore !== undefined &&
        queryEntities.clarityScore < this.config.clarityThreshold) {
      console.log(
        `[SemanticCache] CLARITY BYPASS: "${query.slice(0, 40)}..." ` +
        `clarityScore=${queryEntities.clarityScore.toFixed(2)} < threshold=${this.config.clarityThreshold} ` +
        `ambiguities=[${queryEntities.ambiguities?.join(', ') || 'none'}]`
      );
      this.stats.misses++;
      this.stats.lookupCount++;
      return {
        found: false,
        similarity: 0,
        method: 'none',
        lookupTimeMs: performance.now() - startTime,
      };
    }

    if (hasSpecificEntities) {
      console.log(
        `[SemanticCache] Entity extraction: ${JSON.stringify(queryEntities)} → key="${structuredKey}"`
      );
    }

    // Shared cache (server generates embeddings)
    if (this.config.enableSharedCache) {
      const sharedResult = await this.lookupShared(query, null);

      if (sharedResult?.found && sharedResult.entry) {
        if (hasSpecificEntities) {
          // ENTITY VALIDATION: shared entries built by lookupShared carry no
          // entities, so they are re-extracted from the cached query text.
          const cachedEntities = sharedResult.entry.entities || extractEntitiesFast(sharedResult.entry.query);

          if (!entitiesMatch(queryEntities, cachedEntities)) {
            console.log(
              `[SemanticCache] BLOCKED geographic false positive! ` +
              `Query="${query.slice(0, 30)}..." entities=${JSON.stringify(queryEntities)} vs ` +
              `Cached="${sharedResult.entry.query.slice(0, 30)}..." entities=${JSON.stringify(cachedEntities)}`
            );
            // Fall through to local cache or miss - DO NOT return shared hit with mismatched entities
          } else {
            this.stats.sharedHits++;
            this.stats.lookupCount++;
            await this.populateLocalFromShared(sharedResult.entry);
            console.log(
              `[SemanticCache] SHARED HIT (entity-validated): "${query.slice(0, 40)}..." ` +
              `similarity=${sharedResult.similarity.toFixed(3)}`
            );
            return { ...sharedResult, tier: 'shared', method: 'structured', lookupTimeMs: performance.now() - startTime };
          }
        } else {
          // No specific entities - allow pure semantic match
          this.stats.sharedHits++;
          this.stats.lookupCount++;
          await this.populateLocalFromShared(sharedResult.entry);
          console.log(
            `[SemanticCache] SHARED HIT: "${query.slice(0, 40)}..." ` +
            `similarity=${sharedResult.similarity.toFixed(3)}`
          );
          return { ...sharedResult, tier: 'shared', lookupTimeMs: performance.now() - startTime };
        }
      }
    }

    // Local IndexedDB with entity validation
    const localResult = await this.lookupLocal(query, embedding || null, queryEntities);
    if (localResult.found) {
      this.stats.localHits++;
      this.stats.lookupCount++;
      console.log(
        `[SemanticCache] LOCAL HIT (${localResult.method}): "${query.slice(0, 40)}..." ` +
        `similarity=${localResult.similarity.toFixed(3)}`
      );
      // Report end-to-end time (including any shared-tier round trip), like every other branch
      return { ...localResult, tier: 'local', lookupTimeMs: performance.now() - startTime };
    }

    // Total miss
    this.stats.misses++;
    this.stats.lookupCount++;
    console.log(`[SemanticCache] MISS: "${query.slice(0, 40)}..."`);
    return {
      found: false,
      similarity: 0,
      method: 'none',
      lookupTimeMs: performance.now() - startTime,
    };
  }

  /**
   * Copy an accepted shared-tier entry into the local store.
   * Best effort: a local write failure is logged and must not turn a valid
   * shared hit into a failed lookup.
   */
  private async populateLocalFromShared(entry: CachedQuery): Promise<void> {
    try {
      await this.storeLocal(entry);
    } catch (error) {
      console.warn('[SemanticCache] Failed to copy shared hit into local cache:', error);
    }
  }

  /**
   * Search the local store: exact normalized match first, then embedding
   * similarity, then (if enabled) fuzzy text similarity.
   *
   * When the query names a location or institution type, only entries that
   * pass `entitiesMatch` are candidates for the semantic and fuzzy stages.
   * The exact-match stage is not entity-filtered.
   *
   * @param query - Raw query text
   * @param embedding - Query embedding, or `null` to skip the semantic stage
   * @param queryEntities - Pre-extracted entities; extracted here when omitted
   */
  private async lookupLocal(
    query: string,
    embedding: number[] | null,
    queryEntities?: ExtractedEntities
  ): Promise<CacheLookupResult> {
    const startTime = performance.now();
    const normalized = normalizeQuery(query);
    const entities = queryEntities || extractEntitiesFast(query);
    const hasSpecificEntities = !!(entities.location || entities.institutionType);

    // Exact match first
    const exactMatch = await this.getByNormalizedQuery(normalized);
    if (exactMatch && !this.isExpired(exactMatch)) {
      await this.updateAccessStats(exactMatch.id);
      return {
        found: true,
        entry: exactMatch,
        similarity: 1.0,
        method: 'exact',
        lookupTimeMs: performance.now() - startTime,
      };
    }

    // Get all entries for similarity comparison
    const allEntries = await this.getAllEntries();
    const validEntries = allEntries.filter(e => !this.isExpired(e));

    let bestMatch: CachedQuery | undefined;
    let bestSimilarity = 0;
    let matchMethod: 'semantic' | 'fuzzy' | 'structured' = 'semantic';

    // ENTITY-AWARE matching: Only consider entries with matching entities
    // This prevents geographic false positives (Amsterdam vs Noord-Holland)
    const entityCompatibleEntries = hasSpecificEntities
      ? validEntries.filter(entry => {
          const cachedEntities = entry.entities || extractEntitiesFast(entry.query);
          return entitiesMatch(entities, cachedEntities);
        })
      : validEntries; // If no specific entities in query, consider all entries

    // Type key handed to combinedSimilarity: "TYPE.SUBTYPE" (subtype upper-cased,
    // non-letters removed), plain "TYPE", or undefined when no type is known.
    const toTypeKey = (e: ExtractedEntities): string | undefined =>
      e.institutionSubtype
        ? `${e.institutionType}.${e.institutionSubtype.toUpperCase().replace(/[^A-Z]/g, '')}`
        : (e.institutionType || undefined);

    // Semantic similarity (if embeddings available)
    if (embedding && embedding.length > 0) {
      // Loop-invariant: depends only on the query
      const queryTypeKey = toTypeKey(entities);

      for (const entry of entityCompatibleEntries) {
        if (entry.embedding && entry.embedding.length > 0) {
          const rawSimilarity = cosineSimilarity(embedding, entry.embedding);

          // Apply topological distance penalty (Phase 2 - Rule 46 Evolution)
          // This prevents "art museum" from matching "natural history museum"
          const cachedEntities = entry.entities || extractEntitiesFast(entry.query);
          const cachedTypeKey = toTypeKey(cachedEntities);
          const similarity = combinedSimilarity(rawSimilarity, queryTypeKey, cachedTypeKey);

          if (similarity > bestSimilarity && similarity >= this.config.similarityThreshold) {
            bestSimilarity = similarity;
            bestMatch = entry;
            matchMethod = hasSpecificEntities ? 'structured' : 'semantic';
          }
        }
      }
    }

    // Fuzzy text matching fallback (only when the semantic stage found nothing)
    if (!bestMatch && this.config.enableFuzzyMatch) {
      for (const entry of entityCompatibleEntries) {
        const jaccard = jaccardSimilarity(normalized, entry.queryNormalized);
        const levenshtein = levenshteinSimilarity(normalized, entry.queryNormalized);
        const combined = (jaccard * 0.6) + (levenshtein * 0.4);

        if (combined > bestSimilarity && combined >= this.config.fuzzyThreshold) {
          bestSimilarity = combined;
          bestMatch = entry;
          matchMethod = 'fuzzy';
        }
      }
    }

    if (bestMatch) {
      await this.updateAccessStats(bestMatch.id);
      return {
        found: true,
        entry: bestMatch,
        similarity: bestSimilarity,
        method: matchMethod,
        lookupTimeMs: performance.now() - startTime,
      };
    }

    return {
      found: false,
      similarity: bestSimilarity,
      method: 'none',
      lookupTimeMs: performance.now() - startTime,
    };
  }

  /**
   * Query the shared cache service (`POST {sharedCacheUrl}/lookup`).
   *
   * The reply's snake_case fields are mapped onto a `CachedQuery`; the entry's
   * `embedding` is always `null` because embeddings stay on the server.
   *
   * @returns The lookup result, or `null` when the request failed or the server
   *          answered with a non-2xx status
   */
  private async lookupShared(
    query: string,
    _embedding: number[] | null // Unused: server generates embeddings
  ): Promise<CacheLookupResult | null> {
    try {
      const response = await fetch(`${this.config.sharedCacheUrl}/lookup`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          query,
          language: 'nl', // Server generates embeddings via Qdrant
          similarity_threshold: this.config.similarityThreshold,
        }),
      });

      if (!response.ok) return null;

      const data = await response.json();

      if (data.found && data.entry) {
        const entry: CachedQuery = {
          id: data.entry.id,
          query: data.entry.query,
          queryNormalized: data.entry.query_normalized || normalizeQuery(data.entry.query),
          embedding: null,
          response: {
            answer: data.entry.response.answer,
            sources: data.entry.response.sources || [],
            institutions: data.entry.response.institutions || [],
            sparqlQuery: data.entry.response.sparql_query || data.entry.response.sparqlQuery,
          },
          timestamp: data.entry.timestamp,
          hitCount: data.entry.hit_count || 0,
          lastAccessed: data.entry.last_accessed || Date.now(),
          language: data.entry.language || 'nl',
          llmProvider: data.entry.model || 'unknown',
          source: 'shared',
        };

        return {
          found: true,
          entry,
          // Callers format this with toFixed(); never let a missing field through
          similarity: typeof data.similarity === 'number' ? data.similarity : 0,
          method: data.method || 'semantic',
          lookupTimeMs: data.lookup_time_ms || 0,
          tier: 'shared',
        };
      }

      return { found: false, similarity: 0, method: 'none', lookupTimeMs: 0 };
    } catch (error) {
      console.warn('[SemanticCache] Shared lookup error:', error);
      return null;
    }
  }

  /**
   * Store a query and response in cache with extracted entities and epistemic provenance.
   * Entities are extracted at storage time to enable entity-aware cache matching.
   *
   * The entry is written to the local store (awaited) and, when the shared
   * cache is enabled, sent to the shared service without waiting for it.
   *
   * @param query - Original query text
   * @param embedding - Query embedding, or `null` when none is available
   * @param response - Response payload to cache
   * @param llmProvider - Provider label recorded on the entry and in the default derivation chain
   * @param language - Language of the query/response
   * @param provenance - Optional provenance fields; missing ones get defaults
   * @returns The generated id of the new cache entry
   */
  async store(
    query: string,
    embedding: number[] | null,
    response: CachedResponse,
    llmProvider: string = 'zai',
    language: 'nl' | 'en' = 'nl',
    provenance?: Partial<EpistemicProvenance>
  ): Promise<string> {
    // Extract entities at storage time for future entity-aware matching
    const entities = extractEntitiesFast(query);
    const structuredKey = generateStructuredCacheKey(entities);

    // Build epistemic provenance with defaults
    const epistemicProvenance: EpistemicProvenance = {
      dataSource: provenance?.dataSource || 'RAG_PIPELINE',
      dataTier: provenance?.dataTier || 4,
      sourceTimestamp: provenance?.sourceTimestamp || new Date().toISOString(),
      derivationChain: provenance?.derivationChain || [`LLM:${llmProvider}`],
      revalidationPolicy: provenance?.revalidationPolicy || 'weekly',
      confidenceScore: provenance?.confidenceScore,
      sourceReferences: provenance?.sourceReferences,
      epistemicNotes: provenance?.epistemicNotes,
    };

    // Determine holon level based on entities
    let holonLevel: 'micro' | 'meso' | 'macro' = 'meso';
    if (entities.intent === 'info' && entities.location) {
      holonLevel = 'micro'; // Specific entity query
    } else if (!entities.institutionType && !entities.location) {
      holonLevel = 'macro'; // Broad aggregate query
    }

    const entry: CachedQuery = {
      id: generateCacheId(),
      query,
      queryNormalized: normalizeQuery(query),
      embedding,
      response,
      timestamp: Date.now(),
      hitCount: 0,
      lastAccessed: Date.now(),
      language,
      llmProvider,
      source: 'local',
      entities,
      structuredKey,
      epistemicProvenance,
      holonLevel,
    };

    // Store locally
    await this.storeLocal(entry);

    // Store in shared cache (fire-and-forget)
    if (this.config.enableSharedCache) {
      this.storeShared(entry).catch(err => {
        console.warn('[SemanticCache] Failed to store in shared cache:', err);
      });
    }

    console.log(
      `[SemanticCache] Stored with entities: "${query.slice(0, 40)}..." → ${structuredKey}`
    );
    return entry.id;
  }

  /**
   * Write (insert or overwrite by `id`) an entry into the local object store,
   * then trigger size enforcement in the background.
   */
  private async storeLocal(entry: CachedQuery): Promise<void> {
    await this.initialize();

    return new Promise((resolve, reject) => {
      if (!this.db) {
        reject(new Error('Database not initialized'));
        return;
      }

      const transaction = this.db.transaction([STORE_NAME], 'readwrite');
      const store = transaction.objectStore(STORE_NAME);
      const request = store.put(entry);

      request.onsuccess = () => {
        console.log(`[SemanticCache] Stored: "${entry.query.slice(0, 40)}..."`);
        // Eviction runs in the background; attach a handler so a failure is
        // logged instead of surfacing as an unhandled rejection.
        this.enforceMaxEntries().catch(error => {
          console.warn('[SemanticCache] Failed to enforce max entries:', error);
        });
        resolve();
      };
      request.onerror = () => reject(request.error);
    });
  }

  /**
   * Send an entry to the shared cache service (`POST {sharedCacheUrl}/store`).
   * No embedding is sent. Failures are logged and never thrown.
   */
  private async storeShared(entry: CachedQuery): Promise<void> {
    try {
      const response = await fetch(`${this.config.sharedCacheUrl}/store`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          query: entry.query,
          // Server generates embeddings via sentence-transformers
          response: entry.response,
          language: entry.language,
          model: entry.llmProvider,
          // Include extracted entities for server-side entity-aware matching
          entities: entry.entities,
          structured_key: entry.structuredKey,
        }),
      });

      if (!response.ok) {
        console.warn('[SemanticCache] Store shared failed:', response.status);
      }
    } catch (error) {
      console.warn('[SemanticCache] Store shared error:', error);
    }
  }

  // ==========================================================================
  // Helper Methods
  // ==========================================================================

  /** Fetch one entry via the `queryNormalized` index; `null` when absent or the database is not open. */
  private async getByNormalizedQuery(normalized: string): Promise<CachedQuery | null> {
    return new Promise((resolve, reject) => {
      if (!this.db) { resolve(null); return; }

      const transaction = this.db.transaction([STORE_NAME], 'readonly');
      const store = transaction.objectStore(STORE_NAME);
      const index = store.index('queryNormalized');
      const request = index.get(normalized);

      request.onsuccess = () => resolve(request.result || null);
      request.onerror = () => reject(request.error);
    });
  }

  /** Load every entry from the local store; empty when the database is not open. */
  private async getAllEntries(): Promise<CachedQuery[]> {
    return new Promise((resolve, reject) => {
      if (!this.db) { resolve([]); return; }

      const transaction = this.db.transaction([STORE_NAME], 'readonly');
      const store = transaction.objectStore(STORE_NAME);
      const request = store.getAll();

      request.onsuccess = () => resolve(request.result || []);
      request.onerror = () => reject(request.error);
    });
  }

  /**
   * Increment `hitCount` and refresh `lastAccessed` for an entry.
   * Resolves once the read has completed and the write has been queued in the
   * same transaction; it does not wait for the write to finish.
   */
  private async updateAccessStats(id: string): Promise<void> {
    return new Promise((resolve, reject) => {
      if (!this.db) { resolve(); return; }

      const transaction = this.db.transaction([STORE_NAME], 'readwrite');
      const store = transaction.objectStore(STORE_NAME);
      const getRequest = store.get(id);

      getRequest.onsuccess = () => {
        const entry = getRequest.result as CachedQuery;
        if (entry) {
          entry.hitCount++;
          entry.lastAccessed = Date.now();
          store.put(entry);
        }
        resolve();
      };
      getRequest.onerror = () => reject(getRequest.error);
    });
  }

  /** An entry is expired once it is older than `config.ttlMs` (measured from `timestamp`). */
  private isExpired(entry: CachedQuery): boolean {
    return Date.now() - entry.timestamp > this.config.ttlMs;
  }

  /**
   * Evict the lowest-scoring entries until at most `config.maxEntries` remain.
   *
   * NOTE(review): `lastAccessed / Date.now()` is very close to 1 for any
   * epoch-millisecond timestamp, so in practice `hitCount` decides the order and
   * recency only breaks ties — confirm this is the intended policy.
   */
  private async enforceMaxEntries(): Promise<void> {
    const entries = await this.getAllEntries();
    if (entries.length <= this.config.maxEntries) return;

    // Sort by access frequency and recency (ascending: weakest candidates first)
    const scored = entries.map(e => ({
      entry: e,
      score: e.hitCount * 0.3 + (e.lastAccessed / Date.now()) * 0.7,
    }));
    scored.sort((a, b) => a.score - b.score);

    const toRemove = scored.slice(0, entries.length - this.config.maxEntries);
    for (const item of toRemove) {
      await this.delete(item.entry.id);
    }
  }

  /**
   * Delete one entry from the local store by id.
   * Does nothing when the database has not been opened.
   */
  async delete(id: string): Promise<void> {
    return new Promise((resolve, reject) => {
      if (!this.db) { resolve(); return; }

      const transaction = this.db.transaction([STORE_NAME], 'readwrite');
      const store = transaction.objectStore(STORE_NAME);
      const request = store.delete(id);

      request.onsuccess = () => resolve();
      request.onerror = () => reject(request.error);
    });
  }

  /**
   * Clear the local store, ask the shared service to clear itself (when the
   * shared cache is enabled), and reset the in-memory statistics.
   *
   * The database is opened first if necessary, so entries persisted by an
   * earlier session are removed even when nothing has been looked up yet.
   * If the database cannot be opened, the local step is skipped with a warning
   * and the remaining steps still run.
   */
  async clear(): Promise<void> {
    try {
      await this.initialize();
    } catch (error) {
      console.warn('[SemanticCache] Local cache unavailable, skipping local clear:', error);
    }

    // Clear Tier 1: Local IndexedDB cache
    await new Promise<void>((resolve, reject) => {
      if (!this.db) { resolve(); return; }

      const transaction = this.db.transaction([STORE_NAME], 'readwrite');
      const store = transaction.objectStore(STORE_NAME);
      const request = store.clear();

      request.onsuccess = () => {
        console.log('[SemanticCache] Local cache cleared (Tier 1)');
        resolve();
      };
      request.onerror = () => reject(request.error);
    });

    // Clear Tier 2: Shared Qdrant cache via backend API
    if (this.config.enableSharedCache) {
      try {
        const response = await fetch(`${this.config.sharedCacheUrl}/clear`, {
          method: 'DELETE',
        });
        if (response.ok) {
          const result = await response.json();
          console.log('[SemanticCache] Shared cache cleared (Tier 2):', result);
        } else {
          console.warn('[SemanticCache] Failed to clear shared cache:', response.status);
        }
      } catch (error) {
        console.warn('[SemanticCache] Error clearing shared cache:', error);
      }
    }

    // Reset stats
    this.stats = { localHits: 0, sharedHits: 0, misses: 0, lookupCount: 0 };
  }

  /**
   * Snapshot of this instance's hit/miss counters.
   * `totalEntries` is always reported as 0 because counting entries would need
   * an asynchronous store query.
   */
  getStats(): CacheStats {
    const totalHits = this.stats.localHits + this.stats.sharedHits;
    const totalRequests = totalHits + this.stats.misses;

    return {
      totalEntries: 0, // Would need to query
      totalHits,
      totalMisses: this.stats.misses,
      hitRate: totalRequests > 0 ? totalHits / totalRequests : 0,
      localHits: this.stats.localHits,
      sharedHits: this.stats.sharedHits,
    };
  }

  /** Return a shallow copy of the active configuration. */
  getConfig(): CacheConfig {
    return { ...this.config };
  }

  /** Merge the given overrides into the active configuration. */
  setConfig(config: Partial<CacheConfig>): void {
    this.config = { ...this.config, ...config };
  }
}
// ============================================================================
// Singleton Export
// ============================================================================
export const semanticCache = new SemanticCache();
export default semanticCache;