glam/frontend/src/hooks/useMultiDatabaseRAG.ts
kempersc 0a38225b36 feat(frontend): Add multi-select filters, URL params, and UI improvements
- Institution Browser: multi-select for types and countries
- URL query param sync for shareable filter URLs
- New utility: countryNames.ts with flag emoji support
- New utility: imageProxy.ts for image URL handling
- New component: SearchableMultiSelect dropdown
- Career timeline CSS and component updates
- Media gallery improvements
- Lazy load error boundary component
- Version check utility
2025-12-15 01:47:11 +01:00

1011 lines
34 KiB
TypeScript

/**
* useMultiDatabaseRAG.ts - Multi-Database RAG (Retrieval-Augmented Generation) Hook
*
* Orchestrates queries across multiple databases for conversational AI:
* - Qdrant: Vector similarity search for semantic retrieval
* - Oxigraph: SPARQL queries for structured RDF data
* - TypeDB: TypeQL queries for knowledge graph traversal
*
* Features intelligent semantic caching to reduce API costs:
* - Caches query embeddings and responses in IndexedDB
* - Uses cosine similarity to find semantically similar past queries
* - Configurable similarity threshold (default 0.92)
* - Typical API savings: 30-50% for conversational UIs
*
* Based on DSPy RAG patterns for heritage institution conversations.
* Self-hosted infrastructure - no external API keys required.
*
* @see https://dspy.ai/
*/
import { useState, useCallback, useRef, useEffect } from 'react';
import type { QdrantSearchResult } from './useQdrant';
import { semanticCache, type CachedResponse, type CacheStats, type CacheLookupResult } from '../lib/storage/semantic-cache';
// Configuration - every backend is addressed by a path relative to the page origin;
// the Caddy proxy is expected to route each prefix to the matching service.
const API_BASE = ''; // Empty prefix, so `${API_BASE}/api/embed` stays a relative URL
const QDRANT_URL = '/qdrant'; // Prefix for the Qdrant collection search/scroll endpoints
const SPARQL_URL = '/sparql'; // Oxigraph prefix; queries are POSTed to `${SPARQL_URL}/query`
const TYPEDB_URL = '/api/typedb'; // TypeDB prefix; queries are POSTed to `${TYPEDB_URL}/query`
const DSPY_URL = '/api/dspy/rag'; // Backend serves at /api/dspy/rag/query
// ============================================================================
// Types
// ============================================================================
/**
 * Raw retrieval results gathered for a single question, one array per backend.
 */
export interface RAGContext {
// Points returned by the Qdrant search (vector search or keyword fallback)
qdrantResults: QdrantSearchResult[];
// Rows taken from the `results.bindings` array of the SPARQL JSON response
sparqlResults: Record<string, unknown>[];
// Rows taken from the `results` field of the TypeDB endpoint response
typedbResults: Record<string, unknown>[];
// Sum of the lengths of the three arrays above
totalRetrieved: number;
}
/**
 * Retrieved result from backend - can be a person or institution.
 * Returned by HeritageRAGPipeline.forward() for visualization.
 *
 * Only `type` and `name` are guaranteed; every other field is optional and is
 * filled in from whatever the backend supplied for that result.
 */
export interface RetrievedResult {
  type: 'person' | 'institution';
  // Person fields (when type === 'person')
  person_id?: string;
  name: string;
  headline?: string;
  custodian_name?: string;
  custodian_slug?: string;
  /** Location copied from the backend `metadata.location` field, when the result carries metadata. */
  location?: string;
  heritage_relevant?: boolean;
  heritage_type?: string; // GLAMORCUBESFIXPHDNT single letter code
  linkedin_url?: string | null;
  /** Relevance score; taken from the backend `scores` object or a flat `score` field. */
  score?: number;
  // Institution fields (when type === 'institution')
  institution_type?: string;
  city?: string;
  country?: string;
  description?: string;
}
// Kind of entity a query targets; populated from the backend's `query_type` response field.
export type QueryType = 'person' | 'institution';
/**
 * Complete result of one RAG query, as resolved by `queryRAG`.
 */
export interface RAGResponse {
// Generated answer text, or a user-facing error message when the backend call failed
answer: string;
// SPARQL query reported in the backend's visualization payload, if any
sparqlQuery?: string;
typeqlQuery?: string;
// Retrieval results collected for this question
context: RAGContext;
visualizationType?: VisualizationType;
visualizationData?: VisualizationData;
// Summary entries for the top Qdrant hits
sources: RAGSource[];
// Heuristic estimate rather than a calibrated probability; 0 marks a backend error answer
confidence: number;
// New fields from backend for person/institution visualization
retrievedResults?: RetrievedResult[];
queryType?: QueryType;
}
/**
 * One citation-style entry describing where a retrieved item came from.
 */
export interface RAGSource {
// Backend that produced the item
database: 'qdrant' | 'oxigraph' | 'typedb';
// Identifier of the item, stringified
id: string;
name?: string;
score?: number;
// Short excerpt; the hook fills it with the first 200 characters of the payload description
snippet?: string;
}
// Presentation modes a response can ask the UI to render.
export type VisualizationType =
| 'none'
| 'map' // Geographic visualization
| 'timeline' // Temporal visualization
| 'network' // Graph/relationship visualization
| 'chart' // Bar/line charts
| 'table' // Tabular data
| 'card' // Institution cards
| 'gallery'; // Image gallery
/**
 * Data bundle handed to the UI together with a visualization type.
 * The extraction helper in this file fills `institutions` for every type and
 * `coordinates` only for 'map'; it does not populate the remaining fields.
 */
export interface VisualizationData {
type: VisualizationType;
// Institutions derived from the Qdrant payloads
institutions?: InstitutionData[];
// Map markers for institutions that have usable coordinates
coordinates?: GeoCoordinate[];
timeline?: TimelineEvent[];
graphData?: GraphVisualizationData;
chartData?: ChartData;
}
/**
 * Flattened description of one institution for display.
 * Built in this file from Qdrant point payloads, reading several alternative
 * payload keys per field.
 */
export interface InstitutionData {
// Qdrant point id, stringified
id: string;
name: string;
type?: string;
city?: string;
province?: string;
country?: string;
latitude?: number;
longitude?: number;
description?: string;
website?: string;
// Read from the payload's `isil` / `isil_code` key
isil?: string;
// Read from the payload's `wikidata` / `wikidata_id` key
wikidata?: string;
// Read from the payload's `rating` / `google_rating` key
rating?: number;
// Read from the payload's `reviews` / `review_count` key
reviews?: number;
photoCount?: number;
}
/**
 * A single map marker.
 */
export interface GeoCoordinate {
lat: number;
lng: number;
// Marker caption; the extraction helper uses the institution name
label: string;
type?: string;
// Full institution record behind the marker
data?: InstitutionData;
}
/**
 * A single entry on a timeline visualization.
 */
export interface TimelineEvent {
// NOTE(review): the date string format is not fixed anywhere in this file - confirm with the consumer
date: string;
label: string;
description?: string;
type?: string;
}
/**
 * Node/edge lists for a network visualization.
 */
export interface GraphVisualizationData {
nodes: Array<{
id: string;
label: string;
type: string;
attributes?: Record<string, unknown>;
}>;
edges: Array<{
id: string;
// presumably the `id` values of the connected nodes - verify against the renderer
source: string;
target: string;
label: string;
type?: string;
}>;
}
/**
 * Labels plus one or more numeric series for a chart visualization.
 */
export interface ChartData {
labels: string[];
datasets: Array<{
label: string;
// presumably one value per entry in `labels` - verify against the chart component
data: number[];
backgroundColor?: string | string[];
borderColor?: string;
}>;
}
/**
 * One message in the chat transcript.
 */
export interface ConversationMessage {
id: string;
role: 'user' | 'assistant' | 'system';
content: string;
timestamp: Date;
// RAG result attached to the message, if any
response?: RAGResponse;
// Assistant messages with this flag set are not used as answers when building backend history
isLoading?: boolean;
error?: string;
}
/**
 * Everything the `useMultiDatabaseRAG` hook exposes to a component.
 */
export interface UseMultiDatabaseRAGReturn {
// State
// True while a `queryRAG` call is in flight
isLoading: boolean;
// Error thrown by the most recent `queryRAG` call, if it failed
error: Error | null;
// Retrieval context of the most recent query (fresh or served from cache)
lastContext: RAGContext | null;
// Cache state
cacheEnabled: boolean;
// Outcome of the most recent cache lookup; reset to null at the start of each query
lastCacheLookup: CacheLookupResult | null;
// Core RAG function
// Rejects with an Error when retrieval or the backend call throws
queryRAG: (
question: string,
options?: RAGOptions
) => Promise<RAGResponse>;
// Individual database queries (for debugging/advanced use)
searchQdrant: (query: string, limit?: number) => Promise<QdrantSearchResult[]>;
querySparql: (sparql: string) => Promise<Record<string, unknown>[]>;
queryTypeDB: (typeql: string) => Promise<Record<string, unknown>[]>;
// Utility functions
// Resets lastContext, error and lastCacheLookup to null
clearContext: () => void;
detectVisualizationType: (question: string, results: RAGContext) => VisualizationType;
// Cache management functions
setCacheEnabled: (enabled: boolean) => void;
getCacheStats: () => Promise<CacheStats>;
clearCache: () => Promise<{ localCleared: boolean; sharedCleared: boolean }>;
setCacheSimilarityThreshold: (threshold: number) => void;
}
/**
 * Per-query options accepted by `queryRAG`. Every field is optional.
 */
export interface RAGOptions {
// Passed to the semantic cache together with `language` on lookup and store
model?: string;
// Answer language sent to the backend (default: 'nl')
language?: 'nl' | 'en';
// Maximum Qdrant points to retrieve (default: 20)
maxQdrantResults?: number;
// LIMIT applied to the generated SPARQL query (default: 50)
maxSparqlResults?: number;
// Limit applied to the generated TypeQL query (default: 50)
maxTypeDBResults?: number;
// Run the SPARQL retrieval step (default: true)
includeSparql?: boolean;
// Run the TypeDB retrieval step (default: false)
includeTypeDB?: boolean;
// Prior messages; user/assistant pairs are sent to the backend, last 4 turns only
conversationHistory?: ConversationMessage[];
// Cache options
useCache?: boolean; // Enable/disable cache for this query (default: true)
bypassCache?: boolean; // Force fresh query even if cache hit (default: false)
storeInCache?: boolean; // Store result in cache (default: true)
// Embedding model selection
embeddingModel?: 'minilm_384' | 'openai_1536' | 'bge_768' | null; // Embedding model for vector search (default: auto)
}
// ============================================================================
// Helper Functions
// ============================================================================
/**
 * Request an embedding vector for `text` from the local embedding service.
 *
 * POSTs `{ text }` to `${API_BASE}/api/embed` and reads the `embedding` field
 * of the JSON reply. This is deliberately best-effort: a network failure, a
 * non-2xx status, an unparsable body, or a reply without a usable vector all
 * yield `null`, which callers treat as "fall back to keyword search".
 *
 * @param text - Text to embed.
 * @returns A non-empty array of numbers, or `null` when no valid embedding is available.
 */
async function generateEmbedding(text: string): Promise<number[] | null> {
  try {
    const response = await fetch(`${API_BASE}/api/embed`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ text }),
    });
    if (!response.ok) {
      return null;
    }

    const data: unknown = await response.json();
    const embedding = (data as { embedding?: unknown } | null)?.embedding;

    // Validate the shape so callers never receive `undefined` or a non-vector
    // value where the signature promises `number[] | null`.
    const isNumericVector =
      Array.isArray(embedding) &&
      embedding.length > 0 &&
      embedding.every(value => typeof value === 'number');

    return isNumericVector ? (embedding as number[]) : null;
  } catch {
    // Service unreachable or body not JSON: use keyword search instead.
    return null;
  }
}
/**
 * Retrieve points from the `heritage_custodians` Qdrant collection for a free-text query.
 *
 * Strategy:
 * 1. Ask the embedding service for a vector; if one is returned, run a vector
 *    similarity search and return its results when the request succeeds.
 * 2. Otherwise (no vector, or the search request failed) scroll `limit * 2`
 *    points and rank them by the fraction of query keywords (words longer than
 *    two characters) found in the JSON-serialised payload.
 *
 * @param query - User query text.
 * @param limit - Maximum number of results to return (default 10).
 * @returns Matching points, best first. Fallback results with score 0 are dropped.
 * @throws Error when the fallback scroll request returns a non-2xx status.
 */
async function qdrantSearch(
  query: string,
  limit: number = 10
): Promise<QdrantSearchResult[]> {
  const collectionName = 'heritage_custodians';
  const pointsBase = `${QDRANT_URL}/collections/${collectionName}/points`;

  console.log('[Qdrant] Search query:', query);
  console.log('[Qdrant] Limit:', limit);
  console.log('[Qdrant] QDRANT_URL:', QDRANT_URL);

  // Semantic path: only possible when the embedding service answered.
  const vector = await generateEmbedding(query);
  console.log('[Qdrant] Has embedding:', !!vector);

  if (vector) {
    const searchUrl = `${pointsBase}/search`;
    console.log('[Qdrant] Search URL:', searchUrl);
    console.log('[Qdrant] Full search URL:', new URL(searchUrl, window.location.origin).href);

    const searchResponse = await fetch(searchUrl, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ vector, limit, with_payload: true }),
    });
    console.log('[Qdrant] Search response status:', searchResponse.status);

    if (searchResponse.ok) {
      const searchData = await searchResponse.json();
      console.log('[Qdrant] Search results count:', searchData.result?.length || 0);
      return searchData.result || [];
    }

    // A failed vector search is logged, then the keyword fallback below takes over.
    const searchErrorBody = await searchResponse.text();
    console.error('[Qdrant] Search error body:', searchErrorBody);
  }

  // Keyword path: words of three or more characters from the lower-cased query.
  const terms = query.toLowerCase().split(/\s+/).filter(term => term.length > 2);
  console.log('[Qdrant] Fallback: Keywords:', terms);

  const scrollUrl = `${pointsBase}/scroll`;
  console.log('[Qdrant] Scroll URL:', scrollUrl);
  console.log('[Qdrant] Full scroll URL:', new URL(scrollUrl, window.location.origin).href);

  const scrollResponse = await fetch(scrollUrl, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      limit: limit * 2, // Over-fetch so filtering still leaves enough candidates
      with_payload: true,
      with_vector: false,
    }),
  });
  console.log('[Qdrant] Scroll response status:', scrollResponse.status);

  if (!scrollResponse.ok) {
    const scrollErrorBody = await scrollResponse.text();
    console.error('[Qdrant] Scroll error body:', scrollErrorBody);
    throw new Error(`Qdrant scroll failed: ${scrollResponse.status}`);
  }

  const scrollData = await scrollResponse.json();
  const candidates = scrollData.result?.points || [];

  // Score = matched keywords / total keywords (denominator is at least 1).
  const denominator = Math.max(terms.length, 1);
  const ranked = candidates.map((point: { id: string | number; payload: Record<string, unknown> }) => {
    const payload = point.payload || {};
    const haystack = JSON.stringify(payload).toLowerCase();
    let matched = 0;
    for (const term of terms) {
      if (haystack.includes(term)) {
        matched += 1;
      }
    }
    return { id: point.id, score: matched / denominator, payload };
  });

  // Keep only points that matched something, best score first.
  return ranked
    .filter((entry: { score: number }) => entry.score > 0)
    .sort((left: { score: number }, right: { score: number }) => right.score - left.score)
    .slice(0, limit);
}
/**
 * Run a SPARQL query against the Oxigraph endpoint.
 *
 * The query text is POSTed as `application/sparql-query` to
 * `${SPARQL_URL}/query`, asking for SPARQL JSON results.
 *
 * @param query - SPARQL query text, sent verbatim as the request body.
 * @returns The `results.bindings` array of the response, or `[]` when absent.
 * @throws Error containing the status code and response body on a non-2xx status.
 */
async function sparqlQuery(query: string): Promise<Record<string, unknown>[]> {
  const endpoint = `${SPARQL_URL}/query`;
  const reply = await fetch(endpoint, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/sparql-query',
      'Accept': 'application/sparql-results+json',
    },
    body: query,
  });

  if (reply.ok) {
    const parsed = await reply.json();
    return parsed.results?.bindings || [];
  }

  const detail = await reply.text();
  throw new Error(`SPARQL query failed: ${reply.status} - ${detail}`);
}
/**
 * Run a read-only TypeQL query through the TypeDB API endpoint.
 *
 * POSTs `{ query, queryType: 'read' }` as JSON to `${TYPEDB_URL}/query`.
 *
 * @param query - TypeQL query text.
 * @returns The `results` field of the response, or `[]` when absent.
 * @throws Error containing the status code and response body on a non-2xx status.
 */
async function typedbQuery(query: string): Promise<Record<string, unknown>[]> {
  const endpoint = `${TYPEDB_URL}/query`;
  const payload = JSON.stringify({ query, queryType: 'read' });
  const reply = await fetch(endpoint, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: payload,
  });

  if (reply.ok) {
    const parsed = await reply.json();
    return parsed.results || [];
  }

  const detail = await reply.text();
  throw new Error(`TypeDB query failed: ${reply.status} - ${detail}`);
}
/**
 * Pair each user message with the assistant reply that immediately follows it.
 *
 * The backend expects history as `[{ question, answer }, ...]`. Messages that
 * are not part of such a pair are skipped, and assistant messages still marked
 * `isLoading` are never used as an answer.
 */
function pairConversationTurns(
  messages: NonNullable<RAGOptions['conversationHistory']>
): Array<{ question: string; answer: string }> {
  const turns: Array<{ question: string; answer: string }> = [];
  for (let i = 0; i < messages.length - 1; i++) {
    const current = messages[i];
    const next = messages[i + 1];
    if (current.role === 'user' && next.role === 'assistant' && !next.isLoading) {
      turns.push({ question: current.content, answer: next.content });
      i++; // The assistant message is consumed by this pair
    }
  }
  return turns;
}

/**
 * Normalise one backend result into the flat RetrievedResult shape.
 *
 * Backend returns: { person_id, name, scores: {...}, metadata: { headline, custodian_name, ... }, type }
 * Frontend expects: { person_id, name, headline, custodian_name, ..., type }
 *
 * When a `metadata` object is present the descriptive fields are read from it
 * and the score from `scores.combined` (falling back to `scores.vector`).
 * When it is absent the result is already flat, so the same fields and `score`
 * are read from the result itself.
 */
function flattenRetrievedResult(result: Record<string, unknown>): RetrievedResult {
  const metadata = result.metadata as Record<string, unknown> | undefined;
  const scores = result.scores as Record<string, number> | undefined;
  const fields = metadata || result;

  return {
    type: (result.type as 'person' | 'institution') || 'institution',
    person_id: result.person_id as string | undefined,
    name: (result.name as string) || 'Unknown',
    headline: fields.headline as string | undefined,
    custodian_name: fields.custodian_name as string | undefined,
    custodian_slug: fields.custodian_slug as string | undefined,
    // `location` is only forwarded for results that carry a metadata object.
    ...(metadata ? { location: metadata.location as string | undefined } : {}),
    heritage_relevant: fields.heritage_relevant as boolean | undefined,
    heritage_type: fields.heritage_type as string | undefined,
    linkedin_url: fields.linkedin_url as string | undefined,
    score: metadata
      ? (scores?.combined || scores?.vector)
      : (result.score as number | undefined),
    // Institution fields (when type === 'institution')
    institution_type: fields.institution_type as string | undefined,
    city: fields.city as string | undefined,
    country: fields.country as string | undefined,
    description: fields.description as string | undefined,
  };
}

/**
 * Call DSPy backend to generate queries and response.
 *
 * Sends the question, the answer language (default 'nl'), the last four paired
 * conversation turns and the embedding model preference to `${DSPY_URL}/query`.
 *
 * Failure handling:
 * - network error, HTTP 404, HTTP 5xx: resolves with a localized error message
 *   and `confidence: 0`;
 * - any other non-2xx status: resolves with a summary built from `context`
 *   and `confidence: 0.5`;
 * - a 2xx reply whose body is not valid JSON: the parse error propagates.
 *
 * On success `confidence` is an estimate: 0.85 when the backend lists any
 * `sources_used`, 0.6 otherwise. `typeqlQuery` is never populated here.
 *
 * @param question - The user's question.
 * @param context - Retrieval results, used only for the fallback summary.
 * @param options - Per-query options (language, history, embedding model).
 */
async function callDSPy(
  question: string,
  context: RAGContext,
  options: RAGOptions
): Promise<{
  answer: string;
  sparqlQuery?: string;
  typeqlQuery?: string;
  visualizationType?: VisualizationType;
  confidence: number;
  retrievedResults?: RetrievedResult[];
  queryType?: QueryType;
}> {
  const lang = options.language || 'nl';

  // Keep only last 4 turns for context
  const recentContext = pairConversationTurns(options.conversationHistory || []).slice(-4);

  const requestUrl = `${DSPY_URL}/query`;
  const requestBody = {
    question,
    language: lang,
    context: recentContext, // Backend expects conversation history here (paired Q&A)
    include_visualization: true,
    embedding_model: options.embeddingModel || null, // Pass embedding model preference
  };
  console.log('[DSPy] Request URL:', requestUrl);
  console.log('[DSPy] Request body:', JSON.stringify(requestBody, null, 2));
  console.log('[DSPy] Window location:', window.location.href);
  console.log('[DSPy] Full URL being fetched:', new URL(requestUrl, window.location.origin).href);

  let response: Response;
  try {
    response = await fetch(requestUrl, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(requestBody),
    });
    console.log('[DSPy] Response status:', response.status);
    console.log('[DSPy] Response URL:', response.url);
    console.log('[DSPy] Response headers:', Object.fromEntries(response.headers.entries()));
  } catch (networkError) {
    // Network error - server unreachable
    console.error('[DSPy] Network error:', networkError);
    return {
      answer: lang === 'nl'
        ? '⚠️ **Serverfout**: Kan geen verbinding maken met de RAG-server. Controleer of de backend draait op poort 8003.'
        : '⚠️ **Server Error**: Cannot connect to RAG server. Check if backend is running on port 8003.',
      confidence: 0,
    };
  }

  if (!response.ok) {
    // HTTP error - log details for debugging
    const responseBody = await response.text();
    console.error(`[DSPy] HTTP ${response.status}: ${response.statusText}`);
    console.error(`[DSPy] Response body:`, responseBody);

    if (response.status === 404) {
      return {
        answer: lang === 'nl'
          ? '⚠️ **Serverfout (404)**: De RAG API endpoint is niet gevonden. Controleer de proxy configuratie in vite.config.ts en herstart de frontend.'
          : '⚠️ **Server Error (404)**: RAG API endpoint not found. Check proxy configuration in vite.config.ts and restart frontend.',
        confidence: 0,
      };
    }
    if (response.status >= 500) {
      return {
        answer: lang === 'nl'
          ? `⚠️ **Serverfout (${response.status})**: De RAG-server heeft een interne fout. Controleer de backend logs.`
          : `⚠️ **Server Error (${response.status})**: RAG server internal error. Check backend logs.`,
        confidence: 0,
      };
    }
    // Other HTTP errors - fall back to context-based answer
    return {
      answer: generateFallbackAnswer(question, context, lang),
      confidence: 0.5,
    };
  }

  // Map backend DSPyQueryResponse to frontend expected format
  const data = await response.json();
  console.log('[DSPy] Response data (first 500 chars):', JSON.stringify(data, null, 2).slice(0, 500));
  console.log('[DSPy] retrieved_results:', data.retrieved_results);
  console.log('[DSPy] retrieved_results count:', data.retrieved_results?.length ?? 0);
  console.log('[DSPy] query_type:', data.query_type);
  // Debug: log first result if available
  if (data.retrieved_results?.length > 0) {
    console.log('[DSPy] First retrieved result:', JSON.stringify(data.retrieved_results[0], null, 2));
  }

  // Only flatten when the backend really sent a list; skip entries that are
  // not objects so one malformed item cannot abort the whole response.
  const rawResults: unknown = data.retrieved_results;
  const flattenedResults: RetrievedResult[] | undefined = Array.isArray(rawResults)
    ? rawResults
        .filter((item): item is Record<string, unknown> => typeof item === 'object' && item !== null)
        .map(item => flattenRetrievedResult(item))
    : undefined;

  console.log('[DSPy] Flattened results count:', flattenedResults?.length ?? 0);
  if (flattenedResults?.length) {
    console.log('[DSPy] First flattened result:', JSON.stringify(flattenedResults[0], null, 2));
  }

  return {
    answer: data.answer || '',
    sparqlQuery: data.visualization?.sparql_query, // If backend includes SPARQL
    visualizationType: data.visualization?.type as VisualizationType,
    confidence: data.sources_used?.length > 0 ? 0.85 : 0.6, // Estimate confidence
    // New fields from backend for person/institution visualization
    retrievedResults: flattenedResults,
    queryType: data.query_type as QueryType | undefined,
  };
}
/**
 * Generate a fallback answer when DSPy service is unavailable.
 *
 * Produces a short Dutch or English summary of how many items were retrieved
 * and names up to five Qdrant hits (payload `name`, then `custodian_name`,
 * then 'Unknown'). When results exist but none of them came from Qdrant, the
 * institution sentence is omitted instead of ending in an empty list.
 *
 * @param _question - Unused; kept for signature compatibility.
 * @param context - Retrieval results to summarise.
 * @param language - Language of the generated sentence.
 * @returns A one- or two-sentence summary.
 */
function generateFallbackAnswer(
  _question: string,
  context: RAGContext,
  language: 'nl' | 'en'
): string {
  const count = context.totalRetrieved;
  if (count === 0) {
    return language === 'nl'
      ? 'Geen resultaten gevonden voor uw vraag.'
      : 'No results found for your question.';
  }

  const institutions = context.qdrantResults
    .slice(0, 5)
    .map(r => r.payload?.name || r.payload?.custodian_name || 'Unknown');

  // totalRetrieved also counts SPARQL/TypeDB rows, so there may be nothing to name.
  if (institutions.length === 0) {
    return language === 'nl'
      ? `Ik heb ${count} resultaten gevonden.`
      : `I found ${count} results.`;
  }

  if (language === 'nl') {
    return `Ik heb ${count} resultaten gevonden. Enkele relevante instellingen: ${institutions.join(', ')}.`;
  }
  return `I found ${count} results. Some relevant institutions: ${institutions.join(', ')}.`;
}
/**
 * Pick a visualization for a question and its retrieval results.
 *
 * The lower-cased question is tested against Dutch/English keyword groups in a
 * fixed order (map, timeline, network, chart); matching is by substring, so a
 * stem such as 'collaborat' also covers its inflections. If no keyword matches,
 * the choice falls back to the data: 'map' when any Qdrant payload carries
 * `latitude` or `coordinates` and something was retrieved, 'card' when there
 * are Qdrant results at all, and 'table' otherwise.
 *
 * @param question - The user's question.
 * @param context - Retrieval results for the question.
 * @returns The selected visualization type.
 */
function detectVisualizationType(
  question: string,
  context: RAGContext
): VisualizationType {
  const normalized = question.toLowerCase();

  // Order matters: the first group with a matching keyword wins.
  const keywordRules: Array<[VisualizationType, string[]]> = [
    ['map', [
      'kaart', 'map', 'waar', 'where', 'locatie', 'location',
      'provincie', 'province', 'stad', 'city', 'geografisch', 'geographic',
    ]],
    ['timeline', [
      'wanneer', 'when', 'geschiedenis', 'history', 'tijdlijn', 'timeline',
      'opgericht', 'founded', 'jaar', 'year',
    ]],
    ['network', [
      'relatie', 'relationship', 'verbinding', 'connection', 'netwerk', 'network',
      'samenwer', 'collaborat',
    ]],
    ['chart', [
      'hoeveel', 'how many', 'aantal', 'count', 'statistiek', 'statistic',
      'verdeling', 'distribution', 'vergelijk', 'compare',
    ]],
  ];

  for (const [vizType, needles] of keywordRules) {
    if (needles.some(needle => normalized.includes(needle))) {
      return vizType;
    }
  }

  // No keyword hit: decide from the shape of the retrieved data.
  const anyPointHasLocation = context.qdrantResults.some(r =>
    r.payload?.latitude || r.payload?.coordinates
  );
  if (anyPointHasLocation && context.totalRetrieved > 0) {
    return 'map';
  }

  return context.qdrantResults.length > 0 ? 'card' : 'table';
}
/**
 * Extract visualization data from RAG context.
 *
 * Every Qdrant point is flattened into an institution record, reading several
 * alternative payload keys per field (for example `name`, `custodian_name`,
 * `institution_name`). For the 'map' type, institutions with usable
 * coordinates are additionally turned into map markers.
 *
 * Coordinates that are missing, null, empty or not numeric are stored as
 * `undefined` rather than `NaN` or a spurious 0, so the records survive JSON
 * serialisation intact. A coordinate of exactly 0 is treated as "not set" when
 * building markers.
 *
 * @param type - Visualization type the data is prepared for.
 * @param context - Retrieval results; only `qdrantResults` is read.
 * @returns Visualization data with `institutions`, plus `coordinates` for maps.
 */
function extractVisualizationData(
  type: VisualizationType,
  context: RAGContext
): VisualizationData {
  // Convert a raw payload value to a finite number, or undefined when it carries no number.
  const toFiniteNumber = (raw: unknown): number | undefined => {
    if (raw === null || raw === undefined || raw === '') {
      return undefined;
    }
    const parsed = Number(raw);
    return Number.isFinite(parsed) ? parsed : undefined;
  };

  const data: VisualizationData = { type };

  // Extract institution data from Qdrant results
  data.institutions = context.qdrantResults.map(r => {
    const p = (r.payload || {}) as Record<string, unknown>;
    const location = (p.location || {}) as Record<string, unknown>;
    const coordinates = (p.coordinates || {}) as Record<string, unknown>;
    return {
      id: String(r.id),
      name: String(p.name || p.custodian_name || p.institution_name || 'Unknown'),
      type: String(p.type || p.institution_type || ''),
      city: String(p.city || location.city || ''),
      province: String(p.province || p.region || ''),
      country: String(p.country || 'NL'),
      latitude: toFiniteNumber(p.latitude || coordinates.lat || location.latitude),
      longitude: toFiniteNumber(p.longitude || coordinates.lng || location.longitude),
      description: String(p.description || ''),
      website: String(p.website || p.url || ''),
      isil: String(p.isil || p.isil_code || ''),
      wikidata: String(p.wikidata || p.wikidata_id || ''),
      rating: toFiniteNumber(p.rating || p.google_rating || 0) ?? 0,
      reviews: toFiniteNumber(p.reviews || p.review_count || 0) ?? 0,
      photoCount: toFiniteNumber(p.photoCount || p.photo_count || 0) ?? 0,
    };
  });

  // Extract coordinates for map
  if (type === 'map') {
    data.coordinates = data.institutions.flatMap(inst => {
      const { latitude, longitude } = inst;
      // Skips undefined and 0; both values are finite once they are set.
      if (!latitude || !longitude) {
        return [];
      }
      return [{
        lat: latitude,
        lng: longitude,
        label: inst.name,
        type: inst.type,
        data: inst,
      }];
    });
  }

  return data;
}
// ============================================================================
// Hook Implementation
// ============================================================================
/**
 * React hook that answers questions by combining Qdrant, SPARQL and (optionally)
 * TypeDB retrieval with the DSPy backend, with a semantic cache in front.
 *
 * The hook initialises the semantic cache once on mount and exposes:
 * - `queryRAG` for the full pipeline (cache lookup, parallel retrieval,
 *   answer generation, cache store);
 * - thin wrappers around the individual database queries;
 * - cache controls and the latest loading/error/context state.
 *
 * `queryRAG` gets a new identity whenever `cacheEnabled` changes; the other
 * callbacks are stable.
 */
export function useMultiDatabaseRAG(): UseMultiDatabaseRAGReturn {
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<Error | null>(null);
  const [lastContext, setLastContext] = useState<RAGContext | null>(null);
  const [cacheEnabled, setCacheEnabled] = useState(true);
  const [lastCacheLookup, setLastCacheLookup] = useState<CacheLookupResult | null>(null);

  // Initialize cache on mount
  const cacheInitialized = useRef(false);
  useEffect(() => {
    if (!cacheInitialized.current) {
      semanticCache.initialize().then(() => {
        cacheInitialized.current = true;
        console.log('[useMultiDatabaseRAG] Semantic cache initialized');
      }).catch(err => {
        console.error('[useMultiDatabaseRAG] Cache init failed:', err);
      });
    }
  }, []);

  /**
   * Main RAG query function - orchestrates multi-database retrieval
   * Now with semantic caching for API cost savings
   *
   * Rejects (after recording the error in state) when retrieval or the backend
   * call throws. Cache lookup/store failures are logged and otherwise ignored.
   */
  const queryRAG = useCallback(async (
    question: string,
    options: RAGOptions = {}
  ): Promise<RAGResponse> => {
    setIsLoading(true);
    setError(null);
    setLastCacheLookup(null);

    const {
      maxQdrantResults = 20,
      maxSparqlResults = 50,
      maxTypeDBResults = 50,
      includeSparql = true,
      includeTypeDB = false, // Disabled by default (may not be running)
      useCache = true,
      bypassCache = false,
      storeInCache = true,
    } = options;

    try {
      // Step 1: Generate embedding for the query; it keys the semantic cache.
      // Note: qdrantSearch requests its own embedding for the vector search.
      const queryEmbedding = await generateEmbedding(question);

      // Step 2: Check semantic cache (if enabled and not bypassed)
      if (cacheEnabled && useCache && !bypassCache) {
        try {
          const cacheResult = await semanticCache.lookup(
            question,
            queryEmbedding,
            { language: options.language, model: options.model }
          );
          setLastCacheLookup(cacheResult);

          if (cacheResult.found && cacheResult.entry) {
            console.log(
              `[useMultiDatabaseRAG] Cache HIT! Similarity: ${cacheResult.similarity.toFixed(3)}, ` +
              `Method: ${cacheResult.method}, Saved API call!`
            );
            // Reconstruct RAGResponse from cached data
            const cached = cacheResult.entry.response;
            // Update context state
            setLastContext(cached.context as RAGContext);
            return {
              answer: cached.answer,
              sparqlQuery: cached.sparqlQuery,
              typeqlQuery: cached.typeqlQuery,
              context: cached.context as RAGContext,
              visualizationType: cached.visualizationType as VisualizationType || 'card',
              visualizationData: cached.visualizationData as VisualizationData,
              sources: cached.sources as RAGSource[],
              confidence: cached.confidence,
              // Include new fields for person/institution visualization
              retrievedResults: cached.retrievedResults as RetrievedResult[] | undefined,
              queryType: cached.queryType as QueryType | undefined,
              // Add cache metadata to indicate this was from cache
              _fromCache: true,
              _cacheMethod: cacheResult.method,
              _cacheSimilarity: cacheResult.similarity,
            } as RAGResponse & { _fromCache?: boolean; _cacheMethod?: string; _cacheSimilarity?: number };
          }
        } catch (cacheError) {
          console.warn('[useMultiDatabaseRAG] Cache lookup failed, continuing without cache:', cacheError);
        }
      }

      // Step 3: Cache miss - perform actual database queries
      // Parallel retrieval from all databases
      const retrievalPromises: Promise<unknown>[] = [
        qdrantSearch(question, maxQdrantResults),
      ];

      // Add SPARQL if enabled (construct a basic query from the first keyword)
      if (includeSparql) {
        const firstKeyword = question.split(/\s+/).find(w => w.length > 2) ?? '';
        // The keyword is user input placed inside a double-quoted SPARQL string
        // literal, so backslashes and double quotes must be escaped; otherwise a
        // quoted question breaks the query or changes its meaning.
        const keywordLiteral = firstKeyword.toLowerCase().replace(/[\\"]/g, '\\$&');
        const sparqlSearchQuery = [
          '',
          'PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>',
          'PREFIX schema: <http://schema.org/>',
          'PREFIX skos: <http://www.w3.org/2004/02/skos/core#>',
          'SELECT ?s ?label ?type WHERE {',
          '?s rdfs:label|schema:name|skos:prefLabel ?label .',
          'OPTIONAL { ?s a ?type }',
          `FILTER(CONTAINS(LCASE(STR(?label)), "${keywordLiteral}"))`,
          '}',
          `LIMIT ${maxSparqlResults}`,
          '',
        ].join('\n');
        retrievalPromises.push(
          // SPARQL is supplementary: a failure yields no rows instead of failing the query
          sparqlQuery(sparqlSearchQuery).catch(() => [])
        );
      }

      // Add TypeDB if enabled
      if (includeTypeDB) {
        const typeqlSearchQuery = `match $x isa heritage_custodian, has name $n; get $x, $n; limit ${maxTypeDBResults};`;
        retrievalPromises.push(
          typedbQuery(typeqlSearchQuery).catch(() => [])
        );
      }

      // Wait for all retrievals
      const results = await Promise.all(retrievalPromises);
      // TypeDB results sit after SPARQL's slot only when SPARQL was also queued.
      const typedbIndex = includeSparql ? 2 : 1;
      const qdrantResults = results[0] as QdrantSearchResult[];
      const sparqlResults = (includeSparql ? results[1] : []) as Record<string, unknown>[];
      const typedbResults = (includeTypeDB ? results[typedbIndex] : []) as Record<string, unknown>[];

      const context: RAGContext = {
        qdrantResults,
        sparqlResults,
        typedbResults,
        totalRetrieved: qdrantResults.length + sparqlResults.length + typedbResults.length,
      };
      setLastContext(context);

      // Call DSPy to generate response
      const dspyResponse = await callDSPy(question, context, options);

      // Detect visualization type (the backend's choice wins when it supplies one)
      const vizType = dspyResponse.visualizationType || detectVisualizationType(question, context);

      // Extract visualization data
      const vizData = extractVisualizationData(vizType, context);

      // Build sources list from the top five Qdrant hits
      const sources: RAGSource[] = [
        ...qdrantResults.slice(0, 5).map(r => ({
          database: 'qdrant' as const,
          id: String(r.id),
          name: String(r.payload?.name || r.payload?.custodian_name || ''),
          score: r.score,
          snippet: String(r.payload?.description || '').slice(0, 200),
        })),
      ];

      const response: RAGResponse = {
        answer: dspyResponse.answer,
        sparqlQuery: dspyResponse.sparqlQuery,
        typeqlQuery: dspyResponse.typeqlQuery,
        context,
        visualizationType: vizType,
        visualizationData: vizData,
        sources,
        confidence: dspyResponse.confidence,
        // New fields from backend for person/institution visualization
        retrievedResults: dspyResponse.retrievedResults,
        queryType: dspyResponse.queryType,
      };

      // Step 4: Store in cache (if enabled and response is valid)
      // Don't cache error responses (confidence: 0) - these are transient API errors
      if (cacheEnabled && storeInCache && response.confidence > 0) {
        try {
          const cacheResponse: CachedResponse = {
            answer: response.answer,
            sparqlQuery: response.sparqlQuery,
            typeqlQuery: response.typeqlQuery,
            visualizationType: response.visualizationType,
            visualizationData: response.visualizationData,
            sources: response.sources,
            confidence: response.confidence,
            context: response.context,
            // Include new fields for person/institution visualization
            retrievedResults: response.retrievedResults,
            queryType: response.queryType,
          };
          await semanticCache.store(
            question,
            queryEmbedding,
            cacheResponse,
            { language: options.language, model: options.model }
          );
          console.log('[useMultiDatabaseRAG] Response cached for future queries');
        } catch (cacheError) {
          console.warn('[useMultiDatabaseRAG] Failed to cache response:', cacheError);
        }
      }

      return response;
    } catch (err) {
      const error = err instanceof Error ? err : new Error('RAG query failed');
      setError(error);
      throw error;
    } finally {
      setIsLoading(false);
    }
  }, [cacheEnabled]);

  /**
   * Direct Qdrant search (for debugging/advanced use)
   */
  const searchQdrant = useCallback(async (
    query: string,
    limit: number = 10
  ): Promise<QdrantSearchResult[]> => {
    return qdrantSearch(query, limit);
  }, []);

  /**
   * Direct SPARQL query (for debugging/advanced use)
   */
  const querySparql = useCallback(async (
    sparql: string
  ): Promise<Record<string, unknown>[]> => {
    return sparqlQuery(sparql);
  }, []);

  /**
   * Direct TypeDB query (for debugging/advanced use)
   */
  const queryTypeDB = useCallback(async (
    typeql: string
  ): Promise<Record<string, unknown>[]> => {
    return typedbQuery(typeql);
  }, []);

  /**
   * Clear cached context (also resets the error and last cache lookup)
   */
  const clearContext = useCallback(() => {
    setLastContext(null);
    setError(null);
    setLastCacheLookup(null);
  }, []);

  /**
   * Get cache statistics
   */
  const getCacheStats = useCallback(async (): Promise<CacheStats> => {
    return semanticCache.getStats();
  }, []);

  /**
   * Clear the semantic cache
   * @returns Object indicating which caches were cleared
   */
  const clearCache = useCallback(async (): Promise<{ localCleared: boolean; sharedCleared: boolean }> => {
    const result = await semanticCache.clear();
    console.log('[useMultiDatabaseRAG] Semantic cache cleared:', result);
    return result;
  }, []);

  /**
   * Update cache similarity threshold
   */
  const setCacheSimilarityThreshold = useCallback((threshold: number): void => {
    semanticCache.setConfig({ similarityThreshold: threshold });
    console.log(`[useMultiDatabaseRAG] Cache similarity threshold set to ${threshold}`);
  }, []);

  return {
    isLoading,
    error,
    lastContext,
    cacheEnabled,
    lastCacheLookup,
    queryRAG,
    searchQdrant,
    querySparql,
    queryTypeDB,
    clearContext,
    detectVisualizationType,
    setCacheEnabled,
    getCacheStats,
    clearCache,
    setCacheSimilarityThreshold,
  };
}
export default useMultiDatabaseRAG;