- Institution Browser: multi-select for types and countries - URL query param sync for shareable filter URLs - New utility: countryNames.ts with flag emoji support - New utility: imageProxy.ts for image URL handling - New component: SearchableMultiSelect dropdown - Career timeline CSS and component updates - Media gallery improvements - Lazy load error boundary component - Version check utility
1011 lines
34 KiB
TypeScript
1011 lines
34 KiB
TypeScript
/**
|
|
* useMultiDatabaseRAG.ts - Multi-Database RAG (Retrieval-Augmented Generation) Hook
|
|
*
|
|
* Orchestrates queries across multiple databases for conversational AI:
|
|
* - Qdrant: Vector similarity search for semantic retrieval
|
|
* - Oxigraph: SPARQL queries for structured RDF data
|
|
* - TypeDB: TypeQL queries for knowledge graph traversal
|
|
*
|
|
* Features intelligent semantic caching to reduce API costs:
|
|
* - Caches query embeddings and responses in IndexedDB
|
|
* - Uses cosine similarity to find semantically similar past queries
|
|
* - Configurable similarity threshold (default 0.92)
|
|
* - Typical API savings: 30-50% for conversational UIs
|
|
*
|
|
* Based on DSPy RAG patterns for heritage institution conversations.
|
|
* Self-hosted infrastructure - no external API keys required.
|
|
*
|
|
* @see https://dspy.ai/
|
|
*/
|
|
|
|
import { useState, useCallback, useRef, useEffect } from 'react';
|
|
import type { QdrantSearchResult } from './useQdrant';
|
|
import { semanticCache, type CachedResponse, type CacheStats, type CacheLookupResult } from '../lib/storage/semantic-cache';
|
|
|
|
// Service endpoints. Every path is relative and is resolved through the Caddy proxy.

/** Base prefix for generic API routes; empty so the URLs stay relative. */
const API_BASE = '';

/** Proxy path for Qdrant. */
const QDRANT_URL = '/qdrant';

/** Proxy path for the SPARQL endpoint. */
const SPARQL_URL = '/sparql';

/** Proxy path for TypeDB. */
const TYPEDB_URL = '/api/typedb';

/** Proxy path for the DSPy RAG backend; the backend serves at /api/dspy/rag/query. */
const DSPY_URL = '/api/dspy/rag';
|
|
|
|
// ============================================================================
|
|
// Types
|
|
// ============================================================================
|
|
|
|
/**
 * Raw retrieval output collected for a single question, grouped per database.
 */
export interface RAGContext {
  /** Hits returned by the Qdrant search. */
  qdrantResults: QdrantSearchResult[];
  /** Rows returned by the SPARQL query. */
  sparqlResults: Array<Record<string, unknown>>;
  /** Rows returned by the TypeDB query. */
  typedbResults: Array<Record<string, unknown>>;
  /** Combined number of entries across the three result arrays. */
  totalRetrieved: number;
}
|
|
|
|
/**
 * A single result returned by the backend for visualization; it describes
 * either a person or an institution (see `type`).
 * Returned by HeritageRAGPipeline.forward().
 */
export interface RetrievedResult {
  /** Discriminates which group of optional fields is expected to be filled. */
  type: 'person' | 'institution';

  // --- Person fields (when type === 'person') ---
  person_id?: string;
  name: string;
  headline?: string;
  custodian_name?: string;
  custodian_slug?: string;
  /**
   * Location string taken from the backend's nested `metadata.location`.
   * The flattening code already emitted this key; it is declared here so
   * typed consumers can read it.
   */
  location?: string;
  heritage_relevant?: boolean;
  /** GLAMORCUBESFIXPHDNT single letter code. */
  heritage_type?: string;
  linkedin_url?: string | null;
  score?: number;

  // --- Institution fields (when type === 'institution') ---
  institution_type?: string;
  city?: string;
  country?: string;
  description?: string;
}
|
|
|
|
/** Kind of entity a question is about, as reported by the backend (`query_type`). */
export type QueryType =
  | 'person'
  | 'institution';
|
|
|
|
/**
 * Complete answer produced by `queryRAG` for one question.
 */
export interface RAGResponse {
  /** Answer text to show to the user. */
  answer: string;
  /** SPARQL query reported by the backend, when it includes one. */
  sparqlQuery?: string;
  /** TypeQL query associated with the answer, when available. */
  typeqlQuery?: string;
  /** Retrieval results the answer was built from. */
  context: RAGContext;
  /** Visualization chosen for this answer. */
  visualizationType?: VisualizationType;
  /** Data prepared for the chosen visualization. */
  visualizationData?: VisualizationData;
  /** Source entries backing the answer. */
  sources: RAGSource[];
  /** Confidence estimate; 0 marks an error answer that is not cached. */
  confidence: number;

  // Fields from the backend for person/institution visualization
  retrievedResults?: RetrievedResult[];
  queryType?: QueryType;
}
|
|
|
|
/**
 * Reference to one record that contributed to an answer.
 */
export interface RAGSource {
  /** Database the record came from. */
  database: 'qdrant' | 'oxigraph' | 'typedb';
  /** Record identifier, stringified. */
  id: string;
  /** Display name of the record, if known. */
  name?: string;
  /** Retrieval score, if the database supplied one. */
  score?: number;
  /** Short excerpt of the record's description. */
  snippet?: string;
}
|
|
|
|
/**
 * Visualization kinds an answer can be rendered with.
 *
 * - `none`     – no visualization
 * - `map`      – geographic visualization
 * - `timeline` – temporal visualization
 * - `network`  – graph/relationship visualization
 * - `chart`    – bar/line charts
 * - `table`    – tabular data
 * - `card`     – institution cards
 * - `gallery`  – image gallery
 */
export type VisualizationType =
  | 'none'
  | 'map'
  | 'timeline'
  | 'network'
  | 'chart'
  | 'table'
  | 'card'
  | 'gallery';
|
|
|
|
/**
 * Payload handed to a visualization component. Only the members relevant to
 * `type` are expected to be present.
 */
export interface VisualizationData {
  /** Visualization this payload was prepared for. */
  type: VisualizationType;
  /** Institution records (used by cards/tables/maps). */
  institutions?: InstitutionData[];
  /** Points to plot on a map. */
  coordinates?: GeoCoordinate[];
  /** Events to plot on a timeline. */
  timeline?: TimelineEvent[];
  /** Nodes and edges for a network view. */
  graphData?: GraphVisualizationData;
  /** Labels and datasets for a chart. */
  chartData?: ChartData;
}
|
|
|
|
/**
 * Normalized description of one heritage institution used by visualizations.
 */
export interface InstitutionData {
  // Identity
  id: string;
  name: string;
  type?: string;

  // Location
  city?: string;
  province?: string;
  country?: string;
  latitude?: number;
  longitude?: number;

  // Descriptive details and external identifiers
  description?: string;
  website?: string;
  isil?: string;
  wikidata?: string;

  // Popularity figures
  rating?: number;
  reviews?: number;
  photoCount?: number;
}
|
|
|
|
/**
 * One point to plot on a map.
 */
export interface GeoCoordinate {
  /** Latitude of the point. */
  lat: number;
  /** Longitude of the point. */
  lng: number;
  /** Text shown for the point. */
  label: string;
  /** Optional category of the point (e.g. the institution type). */
  type?: string;
  /** Institution record the point was derived from, if any. */
  data?: InstitutionData;
}
|
|
|
|
/**
 * One entry on a timeline visualization.
 */
export interface TimelineEvent {
  /** Date of the event, as a string. */
  date: string;
  /** Short title of the event. */
  label: string;
  /** Longer explanation of the event. */
  description?: string;
  /** Optional category of the event. */
  type?: string;
}
|
|
|
|
/**
 * Node/edge lists for a network (graph) visualization.
 */
export interface GraphVisualizationData {
  /** Graph vertices. */
  nodes: {
    id: string;
    label: string;
    type: string;
    /** Free-form extra properties of the node. */
    attributes?: Record<string, unknown>;
  }[];
  /** Graph edges; `source` and `target` hold node ids. */
  edges: {
    id: string;
    source: string;
    target: string;
    label: string;
    type?: string;
  }[];
}
|
|
|
|
/**
 * Labels plus one or more data series for a bar/line chart.
 */
export interface ChartData {
  /** Category labels along the axis. */
  labels: string[];
  /** Data series to draw. */
  datasets: {
    /** Name of the series. */
    label: string;
    /** Values of the series. */
    data: number[];
    /** Fill colour: one for the whole series, or a list of colours. */
    backgroundColor?: string | string[];
    /** Outline colour. */
    borderColor?: string;
  }[];
}
|
|
|
|
/**
 * One message in the chat conversation.
 */
export interface ConversationMessage {
  /** Identifier of the message. */
  id: string;
  /** Who authored the message. */
  role: 'user' | 'assistant' | 'system';
  /** Message text. */
  content: string;
  /** When the message was created. */
  timestamp: Date;
  /** RAG response attached to the message, if any. */
  response?: RAGResponse;
  /** Set while the message is still loading; such messages are skipped when building history for the backend. */
  isLoading?: boolean;
  /** Error text associated with the message, if any. */
  error?: string;
}
|
|
|
|
/**
 * Everything exposed by the `useMultiDatabaseRAG` hook.
 */
export interface UseMultiDatabaseRAGReturn {
  // ----- State -----
  /** True while a `queryRAG` call is running. */
  isLoading: boolean;
  /** Error raised by the most recent `queryRAG` call, or null. */
  error: Error | null;
  /** Retrieval context of the most recent query, or null. */
  lastContext: RAGContext | null;

  // ----- Cache state -----
  /** Whether the semantic cache is switched on for this hook instance. */
  cacheEnabled: boolean;
  /** Outcome of the most recent cache lookup, or null. */
  lastCacheLookup: CacheLookupResult | null;

  // ----- Core RAG function -----
  /** Answer a question using the cache and the configured databases. */
  queryRAG: (
    question: string,
    options?: RAGOptions
  ) => Promise<RAGResponse>;

  // ----- Individual database queries (for debugging/advanced use) -----
  searchQdrant: (query: string, limit?: number) => Promise<QdrantSearchResult[]>;
  querySparql: (sparql: string) => Promise<Array<Record<string, unknown>>>;
  queryTypeDB: (typeql: string) => Promise<Array<Record<string, unknown>>>;

  // ----- Utility functions -----
  /** Reset the stored context, error and cache-lookup state. */
  clearContext: () => void;
  /** Choose a visualization for a question and its retrieval results. */
  detectVisualizationType: (question: string, results: RAGContext) => VisualizationType;

  // ----- Cache management functions -----
  setCacheEnabled: (enabled: boolean) => void;
  getCacheStats: () => Promise<CacheStats>;
  clearCache: () => Promise<{ localCleared: boolean; sharedCleared: boolean }>;
  setCacheSimilarityThreshold: (threshold: number) => void;
}
|
|
|
|
/**
 * Per-query settings accepted by `queryRAG`. All members are optional.
 */
export interface RAGOptions {
  /** Model name; passed along with cache lookups and stores. */
  model?: string;
  /** Answer language (the DSPy request falls back to 'nl'). */
  language?: 'nl' | 'en';

  // Retrieval limits
  /** Maximum Qdrant hits (default: 20). */
  maxQdrantResults?: number;
  /** Maximum SPARQL rows (default: 50). */
  maxSparqlResults?: number;
  /** Maximum TypeDB rows (default: 50). */
  maxTypeDBResults?: number;

  // Which databases to query besides Qdrant
  /** Run the SPARQL query (default: true). */
  includeSparql?: boolean;
  /** Run the TypeDB query (default: false). */
  includeTypeDB?: boolean;

  /** Earlier messages; user/assistant pairs are forwarded to the backend. */
  conversationHistory?: ConversationMessage[];

  // Cache options
  /** Enable/disable cache for this query (default: true). */
  useCache?: boolean;
  /** Force fresh query even if cache hit (default: false). */
  bypassCache?: boolean;
  /** Store result in cache (default: true). */
  storeInCache?: boolean;

  // Embedding model selection
  /** Embedding model for vector search (default: auto). */
  embeddingModel?: 'minilm_384' | 'openai_1536' | 'bge_768' | null;
}
|
|
|
|
// ============================================================================
|
|
// Helper Functions
|
|
// ============================================================================
|
|
|
|
/**
 * Request an embedding vector for `text` from the local embedding service
 * (`POST {API_BASE}/api/embed` with a JSON body `{ text }`).
 *
 * This is a best-effort call: callers treat `null` as "no embedding available"
 * and fall back to keyword search.
 *
 * @param text - Text to embed.
 * @returns The embedding as a non-empty array of finite numbers, or `null`
 *   when the request fails, the service answers with a non-OK status, or the
 *   response body does not contain a usable `embedding` array.
 */
async function generateEmbedding(text: string): Promise<number[] | null> {
  try {
    const response = await fetch(`${API_BASE}/api/embed`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ text }),
    });

    if (!response.ok) {
      return null;
    }

    const data: unknown = await response.json();
    const embedding = (data as { embedding?: unknown } | null)?.embedding;

    // Validate the payload so the declared return type actually holds;
    // previously a missing field leaked `undefined` to callers.
    const isVector =
      Array.isArray(embedding) &&
      embedding.length > 0 &&
      embedding.every(value => typeof value === 'number' && Number.isFinite(value));

    return isVector ? (embedding as number[]) : null;
  } catch {
    // Network or JSON parsing problem: signal "no embedding" so the caller
    // can use keyword search instead.
    return null;
  }
}
|
|
|
|
/**
 * Query the `heritage_custodians` collection in Qdrant.
 *
 * When an embedding can be generated for `query`, a vector similarity search is
 * attempted first. If no embedding is available, or that search answers with a
 * non-OK status, the function scrolls through `limit * 2` points and ranks them
 * by the fraction of query keywords (words longer than two characters) found in
 * the JSON-serialized payload.
 *
 * @param query - Free-text search query.
 * @param limit - Maximum number of results to return (default 10).
 * @returns Matching points, best score first.
 * @throws Error when the scroll fallback request answers with a non-OK status.
 */
async function qdrantSearch(
  query: string,
  limit: number = 10
): Promise<QdrantSearchResult[]> {
  type RawPoint = { id: string | number; payload: Record<string, unknown> };
  type ScoredPoint = { id: string | number; score: number; payload: Record<string, unknown> };

  const collectionName = 'heritage_custodians';
  const jsonHeaders = { 'Content-Type': 'application/json' };

  console.log('[Qdrant] Search query:', query);
  console.log('[Qdrant] Limit:', limit);
  console.log('[Qdrant] QDRANT_URL:', QDRANT_URL);

  // Semantic search needs a query vector; it may be unavailable.
  const queryVector = await generateEmbedding(query);
  console.log('[Qdrant] Has embedding:', Boolean(queryVector));

  if (queryVector) {
    const searchUrl = `${QDRANT_URL}/collections/${collectionName}/points/search`;
    console.log('[Qdrant] Search URL:', searchUrl);
    console.log('[Qdrant] Full search URL:', new URL(searchUrl, window.location.origin).href);

    const searchResponse = await fetch(searchUrl, {
      method: 'POST',
      headers: jsonHeaders,
      body: JSON.stringify({
        vector: queryVector,
        limit,
        with_payload: true,
      }),
    });
    console.log('[Qdrant] Search response status:', searchResponse.status);

    if (searchResponse.ok) {
      const searchPayload = await searchResponse.json();
      console.log('[Qdrant] Search results count:', searchPayload.result?.length || 0);
      return searchPayload.result || [];
    }

    // Vector search failed: log the body and continue with the keyword fallback.
    const searchErrorBody = await searchResponse.text();
    console.error('[Qdrant] Search error body:', searchErrorBody);
  }

  // Keyword fallback: words longer than two characters act as keywords.
  const keywords = query
    .toLowerCase()
    .split(/\s+/)
    .filter(word => word.length > 2);
  console.log('[Qdrant] Fallback: Keywords:', keywords);

  const scrollUrl = `${QDRANT_URL}/collections/${collectionName}/points/scroll`;
  console.log('[Qdrant] Scroll URL:', scrollUrl);
  console.log('[Qdrant] Full scroll URL:', new URL(scrollUrl, window.location.origin).href);

  const scrollResponse = await fetch(scrollUrl, {
    method: 'POST',
    headers: jsonHeaders,
    body: JSON.stringify({
      limit: limit * 2, // fetch extra points because non-matching ones are dropped
      with_payload: true,
      with_vector: false,
    }),
  });
  console.log('[Qdrant] Scroll response status:', scrollResponse.status);

  if (!scrollResponse.ok) {
    const scrollErrorBody = await scrollResponse.text();
    console.error('[Qdrant] Scroll error body:', scrollErrorBody);
    throw new Error(`Qdrant scroll failed: ${scrollResponse.status}`);
  }

  const scrollPayload = await scrollResponse.json();
  const points = scrollPayload.result?.points || [];

  // Score = share of keywords that occur in the serialized payload; keep only hits.
  const keywordTotal = Math.max(keywords.length, 1);
  const ranked = points.reduce((kept: ScoredPoint[], point: RawPoint) => {
    const payload = point.payload || {};
    const haystack = JSON.stringify(payload).toLowerCase();
    let hitCount = 0;
    for (const keyword of keywords) {
      if (haystack.includes(keyword)) {
        hitCount += 1;
      }
    }
    const score = hitCount / keywordTotal;
    if (score > 0) {
      kept.push({ id: point.id, score, payload });
    }
    return kept;
  }, []);

  ranked.sort((left: ScoredPoint, right: ScoredPoint) => right.score - left.score);
  return ranked.slice(0, limit);
}
|
|
|
|
/**
 * Send a SPARQL query to Oxigraph (`POST {SPARQL_URL}/query`) and return its rows.
 *
 * @param query - SPARQL query text, sent verbatim as the request body.
 * @returns The `results.bindings` array of the JSON response, or an empty array
 *   when the response has none.
 * @throws Error carrying the HTTP status and response text when the request
 *   answers with a non-OK status.
 */
async function sparqlQuery(query: string): Promise<Record<string, unknown>[]> {
  const endpoint = `${SPARQL_URL}/query`;
  const reply = await fetch(endpoint, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/sparql-query',
      'Accept': 'application/sparql-results+json',
    },
    body: query,
  });

  if (reply.ok) {
    const parsed = await reply.json();
    const bindings = parsed.results?.bindings;
    return bindings ? bindings : [];
  }

  const detail = await reply.text();
  throw new Error(`SPARQL query failed: ${reply.status} - ${detail}`);
}
|
|
|
|
/**
 * Send a read-only TypeQL query to TypeDB (`POST {TYPEDB_URL}/query`).
 *
 * @param query - TypeQL query text; sent as JSON together with `queryType: 'read'`.
 * @returns The `results` array of the JSON response, or an empty array when the
 *   response has none.
 * @throws Error carrying the HTTP status and response text when the request
 *   answers with a non-OK status.
 */
async function typedbQuery(query: string): Promise<Record<string, unknown>[]> {
  const endpoint = `${TYPEDB_URL}/query`;
  const payload = JSON.stringify({ query, queryType: 'read' });

  const reply = await fetch(endpoint, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: payload,
  });

  if (reply.ok) {
    const parsed = await reply.json();
    return parsed.results ? parsed.results : [];
  }

  const detail = await reply.text();
  throw new Error(`TypeDB query failed: ${reply.status} - ${detail}`);
}
|
|
|
|
/**
 * Pair every user message with the assistant message that directly follows it.
 * Assistant messages that are still loading are not paired.
 */
function pairConversationTurns(
  messages: ConversationMessage[]
): Array<{ question: string; answer: string }> {
  const turns: Array<{ question: string; answer: string }> = [];

  for (let i = 0; i < messages.length - 1; i++) {
    const current = messages[i];
    const next = messages[i + 1];

    if (current.role === 'user' && next.role === 'assistant' && !next.isLoading) {
      turns.push({ question: current.content, answer: next.content });
      i++; // the assistant message has been consumed by this pair
    }
  }

  return turns;
}

/**
 * Normalize one backend result into the flat `RetrievedResult` shape.
 *
 * The backend may return either an already-flat record or a nested one:
 * `{ person_id, name, scores: {...}, metadata: { headline, ... }, type }`.
 */
function flattenRetrievedResult(
  result: Record<string, unknown>
): RetrievedResult & { location?: string } {
  const metadata = result.metadata as Record<string, unknown> | undefined;
  const scores = result.scores as Record<string, number> | undefined;

  const kind: 'person' | 'institution' =
    (result.type as 'person' | 'institution') || 'institution';
  const name = (result.name as string) || 'Unknown';
  const personId = result.person_id as string | undefined;

  // Already flat (no metadata object): copy the fields from the record itself.
  if (!metadata) {
    return {
      type: kind,
      person_id: personId,
      name,
      headline: result.headline as string | undefined,
      custodian_name: result.custodian_name as string | undefined,
      custodian_slug: result.custodian_slug as string | undefined,
      heritage_relevant: result.heritage_relevant as boolean | undefined,
      heritage_type: result.heritage_type as string | undefined,
      linkedin_url: result.linkedin_url as string | undefined,
      score: result.score as number | undefined,
      institution_type: result.institution_type as string | undefined,
      city: result.city as string | undefined,
      country: result.country as string | undefined,
      description: result.description as string | undefined,
    };
  }

  // Nested: lift the metadata fields to the top level.
  return {
    type: kind,
    person_id: personId,
    name,
    headline: metadata.headline as string | undefined,
    custodian_name: metadata.custodian_name as string | undefined,
    custodian_slug: metadata.custodian_slug as string | undefined,
    location: metadata.location as string | undefined,
    heritage_relevant: metadata.heritage_relevant as boolean | undefined,
    heritage_type: metadata.heritage_type as string | undefined,
    linkedin_url: metadata.linkedin_url as string | undefined,
    // `??` (not `||`) so that a combined score of exactly 0 is kept.
    score: scores?.combined ?? scores?.vector,
    institution_type: metadata.institution_type as string | undefined,
    city: metadata.city as string | undefined,
    country: metadata.country as string | undefined,
    description: metadata.description as string | undefined,
  };
}

/**
 * Ask the DSPy backend (`POST {DSPY_URL}/query`) to answer a question.
 *
 * The request carries the question, the language (default 'nl'), the last four
 * user/assistant exchanges from `options.conversationHistory`, and the preferred
 * embedding model.
 *
 * Failures are reported as answers rather than exceptions:
 * - network error, HTTP 404 or HTTP 5xx: a localized warning message with
 *   `confidence: 0`;
 * - any other non-OK status: a summary built from `context` with
 *   `confidence: 0.5`.
 *
 * @param question - The user's question.
 * @param context - Retrieval results, used only for the fallback summary.
 * @param options - Query options (language, history, embedding model).
 * @returns The answer plus optional visualization hints and retrieved results.
 *   `confidence` is an estimate: 0.85 when the backend lists used sources,
 *   otherwise 0.6.
 */
async function callDSPy(
  question: string,
  context: RAGContext,
  options: RAGOptions
): Promise<{
  answer: string;
  sparqlQuery?: string;
  typeqlQuery?: string;
  visualizationType?: VisualizationType;
  confidence: number;
  retrievedResults?: RetrievedResult[];
  queryType?: QueryType;
}> {
  const lang = options.language || 'nl';

  // Backend expects: context = [{question: "...", answer: "..."}, ...]
  // Only the last 4 turns are sent.
  const recentContext = pairConversationTurns(options.conversationHistory || []).slice(-4);

  const requestUrl = `${DSPY_URL}/query`;
  const requestBody = {
    question,
    language: lang,
    context: recentContext,
    include_visualization: true,
    embedding_model: options.embeddingModel || null,
  };

  console.log('[DSPy] Request URL:', requestUrl);
  console.log('[DSPy] Request body:', JSON.stringify(requestBody, null, 2));
  console.log('[DSPy] Window location:', window.location.href);
  console.log('[DSPy] Full URL being fetched:', new URL(requestUrl, window.location.origin).href);

  let response: Response;
  try {
    response = await fetch(requestUrl, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(requestBody),
    });
    console.log('[DSPy] Response status:', response.status);
    console.log('[DSPy] Response URL:', response.url);
    console.log('[DSPy] Response headers:', Object.fromEntries(response.headers.entries()));
  } catch (networkError) {
    // Network error - server unreachable
    console.error('[DSPy] Network error:', networkError);
    return {
      answer: lang === 'nl'
        ? '⚠️ **Serverfout**: Kan geen verbinding maken met de RAG-server. Controleer of de backend draait op poort 8003.'
        : '⚠️ **Server Error**: Cannot connect to RAG server. Check if backend is running on port 8003.',
      confidence: 0,
    };
  }

  if (!response.ok) {
    // HTTP error - log details for debugging
    const responseBody = await response.text();
    console.error(`[DSPy] HTTP ${response.status}: ${response.statusText}`);
    console.error(`[DSPy] Response body:`, responseBody);

    if (response.status === 404) {
      return {
        answer: lang === 'nl'
          ? '⚠️ **Serverfout (404)**: De RAG API endpoint is niet gevonden. Controleer de proxy configuratie in vite.config.ts en herstart de frontend.'
          : '⚠️ **Server Error (404)**: RAG API endpoint not found. Check proxy configuration in vite.config.ts and restart frontend.',
        confidence: 0,
      };
    }

    if (response.status >= 500) {
      return {
        answer: lang === 'nl'
          ? `⚠️ **Serverfout (${response.status})**: De RAG-server heeft een interne fout. Controleer de backend logs.`
          : `⚠️ **Server Error (${response.status})**: RAG server internal error. Check backend logs.`,
        confidence: 0,
      };
    }

    // Other HTTP errors - fall back to context-based answer
    return {
      answer: generateFallbackAnswer(question, context, lang),
      confidence: 0.5,
    };
  }

  // Map backend DSPyQueryResponse to frontend expected format
  const data = await response.json();
  console.log('[DSPy] Response data (first 500 chars):', JSON.stringify(data, null, 2).slice(0, 500));
  console.log('[DSPy] retrieved_results:', data.retrieved_results);
  console.log('[DSPy] retrieved_results count:', data.retrieved_results?.length ?? 0);
  console.log('[DSPy] query_type:', data.query_type);

  // Debug: log first result if available
  if (data.retrieved_results?.length > 0) {
    console.log('[DSPy] First retrieved result:', JSON.stringify(data.retrieved_results[0], null, 2));
  }

  // Only map when the backend really sent a list; anything else is treated as "no results".
  const flattenedResults: RetrievedResult[] | undefined = Array.isArray(data.retrieved_results)
    ? data.retrieved_results.map((result: Record<string, unknown>) => flattenRetrievedResult(result))
    : undefined;

  console.log('[DSPy] Flattened results count:', flattenedResults?.length ?? 0);
  if (flattenedResults?.length) {
    console.log('[DSPy] First flattened result:', JSON.stringify(flattenedResults[0], null, 2));
  }

  return {
    answer: data.answer || '',
    sparqlQuery: data.visualization?.sparql_query, // If backend includes SPARQL
    visualizationType: data.visualization?.type as VisualizationType,
    confidence: data.sources_used?.length > 0 ? 0.85 : 0.6, // Estimate confidence
    retrievedResults: flattenedResults,
    queryType: data.query_type as QueryType | undefined,
  };
}
|
|
|
|
/**
 * Build a short, localized answer directly from the retrieval context, for use
 * when the DSPy service cannot provide one.
 *
 * @param _question - The user's question (currently unused).
 * @param context - Retrieval results to summarize.
 * @param language - Language of the answer text.
 * @returns A "no results" message when nothing was retrieved; otherwise the
 *   total count, followed by up to five institution names from the Qdrant hits.
 *   The institution list is left out when there are no Qdrant hits (for
 *   example when only SPARQL/TypeDB returned rows), instead of printing an
 *   empty list.
 */
function generateFallbackAnswer(
  _question: string,
  context: RAGContext,
  language: 'nl' | 'en'
): string {
  const count = context.totalRetrieved;
  const isDutch = language === 'nl';

  if (count === 0) {
    return isDutch
      ? 'Geen resultaten gevonden voor uw vraag.'
      : 'No results found for your question.';
  }

  const institutions = context.qdrantResults
    .slice(0, 5)
    .map(r => r.payload?.name || r.payload?.custodian_name || 'Unknown');

  const summary = isDutch
    ? `Ik heb ${count} resultaten gevonden.`
    : `I found ${count} results.`;

  // Nothing to name: stop after the count rather than emitting "…: ."
  if (institutions.length === 0) {
    return summary;
  }

  const listIntro = isDutch
    ? 'Enkele relevante instellingen'
    : 'Some relevant institutions';
  return `${summary} ${listIntro}: ${institutions.join(', ')}.`;
}
|
|
|
|
/**
 * Pick a visualization for a question and its retrieval results.
 *
 * The lower-cased question is checked for Dutch/English keyword substrings, in
 * this order: map, timeline, network, chart. The first group with a match wins.
 * Without a keyword match the choice depends on the results: 'map' when any
 * Qdrant hit carries location data, 'card' when there are Qdrant hits at all,
 * and 'table' otherwise.
 *
 * @param question - The user's question.
 * @param context - Retrieval results for the question.
 * @returns The visualization type to use.
 */
function detectVisualizationType(
  question: string,
  context: RAGContext
): VisualizationType {
  const normalized = question.toLowerCase();

  // Evaluated top to bottom; keywords are matched as plain substrings.
  const keywordRules: Array<[VisualizationType, string[]]> = [
    ['map', [
      'kaart', 'map', 'waar', 'where', 'locatie', 'location',
      'provincie', 'province', 'stad', 'city', 'geografisch', 'geographic',
    ]],
    ['timeline', [
      'wanneer', 'when', 'geschiedenis', 'history', 'tijdlijn', 'timeline',
      'opgericht', 'founded', 'jaar', 'year',
    ]],
    ['network', [
      'relatie', 'relationship', 'verbinding', 'connection', 'netwerk', 'network',
      'samenwer', 'collaborat',
    ]],
    ['chart', [
      'hoeveel', 'how many', 'aantal', 'count', 'statistiek', 'statistic',
      'verdeling', 'distribution', 'vergelijk', 'compare',
    ]],
  ];

  for (const [visualization, keywords] of keywordRules) {
    if (keywords.some(keyword => normalized.includes(keyword))) {
      return visualization;
    }
  }

  // No keyword matched: decide from the shape of the results.
  const anyHitHasLocation = context.qdrantResults.some(
    hit => hit.payload?.latitude || hit.payload?.coordinates
  );
  if (anyHitHasLocation && context.totalRetrieved > 0) {
    return 'map';
  }

  return context.qdrantResults.length > 0 ? 'card' : 'table';
}
|
|
|
|
/**
 * Return the first candidate that is a finite number, or a non-blank string
 * that parses to one. Unlike a `||` chain this keeps a legitimate 0.
 */
function firstFiniteNumber(...candidates: unknown[]): number | undefined {
  for (const candidate of candidates) {
    if (typeof candidate === 'number') {
      if (Number.isFinite(candidate)) {
        return candidate;
      }
    } else if (typeof candidate === 'string' && candidate.trim() !== '') {
      const parsed = Number(candidate);
      if (Number.isFinite(parsed)) {
        return parsed;
      }
    }
  }
  return undefined;
}

/**
 * Convert the Qdrant hits in `context` into data for a visualization.
 *
 * Every hit becomes an `InstitutionData` record; payload fields are read under
 * several alternative key names (for example `name`, `custodian_name`,
 * `institution_name`). Coordinates are looked up in `payload.latitude/longitude`,
 * then `payload.coordinates.lat/lng`, then `payload.location.latitude/longitude`;
 * when none holds a finite number the field is left `undefined` (not `NaN`).
 *
 * For `type === 'map'` the result additionally contains one `coordinates` entry
 * per institution that has both a latitude and a longitude. Values of exactly 0
 * count as valid coordinates.
 *
 * @param type - Visualization the data is prepared for.
 * @param context - Retrieval results; only `qdrantResults` is used.
 * @returns Visualization data with `institutions` always set.
 */
function extractVisualizationData(
  type: VisualizationType,
  context: RAGContext
): VisualizationData {
  const data: VisualizationData = { type };

  // Extract institution data from Qdrant results
  const institutions: InstitutionData[] = context.qdrantResults.map(r => {
    const p = (r.payload || {}) as Record<string, unknown>;
    const location = (p.location || {}) as Record<string, unknown>;
    const coordinates = (p.coordinates || {}) as Record<string, unknown>;

    return {
      id: String(r.id),
      name: String(p.name || p.custodian_name || p.institution_name || 'Unknown'),
      type: String(p.type || p.institution_type || ''),
      city: String(p.city || location.city || ''),
      province: String(p.province || p.region || ''),
      country: String(p.country || 'NL'),
      latitude: firstFiniteNumber(p.latitude, coordinates.lat, location.latitude),
      longitude: firstFiniteNumber(p.longitude, coordinates.lng, location.longitude),
      description: String(p.description || ''),
      website: String(p.website || p.url || ''),
      isil: String(p.isil || p.isil_code || ''),
      wikidata: String(p.wikidata || p.wikidata_id || ''),
      rating: Number(p.rating || p.google_rating || 0),
      reviews: Number(p.reviews || p.review_count || 0),
      photoCount: Number(p.photoCount || p.photo_count || 0),
    };
  });
  data.institutions = institutions;

  // Extract coordinates for map
  if (type === 'map') {
    const points: GeoCoordinate[] = [];
    for (const institution of institutions) {
      const { latitude, longitude } = institution;
      // Presence check instead of truthiness so 0 is not mistaken for "missing".
      if (latitude === undefined || longitude === undefined) {
        continue;
      }
      points.push({
        lat: latitude,
        lng: longitude,
        label: institution.name,
        type: institution.type,
        data: institution,
      });
    }
    data.coordinates = points;
  }

  return data;
}
|
|
|
|
// ============================================================================
|
|
// Hook Implementation
|
|
// ============================================================================
|
|
|
|
/**
 * Escape a value so it can be placed between double quotes in a SPARQL string
 * literal without terminating the literal or starting an escape sequence.
 */
function escapeSparqlStringLiteral(value: string): string {
  return value
    .replace(/\\/g, '\\\\')
    .replace(/"/g, '\\"')
    .replace(/\n/g, '\\n')
    .replace(/\r/g, '\\r');
}

/**
 * React hook that answers questions by combining a semantic cache, retrieval
 * from Qdrant (always), Oxigraph/SPARQL and TypeDB (both optional), and the
 * DSPy backend for answer generation.
 *
 * @returns Loading/error/context state, the `queryRAG` function, direct
 *   per-database query helpers, and cache management functions.
 */
export function useMultiDatabaseRAG(): UseMultiDatabaseRAGReturn {
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<Error | null>(null);
  const [lastContext, setLastContext] = useState<RAGContext | null>(null);
  const [cacheEnabled, setCacheEnabled] = useState(true);
  const [lastCacheLookup, setLastCacheLookup] = useState<CacheLookupResult | null>(null);

  // Initialize cache on mount
  const cacheInitialized = useRef(false);
  useEffect(() => {
    if (cacheInitialized.current) {
      return;
    }
    // Claim the flag before the async work starts so that a second run of this
    // effect cannot start a second initialization while the first is pending.
    cacheInitialized.current = true;
    semanticCache.initialize().then(() => {
      console.log('[useMultiDatabaseRAG] Semantic cache initialized');
    }).catch(err => {
      cacheInitialized.current = false;
      console.error('[useMultiDatabaseRAG] Cache init failed:', err);
    });
  }, []);

  /**
   * Main RAG query function - orchestrates multi-database retrieval with
   * semantic caching.
   *
   * Steps: embed the question, try the semantic cache, otherwise query the
   * databases in parallel, ask DSPy for the answer, and store the response in
   * the cache. Responses with `confidence` 0 (error answers) are not cached.
   * SPARQL and TypeDB failures are tolerated (treated as "no rows"); a Qdrant
   * failure rejects the call.
   *
   * @throws Error when retrieval or answer generation fails; the same error is
   *   also stored in the `error` state.
   */
  const queryRAG = useCallback(async (
    question: string,
    options: RAGOptions = {}
  ): Promise<RAGResponse> => {
    setIsLoading(true);
    setError(null);
    setLastCacheLookup(null);

    const {
      maxQdrantResults = 20,
      maxSparqlResults = 50,
      maxTypeDBResults = 50,
      includeSparql = true,
      includeTypeDB = false, // Disabled by default (may not be running)
      useCache = true,
      bypassCache = false,
      storeInCache = true,
    } = options;

    try {
      // Step 1: Generate embedding for the query (used for cache lookup/store).
      // NOTE(review): qdrantSearch generates its own embedding for the same
      // text, so the embedding service is called twice on a cache miss.
      const queryEmbedding = await generateEmbedding(question);

      // Step 2: Check semantic cache (if enabled and not bypassed)
      if (cacheEnabled && useCache && !bypassCache) {
        try {
          const cacheResult = await semanticCache.lookup(
            question,
            queryEmbedding,
            { language: options.language, model: options.model }
          );

          setLastCacheLookup(cacheResult);

          if (cacheResult.found && cacheResult.entry) {
            console.log(
              `[useMultiDatabaseRAG] Cache HIT! Similarity: ${cacheResult.similarity.toFixed(3)}, ` +
              `Method: ${cacheResult.method}, Saved API call!`
            );

            // Reconstruct RAGResponse from cached data
            const cached = cacheResult.entry.response;

            // Update context state
            setLastContext(cached.context as RAGContext);

            return {
              answer: cached.answer,
              sparqlQuery: cached.sparqlQuery,
              typeqlQuery: cached.typeqlQuery,
              context: cached.context as RAGContext,
              visualizationType: cached.visualizationType as VisualizationType || 'card',
              visualizationData: cached.visualizationData as VisualizationData,
              sources: cached.sources as RAGSource[],
              confidence: cached.confidence,
              // Include fields for person/institution visualization
              retrievedResults: cached.retrievedResults as RetrievedResult[] | undefined,
              queryType: cached.queryType as QueryType | undefined,
              // Cache metadata to indicate this was served from cache
              _fromCache: true,
              _cacheMethod: cacheResult.method,
              _cacheSimilarity: cacheResult.similarity,
            } as RAGResponse & { _fromCache?: boolean; _cacheMethod?: string; _cacheSimilarity?: number };
          }
        } catch (cacheError) {
          console.warn('[useMultiDatabaseRAG] Cache lookup failed, continuing without cache:', cacheError);
        }
      }

      // Step 3: Cache miss - perform actual database queries in parallel.
      // Each source has its own named promise so results cannot be mixed up
      // when one of the optional sources is disabled.
      const qdrantPromise = qdrantSearch(question, maxQdrantResults);
      let sparqlPromise: Promise<Record<string, unknown>[]> = Promise.resolve([]);
      let typedbPromise: Promise<Record<string, unknown>[]> = Promise.resolve([]);

      // Add SPARQL if enabled (construct a basic query from keywords)
      if (includeSparql) {
        const keywords = question.split(/\s+/).filter(w => w.length > 2).slice(0, 3);
        // The keyword comes from user input: escape it before embedding it in
        // the string literal so quotes/backslashes cannot alter the query.
        const searchTerm = escapeSparqlStringLiteral(keywords[0]?.toLowerCase() || '');
        const sparqlSearchQuery = `
          PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
          PREFIX schema: <http://schema.org/>
          PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

          SELECT ?s ?label ?type WHERE {
            ?s rdfs:label|schema:name|skos:prefLabel ?label .
            OPTIONAL { ?s a ?type }
            FILTER(CONTAINS(LCASE(STR(?label)), "${searchTerm}"))
          }
          LIMIT ${maxSparqlResults}
        `;
        sparqlPromise = sparqlQuery(sparqlSearchQuery).catch(() => []);
      }

      // Add TypeDB if enabled
      if (includeTypeDB) {
        const typeqlSearchQuery = `match $x isa heritage_custodian, has name $n; get $x, $n; limit ${maxTypeDBResults};`;
        typedbPromise = typedbQuery(typeqlSearchQuery).catch(() => []);
      }

      // Wait for all retrievals
      const [qdrantResults, sparqlResults, typedbResults] = await Promise.all([
        qdrantPromise,
        sparqlPromise,
        typedbPromise,
      ]);

      const context: RAGContext = {
        qdrantResults,
        sparqlResults,
        typedbResults,
        totalRetrieved: qdrantResults.length + sparqlResults.length + typedbResults.length,
      };

      setLastContext(context);

      // Call DSPy to generate response
      const dspyResponse = await callDSPy(question, context, options);

      // Prefer the backend's visualization choice; otherwise detect one locally
      const vizType = dspyResponse.visualizationType || detectVisualizationType(question, context);

      // Extract visualization data
      const vizData = extractVisualizationData(vizType, context);

      // Build sources list from the top five Qdrant hits
      const sources: RAGSource[] = [
        ...qdrantResults.slice(0, 5).map(r => ({
          database: 'qdrant' as const,
          id: String(r.id),
          name: String(r.payload?.name || r.payload?.custodian_name || ''),
          score: r.score,
          snippet: String(r.payload?.description || '').slice(0, 200),
        })),
      ];

      const response: RAGResponse = {
        answer: dspyResponse.answer,
        sparqlQuery: dspyResponse.sparqlQuery,
        typeqlQuery: dspyResponse.typeqlQuery,
        context,
        visualizationType: vizType,
        visualizationData: vizData,
        sources,
        confidence: dspyResponse.confidence,
        // Fields from backend for person/institution visualization
        retrievedResults: dspyResponse.retrievedResults,
        queryType: dspyResponse.queryType,
      };

      // Step 4: Store in cache (if enabled and response is valid)
      // Don't cache error responses (confidence: 0) - these are transient API errors
      if (cacheEnabled && storeInCache && response.confidence > 0) {
        try {
          const cacheResponse: CachedResponse = {
            answer: response.answer,
            sparqlQuery: response.sparqlQuery,
            typeqlQuery: response.typeqlQuery,
            visualizationType: response.visualizationType,
            visualizationData: response.visualizationData,
            sources: response.sources,
            confidence: response.confidence,
            context: response.context,
            // Include fields for person/institution visualization
            retrievedResults: response.retrievedResults,
            queryType: response.queryType,
          };

          await semanticCache.store(
            question,
            queryEmbedding,
            cacheResponse,
            { language: options.language, model: options.model }
          );

          console.log('[useMultiDatabaseRAG] Response cached for future queries');
        } catch (cacheError) {
          console.warn('[useMultiDatabaseRAG] Failed to cache response:', cacheError);
        }
      }

      return response;

    } catch (err) {
      const error = err instanceof Error ? err : new Error('RAG query failed');
      setError(error);
      throw error;
    } finally {
      setIsLoading(false);
    }
  }, [cacheEnabled]);

  /**
   * Direct Qdrant search (for debugging/advanced use)
   */
  const searchQdrant = useCallback(async (
    query: string,
    limit: number = 10
  ): Promise<QdrantSearchResult[]> => {
    return qdrantSearch(query, limit);
  }, []);

  /**
   * Direct SPARQL query (for debugging/advanced use)
   */
  const querySparql = useCallback(async (
    sparql: string
  ): Promise<Record<string, unknown>[]> => {
    return sparqlQuery(sparql);
  }, []);

  /**
   * Direct TypeDB query (for debugging/advanced use)
   */
  const queryTypeDB = useCallback(async (
    typeql: string
  ): Promise<Record<string, unknown>[]> => {
    return typedbQuery(typeql);
  }, []);

  /**
   * Reset the stored context, error and cache-lookup state
   */
  const clearContext = useCallback(() => {
    setLastContext(null);
    setError(null);
    setLastCacheLookup(null);
  }, []);

  /**
   * Get cache statistics
   */
  const getCacheStats = useCallback(async (): Promise<CacheStats> => {
    return semanticCache.getStats();
  }, []);

  /**
   * Clear the semantic cache
   * @returns Object indicating which caches were cleared
   */
  const clearCache = useCallback(async (): Promise<{ localCleared: boolean; sharedCleared: boolean }> => {
    const result = await semanticCache.clear();
    console.log('[useMultiDatabaseRAG] Semantic cache cleared:', result);
    return result;
  }, []);

  /**
   * Update cache similarity threshold
   */
  const setCacheSimilarityThreshold = useCallback((threshold: number): void => {
    semanticCache.setConfig({ similarityThreshold: threshold });
    console.log(`[useMultiDatabaseRAG] Cache similarity threshold set to ${threshold}`);
  }, []);

  return {
    isLoading,
    error,
    lastContext,
    cacheEnabled,
    lastCacheLookup,
    queryRAG,
    searchQdrant,
    querySparql,
    queryTypeDB,
    clearContext,
    detectVisualizationType,
    setCacheEnabled,
    getCacheStats,
    clearCache,
    setCacheSimilarityThreshold,
  };
}
|
|
|
|
// Default export of the hook, in addition to the named export.
export default useMultiDatabaseRAG;
|