/**
 * React Hook for DuckDB-WASM Database Interaction
 * Provides in-browser OLAP database for analytics on heritage institution data
 */

import { useState, useEffect, useCallback, useRef } from 'react';
import type { AsyncDuckDB, AsyncDuckDBConnection, DuckDBBundles } from '@duckdb/duckdb-wasm';
import * as duckdb from '@duckdb/duckdb-wasm';

/** Aggregate statistics about the tables in the `main` schema. */
export interface DuckDBStats {
  totalTables: number;
  totalRows: number;
  tables: Array<{
    name: string;
    rowCount: number;
    columns: Array<{
      name: string;
      type: string;
    }>;
  }>;
  /** Always reported as 0 by this hook: memory tracking is not wired up. */
  memoryUsageMB: number;
}

/** Connection status of the in-browser database. */
export interface DuckDBStatus {
  isConnected: boolean;
  version: string;
  lastChecked: Date | null;
  responseTimeMs?: number;
}

/** Value returned by {@link useDuckDB}. */
export interface UseDuckDBReturn {
  status: DuckDBStatus;
  stats: DuckDBStats | null;
  isLoading: boolean;
  error: Error | null;
  refresh: () => Promise<void>;
  executeQuery: (query: string) => Promise<unknown[]>;
  loadData: (tableName: string, data: unknown[], columns?: string[]) => Promise<void>;
  dropTable: (tableName: string) => Promise<void>;
  exportTable: (tableName: string, format?: 'json' | 'csv' | 'parquet') => Promise<string>;
  db: AsyncDuckDB | null;
}

// Local bundles for DuckDB-WASM (served from same origin to avoid CORS issues)
const DUCKDB_BUNDLES: DuckDBBundles = {
  mvp: {
    mainModule: '/duckdb/duckdb-mvp.wasm',
    mainWorker: '/duckdb/duckdb-browser-mvp.worker.js',
  },
  eh: {
    mainModule: '/duckdb/duckdb-eh.wasm',
    mainWorker: '/duckdb/duckdb-browser-eh.worker.js',
  },
};

/** Quote a SQL identifier, doubling embedded double quotes. */
function quoteIdentifier(name: string): string {
  return `"${name.replace(/"/g, '""')}"`;
}

/** Quote a SQL string literal, doubling embedded single quotes. */
function quoteLiteral(value: string): string {
  return `'${value.replace(/'/g, "''")}'`;
}

/**
 * JSON.stringify replacer that makes bigint values serialisable.
 * Values inside the safe integer range become numbers; larger ones become
 * strings so no precision is lost. Without this, JSON.stringify throws.
 */
function jsonReplacer(_key: string, value: unknown): unknown {
  if (typeof value !== 'bigint') return value;
  const isSafe =
    value >= BigInt(Number.MIN_SAFE_INTEGER) && value <= BigInt(Number.MAX_SAFE_INTEGER);
  return isSafe ? Number(value) : value.toString();
}

/** Render one value as a CSV cell (RFC 4180 quoting: `"` is doubled). */
function toCsvCell(value: unknown): string {
  if (value === null || value === undefined) return '""';
  if (typeof value === 'number' || typeof value === 'boolean' || typeof value === 'bigint') {
    return String(value);
  }
  let text: string;
  if (typeof value === 'string') {
    text = value;
  } else if (value instanceof Date) {
    text = value.toISOString();
  } else {
    // Nested structures are embedded as a JSON string inside a quoted cell.
    text = JSON.stringify(value, jsonReplacer) ?? '';
  }
  return `"${text.replace(/"/g, '""')}"`;
}

/** Render a CSV header, quoting only when the name contains a delimiter. */
function toCsvHeader(name: string): string {
  return /[",\r\n]/.test(name) ? `"${name.replace(/"/g, '""')}"` : name;
}

/** Close the connection and terminate the database, logging (not throwing) failures. */
async function releaseDuckDB(
  connection: AsyncDuckDBConnection | null,
  database: AsyncDuckDB | null
): Promise<void> {
  try {
    await connection?.close();
  } catch (err) {
    console.warn('Failed to close DuckDB connection:', err);
  }
  try {
    await database?.terminate();
  } catch (err) {
    console.warn('Failed to terminate DuckDB:', err);
  }
}

/**
 * Hook for interacting with DuckDB-WASM in-browser database.
 *
 * Instantiates the database once on mount, exposes query/load/drop/export
 * helpers, and closes the connection and terminates the database on unmount.
 *
 * @returns Connection status, table statistics, loading/error state and the
 *   database helpers. The helpers throw `Error('DuckDB not initialized')` when
 *   called before the connection is ready.
 */
export function useDuckDB(): UseDuckDBReturn {
  const [db, setDb] = useState<AsyncDuckDB | null>(null);
  const [conn, setConn] = useState<AsyncDuckDBConnection | null>(null);
  const [status, setStatus] = useState<DuckDBStatus>({
    isConnected: false,
    version: '',
    lastChecked: null,
  });
  const [stats, setStats] = useState<DuckDBStats | null>(null);
  const [isLoading, setIsLoading] = useState(true);
  const [error, setError] = useState<Error | null>(null);
  const initializingRef = useRef(false);

  // Refs mirror the live handles so the unmount cleanup releases the real
  // objects instead of the `null` values captured by the first render.
  const dbRef = useRef<AsyncDuckDB | null>(null);
  const connRef = useRef<AsyncDuckDBConnection | null>(null);
  const mountedRef = useRef(false);

  /**
   * Initialize DuckDB-WASM
   */
  const initDB = useCallback(async (): Promise<void> => {
    if (initializingRef.current || dbRef.current) return;
    initializingRef.current = true;

    const startTime = performance.now();
    let worker: Worker | null = null;
    let database: AsyncDuckDB | null = null;
    let connection: AsyncDuckDBConnection | null = null;

    try {
      // Select the best bundle for this browser
      const bundle = await duckdb.selectBundle(DUCKDB_BUNDLES);
      if (!bundle.mainWorker) {
        throw new Error('No DuckDB worker script available for this browser');
      }

      // Create worker and database
      worker = new Worker(bundle.mainWorker);
      const logger = new duckdb.ConsoleLogger(duckdb.LogLevel.WARNING);
      database = new duckdb.AsyncDuckDB(logger, worker);
      await database.instantiate(bundle.mainModule);

      // Create connection
      connection = await database.connect();

      // Get version
      const versionResult = await connection.query('SELECT version() as version');
      const version = versionResult.toArray()[0]?.version || 'unknown';
      const responseTimeMs = Math.round(performance.now() - startTime);

      if (!mountedRef.current) {
        // The component went away while we were starting up: nothing will
        // ever clean these handles up, so release them here.
        await releaseDuckDB(connection, database);
        return;
      }

      dbRef.current = database;
      connRef.current = connection;
      setDb(database);
      setConn(connection);
      setStatus({
        isConnected: true,
        version,
        lastChecked: new Date(),
        responseTimeMs,
      });
    } catch (err) {
      console.error('Failed to initialize DuckDB:', err);

      // Release whatever was created before the failure.
      if (database) {
        await releaseDuckDB(connection, database);
      } else {
        worker?.terminate();
      }

      if (mountedRef.current) {
        setError(err instanceof Error ? err : new Error('Failed to initialize DuckDB'));
        setStatus({
          isConnected: false,
          version: '',
          lastChecked: new Date(),
        });
      }
    } finally {
      if (mountedRef.current) {
        setIsLoading(false);
      }
      initializingRef.current = false;
    }
  }, []);

  // Initialize on mount
  useEffect(() => {
    mountedRef.current = true;
    void initDB();

    // Cleanup on unmount
    return () => {
      mountedRef.current = false;
      const connection = connRef.current;
      const database = dbRef.current;
      connRef.current = null;
      dbRef.current = null;
      void releaseDuckDB(connection, database);
    };
  }, [initDB]);

  /**
   * Refresh database statistics
   */
  const refresh = useCallback(async (): Promise<void> => {
    if (!conn) return;

    setIsLoading(true);
    setError(null);
    const startTime = performance.now();

    try {
      // Get list of tables
      const tablesResult = await conn.query(`
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = 'main'
      `);
      const tableNames = tablesResult
        .toArray()
        .map((row: Record<string, unknown>) => row.table_name as string);

      // Get details for each table
      const tables = await Promise.all(
        tableNames.map(async (tableName: string) => {
          // Get row count (COUNT(*) comes back as a bigint)
          const countResult = await conn.query(
            `SELECT COUNT(*) as cnt FROM ${quoteIdentifier(tableName)}`
          );
          const rowCount = Number(countResult.toArray()[0]?.cnt ?? 0);

          // Get columns
          const columnsResult = await conn.query(`
            SELECT column_name, data_type
            FROM information_schema.columns
            WHERE table_name = ${quoteLiteral(tableName)} AND table_schema = 'main'
          `);
          const columns = columnsResult.toArray().map((row: Record<string, unknown>) => ({
            name: row.column_name as string,
            type: row.data_type as string,
          }));

          return { name: tableName, rowCount, columns };
        })
      );

      const totalRows = tables.reduce(
        (sum: number, t: { rowCount: number }) => sum + t.rowCount,
        0
      );
      const responseTimeMs = Math.round(performance.now() - startTime);

      setStats({
        totalTables: tables.length,
        totalRows,
        tables,
        memoryUsageMB: 0, // Memory tracking not easily available in WASM
      });
      setStatus(prev => ({
        ...prev,
        isConnected: true,
        lastChecked: new Date(),
        responseTimeMs,
      }));
    } catch (err) {
      console.error('Failed to refresh DuckDB stats:', err);
      setError(err instanceof Error ? err : new Error('Failed to refresh stats'));
    } finally {
      setIsLoading(false);
    }
  }, [conn]);

  // Refresh stats when connection is established
  useEffect(() => {
    if (conn && status.isConnected) {
      void refresh();
    }
  }, [conn, status.isConnected, refresh]);

  /**
   * Execute a SQL query and return its rows as plain objects.
   * The query text is executed verbatim; callers own its safety.
   */
  const executeQuery = useCallback(
    async (query: string): Promise<unknown[]> => {
      if (!conn) throw new Error('DuckDB not initialized');

      const result = await conn.query(query);
      return result
        .toArray()
        .map((row: Record<string, unknown> & { toJSON?: () => unknown }) =>
          row.toJSON ? row.toJSON() : row
        );
    },
    [conn]
  );

  /**
   * Load data into a table (created or replaced) from an array of records.
   *
   * @param tableName - Target table name; quoted as an identifier.
   * @param data - Records serialised to JSON and read with `read_json_auto`.
   * @param columns - Optional select list. NOTE: entries are interpolated
   *   verbatim into the SELECT clause, so they must come from trusted code.
   */
  const loadData = useCallback(
    async (tableName: string, data: unknown[], columns?: string[]): Promise<void> => {
      if (!conn || !db) throw new Error('DuckDB not initialized');

      // Register JSON data as a file
      const fileName = `${tableName}.json`;
      await db.registerFileText(fileName, JSON.stringify(data));

      try {
        // Create table from JSON
        const selection = columns && columns.length > 0 ? columns.join(', ') : '*';
        await conn.query(`
          CREATE OR REPLACE TABLE ${quoteIdentifier(tableName)} AS
          SELECT ${selection} FROM read_json_auto(${quoteLiteral(fileName)})
        `);
      } finally {
        // The table is materialised; free the registered JSON buffer.
        await db.dropFile(fileName);
      }

      await refresh();
    },
    [conn, db, refresh]
  );

  /**
   * Drop a table if it exists, then refresh the statistics.
   */
  const dropTable = useCallback(
    async (tableName: string): Promise<void> => {
      if (!conn) throw new Error('DuckDB not initialized');

      await conn.query(`DROP TABLE IF EXISTS ${quoteIdentifier(tableName)}`);
      await refresh();
    },
    [conn, refresh]
  );

  /**
   * Export table data as text.
   *
   * @param tableName - Table to export; quoted as an identifier.
   * @param format - `'json'` (default) or `'csv'`. `'parquet'` is not
   *   implemented and falls back to JSON.
   * @returns The serialised table; an empty string for CSV of an empty table.
   */
  const exportTable = useCallback(
    async (tableName: string, format: 'json' | 'csv' | 'parquet' = 'json'): Promise<string> => {
      if (!conn) throw new Error('DuckDB not initialized');

      const result = await conn.query(`SELECT * FROM ${quoteIdentifier(tableName)}`);
      const rows: unknown[] = result
        .toArray()
        .map((row: Record<string, unknown> & { toJSON?: () => unknown }) =>
          row.toJSON ? row.toJSON() : row
        );

      if (format === 'csv') {
        if (rows.length === 0) return '';

        const headers = Object.keys(rows[0] as object);
        const csvRows = rows.map((row: unknown) =>
          headers.map(h => toCsvCell((row as Record<string, unknown>)[h])).join(',')
        );
        return [headers.map(toCsvHeader).join(','), ...csvRows].join('\n');
      }

      // JSON, and Parquet for now (Parquet would require special handling).
      // The replacer keeps BIGINT columns from throwing in JSON.stringify.
      return JSON.stringify(rows, jsonReplacer, 2);
    },
    [conn]
  );

  return {
    status,
    stats,
    isLoading,
    error,
    refresh,
    executeQuery,
    loadData,
    dropTable,
    exportTable,
    db,
  };
}