// Source: glam/frontend/src/hooks/useDuckDB.ts
// Snapshot: 2025-12-10 13:01:13 +01:00 (300 lines, 8.6 KiB, TypeScript)

/**
* React Hook for DuckDB-WASM Database Interaction
* Provides in-browser OLAP database for analytics on heritage institution data
*/
import { useState, useEffect, useCallback, useRef } from 'react';
import type { AsyncDuckDB, AsyncDuckDBConnection, DuckDBBundles } from '@duckdb/duckdb-wasm';
import * as duckdb from '@duckdb/duckdb-wasm';
/**
 * Aggregate statistics about the tables currently held in the in-browser database.
 */
export interface DuckDBStats {
  /** Number of tables described in `tables`. */
  totalTables: number;
  /** Sum of `rowCount` over every table. */
  totalRows: number;
  /** Per-table details: name, row count and column schema. */
  tables: {
    name: string;
    rowCount: number;
    /** Column names paired with their reported data types. */
    columns: { name: string; type: string }[];
  }[];
  /** Memory usage in megabytes. */
  memoryUsageMB: number;
}
/**
 * Connection health snapshot for the in-browser database.
 */
export interface DuckDBStatus {
  /** Whether a database connection is currently established. */
  isConnected: boolean;
  /** Engine version string; empty while not connected. */
  version: string;
  /** Time of the most recent status check, or `null` if none has happened yet. */
  lastChecked: Date | null;
  /** Duration of the most recent timed operation, in milliseconds. */
  responseTimeMs?: number;
}
/**
 * Everything the `useDuckDB` hook hands back to its caller.
 */
export interface UseDuckDBReturn {
  /** Current connection status. */
  status: DuckDBStatus;
  /** Latest table statistics, or `null` before the first refresh. */
  stats: DuckDBStats | null;
  /** True while initialisation or a statistics refresh is in progress. */
  isLoading: boolean;
  /** Most recent initialisation/refresh error, if any. */
  error: Error | null;
  /** Re-collect table statistics. */
  refresh: () => Promise<void>;
  /** Run an arbitrary SQL statement and resolve with its result rows. */
  executeQuery: (query: string) => Promise<unknown[]>;
  /** Create (or replace) a table from an array of records. */
  loadData: (tableName: string, data: unknown[], columns?: string[]) => Promise<void>;
  /** Drop a table if it exists. */
  dropTable: (tableName: string) => Promise<void>;
  /** Serialise a table's contents to text in the requested format. */
  exportTable: (tableName: string, format?: 'json' | 'csv' | 'parquet') => Promise<string>;
  /** The underlying database handle, or `null` until initialised. */
  db: AsyncDuckDB | null;
}
/**
 * DuckDB-WASM bundle locations.
 *
 * The assets are served from this application's own origin (under `/duckdb/`)
 * so that loading the worker and WASM module does not run into CORS issues.
 */
const DUCKDB_BUNDLES: DuckDBBundles = {
  // "mvp" bundle variant.
  mvp: {
    mainModule: '/duckdb/duckdb-mvp.wasm',
    mainWorker: '/duckdb/duckdb-browser-mvp.worker.js',
  },
  // "eh" bundle variant.
  eh: {
    mainModule: '/duckdb/duckdb-eh.wasm',
    mainWorker: '/duckdb/duckdb-browser-eh.worker.js',
  },
};
/** Wrap a name in double quotes for use as a SQL identifier, doubling embedded quotes. */
function quoteSqlIdentifier(name: string): string {
  return `"${name.replace(/"/g, '""')}"`;
}

/** Wrap text in single quotes for use as a SQL string literal, doubling embedded quotes. */
function quoteSqlLiteral(text: string): string {
  return `'${text.replace(/'/g, "''")}'`;
}

/** Convert a result row to a plain value via its `toJSON` method when it has one. */
function toPlainRow(row: Record<string, unknown> & { toJSON?: () => unknown }): unknown {
  return row.toJSON ? row.toJSON() : row;
}

/**
 * `JSON.stringify` replacer that makes BigInt values serialisable.
 * Values inside the safe-integer range become numbers; larger ones become
 * decimal strings so no precision is lost.
 */
function bigintSafeReplacer(_key: string, value: unknown): unknown {
  if (typeof value !== 'bigint') return value;
  const fitsInNumber =
    value >= BigInt(Number.MIN_SAFE_INTEGER) && value <= BigInt(Number.MAX_SAFE_INTEGER);
  return fitsInNumber ? Number(value) : value.toString();
}

/** Quote a CSV field, doubling embedded double quotes. */
function csvQuote(text: string): string {
  return `"${text.replace(/"/g, '""')}"`;
}

/** Render a CSV header cell, quoting it only when it contains a delimiter, quote or line break. */
function toCsvHeader(name: string): string {
  return /[",\r\n]/.test(name) ? csvQuote(name) : name;
}

/**
 * Render one cell value as a CSV field.
 * Strings are always quoted, numbers/booleans are written bare, null/undefined
 * become an empty quoted field, and structured values are JSON-encoded and quoted.
 */
function toCsvField(value: unknown): string {
  if (value === null || value === undefined) return '""';
  if (typeof value === 'string') return csvQuote(value);
  if (typeof value === 'number' || typeof value === 'boolean') return JSON.stringify(value);
  if (typeof value === 'bigint') return value.toString();
  if (value instanceof Date) {
    return Number.isNaN(value.getTime()) ? '""' : csvQuote(value.toISOString());
  }
  // Nested objects/arrays: JSON contains commas and quotes, so it must be quoted as a whole.
  return csvQuote(JSON.stringify(value, bigintSafeReplacer) ?? '');
}

/**
 * Hook for interacting with a DuckDB-WASM in-browser database.
 *
 * On mount it spawns the DuckDB worker, instantiates the database and opens a
 * single connection; on unmount it closes that connection and terminates the
 * worker. Once connected, table statistics are collected automatically.
 *
 * @returns Connection status, table statistics, loading/error flags, the raw
 *   database handle and helpers to query, load, drop and export tables. The
 *   helpers reject with `Error('DuckDB not initialized')` until the database is ready.
 */
export function useDuckDB(): UseDuckDBReturn {
  const [db, setDb] = useState<AsyncDuckDB | null>(null);
  const [conn, setConn] = useState<AsyncDuckDBConnection | null>(null);
  const [status, setStatus] = useState<DuckDBStatus>({
    isConnected: false,
    version: '',
    lastChecked: null,
  });
  const [stats, setStats] = useState<DuckDBStats | null>(null);
  const [isLoading, setIsLoading] = useState(true);
  const [error, setError] = useState<Error | null>(null);
  const initializingRef = useRef(false);
  // The unmount cleanup runs in the closure of the mount-time render, where the
  // `db`/`conn` state is still null. Refs let it reach the live handles instead.
  const dbRef = useRef<AsyncDuckDB | null>(null);
  const connRef = useRef<AsyncDuckDBConnection | null>(null);
  const mountedRef = useRef(true);

  /**
   * Initialize DuckDB-WASM: pick a bundle, start the worker, instantiate the
   * database, open a connection and record the engine version.
   */
  const initDB = useCallback(async (): Promise<void> => {
    if (initializingRef.current || dbRef.current) return;
    initializingRef.current = true;
    const startTime = performance.now();
    let worker: Worker | null = null;
    try {
      // Select the best bundle for this browser
      const bundle = await duckdb.selectBundle(DUCKDB_BUNDLES);
      if (!bundle.mainWorker) {
        throw new Error('Selected DuckDB bundle has no worker script');
      }
      // Create worker and database
      worker = new Worker(bundle.mainWorker);
      const logger = new duckdb.ConsoleLogger(duckdb.LogLevel.WARNING);
      const database = new duckdb.AsyncDuckDB(logger, worker);
      await database.instantiate(bundle.mainModule);
      // Create connection
      const connection = await database.connect();
      // Get version
      const versionResult = await connection.query('SELECT version() as version');
      const version = versionResult.toArray()[0]?.version || 'unknown';
      const responseTimeMs = Math.round(performance.now() - startTime);

      if (!mountedRef.current) {
        // The component went away while we were starting up; nobody will ever
        // clean these up, so release them right here.
        await connection.close();
        await database.terminate();
        return;
      }

      dbRef.current = database;
      connRef.current = connection;
      setDb(database);
      setConn(connection);
      setStatus({
        isConnected: true,
        version,
        lastChecked: new Date(),
        responseTimeMs,
      });
    } catch (err) {
      console.error('Failed to initialize DuckDB:', err);
      // Do not leave a half-initialised worker running in the background.
      worker?.terminate();
      setError(err instanceof Error ? err : new Error('Failed to initialize DuckDB'));
      setStatus({
        isConnected: false,
        version: '',
        lastChecked: new Date(),
      });
    } finally {
      setIsLoading(false);
      initializingRef.current = false;
    }
  }, []);

  // Initialize on mount, release the connection and worker on unmount
  useEffect(() => {
    mountedRef.current = true;
    void initDB();
    return () => {
      mountedRef.current = false;
      const connection = connRef.current;
      const database = dbRef.current;
      connRef.current = null;
      dbRef.current = null;
      void (async () => {
        try {
          await connection?.close();
        } catch (err) {
          console.error('Failed to close DuckDB connection:', err);
        }
        try {
          await database?.terminate();
        } catch (err) {
          console.error('Failed to terminate DuckDB:', err);
        }
      })();
    };
  }, [initDB]);

  /**
   * Refresh database statistics: table list, row counts and column schemas
   * for every table in the `main` schema.
   */
  const refresh = useCallback(async (): Promise<void> => {
    if (!conn) return;
    setIsLoading(true);
    setError(null);
    const startTime = performance.now();
    try {
      // Get list of tables
      const tablesResult = await conn.query(`
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = 'main'
      `);
      const tableNames = tablesResult
        .toArray()
        .map((row: Record<string, unknown>) => row.table_name as string);
      // Get details for each table
      const tables = await Promise.all(
        tableNames.map(async (tableName: string) => {
          // Get row count (COUNT(*) may come back as a BigInt, hence Number())
          const countResult = await conn.query(
            `SELECT COUNT(*) as cnt FROM ${quoteSqlIdentifier(tableName)}`
          );
          const rowCount = Number(countResult.toArray()[0]?.cnt ?? 0);
          // Get columns
          const columnsResult = await conn.query(`
            SELECT column_name, data_type
            FROM information_schema.columns
            WHERE table_name = ${quoteSqlLiteral(tableName)} AND table_schema = 'main'
          `);
          const columns = columnsResult.toArray().map((row: Record<string, unknown>) => ({
            name: row.column_name as string,
            type: row.data_type as string,
          }));
          return { name: tableName, rowCount, columns };
        })
      );
      const totalRows = tables.reduce(
        (sum: number, t: { rowCount: number }) => sum + t.rowCount,
        0
      );
      const responseTimeMs = Math.round(performance.now() - startTime);
      setStats({
        totalTables: tables.length,
        totalRows,
        tables,
        memoryUsageMB: 0, // Memory tracking not easily available in WASM
      });
      setStatus(prev => ({
        ...prev,
        isConnected: true,
        lastChecked: new Date(),
        responseTimeMs,
      }));
    } catch (err) {
      console.error('Failed to refresh DuckDB stats:', err);
      setError(err instanceof Error ? err : new Error('Failed to refresh stats'));
    } finally {
      setIsLoading(false);
    }
  }, [conn]);

  // Refresh stats when connection is established
  useEffect(() => {
    if (conn && status.isConnected) {
      void refresh();
    }
  }, [conn, status.isConnected, refresh]);

  /**
   * Execute a SQL query.
   *
   * @param query SQL text, executed verbatim.
   * @returns The result rows as plain values.
   * @throws Error if the database is not initialized.
   */
  const executeQuery = useCallback(
    async (query: string): Promise<unknown[]> => {
      if (!conn) throw new Error('DuckDB not initialized');
      const result = await conn.query(query);
      return result.toArray().map(toPlainRow);
    },
    [conn]
  );

  /**
   * Load data into a table, replacing any existing table of the same name.
   *
   * @param tableName Target table name.
   * @param data Records to load; serialised to JSON and read with `read_json_auto`.
   * @param columns Optional select list. Entries are inserted into the SQL
   *   verbatim (so expressions work) and must therefore come from trusted code.
   * @throws Error if the database is not initialized.
   */
  const loadData = useCallback(
    async (tableName: string, data: unknown[], columns?: string[]): Promise<void> => {
      if (!conn || !db) throw new Error('DuckDB not initialized');
      // Register JSON data as a file
      const fileName = `${tableName}.json`;
      await db.registerFileText(fileName, JSON.stringify(data));
      try {
        // Create table from JSON
        const selectList = columns && columns.length > 0 ? columns.join(', ') : '*';
        await conn.query(`
          CREATE OR REPLACE TABLE ${quoteSqlIdentifier(tableName)} AS
          SELECT ${selectList}
          FROM read_json_auto(${quoteSqlLiteral(fileName)})
        `);
      } finally {
        // The table now owns a copy of the rows; drop the staging file so the
        // JSON payload does not stay resident for the lifetime of the database.
        try {
          await db.dropFile(fileName);
        } catch (err) {
          console.warn('Failed to drop temporary DuckDB file:', err);
        }
      }
      await refresh();
    },
    [conn, db, refresh]
  );

  /**
   * Drop a table if it exists, then refresh statistics.
   *
   * @throws Error if the database is not initialized.
   */
  const dropTable = useCallback(
    async (tableName: string): Promise<void> => {
      if (!conn) throw new Error('DuckDB not initialized');
      await conn.query(`DROP TABLE IF EXISTS ${quoteSqlIdentifier(tableName)}`);
      await refresh();
    },
    [conn, refresh]
  );

  /**
   * Export table data as text.
   *
   * @param tableName Table to export.
   * @param format `'json'` (default) or `'csv'`; `'parquet'` currently falls
   *   back to JSON output.
   * @returns The serialised table; an empty string for CSV export of an empty table.
   * @throws Error if the database is not initialized.
   */
  const exportTable = useCallback(
    async (tableName: string, format: 'json' | 'csv' | 'parquet' = 'json'): Promise<string> => {
      if (!conn) throw new Error('DuckDB not initialized');
      const result = await conn.query(`SELECT * FROM ${quoteSqlIdentifier(tableName)}`);
      const rows: unknown[] = result.toArray().map(toPlainRow);
      if (format === 'csv') {
        if (rows.length === 0) return '';
        const headers = Object.keys(rows[0] as object);
        const csvRows = rows.map(row =>
          headers.map(h => toCsvField((row as Record<string, unknown>)[h])).join(',')
        );
        return [headers.map(toCsvHeader).join(','), ...csvRows].join('\n');
      }
      // Parquet would require special handling - for now return JSON.
      // The replacer keeps 64-bit integer columns from making JSON.stringify throw.
      return JSON.stringify(rows, bigintSafeReplacer, 2);
    },
    [conn]
  );

  return {
    status,
    stats,
    isLoading,
    error,
    refresh,
    executeQuery,
    loadData,
    dropTable,
    exportTable,
    db,
  };
}