300 lines
8.6 KiB
TypeScript
300 lines
8.6 KiB
TypeScript
/**
 * React Hook for DuckDB-WASM Database Interaction
 *
 * Provides an in-browser OLAP database for analytics on heritage institution data.
 */
|
|
|
|
import { useState, useEffect, useCallback, useRef } from 'react';
|
|
import type { AsyncDuckDB, AsyncDuckDBConnection, DuckDBBundles } from '@duckdb/duckdb-wasm';
|
|
import * as duckdb from '@duckdb/duckdb-wasm';
|
|
|
|
/**
 * Snapshot of the database contents as collected by the hook's `refresh`.
 */
export interface DuckDBStats {
  /** Number of entries in `tables`. */
  totalTables: number;
  /** Sum of `rowCount` over every entry in `tables`. */
  totalRows: number;
  /** One entry per table, with its row count and column metadata. */
  tables: {
    /** Table name as reported by `information_schema.tables`. */
    name: string;
    /** Result of `COUNT(*)` for the table. */
    rowCount: number;
    /** Column name / data type pairs from `information_schema.columns`. */
    columns: {
      name: string;
      type: string;
    }[];
  }[];
  /** Memory usage in megabytes; the hook currently always reports 0. */
  memoryUsageMB: number;
}
|
|
|
|
/**
 * Connection health information exposed by the hook.
 */
export interface DuckDBStatus {
  /** True once initialization (or a later refresh) has succeeded. */
  isConnected: boolean;
  /** Value of `SELECT version()`; empty string while not connected. */
  version: string;
  /** Time of the most recent init/refresh attempt, or null before the first one. */
  lastChecked: Date | null;
  /** Rounded duration in milliseconds of the last successful init/refresh. */
  responseTimeMs?: number;
}
|
|
|
|
/**
 * Everything `useDuckDB` hands back to the calling component.
 */
export interface UseDuckDBReturn {
  /** Current connection status. */
  status: DuckDBStatus;
  /** Latest table statistics, or null until the first refresh completes. */
  stats: DuckDBStats | null;
  /** True while initialization or a statistics refresh is in progress. */
  isLoading: boolean;
  /** Last error raised by initialization or refresh, if any. */
  error: Error | null;

  /** Re-reads table statistics; does nothing when there is no connection. */
  refresh: () => Promise<void>;
  /** Runs a SQL query and resolves with its rows; rejects when not initialized. */
  executeQuery: (query: string) => Promise<unknown[]>;
  /**
   * Creates or replaces `tableName` from `data`; when `columns` is given and
   * non-empty only those columns are selected. Rejects when not initialized.
   */
  loadData: (tableName: string, data: unknown[], columns?: string[]) => Promise<void>;
  /** Drops `tableName` if it exists; rejects when not initialized. */
  dropTable: (tableName: string) => Promise<void>;
  /**
   * Serializes a whole table to a string (default format: 'json').
   * 'parquet' currently produces the same output as 'json'.
   */
  exportTable: (tableName: string, format?: 'json' | 'csv' | 'parquet') => Promise<string>;

  /** The underlying database instance, or null before initialization. */
  db: AsyncDuckDB | null;
}
|
|
|
|
/**
 * DuckDB-WASM bundle locations.
 *
 * All assets are served from this application's own origin (under `/duckdb/`)
 * so that loading the worker script and WASM module does not run into CORS
 * issues. `duckdb.selectBundle` chooses between the entries at init time.
 */
const DUCKDB_BUNDLES: DuckDBBundles = {
  mvp: {
    mainModule: '/duckdb/duckdb-mvp.wasm',
    mainWorker: '/duckdb/duckdb-browser-mvp.worker.js',
  },
  eh: {
    mainModule: '/duckdb/duckdb-eh.wasm',
    mainWorker: '/duckdb/duckdb-browser-eh.worker.js',
  },
};
|
|
|
|
/** Row shape produced by `toArray()` on a query result; some rows expose `toJSON()`. */
type QueryResultRow = Record<string, unknown> & { toJSON?: () => unknown };

/**
 * Quotes `name` as a SQL identifier, doubling any embedded double quote so a
 * table name can never terminate the identifier early.
 */
function quoteIdentifier(name: string): string {
  return `"${name.replace(/"/g, '""')}"`;
}

/**
 * Quotes `value` as a SQL string literal, doubling any embedded single quote.
 */
function quoteLiteral(value: string): string {
  return `'${value.replace(/'/g, "''")}'`;
}

/**
 * `JSON.stringify` replacer that makes `bigint` values serializable.
 * Values that fit a safe integer become numbers; larger ones become strings
 * so no precision is lost.
 */
function jsonBigIntReplacer(_key: string, value: unknown): unknown {
  if (typeof value !== 'bigint') return value;
  const asNumber = Number(value);
  return Number.isSafeInteger(asNumber) ? asNumber : value.toString();
}

/** Converts query result rows to plain objects via `toJSON()` where available. */
function toPlainRows(result: { toArray(): unknown[] }): unknown[] {
  return result.toArray().map((row) => {
    const candidate = row as QueryResultRow;
    return candidate.toJSON ? candidate.toJSON() : candidate;
  });
}

/**
 * Formats one CSV data cell.
 * Numbers, booleans and bigints are written bare; null/undefined become an
 * empty quoted field; everything else is written as a quoted field with
 * embedded quotes doubled (CSV quoting rather than JSON escaping).
 */
function formatCsvCell(value: unknown): string {
  if (value === null || value === undefined) return '""';
  if (typeof value === 'number' || typeof value === 'boolean') return JSON.stringify(value);
  if (typeof value === 'bigint') return value.toString();

  let text: string;
  if (typeof value === 'string') {
    text = value;
  } else if (value instanceof Date) {
    // An invalid Date cannot be converted with toISOString(); emit an empty field.
    text = Number.isNaN(value.getTime()) ? '' : value.toISOString();
  } else {
    // Nested values (structs, lists) are embedded as JSON text.
    text = JSON.stringify(value, jsonBigIntReplacer) ?? '';
  }
  return `"${text.replace(/"/g, '""')}"`;
}

/** Formats a CSV header cell, quoting it only when it contains a delimiter, quote or newline. */
function formatCsvHeader(name: string): string {
  return /[",\r\n]/.test(name) ? `"${name.replace(/"/g, '""')}"` : name;
}

/**
 * Serializes rows to CSV. The header is taken from the keys of the first row;
 * an empty row list yields an empty string.
 */
function rowsToCsv(rows: unknown[]): string {
  if (rows.length === 0) return '';
  const headers = Object.keys(rows[0] as object);
  const lines = rows.map((row) =>
    headers.map((header) => formatCsvCell((row as Record<string, unknown>)[header])).join(',')
  );
  return [headers.map(formatCsvHeader).join(','), ...lines].join('\n');
}

/**
 * Closes the connection and then terminates the database, logging (not
 * throwing) on failure so teardown always runs to completion.
 */
async function disposeDuckDB(
  connection: AsyncDuckDBConnection | null,
  database: AsyncDuckDB | null
): Promise<void> {
  try {
    await connection?.close();
  } catch (err) {
    console.warn('Failed to close DuckDB connection:', err);
  }
  try {
    await database?.terminate();
  } catch (err) {
    console.warn('Failed to terminate DuckDB:', err);
  }
}

/**
 * Hook for interacting with DuckDB-WASM in-browser database
 *
 * Initializes the database once on mount, keeps table statistics up to date
 * and exposes helpers to query, load, drop and export tables. The connection
 * and the database are released when the component unmounts.
 *
 * @returns Status, statistics, loading/error state, the action callbacks and
 *          the raw database handle.
 */
export function useDuckDB(): UseDuckDBReturn {
  const [db, setDb] = useState<AsyncDuckDB | null>(null);
  const [conn, setConn] = useState<AsyncDuckDBConnection | null>(null);
  const [status, setStatus] = useState<DuckDBStatus>({
    isConnected: false,
    version: '',
    lastChecked: null,
  });
  const [stats, setStats] = useState<DuckDBStats | null>(null);
  const [isLoading, setIsLoading] = useState(true);
  const [error, setError] = useState<Error | null>(null);

  const initializingRef = useRef(false);
  // Refs mirror the live handles so the unmount cleanup sees the current
  // values instead of the nulls captured by the first render's closure.
  const mountedRef = useRef(false);
  const dbRef = useRef<AsyncDuckDB | null>(null);
  const connRef = useRef<AsyncDuckDBConnection | null>(null);

  /**
   * Initialize DuckDB-WASM
   */
  const initDB = useCallback(async () => {
    if (initializingRef.current || dbRef.current) return;
    initializingRef.current = true;

    const startTime = performance.now();

    let worker: Worker | null = null;
    let database: AsyncDuckDB | null = null;
    let connection: AsyncDuckDBConnection | null = null;

    try {
      // Select the best bundle for this browser
      const bundle = await duckdb.selectBundle(DUCKDB_BUNDLES);
      if (!bundle.mainWorker) {
        throw new Error('No DuckDB worker script available for this browser');
      }

      // Create worker and database
      worker = new Worker(bundle.mainWorker);
      const logger = new duckdb.ConsoleLogger(duckdb.LogLevel.WARNING);
      database = new duckdb.AsyncDuckDB(logger, worker);

      await database.instantiate(bundle.mainModule);

      // Create connection
      connection = await database.connect();

      // Get version
      const versionResult = await connection.query('SELECT version() as version');
      const version = versionResult.toArray()[0]?.version || 'unknown';

      const responseTimeMs = Math.round(performance.now() - startTime);

      if (!mountedRef.current) {
        // The component went away while we were starting up: release everything.
        await disposeDuckDB(connection, database);
        return;
      }

      dbRef.current = database;
      connRef.current = connection;
      setDb(database);
      setConn(connection);
      setStatus({
        isConnected: true,
        version,
        lastChecked: new Date(),
        responseTimeMs,
      });
    } catch (err) {
      console.error('Failed to initialize DuckDB:', err);

      // Do not leave a half-initialized worker running in the background.
      if (database) {
        await disposeDuckDB(connection, database);
      } else {
        worker?.terminate();
      }

      if (mountedRef.current) {
        setError(err instanceof Error ? err : new Error('Failed to initialize DuckDB'));
        setStatus({
          isConnected: false,
          version: '',
          lastChecked: new Date(),
        });
      }
    } finally {
      initializingRef.current = false;
      if (mountedRef.current) {
        setIsLoading(false);
      }
    }
  }, []);

  // Initialize on mount
  useEffect(() => {
    mountedRef.current = true;
    void initDB();

    // Cleanup on unmount
    return () => {
      mountedRef.current = false;
      const connection = connRef.current;
      const database = dbRef.current;
      connRef.current = null;
      dbRef.current = null;
      void disposeDuckDB(connection, database);
    };
  }, [initDB]);

  /**
   * Refresh database statistics
   */
  const refresh = useCallback(async () => {
    if (!conn) return;

    setIsLoading(true);
    setError(null);

    const startTime = performance.now();

    try {
      // Get list of tables
      const tablesResult = await conn.query(`
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = 'main'
      `);
      const tableNames = tablesResult
        .toArray()
        .map((row: Record<string, unknown>) => row.table_name as string);

      // Get details for each table
      const tables = await Promise.all(
        tableNames.map(async (tableName: string) => {
          // Get row count
          const countResult = await conn.query(
            `SELECT COUNT(*) as cnt FROM ${quoteIdentifier(tableName)}`
          );
          const rowCount = Number(countResult.toArray()[0]?.cnt ?? 0);

          // Get columns
          const columnsResult = await conn.query(`
            SELECT column_name, data_type
            FROM information_schema.columns
            WHERE table_name = ${quoteLiteral(tableName)} AND table_schema = 'main'
          `);
          const columns = columnsResult.toArray().map((row: Record<string, unknown>) => ({
            name: row.column_name as string,
            type: row.data_type as string,
          }));

          return { name: tableName, rowCount, columns };
        })
      );

      const totalRows = tables.reduce(
        (sum: number, t: { rowCount: number }) => sum + t.rowCount,
        0
      );

      const responseTimeMs = Math.round(performance.now() - startTime);

      setStats({
        totalTables: tables.length,
        totalRows,
        tables,
        memoryUsageMB: 0, // Memory tracking not easily available in WASM
      });

      setStatus((prev) => ({
        ...prev,
        isConnected: true,
        lastChecked: new Date(),
        responseTimeMs,
      }));
    } catch (err) {
      console.error('Failed to refresh DuckDB stats:', err);
      setError(err instanceof Error ? err : new Error('Failed to refresh stats'));
    } finally {
      setIsLoading(false);
    }
  }, [conn]);

  // Refresh stats when connection is established
  useEffect(() => {
    if (conn && status.isConnected) {
      void refresh();
    }
  }, [conn, status.isConnected, refresh]);

  /**
   * Execute a SQL query
   *
   * @throws Error when the database has not been initialized yet.
   */
  const executeQuery = useCallback(
    async (query: string): Promise<unknown[]> => {
      if (!conn) throw new Error('DuckDB not initialized');

      const result = await conn.query(query);
      return toPlainRows(result);
    },
    [conn]
  );

  /**
   * Load data into a table
   *
   * The data is registered as a temporary JSON file, read into a table that
   * replaces any existing table of the same name, and the temporary file is
   * dropped again afterwards.
   *
   * NOTE: `columns` entries are inserted into the SELECT list verbatim; they
   * must come from trusted code.
   *
   * @throws Error when the database has not been initialized yet.
   */
  const loadData = useCallback(
    async (tableName: string, data: unknown[], columns?: string[]): Promise<void> => {
      if (!conn || !db) throw new Error('DuckDB not initialized');

      // Register JSON data as a file
      const fileName = `${tableName}.json`;
      await db.registerFileText(fileName, JSON.stringify(data));

      try {
        // Create table from JSON
        const projection = columns && columns.length > 0 ? columns.join(', ') : '*';
        await conn.query(`
          CREATE OR REPLACE TABLE ${quoteIdentifier(tableName)} AS
          SELECT ${projection}
          FROM read_json_auto(${quoteLiteral(fileName)})
        `);
      } finally {
        // The table now owns its data; release the registered file buffer.
        await db.dropFile(fileName).catch((err: unknown) => {
          console.warn('Failed to drop temporary DuckDB file:', err);
        });
      }

      await refresh();
    },
    [conn, db, refresh]
  );

  /**
   * Drop a table
   *
   * @throws Error when the database has not been initialized yet.
   */
  const dropTable = useCallback(
    async (tableName: string): Promise<void> => {
      if (!conn) throw new Error('DuckDB not initialized');

      await conn.query(`DROP TABLE IF EXISTS ${quoteIdentifier(tableName)}`);
      await refresh();
    },
    [conn, refresh]
  );

  /**
   * Export table data
   *
   * @param tableName Table to export in full.
   * @param format 'json' (default) or 'csv'; 'parquet' is not implemented and
   *               falls back to JSON output.
   * @throws Error when the database has not been initialized yet.
   */
  const exportTable = useCallback(
    async (
      tableName: string,
      format: 'json' | 'csv' | 'parquet' = 'json'
    ): Promise<string> => {
      if (!conn) throw new Error('DuckDB not initialized');

      const result = await conn.query(`SELECT * FROM ${quoteIdentifier(tableName)}`);
      const rows = toPlainRows(result);

      if (format === 'csv') {
        return rowsToCsv(rows);
      }

      // Parquet would require special handling - for now return JSON.
      // The replacer keeps bigint column values from making JSON.stringify throw.
      return JSON.stringify(rows, jsonBigIntReplacer, 2);
    },
    [conn]
  );

  return {
    status,
    stats,
    isLoading,
    error,
    refresh,
    executeQuery,
    loadData,
    dropTable,
    exportTable,
    db,
  };
}
|