glam/frontend/src/hooks/useDuckLake.ts
2025-12-09 07:56:35 +01:00

569 lines
15 KiB
TypeScript

/**
* React Hook for DuckLake Lakehouse API Interaction
* Provides time travel, ACID transactions, and schema evolution for heritage data
*
* DuckLake Features:
* - Time travel (query historical snapshots)
* - ACID transactions (multi-client writes)
* - Schema evolution tracking
* - Open format (Parquet + SQLite/PostgreSQL catalog)
*/
import { useState, useEffect, useCallback } from 'react';
// Base URL of the DuckLake API, resolved once when the module loads.
// Resolution order:
//   1. VITE_DUCKLAKE_URL, when set, always wins (explicit override).
//   2. On the production hosts (bronhouder.nl or the production IP) the
//      relative /ducklake path is used, which is served via the Caddy proxy.
//   3. Otherwise fall back to the local development server on port 8765.
const DUCKLAKE_BASE_URL = (() => {
  const explicitUrl = import.meta.env.VITE_DUCKLAKE_URL;
  if (explicitUrl) {
    return explicitUrl;
  }

  const productionHosts = ['bronhouder.nl', 'www.bronhouder.nl', '91.98.224.44'];
  // `window` is guarded because it may be undefined outside a browser.
  const servedFromProduction =
    typeof window !== 'undefined' && productionHosts.includes(window.location.hostname);

  return servedFromProduction ? '/ducklake' : 'http://localhost:8765';
})();
// ============================================================================
// Type Definitions
// ============================================================================
/**
* Connection state and server metadata for the DuckLake endpoint, as maintained
* by the `useDuckLake` hook from the server's status response (`GET /`).
*/
export interface DuckLakeStatus {
// True only when the status response reported `status === 'healthy'`.
isConnected: boolean;
// Copied from the status response field `duckdb_version` ('' while disconnected).
duckdbVersion: string;
// Copied from `ducklake_available`; the hook only loads snapshots and schema history when true.
ducklakeAvailable: boolean;
// Copied from `catalog_type` ('' while disconnected).
catalogType: string;
// Copied from `data_path` ('' while disconnected).
dataPath: string;
// Copied from `db_alias` ('' while disconnected).
dbAlias: string;
// The base URL the hook talks to (the module-level DUCKLAKE_BASE_URL).
endpoint: string;
// Time of the most recent refresh attempt (successful or failed); null before the first one.
lastChecked: Date | null;
// Rounded elapsed milliseconds of the status request; only set after a successful refresh.
responseTimeMs?: number;
// Copied from `uptime_seconds`; only set after a successful refresh.
uptimeSeconds?: number;
}
/**
* Aggregate statistics assembled by the `useDuckLake` hook during a refresh.
*/
export interface DuckLakeStats {
// Number of entries in `tables`.
totalTables: number;
// Sum of `rowCount` over all entries in `tables`.
totalRows: number;
// Number of entries in `snapshots`.
totalSnapshots: number;
// Parsed from the status response field `last_snapshot_at`; null when that field is empty.
lastSnapshotAt: Date | null;
// Tables as returned by `GET /tables`.
tables: TableInfo[];
// Snapshots from `GET /snapshots`; empty when the server reports DuckLake as unavailable.
snapshots: SnapshotInfo[];
// Schema history from `GET /schema-evolution`; empty when the server reports DuckLake as unavailable.
schemaChanges: SchemaChange[];
}
/**
* Metadata for one table, mapped from an entry of the `GET /tables` response.
*/
export interface TableInfo {
name: string;
// Mapped from the response field `row_count`.
rowCount: number;
// Mapped from the response field `column_count`.
columnCount: number;
// Column name/type pairs, passed through unchanged from the response.
columns: Array<{
name: string;
type: string;
}>;
// Mapped from the optional response field `size_bytes`.
sizeBytes?: number;
}
/**
* One snapshot (time travel point), mapped from an entry of the `GET /snapshots` response.
*/
export interface SnapshotInfo {
// Mapped from the response field `snapshot_id`.
snapshotId: number;
// Built with `new Date(...)` from the response field `created_at`.
createdAt: Date;
// Mapped from the optional response field `row_count`.
rowCount?: number;
description?: string;
}
/**
* One schema evolution record, mapped from an entry of the `GET /schema-evolution` response.
*/
export interface SchemaChange {
// Mapped from the response field `change_id`.
changeId: number;
// Mapped from the response field `table_name`.
tableName: string;
// Mapped from the response field `change_type`.
changeType: string;
// Mapped from the optional response fields `column_name`, `old_type` and `new_type`.
columnName?: string;
oldType?: string;
newType?: string;
// Built with `new Date(...)` from the response field `changed_at`.
changedAt: Date;
}
/**
* Result of a SQL query, mapped from the `POST /query` response.
*/
export interface QueryResult {
// Column names, passed through unchanged from the response.
columns: string[];
// Result rows, passed through unchanged; cell values are untyped.
rows: unknown[][];
// Mapped from the response field `row_count`.
rowCount: number;
// Mapped from the response field `execution_time_ms`.
executionTimeMs: number;
// Mapped from the optional response field `snapshot_id`.
snapshotId?: number;
}
/**
* Changes recorded for a single snapshot, mapped from the `GET /snapshots/{id}` response.
*/
export interface SnapshotDiff {
// Mapped from the response field `snapshot_id`.
snapshotId: number;
// Per-table change entries, passed through unchanged from the response.
changes: Array<{
table: string;
operation: string;
count: number;
}>;
}
/**
* Outcome of a file upload, mapped from the `POST /upload` response.
*/
export interface UploadResult {
success: boolean;
// Mapped from the response field `table_name`.
tableName: string;
// Mapped from the response field `rows_inserted`.
rowsInserted: number;
// Mapped from the response field `snapshot_id`.
snapshotId: number;
message: string;
}
/**
* Everything the `useDuckLake` hook exposes: reactive state plus API operations.
* The operations reject with an `Error` when the server answers with a non-OK status.
*/
export interface UseDuckLakeReturn {
// Latest known connection state and server metadata.
status: DuckLakeStatus;
// Latest statistics; null before the first successful refresh and after a failed one.
stats: DuckLakeStats | null;
// True initially and while a refresh is in flight.
isLoading: boolean;
// Error from the most recent failed refresh; cleared when a new refresh starts.
error: Error | null;
// Re-fetches status and statistics. Failures are captured in `error` rather than thrown.
refresh: () => Promise<void>;
// Runs SQL via `POST /query`; `snapshotId`, when given, is sent as `snapshot_id`.
executeQuery: (query: string, snapshotId?: number) => Promise<QueryResult>;
// Fetches `GET /tables`.
listTables: () => Promise<TableInfo[]>;
// Fetches `GET /snapshots`, optionally filtered with a `table_name` query parameter.
listSnapshots: (tableName?: string) => Promise<SnapshotInfo[]>;
// Fetches `GET /snapshots/{snapshotId}`, optionally filtered with a `table_name` query parameter.
getSnapshotDiff: (snapshotId: number, tableName?: string) => Promise<SnapshotDiff>;
// Fetches `GET /schema-evolution`, optionally filtered with a `table_name` query parameter.
getSchemaEvolution: (tableName?: string) => Promise<SchemaChange[]>;
// Uploads a file to `/upload` (mode defaults to 'append' in the hook), then refreshes.
uploadData: (file: File, tableName: string, mode?: 'append' | 'replace' | 'create') => Promise<UploadResult>;
// Sends `DELETE /tables/{tableName}`, then refreshes.
dropTable: (tableName: string) => Promise<void>;
// Downloads `/export/{tableName}` as a Blob (format defaults to 'json' in the hook).
exportTable: (tableName: string, format?: 'json' | 'csv' | 'parquet', snapshotId?: number) => Promise<Blob>;
}
// ============================================================================
// API Helper Functions
// ============================================================================
/**
 * Issue a GET request against the DuckLake API and parse the JSON response.
 *
 * @param endpoint Path (including any query string) appended to the base URL.
 * @returns The parsed JSON body, typed as `T` without runtime validation.
 * @throws Error when the response status is not OK; the message carries the
 *   status code and the response body text.
 */
async function apiGet<T>(endpoint: string): Promise<T> {
  const url = `${DUCKLAKE_BASE_URL}${endpoint}`;
  const res = await fetch(url, {
    method: 'GET',
    headers: { 'Accept': 'application/json' },
  });

  if (res.ok) {
    return res.json();
  }

  const detail = await res.text();
  throw new Error(`DuckLake API error: ${res.status} - ${detail}`);
}
/**
 * Issue a POST request with a JSON body against the DuckLake API and parse
 * the JSON response.
 *
 * @param endpoint Path appended to the base URL.
 * @param body Value serialized with `JSON.stringify` and sent as the request body.
 * @returns The parsed JSON body, typed as `T` without runtime validation.
 * @throws Error when the response status is not OK; the message carries the
 *   status code and the response body text.
 */
async function apiPost<T>(endpoint: string, body: unknown): Promise<T> {
  const url = `${DUCKLAKE_BASE_URL}${endpoint}`;
  const payload = JSON.stringify(body);
  const res = await fetch(url, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Accept': 'application/json',
    },
    body: payload,
  });

  if (res.ok) {
    return res.json();
  }

  const detail = await res.text();
  throw new Error(`DuckLake API error: ${res.status} - ${detail}`);
}
/**
 * Issue a DELETE request against the DuckLake API. The response body is
 * ignored on success.
 *
 * @param endpoint Path appended to the base URL.
 * @throws Error when the response status is not OK; the message carries the
 *   status code and the response body text.
 */
async function apiDelete(endpoint: string): Promise<void> {
  const url = `${DUCKLAKE_BASE_URL}${endpoint}`;
  const res = await fetch(url, {
    method: 'DELETE',
    headers: { 'Accept': 'application/json' },
  });

  if (res.ok) {
    return;
  }

  const detail = await res.text();
  throw new Error(`DuckLake API error: ${res.status} - ${detail}`);
}
/**
 * Upload a file to the DuckLake `/upload` endpoint as multipart form data.
 *
 * The form carries three fields: `file`, `table_name` and `mode`. No
 * Content-Type header is set explicitly; the request body is the FormData
 * object itself.
 *
 * @param file File to upload.
 * @param tableName Target table, sent as `table_name`.
 * @param mode Upload mode, sent as `mode`.
 * @returns The server response with its snake_case fields mapped to camelCase.
 * @throws Error when the response status is not OK; the message carries the
 *   status code and the response body text.
 */
async function apiUpload(
  file: File,
  tableName: string,
  mode: string
): Promise<UploadResult> {
  const form = new FormData();
  form.append('file', file);
  form.append('table_name', tableName);
  form.append('mode', mode);

  const res = await fetch(`${DUCKLAKE_BASE_URL}/upload`, {
    method: 'POST',
    body: form,
  });
  if (!res.ok) {
    const detail = await res.text();
    throw new Error(`Upload failed: ${res.status} - ${detail}`);
  }

  const {
    success,
    table_name: uploadedTable,
    rows_inserted: rowsInserted,
    snapshot_id: snapshotId,
    message,
  } = await res.json();

  return {
    success,
    tableName: uploadedTable,
    rowsInserted,
    snapshotId,
    message,
  };
}
// ============================================================================
// Main Hook
// ============================================================================
/** Wire format of the status response (`GET /`). */
interface RawStatus {
  status: string;
  duckdb_version: string;
  ducklake_available: boolean;
  catalog_type: string;
  data_path: string;
  db_alias: string;
  tables: number;
  total_rows: number;
  snapshots: number;
  last_snapshot_at: string | null;
  uptime_seconds: number;
}

/** Wire format of one `GET /tables` entry. */
interface RawTable {
  name: string;
  row_count: number;
  column_count: number;
  columns: Array<{ name: string; type: string }>;
  size_bytes?: number;
}

/** Wire format of one `GET /snapshots` entry. */
interface RawSnapshot {
  snapshot_id: number;
  created_at: string;
  row_count?: number;
  description?: string;
}

/** Wire format of one `GET /schema-evolution` entry. */
interface RawSchemaChange {
  change_id: number;
  table_name: string;
  change_type: string;
  column_name?: string;
  old_type?: string;
  new_type?: string;
  changed_at: string;
}

/** Convert a snake_case table entry into the camelCase `TableInfo` model. */
function toTableInfo(raw: RawTable): TableInfo {
  return {
    name: raw.name,
    rowCount: raw.row_count,
    columnCount: raw.column_count,
    columns: raw.columns,
    sizeBytes: raw.size_bytes,
  };
}

/** Convert a snake_case snapshot entry into `SnapshotInfo`, parsing its timestamp. */
function toSnapshotInfo(raw: RawSnapshot): SnapshotInfo {
  return {
    snapshotId: raw.snapshot_id,
    createdAt: new Date(raw.created_at),
    rowCount: raw.row_count,
    description: raw.description,
  };
}

/** Convert a snake_case schema change entry into `SchemaChange`, parsing its timestamp. */
function toSchemaChange(raw: RawSchemaChange): SchemaChange {
  return {
    changeId: raw.change_id,
    tableName: raw.table_name,
    changeType: raw.change_type,
    columnName: raw.column_name,
    oldType: raw.old_type,
    newType: raw.new_type,
    changedAt: new Date(raw.changed_at),
  };
}

/** Append a URL-encoded `table_name` filter to `path` when a table name is given. */
function scopedToTable(path: string, tableName?: string): string {
  return tableName
    ? `${path}?table_name=${encodeURIComponent(tableName)}`
    : path;
}

/** Build the "not connected" status used before the first check and after a failure. */
function disconnectedStatus(lastChecked: Date | null): DuckLakeStatus {
  return {
    isConnected: false,
    duckdbVersion: '',
    ducklakeAvailable: false,
    catalogType: '',
    dataPath: '',
    dbAlias: '',
    endpoint: DUCKLAKE_BASE_URL,
    lastChecked,
  };
}

/**
 * React hook for talking to the DuckLake lakehouse API.
 *
 * On mount it runs `refresh`, which loads the server status, the table list
 * and (when the server reports DuckLake as available) snapshots and schema
 * history. Refresh failures are logged, stored in `error`, and reset `status`
 * to disconnected and `stats` to null; they are not thrown.
 *
 * All other operations are thin wrappers around single API calls and reject
 * when the server answers with a non-OK status. `uploadData` and `dropTable`
 * additionally run `refresh` after the call succeeds.
 *
 * @returns Reactive state (`status`, `stats`, `isLoading`, `error`) plus the
 *   API operations described by `UseDuckLakeReturn`.
 */
export function useDuckLake(): UseDuckLakeReturn {
  const [status, setStatus] = useState<DuckLakeStatus>(() => disconnectedStatus(null));
  const [stats, setStats] = useState<DuckLakeStats | null>(null);
  const [isLoading, setIsLoading] = useState(true);
  const [error, setError] = useState<Error | null>(null);

  // Reload connection status and statistics from the server.
  const refresh = useCallback(async () => {
    setIsLoading(true);
    setError(null);
    const requestStartedAt = performance.now();

    try {
      // 1. Server status; the elapsed time covers this request only.
      const health = await apiGet<RawStatus>('/');
      const responseTimeMs = Math.round(performance.now() - requestStartedAt);
      setStatus({
        isConnected: health.status === 'healthy',
        duckdbVersion: health.duckdb_version,
        ducklakeAvailable: health.ducklake_available,
        catalogType: health.catalog_type,
        dataPath: health.data_path,
        dbAlias: health.db_alias,
        endpoint: DUCKLAKE_BASE_URL,
        lastChecked: new Date(),
        responseTimeMs,
        uptimeSeconds: health.uptime_seconds,
      });

      // 2. Table metadata.
      const tables = (await apiGet<RawTable[]>('/tables')).map(toTableInfo);

      // 3. Snapshots and schema history are only requested when DuckLake is available.
      let snapshots: SnapshotInfo[] = [];
      let schemaChanges: SchemaChange[] = [];
      if (health.ducklake_available) {
        snapshots = (await apiGet<RawSnapshot[]>('/snapshots')).map(toSnapshotInfo);
        schemaChanges = (await apiGet<RawSchemaChange[]>('/schema-evolution')).map(toSchemaChange);
      }

      // 4. Aggregate. The row total is computed client-side from the table list.
      let totalRows = 0;
      for (const table of tables) {
        totalRows += table.rowCount;
      }

      setStats({
        totalTables: tables.length,
        totalRows,
        totalSnapshots: snapshots.length,
        lastSnapshotAt: health.last_snapshot_at
          ? new Date(health.last_snapshot_at)
          : null,
        tables,
        snapshots,
        schemaChanges,
      });
    } catch (err) {
      // Any failed request marks the whole connection as down.
      console.error('Failed to connect to DuckLake:', err);
      setStatus(disconnectedStatus(new Date()));
      setStats(null);
      setError(err instanceof Error ? err : new Error('Failed to connect to DuckLake'));
    } finally {
      setIsLoading(false);
    }
  }, []);

  // Initial load. `refresh` handles its own errors, so the promise is not awaited.
  useEffect(() => {
    void refresh();
  }, [refresh]);

  // Execute a SQL query, optionally at a specific snapshot.
  const executeQuery = useCallback(
    async (query: string, snapshotId?: number): Promise<QueryResult> => {
      const raw = await apiPost<{
        columns: string[];
        rows: unknown[][];
        row_count: number;
        execution_time_ms: number;
        snapshot_id?: number;
      }>('/query', { query, snapshot_id: snapshotId });

      return {
        columns: raw.columns,
        rows: raw.rows,
        rowCount: raw.row_count,
        executionTimeMs: raw.execution_time_ms,
        snapshotId: raw.snapshot_id,
      };
    },
    []
  );

  // List all tables with metadata.
  const listTables = useCallback(async (): Promise<TableInfo[]> => {
    const rawTables = await apiGet<RawTable[]>('/tables');
    return rawTables.map(toTableInfo);
  }, []);

  // List snapshots (time travel points), optionally for a single table.
  const listSnapshots = useCallback(
    async (tableName?: string): Promise<SnapshotInfo[]> => {
      const rawSnapshots = await apiGet<RawSnapshot[]>(scopedToTable('/snapshots', tableName));
      return rawSnapshots.map(toSnapshotInfo);
    },
    []
  );

  // Get the changes recorded in a specific snapshot, optionally for a single table.
  const getSnapshotDiff = useCallback(
    async (snapshotId: number, tableName?: string): Promise<SnapshotDiff> => {
      const raw = await apiGet<{
        snapshot_id: number;
        changes: Array<{
          table: string;
          operation: string;
          count: number;
        }>;
      }>(scopedToTable(`/snapshots/${snapshotId}`, tableName));

      return {
        snapshotId: raw.snapshot_id,
        changes: raw.changes,
      };
    },
    []
  );

  // Get schema evolution history, optionally for a single table.
  const getSchemaEvolution = useCallback(
    async (tableName?: string): Promise<SchemaChange[]> => {
      const rawChanges = await apiGet<RawSchemaChange[]>(
        scopedToTable('/schema-evolution', tableName)
      );
      return rawChanges.map(toSchemaChange);
    },
    []
  );

  // Upload a data file to a table, then refresh stats.
  const uploadData = useCallback(
    async (
      file: File,
      tableName: string,
      mode: 'append' | 'replace' | 'create' = 'append'
    ): Promise<UploadResult> => {
      const uploaded = await apiUpload(file, tableName, mode);
      await refresh();
      return uploaded;
    },
    [refresh]
  );

  // Drop a table, then refresh stats.
  const dropTable = useCallback(
    async (tableName: string): Promise<void> => {
      await apiDelete(`/tables/${encodeURIComponent(tableName)}`);
      await refresh();
    },
    [refresh]
  );

  // Export table data as a Blob, optionally at a specific snapshot.
  const exportTable = useCallback(
    async (
      tableName: string,
      format: 'json' | 'csv' | 'parquet' = 'json',
      snapshotId?: number
    ): Promise<Blob> => {
      const snapshotParam = snapshotId !== undefined ? `&snapshot_id=${snapshotId}` : '';
      const endpoint = `/export/${encodeURIComponent(tableName)}?format=${format}${snapshotParam}`;

      const res = await fetch(`${DUCKLAKE_BASE_URL}${endpoint}`, {
        method: 'GET',
      });
      if (!res.ok) {
        const detail = await res.text();
        throw new Error(`Export failed: ${res.status} - ${detail}`);
      }
      return res.blob();
    },
    []
  );

  return {
    status,
    stats,
    isLoading,
    error,
    refresh,
    executeQuery,
    listTables,
    listSnapshots,
    getSnapshotDiff,
    getSchemaEvolution,
    uploadData,
    dropTable,
    exportTable,
  };
}