569 lines
15 KiB
TypeScript
569 lines
15 KiB
TypeScript
/**
 * React Hook for DuckLake Lakehouse API Interaction
 * Provides time travel, ACID transactions, and schema evolution for heritage data
 *
 * DuckLake Features:
 * - Time travel (query historical snapshots)
 * - ACID transactions (multi-client writes)
 * - Schema evolution tracking
 * - Open format (Parquet + SQLite/PostgreSQL catalog)
 */
|
|
|
|
import { useState, useEffect, useCallback } from 'react';
|
|
|
|
// Configuration - auto-detects production vs development.
// In production (bronhouder.nl), use the relative /ducklake path via the Caddy proxy.
// In development (localhost), use a direct connection to port 8765.
const DUCKLAKE_BASE_URL: string = (() => {
  // An explicit override always wins. Trailing slashes are stripped because
  // every endpoint appended to this base already starts with "/"; keeping
  // them would produce URLs such as "http://host:8765//tables".
  const override = import.meta.env.VITE_DUCKLAKE_URL;
  if (override) {
    return String(override).replace(/\/+$/, '');
  }

  // Auto-detect: when served from bronhouder.nl or the production IP, use the relative path.
  if (typeof window !== 'undefined') {
    const productionHosts = ['bronhouder.nl', 'www.bronhouder.nl', '91.98.224.44'];
    if (productionHosts.includes(window.location.hostname)) {
      return '/ducklake';
    }
  }

  // Development fallback.
  return 'http://localhost:8765';
})();
|
|
|
|
// ============================================================================
// Type Definitions
// ============================================================================
|
|
|
|
/**
 * Connection and server information exposed by the hook.
 * Fields other than `isConnected`, `endpoint`, `lastChecked` and
 * `responseTimeMs` are copied from the server's status response.
 */
export interface DuckLakeStatus {
  /** True when the most recent status check reported `'healthy'`. */
  isConnected: boolean;
  /** DuckDB version string reported by the server; empty when disconnected. */
  duckdbVersion: string;
  /** Whether the server reports DuckLake as available. */
  ducklakeAvailable: boolean;
  /** Catalog type reported by the server; empty when disconnected. */
  catalogType: string;
  /** Data path reported by the server; empty when disconnected. */
  dataPath: string;
  /** Database alias reported by the server; empty when disconnected. */
  dbAlias: string;
  /** Base URL the hook sends its requests to. */
  endpoint: string;
  /** Time of the most recent status check, or `null` before the first one. */
  lastChecked: Date | null;
  /** Duration of the status request in milliseconds; absent when the check failed. */
  responseTimeMs?: number;
  /** Server uptime in seconds as reported by the server; absent when the check failed. */
  uptimeSeconds?: number;
}
|
|
|
|
/** Aggregated lakehouse statistics assembled by the hook's `refresh`. */
export interface DuckLakeStats {
  /** Number of entries in `tables`. */
  totalTables: number;
  /** Sum of `rowCount` over all entries in `tables`. */
  totalRows: number;
  /** Number of entries in `snapshots`. */
  totalSnapshots: number;
  /** Timestamp of the latest snapshot as reported by the server status, or `null`. */
  lastSnapshotAt: Date | null;
  /** Metadata for every table returned by the server. */
  tables: TableInfo[];
  /** Known snapshots; empty when DuckLake is not available. */
  snapshots: SnapshotInfo[];
  /** Schema change history; empty when DuckLake is not available. */
  schemaChanges: SchemaChange[];
}
|
|
|
|
/** Metadata describing a single table. */
export interface TableInfo {
  /** Table name. */
  name: string;
  /** Number of rows in the table. */
  rowCount: number;
  /** Number of columns in the table. */
  columnCount: number;
  /** Column names with their type names as reported by the server. */
  columns: Array<{
    name: string;
    type: string;
  }>;
  /** Table size in bytes, when the server provides it. */
  sizeBytes?: number;
}
|
|
|
|
/** A snapshot (time travel point) of the lakehouse. */
export interface SnapshotInfo {
  /** Identifier of the snapshot; accepted by the query and export calls. */
  snapshotId: number;
  /** Creation time of the snapshot. */
  createdAt: Date;
  /** Row count associated with the snapshot, when the server provides it. */
  rowCount?: number;
  /** Free-text description, when the server provides it. */
  description?: string;
}
|
|
|
|
/** One entry of the schema evolution history. */
export interface SchemaChange {
  /** Identifier of the change. */
  changeId: number;
  /** Table the change applies to. */
  tableName: string;
  /** Kind of change, as a server-defined string. */
  changeType: string;
  /** Affected column, when the change concerns a single column. */
  columnName?: string;
  /** Column type before the change, when provided. */
  oldType?: string;
  /** Column type after the change, when provided. */
  newType?: string;
  /** Time at which the change was recorded. */
  changedAt: Date;
}
|
|
|
|
/** Result of a SQL query executed through the hook. */
export interface QueryResult {
  /** Column names of the result set. */
  columns: string[];
  /** Result rows; each row is an array of cell values. */
  rows: unknown[][];
  /** Row count reported by the server. */
  rowCount: number;
  /** Execution time in milliseconds reported by the server. */
  executionTimeMs: number;
  /** Snapshot the query ran against, when the server reports one. */
  snapshotId?: number;
}
|
|
|
|
/** Changes recorded in a specific snapshot. */
export interface SnapshotDiff {
  /** Snapshot the changes belong to. */
  snapshotId: number;
  /** Change entries, each naming a table, an operation and a count. */
  changes: Array<{
    table: string;
    /** Operation name as a server-defined string. */
    operation: string;
    count: number;
  }>;
}
|
|
|
|
/** Outcome of a file upload, mapped from the server's upload response. */
export interface UploadResult {
  /** Success flag reported by the server. */
  success: boolean;
  /** Table the data was written to. */
  tableName: string;
  /** Number of rows inserted, as reported by the server. */
  rowsInserted: number;
  /** Snapshot identifier reported by the server for this upload. */
  snapshotId: number;
  /** Message reported by the server. */
  message: string;
}
|
|
|
|
/** Value returned by the `useDuckLake` hook: current state plus API operations. */
export interface UseDuckLakeReturn {
  /** Latest known connection status. */
  status: DuckLakeStatus;
  /** Latest aggregated statistics, or `null` when none are available. */
  stats: DuckLakeStats | null;
  /** True while a refresh is in progress. */
  isLoading: boolean;
  /** Error from the most recent failed refresh, or `null`. */
  error: Error | null;
  /** Re-fetch status and statistics. */
  refresh: () => Promise<void>;
  /** Execute a SQL query, optionally at a specific snapshot. */
  executeQuery: (query: string, snapshotId?: number) => Promise<QueryResult>;
  /** List all tables with metadata. */
  listTables: () => Promise<TableInfo[]>;
  /** List snapshots, optionally restricted to one table. */
  listSnapshots: (tableName?: string) => Promise<SnapshotInfo[]>;
  /** Get the changes in a snapshot, optionally restricted to one table. */
  getSnapshotDiff: (snapshotId: number, tableName?: string) => Promise<SnapshotDiff>;
  /** Get the schema evolution history, optionally restricted to one table. */
  getSchemaEvolution: (tableName?: string) => Promise<SchemaChange[]>;
  /** Upload a data file into a table; `mode` defaults to `'append'` in the hook. */
  uploadData: (file: File, tableName: string, mode?: 'append' | 'replace' | 'create') => Promise<UploadResult>;
  /** Drop a table. */
  dropTable: (tableName: string) => Promise<void>;
  /** Export table data as a Blob; `format` defaults to `'json'` in the hook. */
  exportTable: (tableName: string, format?: 'json' | 'csv' | 'parquet', snapshotId?: number) => Promise<Blob>;
}
|
|
|
|
// ============================================================================
// API Helper Functions
// ============================================================================
|
|
|
|
/**
 * Make a GET request to the DuckLake API.
 *
 * @param endpoint - Path appended verbatim to `DUCKLAKE_BASE_URL` (e.g. `/tables`).
 * @returns The response body parsed as JSON. The payload is not validated against `T`.
 * @throws Error with message `DuckLake API error: <status> - <body>` when the
 *   response is not OK.
 */
async function apiGet<T>(endpoint: string): Promise<T> {
  const response = await fetch(`${DUCKLAKE_BASE_URL}${endpoint}`, {
    method: 'GET',
    headers: {
      Accept: 'application/json',
    },
  });

  if (!response.ok) {
    // Include the response body so server-side error details reach the caller.
    const errorText = await response.text();
    throw new Error(`DuckLake API error: ${response.status} - ${errorText}`);
  }

  return (await response.json()) as T;
}
|
|
|
|
/**
 * Make a POST request with a JSON body to the DuckLake API.
 *
 * @param endpoint - Path appended verbatim to `DUCKLAKE_BASE_URL` (e.g. `/query`).
 * @param body - Value serialized with `JSON.stringify` and sent as the request body.
 * @returns The response body parsed as JSON. The payload is not validated against `T`.
 * @throws Error with message `DuckLake API error: <status> - <body>` when the
 *   response is not OK.
 */
async function apiPost<T>(endpoint: string, body: unknown): Promise<T> {
  const response = await fetch(`${DUCKLAKE_BASE_URL}${endpoint}`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Accept: 'application/json',
    },
    body: JSON.stringify(body),
  });

  if (!response.ok) {
    // Include the response body so server-side error details reach the caller.
    const errorText = await response.text();
    throw new Error(`DuckLake API error: ${response.status} - ${errorText}`);
  }

  return (await response.json()) as T;
}
|
|
|
|
/**
 * Make a DELETE request to the DuckLake API.
 *
 * @param endpoint - Path appended verbatim to `DUCKLAKE_BASE_URL` (e.g. `/tables/foo`).
 * @returns Nothing; the response body of a successful request is ignored.
 * @throws Error with message `DuckLake API error: <status> - <body>` when the
 *   response is not OK.
 */
async function apiDelete(endpoint: string): Promise<void> {
  const response = await fetch(`${DUCKLAKE_BASE_URL}${endpoint}`, {
    method: 'DELETE',
    headers: {
      Accept: 'application/json',
    },
  });

  if (!response.ok) {
    // Include the response body so server-side error details reach the caller.
    const errorText = await response.text();
    throw new Error(`DuckLake API error: ${response.status} - ${errorText}`);
  }
}
|
|
|
|
/**
 * Upload a file to the DuckLake API.
 *
 * Sends a form POST to `/upload` with the fields `file`, `table_name` and `mode`.
 *
 * @param file - File to upload.
 * @param tableName - Target table name.
 * @param mode - Upload mode string passed through to the server unchanged.
 * @returns The server response mapped from snake_case to camelCase.
 * @throws Error with message `Upload failed: <status> - <body>` when the
 *   response is not OK.
 */
async function apiUpload(
  file: File,
  tableName: string,
  mode: string
): Promise<UploadResult> {
  const formData = new FormData();
  formData.append('file', file);
  formData.append('table_name', tableName);
  formData.append('mode', mode);

  // No Content-Type header is set on purpose: for a FormData body, fetch
  // derives the multipart content type (including its boundary) itself.
  const response = await fetch(`${DUCKLAKE_BASE_URL}/upload`, {
    method: 'POST',
    body: formData,
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`Upload failed: ${response.status} - ${errorText}`);
  }

  const result = await response.json();
  return {
    success: result.success,
    tableName: result.table_name,
    rowsInserted: result.rows_inserted,
    snapshotId: result.snapshot_id,
    message: result.message,
  };
}
|
|
|
|
// ============================================================================
// Main Hook
// ============================================================================
|
|
|
|
/** Status payload returned by `GET /` (snake_case wire format). */
interface RawServerStatus {
  status: string;
  duckdb_version: string;
  ducklake_available: boolean;
  catalog_type: string;
  data_path: string;
  db_alias: string;
  tables: number;
  total_rows: number;
  snapshots: number;
  last_snapshot_at: string | null;
  uptime_seconds: number;
}

/** One entry of the `/tables` response (snake_case wire format). */
interface RawTableInfo {
  name: string;
  row_count: number;
  column_count: number;
  columns: Array<{ name: string; type: string }>;
  size_bytes?: number;
}

/** One entry of the `/snapshots` response (snake_case wire format). */
interface RawSnapshotInfo {
  snapshot_id: number;
  created_at: string;
  row_count?: number;
  description?: string;
}

/** One entry of the `/schema-evolution` response (snake_case wire format). */
interface RawSchemaChange {
  change_id: number;
  table_name: string;
  change_type: string;
  column_name?: string;
  old_type?: string;
  new_type?: string;
  changed_at: string;
}

/** Convert a wire-format table entry to `TableInfo`. */
function toTableInfo(raw: RawTableInfo): TableInfo {
  return {
    name: raw.name,
    rowCount: raw.row_count,
    columnCount: raw.column_count,
    columns: raw.columns,
    sizeBytes: raw.size_bytes,
  };
}

/** Convert a wire-format snapshot entry to `SnapshotInfo`, parsing its timestamp. */
function toSnapshotInfo(raw: RawSnapshotInfo): SnapshotInfo {
  return {
    snapshotId: raw.snapshot_id,
    createdAt: new Date(raw.created_at),
    rowCount: raw.row_count,
    description: raw.description,
  };
}

/** Convert a wire-format schema change entry to `SchemaChange`, parsing its timestamp. */
function toSchemaChange(raw: RawSchemaChange): SchemaChange {
  return {
    changeId: raw.change_id,
    tableName: raw.table_name,
    changeType: raw.change_type,
    columnName: raw.column_name,
    oldType: raw.old_type,
    newType: raw.new_type,
    changedAt: new Date(raw.changed_at),
  };
}

/** Append a URL-encoded `table_name` query parameter when a table name is given. */
function withTableFilter(path: string, tableName?: string): string {
  return tableName ? `${path}?table_name=${encodeURIComponent(tableName)}` : path;
}

/** Fetch `/tables` and map every entry to `TableInfo`. */
async function fetchTables(): Promise<TableInfo[]> {
  const rawTables = await apiGet<RawTableInfo[]>('/tables');
  return rawTables.map((raw) => toTableInfo(raw));
}

/** Fetch `/snapshots` (optionally for one table) and map every entry to `SnapshotInfo`. */
async function fetchSnapshots(tableName?: string): Promise<SnapshotInfo[]> {
  const rawSnapshots = await apiGet<RawSnapshotInfo[]>(withTableFilter('/snapshots', tableName));
  return rawSnapshots.map((raw) => toSnapshotInfo(raw));
}

/** Fetch `/schema-evolution` (optionally for one table) and map every entry to `SchemaChange`. */
async function fetchSchemaEvolution(tableName?: string): Promise<SchemaChange[]> {
  const rawChanges = await apiGet<RawSchemaChange[]>(
    withTableFilter('/schema-evolution', tableName)
  );
  return rawChanges.map((raw) => toSchemaChange(raw));
}

/** Build the status object used before the first check and after a failed one. */
function disconnectedStatus(lastChecked: Date | null): DuckLakeStatus {
  return {
    isConnected: false,
    duckdbVersion: '',
    ducklakeAvailable: false,
    catalogType: '',
    dataPath: '',
    dbAlias: '',
    endpoint: DUCKLAKE_BASE_URL,
    lastChecked,
  };
}

/**
 * Hook for interacting with the DuckLake lakehouse API.
 *
 * Loads status and statistics once on mount and exposes them together with
 * callbacks for querying, listing, uploading, dropping and exporting.
 *
 * @returns Current state (`status`, `stats`, `isLoading`, `error`) and the API operations.
 */
export function useDuckLake(): UseDuckLakeReturn {
  const [status, setStatus] = useState<DuckLakeStatus>(disconnectedStatus(null));
  const [stats, setStats] = useState<DuckLakeStats | null>(null);
  const [isLoading, setIsLoading] = useState(true);
  const [error, setError] = useState<Error | null>(null);

  /**
   * Refresh connection status and statistics.
   * Never rejects: any failure is logged, stored in `error`, and resets
   * `status` to disconnected and `stats` to `null`.
   */
  const refresh = useCallback(async (): Promise<void> => {
    setIsLoading(true);
    setError(null);

    const startTime = performance.now();

    try {
      // Get server status; only this request is timed.
      const statusResult = await apiGet<RawServerStatus>('/');
      const responseTimeMs = Math.round(performance.now() - startTime);

      setStatus({
        isConnected: statusResult.status === 'healthy',
        duckdbVersion: statusResult.duckdb_version,
        ducklakeAvailable: statusResult.ducklake_available,
        catalogType: statusResult.catalog_type,
        dataPath: statusResult.data_path,
        dbAlias: statusResult.db_alias,
        endpoint: DUCKLAKE_BASE_URL,
        lastChecked: new Date(),
        responseTimeMs,
        uptimeSeconds: statusResult.uptime_seconds,
      });

      // Tables, snapshots and schema history do not depend on each other, so
      // they are requested in parallel. Snapshots and schema history are only
      // requested when DuckLake is available.
      const lakeAvailable = statusResult.ducklake_available;
      const [tables, snapshots, schemaChanges] = await Promise.all([
        fetchTables(),
        lakeAvailable ? fetchSnapshots() : Promise.resolve<SnapshotInfo[]>([]),
        lakeAvailable ? fetchSchemaEvolution() : Promise.resolve<SchemaChange[]>([]),
      ]);

      setStats({
        totalTables: tables.length,
        totalRows: tables.reduce((sum, table) => sum + table.rowCount, 0),
        totalSnapshots: snapshots.length,
        lastSnapshotAt: statusResult.last_snapshot_at
          ? new Date(statusResult.last_snapshot_at)
          : null,
        tables,
        snapshots,
        schemaChanges,
      });
    } catch (err) {
      console.error('Failed to connect to DuckLake:', err);
      setStatus(disconnectedStatus(new Date()));
      setStats(null);
      setError(err instanceof Error ? err : new Error('Failed to connect to DuckLake'));
    } finally {
      setIsLoading(false);
    }
  }, []);

  // Initial load. `refresh` handles its own errors, so the promise is
  // intentionally not awaited.
  useEffect(() => {
    void refresh();
  }, [refresh]);

  /**
   * Execute a SQL query, optionally at a specific snapshot.
   */
  const executeQuery = useCallback(
    async (query: string, snapshotId?: number): Promise<QueryResult> => {
      const result = await apiPost<{
        columns: string[];
        rows: unknown[][];
        row_count: number;
        execution_time_ms: number;
        snapshot_id?: number;
      }>('/query', {
        query,
        snapshot_id: snapshotId,
      });

      return {
        columns: result.columns,
        rows: result.rows,
        rowCount: result.row_count,
        executionTimeMs: result.execution_time_ms,
        snapshotId: result.snapshot_id,
      };
    },
    []
  );

  /**
   * List all tables with metadata.
   */
  const listTables = useCallback((): Promise<TableInfo[]> => fetchTables(), []);

  /**
   * List snapshots (time travel points), optionally for a single table.
   */
  const listSnapshots = useCallback(
    (tableName?: string): Promise<SnapshotInfo[]> => fetchSnapshots(tableName),
    []
  );

  /**
   * Get the changes in a specific snapshot, optionally for a single table.
   */
  const getSnapshotDiff = useCallback(
    async (snapshotId: number, tableName?: string): Promise<SnapshotDiff> => {
      const result = await apiGet<{
        snapshot_id: number;
        changes: Array<{
          table: string;
          operation: string;
          count: number;
        }>;
      }>(withTableFilter(`/snapshots/${snapshotId}`, tableName));

      return {
        snapshotId: result.snapshot_id,
        changes: result.changes,
      };
    },
    []
  );

  /**
   * Get the schema evolution history, optionally for a single table.
   */
  const getSchemaEvolution = useCallback(
    (tableName?: string): Promise<SchemaChange[]> => fetchSchemaEvolution(tableName),
    []
  );

  /**
   * Upload a data file to a table, then refresh status and statistics.
   */
  const uploadData = useCallback(
    async (
      file: File,
      tableName: string,
      mode: 'append' | 'replace' | 'create' = 'append'
    ): Promise<UploadResult> => {
      const result = await apiUpload(file, tableName, mode);
      await refresh(); // Refresh stats after upload
      return result;
    },
    [refresh]
  );

  /**
   * Drop a table, then refresh status and statistics.
   */
  const dropTable = useCallback(
    async (tableName: string): Promise<void> => {
      await apiDelete(`/tables/${encodeURIComponent(tableName)}`);
      await refresh(); // Refresh stats after drop
    },
    [refresh]
  );

  /**
   * Export table data as a Blob, optionally at a specific snapshot.
   */
  const exportTable = useCallback(
    async (
      tableName: string,
      format: 'json' | 'csv' | 'parquet' = 'json',
      snapshotId?: number
    ): Promise<Blob> => {
      let endpoint = `/export/${encodeURIComponent(tableName)}?format=${format}`;
      if (snapshotId !== undefined) {
        endpoint += `&snapshot_id=${snapshotId}`;
      }

      // Uses fetch directly because the response is returned as a Blob, not parsed as JSON.
      const response = await fetch(`${DUCKLAKE_BASE_URL}${endpoint}`, {
        method: 'GET',
      });

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`Export failed: ${response.status} - ${errorText}`);
      }

      return response.blob();
    },
    []
  );

  return {
    status,
    stats,
    isLoading,
    error,
    refresh,
    executeQuery,
    listTables,
    listSnapshots,
    getSnapshotDiff,
    getSchemaEvolution,
    uploadData,
    dropTable,
    exportTable,
  };
}
|