// glam/frontend/src/hooks/useQdrant.ts
// 2025-12-14 17:09:55 +01:00
//
// 592 lines
// 17 KiB
// TypeScript

/**
* React Hook for Qdrant Vector Database Interaction (via REST API)
* Provides interface to query Qdrant through its REST API
*
* Qdrant is a high-performance vector similarity search engine
* Used for semantic search, recommendations, and embedding visualization
*
* Self-hosted Qdrant instance - no API key required
*/
import { useState, useEffect, useCallback } from 'react';
// Configuration - self-hosted Qdrant instance.
// Base URL that qdrantFetch prepends to every endpoint it requests.
// Uses the VITE_QDRANT_API_URL build-time variable when it is set to a non-empty
// value; otherwise defaults to /qdrant, which is the Caddy proxy path.
const QDRANT_API_URL = import.meta.env.VITE_QDRANT_API_URL || '/qdrant';
/**
* Per-collection summary listed in QdrantStats.collections.
* Built by useQdrant's refresh() from the collection-info endpoint.
*/
export interface QdrantCollection {
/** Collection name as returned by the collections listing. */
name: string;
/** Value of vectors_count from the collection info (0 when absent). */
vectorsCount: number;
/** Value of points_count from the collection info (0 when absent). */
pointsCount: number;
/** Value of segments_count from the collection info (0 when absent). */
segmentsCount: number;
/** Collection status colour; refresh() reports 'red' when the info request fails. */
status: 'green' | 'yellow' | 'red';
/** Vector dimension; 0 when it could not be determined. */
vectorSize: number;
/** Distance metric name; 'unknown' when the info request fails. */
distanceMetric: string;
/** Presumably whether payload is stored on disk - confirm against the code that fills it. */
onDiskPayload: boolean;
/** Derived by refresh(): true when the collection status is 'green'. */
indexed: boolean;
}
/**
* Aggregate statistics over all collections, produced by useQdrant's refresh().
*/
export interface QdrantStats {
/** Number of entries in collections. */
totalCollections: number;
/** Sum of vectorsCount over all collections. */
totalVectors: number;
/** Sum of pointsCount over all collections. */
totalPoints: number;
/** One summary per collection returned by the collections listing. */
collections: QdrantCollection[];
/** Server version reported by the connection check. */
version: string;
}
/**
* Result of the most recent connection check performed by useQdrant.
*/
export interface QdrantStatus {
/** True when the request to the Qdrant root endpoint succeeded. */
isConnected: boolean;
/** Base URL the hook talks to (QDRANT_API_URL). */
endpoint: string;
/** Server version; empty string while disconnected or before the first check. */
version: string;
/** Time of the last check; null until the first check has completed. */
lastChecked: Date | null;
/** Rounded duration of the connection check in milliseconds; not set when refresh() fails with an exception. */
responseTimeMs?: number;
}
/**
* A stored point as returned by getPoints, scrollPoints and getPointById.
*/
export interface QdrantPoint {
/** Point identifier as returned by Qdrant. */
id: string | number;
/** Vector values; an empty array when the response carried no vector. */
vector: number[];
/** Payload attached to the point; an empty object when the response carried none. */
payload: Record<string, unknown>;
}
/**
* One hit returned by searchPoints.
*/
export interface QdrantSearchResult {
/** Identifier of the matching point. */
id: string | number;
/** Similarity score reported by Qdrant for this hit. */
score: number;
/** Payload of the matching point; an empty object when the response carried none. */
payload: Record<string, unknown>;
/** Vector of the matching point, passed through only when the response includes it. */
vector?: number[];
}
/**
* Options accepted by reindexCollection. Every field is optional;
* reindexCollection fills in the defaults noted below before posting.
*/
export interface ReindexRequest {
/** Target collection; reindexCollection defaults to 'heritage_custodians'. */
collection_name?: string;
/** Embedding model to use; reindexCollection defaults to 'all-MiniLM-L6-v2'. */
embedding_model?: 'all-MiniLM-L6-v2' | 'text-embedding-3-small' | 'bge-base-en-v1.5'; // minilm_384, openai_1536, bge_768
/** Batch size sent to the backend; reindexCollection defaults to 100. */
batch_size?: number;
/** Sent as recreate_collection; reindexCollection defaults to true. */
recreate_collection?: boolean;
}
/**
* JSON body returned by POST /api/qdrant/reindex, as typed by reindexCollection.
* NOTE(review): field meanings below are read off the field names; the backend
* that produces them is not part of this file - confirm against the API.
*/
export interface ReindexResponse {
/** Whether the backend reports the reindex as successful. */
success: boolean;
/** Human-readable message from the backend. */
message: string;
/** Presumably the collection that was reindexed. */
collection_name?: string;
/** Presumably the number of documents written to the collection. */
documents_indexed?: number;
/** Presumably the embedding model that was used. */
embedding_model?: string;
/** Presumably the dimension of the generated vectors. */
vector_dimensions?: number;
/** Presumably the duration of the reindex in seconds. */
elapsed_seconds?: number;
}
/**
* JSON body returned by GET /api/qdrant/reindex/status, as typed by getReindexStatus.
* NOTE(review): field meanings below are read off the field names; the backend
* that produces them is not part of this file - confirm against the API.
*/
export interface ReindexStatus {
/** Whether a reindex operation is currently in progress. */
is_running: boolean;
/** Progress indicator; scale (0-1 vs 0-100) is not visible here - TODO confirm. */
progress: number;
/** Presumably the batch currently being processed. */
current_batch: number;
/** Presumably the total number of batches in the run. */
total_batches: number;
/** Presumably the number of documents processed so far. */
documents_processed: number;
/** Human-readable status message from the backend. */
message: string;
/** Whether the operation has finished. */
completed: boolean;
/** Whether the operation succeeded. */
success: boolean;
}
/**
* Detailed description of one collection, as returned by getCollectionInfo.
*/
export interface QdrantCollectionInfo {
/** Collection name that was requested. */
name: string;
/** Collection status colour reported by Qdrant. */
status: 'green' | 'yellow' | 'red';
/** Value of vectors_count from the response (0 when absent). */
vectorsCount: number;
/** Value of points_count from the response (0 when absent). */
pointsCount: number;
/** Value of segments_count from the response (0 when absent). */
segmentsCount: number;
/** Collection configuration, converted to camelCase keys. */
config: {
params: {
/**
* Vector parameters. For collections with named vectors, getCollectionInfo
* reports the first named vector only.
*/
vectors: {
/** Vector dimension; 0 when it could not be determined. */
size: number;
/** Distance metric name; defaults to 'Cosine' when not reported. */
distance: string;
};
};
/** Raw optimizer_config object from the response (empty object when absent). */
optimizerConfig: Record<string, unknown>;
/** Raw wal_config object from the response (empty object when absent). */
walConfig: Record<string, unknown>;
};
/** Payload fields keyed by name, mapped from the response's payload_schema. */
payloadSchema: Record<string, { dataType: string; indexed: boolean }>;
}
/**
* Value returned by the useQdrant hook: connection state plus request helpers.
*/
export interface UseQdrantReturn {
/** Result of the most recent connection check. */
status: QdrantStatus;
/** Aggregate statistics; null while disconnected or before the first load. */
stats: QdrantStats | null;
/** True while refresh() is running (initially true). */
isLoading: boolean;
/** Error raised by the last refresh(), or null. */
error: Error | null;
/** Re-checks the connection and reloads stats. */
refresh: () => Promise<void>;
/** Resolves to the names of all collections. */
getCollections: () => Promise<string[]>;
/** Resolves to detailed info for one collection. */
getCollectionInfo: (collectionName: string) => Promise<QdrantCollectionInfo>;
/** Reads one page of points via the scroll endpoint; offset is sent as the scroll request's offset. */
getPoints: (collectionName: string, limit?: number, offset?: number, withVectors?: boolean) => Promise<{ points: QdrantPoint[]; nextOffset: number | null }>;
/** Runs a vector similarity search and resolves to the hits with their payloads. */
searchPoints: (collectionName: string, vector: number[], limit?: number) => Promise<QdrantSearchResult[]>;
/** Resolves to the point with the given id, or null when the request fails for any reason. */
getPointById: (collectionName: string, pointId: string | number) => Promise<QdrantPoint | null>;
/** Reads one page of points (always including vectors), optionally restricted by a filter. */
scrollPoints: (collectionName: string, limit?: number, offset?: string | number | null, filter?: Record<string, unknown>) => Promise<{ points: QdrantPoint[]; nextOffset: string | number | null }>;
/** Starts a reindex through the backend endpoint POST /api/qdrant/reindex. */
reindexCollection: (options?: ReindexRequest) => Promise<ReindexResponse>;
/** Fetches the reindex status from GET /api/qdrant/reindex/status. */
getReindexStatus: () => Promise<ReindexStatus>;
}
/**
 * Helper function for Qdrant API requests.
 * Self-hosted instance - no authentication required.
 *
 * Sends a request to `QDRANT_API_URL + endpoint` and resolves to the parsed
 * JSON body. A `Content-Type: application/json` header is added unless the
 * caller already supplied a Content-Type.
 *
 * @param endpoint - Path appended to the Qdrant base URL (e.g. `/collections`).
 * @param options - Standard fetch options; caller headers are preserved.
 * @returns The response body parsed as JSON, typed as `T` (not validated).
 * @throws Error with the HTTP status and response text when the response is
 *   not OK; network failures from `fetch` propagate unchanged.
 */
async function qdrantFetch<T>(
  endpoint: string,
  options: RequestInit = {}
): Promise<T> {
  // `options.headers` may be a plain object, an array of pairs, or a Headers
  // instance. Object spread only copies the first form correctly, so all
  // three are normalised through the Headers constructor instead.
  const headers = new Headers(options.headers);
  if (!headers.has('Content-Type')) {
    headers.set('Content-Type', 'application/json');
  }

  const response = await fetch(`${QDRANT_API_URL}${endpoint}`, {
    ...options,
    headers,
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`Qdrant API error: ${response.status} - ${errorText}`);
  }

  return (await response.json()) as T;
}
/**
 * Probes the Qdrant root endpoint to find out whether the server is reachable.
 *
 * Never rejects: any failure of the request is reported as a disconnected
 * state with an empty version string.
 *
 * @returns `connected: true` plus the reported version (or `'unknown'` when
 *   the server sent none) on success; `connected: false` and `''` otherwise.
 */
async function checkStatus(): Promise<{
  connected: boolean;
  version: string;
}> {
  let reportedVersion: string;

  try {
    const root = await qdrantFetch<{ title: string; version: string }>('/');
    reportedVersion = root.version ? root.version : 'unknown';
  } catch {
    // Any failure (network, HTTP error, malformed body) counts as "down".
    return { connected: false, version: '' };
  }

  return { connected: true, version: reportedVersion };
}
/** Dimension and distance metric of one vector space, as reported by Qdrant. */
interface VectorSpaceParams {
  size: number;
  distance: string;
}

/** Fields of the `GET /collections/{name}` response that this module reads. */
interface CollectionInfoResponse {
  result: {
    status: string;
    vectors_count?: number | null;
    points_count?: number | null;
    segments_count?: number | null;
    config: {
      params: {
        vectors?: VectorSpaceParams | Record<string, VectorSpaceParams>;
        on_disk_payload?: boolean;
      };
      optimizer_config?: Record<string, unknown>;
      wal_config?: Record<string, unknown>;
    };
    payload_schema?: Record<string, { data_type: string; params?: { indexed?: boolean } }>;
  };
}

/** Point shape returned by the scroll and point-lookup endpoints. */
interface RawQdrantPoint {
  id: string | number;
  vector: number[] | null;
  payload: Record<string, unknown>;
}

/**
 * Builds the REST path of a collection. The name is percent-encoded so that
 * characters such as `/`, `?` or `#` cannot change the request path.
 */
function collectionPath(collectionName: string): string {
  return `/collections/${encodeURIComponent(collectionName)}`;
}

/**
 * Extracts vector size and distance from a collection's `vectors` config.
 *
 * Handles both shapes Qdrant may return:
 * - unnamed: `{ size: 1536, distance: "Cosine" }`
 * - named:   `{ "default": { size: 1536, distance: "Cosine" } }` (first entry wins)
 *
 * Falls back to size 0 / distance 'Cosine' for anything it cannot read.
 */
function resolveVectorParams(config: unknown): VectorSpaceParams {
  const fallback: VectorSpaceParams = { size: 0, distance: 'Cosine' };
  if (!config || typeof config !== 'object') {
    return fallback;
  }

  const record = config as Record<string, unknown>;
  // A numeric `size` marks the unnamed shape; otherwise treat the object as a
  // map of named vectors (which may itself contain a vector called "size").
  const source = typeof record.size === 'number' ? record : Object.values(record)[0];
  if (!source || typeof source !== 'object' || !('size' in source)) {
    return fallback;
  }

  const { size, distance } = source as Record<string, unknown>;
  return {
    size: typeof size === 'number' ? size : 0,
    distance: typeof distance === 'string' ? distance : 'Cosine',
  };
}

/** Converts a raw API point into a QdrantPoint, defaulting missing vector/payload. */
function toQdrantPoint(raw: RawQdrantPoint): QdrantPoint {
  return {
    id: raw.id,
    vector: raw.vector ?? [],
    payload: raw.payload ?? {},
  };
}

/** Fetches the names of all collections from `GET /collections`. */
async function fetchCollectionNames(): Promise<string[]> {
  const response = await qdrantFetch<{
    result: { collections: Array<{ name: string }> };
  }>('/collections');
  return response.result.collections.map((c) => c.name);
}

/**
 * Loads the summary of one collection for the stats overview.
 * Never rejects: a failed lookup is logged and reported as a 'red' placeholder
 * so that one broken collection does not hide the others.
 */
async function summarizeCollection(name: string): Promise<QdrantCollection> {
  try {
    const { result } = await qdrantFetch<CollectionInfoResponse>(collectionPath(name));
    const { size, distance } = resolveVectorParams(result.config.params.vectors);
    return {
      name,
      vectorsCount: result.vectors_count ?? 0,
      pointsCount: result.points_count ?? 0,
      segmentsCount: result.segments_count ?? 0,
      status: result.status as 'green' | 'yellow' | 'red',
      vectorSize: size,
      distanceMetric: distance,
      // Read the real setting instead of always reporting false.
      onDiskPayload: result.config.params.on_disk_payload === true,
      indexed: result.status === 'green',
    };
  } catch (err) {
    console.error(`Failed to get info for collection ${name}:`, err);
    return {
      name,
      vectorsCount: 0,
      pointsCount: 0,
      segmentsCount: 0,
      status: 'red',
      vectorSize: 0,
      distanceMetric: 'unknown',
      onDiskPayload: false,
      indexed: false,
    };
  }
}

/**
 * Hook for interacting with Qdrant vector database.
 *
 * On mount it checks the connection and loads per-collection statistics; the
 * same work can be repeated through `refresh`. All other returned functions
 * are stateless request helpers with stable identities.
 *
 * @returns Connection status, statistics, loading/error state and request helpers.
 */
export function useQdrant(): UseQdrantReturn {
  const [status, setStatus] = useState<QdrantStatus>({
    isConnected: false,
    endpoint: QDRANT_API_URL,
    version: '',
    lastChecked: null,
  });
  const [stats, setStats] = useState<QdrantStats | null>(null);
  const [isLoading, setIsLoading] = useState(true);
  const [error, setError] = useState<Error | null>(null);

  /**
   * Check connection status and gather statistics.
   * Never rejects; failures are stored in `error` and logged.
   */
  const refresh = useCallback(async (): Promise<void> => {
    setIsLoading(true);
    setError(null);
    const startTime = performance.now();

    try {
      // Check connection status
      const statusResult = await checkStatus();
      const responseTimeMs = Math.round(performance.now() - startTime);
      setStatus({
        isConnected: statusResult.connected,
        endpoint: QDRANT_API_URL,
        version: statusResult.version,
        lastChecked: new Date(),
        responseTimeMs,
      });

      if (!statusResult.connected) {
        setStats(null);
        return;
      }

      // Get the collection list, then the details of each collection in parallel
      const collectionNames = await fetchCollectionNames();
      const collections = await Promise.all(collectionNames.map(summarizeCollection));

      setStats({
        totalCollections: collections.length,
        totalVectors: collections.reduce((sum, c) => sum + c.vectorsCount, 0),
        totalPoints: collections.reduce((sum, c) => sum + c.pointsCount, 0),
        collections,
        version: statusResult.version,
      });
    } catch (err) {
      console.error('Failed to connect to Qdrant:', err);
      setStatus({
        isConnected: false,
        endpoint: QDRANT_API_URL,
        version: '',
        lastChecked: new Date(),
      });
      setStats(null);
      setError(err instanceof Error ? err : new Error('Failed to connect to Qdrant'));
    } finally {
      setIsLoading(false);
    }
  }, []);

  // Initial load
  useEffect(() => {
    // refresh() handles its own errors, so the promise is intentionally not awaited.
    void refresh();
  }, [refresh]);

  /**
   * Get list of collection names.
   * @throws Error when the request fails.
   */
  const getCollections = useCallback(
    (): Promise<string[]> => fetchCollectionNames(),
    []
  );

  /**
   * Get detailed info for a specific collection.
   * For collections with named vectors, only the first vector's parameters
   * are reported.
   * @throws Error when the request fails.
   */
  const getCollectionInfo = useCallback(async (collectionName: string): Promise<QdrantCollectionInfo> => {
    const { result } = await qdrantFetch<CollectionInfoResponse>(collectionPath(collectionName));
    const { size, distance } = resolveVectorParams(result.config.params.vectors);

    const payloadSchema = Object.fromEntries(
      Object.entries(result.payload_schema ?? {}).map(([key, val]) => [
        key,
        { dataType: val.data_type, indexed: val.params?.indexed ?? false },
      ])
    );

    return {
      name: collectionName,
      status: result.status as 'green' | 'yellow' | 'red',
      vectorsCount: result.vectors_count ?? 0,
      pointsCount: result.points_count ?? 0,
      segmentsCount: result.segments_count ?? 0,
      config: {
        params: {
          vectors: { size, distance },
        },
        optimizerConfig: result.config.optimizer_config ?? {},
        walConfig: result.config.wal_config ?? {},
      },
      payloadSchema,
    };
  }, []);

  /**
   * Get points from a collection with pagination.
   * Uses the scroll endpoint; `offset` is forwarded as the scroll request's
   * `offset` and `nextOffset` is the value to pass for the following page.
   * @throws Error when the request fails.
   */
  const getPoints = useCallback(async (
    collectionName: string,
    limit: number = 50,
    offset: number = 0,
    withVectors: boolean = false
  ): Promise<{ points: QdrantPoint[]; nextOffset: number | null }> => {
    const response = await qdrantFetch<{
      result: {
        points: RawQdrantPoint[];
        next_page_offset: number | null;
      };
    }>(`${collectionPath(collectionName)}/points/scroll`, {
      method: 'POST',
      body: JSON.stringify({
        limit,
        offset,
        with_vector: withVectors,
        with_payload: true,
      }),
    });

    return {
      points: response.result.points.map(toQdrantPoint),
      nextOffset: response.result.next_page_offset,
    };
  }, []);

  /**
   * Scroll through points in a collection (for large datasets).
   * Vectors and payloads are always requested. `offset` and `filter` are only
   * sent when provided.
   * @throws Error when the request fails.
   */
  const scrollPoints = useCallback(async (
    collectionName: string,
    limit: number = 50,
    offset: string | number | null = null,
    filter?: Record<string, unknown>
  ): Promise<{ points: QdrantPoint[]; nextOffset: string | number | null }> => {
    const body: Record<string, unknown> = {
      limit,
      with_vector: true,
      with_payload: true,
    };
    if (offset !== null) {
      body.offset = offset;
    }
    if (filter) {
      body.filter = filter;
    }

    const response = await qdrantFetch<{
      result: {
        points: RawQdrantPoint[];
        next_page_offset: string | number | null;
      };
    }>(`${collectionPath(collectionName)}/points/scroll`, {
      method: 'POST',
      body: JSON.stringify(body),
    });

    return {
      points: response.result.points.map(toQdrantPoint),
      nextOffset: response.result.next_page_offset,
    };
  }, []);

  /**
   * Search for similar vectors.
   * Payloads are always requested; a hit's vector is passed through only when
   * the response contains it.
   * @throws Error when the request fails.
   */
  const searchPoints = useCallback(async (
    collectionName: string,
    vector: number[],
    limit: number = 10
  ): Promise<QdrantSearchResult[]> => {
    const response = await qdrantFetch<{
      result: Array<{
        id: string | number;
        score: number;
        payload: Record<string, unknown>;
        vector?: number[];
      }>;
    }>(`${collectionPath(collectionName)}/points/search`, {
      method: 'POST',
      body: JSON.stringify({
        vector,
        limit,
        with_payload: true,
      }),
    });

    return response.result.map((hit) => ({
      id: hit.id,
      score: hit.score,
      payload: hit.payload ?? {},
      vector: hit.vector,
    }));
  }, []);

  /**
   * Get a specific point by ID.
   * Best-effort lookup: resolves to null on any failure (missing point,
   * HTTP error or network error) instead of rejecting.
   */
  const getPointById = useCallback(async (
    collectionName: string,
    pointId: string | number
  ): Promise<QdrantPoint | null> => {
    try {
      const response = await qdrantFetch<{ result: RawQdrantPoint }>(
        `${collectionPath(collectionName)}/points/${encodeURIComponent(String(pointId))}`
      );
      return toQdrantPoint(response.result);
    } catch {
      return null;
    }
  }, []);

  /**
   * Reindex a Qdrant collection with new embeddings.
   * Calls the backend API which handles YAML loading and embedding generation.
   * Missing options default to collection 'heritage_custodians', model
   * 'all-MiniLM-L6-v2', batch size 100 and recreate_collection = true.
   * @throws Error with status and response text when the backend responds
   *   with a non-OK status.
   */
  const reindexCollection = useCallback(async (
    options: ReindexRequest = {}
  ): Promise<ReindexResponse> => {
    const response = await fetch('/api/qdrant/reindex', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        collection_name: options.collection_name || 'heritage_custodians',
        embedding_model: options.embedding_model || 'all-MiniLM-L6-v2',
        batch_size: options.batch_size || 100,
        recreate_collection: options.recreate_collection ?? true,
      }),
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`Reindex failed: ${response.status} - ${errorText}`);
    }
    return response.json();
  }, []);

  /**
   * Get the current status of a reindexing operation.
   * @throws Error with status and response text when the backend responds
   *   with a non-OK status.
   */
  const getReindexStatus = useCallback(async (): Promise<ReindexStatus> => {
    const response = await fetch('/api/qdrant/reindex/status');
    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`Failed to get reindex status: ${response.status} - ${errorText}`);
    }
    return response.json();
  }, []);

  return {
    status,
    stats,
    isLoading,
    error,
    refresh,
    getCollections,
    getCollectionInfo,
    getPoints,
    searchPoints,
    getPointById,
    scrollPoints,
    reindexCollection,
    getReindexStatus,
  };
}