/**
 * React Hook for Qdrant Vector Database Interaction (via REST API)
 * Provides interface to query Qdrant through its REST API
 *
 * Qdrant is a high-performance vector similarity search engine
 * Used for semantic search, recommendations, and embedding visualization
 *
 * Self-hosted Qdrant instance - no API key required
 */

import { useState, useEffect, useCallback } from 'react';

// Configuration - self-hosted Qdrant instance
// Default to /qdrant which is the Caddy proxy path
const QDRANT_API_URL = import.meta.env.VITE_QDRANT_API_URL || '/qdrant';

/** Distance metric reported when a collection's config does not provide one. */
const DEFAULT_DISTANCE_METRIC = 'Cosine';

/** Summary of one collection as shown in the aggregated statistics. */
export interface QdrantCollection {
  name: string;
  vectorsCount: number;
  pointsCount: number;
  segmentsCount: number;
  status: 'green' | 'yellow' | 'red';
  vectorSize: number;
  distanceMetric: string;
  onDiskPayload: boolean;
  indexed: boolean;
}

/** Aggregated statistics across every collection returned by the server. */
export interface QdrantStats {
  totalCollections: number;
  totalVectors: number;
  totalPoints: number;
  collections: QdrantCollection[];
  version: string;
}

/** Result of the most recent connectivity check. */
export interface QdrantStatus {
  isConnected: boolean;
  endpoint: string;
  version: string;
  lastChecked: Date | null;
  responseTimeMs?: number;
}

/** A stored point: identifier, embedding vector and arbitrary payload. */
export interface QdrantPoint {
  id: string | number;
  vector: number[];
  payload: Record<string, unknown>;
}

/** One hit of a similarity search. */
export interface QdrantSearchResult {
  id: string | number;
  score: number;
  payload: Record<string, unknown>;
  vector?: number[];
}

/** Options accepted by the backend reindex endpoint. */
export interface ReindexRequest {
  collection_name?: string;
  // minilm_384, openai_1536, bge_768
  embedding_model?: 'all-MiniLM-L6-v2' | 'text-embedding-3-small' | 'bge-base-en-v1.5';
  batch_size?: number;
  recreate_collection?: boolean;
}

/** Response body of the backend reindex endpoint. */
export interface ReindexResponse {
  success: boolean;
  message: string;
  collection_name?: string;
  documents_indexed?: number;
  embedding_model?: string;
  vector_dimensions?: number;
  elapsed_seconds?: number;
}

/** Progress report of a reindexing operation. */
export interface ReindexStatus {
  is_running: boolean;
  progress: number;
  current_batch: number;
  total_batches: number;
  documents_processed: number;
  message: string;
  completed: boolean;
  success: boolean;
}

/** Detailed, camelCased view of a single collection. */
export interface QdrantCollectionInfo {
  name: string;
  status: 'green' | 'yellow' | 'red';
  vectorsCount: number;
  pointsCount: number;
  segmentsCount: number;
  config: {
    params: {
      vectors: {
        size: number;
        distance: string;
      };
    };
    optimizerConfig: Record<string, unknown>;
    walConfig: Record<string, unknown>;
  };
  // NOTE(review): entry shape mirrors what getCollectionInfo builds — confirm
  // against consumers of this field.
  payloadSchema: Record<string, { dataType: string; indexed: boolean }>;
}

/** Everything returned by {@link useQdrant}. */
export interface UseQdrantReturn {
  status: QdrantStatus;
  stats: QdrantStats | null;
  isLoading: boolean;
  error: Error | null;
  refresh: () => Promise<void>;
  getCollections: () => Promise<string[]>;
  getCollectionInfo: (collectionName: string) => Promise<QdrantCollectionInfo>;
  getPoints: (
    collectionName: string,
    limit?: number,
    offset?: number,
    withVectors?: boolean
  ) => Promise<{ points: QdrantPoint[]; nextOffset: number | null }>;
  searchPoints: (
    collectionName: string,
    vector: number[],
    limit?: number
  ) => Promise<QdrantSearchResult[]>;
  getPointById: (collectionName: string, pointId: string | number) => Promise<QdrantPoint | null>;
  scrollPoints: (
    collectionName: string,
    limit?: number,
    offset?: string | number | null,
    filter?: Record<string, unknown>
  ) => Promise<{ points: QdrantPoint[]; nextOffset: string | number | null }>;
  reindexCollection: (options?: ReindexRequest) => Promise<ReindexResponse>;
  getReindexStatus: () => Promise<ReindexStatus>;
}

/** Size/distance pair describing one vector space. */
interface VectorParams {
  size: number;
  distance: string;
}

/** Wire shape of a point as this module reads it from scroll/retrieve responses. */
interface RawPoint {
  id: string | number;
  vector: number[] | null;
  payload: Record<string, unknown> | null;
}

/**
 * Wire shape of `GET /collections/{name}` limited to the fields this module reads.
 * NOTE(review): optionality reflects the defensive defaults applied below, not a
 * verified server contract.
 */
interface RawCollectionInfo {
  status: string;
  vectors_count?: number | null;
  points_count?: number | null;
  segments_count?: number | null;
  config: {
    params: {
      vectors: VectorParams | Record<string, VectorParams>;
    };
    optimizer_config?: Record<string, unknown> | null;
    wal_config?: Record<string, unknown> | null;
  };
  payload_schema?: Record<string, { data_type: string; params?: { indexed?: boolean } }> | null;
}

/** Narrow an unknown value to a non-null object that can be indexed by string keys. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/**
 * Extract vector size and distance from a collection's `vectors` config.
 *
 * Handles both layouts:
 * - Unnamed config: `{ size: 1536, distance: "Cosine" }`
 * - Named config:   `{ "default": { size: 1536, distance: "Cosine" } }`
 *   (only the first named entry is used)
 *
 * @returns `{ size: 0, distance: 'Cosine' }` when nothing usable is found.
 */
function resolveVectorParams(config: unknown): VectorParams {
  const fallback: VectorParams = { size: 0, distance: DEFAULT_DISTANCE_METRIC };
  if (!isRecord(config)) {
    return fallback;
  }

  // Unnamed vector config
  if (typeof config.size === 'number') {
    return {
      size: config.size,
      distance: typeof config.distance === 'string' ? config.distance : DEFAULT_DISTANCE_METRIC,
    };
  }

  // Named vector configs - get the first one
  const firstNamed = Object.values(config)[0];
  if (!isRecord(firstNamed) || !('size' in firstNamed)) {
    return fallback;
  }
  return {
    size: typeof firstNamed.size === 'number' ? firstNamed.size : 0,
    distance:
      typeof firstNamed.distance === 'string' ? firstNamed.distance : DEFAULT_DISTANCE_METRIC,
  };
}

/**
 * Build the REST path of a collection.
 * The name is percent-encoded so characters such as `/`, `?` or `#` cannot
 * change which resource is addressed.
 */
function collectionPath(collectionName: string): string {
  return `/collections/${encodeURIComponent(collectionName)}`;
}

/** Convert a wire point into a {@link QdrantPoint}, defaulting missing vector/payload. */
function toPoint(raw: RawPoint): QdrantPoint {
  return {
    id: raw.id,
    vector: raw.vector ?? [],
    payload: raw.payload ?? {},
  };
}

/**
 * Reject with `"<label>: <status> - <body text>"` when the response is not ok.
 */
async function throwIfNotOk(response: Response, label: string): Promise<void> {
  if (response.ok) {
    return;
  }
  const errorText = await response.text();
  throw new Error(`${label}: ${response.status} - ${errorText}`);
}

/**
 * Helper function for Qdrant API requests
 * Self-hosted instance - no authentication required
 *
 * @param endpoint Path appended to the configured Qdrant base URL.
 * @param options  Extra `fetch` options; its headers override the JSON default.
 * @returns The parsed JSON body, typed by the caller (not validated at runtime).
 * @throws Error when the HTTP status is not ok.
 */
async function qdrantFetch<T>(endpoint: string, options: RequestInit = {}): Promise<T> {
  const headers: HeadersInit = {
    'Content-Type': 'application/json',
    ...options.headers,
  };

  const response = await fetch(`${QDRANT_API_URL}${endpoint}`, {
    ...options,
    headers,
  });

  await throwIfNotOk(response, 'Qdrant API error');
  return response.json();
}

/**
 * Check Qdrant connection status
 *
 * Never rejects: any failure of the root request is reported as
 * `{ connected: false, version: '' }`.
 */
async function checkStatus(): Promise<{
  connected: boolean;
  version: string;
}> {
  try {
    const response = await qdrantFetch<{ title: string; version: string }>('/');
    return {
      connected: true,
      version: response.version || 'unknown',
    };
  } catch {
    return {
      connected: false,
      version: '',
    };
  }
}

/** Fetch the names of all collections known to the server. */
async function fetchCollectionNames(): Promise<string[]> {
  const response = await qdrantFetch<{
    result: { collections: Array<{ name: string }> };
  }>('/collections');
  return response.result.collections.map((c) => c.name);
}

/**
 * Load the summary of one collection for the statistics view.
 *
 * Never rejects: if the collection cannot be loaded the error is logged and a
 * placeholder with status `red` and zeroed counters is returned, so one broken
 * collection does not fail the whole refresh.
 */
async function summarizeCollection(name: string): Promise<QdrantCollection> {
  try {
    const info = await qdrantFetch<{ result: RawCollectionInfo }>(collectionPath(name));
    const vectors = resolveVectorParams(info.result.config.params.vectors);

    return {
      name,
      vectorsCount: info.result.vectors_count ?? 0,
      pointsCount: info.result.points_count ?? 0,
      segmentsCount: info.result.segments_count ?? 0,
      // NOTE(review): unchecked cast — any other status string passes through as-is.
      status: info.result.status as 'green' | 'yellow' | 'red',
      vectorSize: vectors.size,
      distanceMetric: vectors.distance,
      // Not read from the response; always reported as false here.
      onDiskPayload: false,
      indexed: info.result.status === 'green',
    };
  } catch (err) {
    console.error(`Failed to get info for collection ${name}:`, err);
    return {
      name,
      vectorsCount: 0,
      pointsCount: 0,
      segmentsCount: 0,
      status: 'red' as const,
      vectorSize: 0,
      distanceMetric: 'unknown',
      onDiskPayload: false,
      indexed: false,
    };
  }
}

/**
 * Hook for interacting with Qdrant vector database
 *
 * Runs {@link UseQdrantReturn.refresh} once on mount and exposes the
 * connection status, aggregated statistics and a set of request helpers.
 * Except where noted, the request helpers reject when the HTTP call fails.
 */
export function useQdrant(): UseQdrantReturn {
  const [status, setStatus] = useState<QdrantStatus>({
    isConnected: false,
    endpoint: QDRANT_API_URL,
    version: '',
    lastChecked: null,
  });
  const [stats, setStats] = useState<QdrantStats | null>(null);
  const [isLoading, setIsLoading] = useState(true);
  const [error, setError] = useState<Error | null>(null);

  /**
   * Check connection status and gather statistics
   *
   * Never rejects; failures are stored in `error` and reset `stats` to null.
   */
  const refresh = useCallback(async (): Promise<void> => {
    setIsLoading(true);
    setError(null);

    const startTime = performance.now();

    try {
      // Check connection status
      const statusResult = await checkStatus();
      const responseTimeMs = Math.round(performance.now() - startTime);

      setStatus({
        isConnected: statusResult.connected,
        endpoint: QDRANT_API_URL,
        version: statusResult.version,
        lastChecked: new Date(),
        responseTimeMs,
      });

      if (!statusResult.connected) {
        setStats(null);
        return;
      }

      // Get collections list, then details for each collection in parallel
      const collectionNames = await fetchCollectionNames();
      const collections = await Promise.all(collectionNames.map(summarizeCollection));

      const totalVectors = collections.reduce((sum, c) => sum + c.vectorsCount, 0);
      const totalPoints = collections.reduce((sum, c) => sum + c.pointsCount, 0);

      setStats({
        totalCollections: collections.length,
        totalVectors,
        totalPoints,
        collections,
        version: statusResult.version,
      });
    } catch (err) {
      console.error('Failed to connect to Qdrant:', err);
      setStatus({
        isConnected: false,
        endpoint: QDRANT_API_URL,
        version: '',
        lastChecked: new Date(),
      });
      setStats(null);
      setError(err instanceof Error ? err : new Error('Failed to connect to Qdrant'));
    } finally {
      setIsLoading(false);
    }
  }, []);

  // Initial load
  useEffect(() => {
    // refresh handles its own errors, so the promise is intentionally not awaited
    void refresh();
  }, [refresh]);

  /**
   * Get list of collection names
   */
  const getCollections = useCallback(async (): Promise<string[]> => fetchCollectionNames(), []);

  /**
   * Get detailed info for a specific collection
   */
  const getCollectionInfo = useCallback(
    async (collectionName: string): Promise<QdrantCollectionInfo> => {
      const response = await qdrantFetch<{ result: RawCollectionInfo }>(
        collectionPath(collectionName)
      );
      const result = response.result;

      // Handle both named and unnamed vector configs
      const vectors = resolveVectorParams(result.config.params.vectors);

      return {
        name: collectionName,
        // NOTE(review): unchecked cast — any other status string passes through as-is.
        status: result.status as 'green' | 'yellow' | 'red',
        vectorsCount: result.vectors_count ?? 0,
        pointsCount: result.points_count ?? 0,
        segmentsCount: result.segments_count ?? 0,
        config: {
          params: {
            vectors: {
              size: vectors.size,
              distance: vectors.distance,
            },
          },
          optimizerConfig: result.config.optimizer_config ?? {},
          walConfig: result.config.wal_config ?? {},
        },
        payloadSchema: Object.fromEntries(
          Object.entries(result.payload_schema ?? {}).map(([key, val]) => [
            key,
            { dataType: val.data_type, indexed: val.params?.indexed ?? false },
          ])
        ),
      };
    },
    []
  );

  /**
   * Get points from a collection with pagination
   *
   * @param limit       Maximum number of points to return (default 50).
   * @param offset      Sent as the scroll `offset` (default 0).
   * @param withVectors Whether vectors are requested (default false); points
   *                    without a vector are returned with an empty array.
   */
  const getPoints = useCallback(
    async (
      collectionName: string,
      limit: number = 50,
      offset: number = 0,
      withVectors: boolean = false
    ): Promise<{ points: QdrantPoint[]; nextOffset: number | null }> => {
      const response = await qdrantFetch<{
        result: {
          points: RawPoint[];
          next_page_offset: number | null;
        };
      }>(`${collectionPath(collectionName)}/points/scroll`, {
        method: 'POST',
        body: JSON.stringify({
          limit,
          offset,
          with_vector: withVectors,
          with_payload: true,
        }),
      });

      return {
        points: response.result.points.map(toPoint),
        nextOffset: response.result.next_page_offset,
      };
    },
    []
  );

  /**
   * Scroll through points in a collection (for large datasets)
   *
   * Always requests vectors and payloads. `offset` and `filter` are only sent
   * when provided (a null offset is omitted from the request body).
   */
  const scrollPoints = useCallback(
    async (
      collectionName: string,
      limit: number = 50,
      offset: string | number | null = null,
      filter?: Record<string, unknown>
    ): Promise<{ points: QdrantPoint[]; nextOffset: string | number | null }> => {
      const body: Record<string, unknown> = {
        limit,
        with_vector: true,
        with_payload: true,
      };

      if (offset !== null) {
        body.offset = offset;
      }

      if (filter) {
        body.filter = filter;
      }

      const response = await qdrantFetch<{
        result: {
          points: RawPoint[];
          next_page_offset: string | number | null;
        };
      }>(`${collectionPath(collectionName)}/points/scroll`, {
        method: 'POST',
        body: JSON.stringify(body),
      });

      return {
        points: response.result.points.map(toPoint),
        nextOffset: response.result.next_page_offset,
      };
    },
    []
  );

  /**
   * Search for similar vectors
   *
   * Requests payloads only; `vector` on a result is whatever the server
   * returned for it (possibly undefined).
   */
  const searchPoints = useCallback(
    async (
      collectionName: string,
      vector: number[],
      limit: number = 10
    ): Promise<QdrantSearchResult[]> => {
      const response = await qdrantFetch<{
        result: Array<{
          id: string | number;
          score: number;
          payload: Record<string, unknown> | null;
          vector?: number[];
        }>;
      }>(`${collectionPath(collectionName)}/points/search`, {
        method: 'POST',
        body: JSON.stringify({
          vector,
          limit,
          with_payload: true,
        }),
      });

      return response.result.map((hit) => ({
        id: hit.id,
        score: hit.score,
        payload: hit.payload ?? {},
        vector: hit.vector,
      }));
    },
    []
  );

  /**
   * Get a specific point by ID
   *
   * @returns The point, or `null` on ANY failure — a missing point, an HTTP
   *          error and a network error are not distinguished.
   */
  const getPointById = useCallback(
    async (collectionName: string, pointId: string | number): Promise<QdrantPoint | null> => {
      try {
        const response = await qdrantFetch<{ result: RawPoint }>(
          `${collectionPath(collectionName)}/points/${encodeURIComponent(String(pointId))}`
        );
        return toPoint(response.result);
      } catch {
        return null;
      }
    },
    []
  );

  /**
   * Reindex a Qdrant collection with new embeddings
   * Calls the backend API which handles YAML loading and embedding generation
   *
   * @throws Error when the backend responds with a non-ok status.
   */
  const reindexCollection = useCallback(
    async (options: ReindexRequest = {}): Promise<ReindexResponse> => {
      const response = await fetch('/api/qdrant/reindex', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          // `||` also replaces an empty string / 0 with the default;
          // `??` keeps an explicit `false` for recreate_collection.
          collection_name: options.collection_name || 'heritage_custodians',
          embedding_model: options.embedding_model || 'all-MiniLM-L6-v2',
          batch_size: options.batch_size || 100,
          recreate_collection: options.recreate_collection ?? true,
        }),
      });

      await throwIfNotOk(response, 'Reindex failed');
      return response.json();
    },
    []
  );

  /**
   * Get the current status of a reindexing operation
   *
   * @throws Error when the backend responds with a non-ok status.
   */
  const getReindexStatus = useCallback(async (): Promise<ReindexStatus> => {
    const response = await fetch('/api/qdrant/reindex/status');

    await throwIfNotOk(response, 'Failed to get reindex status');
    return response.json();
  }, []);

  return {
    status,
    stats,
    isLoading,
    error,
    refresh,
    getCollections,
    getCollectionInfo,
    getPoints,
    searchPoints,
    getPointById,
    scrollPoints,
    reindexCollection,
    getReindexStatus,
  };
}