592 lines
17 KiB
TypeScript
592 lines
17 KiB
TypeScript
/**
 * React hook for interacting with a Qdrant vector database through its REST API.
 *
 * Qdrant is a high-performance vector similarity search engine, used here for
 * semantic search, recommendations, and embedding visualization.
 *
 * The target is a self-hosted Qdrant instance, so no API key is required.
 */
|
|
|
|
import { useState, useEffect, useCallback } from 'react';
|
|
|
|
// Base URL of the self-hosted Qdrant REST API.
// When VITE_QDRANT_API_URL is unset or empty, fall back to '/qdrant',
// which is the Caddy proxy path.
const QDRANT_API_URL = import.meta.env.VITE_QDRANT_API_URL || '/qdrant';
|
|
|
|
/** Summary of a single Qdrant collection, as listed in the aggregated statistics. */
export interface QdrantCollection {
  /** Collection name. */
  name: string;

  /** Number of vectors reported for the collection. */
  vectorsCount: number;

  /** Number of points reported for the collection. */
  pointsCount: number;

  /** Number of segments reported for the collection. */
  segmentsCount: number;

  /** Collection health indicator. */
  status: 'green' | 'yellow' | 'red';

  /** Dimensionality of the collection's vector configuration. */
  vectorSize: number;

  /** Name of the distance function, e.g. "Cosine". */
  distanceMetric: string;

  /** Whether payloads are kept on disk. */
  onDiskPayload: boolean;

  /** Whether the collection is considered indexed. */
  indexed: boolean;
}
|
|
|
|
/** Statistics aggregated over every collection of the connected instance. */
export interface QdrantStats {
  /** Number of collections found. */
  totalCollections: number;

  /** Sum of `vectorsCount` over all collections. */
  totalVectors: number;

  /** Sum of `pointsCount` over all collections. */
  totalPoints: number;

  /** Per-collection summaries. */
  collections: QdrantCollection[];

  /** Version string reported by the Qdrant server. */
  version: string;
}
|
|
|
|
/** Connection state of the Qdrant instance as last observed by the hook. */
export interface QdrantStatus {
  /** True when the last status probe succeeded. */
  isConnected: boolean;

  /** Base URL the hook sends its requests to. */
  endpoint: string;

  /** Server version string; empty while disconnected. */
  version: string;

  /** Time of the most recent probe, or `null` before the first one. */
  lastChecked: Date | null;

  /** Duration of the status probe in whole milliseconds, when measured. */
  responseTimeMs?: number;
}
|
|
|
|
/** A stored point: identifier, embedding vector, and attached payload. */
export interface QdrantPoint {
  /** Point identifier (string or numeric). */
  id: string | number;

  /** Embedding vector; the hook substitutes an empty array when none is returned. */
  vector: number[];

  /** Arbitrary payload stored with the point. */
  payload: Record<string, unknown>;
}
|
|
|
|
/** One hit returned by a vector similarity search. */
export interface QdrantSearchResult {
  /** Identifier of the matching point. */
  id: string | number;

  /** Similarity score reported by Qdrant for this hit. */
  score: number;

  /** Payload stored with the matching point. */
  payload: Record<string, unknown>;

  /** Vector of the matching point, when the response includes it. */
  vector?: number[];
}
|
|
|
|
/**
 * Options for a reindex request sent to the backend.
 * Every field is optional; the hook fills in defaults for omitted ones.
 */
export interface ReindexRequest {
  /** Target collection name. */
  collection_name?: string;

  /**
   * Embedding model to use.
   * Original note: minilm_384, openai_1536, bge_768 (presumably the vector
   * dimensions of the respective models; confirm against the backend).
   */
  embedding_model?:
    | 'all-MiniLM-L6-v2'
    | 'text-embedding-3-small'
    | 'bge-base-en-v1.5';

  /** Number of documents per batch. */
  batch_size?: number;

  /** Whether the collection should be recreated before indexing. */
  recreate_collection?: boolean;
}
|
|
|
|
/** Body returned by the backend for a reindex request. */
export interface ReindexResponse {
  /** Whether the backend reports the request as successful. */
  success: boolean;

  /** Human-readable message from the backend. */
  message: string;

  /** Collection that was targeted, when reported. */
  collection_name?: string;

  /** Number of documents indexed, when reported. */
  documents_indexed?: number;

  /** Embedding model that was used, when reported. */
  embedding_model?: string;

  /** Dimensionality of the produced vectors, when reported. */
  vector_dimensions?: number;

  /** Elapsed time in seconds, when reported. */
  elapsed_seconds?: number;
}
|
|
|
|
/** Progress snapshot of a reindexing operation, as reported by the backend. */
export interface ReindexStatus {
  /** Whether a reindex is currently running. */
  is_running: boolean;

  /** Progress value reported by the backend (scale is not visible in this file). */
  progress: number;

  /** Batch currently being processed. */
  current_batch: number;

  /** Total number of batches. */
  total_batches: number;

  /** Number of documents processed so far. */
  documents_processed: number;

  /** Human-readable status message. */
  message: string;

  /** Whether the operation has finished. */
  completed: boolean;

  /** Whether the operation is reported as successful. */
  success: boolean;
}
|
|
|
|
/** Detailed description of one collection, including its configuration. */
export interface QdrantCollectionInfo {
  /** Collection name. */
  name: string;

  /** Collection health indicator. */
  status: 'green' | 'yellow' | 'red';

  /** Number of vectors reported for the collection. */
  vectorsCount: number;

  /** Number of points reported for the collection. */
  pointsCount: number;

  /** Number of segments reported for the collection. */
  segmentsCount: number;

  /** Collection configuration, with keys converted to camelCase. */
  config: {
    params: {
      /** Vector dimensionality and distance function. */
      vectors: {
        size: number;
        distance: string;
      };
    };
    /** Optimizer configuration, passed through as received. */
    optimizerConfig: Record<string, unknown>;
    /** Write-ahead-log configuration, passed through as received. */
    walConfig: Record<string, unknown>;
  };

  /** Payload fields keyed by name, with their data type and index flag. */
  payloadSchema: Record<string, { dataType: string; indexed: boolean }>;
}
|
|
|
|
/** Everything the `useQdrant` hook exposes to a component. */
export interface UseQdrantReturn {
  /** Latest connection status. */
  status: QdrantStatus;

  /** Aggregated statistics, or `null` when none are available. */
  stats: QdrantStats | null;

  /** True while a refresh is in progress. */
  isLoading: boolean;

  /** Error from the last refresh, or `null`. */
  error: Error | null;

  /** Re-check the connection and reload the statistics. */
  refresh: () => Promise<void>;

  /** List the names of all collections. */
  getCollections: () => Promise<string[]>;

  /** Fetch detailed information about one collection. */
  getCollectionInfo: (
    collectionName: string
  ) => Promise<QdrantCollectionInfo>;

  /** Fetch one page of points; `offset` is forwarded as the scroll request's `offset`. */
  getPoints: (
    collectionName: string,
    limit?: number,
    offset?: number,
    withVectors?: boolean
  ) => Promise<{ points: QdrantPoint[]; nextOffset: number | null }>;

  /** Search for the points most similar to `vector`. */
  searchPoints: (
    collectionName: string,
    vector: number[],
    limit?: number
  ) => Promise<QdrantSearchResult[]>;

  /** Fetch one point by id; resolves to `null` when the request fails. */
  getPointById: (
    collectionName: string,
    pointId: string | number
  ) => Promise<QdrantPoint | null>;

  /** Scroll through points (vectors included), optionally filtered. */
  scrollPoints: (
    collectionName: string,
    limit?: number,
    offset?: string | number | null,
    filter?: Record<string, unknown>
  ) => Promise<{ points: QdrantPoint[]; nextOffset: string | number | null }>;

  /** Ask the backend to reindex a collection. */
  reindexCollection: (options?: ReindexRequest) => Promise<ReindexResponse>;

  /** Fetch the status of the current reindexing operation. */
  getReindexStatus: () => Promise<ReindexStatus>;
}
|
|
|
|
/**
 * Send a request to the Qdrant REST API and return the parsed JSON body.
 *
 * The instance is self-hosted, so no authentication header is attached.
 *
 * @param endpoint - Path appended verbatim to `QDRANT_API_URL` (e.g. `/collections`).
 * @param options - Standard `fetch` options. Caller-supplied headers take
 *   precedence over the default JSON `Content-Type`.
 * @returns The response body parsed as JSON and typed as `T`. The shape is not
 *   validated at runtime.
 * @throws Error containing the HTTP status and response text when the
 *   response status is not OK.
 */
async function qdrantFetch<T>(
  endpoint: string,
  options: RequestInit = {}
): Promise<T> {
  // `options.headers` may be a plain object, an array of [name, value] pairs,
  // or a Headers instance. Object spread only handles the first form (a Headers
  // instance spreads to nothing), so normalise through the Headers constructor,
  // which accepts all three and compares names case-insensitively.
  const headers = new Headers(options.headers);
  if (!headers.has('Content-Type')) {
    headers.set('Content-Type', 'application/json');
  }

  const response = await fetch(`${QDRANT_API_URL}${endpoint}`, {
    ...options,
    headers,
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`Qdrant API error: ${response.status} - ${errorText}`);
  }

  return (await response.json()) as T;
}
|
|
|
|
/**
 * Probe the Qdrant root endpoint to find out whether the service is reachable.
 *
 * Never rejects: any failure while probing is reported as
 * `{ connected: false, version: '' }`.
 *
 * @returns The connection flag and the reported server version
 *   (`'unknown'` when the response carries no version).
 */
async function checkStatus(): Promise<{
  connected: boolean;
  version: string;
}> {
  try {
    const { version } = await qdrantFetch<{ title: string; version: string }>('/');
    return { connected: true, version: version || 'unknown' };
  } catch {
    return { connected: false, version: '' };
  }
}
|
|
|
|
/** One vector configuration entry as returned by the collection-info endpoint. */
type RawVectorParams = { size: number; distance: string };

/** Unnamed (single) vector config, or named configs keyed by vector name. */
type RawVectorsConfig = RawVectorParams | Record<string, RawVectorParams>;

/**
 * Build the REST path of a collection, percent-encoding the name so that
 * characters such as `/`, `?`, `#` or spaces cannot alter the request path.
 */
function collectionPath(collectionName: string): string {
  return `/collections/${encodeURIComponent(collectionName)}`;
}

/**
 * Extract vector size and distance from a collection's `vectors` config.
 *
 * Handles both layouts:
 * - unnamed: `{ size: 1536, distance: "Cosine" }`
 * - named:   `{ "default": { size: 1536, distance: "Cosine" } }` (first entry is used)
 *
 * Falls back to size `0` and distance `'Cosine'` for anything missing or malformed.
 */
function resolveVectorParams(vectors: unknown): RawVectorParams {
  const resolved: RawVectorParams = { size: 0, distance: 'Cosine' };
  if (!vectors || typeof vectors !== 'object') {
    return resolved;
  }

  const top = vectors as Record<string, unknown>;
  // A numeric top-level `size` marks the unnamed layout; otherwise treat the
  // object as a name -> config map and inspect its first entry.
  const source = typeof top.size === 'number' ? top : Object.values(top)[0];

  if (source && typeof source === 'object' && 'size' in source) {
    const params = source as Record<string, unknown>;
    if (typeof params.size === 'number') {
      resolved.size = params.size;
    }
    if (typeof params.distance === 'string') {
      resolved.distance = params.distance;
    }
  }
  return resolved;
}

/**
 * Hook for interacting with a Qdrant vector database.
 *
 * On mount it checks the connection and loads per-collection statistics; the
 * same work can be repeated through `refresh`. The remaining members are
 * request helpers that talk to the Qdrant REST API (or, for reindexing, to the
 * backend under `/api/qdrant`) and reject on HTTP errors unless noted.
 *
 * @returns Connection status, aggregated stats, loading/error flags, and the
 *   request helpers described by `UseQdrantReturn`.
 */
export function useQdrant(): UseQdrantReturn {
  const [status, setStatus] = useState<QdrantStatus>({
    isConnected: false,
    endpoint: QDRANT_API_URL,
    version: '',
    lastChecked: null,
  });
  const [stats, setStats] = useState<QdrantStats | null>(null);
  const [isLoading, setIsLoading] = useState(true);
  const [error, setError] = useState<Error | null>(null);

  /**
   * Check connection status and gather statistics.
   */
  const refresh = useCallback(async () => {
    setIsLoading(true);
    setError(null);

    const startTime = performance.now();

    try {
      // Check connection status
      const statusResult = await checkStatus();

      // Only the status probe is timed, not the statistics gathering below.
      const responseTimeMs = Math.round(performance.now() - startTime);

      setStatus({
        isConnected: statusResult.connected,
        endpoint: QDRANT_API_URL,
        version: statusResult.version,
        lastChecked: new Date(),
        responseTimeMs,
      });

      if (!statusResult.connected) {
        setStats(null);
        return;
      }

      // Get collections list
      const collectionsResponse = await qdrantFetch<{
        result: { collections: Array<{ name: string }> };
      }>('/collections');

      const collectionNames = collectionsResponse.result.collections.map(c => c.name);

      // Get details for each collection. A failure for one collection is
      // logged and reported as a 'red' placeholder so the others still show.
      const collections: QdrantCollection[] = await Promise.all(
        collectionNames.map(async (name) => {
          try {
            const info = await qdrantFetch<{
              result: {
                status: string;
                vectors_count: number;
                points_count: number;
                segments_count: number;
                config: {
                  params: {
                    vectors: RawVectorsConfig;
                    on_disk_payload?: boolean;
                  };
                };
              };
            }>(collectionPath(name));

            const params = info.result.config.params;
            const { size, distance } = resolveVectorParams(params.vectors);

            return {
              name,
              vectorsCount: info.result.vectors_count || 0,
              pointsCount: info.result.points_count || 0,
              segmentsCount: info.result.segments_count || 0,
              status: info.result.status as 'green' | 'yellow' | 'red',
              vectorSize: size,
              distanceMetric: distance,
              // Reflect the collection's actual setting instead of a fixed value.
              onDiskPayload: params.on_disk_payload ?? false,
              indexed: info.result.status === 'green',
            };
          } catch (err) {
            console.error(`Failed to get info for collection ${name}:`, err);
            return {
              name,
              vectorsCount: 0,
              pointsCount: 0,
              segmentsCount: 0,
              status: 'red' as const,
              vectorSize: 0,
              distanceMetric: 'unknown',
              onDiskPayload: false,
              indexed: false,
            };
          }
        })
      );

      const totalVectors = collections.reduce((sum, c) => sum + c.vectorsCount, 0);
      const totalPoints = collections.reduce((sum, c) => sum + c.pointsCount, 0);

      setStats({
        totalCollections: collections.length,
        totalVectors,
        totalPoints,
        collections,
        version: statusResult.version,
      });
    } catch (err) {
      console.error('Failed to connect to Qdrant:', err);
      setStatus({
        isConnected: false,
        endpoint: QDRANT_API_URL,
        version: '',
        lastChecked: new Date(),
      });
      setStats(null);
      setError(err instanceof Error ? err : new Error('Failed to connect to Qdrant'));
    } finally {
      setIsLoading(false);
    }
  }, []);

  // Initial load. `refresh` handles its own errors, so the promise is
  // intentionally not awaited.
  useEffect(() => {
    void refresh();
  }, [refresh]);

  /**
   * Get list of collection names.
   */
  const getCollections = useCallback(async (): Promise<string[]> => {
    const response = await qdrantFetch<{
      result: { collections: Array<{ name: string }> };
    }>('/collections');
    return response.result.collections.map(c => c.name);
  }, []);

  /**
   * Get detailed info for a specific collection, converted to camelCase.
   */
  const getCollectionInfo = useCallback(async (collectionName: string): Promise<QdrantCollectionInfo> => {
    const response = await qdrantFetch<{
      result: {
        status: string;
        vectors_count: number;
        points_count: number;
        segments_count: number;
        config: {
          params: {
            vectors: RawVectorsConfig;
          };
          optimizer_config: Record<string, unknown>;
          wal_config: Record<string, unknown>;
        };
        payload_schema: Record<string, { data_type: string; params: { indexed: boolean } }>;
      };
    }>(collectionPath(collectionName));

    const { size, distance } = resolveVectorParams(response.result.config.params.vectors);

    return {
      name: collectionName,
      status: response.result.status as 'green' | 'yellow' | 'red',
      vectorsCount: response.result.vectors_count || 0,
      pointsCount: response.result.points_count || 0,
      segmentsCount: response.result.segments_count || 0,
      config: {
        params: {
          vectors: {
            size,
            distance,
          },
        },
        optimizerConfig: response.result.config.optimizer_config || {},
        walConfig: response.result.config.wal_config || {},
      },
      payloadSchema: Object.fromEntries(
        Object.entries(response.result.payload_schema || {}).map(([key, val]) => [
          key,
          { dataType: val.data_type, indexed: val.params?.indexed || false },
        ])
      ),
    };
  }, []);

  /**
   * Get points from a collection with pagination.
   * `offset` is sent as the scroll request's `offset` field.
   */
  const getPoints = useCallback(async (
    collectionName: string,
    limit: number = 50,
    offset: number = 0,
    withVectors: boolean = false
  ): Promise<{ points: QdrantPoint[]; nextOffset: number | null }> => {
    const response = await qdrantFetch<{
      result: {
        points: Array<{
          id: string | number;
          vector: number[] | null;
          payload: Record<string, unknown>;
        }>;
        next_page_offset: number | null;
      };
    }>(`${collectionPath(collectionName)}/points/scroll`, {
      method: 'POST',
      body: JSON.stringify({
        limit,
        offset,
        with_vector: withVectors,
        with_payload: true,
      }),
    });

    return {
      points: response.result.points.map(p => ({
        id: p.id,
        vector: p.vector || [],
        payload: p.payload || {},
      })),
      nextOffset: response.result.next_page_offset,
    };
  }, []);

  /**
   * Scroll through points in a collection (for large datasets).
   * Vectors and payloads are always requested; `offset` and `filter` are only
   * sent when provided.
   */
  const scrollPoints = useCallback(async (
    collectionName: string,
    limit: number = 50,
    offset: string | number | null = null,
    filter?: Record<string, unknown>
  ): Promise<{ points: QdrantPoint[]; nextOffset: string | number | null }> => {
    const body: Record<string, unknown> = {
      limit,
      with_vector: true,
      with_payload: true,
    };

    if (offset !== null) {
      body.offset = offset;
    }

    if (filter) {
      body.filter = filter;
    }

    const response = await qdrantFetch<{
      result: {
        points: Array<{
          id: string | number;
          vector: number[] | null;
          payload: Record<string, unknown>;
        }>;
        next_page_offset: string | number | null;
      };
    }>(`${collectionPath(collectionName)}/points/scroll`, {
      method: 'POST',
      body: JSON.stringify(body),
    });

    return {
      points: response.result.points.map(p => ({
        id: p.id,
        vector: p.vector || [],
        payload: p.payload || {},
      })),
      nextOffset: response.result.next_page_offset,
    };
  }, []);

  /**
   * Search for similar vectors.
   */
  const searchPoints = useCallback(async (
    collectionName: string,
    vector: number[],
    limit: number = 10
  ): Promise<QdrantSearchResult[]> => {
    const response = await qdrantFetch<{
      result: Array<{
        id: string | number;
        score: number;
        payload: Record<string, unknown>;
        vector?: number[];
      }>;
    }>(`${collectionPath(collectionName)}/points/search`, {
      method: 'POST',
      body: JSON.stringify({
        vector,
        limit,
        with_payload: true,
      }),
    });

    return response.result.map(r => ({
      id: r.id,
      score: r.score,
      payload: r.payload || {},
      vector: r.vector,
    }));
  }, []);

  /**
   * Get a specific point by ID.
   * Best-effort: any failure of the request resolves to `null`.
   */
  const getPointById = useCallback(async (
    collectionName: string,
    pointId: string | number
  ): Promise<QdrantPoint | null> => {
    try {
      const response = await qdrantFetch<{
        result: {
          id: string | number;
          vector: number[] | null;
          payload: Record<string, unknown>;
        };
      }>(`${collectionPath(collectionName)}/points/${encodeURIComponent(String(pointId))}`);

      return {
        id: response.result.id,
        vector: response.result.vector || [],
        payload: response.result.payload || {},
      };
    } catch {
      return null;
    }
  }, []);

  /**
   * Reindex a Qdrant collection with new embeddings.
   * Calls the backend API which handles YAML loading and embedding generation.
   * Omitted options are replaced by the defaults shown in the request body.
   *
   * @throws Error with the HTTP status and response text when the backend
   *   responds with a non-OK status.
   */
  const reindexCollection = useCallback(async (
    options: ReindexRequest = {}
  ): Promise<ReindexResponse> => {
    const response = await fetch('/api/qdrant/reindex', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        collection_name: options.collection_name || 'heritage_custodians',
        embedding_model: options.embedding_model || 'all-MiniLM-L6-v2',
        batch_size: options.batch_size || 100,
        recreate_collection: options.recreate_collection ?? true,
      }),
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`Reindex failed: ${response.status} - ${errorText}`);
    }

    return response.json();
  }, []);

  /**
   * Get the current status of a reindexing operation.
   *
   * @throws Error with the HTTP status and response text when the backend
   *   responds with a non-OK status.
   */
  const getReindexStatus = useCallback(async (): Promise<ReindexStatus> => {
    const response = await fetch('/api/qdrant/reindex/status');

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`Failed to get reindex status: ${response.status} - ${errorText}`);
    }

    return response.json();
  }, []);

  return {
    status,
    stats,
    isLoading,
    error,
    refresh,
    getCollections,
    getCollectionInfo,
    getPoints,
    searchPoints,
    getPointById,
    scrollPoints,
    reindexCollection,
    getReindexStatus,
  };
}
|