glam/frontend/src/hooks/usePersonSearch.ts
kempersc 6c3fa6b5a3 Remove deprecated slots and add new slot definitions for enhanced data modeling
- Deleted obsolete slot definitions for work_location and workshop_space.
- Introduced new TaxonName class to represent scientific taxonomic names with detailed attributes.
- Archived existing slots related to surname_prefix, target_name, taxon_name, terminal_count, text_region_count, title, title_proper, total_chapter, total_characters_extracted, total_connections_extracted, track_name, transcript_format, traveling_venue, type_label, type_status, typical_responsibility, unesco_domain, unesco_inscription_year, unesco_list_status, uniform_title, unit_name, used_by_custodian, uv_filtered_required, valid_from_geo, valid_to_geo, validation_status, variant_of_name, verification_date, viability_status, within_auxiliary_place, and within_place.
- Updated slot descriptions and structures to improve clarity and compliance with standards.
2026-01-15 11:42:35 +01:00

250 lines
6.9 KiB
TypeScript

/**
* usePersonSearch Hook
*
* Provides semantic search functionality for person profiles using
* the RAG API's Qdrant vector database backend.
*
* Features:
* - Semantic vector search across all profiles
* - Filter by field type (name, email, domain, birth_year)
* - Debounced queries to prevent API overload
* - Cancellation of superseded requests and de-duplication of results by name
*/
import { useState, useEffect, useCallback, useRef } from 'react';
/**
 * Profile field a search can be scoped to.
 *
 * `'all'` sends the user's text as-is; every other value makes the hook
 * prefix the text with a field-specific phrase before it is sent.
 */
export type SearchField =
  | 'all'
  | 'name'
  | 'email'
  | 'domain'
  | 'birth_year';
/**
 * One person profile as returned by the person-search endpoint.
 *
 * Only `name` is required; every other field may be absent or `null`.
 */
export interface PersonSearchResult {
  /** Profile identifier, when supplied (presumably a persistent person id; confirm against the API). */
  ppid?: string;
  /** Person's name; `usePersonSearch` compares it case-insensitively to de-duplicate results. */
  name: string;
  /** Short profile headline, if any. */
  headline?: string | null;
  /** Name of the associated custodian (NOTE(review): presumably the employing institution; confirm). */
  custodian_name?: string | null;
  /** Slug of the associated custodian. */
  custodian_slug?: string | null;
  /** LinkedIn profile URL, if known. */
  linkedin_url?: string | null;
  /** Heritage-relevance flag as reported by the API. */
  heritage_relevant?: boolean | null;
  /** Heritage type label as reported by the API. */
  heritage_type?: string | null;
  /** Location text, if known. */
  location?: string | null;
  /** Email address, if known. */
  email?: string | null;
  /** Domain of the email address, if known. */
  email_domain?: string | null;
  /** Year of birth, if known. */
  birth_year?: number | null;
  /** Match score; when several results share a name, the higher-scored one is kept. */
  score?: number | null;
}
/**
 * JSON body returned by `POST /api/rag/persons/search`.
 */
export interface PersonSearchResponse {
  /** Query string as reported by the server (presumably an echo of the request; confirm). */
  query: string;
  /** Matching profiles; may contain several entries for the same person. */
  results: PersonSearchResult[];
  /** Count reported by the server. The hook does not use it: it counts results after de-duplication. */
  result_count: number;
  /** Server-reported query duration in milliseconds. */
  query_time_ms: number;
  /** Optional statistics about the searched collection; shape is not constrained here. */
  collection_stats?: Record<string, unknown> | null;
  /** Name of the embedding model used for the query, if reported. */
  embedding_model_used?: string | null;
}
/**
 * Tuning knobs accepted by `usePersonSearch`. All are optional.
 */
interface UsePersonSearchOptions {
  /** Delay in milliseconds between the last query change and the request. The hook defaults this to 300. */
  debounceMs?: number;
  /** Minimum trimmed query length that triggers a request. The hook defaults this to 2. */
  minQueryLength?: number;
  /** Maximum number of results requested from the API (sent as `k`). The hook defaults this to 50. */
  maxResults?: number;
}
/**
 * Everything `usePersonSearch` hands back to the calling component.
 */
interface UsePersonSearchReturn {
  /** Current raw query text. */
  query: string;
  /** Updates the query text; a non-empty value schedules a debounced search. */
  setQuery: (query: string) => void;
  /** Field the search is currently scoped to. */
  searchField: SearchField;
  /** Changes the field scope; re-runs the debounced search for the current query. */
  setSearchField: (field: SearchField) => void;

  /** Results of the latest completed search, de-duplicated by name. */
  results: PersonSearchResult[];
  /** Whether a request is in progress. */
  isSearching: boolean;
  /** Message of the last failed search, or `null`. */
  error: string | null;

  /** Server-reported duration of the latest search in milliseconds, or `null`. */
  queryTimeMs: number | null;
  /** Number of entries in `results`. */
  resultCount: number;
  /** Embedding model reported by the latest search, or `null`. */
  embeddingModelUsed: string | null;

  /** Resets query, results, count, timing and error, and aborts any in-flight request. */
  clearSearch: () => void;
  /** Runs a search immediately, bypassing the debounce; `field` falls back to `searchField`. */
  search: (query: string, field?: SearchField) => Promise<void>;
}
// Base path of the RAG API; the person-search endpoint is `${API_BASE}/persons/search`.
const API_BASE = '/api/rag';
/**
 * Turns the raw user input into the text sent to the search endpoint.
 *
 * The input is trimmed; for every field other than `'all'` it is prefixed
 * with a short phrase naming that field. Returns `''` for blank input.
 */
function buildSearchQuery(rawQuery: string, field: SearchField): string {
  const trimmedQuery = rawQuery.trim();
  if (!trimmedQuery) return '';

  switch (field) {
    case 'name':
      return `person named ${trimmedQuery}`;
    case 'email':
      return `email address ${trimmedQuery}`;
    case 'domain':
      return `working at domain ${trimmedQuery}`;
    case 'birth_year':
      return `born in ${trimmedQuery}`;
    case 'all':
    default:
      return trimmedQuery;
  }
}

/**
 * Collapses results that share a name (case-insensitive, trimmed) into one
 * entry, because the backend may return several hits for the same person.
 *
 * The first occurrence decides the position in the output. It is replaced by
 * a later duplicate only when both carry a score and the later one is higher.
 */
function dedupeResultsByName(results: readonly PersonSearchResult[]): PersonSearchResult[] {
  const bestByName = new Map<string, PersonSearchResult>();

  for (const result of results) {
    const key = result.name.toLowerCase().trim();
    const existing = bestByName.get(key);
    // Compare against null/undefined explicitly: a score of 0 is a real score.
    const isBetter =
      existing !== undefined &&
      result.score != null &&
      existing.score != null &&
      result.score > existing.score;

    if (existing === undefined || isBetter) {
      bestByName.set(key, result);
    }
  }

  return Array.from(bestByName.values());
}

/**
 * React hook providing debounced person search against the RAG API.
 *
 * Changing `query` or `searchField` schedules a search after `debounceMs`;
 * `search()` runs one immediately. Each new search aborts the previous
 * in-flight request, and only the most recent request may update state.
 *
 * @param options - Optional tuning: `debounceMs` (default 300),
 *   `minQueryLength` (default 2) and `maxResults` (default 50).
 * @returns Search state, result metadata and the `search` / `clearSearch` actions.
 */
export function usePersonSearch(options: UsePersonSearchOptions = {}): UsePersonSearchReturn {
  const {
    debounceMs = 300,
    minQueryLength = 2,
    maxResults = 50,
  } = options;

  // State
  const [query, setQuery] = useState('');
  const [searchField, setSearchField] = useState<SearchField>('all');
  const [results, setResults] = useState<PersonSearchResult[]>([]);
  const [isSearching, setIsSearching] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [queryTimeMs, setQueryTimeMs] = useState<number | null>(null);
  const [resultCount, setResultCount] = useState(0);
  const [embeddingModelUsed, setEmbeddingModelUsed] = useState<string | null>(null);

  // Refs for debouncing and cancellation. `ReturnType<typeof setTimeout>` is
  // correct in both browser and Node typings, unlike `NodeJS.Timeout`.
  const debounceTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  const abortControllerRef = useRef<AbortController | null>(null);

  // Empties the result list together with the metadata derived from it.
  const resetResults = useCallback(() => {
    setResults([]);
    setResultCount(0);
    setQueryTimeMs(null);
  }, []);

  // Aborts the in-flight request (if any) without starting a new one.
  const cancelInFlight = useCallback(() => {
    abortControllerRef.current?.abort();
    abortControllerRef.current = null;
    // The aborted request no longer owns the flag, so drop it here.
    setIsSearching(false);
  }, []);

  // Perform the actual search
  const performSearch = useCallback(
    async (searchQuery: string, field: SearchField): Promise<void> => {
      // Any new call supersedes whatever request is still running, including
      // calls whose query turns out to be too short.
      abortControllerRef.current?.abort();
      abortControllerRef.current = null;

      if (searchQuery.trim().length < minQueryLength) {
        setIsSearching(false);
        resetResults();
        return;
      }

      const controller = new AbortController();
      abortControllerRef.current = controller;
      setIsSearching(true);
      setError(null);

      try {
        const enhancedQuery = buildSearchQuery(searchQuery, field);
        const response = await fetch(`${API_BASE}/persons/search`, {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
          },
          body: JSON.stringify({
            query: enhancedQuery,
            k: maxResults,
            only_heritage_relevant: false,
          }),
          signal: controller.signal,
        });

        if (!response.ok) {
          // statusText is empty over HTTP/2, so always include the status code.
          const statusLabel = `${response.status} ${response.statusText}`.trim();
          throw new Error(`Search failed: ${statusLabel}`);
        }

        const data: PersonSearchResponse = await response.json();

        // A newer search may have started while the body was being read.
        if (controller.signal.aborted) return;

        // Tolerate a body without a `results` array rather than throwing.
        const rawResults = Array.isArray(data.results) ? data.results : [];
        const dedupedResults = dedupeResultsByName(rawResults);

        setResults(dedupedResults);
        setResultCount(dedupedResults.length);
        setQueryTimeMs(data.query_time_ms);
        setEmbeddingModelUsed(data.embedding_model_used || null);
      } catch (err) {
        // Superseded or cancelled request: its outcome is irrelevant.
        if (controller.signal.aborted) return;

        console.error('Person search error:', err);
        setError(err instanceof Error ? err.message : 'Search failed');
        // Clear the count and timing too, so they never disagree with `results`.
        resetResults();
      } finally {
        // Only the request that is still current may clear the busy flag;
        // otherwise an aborted request would hide its replacement's spinner.
        if (abortControllerRef.current === controller) {
          abortControllerRef.current = null;
          setIsSearching(false);
        }
      }
    },
    [maxResults, minQueryLength, resetResults],
  );

  // Debounced search effect
  useEffect(() => {
    if (debounceTimerRef.current) {
      clearTimeout(debounceTimerRef.current);
      debounceTimerRef.current = null;
    }

    if (!query.trim()) {
      // Abort as well, so a late response cannot repopulate cleared results.
      cancelInFlight();
      resetResults();
      return;
    }

    debounceTimerRef.current = setTimeout(() => {
      debounceTimerRef.current = null;
      // performSearch handles its own errors; `void` marks fire-and-forget.
      void performSearch(query, searchField);
    }, debounceMs);

    return () => {
      if (debounceTimerRef.current) {
        clearTimeout(debounceTimerRef.current);
        debounceTimerRef.current = null;
      }
    };
  }, [query, searchField, debounceMs, performSearch, cancelInFlight, resetResults]);

  // Manual search function (bypasses debounce)
  const search = useCallback(
    async (searchQuery: string, field?: SearchField): Promise<void> => {
      await performSearch(searchQuery, field ?? searchField);
    },
    [performSearch, searchField],
  );

  // Clear search
  const clearSearch = useCallback(() => {
    setQuery('');
    resetResults();
    setError(null);
    cancelInFlight();
  }, [cancelInFlight, resetResults]);

  // Cleanup on unmount
  useEffect(() => {
    return () => {
      if (debounceTimerRef.current) {
        clearTimeout(debounceTimerRef.current);
        debounceTimerRef.current = null;
      }
      abortControllerRef.current?.abort();
      abortControllerRef.current = null;
    };
  }, []);

  return {
    query,
    setQuery,
    searchField,
    setSearchField,
    results,
    isSearching,
    error,
    queryTimeMs,
    resultCount,
    embeddingModelUsed,
    clearSearch,
    search,
  };
}