- Deleted obsolete slot definitions for work_location and workshop_space. - Introduced new TaxonName class to represent scientific taxonomic names with detailed attributes. - Archived existing slots related to surname_prefix, target_name, taxon_name, terminal_count, text_region_count, title, title_proper, total_chapter, total_characters_extracted, total_connections_extracted, track_name, transcript_format, traveling_venue, type_label, type_status, typical_responsibility, unesco_domain, unesco_inscription_year, unesco_list_status, uniform_title, unit_name, used_by_custodian, uv_filtered_required, valid_from_geo, valid_to_geo, validation_status, variant_of_name, verification_date, viability_status, within_auxiliary_place, and within_place. - Updated slot descriptions and structures to improve clarity and compliance with standards.
250 lines
6.9 KiB
TypeScript
250 lines
6.9 KiB
TypeScript
/**
 * usePersonSearch Hook
 *
 * Provides semantic search functionality for person profiles using
 * the RAG API's Qdrant vector database backend.
 *
 * Features:
 * - Semantic vector search across all profiles
 * - Field-scoped queries (name, email, domain, birth_year)
 * - Debounced queries to prevent API overload
 * - Cancellation of superseded requests and de-duplication of results
 *   (results are not cached between queries)
 */
|
|
|
|
import { useState, useEffect, useCallback, useRef } from 'react';
|
|
|
|
/**
 * Field a person search can be scoped to. `'all'` sends the query text
 * unchanged; the other values make the hook phrase the query around that
 * specific field before sending it.
 */
export type SearchField = 'all' | 'name' | 'email' | 'domain' | 'birth_year';
|
|
|
|
/**
 * A single person profile hit returned by the person search endpoint.
 *
 * Only `name` is required; every other field is optional and, apart from
 * `ppid`, may also be explicitly `null`.
 */
export interface PersonSearchResult {
  /** Identifier of the person record — presumably a persistent person ID; confirm against the API. */
  ppid?: string;
  /** Display name; the hook uses it (case-insensitively, trimmed) as the de-duplication key. */
  name: string;
  headline?: string | null;
  custodian_name?: string | null;
  custodian_slug?: string | null;
  linkedin_url?: string | null;
  heritage_relevant?: boolean | null;
  heritage_type?: string | null;
  location?: string | null;
  email?: string | null;
  email_domain?: string | null;
  birth_year?: number | null;
  /** Relevance score; when duplicates are collapsed the hit with the higher score is kept. */
  score?: number | null;
}
|
|
|
|
/**
 * JSON body the hook expects back from the person search endpoint
 * (`POST {API_BASE}/persons/search`).
 */
export interface PersonSearchResponse {
  /** Query text as reported back by the server. */
  query: string;
  /** Raw hits; may contain several entries for the same person. */
  results: PersonSearchResult[];
  /** Hit count as reported by the server (the hook reports its own de-duplicated count instead). */
  result_count: number;
  /** Server-side query duration — presumably in milliseconds, per the field name. */
  query_time_ms: number;
  collection_stats?: Record<string, unknown> | null;
  /** Name of the embedding model used for this query, when the server reports it. */
  embedding_model_used?: string | null;
}
|
|
|
|
/**
 * Tuning knobs accepted by `usePersonSearch`. Every option is optional.
 */
interface UsePersonSearchOptions {
  /** Delay in milliseconds between the last query change and the automatic search (hook default: 300). */
  debounceMs?: number;
  /** Minimum trimmed query length required before a request is sent (hook default: 2). */
  minQueryLength?: number;
  /** Maximum number of hits requested from the API, sent as `k` (hook default: 50). */
  maxResults?: number;
}
|
|
|
|
/**
 * Everything `usePersonSearch` exposes to the consuming component:
 * the controlled search inputs, the current results, request metadata
 * and imperative actions.
 */
interface UsePersonSearchReturn {
  // Search state
  /** Current query text; changing it schedules a debounced search. */
  query: string;
  setQuery: (query: string) => void;
  /** Field the search is currently scoped to; changing it also re-runs the debounced search. */
  searchField: SearchField;
  setSearchField: (field: SearchField) => void;

  // Results
  /** De-duplicated hits of the most recent successful search. */
  results: PersonSearchResult[];
  /** True while a request is in flight. */
  isSearching: boolean;
  /** Message of the last failed search, or null when there is none. */
  error: string | null;

  // Metadata
  /** `query_time_ms` of the last response, or null when no results are shown. */
  queryTimeMs: number | null;
  /** Number of entries in `results` (after de-duplication). */
  resultCount: number;
  /** Embedding model reported by the last response, if any. */
  embeddingModelUsed: string | null;

  // Actions
  /** Empties the query and results, clears the error and cancels any in-flight request. */
  clearSearch: () => void;
  /** Runs a search immediately, bypassing the debounce; `field` defaults to the current `searchField`. */
  search: (query: string, field?: SearchField) => Promise<void>;
}
|
|
|
|
/** Base path of the RAG API; the person search endpoint is `${API_BASE}/persons/search`. */
const API_BASE = '/api/rag';
|
|
|
|
/**
 * Turns the raw user input into the natural-language query sent to the
 * semantic search endpoint, phrased according to the selected field.
 *
 * @param rawQuery - Text as typed by the user; surrounding whitespace is ignored.
 * @param field - Field the search is scoped to.
 * @returns The phrased query, or an empty string when the input is blank.
 */
function buildSearchQuery(rawQuery: string, field: SearchField): string {
  const trimmedQuery = rawQuery.trim();
  if (!trimmedQuery) return '';

  switch (field) {
    case 'name':
      return `person named ${trimmedQuery}`;
    case 'email':
      return `email address ${trimmedQuery}`;
    case 'domain':
      return `working at domain ${trimmedQuery}`;
    case 'birth_year':
      return `born in ${trimmedQuery}`;
    case 'all':
    default:
      return trimmedQuery;
  }
}

/**
 * Collapses hits that share the same name (case-insensitive, trimmed),
 * because the vector store may return multiple embeddings per person.
 *
 * For each name the hit with the highest score is kept. A missing or null
 * score ranks below every real score, and ties keep the first hit seen.
 * Output order follows the first occurrence of each name.
 */
function dedupePersonResults(hits: readonly PersonSearchResult[]): PersonSearchResult[] {
  const bestByName = new Map<string, PersonSearchResult>();
  for (const hit of hits) {
    const key = hit.name.toLowerCase().trim();
    const existing = bestByName.get(key);
    // Compare with explicit fallbacks instead of truthiness so that a
    // legitimate score of 0 still takes part in the comparison.
    if (!existing || (hit.score ?? -Infinity) > (existing.score ?? -Infinity)) {
      bestByName.set(key, hit);
    }
  }
  return Array.from(bestByName.values());
}

/**
 * React hook providing debounced semantic search over person profiles.
 *
 * Changing `query` or `searchField` schedules a search after `debounceMs`;
 * `search()` runs one immediately. Each new search aborts the previous
 * in-flight request, and only the most recent request is allowed to update
 * state, so superseded responses can never overwrite newer ones.
 *
 * @param options - Optional tuning: `debounceMs` (default 300),
 *   `minQueryLength` (default 2) and `maxResults` (default 50).
 * @returns Search state, results, response metadata and actions.
 */
export function usePersonSearch(options: UsePersonSearchOptions = {}): UsePersonSearchReturn {
  const {
    debounceMs = 300,
    minQueryLength = 2,
    maxResults = 50,
  } = options;

  // State
  const [query, setQuery] = useState('');
  const [searchField, setSearchField] = useState<SearchField>('all');
  const [results, setResults] = useState<PersonSearchResult[]>([]);
  const [isSearching, setIsSearching] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [queryTimeMs, setQueryTimeMs] = useState<number | null>(null);
  const [resultCount, setResultCount] = useState(0);
  const [embeddingModelUsed, setEmbeddingModelUsed] = useState<string | null>(null);

  // Refs for debouncing and request cancellation.
  // ReturnType<typeof setTimeout> is correct in both browser and Node typings.
  const debounceTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  const abortControllerRef = useRef<AbortController | null>(null);

  // Abort the in-flight request (if any) and forget its controller, so its
  // completion handlers can tell they have been superseded.
  const cancelPendingRequest = useCallback(() => {
    if (abortControllerRef.current !== null) {
      abortControllerRef.current.abort();
      abortControllerRef.current = null;
    }
  }, []);

  // Drop a scheduled-but-not-yet-fired debounced search.
  const cancelPendingDebounce = useCallback(() => {
    if (debounceTimerRef.current !== null) {
      clearTimeout(debounceTimerRef.current);
      debounceTimerRef.current = null;
    }
  }, []);

  // Reset everything that describes the currently displayed result set.
  const resetResults = useCallback(() => {
    setResults([]);
    setResultCount(0);
    setQueryTimeMs(null);
  }, []);

  // Perform the actual search
  const performSearch = useCallback(async (searchQuery: string, field: SearchField) => {
    // Every invocation supersedes whatever is still in flight, including the
    // too-short case below; otherwise a late response could repopulate
    // results after the user shortened the query.
    cancelPendingRequest();

    if (searchQuery.trim().length < minQueryLength) {
      resetResults();
      // The request cancelled above will not reset this flag itself.
      setIsSearching(false);
      return;
    }

    const controller = new AbortController();
    abortControllerRef.current = controller;

    setIsSearching(true);
    setError(null);

    try {
      const enhancedQuery = buildSearchQuery(searchQuery, field);

      const response = await fetch(`${API_BASE}/persons/search`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          query: enhancedQuery,
          k: maxResults,
          only_heritage_relevant: false,
        }),
        signal: controller.signal,
      });

      if (!response.ok) {
        throw new Error(`Search failed: ${response.statusText}`);
      }

      const data: PersonSearchResponse = await response.json();

      // A newer search (or clear/unmount) took over while we were waiting.
      if (abortControllerRef.current !== controller) {
        return;
      }

      // Tolerate a missing/null `results` field instead of crashing in the loop.
      const rawResults = Array.isArray(data.results) ? data.results : [];
      const dedupedResults = dedupePersonResults(rawResults);

      setResults(dedupedResults);
      setResultCount(dedupedResults.length);
      setQueryTimeMs(data.query_time_ms);
      setEmbeddingModelUsed(data.embedding_model_used ?? null);
    } catch (err) {
      if (err instanceof Error && err.name === 'AbortError') {
        // Request was cancelled, ignore
        return;
      }
      // A failure of a superseded request must not clobber newer state.
      if (abortControllerRef.current !== controller) {
        return;
      }
      console.error('Person search error:', err);
      setError(err instanceof Error ? err.message : 'Search failed');
      // Keep the count and timing consistent with the now-empty result list.
      resetResults();
    } finally {
      // Only the request that is still current may end the "searching" state;
      // an aborted request finishing late must not hide a newer one's spinner.
      if (abortControllerRef.current === controller) {
        abortControllerRef.current = null;
        setIsSearching(false);
      }
    }
  }, [cancelPendingRequest, resetResults, maxResults, minQueryLength]);

  // Debounced search effect
  useEffect(() => {
    cancelPendingDebounce();

    if (!query.trim()) {
      // An emptied input must also cancel the request that may still be in
      // flight, otherwise its response would bring stale results back.
      cancelPendingRequest();
      resetResults();
      setIsSearching(false);
      return undefined;
    }

    debounceTimerRef.current = setTimeout(() => {
      debounceTimerRef.current = null;
      void performSearch(query, searchField);
    }, debounceMs);

    return cancelPendingDebounce;
  }, [
    query,
    searchField,
    debounceMs,
    performSearch,
    cancelPendingDebounce,
    cancelPendingRequest,
    resetResults,
  ]);

  // Manual search function (bypasses debounce)
  const search = useCallback(async (searchQuery: string, field?: SearchField) => {
    // Drop any scheduled debounced search so it cannot fire afterwards and
    // abort/override the search the caller explicitly asked for.
    cancelPendingDebounce();
    await performSearch(searchQuery, field ?? searchField);
  }, [cancelPendingDebounce, performSearch, searchField]);

  // Clear search
  const clearSearch = useCallback(() => {
    cancelPendingDebounce();
    cancelPendingRequest();
    setQuery('');
    resetResults();
    setError(null);
    // The cancelled request no longer resets this flag on its own.
    setIsSearching(false);
  }, [cancelPendingDebounce, cancelPendingRequest, resetResults]);

  // Cleanup on unmount
  useEffect(() => {
    return () => {
      cancelPendingDebounce();
      // Forgetting the controller also stops the aborted request from
      // touching state after the component is gone.
      cancelPendingRequest();
    };
  }, [cancelPendingDebounce, cancelPendingRequest]);

  return {
    query,
    setQuery,
    searchField,
    setSearchField,
    results,
    isSearching,
    error,
    queryTimeMs,
    resultCount,
    embeddingModelUsed,
    clearSearch,
    search,
  };
}
|