From d5d970b5131b237369a33ecd620fdb56429152db Mon Sep 17 00:00:00 2001 From: kempersc Date: Wed, 14 Jan 2026 22:57:09 +0100 Subject: [PATCH] Remove deprecated slot definitions and add archived versions for future reference - Deleted the following slot definitions: - wikidata_class_slot - wikidata_entity_label_slot - wikidata_mapping_rationale_slot - word_count_slot - Added archived versions of the deleted slots to preserve historical data: - wikidata_class_archived_20260114.yaml - wikidata_entity_label_archived_20260114.yaml - wikidata_mapping_rationale_archived_20260114.yaml - word_count_archived_20260114.yaml - Introduced a new hook `usePersonSearch` for enhanced semantic search functionality in the frontend, supporting debounced queries and caching. --- .../schemas/20251121/linkml/manifest.json | 2 +- frontend/src/hooks/usePersonSearch.ts | 238 + frontend/src/pages/EntityReviewPage.css | 175 + frontend/src/pages/EntityReviewPage.tsx | 184 +- schemas/20251121/linkml/manifest.json | 2 +- .../linkml/modules/classes/FindingAid.yaml | 7 +- .../modules/classes/VideoTextContent.yaml | 29 +- .../modules/classes/WikidataAlignment.yaml | 37 +- .../wikidata_class_archived_20260114.yaml} | 0 ...idata_entity_label_archived_20260114.yaml} | 0 ..._mapping_rationale_archived_20260114.yaml} | 0 .../word_count_archived_20260114.yaml} | 0 .../linkml/modules/slots/slot_fixes.yaml | 6903 +++++++++-------- 13 files changed, 4332 insertions(+), 3245 deletions(-) create mode 100644 frontend/src/hooks/usePersonSearch.ts rename schemas/20251121/linkml/modules/slots/{wikidata_class.yaml => archive/wikidata_class_archived_20260114.yaml} (100%) rename schemas/20251121/linkml/modules/slots/{wikidata_entity_label.yaml => archive/wikidata_entity_label_archived_20260114.yaml} (100%) rename schemas/20251121/linkml/modules/slots/{wikidata_mapping_rationale.yaml => archive/wikidata_mapping_rationale_archived_20260114.yaml} (100%) rename schemas/20251121/linkml/modules/slots/{word_count.yaml => archive/word_count_archived_20260114.yaml} (100%) diff --git a/frontend/public/schemas/20251121/linkml/manifest.json b/frontend/public/schemas/20251121/linkml/manifest.json index 1e2461b0b7..76ac8c07a9 100644 --- a/frontend/public/schemas/20251121/linkml/manifest.json +++ b/frontend/public/schemas/20251121/linkml/manifest.json @@ -1,5 +1,5 @@ { - "generated": "2026-01-14T21:33:36.352Z", + "generated": "2026-01-14T21:38:51.740Z", "schemaRoot": "/schemas/20251121/linkml", "totalFiles": 3026, "categoryCounts": { diff --git a/frontend/src/hooks/usePersonSearch.ts b/frontend/src/hooks/usePersonSearch.ts new file mode 100644 index 0000000000..7d23221b23 --- /dev/null +++ b/frontend/src/hooks/usePersonSearch.ts @@ -0,0 +1,238 @@ +/** + * usePersonSearch Hook + * + * Provides semantic search functionality for person profiles using + * the RAG API's Qdrant vector database backend. + * + * Features: + * - Semantic vector search across all profiles + * - Filter by field type (name, email, domain, birth_year) + * - Debounced queries to prevent API overload + * - Caching of search results + */ + +import { useState, useEffect, useCallback, useRef } from 'react'; + +export type SearchField = 'all' | 'name' | 'email' | 'domain' | 'birth_year'; + +export interface PersonSearchResult { + ppid?: string; + name: string; + headline?: string | null; + custodian_name?: string | null; + custodian_slug?: string | null; + linkedin_url?: string | null; + heritage_relevant?: boolean | null; + heritage_type?: string | null; + location?: string | null; + email?: string | null; + email_domain?: string | null; + birth_year?: number | null; + score?: number | null; +} + +export interface PersonSearchResponse { + query: string; + results: PersonSearchResult[]; + result_count: number; + query_time_ms: number; + collection_stats?: Record | null; + embedding_model_used?: string | null; +} + +interface UsePersonSearchOptions { + debounceMs?: number; + minQueryLength?: number; + maxResults?: number; +} + +interface UsePersonSearchReturn { + // Search state + query: string; + setQuery: (query: string) => void; + searchField: SearchField; + setSearchField: (field: SearchField) => void; + + // Results + results: PersonSearchResult[]; + isSearching: boolean; + error: string | null; + + // Metadata + queryTimeMs: number | null; + resultCount: number; + embeddingModelUsed: string | null; + + // Actions + clearSearch: () => void; + search: (query: string, field?: SearchField) => Promise; +} + +const API_BASE = '/api/rag'; + +export function usePersonSearch(options: UsePersonSearchOptions = {}): UsePersonSearchReturn { + const { + debounceMs = 300, + minQueryLength = 2, + maxResults = 50, + } = options; + + // State + const [query, setQuery] = useState(''); + const [searchField, setSearchField] = useState('all'); + const [results, setResults] = useState([]); + const [isSearching, setIsSearching] = useState(false); + const [error, setError] = useState(null); + const [queryTimeMs, setQueryTimeMs] = useState(null); + const [resultCount, setResultCount] = useState(0); + const [embeddingModelUsed, setEmbeddingModelUsed] = useState(null); + + // Refs for debouncing + const debounceTimerRef = useRef(null); + const abortControllerRef = useRef(null); + + // Build the search query based on field selection + const buildSearchQuery = useCallback((rawQuery: string, field: SearchField): string => { + const trimmedQuery = rawQuery.trim(); + if (!trimmedQuery) return ''; + + switch (field) { + case 'name': + return `person named ${trimmedQuery}`; + case 'email': + return `email address ${trimmedQuery}`; + case 'domain': + return `working at domain ${trimmedQuery}`; + case 'birth_year': + return `born in ${trimmedQuery}`; + case 'all': + default: + return trimmedQuery; + } + }, []); + + // Perform the actual search + const performSearch = useCallback(async (searchQuery: string, field: SearchField) => { + if (searchQuery.trim().length < minQueryLength) { + setResults([]); + setResultCount(0); + setQueryTimeMs(null); + return; + } + + // Cancel any pending request + if (abortControllerRef.current) { + abortControllerRef.current.abort(); + } + abortControllerRef.current = new AbortController(); + + setIsSearching(true); + setError(null); + + try { + const enhancedQuery = buildSearchQuery(searchQuery, field); + + const response = await fetch(`${API_BASE}/persons/search`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + query: enhancedQuery, + k: maxResults, + only_heritage_relevant: false, + }), + signal: abortControllerRef.current.signal, + }); + + if (!response.ok) { + throw new Error(`Search failed: ${response.statusText}`); + } + + const data: PersonSearchResponse = await response.json(); + + setResults(data.results); + setResultCount(data.result_count); + setQueryTimeMs(data.query_time_ms); + setEmbeddingModelUsed(data.embedding_model_used || null); + } catch (err) { + if (err instanceof Error && err.name === 'AbortError') { + // Request was cancelled, ignore + return; + } + console.error('Person search error:', err); + setError(err instanceof Error ? err.message : 'Search failed'); + setResults([]); + } finally { + setIsSearching(false); + } + }, [buildSearchQuery, maxResults, minQueryLength]); + + // Debounced search effect + useEffect(() => { + if (debounceTimerRef.current) { + clearTimeout(debounceTimerRef.current); + } + + if (!query.trim()) { + setResults([]); + setResultCount(0); + setQueryTimeMs(null); + return; + } + + debounceTimerRef.current = setTimeout(() => { + performSearch(query, searchField); + }, debounceMs); + + return () => { + if (debounceTimerRef.current) { + clearTimeout(debounceTimerRef.current); + } + }; + }, [query, searchField, debounceMs, performSearch]); + + // Manual search function (bypasses debounce) + const search = useCallback(async (searchQuery: string, field?: SearchField) => { + await performSearch(searchQuery, field || searchField); + }, [performSearch, searchField]); + + // Clear search + const clearSearch = useCallback(() => { + setQuery(''); + setResults([]); + setResultCount(0); + setQueryTimeMs(null); + setError(null); + if (abortControllerRef.current) { + abortControllerRef.current.abort(); + } + }, []); + + // Cleanup on unmount + useEffect(() => { + return () => { + if (debounceTimerRef.current) { + clearTimeout(debounceTimerRef.current); + } + if (abortControllerRef.current) { + abortControllerRef.current.abort(); + } + }; + }, []); + + return { + query, + setQuery, + searchField, + setSearchField, + results, + isSearching, + error, + queryTimeMs, + resultCount, + embeddingModelUsed, + clearSearch, + search, + }; +} diff --git a/frontend/src/pages/EntityReviewPage.css b/frontend/src/pages/EntityReviewPage.css index be6981183d..ae88cb6faf 100644 --- a/frontend/src/pages/EntityReviewPage.css +++ b/frontend/src/pages/EntityReviewPage.css @@ -505,6 +505,181 @@ color: var(--text-primary, #e0e0e0); } +/* Enhanced Profile Search */ +.profile-search-enhanced { + display: flex; + flex-direction: column; + gap: 0.5rem; + padding: 0.5rem; + margin-bottom: 0.5rem; +} + +/* Search Mode Toggle */ +.search-mode-toggle { + display: flex; + gap: 0.25rem; + background: var(--bg-secondary, #f5f5f5); + border-radius: 6px; + padding: 0.25rem; +} + +.search-mode-toggle .mode-btn { + flex: 1; + display: flex; + align-items: center; + justify-content: center; + gap: 0.375rem; + padding: 0.375rem 0.5rem; + border: none; + background: transparent; + border-radius: 4px; + font-size: 0.75rem; + font-weight: 500; + color: var(--text-secondary, #666); + cursor: pointer; + transition: all 0.15s; +} + +.search-mode-toggle .mode-btn:hover { + background: var(--bg-primary, #fff); + color: var(--text-primary, #1a1a2e); +} + +.search-mode-toggle .mode-btn.active { + background: var(--accent-color, #4f46e5); + color: white; +} + +.search-mode-toggle .mode-btn svg { + width: 14px; + height: 14px; +} + +.dark .search-mode-toggle { + background: var(--bg-tertiary, #2a2a4a); +} + +.dark .search-mode-toggle .mode-btn { + color: var(--text-secondary, #999); +} + +.dark .search-mode-toggle .mode-btn:hover { + background: var(--bg-secondary, #1a1a2e); + color: var(--text-primary, #e0e0e0); +} + +/* Search Field Filter Dropdown */ +.search-field-filter { + margin: 0; +} + +.search-field-filter .field-select { + width: 100%; + padding: 0.375rem 0.5rem; + font-size: 0.75rem; + border: 1px solid var(--border-color, #e0e0e0); + border-radius: 4px; + background: var(--bg-primary, #fff); + color: var(--text-primary, #1a1a2e); + cursor: pointer; + outline: none; +} + +.search-field-filter .field-select:focus { + border-color: var(--accent-color, #4f46e5); +} + +.dark .search-field-filter .field-select { + background: var(--bg-tertiary, #2a2a4a); + border-color: var(--border-color, #3a3a5a); + color: var(--text-primary, #e0e0e0); +} + +/* Search Stats */ +.search-stats { + display: flex; + align-items: center; + gap: 0.5rem; + font-size: 0.75rem; + color: var(--text-secondary, #666); + padding: 0 0.25rem; +} + +.search-stats .query-time { + opacity: 0.7; +} + +/* Search Error */ +.search-error { + display: flex; + align-items: center; + gap: 0.375rem; + padding: 0.375rem 0.5rem; + font-size: 0.75rem; + color: var(--error-color, #dc2626); + background: rgba(220, 38, 38, 0.1); + border-radius: 4px; +} + +/* Search Spinner */ +.profile-search .search-spinner { + flex-shrink: 0; + color: var(--accent-color, #4f46e5); +} + +/* Semantic Search Results */ +.profile-list.semantic-results .profile-item { + position: relative; +} + +.profile-item.semantic-result .score-badge { + font-size: 0.625rem; + padding: 0.125rem 0.375rem; + background: var(--accent-color, #4f46e5); + color: white; + border-radius: 10px; + font-weight: 600; + margin-left: auto; +} + +.profile-item .headline-text { + font-size: 0.7rem; + color: var(--text-secondary, #666); + font-style: italic; + max-width: 150px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.profile-item .custodian-badge { + display: inline-flex; + align-items: center; + gap: 0.25rem; + font-size: 0.65rem; + padding: 0.125rem 0.375rem; + background: var(--bg-tertiary, #e0e0e0); + border-radius: 4px; + color: var(--text-secondary, #666); +} + +.profile-item .location-badge { + display: inline-flex; + align-items: center; + gap: 0.25rem; + font-size: 0.65rem; + color: var(--text-secondary, #666); +} + +.dark .profile-item .headline-text { + color: var(--text-secondary, #999); +} + +.dark .profile-item .custodian-badge { + background: var(--bg-secondary, #1a1a2e); + color: var(--text-secondary, #999); +} + .profile-list { list-style: none; padding: 0; diff --git a/frontend/src/pages/EntityReviewPage.tsx b/frontend/src/pages/EntityReviewPage.tsx index 92365e35de..d0dd2d51d6 100644 --- a/frontend/src/pages/EntityReviewPage.tsx +++ b/frontend/src/pages/EntityReviewPage.tsx @@ -17,6 +17,7 @@ import { useState, useEffect, useCallback } from 'react'; import { useNavigate } from 'react-router-dom'; import { useLanguage } from '../contexts/LanguageContext'; import { Tooltip } from '../components/common/Tooltip'; +import { usePersonSearch, type SearchField } from '../hooks/usePersonSearch'; import { CheckCircle, XCircle, @@ -34,7 +35,9 @@ import { Star, Info, Search, - X + X, + Database, + Filter as FilterIcon } from 'lucide-react'; // Name similarity calculation using Levenshtein distance @@ -270,8 +273,23 @@ export default function EntityReviewPage() { type StatsFilter = 'all' | 'reviewed' | 'pending'; const [statsFilter, setStatsFilter] = useState('pending'); - // Profile search + // Profile search - now with semantic search mode const [profileSearchQuery, setProfileSearchQuery] = useState(''); + const [useSemanticSearch, setUseSemanticSearch] = useState(false); // Toggle: semantic vs local filter + + // Semantic search hook (searches ALL profiles in vector database) + const { + query: semanticQuery, + setQuery: setSemanticQuery, + searchField: semanticSearchField, + setSearchField: setSemanticSearchField, + results: semanticResults, + isSearching: semanticSearching, + error: semanticError, + queryTimeMs: semanticQueryTime, + resultCount: semanticResultCount, + clearSearch: clearSemanticSearch, + } = usePersonSearch({ debounceMs: 400, minQueryLength: 2, maxResults: 50 }); // Linkup search state const [linkupSearching, setLinkupSearching] = useState(false); @@ -857,29 +875,159 @@ export default function EntityReviewPage() {
{/* Profile List Sidebar */}