/**
 * useProgressiveInstitutions - Progressive loading hook for comprehensive + efficient map data
 *
 * This hook implements a tiered loading strategy:
 *
 * FIRST VISIT:
 * 1. Check IndexedDB for full cached data → If found, use immediately (instant!)
 * 2. If not cached: Load lite data (~0.7MB) for fast marker display (~1s)
 * 3. Background: Fetch full data (~27MB) and store in IndexedDB
 * 4. When complete: Upgrade markers with full data (seamless)
 *
 * RETURN VISIT:
 * 1. Load full data from IndexedDB → Instant with complete metadata!
 * 2. Background: Check if stale, refresh if needed
 *
 * Benefits:
 * - First visit: ~1s to interactive (lite markers)
 * - Return visit: ~0s to interactive (cached full data)
 * - Complete data: All fields available after background load
 * - Offline capable: Full map works without network
 *
 * @author GLAM / bronhouder.nl
 */

import { useState, useEffect, useCallback, useRef } from 'react';
import type { Institution } from '../components/map/InstitutionInfoPanel';
import { institutionsCache } from '../lib/storage/institutions-cache';

// Re-export types
export type { Institution };

// ============================================================================
// Types
// ============================================================================

export interface ProgressiveLoadingState {
  /** Current loading phase */
  phase: 'init' | 'checking-cache' | 'loading-lite' | 'loading-full' | 'ready';
  /** Human-readable status message */
  message: string;
  /** Progress percentage (0-100) */
  percent: number;
  /** Data completeness level */
  dataLevel: 'none' | 'lite' | 'full';
  /** Whether background loading is in progress */
  backgroundLoading: boolean;
}

export interface UseProgressiveInstitutionsReturn {
  /** Institution data (may be lite or full depending on loading state) */
  institutions: Institution[];
  /** Current loading state */
  state: ProgressiveLoadingState;
  /** Whether initial loading is complete (lite or full data available) */
  isReady: boolean;
  /** Whether we have full comprehensive data */
  hasFullData: boolean;
  /** Error if any */
  error: Error | null;
  /** Total institution count */
  totalCount: number;
  /** Force refresh from network */
  refresh: () => Promise<void>;
  /** Get full details for a specific institution (from the currently loaded data) */
  getInstitutionDetail: (ghcid: string) => Institution | null;
}

/** Callback used to report download progress (percent is always within 0-100). */
type ProgressCallback = (percent: number, message: string) => void;

// ============================================================================
// Constants
// ============================================================================

const GEO_API_BASE = '/api/geo';

// Type colors and names (same as useGeoApiInstitutions)
const TYPE_COLORS: Record<string, string> = {
  'G': '#00bcd4',
  'L': '#2ecc71',
  'A': '#3498db',
  'M': '#e74c3c',
  'O': '#f39c12',
  'R': '#1abc9c',
  'C': '#795548',
  'U': '#9e9e9e',
  'B': '#4caf50',
  'E': '#ff9800',
  'S': '#9b59b6',
  'F': '#95a5a6',
  'I': '#673ab7',
  'X': '#607d8b',
  'P': '#8bc34a',
  'H': '#607d8b',
  'D': '#34495e',
  'N': '#e91e63',
  'T': '#ff5722',
};

const TYPE_NAMES: Record<string, string> = {
  'G': 'Gallery',
  'L': 'Library',
  'A': 'Archive',
  'M': 'Museum',
  'O': 'Official',
  'R': 'Research',
  'C': 'Corporation',
  'U': 'Unknown',
  'B': 'Botanical',
  'E': 'Education',
  'S': 'Society',
  'F': 'Features',
  'I': 'Intangible',
  'X': 'Mixed',
  'P': 'Personal',
  'H': 'Holy sites',
  'D': 'Digital',
  'N': 'NGO',
  'T': 'Taste/smell',
};

/** Ranking of logo extraction methods; a higher number is preferred, unknown methods rank 0. */
const LOGO_METHOD_PRIORITY: Record<string, number> = {
  'logo_img_attr': 3,
  'og_image': 2,
  'favicon_link': 1,
};

/** URL fragments that mark a link as "not an image" (video hosts, social pages, video files). */
const NON_IMAGE_URL_PATTERNS = [
  'youtube.com/watch',
  'youtu.be/',
  'vimeo.com/',
  'twitter.com/',
  'facebook.com/',
  '.mp4',
  '.webm',
  '.mov',
  '.avi',
];

// ============================================================================
// Shared helpers
// ============================================================================

/** Type guard: true for any non-null object (arrays included). */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/**
 * Pull the `features` array out of a decoded API payload.
 *
 * @param payload - Decoded JSON body of a geo API response.
 * @param label - Endpoint label used in the error message ("Lite" / "Full").
 * @throws Error when the payload has no `features` array, instead of letting a
 *   bare TypeError surface from `undefined.map`.
 */
function requireFeatures<T>(payload: unknown, label: string): T[] {
  const features = isRecord(payload) ? payload.features : undefined;
  if (!Array.isArray(features)) {
    throw new Error(`${label} API returned an unexpected payload (missing "features" array)`);
  }
  return features as T[];
}

// ============================================================================
// Lite Institution Transformer (minimal fields for map markers)
// ============================================================================

interface LiteFeature {
  type: 'Feature';
  geometry: { type: 'Point'; coordinates: [number, number] };
  properties: {
    ghcid: string;
    name: string;
    type: string;
    city?: string;
    country_code?: string;
    rating?: number;
  };
}

/**
 * Convert a lite feature into an Institution carrying only what a map marker
 * needs; fields absent from the lite payload are filled with empty strings.
 */
function liteFeatureToInstitution(feature: LiteFeature): Institution {
  const props = feature.properties;
  // Coordinates are destructured as [lon, lat]
  const [lon, lat] = feature.geometry.coordinates;
  const typeCode = props.type || 'U';

  return {
    lat,
    lon,
    name: props.name || 'Unknown',
    city: props.city || '',
    province: '', // Not in lite data
    type: typeCode,
    type_name: TYPE_NAMES[typeCode] || 'Unknown',
    color: TYPE_COLORS[typeCode] || '#9e9e9e',
    website: '',
    wikidata_id: '',
    description: '',
    rating: props.rating,
    ghcid: { current: props.ghcid, uuid: '', numeric: undefined },
  };
}

// ============================================================================
// Full Institution Transformer (all fields)
// ============================================================================

interface FullFeature {
  type: 'Feature';
  geometry: { type: 'Point'; coordinates: [number, number] };
  properties: Record<string, unknown>;
}

// Parse web_claims JSON for logo extraction
interface WebClaim {
  claim_type?: string;
  claim_value?: string;
  raw_value?: string;
  source_url?: string;
  retrieved_on?: string;
  xpath?: string;
  extraction_method?: string;
}

/**
 * Parse web_claims from a JSON string or an already-decoded value.
 *
 * Accepts either a bare array of claims or an object wrapping them under
 * `claims`. Anything else (including a non-array `claims`) yields `undefined`,
 * and non-object entries are dropped so callers can safely read claim fields.
 */
function parseWebClaims(value: unknown): WebClaim[] | undefined {
  if (!value) return undefined;

  let parsed: unknown = value;
  if (typeof value === 'string') {
    try {
      parsed = JSON.parse(value);
    } catch {
      return undefined;
    }
  }

  // Web claims can be an array directly or wrapped in an object
  const candidate = Array.isArray(parsed)
    ? parsed
    : isRecord(parsed)
      ? parsed.claims
      : undefined;
  if (!Array.isArray(candidate)) return undefined;

  const claims = candidate.filter(isRecord) as WebClaim[];
  return claims.length > 0 ? claims : undefined;
}

/**
 * Resolve a potentially relative URL against a base URL.
 *
 * Absolute http(s) URLs are returned unchanged, protocol-relative URLs get an
 * `https:` prefix, and anything that cannot be resolved is returned as-is.
 */
function resolveUrl(url: string, baseUrl?: string): string {
  // Protocol-relative URL: pin it to https
  if (url.startsWith('//')) return `https:${url}`;
  // Already absolute URL
  if (url.startsWith('http://') || url.startsWith('https://')) return url;
  // No base URL to resolve against
  if (!baseUrl) return url;

  try {
    // Use URL API to resolve relative URLs
    return new URL(url, new URL(baseUrl)).href;
  } catch {
    // If URL parsing fails, return as-is
    return url;
  }
}

/**
 * Check if a URL is a valid image URL: it must be an absolute http(s) URL and
 * must not contain any of the known video/social patterns.
 */
function isValidImageUrl(url: string): boolean {
  if (!url) return false;

  // Must be absolute URL
  if (!url.startsWith('http://') && !url.startsWith('https://')) {
    return false;
  }

  // Filter out non-image URLs
  const lowerUrl = url.toLowerCase();
  return !NON_IMAGE_URL_PATTERNS.some(pattern => lowerUrl.includes(pattern));
}

/**
 * Extract logo URL from web_claims - prefer logo_img_attr extraction method.
 * Priority: logo_img_attr > og_image > favicon_link > others.
 * Also resolves relative URLs against source_url.
 *
 * @returns An absolute image URL, or `undefined` when no usable logo claim exists.
 */
function extractLogoFromWebClaims(webClaimsValue: unknown): string | undefined {
  const claims = parseWebClaims(webClaimsValue);
  if (!claims || claims.length === 0) return undefined;

  // Filter for logo claims
  const logoClaims = claims.filter(c => c.claim_type === 'logo' && c.claim_value);
  if (logoClaims.length === 0) return undefined;

  // Sort by preference (filter() already produced a fresh array, so sorting in place is safe)
  const rank = (claim: WebClaim): number => LOGO_METHOD_PRIORITY[claim.extraction_method || ''] || 0;
  const sorted = logoClaims.sort((a, b) => rank(b) - rank(a));

  // Filter out favicons, loading placeholders, and non-image URLs
  const bestLogo = sorted.find(c => {
    const url = c.claim_value || '';
    // Skip favicon-like URLs
    if (c.extraction_method === 'favicon_link') return false;
    // Skip very small images or placeholder images
    if (url.includes('favicon') || url.includes('loading')) return false;
    // Resolve the URL and check if it's valid
    return isValidImageUrl(resolveUrl(url, c.source_url));
  });

  // Fall back to the top-ranked claim when nothing passed the stricter filter
  const selectedClaim = bestLogo || sorted[0];
  if (!selectedClaim?.claim_value) return undefined;

  // Resolve relative URLs against source_url
  const resolvedUrl = resolveUrl(selectedClaim.claim_value, selectedClaim.source_url);

  // Final validation - only return if it's a valid image URL
  return isValidImageUrl(resolvedUrl) ? resolvedUrl : undefined;
}

/**
 * Convert a full feature into an Institution, decoding the JSON-encoded
 * enrichment fields (opening hours, reviews, photos, social media, YouTube)
 * and choosing a logo URL.
 */
function fullFeatureToInstitution(feature: FullFeature): Institution {
  const props = feature.properties;
  const [lon, lat] = feature.geometry.coordinates;
  const typeCode = (props.type as string) || 'U';

  // Parse complex fields
  const openingHours = parseOpeningHours(props.opening_hours);
  const reviews = parseReviews(props.reviews);
  const photos = parsePhotos(props.photos, props.photo_urls as string[] | undefined);
  const socialMedia = parseSocialMedia(props.social_media);
  const youtube = parseYouTube(props.youtube_enrichment);

  // Extract logo URL from web_claims (primary) or use direct logo_url prop (fallback)
  // Priority: web_claims logo_img_attr > web_claims og_image > props.logo_url
  // Only use props.logo_url if it's a valid absolute image URL (not relative, not video)
  const webClaimsLogo = extractLogoFromWebClaims(props.web_claims);
  const fallbackLogo = props.logo_url as string | undefined;
  const logoUrl = webClaimsLogo || (fallbackLogo && isValidImageUrl(fallbackLogo) ? fallbackLogo : undefined);

  return {
    lat,
    lon,
    name: (props.name as string) || 'Unknown',
    city: (props.city as string) || '',
    province: (props.province as string) || '',
    type: typeCode,
    type_name: (props.type_name as string) || TYPE_NAMES[typeCode] || 'Unknown',
    color: TYPE_COLORS[typeCode] || '#9e9e9e',
    website: (props.website as string) || '',
    wikidata_id: (props.wikidata_id as string) || '',
    description: (props.description as string) || '',
    rating: props.rating as number | undefined,
    total_ratings: props.total_ratings as number | undefined,
    phone: props.phone as string | undefined,
    address: props.formatted_address as string | undefined,
    reviews,
    photos,
    street_view_url: props.street_view_url as string | undefined,
    business_status: props.business_status as string | undefined,
    google_place_id: props.google_place_id as string | undefined,
    opening_hours: openingHours,
    ghcid: props.ghcid
      ? {
          current: props.ghcid as string,
          uuid: '',
          numeric: undefined,
        }
      : undefined,
    isil: props.isil_code ? { code: props.isil_code as string } : undefined,
    emic_name: props.emic_name as string | undefined,
    org_name: props.org_name as string | undefined,
    founding_year: safeExtractYear(props.founding_year),
    dissolution_year: safeExtractYear(props.dissolution_year),
    social_media: socialMedia,
    youtube,
    logo_url: logoUrl,
  };
}

// ============================================================================
// Parser Utilities
// ============================================================================

/**
 * Extract a year from a number, a string containing four consecutive digits,
 * or an object with a string `time` field that starts with an optionally
 * signed four-digit year.
 *
 * NOTE(review): the sign of `time` is discarded, so a "-0500-..." value yields
 * 500 — confirm whether BCE dates should stay positive for consumers.
 */
function safeExtractYear(value: unknown): number | undefined {
  if (value === null || value === undefined) return undefined;
  if (typeof value === 'number') return value;

  if (typeof value === 'string') {
    const match = value.match(/\d{4}/);
    return match ? parseInt(match[0], 10) : undefined;
  }

  if (typeof value === 'object') {
    const obj = value as Record<string, unknown>;
    if (obj.time && typeof obj.time === 'string') {
      const match = obj.time.match(/^[+-]?(\d{4})/);
      return match ? parseInt(match[1], 10) : undefined;
    }
  }

  return undefined;
}

/**
 * Normalise opening hours to a list of strings.
 *
 * Accepts an array, an object with `weekday_text`, or a JSON string encoding
 * either of those; anything else yields `undefined`.
 */
function parseOpeningHours(value: unknown): string[] | undefined {
  if (!value) return undefined;
  if (Array.isArray(value)) return value;

  if (typeof value === 'string') {
    try {
      const parsed = JSON.parse(value);
      if (parsed?.weekday_text) return parsed.weekday_text;
      if (Array.isArray(parsed)) return parsed;
    } catch {
      /* ignore: not JSON, treated as "no opening hours" */
    }
    return undefined;
  }

  if (isRecord(value) && value.weekday_text) {
    return value.weekday_text as string[];
  }
  return undefined;
}

/**
 * Normalise reviews (array or JSON-encoded array) to a uniform shape.
 * Entries that are not objects are skipped rather than aborting the whole
 * transformation.
 */
function parseReviews(
  value: unknown
): Array<{ author: string; rating: number; text: string; time: string }> | undefined {
  if (!value) return undefined;

  let parsed: unknown = value;
  if (typeof value === 'string') {
    try {
      parsed = JSON.parse(value);
    } catch {
      return undefined;
    }
  }
  if (!Array.isArray(parsed)) return undefined;

  return parsed.filter(isRecord).map(r => ({
    author: (r.author_name || r.author || 'Anonymous') as string,
    rating: (r.rating || 0) as number,
    text: (r.text || '') as string,
    time: (r.relative_time_description || r.time || '') as string,
  }));
}

/**
 * Merge photo objects and plain photo URLs into one de-duplicated list.
 *
 * Photo objects contribute their `url`, or a Places media URL built from
 * `name`; `photoUrls` entries are appended when not already present.
 */
function parsePhotos(
  photos: unknown,
  photoUrls?: string[]
): Array<{ url: string; attribution?: string }> | undefined {
  const result: Array<{ url: string; attribution?: string }> = [];

  if (photos) {
    let parsed: unknown = photos;
    if (typeof photos === 'string') {
      try {
        parsed = JSON.parse(photos);
      } catch {
        /* ignore: unparsable photos are treated as absent */
      }
    }

    if (Array.isArray(parsed)) {
      for (const p of parsed) {
        // Skip null/primitive entries instead of throwing on property access
        if (!isRecord(p)) continue;
        const url =
          (p.url as string) ||
          (p.name ? `https://places.googleapis.com/v1/${p.name}/media?maxWidthPx=800` : '');
        if (url) {
          result.push({
            url,
            attribution: (p.author_attributions as Array<{ displayName?: string }> | undefined)?.[0]
              ?.displayName,
          });
        }
      }
    }
  }

  if (photoUrls && Array.isArray(photoUrls)) {
    for (const url of photoUrls) {
      if (url && typeof url === 'string' && !result.some(p => p.url === url)) {
        result.push({ url });
      }
    }
  }

  return result.length > 0 ? result : undefined;
}

/**
 * Return social media links from an object or a JSON-encoded object.
 * A JSON `null` is reported as `undefined` rather than leaking `null`.
 */
function parseSocialMedia(value: unknown): Record<string, string> | undefined {
  if (!value) return undefined;
  if (typeof value === 'object') return value as Record<string, string>;

  if (typeof value === 'string') {
    try {
      const parsed: unknown = JSON.parse(value);
      if (isRecord(parsed)) return parsed as Record<string, string>;
    } catch {
      /* ignore: not JSON */
    }
  }
  return undefined;
}

/**
 * Normalise YouTube enrichment data.
 *
 * Returns `undefined` when the payload is unparsable, carries a non-SUCCESS
 * status, or has neither a channel id nor a channel URL. Channel fields are
 * read from `channel` when it is an object, otherwise from the top level.
 */
function parseYouTube(value: unknown): Institution['youtube'] {
  if (!value) return undefined;

  let parsed: unknown = value;
  if (typeof value === 'string') {
    try {
      parsed = JSON.parse(value);
    } catch {
      return undefined;
    }
  }
  if (!isRecord(parsed)) return undefined;

  const data = parsed;
  if (data.status && data.status !== 'SUCCESS') return undefined;

  const channel = isRecord(data.channel) ? data.channel : data;
  if (!channel.channel_id && !channel.channel_url) return undefined;

  const numberOrNull = (n: unknown): number | null => (typeof n === 'number' ? n : null);

  return {
    channel_id: String(channel.channel_id || ''),
    channel_url: String(channel.channel_url || ''),
    channel_title: String(channel.title || channel.channel_title || ''),
    channel_description: String(channel.description || channel.channel_description || ''),
    subscriber_count: numberOrNull(channel.subscriber_count),
    video_count: numberOrNull(channel.video_count),
    view_count: numberOrNull(channel.view_count),
    thumbnail_url: String(channel.thumbnail_url || ''),
  };
}

// ============================================================================
// Network + cache helpers
// ============================================================================

/**
 * Fetch lite data for fast initial display.
 *
 * @throws Error when the response is not OK or has no `features` array.
 */
async function fetchLiteInstitutions(): Promise<Institution[]> {
  const response = await fetch(`${GEO_API_BASE}/institutions/lite`, {
    headers: { 'Accept': 'application/json' },
  });
  if (!response.ok) {
    throw new Error(`Lite API failed: ${response.status}`);
  }
  const data: unknown = await response.json();
  return requireFeatures<LiteFeature>(data, 'Lite').map(liteFeatureToInstitution);
}

/**
 * Fetch full data, streaming the body when progress reporting is requested.
 *
 * @param onProgress - Optional callback receiving a 0-100 percentage and a
 *   status message while the body downloads. Only invoked when the response
 *   exposes a Content-Length and a readable body.
 * @throws Error when the response is not OK or has no `features` array.
 */
async function fetchFullInstitutions(onProgress?: ProgressCallback): Promise<Institution[]> {
  const response = await fetch(`${GEO_API_BASE}/institutions`, {
    headers: { 'Accept': 'application/json' },
  });
  if (!response.ok) {
    throw new Error(`Full API failed: ${response.status}`);
  }

  const totalBytes = parseInt(response.headers.get('Content-Length') ?? '', 10) || 0;

  // Without a listener or a known size there is nothing to report: decode directly
  if (!onProgress || totalBytes <= 0 || !response.body) {
    const data: unknown = await response.json();
    return requireFeatures<FullFeature>(data, 'Full').map(fullFeatureToInstitution);
  }

  // Stream with progress tracking
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let text = '';
  let receivedBytes = 0;

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    receivedBytes += value.length;
    // stream: true keeps multi-byte characters split across chunks intact
    text += decoder.decode(value, { stream: true });

    // Content-Length may describe the encoded (e.g. compressed) size while the
    // stream yields decoded bytes, so the ratio can exceed 1: clamp to 100.
    const percent = Math.min(100, Math.round((receivedBytes / totalBytes) * 100));
    onProgress(percent, `Downloading full data (${(receivedBytes / 1024 / 1024).toFixed(1)}MB)...`);
  }
  text += decoder.decode();

  const data: unknown = JSON.parse(text);
  return requireFeatures<FullFeature>(data, 'Full').map(fullFeatureToInstitution);
}

/**
 * Read the cached full dataset when it exists, is not expired and is not empty.
 *
 * Cache failures are logged and reported as a miss so the caller can fall back
 * to the network instead of failing the whole load.
 */
async function readUsableCache(): Promise<{ data: Institution[]; isStale: boolean } | null> {
  try {
    const cacheStatus = await institutionsCache.getStatus();
    console.log('[Progressive] Cache status:', cacheStatus);
    if (!cacheStatus.hasCache || cacheStatus.isExpired) return null;

    // Cache hit! Load from IndexedDB
    console.log('[Progressive] Cache hit! Loading from IndexedDB...');
    // NOTE(review): the meaning of the boolean argument is defined by institutionsCache — confirm
    const cachedData = await institutionsCache.get(true);
    console.log('[Progressive] Cached data loaded:', cachedData?.length || 0, 'institutions');
    if (!cachedData || cachedData.length === 0) return null;

    return { data: cachedData, isStale: Boolean(cacheStatus.isStale) };
  } catch (err) {
    console.warn('[Progressive] Cache unavailable, falling back to network:', err);
    return null;
  }
}

/**
 * Store the full dataset in the cache on a best-effort basis.
 *
 * @returns `true` when the write succeeded; a failure is logged and reported
 *   as `false` so freshly downloaded data can still be displayed.
 */
async function persistToCache(data: Institution[], fetchDurationMs: number): Promise<boolean> {
  try {
    await institutionsCache.set(data, fetchDurationMs);
    return true;
  } catch (err) {
    console.warn('[Progressive] Failed to cache institutions:', err);
    return false;
  }
}

// ============================================================================
// Main Hook
// ============================================================================

/**
 * Load institutions progressively: cached full data when available, otherwise
 * lite data first followed by the full dataset.
 *
 * @returns The current institutions, loading state, error, and helpers to
 *   refresh from the network or look up an institution by GHCID.
 */
export function useProgressiveInstitutions(): UseProgressiveInstitutionsReturn {
  console.log('[Progressive] Hook initializing');

  const [institutions, setInstitutions] = useState<Institution[]>([]);
  const [state, setState] = useState<ProgressiveLoadingState>({
    phase: 'init',
    message: 'Initializing...',
    percent: 0,
    dataLevel: 'none',
    backgroundLoading: false,
  });
  const [error, setError] = useState<Error | null>(null);

  // Map for quick lookup by GHCID
  const institutionMap = useRef<Map<string, Institution>>(new Map());

  // Track if we're mounted
  const isMounted = useRef(true);

  useEffect(() => {
    isMounted.current = true;
    return () => {
      isMounted.current = false;
    };
  }, []);

  // Update institution map when data changes
  useEffect(() => {
    institutionMap.current.clear();
    for (const inst of institutions) {
      if (inst.ghcid?.current) {
        institutionMap.current.set(inst.ghcid.current, inst);
      }
    }
  }, [institutions]);

  /**
   * Main loading function
   */
  const load = useCallback(async (): Promise<void> => {
    console.log('[Progressive] load() called');

    try {
      // Phase 1: Check IndexedDB cache
      console.log('[Progressive] Phase 1: Checking cache...');
      setState(s => ({ ...s, phase: 'checking-cache', message: 'Checking cache...', percent: 5 }));

      const cached = await readUsableCache();
      if (cached) {
        if (isMounted.current) {
          setInstitutions(cached.data);
          setState({
            phase: 'ready',
            message: `Loaded ${cached.data.length.toLocaleString()} institutions (cached)`,
            percent: 100,
            dataLevel: 'full',
            backgroundLoading: cached.isStale,
          });
        }

        // Background refresh if stale
        if (cached.isStale) {
          const refreshInBackground = async (): Promise<void> => {
            try {
              const freshData = await fetchFullInstitutions();
              await persistToCache(freshData, 0);
              if (isMounted.current) {
                setInstitutions(freshData);
              }
            } catch (err) {
              console.warn('[Progressive] Background refresh failed:', err);
            } finally {
              if (isMounted.current) {
                setState(s => ({ ...s, backgroundLoading: false }));
              }
            }
          };
          // Deliberately not awaited: cached data is already on screen
          void refreshInBackground();
        }
        return;
      }

      // Phase 2: No cache - fetch lite data first for fast display
      console.log('[Progressive] Phase 2: No cache, fetching lite data...');
      setState(s => ({ ...s, phase: 'loading-lite', message: 'Loading map markers...', percent: 10 }));

      const liteData = await fetchLiteInstitutions();
      console.log('[Progressive] Lite data loaded:', liteData.length, 'institutions');

      if (isMounted.current) {
        setInstitutions(liteData);
        setState({
          phase: 'loading-full',
          message: `Displayed ${liteData.length.toLocaleString()} markers, loading full data...`,
          percent: 30,
          dataLevel: 'lite',
          backgroundLoading: true,
        });
      }

      // Phase 3: Fetch full data in background
      console.log('[Progressive] Phase 3: Fetching full data in background...');
      const startTime = Date.now();

      const fullData = await fetchFullInstitutions((percent, message) => {
        if (isMounted.current) {
          setState(s => ({
            ...s,
            percent: 30 + Math.round(percent * 0.6), // Map 0-100 to 30-90
            message,
          }));
        }
      });

      const fetchDuration = Date.now() - startTime;
      console.log('[Progressive] Full data loaded:', fullData.length, 'institutions in', fetchDuration, 'ms');

      // Store in IndexedDB; a failed write must not hide the data we already have
      if (await persistToCache(fullData, fetchDuration)) {
        console.log('[Progressive] Data cached to IndexedDB');
      }

      if (isMounted.current) {
        setInstitutions(fullData);
        setState({
          phase: 'ready',
          message: `Loaded ${fullData.length.toLocaleString()} institutions with full details`,
          percent: 100,
          dataLevel: 'full',
          backgroundLoading: false,
        });
      }
    } catch (err) {
      console.error('[Progressive] Load failed:', err);
      console.error('[Progressive] Error details:', {
        name: (err as Error)?.name,
        message: (err as Error)?.message,
        stack: (err as Error)?.stack?.slice(0, 500),
      });
      if (isMounted.current) {
        setError(err instanceof Error ? err : new Error('Failed to load institutions'));
        setState(s => ({ ...s, phase: 'ready', backgroundLoading: false }));
      }
    }
  }, []);

  /**
   * Force refresh from network
   */
  const refresh = useCallback(async (): Promise<void> => {
    // A new attempt supersedes any error left by an earlier load/refresh
    setError(null);
    // Functional update keeps the current dataLevel without depending on `state`
    setState(s => ({
      phase: 'loading-full',
      message: 'Refreshing data...',
      percent: 0,
      dataLevel: s.dataLevel,
      backgroundLoading: false,
    }));

    try {
      const startTime = Date.now();
      const fullData = await fetchFullInstitutions((percent, message) => {
        if (isMounted.current) {
          setState(s => ({ ...s, percent, message }));
        }
      });

      await persistToCache(fullData, Date.now() - startTime);

      if (isMounted.current) {
        setInstitutions(fullData);
        setState({
          phase: 'ready',
          message: `Refreshed ${fullData.length.toLocaleString()} institutions`,
          percent: 100,
          dataLevel: 'full',
          backgroundLoading: false,
        });
      }
    } catch (err) {
      if (isMounted.current) {
        setError(err instanceof Error ? err : new Error('Refresh failed'));
        // Leave the loading phase so consumers are not stuck on a spinner
        setState(s => ({ ...s, phase: 'ready', message: 'Refresh failed', backgroundLoading: false }));
      }
    }
  }, []);

  /**
   * Get full details for a specific institution
   */
  const getInstitutionDetail = useCallback((ghcid: string): Institution | null => {
    return institutionMap.current.get(ghcid) || null;
  }, []);

  // Initial load
  useEffect(() => {
    console.log('[Progressive] Initial load effect triggered');
    void load();
  }, [load]);

  // Log state changes
  const isReady = state.phase === 'ready' || state.dataLevel !== 'none';
  console.log('[Progressive] Return values:', {
    institutionsCount: institutions.length,
    phase: state.phase,
    dataLevel: state.dataLevel,
    isReady,
    hasError: !!error,
    errorMessage: error?.message,
  });

  return {
    institutions,
    state,
    isReady,
    hasFullData: state.dataLevel === 'full',
    error,
    totalCount: institutions.length,
    refresh,
    getInstitutionDetail,
  };
}

// ============================================================================
// Preload Function
// ============================================================================

let preloadStarted = false;

/**
 * Preload institutions data - call early to start caching.
 *
 * Runs at most once per module instance and never throws or rejects: every
 * failure (cache status, network, cache write) is swallowed because the hook
 * performs its own load on mount.
 */
export function preloadProgressiveInstitutions(): void {
  if (preloadStarted) return;
  preloadStarted = true;

  const preload = async (): Promise<void> => {
    try {
      // Check cache first
      const status = await institutionsCache.getStatus();
      if (status.hasCache && !status.isExpired) {
        return;
      }

      // Fetch and cache full data
      const institutions = await fetchFullInstitutions();
      await institutionsCache.set(institutions, 0);
    } catch {
      // Preload failed silently - will retry on component mount
    }
  };
  void preload();
}