glam/frontend/src/hooks/useProgressiveInstitutions.ts
2025-12-21 00:01:54 +01:00

786 lines
26 KiB
TypeScript

/**
* useProgressiveInstitutions - Progressive loading hook for comprehensive + efficient map data
*
* This hook implements a tiered loading strategy:
*
* FIRST VISIT:
* 1. Check IndexedDB for full cached data → If found, use immediately (instant!)
* 2. If not cached: Load lite data (~0.7MB) for fast marker display (~1s)
* 3. Background: Fetch full data (~27MB) and store in IndexedDB
* 4. When complete: Upgrade markers with full data (seamless)
*
* RETURN VISIT:
* 1. Load full data from IndexedDB → Instant with complete metadata!
* 2. Background: Check if stale, refresh if needed
*
* Benefits:
* - First visit: ~1s to interactive (lite markers)
* - Return visit: ~0s to interactive (cached full data)
* - Complete data: All fields available after background load
* - Offline capable: Full map works without network
*
* @author GLAM / bronhouder.nl
*/
import { useState, useEffect, useCallback, useRef } from 'react';
import type { Institution } from '../components/map/InstitutionInfoPanel';
import { institutionsCache } from '../lib/storage/institutions-cache';
// Re-export types
export type { Institution };
// ============================================================================
// Types
// ============================================================================
/**
 * Loading status published by useProgressiveInstitutions.
 *
 * The hook starts with phase 'init' / dataLevel 'none' and updates this
 * object as it moves through the cache check, the lite fetch and the full
 * fetch.
 */
export interface ProgressiveLoadingState {
/**
 * Current loading phase. Note that 'ready' is also set when loading fails,
 * so it means "loading finished", not "data is available".
 */
phase: 'init' | 'checking-cache' | 'loading-lite' | 'loading-full' | 'ready';
/** Human-readable status message */
message: string;
/** Progress percentage (0-100) */
percent: number;
/**
 * Data completeness level: 'none' before anything is loaded, 'lite' while
 * only the lite marker data is displayed, 'full' once the full data set
 * (from cache or network) has been published.
 */
dataLevel: 'none' | 'lite' | 'full';
/** Whether background loading is in progress */
backgroundLoading: boolean;
}
/**
 * Value returned by useProgressiveInstitutions.
 */
export interface UseProgressiveInstitutionsReturn {
/** Institution data (may be lite or full depending on loading state) */
institutions: Institution[];
/** Current loading state */
state: ProgressiveLoadingState;
/**
 * True once state.phase is 'ready' or any data level other than 'none' has
 * been reached. Because 'ready' is also set after a failed load, check
 * `error` before assuming data is present.
 */
isReady: boolean;
/** Whether we have full comprehensive data (state.dataLevel === 'full') */
hasFullData: boolean;
/** Error if any */
error: Error | null;
/** Total institution count (length of `institutions`) */
totalCount: number;
/** Force refresh from network: re-downloads the full data set and rewrites the cache */
refresh: () => Promise<void>;
/**
 * Look up an institution by its current GHCID among the institutions that
 * are currently loaded. This is an in-memory lookup only (no fetch); it
 * returns null when the GHCID is not present.
 */
getInstitutionDetail: (ghcid: string) => Institution | null;
}
// ============================================================================
// Constants
// ============================================================================
// Path prefix of the geo API. This module requests
// `${GEO_API_BASE}/institutions/lite` and `${GEO_API_BASE}/institutions`.
const GEO_API_BASE = '/api/geo';
// Type colors and names (same as useGeoApiInstitutions)
/** Marker colour (hex string) for each single-letter institution type code. */
const TYPE_COLORS: Record<string, string> = {
  G: '#00bcd4',
  L: '#2ecc71',
  A: '#3498db',
  M: '#e74c3c',
  O: '#f39c12',
  R: '#1abc9c',
  C: '#795548',
  U: '#9e9e9e',
  B: '#4caf50',
  E: '#ff9800',
  S: '#9b59b6',
  F: '#95a5a6',
  I: '#673ab7',
  X: '#607d8b',
  P: '#8bc34a',
  H: '#607d8b',
  D: '#34495e',
  N: '#e91e63',
  T: '#ff5722',
};
/** Display name for each single-letter institution type code. */
const TYPE_NAMES: Record<string, string> = {
  G: 'Gallery',
  L: 'Library',
  A: 'Archive',
  M: 'Museum',
  O: 'Official',
  R: 'Research',
  C: 'Corporation',
  U: 'Unknown',
  B: 'Botanical',
  E: 'Education',
  S: 'Society',
  F: 'Features',
  I: 'Intangible',
  X: 'Mixed',
  P: 'Personal',
  H: 'Holy sites',
  D: 'Digital',
  N: 'NGO',
  T: 'Taste/smell',
};
// ============================================================================
// Lite Institution Transformer (minimal fields for map markers)
// ============================================================================
/**
 * Shape this module expects for each feature returned by the lite
 * institutions endpoint: a point geometry plus the few properties needed to
 * draw a map marker.
 */
interface LiteFeature {
type: 'Feature';
// Coordinates are read as [lon, lat] by liteFeatureToInstitution.
geometry: { type: 'Point'; coordinates: [number, number] };
properties: {
ghcid: string;
name: string;
// Type code used as the key into TYPE_NAMES / TYPE_COLORS.
type: string;
city?: string;
country_code?: string;
rating?: number;
};
}
/**
 * Convert a lite feature into an Institution carrying only what a map marker
 * needs. Fields the lite payload does not provide (province, website,
 * wikidata_id, description) are filled with empty strings.
 *
 * @param feature - one feature from the lite endpoint
 * @returns an Institution with marker-level data
 */
function liteFeatureToInstitution(feature: LiteFeature): Institution {
  const { properties, geometry } = feature;
  const [lon, lat] = geometry.coordinates;
  // A missing/empty type code falls back to 'U'.
  const code = properties.type || 'U';

  const institution: Institution = {
    lat,
    lon,
    name: properties.name || 'Unknown',
    city: properties.city || '',
    province: '', // Not in lite data
    type: code,
    type_name: TYPE_NAMES[code] || 'Unknown',
    color: TYPE_COLORS[code] || '#9e9e9e',
    website: '',
    wikidata_id: '',
    description: '',
    rating: properties.rating,
    ghcid: { current: properties.ghcid, uuid: '', numeric: undefined },
  };
  return institution;
}
// ============================================================================
// Full Institution Transformer (all fields)
// ============================================================================
/**
 * Shape this module expects for each feature returned by the full
 * institutions endpoint. Properties are left untyped here;
 * fullFeatureToInstitution casts or parses each field it reads.
 */
interface FullFeature {
type: 'Feature';
// Coordinates are read as [lon, lat] by fullFeatureToInstitution.
geometry: { type: 'Point'; coordinates: [number, number] };
properties: Record<string, unknown>;
}
/**
 * One entry of an institution's `web_claims` data, as consumed by the logo
 * extraction in this module. All fields are optional.
 */
interface WebClaim {
// Kind of claim; logo extraction only considers entries where this is 'logo'.
claim_type?: string;
// The claimed value; for logo claims this is used as the (possibly relative) image URL.
claim_value?: string;
raw_value?: string;
// Used as the base URL when resolving a relative claim_value.
source_url?: string;
retrieved_on?: string;
xpath?: string;
// How the claim was extracted; logo ranking recognises 'logo_img_attr', 'og_image' and 'favicon_link'.
extraction_method?: string;
}
/**
 * Parse web_claims from a JSON string or an already-decoded value.
 *
 * Accepted shapes are an array of claims or an object whose `claims`
 * property is such an array. Anything else (invalid JSON, a non-array
 * `claims`, primitives) yields `undefined`. Entries that are not plain
 * objects are dropped so callers can safely read claim fields.
 *
 * @param value - raw `web_claims` property value
 * @returns a non-empty list of claim objects, or `undefined`
 */
function parseWebClaims(value: unknown): WebClaim[] | undefined {
  if (!value) return undefined;

  let parsed: unknown = value;
  if (typeof value === 'string') {
    try {
      parsed = JSON.parse(value);
    } catch {
      return undefined;
    }
  }

  // Web claims can be an array directly or wrapped in an object
  let candidates: unknown;
  if (Array.isArray(parsed)) {
    candidates = parsed;
  } else if (typeof parsed === 'object' && parsed !== null) {
    candidates = (parsed as Record<string, unknown>).claims;
  }
  if (!Array.isArray(candidates)) return undefined;

  // Keep only object entries; null or primitive entries would crash callers.
  const claims = candidates.filter(
    (entry): entry is WebClaim =>
      typeof entry === 'object' && entry !== null && !Array.isArray(entry),
  );
  return claims.length > 0 ? claims : undefined;
}
/**
 * Turn a possibly relative URL into an absolute one.
 *
 * - `http://` / `https://` URLs are returned unchanged.
 * - Protocol-relative URLs (`//host/...`) are given the `https:` scheme.
 * - Anything else is resolved against `baseUrl`; if there is no base, or
 *   resolution fails, the input is returned as-is.
 *
 * @param url - URL to resolve
 * @param baseUrl - optional absolute URL to resolve against
 * @returns the resolved URL, or `url` unchanged when it cannot be resolved
 */
function resolveUrl(url: string, baseUrl?: string): string {
  if (url.startsWith('//')) {
    return `https:${url}`;
  }
  if (url.startsWith('http://') || url.startsWith('https://')) {
    return url;
  }
  if (!baseUrl) {
    return url;
  }

  try {
    return new URL(url, new URL(baseUrl)).href;
  } catch {
    // Unparseable base or URL: hand back the input untouched
    return url;
  }
}
/**
 * Decide whether a URL is usable as an image source.
 *
 * The URL must be absolute (`http://` or `https://`, lowercase scheme) and
 * must not contain any known video/social-media marker (compared
 * case-insensitively).
 *
 * @param url - candidate URL
 * @returns true when the URL passes both checks
 */
function isValidImageUrl(url: string): boolean {
  if (!url) return false;

  const hasHttpScheme = url.startsWith('http://') || url.startsWith('https://');
  if (!hasHttpScheme) return false;

  // Substrings that identify video pages, social pages or video files
  const blockedMarkers = [
    'youtube.com/watch',
    'youtu.be/',
    'vimeo.com/',
    'twitter.com/',
    'facebook.com/',
    '.mp4',
    '.webm',
    '.mov',
    '.avi',
  ];

  const haystack = url.toLowerCase();
  for (const marker of blockedMarkers) {
    if (haystack.includes(marker)) return false;
  }
  return true;
}
/**
 * Pick a logo URL out of an institution's web_claims.
 *
 * Logo claims are ranked by extraction method
 * (logo_img_attr > og_image > favicon_link > anything else). The first ranked
 * claim that is not a favicon, does not look like a favicon/loading
 * placeholder, and resolves to a valid image URL wins. If none qualifies, the
 * top-ranked claim is tried as a last resort. Relative URLs are resolved
 * against the claim's source_url.
 *
 * @param webClaimsValue - raw `web_claims` property value
 * @returns an absolute image URL, or `undefined` when no usable logo exists
 */
function extractLogoFromWebClaims(webClaimsValue: unknown): string | undefined {
  const allClaims = parseWebClaims(webClaimsValue);
  if (!allClaims || allClaims.length === 0) return undefined;

  const candidates = allClaims.filter(claim => claim.claim_type === 'logo' && claim.claim_value);
  if (candidates.length === 0) return undefined;

  // Higher number = preferred extraction method; unknown methods rank 0
  const methodRank: Record<string, number> = {
    'logo_img_attr': 3,
    'og_image': 2,
    'favicon_link': 1,
  };
  const rankOf = (claim: WebClaim) => methodRank[claim.extraction_method || ''] || 0;
  const ranked = candidates.sort((first, second) => rankOf(second) - rankOf(first));

  // Walk the ranked list for the first claim that is a real, usable image
  let preferred: WebClaim | undefined;
  for (const claim of ranked) {
    const rawUrl = claim.claim_value || '';
    if (claim.extraction_method === 'favicon_link') continue;
    if (rawUrl.includes('favicon') || rawUrl.includes('loading')) continue;
    if (isValidImageUrl(resolveUrl(rawUrl, claim.source_url))) {
      preferred = claim;
      break;
    }
  }

  const chosen = preferred || ranked[0];
  if (!chosen?.claim_value) return undefined;

  const absoluteUrl = resolveUrl(chosen.claim_value, chosen.source_url);
  if (!isValidImageUrl(absoluteUrl)) return undefined;
  return absoluteUrl;
}
/**
 * Convert a full feature into a complete Institution.
 *
 * Simple fields are cast straight from the untyped properties; structured
 * fields (opening hours, reviews, photos, social media, YouTube) go through
 * their parsers. The logo comes from web_claims when possible, otherwise from
 * `logo_url` if that is a valid absolute image URL.
 *
 * @param feature - one feature from the full endpoint
 * @returns the Institution with all available details
 */
function fullFeatureToInstitution(feature: FullFeature): Institution {
  const { properties: raw, geometry } = feature;
  const [lon, lat] = geometry.coordinates;
  const code = (raw.type as string) || 'U';

  // Accessors for the two recurring patterns: "string or ''" and "T or undefined"
  const text = (key: string): string => (raw[key] as string) || '';
  const optional = <T>(key: string): T | undefined => raw[key] as T | undefined;

  // Structured fields
  const hours = parseOpeningHours(raw.opening_hours);
  const reviewList = parseReviews(raw.reviews);
  const photoList = parsePhotos(raw.photos, raw.photo_urls as string[] | undefined);
  const social = parseSocialMedia(raw.social_media);
  const youtubeChannel = parseYouTube(raw.youtube_enrichment);

  // Logo: web_claims first, then the direct logo_url if it is a valid image URL
  let logo = extractLogoFromWebClaims(raw.web_claims);
  if (!logo) {
    const direct = raw.logo_url as string | undefined;
    logo = direct && isValidImageUrl(direct) ? direct : undefined;
  }

  const ghcid = raw.ghcid
    ? { current: raw.ghcid as string, uuid: '', numeric: undefined }
    : undefined;
  const isil = raw.isil_code ? { code: raw.isil_code as string } : undefined;

  return {
    lat,
    lon,
    name: (raw.name as string) || 'Unknown',
    city: text('city'),
    province: text('province'),
    type: code,
    type_name: (raw.type_name as string) || TYPE_NAMES[code] || 'Unknown',
    color: TYPE_COLORS[code] || '#9e9e9e',
    website: text('website'),
    wikidata_id: text('wikidata_id'),
    description: text('description'),
    rating: optional<number>('rating'),
    total_ratings: optional<number>('total_ratings'),
    phone: optional<string>('phone'),
    address: optional<string>('formatted_address'),
    reviews: reviewList,
    photos: photoList,
    street_view_url: optional<string>('street_view_url'),
    business_status: optional<string>('business_status'),
    google_place_id: optional<string>('google_place_id'),
    opening_hours: hours,
    ghcid,
    isil,
    emic_name: optional<string>('emic_name'),
    org_name: optional<string>('org_name'),
    founding_year: safeExtractYear(raw.founding_year),
    dissolution_year: safeExtractYear(raw.dissolution_year),
    social_media: social,
    youtube: youtubeChannel,
    logo_url: logo,
  };
}
// ============================================================================
// Parser Utilities
// ============================================================================
/**
 * Extract a year from a loosely-typed value.
 *
 * Accepted inputs:
 * - a finite number: returned as-is (NaN / Infinity yield `undefined`);
 * - a string: the first run of four digits is taken as the year;
 * - an object with a string `time` property that starts with an optionally
 *   signed four-digit year (e.g. "+1850-01-01T00:00:00Z"); a leading "-"
 *   produces a negative year.
 *
 * @param value - raw year value
 * @returns the year, or `undefined` when none can be extracted
 */
function safeExtractYear(value: unknown): number | undefined {
  if (value === null || value === undefined) return undefined;

  if (typeof value === 'number') {
    // NaN and Infinity are typeof 'number' but are not usable years
    return Number.isFinite(value) ? value : undefined;
  }

  if (typeof value === 'string') {
    const match = value.match(/\d{4}/);
    return match ? parseInt(match[0], 10) : undefined;
  }

  if (typeof value === 'object') {
    const time = (value as Record<string, unknown>).time;
    if (time && typeof time === 'string') {
      const match = time.match(/^([+-]?)(\d{4})/);
      if (!match) return undefined;
      const year = parseInt(match[2], 10);
      // Keep the sign so BCE dates are not reported as CE; avoid producing -0
      return match[1] === '-' && year !== 0 ? -year : year;
    }
  }

  return undefined;
}
/**
 * Normalise opening-hours data into a list of text lines.
 *
 * Accepts an array (returned as-is), an object with a `weekday_text`
 * property, or a JSON string encoding either of those. Everything else,
 * including invalid JSON, yields `undefined`.
 *
 * @param value - raw `opening_hours` property value
 * @returns the opening-hours lines, or `undefined`
 */
function parseOpeningHours(value: unknown): string[] | undefined {
  if (!value) return undefined;
  if (Array.isArray(value)) return value;

  switch (typeof value) {
    case 'string': {
      let decoded;
      try {
        decoded = JSON.parse(value);
      } catch {
        return undefined;
      }
      if (decoded?.weekday_text) return decoded.weekday_text;
      return Array.isArray(decoded) ? decoded : undefined;
    }
    case 'object': {
      const lines = (value as Record<string, string[]>).weekday_text;
      return lines ? lines : undefined;
    }
    default:
      return undefined;
  }
}
/**
 * Normalise review data into a uniform list.
 *
 * Accepts an array of review objects or a JSON string encoding one. Entries
 * that are not objects (e.g. `null`) are skipped rather than throwing, so a
 * single malformed review cannot abort conversion of the whole dataset.
 *
 * Field fallbacks: author_name -> author -> 'Anonymous'; rating -> 0;
 * text -> ''; relative_time_description -> time -> ''.
 *
 * @param value - raw `reviews` property value
 * @returns the normalised reviews, or `undefined` when the input is empty,
 *          not an array, or invalid JSON
 */
function parseReviews(value: unknown): Array<{ author: string; rating: number; text: string; time: string }> | undefined {
  if (!value) return undefined;

  let parsed: unknown = value;
  if (typeof value === 'string') {
    try {
      parsed = JSON.parse(value);
    } catch {
      return undefined;
    }
  }
  if (!Array.isArray(parsed)) return undefined;

  return parsed
    .filter((entry): entry is Record<string, unknown> => typeof entry === 'object' && entry !== null)
    .map(r => ({
      author: (r.author_name || r.author || 'Anonymous') as string,
      rating: (r.rating || 0) as number,
      text: (r.text || '') as string,
      time: (r.relative_time_description || r.time || '') as string,
    }));
}
/**
 * Merge photo data from two sources into one list.
 *
 * `photos` may be an array of photo objects or a JSON string encoding one.
 * Each object contributes its `url`, or, when only `name` is present, a
 * Places media URL built from that name. Entries that are not objects are
 * skipped rather than throwing. `photoUrls` entries are appended unless the
 * same URL is already in the list.
 *
 * @param photos - raw `photos` property value
 * @param photoUrls - optional list of plain photo URLs
 * @returns the merged photo list, or `undefined` when it would be empty
 */
function parsePhotos(
  photos: unknown,
  photoUrls?: string[]
): Array<{ url: string; attribution?: string }> | undefined {
  const result: Array<{ url: string; attribution?: string }> = [];
  // URLs already present in `result`, for O(1) duplicate checks below
  const seen = new Set<string>();

  if (photos) {
    let parsed: unknown = photos;
    if (typeof photos === 'string') {
      try { parsed = JSON.parse(photos); } catch { /* ignore: not JSON, nothing to add */ }
    }
    if (Array.isArray(parsed)) {
      for (const entry of parsed) {
        // A null or primitive entry has no fields to read; skip it
        if (typeof entry !== 'object' || entry === null) continue;
        const p = entry as Record<string, unknown>;
        const url = (p.url as string) || (p.name ? `https://places.googleapis.com/v1/${p.name}/media?maxWidthPx=800` : '');
        if (url) {
          result.push({
            url,
            attribution: ((p.author_attributions as Array<{ displayName?: string }>)?.[0]?.displayName) as string | undefined,
          });
          seen.add(url);
        }
      }
    }
  }

  if (photoUrls && Array.isArray(photoUrls)) {
    for (const url of photoUrls) {
      if (url && typeof url === 'string' && !seen.has(url)) {
        result.push({ url });
        seen.add(url);
      }
    }
  }

  return result.length > 0 ? result : undefined;
}
/**
 * Normalise social-media data into a key/value record.
 *
 * Accepts an object (returned as-is) or a JSON string encoding an object.
 * A JSON string that decodes to `null` or to a primitive yields `undefined`.
 *
 * @param value - raw `social_media` property value
 * @returns the record, or `undefined` when nothing usable is present
 */
function parseSocialMedia(value: unknown): Record<string, string> | undefined {
  if (!value) return undefined;

  // `null` was already rejected by the falsy check above
  if (typeof value === 'object') return value as Record<string, string>;

  if (typeof value === 'string') {
    try {
      const parsed: unknown = JSON.parse(value);
      // typeof null === 'object', so exclude it explicitly
      if (typeof parsed === 'object' && parsed !== null) {
        return parsed as Record<string, string>;
      }
    } catch { /* ignore: not JSON */ }
  }

  return undefined;
}
/**
 * Normalise YouTube enrichment data into the Institution `youtube` shape.
 *
 * Accepts an object or a JSON string encoding one. Channel fields are read
 * from a nested `channel` object when present, otherwise from the top level.
 * Returns `undefined` when the input is empty or unparseable, when a `status`
 * other than 'SUCCESS' is present, or when neither a channel id nor a
 * channel URL is available. Counts that are not numbers become `null`.
 *
 * @param value - raw `youtube_enrichment` property value
 * @returns the channel summary, or `undefined`
 */
function parseYouTube(value: unknown): Institution['youtube'] {
  if (!value) return undefined;

  let payload: unknown = value;
  if (typeof payload === 'string') {
    try {
      payload = JSON.parse(payload);
    } catch {
      return undefined;
    }
  }
  if (payload === null || typeof payload !== 'object') return undefined;

  const root = payload as Record<string, unknown>;
  if (root.status && root.status !== 'SUCCESS') return undefined;

  // Prefer the nested channel object; fall back to the top-level fields
  const nested = root.channel;
  const info = nested !== null && typeof nested === 'object'
    ? nested as Record<string, unknown>
    : root;
  if (!info.channel_id && !info.channel_url) return undefined;

  const asText = (field: unknown): string => String(field || '');
  const asCount = (field: unknown): number | null => (typeof field === 'number' ? field : null);

  return {
    channel_id: asText(info.channel_id),
    channel_url: asText(info.channel_url),
    channel_title: asText(info.title || info.channel_title),
    channel_description: asText(info.description || info.channel_description),
    subscriber_count: asCount(info.subscriber_count),
    video_count: asCount(info.video_count),
    view_count: asCount(info.view_count),
    thumbnail_url: asText(info.thumbnail_url),
  };
}
// ============================================================================
// Main Hook
// ============================================================================
/**
 * React hook that loads institution data progressively.
 *
 * On mount it checks `institutionsCache`:
 * - on a non-expired cache hit with data, the cached institutions are
 *   published immediately as full data; if the cache reports itself stale, a
 *   full fetch runs in the background and replaces them when it completes;
 * - otherwise lite data is fetched and published first, then the full data
 *   set is downloaded (with progress updates), written to the cache and
 *   swapped in.
 *
 * Any failure in that sequence is stored in `error` and the phase is set to
 * 'ready'. `refresh` re-downloads the full data set on demand.
 *
 * @returns institutions, loading state, readiness flags, error, and the
 *          `refresh` / `getInstitutionDetail` helpers
 */
export function useProgressiveInstitutions(): UseProgressiveInstitutionsReturn {
  console.log('[Progressive] Hook initializing');

  const [institutions, setInstitutions] = useState<Institution[]>([]);
  const [state, setState] = useState<ProgressiveLoadingState>({
    phase: 'init',
    message: 'Initializing...',
    percent: 0,
    dataLevel: 'none',
    backgroundLoading: false,
  });
  const [error, setError] = useState<Error | null>(null);

  // Map for quick lookup by GHCID
  const institutionMap = useRef<Map<string, Institution>>(new Map());

  // Track if we're mounted, so async continuations skip state updates after unmount
  const isMounted = useRef(true);
  useEffect(() => {
    isMounted.current = true;
    return () => {
      isMounted.current = false;
    };
  }, []);

  // Rebuild the lookup map whenever the institution list is replaced
  useEffect(() => {
    institutionMap.current.clear();
    for (const inst of institutions) {
      if (inst.ghcid?.current) {
        institutionMap.current.set(inst.ghcid.current, inst);
      }
    }
  }, [institutions]);

  /**
   * Fetch lite data for fast initial display
   */
  const fetchLiteData = useCallback(async (): Promise<Institution[]> => {
    const response = await fetch(`${GEO_API_BASE}/institutions/lite`, {
      headers: { 'Accept': 'application/json' },
    });
    if (!response.ok) {
      throw new Error(`Lite API failed: ${response.status}`);
    }
    const data = await response.json();
    return data.features.map(liteFeatureToInstitution);
  }, []);

  /**
   * Fetch full data with streaming progress.
   * Progress is only reported when the response has a Content-Length and a
   * readable body; otherwise the response is parsed in one step.
   */
  const fetchFullData = useCallback(async (
    onProgress?: (percent: number, message: string) => void
  ): Promise<Institution[]> => {
    const response = await fetch(`${GEO_API_BASE}/institutions`, {
      headers: { 'Accept': 'application/json' },
    });
    if (!response.ok) {
      throw new Error(`Full API failed: ${response.status}`);
    }

    const contentLength = response.headers.get('Content-Length');
    const totalBytes = contentLength ? parseInt(contentLength, 10) : 0;

    // Stream with progress tracking
    if (totalBytes && response.body) {
      const reader = response.body.getReader();
      const chunks: Uint8Array[] = [];
      let receivedBytes = 0;

      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        chunks.push(value);
        receivedBytes += value.length;
        if (onProgress) {
          // Content-Length can describe the compressed transfer size while the
          // stream yields decoded bytes, so the raw ratio may exceed 100%.
          const percent = Math.min(100, Math.round((receivedBytes / totalBytes) * 100));
          onProgress(percent, `Downloading full data (${(receivedBytes / 1024 / 1024).toFixed(1)}MB)...`);
        }
      }

      // Stitch the chunks into one buffer before decoding
      const allChunks = new Uint8Array(receivedBytes);
      let position = 0;
      for (const chunk of chunks) {
        allChunks.set(chunk, position);
        position += chunk.length;
      }
      const text = new TextDecoder().decode(allChunks);
      const data = JSON.parse(text);
      return data.features.map(fullFeatureToInstitution);
    }

    // Fallback: no streaming
    const data = await response.json();
    return data.features.map(fullFeatureToInstitution);
  }, []);

  /**
   * Main loading function
   */
  const load = useCallback(async () => {
    console.log('[Progressive] load() called');
    try {
      // Phase 1: Check IndexedDB cache
      console.log('[Progressive] Phase 1: Checking cache...');
      setState(s => ({ ...s, phase: 'checking-cache', message: 'Checking cache...', percent: 5 }));
      const cacheStatus = await institutionsCache.getStatus();
      console.log('[Progressive] Cache status:', cacheStatus);

      if (cacheStatus.hasCache && !cacheStatus.isExpired) {
        // Cache hit! Load from IndexedDB
        console.log('[Progressive] Cache hit! Loading from IndexedDB...');
        const cachedData = await institutionsCache.get(true);
        console.log('[Progressive] Cached data loaded:', cachedData?.length || 0, 'institutions');

        if (cachedData && cachedData.length > 0) {
          if (isMounted.current) {
            setInstitutions(cachedData);
            setState({
              phase: 'ready',
              message: `Loaded ${cachedData.length.toLocaleString()} institutions (cached)`,
              percent: 100,
              dataLevel: 'full',
              backgroundLoading: cacheStatus.isStale,
            });
          }

          // Background refresh if stale
          if (cacheStatus.isStale) {
            fetchFullData().then(async (freshData) => {
              await institutionsCache.set(freshData, 0);
              if (isMounted.current) {
                setInstitutions(freshData);
                setState(s => ({ ...s, backgroundLoading: false }));
              }
            }).catch(err => {
              console.warn('[Progressive] Background refresh failed:', err);
              if (isMounted.current) {
                setState(s => ({ ...s, backgroundLoading: false }));
              }
            });
          }
          return;
        }
        // Cache reported a hit but returned no rows: fall through to the network
      }

      // Phase 2: No cache - fetch lite data first for fast display
      console.log('[Progressive] Phase 2: No cache, fetching lite data...');
      setState(s => ({ ...s, phase: 'loading-lite', message: 'Loading map markers...', percent: 10 }));
      const liteData = await fetchLiteData();
      console.log('[Progressive] Lite data loaded:', liteData.length, 'institutions');
      if (isMounted.current) {
        setInstitutions(liteData);
        setState({
          phase: 'loading-full',
          message: `Displayed ${liteData.length.toLocaleString()} markers, loading full data...`,
          percent: 30,
          dataLevel: 'lite',
          backgroundLoading: true,
        });
      }

      // Phase 3: Fetch full data in background
      console.log('[Progressive] Phase 3: Fetching full data in background...');
      const startTime = Date.now();
      const fullData = await fetchFullData((percent, message) => {
        if (isMounted.current) {
          setState(s => ({
            ...s,
            percent: 30 + Math.round(percent * 0.6), // Map 0-100 to 30-90
            message,
          }));
        }
      });
      const fetchDuration = Date.now() - startTime;
      console.log('[Progressive] Full data loaded:', fullData.length, 'institutions in', fetchDuration, 'ms');

      // Store in IndexedDB
      await institutionsCache.set(fullData, fetchDuration);
      console.log('[Progressive] Data cached to IndexedDB');
      if (isMounted.current) {
        setInstitutions(fullData);
        setState({
          phase: 'ready',
          message: `Loaded ${fullData.length.toLocaleString()} institutions with full details`,
          percent: 100,
          dataLevel: 'full',
          backgroundLoading: false,
        });
      }
    } catch (err) {
      console.error('[Progressive] Load failed:', err);
      console.error('[Progressive] Error details:', {
        name: (err as Error)?.name,
        message: (err as Error)?.message,
        stack: (err as Error)?.stack?.slice(0, 500),
      });
      if (isMounted.current) {
        setError(err instanceof Error ? err : new Error('Failed to load institutions'));
        setState(s => ({ ...s, phase: 'ready', backgroundLoading: false }));
      }
    }
  }, [fetchLiteData, fetchFullData]);

  /**
   * Force refresh from network.
   * Clears any previous error first. On failure the error is stored and the
   * phase returns to 'ready', keeping whatever data was already displayed.
   */
  const refresh = useCallback(async () => {
    setError(null);
    // Updater form keeps the current dataLevel without depending on `state`,
    // so this callback keeps a stable identity across state changes.
    setState(s => ({
      phase: 'loading-full',
      message: 'Refreshing data...',
      percent: 0,
      dataLevel: s.dataLevel,
      backgroundLoading: false,
    }));
    try {
      const startTime = Date.now();
      const fullData = await fetchFullData((percent, message) => {
        if (isMounted.current) {
          setState(s => ({ ...s, percent, message }));
        }
      });
      await institutionsCache.set(fullData, Date.now() - startTime);
      if (isMounted.current) {
        setInstitutions(fullData);
        setState({
          phase: 'ready',
          message: `Refreshed ${fullData.length.toLocaleString()} institutions`,
          percent: 100,
          dataLevel: 'full',
          backgroundLoading: false,
        });
      }
    } catch (err) {
      if (isMounted.current) {
        setError(err instanceof Error ? err : new Error('Refresh failed'));
        // Leave the loading phase so consumers are not stuck on a spinner
        setState(s => ({ ...s, phase: 'ready', message: 'Refresh failed', backgroundLoading: false }));
      }
    }
  }, [fetchFullData]);

  /**
   * Look up an institution by GHCID among the currently loaded institutions
   */
  const getInstitutionDetail = useCallback((ghcid: string): Institution | null => {
    return institutionMap.current.get(ghcid) || null;
  }, []);

  // Initial load
  useEffect(() => {
    console.log('[Progressive] Initial load effect triggered');
    load();
  }, [load]);

  // Log state changes
  const isReady = state.phase === 'ready' || state.dataLevel !== 'none';
  console.log('[Progressive] Return values:', {
    institutionsCount: institutions.length,
    phase: state.phase,
    dataLevel: state.dataLevel,
    isReady,
    hasError: !!error,
    errorMessage: error?.message,
  });

  return {
    institutions,
    state,
    isReady,
    hasFullData: state.dataLevel === 'full',
    error,
    totalCount: institutions.length,
    refresh,
    getInstitutionDetail,
  };
}
// ============================================================================
// Preload Function
// ============================================================================
// Module-level guard so the preload runs at most once per page load
let preloadStarted = false;

/**
 * Preload institutions data - call early to start caching.
 *
 * Fire-and-forget: if the cache has no usable (present, non-expired) entry,
 * the full data set is fetched, converted and stored in the cache. Every
 * failure, including a failing cache status check, is swallowed on purpose;
 * the hook performs its own load when it mounts.
 */
export function preloadProgressiveInstitutions(): void {
  if (preloadStarted) return;
  preloadStarted = true;

  void (async () => {
    try {
      // Check cache first
      const status = await institutionsCache.getStatus();
      if (status.hasCache && !status.isExpired) {
        return;
      }

      // Fetch and cache full data
      const response = await fetch(`${GEO_API_BASE}/institutions`, {
        headers: { 'Accept': 'application/json' },
      });
      if (!response.ok) throw new Error(`API failed: ${response.status}`);
      const data = await response.json();
      const institutions = data.features.map(fullFeatureToInstitution);
      await institutionsCache.set(institutions, 0);
    } catch {
      // Preload is best-effort: fail silently, the hook loads again on mount
    }
  })();
}