glam/frontend/src/utils/wikidata.ts
2025-12-11 22:32:09 +01:00

193 lines
5.9 KiB
TypeScript

/**
* Wikidata utility functions for fetching institution metadata
*
* Used to fetch image URLs from Wikidata when we only have a wikidata_id (Q-number)
*/
// Cache of Wikimedia Commons filenames (the raw P18 value), keyed by Wikidata ID.
// `null` means the API answered and the entity has no usable image. The filename
// is cached rather than a finished URL because the URL embeds the requested
// width: caching URLs would hand the first caller's width to every later caller.
const wikidataImageCache = new Map<string, string | null>();

// In-flight single-entity lookups keyed by Wikidata ID, so concurrent callers
// share one request. Each promise resolves to a filename (or null), never a URL,
// so callers asking for different widths can share it safely.
const inFlightRequests = new Map<string, Promise<string | null>>();

// An entity ID is "Q" followed by digits. Anything else is rejected before it
// reaches the query string, so an ID can never inject extra API parameters.
const WIKIDATA_ID_PATTERN = /^Q\d+$/;

// Maximum number of IDs sent in a single wbgetentities request.
const WIKIDATA_MAX_IDS_PER_REQUEST = 50;

/** The slice of a wbgetentities entity that this module reads. */
interface WikidataEntity {
  claims?: {
    P18?: Array<{ mainsnak?: { datavalue?: { value?: unknown } } }>;
  };
}

type WikidataEntityMap = Record<string, WikidataEntity | undefined>;

interface WikidataEntitiesResponse {
  entities?: WikidataEntityMap;
}

/** True when `id` is a syntactically valid Wikidata entity ID such as "Q190804". */
function isValidWikidataId(id: string): boolean {
  // The typeof check protects against untyped callers passing null/undefined.
  return typeof id === 'string' && WIKIDATA_ID_PATTERN.test(id);
}

/** Build the Special:FilePath URL for a Commons filename at the given width. */
function buildCommonsImageUrl(filename: string, width: number): string {
  // Replace spaces with underscores, then percent-encode the filename.
  const encodedFilename = encodeURIComponent(filename.replace(/ /g, '_'));
  return `https://commons.wikimedia.org/wiki/Special:FilePath/${encodedFilename}?width=${width}`;
}

/** Read the first P18 ("image") value from an entity; null when absent or not a string. */
function extractImageFilename(entity: WikidataEntity | undefined): string | null {
  const value = entity?.claims?.P18?.[0]?.mainsnak?.datavalue?.value;
  return typeof value === 'string' && value !== '' ? value : null;
}

/**
 * Request the claims of the given (already validated) entities.
 *
 * Returns the `entities` map of the response, or null when the request did not
 * produce one (non-OK HTTP status, or a body without `entities`). Network and
 * JSON errors are thrown to the caller. Nothing is cached here.
 *
 * @param ids - Valid Wikidata IDs, at most WIKIDATA_MAX_IDS_PER_REQUEST
 * @param failureLabel - Prefix used in the warning logged on failure
 */
async function fetchEntityClaims(
  ids: readonly string[],
  failureLabel: string
): Promise<WikidataEntityMap | null> {
  // props=claims returns all claims of each entity; P18 is picked out afterwards.
  const url = `https://www.wikidata.org/w/api.php?action=wbgetentities&ids=${ids.join('|')}&props=claims&format=json&origin=*`;
  const response = await fetch(url);
  if (!response.ok) {
    console.warn(`[Wikidata] ${failureLabel}: ${response.status}`);
    return null;
  }
  const data = (await response.json()) as WikidataEntitiesResponse | null;
  const entities = data?.entities;
  if (typeof entities !== 'object' || entities === null) {
    console.warn(`[Wikidata] ${failureLabel}: response contained no entities`);
    return null;
  }
  return entities;
}

/**
 * Look up and cache the image filename of a single entity.
 *
 * Never rejects. A failed request resolves to null WITHOUT touching the cache,
 * so a transient outage does not permanently hide the image.
 */
async function lookupImageFilename(wikidataId: string): Promise<string | null> {
  try {
    const entities = await fetchEntityClaims([wikidataId], `Failed to fetch ${wikidataId}`);
    if (entities === null) {
      return null;
    }
    const filename = extractImageFilename(entities[wikidataId]);
    wikidataImageCache.set(wikidataId, filename);
    return filename;
  } catch (error) {
    console.error(`[Wikidata] Error fetching image for ${wikidataId}:`, error);
    return null;
  }
}

/**
 * Fetch the main image URL for a Wikidata entity
 *
 * Uses the Wikidata API to read the image property (P18) and constructs
 * a Wikimedia Commons URL from the filename. The filename is cached per
 * entity, so repeated calls, including calls with a different `width`, do not
 * hit the API again, and concurrent calls for the same entity share one request.
 * Failed requests are not cached and will be retried on the next call.
 *
 * @param wikidataId - Wikidata entity ID (e.g., "Q190804" for Rijksmuseum)
 * @param width - Desired image width in pixels (default 800)
 * @returns Wikimedia Commons image URL, or null if the ID is invalid, the
 *          entity has no image, or the request failed
 */
export async function fetchWikidataImageUrl(
  wikidataId: string,
  width: number = 800
): Promise<string | null> {
  if (!isValidWikidataId(wikidataId)) {
    return null;
  }

  let filename: string | null;
  if (wikidataImageCache.has(wikidataId)) {
    filename = wikidataImageCache.get(wikidataId) ?? null;
  } else {
    let pending = inFlightRequests.get(wikidataId);
    if (pending === undefined) {
      // The cleanup runs only after the lookup settles, which is always after
      // the set() below, so the map entry cannot be left behind.
      pending = lookupImageFilename(wikidataId).finally(() => {
        inFlightRequests.delete(wikidataId);
      });
      inFlightRequests.set(wikidataId, pending);
    }
    filename = await pending;
  }

  return filename === null ? null : buildCommonsImageUrl(filename, width);
}

/**
 * Batch fetch Wikidata image URLs for multiple entities
 * More efficient than individual calls when loading many institutions
 *
 * Invalid IDs map to null without a request, cached entities are served from
 * the cache, and the remaining IDs are de-duplicated and requested in chunks.
 * IDs in a chunk whose request failed map to null but are not cached.
 *
 * @param wikidataIds - Array of Wikidata entity IDs
 * @param width - Desired image width
 * @returns Map of wikidataId -> imageUrl (or null)
 */
export async function fetchWikidataImageUrlsBatch(
  wikidataIds: string[],
  width: number = 800
): Promise<Map<string, string | null>> {
  const results = new Map<string, string | null>();
  const toUrl = (filename: string | null): string | null =>
    filename === null ? null : buildCommonsImageUrl(filename, width);

  // A Set removes duplicates so they do not eat into the per-request ID limit.
  const uncachedIds = new Set<string>();
  for (const id of wikidataIds) {
    if (!isValidWikidataId(id)) {
      results.set(id, null);
    } else if (wikidataImageCache.has(id)) {
      results.set(id, toUrl(wikidataImageCache.get(id) ?? null));
    } else {
      uncachedIds.add(id);
    }
  }

  const idsToFetch = [...uncachedIds];
  // Chunks are requested one after another on purpose: issuing API requests in
  // series rather than in parallel keeps the load on the public endpoint low.
  for (let start = 0; start < idsToFetch.length; start += WIKIDATA_MAX_IDS_PER_REQUEST) {
    const batch = idsToFetch.slice(start, start + WIKIDATA_MAX_IDS_PER_REQUEST);

    let entities: WikidataEntityMap | null = null;
    try {
      entities = await fetchEntityClaims(batch, 'Batch fetch failed');
    } catch (error) {
      console.error(`[Wikidata] Batch fetch error:`, error);
    }

    for (const id of batch) {
      if (entities === null) {
        // Request failed: report null but leave the cache alone so a later
        // call can retry.
        results.set(id, null);
        continue;
      }
      const filename = extractImageFilename(entities[id]);
      wikidataImageCache.set(id, filename);
      results.set(id, toUrl(filename));
    }
  }

  return results;
}
/**
 * Empty the module-level Wikidata image cache.
 *
 * All cached entries are dropped, so later lookups for those entities go back
 * to the Wikidata API. Only `wikidataImageCache` is touched; requests that are
 * already in flight are left alone. Intended for tests or for forcing a
 * refresh when the upstream data may have changed.
 */
export function clearWikidataImageCache(): void {
  wikidataImageCache.clear();
}
/**
 * Report statistics about the Wikidata image cache.
 *
 * @returns `size` - number of entries currently cached;
 *          `hits` - placeholder equal to `size`. No hit counter is maintained,
 *          so this is NOT a count of cache hits.
 */
export function getWikidataCacheStats(): { size: number; hits: number } {
  const entryCount = wikidataImageCache.size;
  // Simplified: real hit counting would need extra tracking in the fetchers.
  return { size: entryCount, hits: entryCount };
}