// Listing metadata: 193 lines, 5.9 KiB, TypeScript
/**
 * Wikidata utility functions for fetching institution metadata.
 *
 * Used to fetch image URLs from Wikidata when we only have a wikidata_id (Q-number).
 */
|
|
|
|
// Cache for Wikidata image URLs to avoid redundant API calls
|
|
// Keyed by entity ID; a null value records a lookup that yielded no image URL.
const wikidataImageCache: Map<string, string | null> = new Map();
|
|
|
|
// In-flight requests to prevent duplicate fetches
|
|
// Keyed by entity ID; each value is the pending lookup that concurrent callers share.
const inFlightRequests: Map<string, Promise<string | null>> = new Map();
|
|
|
|
/**
 * Fetch the main image URL for a Wikidata entity.
 *
 * Reads the entity's image property (P18) through the Wikidata
 * `wbgetentities` API and turns the returned Commons filename into a
 * `Special:FilePath` URL.
 *
 * Outcomes are cached per entity ID, including `null` for "no image" and for
 * failed lookups, and concurrent calls for the same ID share one request.
 * The requested `width` is applied on every return path, so a cached or
 * in-flight URL that was built for a different width is re-targeted instead
 * of being returned unchanged.
 *
 * @param wikidataId - Wikidata entity ID (e.g., "Q190804" for Rijksmuseum)
 * @param width - Desired image width in pixels (default 800)
 * @returns Wikimedia Commons image URL, or null if the ID is malformed, the
 *   entity has no image, or the lookup failed
 */
export async function fetchWikidataImageUrl(
  wikidataId: string,
  width: number = 800
): Promise<string | null> {
  // Accept only well-formed Q-numbers. The ID is interpolated into the API
  // URL below, so anything else (e.g. "Q1&action=...") must never reach it.
  if (typeof wikidataId !== 'string' || !/^Q\d+$/.test(wikidataId)) {
    return null;
  }

  // Cached and in-flight URLs carry the width of whichever call created them;
  // swap in this call's width before handing one back.
  const withRequestedWidth = (url: string | null | undefined): string | null =>
    url ? url.replace(/\?width=[^?]*$/, `?width=${width}`) : null;

  // Check cache first
  if (wikidataImageCache.has(wikidataId)) {
    return withRequestedWidth(wikidataImageCache.get(wikidataId));
  }

  // Join an in-flight request for the same entity instead of issuing another
  const pending = inFlightRequests.get(wikidataId);
  if (pending) {
    return withRequestedWidth(await pending);
  }

  // Store the outcome in the cache and pass it through.
  const remember = (imageUrl: string | null): string | null => {
    wikidataImageCache.set(wikidataId, imageUrl);
    return imageUrl;
  };

  const lookup = async (): Promise<string | null> => {
    try {
      // Ask for claims only; the image filename lives in the P18 claim
      const url = `https://www.wikidata.org/w/api.php?action=wbgetentities&ids=${wikidataId}&props=claims&format=json&origin=*`;

      const response = await fetch(url);
      if (!response.ok) {
        console.warn(`[Wikidata] Failed to fetch ${wikidataId}: ${response.status}`);
        return remember(null);
      }

      const data = await response.json();

      // P18 is the "image" property; its value is the filename on Wikimedia
      // Commons. A missing entity, missing claim, or non-string value all
      // mean "no image".
      const filename: unknown =
        data?.entities?.[wikidataId]?.claims?.P18?.[0]?.mainsnak?.datavalue?.value;
      if (typeof filename !== 'string' || filename === '') {
        return remember(null);
      }

      // Replace spaces with underscores and encode the filename
      const encodedFilename = encodeURIComponent(filename.replace(/ /g, '_'));
      return remember(
        `https://commons.wikimedia.org/wiki/Special:FilePath/${encodedFilename}?width=${width}`
      );
    } catch (error) {
      console.error(`[Wikidata] Error fetching image for ${wikidataId}:`, error);
      return remember(null);
    }
  };

  // Clean up via .finally() on the outer promise: its callback always runs
  // after the set() below, even if lookup() fails before its first await, so
  // no stale in-flight entry can be left behind.
  const fetchPromise = lookup().finally(() => {
    inFlightRequests.delete(wikidataId);
  });
  inFlightRequests.set(wikidataId, fetchPromise);

  return fetchPromise;
}
|
|
|
|
/**
 * Batch fetch Wikidata image URLs for multiple entities.
 *
 * More efficient than individual calls when loading many institutions:
 * uncached IDs are de-duplicated and requested in groups of up to 50 per
 * `wbgetentities` call, one group after another.
 *
 * Every input ID gets an entry in the returned map. Malformed IDs map to
 * null without being sent to the API, so one bad ID cannot fail the request
 * for the rest of its batch. Cached URLs are re-targeted to the requested
 * `width`. A failed request maps (and caches) every ID of that batch to null.
 *
 * @param wikidataIds - Array of Wikidata entity IDs
 * @param width - Desired image width in pixels (default 800)
 * @returns Map of wikidataId -> imageUrl (or null)
 */
export async function fetchWikidataImageUrlsBatch(
  wikidataIds: string[],
  width: number = 800
): Promise<Map<string, string | null>> {
  const results = new Map<string, string | null>();
  const entityIdPattern = /^Q\d+$/;

  // Cached URLs carry the width of whichever call created them; swap in this
  // call's width before returning one.
  const withRequestedWidth = (url: string | null | undefined): string | null =>
    url ? url.replace(/\?width=[^?]*$/, `?width=${width}`) : null;

  // Write one outcome to both the shared cache and this call's result map.
  const record = (id: string, imageUrl: string | null): void => {
    wikidataImageCache.set(id, imageUrl);
    results.set(id, imageUrl);
  };

  // Split the input into invalid IDs, cache hits, and IDs still to fetch.
  // A Set drops duplicates so each entity is requested at most once.
  const uncached = new Set<string>();
  for (const id of wikidataIds) {
    if (typeof id !== 'string' || !entityIdPattern.test(id)) {
      results.set(id, null);
    } else if (wikidataImageCache.has(id)) {
      results.set(id, withRequestedWidth(wikidataImageCache.get(id)));
    } else {
      uncached.add(id);
    }
  }

  const idsToFetch = Array.from(uncached);

  // Wikidata API supports up to 50 entities per request
  const batchSize = 50;

  for (let i = 0; i < idsToFetch.length; i += batchSize) {
    const batch = idsToFetch.slice(i, i + batchSize);

    try {
      const url = `https://www.wikidata.org/w/api.php?action=wbgetentities&ids=${batch.join('|')}&props=claims&format=json&origin=*`;

      const response = await fetch(url);
      if (!response.ok) {
        console.warn(`[Wikidata] Batch fetch failed: ${response.status}`);
        batch.forEach(id => record(id, null));
        continue;
      }

      const data = await response.json();

      for (const id of batch) {
        // P18 is the "image" property; its value is a Commons filename.
        const filename: unknown =
          data?.entities?.[id]?.claims?.P18?.[0]?.mainsnak?.datavalue?.value;

        if (typeof filename === 'string' && filename !== '') {
          const encodedFilename = encodeURIComponent(filename.replace(/ /g, '_'));
          record(
            id,
            `https://commons.wikimedia.org/wiki/Special:FilePath/${encodedFilename}?width=${width}`
          );
        } else {
          record(id, null);
        }
      }
    } catch (error) {
      console.error(`[Wikidata] Batch fetch error:`, error);
      batch.forEach(id => record(id, null));
    }
  }

  return results;
}
|
|
|
|
/**
 * Empty the Wikidata image cache.
 *
 * Handy in tests, or when the underlying Wikidata entries may have changed.
 * Only the result cache is emptied; the in-flight request map is left alone.
 */
export function clearWikidataImageCache(): void {
  wikidataImageCache.clear();
}
|
|
|
|
/**
 * Report basic statistics about the Wikidata image cache.
 *
 * @returns `size` — the number of cached entries; `hits` — currently the same
 *   number, because no separate hit counter is maintained
 */
export function getWikidataCacheStats(): { size: number; hits: number } {
  const { size } = wikidataImageCache;

  // Simplified - actual hit counting would need more tracking
  return { size, hits: size };
}
|