/**
 * Context Node Matching Utilities
 *
 * Provides functions for matching RAG context nodes (from Qdrant, SPARQL, TypeDB)
 * against knowledge graph nodes (from Oxigraph).
 *
 * The core challenge: Context nodes have IDs like "qdrant:123" while graph nodes
 * have URIs like "http://example.org/entity/123". Matching is done via:
 * - Normalized name matching
 * - GHCID matching
 * - ISIL code matching
 * - Wikidata ID matching
 *
 * Every matcher in this module derives its criteria from `extractMatchingKeys`,
 * so "is this node in context?" and "which context node matched?" always agree.
 */

import type { GraphNode, GraphData } from '../components/database/KnowledgeGraphProjector';

/**
 * Source database colors for multi-source visualization
 */
export const SOURCE_COLORS = {
  qdrant: '#6366f1', // Indigo for vector search
  sparql: '#10b981', // Emerald for SPARQL/Oxigraph
  typedb: '#f59e0b', // Amber for TypeDB
} as const;

export type SourceType = keyof typeof SOURCE_COLORS;

/**
 * Context info returned for matched nodes
 */
export interface ContextNodeInfo {
  source?: string;
  sources?: string[];
  sourceColor?: string;
  score?: number;
}

/**
 * Options for building context node IDs
 */
export interface ContextNodeMatchingOptions {
  /** Enable debug logging */
  debug?: boolean;
}

/**
 * Normalizes a name for matching by lowercasing and trimming.
 *
 * @param name - Any value; `null` and `undefined` normalize to the empty string,
 *   everything else is converted with `String()` first.
 * @returns The lowercased, trimmed string form of `name`
 */
export function normalizeName(name: unknown): string {
  if (name === null || name === undefined) return '';
  return String(name).toLowerCase().trim();
}

/**
 * Pattern to extract GHCID from heritage custodian URIs
 * Matches: https://w3id.org/heritage/ghcid/{GHCID}
 */
const GHCID_URI_PATTERN = /^https?:\/\/w3id\.org\/heritage\/ghcid\/([A-Z]{2}-[A-Z0-9-]+)$/i;

/**
 * Extracts GHCID from a node URI if it matches the heritage custodian pattern.
 *
 * @param uri - Node identifier to inspect
 * @returns The upper-cased GHCID, or null if the URI doesn't contain a GHCID
 */
export function extractGhcidFromUri(uri: string): string | null {
  const match = uri.match(GHCID_URI_PATTERN);
  // Capture group 1 always exists on a successful match; optional chaining
  // just keeps the access safe under `noUncheckedIndexedAccess`.
  return match?.[1]?.toUpperCase() ?? null;
}

/**
 * Extracts potential matching keys from a node's attributes.
 * Returns a set of normalized keys in format "type:value"
 * (plus the node's raw ID, which is added without a prefix).
 *
 * Handles both:
 * - Qdrant nodes: Have attributes like { ghcid, name, city }
 * - Oxigraph nodes: Have URIs like https://w3id.org/heritage/ghcid/{GHCID} and label
 *
 * @param node - Graph or context node to derive keys from
 * @returns Set containing the raw ID and any `ghcid:`, `name:`, `isil:`, `wikidata:` keys
 */
export function extractMatchingKeys(node: GraphNode): Set<string> {
  const keys = new Set<string>();
  const attrs = node.attributes || {};

  // Add direct ID
  keys.add(node.id);

  // Extract GHCID from URI (for Oxigraph nodes)
  // URIs like: https://w3id.org/heritage/ghcid/NL-FR-WOL-M-K
  const ghcidFromUri = extractGhcidFromUri(node.id);
  if (ghcidFromUri) {
    keys.add(`ghcid:${ghcidFromUri}`);
  }

  // Match by name (normalized) - every candidate gets its own key so a node
  // can be found through its name, its custodian name, or its label.
  // The label is the primary match for Oxigraph nodes.
  for (const candidate of [attrs.name, attrs.custodian_name, node.label]) {
    const normalized = normalizeName(candidate);
    if (normalized) {
      keys.add(`name:${normalized}`);
    }
  }

  // Match by GHCID from attributes (for Qdrant nodes)
  if (attrs.ghcid) {
    keys.add(`ghcid:${String(attrs.ghcid).toUpperCase()}`);
  }

  // Match by ISIL
  if (attrs.isil_code) {
    keys.add(`isil:${String(attrs.isil_code)}`);
  }

  // Match by Wikidata ID
  if (attrs.wikidata_id) {
    keys.add(`wikidata:${String(attrs.wikidata_id)}`);
  }

  return keys;
}

/**
 * Maps every matching key to the index of the FIRST context node that produces it.
 * Storing indices (rather than nodes) lets lookups preserve array-order priority.
 */
function buildFirstIndexByKey(contextNodes: readonly GraphNode[]): Map<string, number> {
  const firstIndexByKey = new Map<string, number>();
  contextNodes.forEach((contextNode, index) => {
    for (const key of extractMatchingKeys(contextNode)) {
      if (!firstIndexByKey.has(key)) {
        firstIndexByKey.set(key, index);
      }
    }
  });
  return firstIndexByKey;
}

/**
 * Returns the earliest context node (in array order) sharing at least one
 * matching key with `node`, or undefined when none does.
 */
function findContextNode(
  node: GraphNode,
  contextNodes: readonly GraphNode[],
  firstIndexByKey: ReadonlyMap<string, number>
): GraphNode | undefined {
  let firstIndex = -1;
  for (const key of extractMatchingKeys(node)) {
    const index = firstIndexByKey.get(key);
    if (index !== undefined && (firstIndex === -1 || index < firstIndex)) {
      firstIndex = index;
    }
  }
  return firstIndex === -1 ? undefined : contextNodes[firstIndex];
}

/**
 * Projects the highlighting-related attributes of a context node into a ContextNodeInfo.
 */
function toContextNodeInfo(contextNode: GraphNode): ContextNodeInfo {
  return {
    source: contextNode.attributes?.source as string | undefined,
    sources: contextNode.attributes?.sources as string[] | undefined,
    sourceColor: contextNode.attributes?.sourceColor as string | undefined,
    score: contextNode.attributes?.score as number | undefined,
  };
}

/**
 * Builds a set of context node IDs for efficient matching.
 * The set contains normalized keys that can be used to check if
 * a graph node matches any context result.
 *
 * @param contextNodes - Array of nodes from RAG context (Qdrant, SPARQL, TypeDB)
 * @param options - Optional configuration
 * @returns Set of normalized matching keys (empty when there are no context nodes)
 */
export function buildContextNodeIds(
  contextNodes: GraphNode[],
  options: ContextNodeMatchingOptions = {}
): Set<string> {
  const ids = new Set<string>();

  if (!contextNodes || contextNodes.length === 0) {
    return ids;
  }

  for (const contextNode of contextNodes) {
    for (const key of extractMatchingKeys(contextNode)) {
      ids.add(key);
    }
  }

  // Debug logging
  const sampleNode = contextNodes[0];
  if (options.debug && sampleNode) {
    console.log('[Context Matching] Context nodes sample:', {
      totalContextNodes: contextNodes.length,
      sampleId: sampleNode.id,
      sampleLabel: sampleNode.label,
      sampleAttrs: sampleNode.attributes,
      matchingKeysGenerated: Array.from(ids).slice(0, 10),
    });
  }

  return ids;
}

/**
 * Checks if a graph node matches any context result.
 *
 * The node matches when any key produced by `extractMatchingKeys` (raw ID,
 * GHCID from URI or attributes, any of name / custodian_name / label,
 * ISIL code, Wikidata ID) is present in `contextNodeIds`.
 *
 * Handles both:
 * - Qdrant nodes: Have attributes like { ghcid, name, city }
 * - Oxigraph nodes: Have URIs like https://w3id.org/heritage/ghcid/{GHCID} and label
 *
 * @param node - A graph node from the full knowledge graph
 * @param contextNodeIds - Set of matching keys from buildContextNodeIds
 * @returns true if the node matches any context result
 */
export function isNodeInContext(
  node: GraphNode,
  contextNodeIds: ReadonlySet<string>
): boolean {
  if (contextNodeIds.size === 0) return false;

  for (const key of extractMatchingKeys(node)) {
    if (contextNodeIds.has(key)) return true;
  }

  return false;
}

/**
 * Finds the matching context node for a graph node and returns its info.
 *
 * Uses the same key-based criteria as `isNodeInContext`, so a node reported as
 * in-context against `buildContextNodeIds(contextNodes)` always yields info here.
 * When several context nodes match, the first one in array order wins.
 *
 * Handles both:
 * - Qdrant nodes: Have attributes like { ghcid, name, city }
 * - Oxigraph nodes: Have URIs like https://w3id.org/heritage/ghcid/{GHCID} and label
 *
 * @param node - A graph node from the full knowledge graph
 * @param contextNodes - Array of nodes from RAG context
 * @returns Context info (source, score) or null if no match
 */
export function getContextNodeInfo(
  node: GraphNode,
  contextNodes: GraphNode[]
): ContextNodeInfo | null {
  if (!contextNodes || contextNodes.length === 0) return null;

  const contextNode = findContextNode(node, contextNodes, buildFirstIndexByKey(contextNodes));
  return contextNode ? toContextNodeInfo(contextNode) : null;
}

/**
 * Enhanced node with context highlighting attributes
 */
export interface EnhancedGraphNode extends GraphNode {
  attributes: GraphNode['attributes'] & {
    isContextResult?: boolean;
    isSourceVisible?: boolean;
    dimmed?: boolean;
    source?: string;
    sources?: string[];
    sourceColor?: string;
    score?: number;
  };
}

/**
 * Merges context highlighting into full graph data.
 * Nodes matching context results are marked as highlighted;
 * non-matching nodes are marked as dimmed. A matching node whose source(s)
 * are all hidden is also dimmed, but keeps `isContextResult: true`.
 *
 * @param graphData - Full graph data from Oxigraph
 * @param contextData - Context graph data from RAG results
 * @param visibleSources - Set of source types currently visible (for filtering)
 * @param options - Optional configuration
 * @returns Enhanced graph data with highlighting attributes; edges are passed through unchanged
 */
export function mergeContextHighlighting(
  graphData: GraphData,
  contextData: GraphData,
  visibleSources: ReadonlySet<string>,
  options: ContextNodeMatchingOptions = {}
): GraphData {
  if (!graphData) {
    return { nodes: [], edges: [] };
  }

  const contextNodes: GraphNode[] = contextData?.nodes || [];
  // Kept for its debug output and for the key statistics logged below.
  const contextNodeIds = buildContextNodeIds(contextNodes, options);
  // Built once so each graph node is resolved without rescanning the context.
  const firstIndexByKey = buildFirstIndexByKey(contextNodes);
  let matchedCount = 0;

  const enhancedNodes: EnhancedGraphNode[] = graphData.nodes.map((node: GraphNode) => {
    const contextNode = findContextNode(node, contextNodes, firstIndexByKey);

    if (!contextNode) {
      // Not in context - return with dimmed styling
      return {
        ...node,
        attributes: {
          ...node.attributes,
          isContextResult: false,
          dimmed: true,
        },
      };
    }

    matchedCount++;

    // Node is in context - add highlighting and source info
    const contextInfo = toContextNodeInfo(contextNode);
    const { source, sources } = contextInfo;

    // Check if this source is currently visible; a node without any source
    // information is always treated as visible.
    let isSourceVisible = true;
    if (sources && sources.length > 0) {
      isSourceVisible = sources.some(s => visibleSources.has(s));
    } else if (source) {
      isSourceVisible = visibleSources.has(source);
    }

    return {
      ...node,
      attributes: {
        ...node.attributes,
        isContextResult: true,
        isSourceVisible,
        dimmed: !isSourceVisible,
        source: contextInfo.source,
        sources: contextInfo.sources,
        sourceColor: contextInfo.sourceColor,
        score: contextInfo.score,
      },
    };
  });

  // Debug logging
  const sampleGraphNode = graphData.nodes[0];
  if (options.debug && sampleGraphNode) {
    const sampleContextNode = contextNodes[0];
    console.log('[Context Matching] Graph nodes vs context matching:', {
      totalGraphNodes: graphData.nodes.length,
      contextNodeIdsSize: contextNodeIds.size,
      matchedNodes: matchedCount,
      sampleGraphNode: {
        id: sampleGraphNode.id,
        label: sampleGraphNode.label,
        attrs: sampleGraphNode.attributes,
        extractedKeys: Array.from(extractMatchingKeys(sampleGraphNode)),
      },
      sampleContextNode: sampleContextNode
        ? {
            id: sampleContextNode.id,
            label: sampleContextNode.label,
            attrs: sampleContextNode.attributes,
            extractedKeys: Array.from(extractMatchingKeys(sampleContextNode)),
          }
        : null,
      contextKeysSample: Array.from(contextNodeIds).slice(0, 15),
    });
  }

  return {
    nodes: enhancedNodes,
    edges: graphData.edges,
  };
}

/**
 * Counts how many graph nodes match context results
 * Useful for debugging and UI display
 *
 * @param graphNodes - Nodes of the full graph (a missing array counts as empty)
 * @param contextNodes - Nodes from RAG context
 * @returns Matched count, total count, and the rounded match percentage (0 when total is 0)
 */
export function countContextMatches(
  graphNodes: GraphNode[],
  contextNodes: GraphNode[]
): { matched: number; total: number; percentage: number } {
  const nodes = graphNodes ?? [];
  const contextNodeIds = buildContextNodeIds(contextNodes);

  const matched = nodes.filter(node => isNodeInContext(node, contextNodeIds)).length;
  const total = nodes.length;
  const percentage = total > 0 ? Math.round((matched / total) * 100) : 0;

  return { matched, total, percentage };
}