/**
 * Context Node Matching Utilities
 *
 * Provides functions for matching RAG context nodes (from Qdrant, SPARQL, TypeDB)
 * against knowledge graph nodes (from Oxigraph).
 *
 * The core challenge: context nodes have IDs like "qdrant:123" while graph nodes
 * have URIs like "http://example.org/entity/123". Matching is done via:
 * - Normalized name matching
 * - GHCID matching
 * - ISIL code matching
 * - Wikidata ID matching
 */

import type { GraphNode, GraphData } from '../components/database/KnowledgeGraphProjector';
/**
 * Source database colors for multi-source visualization.
 *
 * Keys are the known source names; values are CSS hex color strings.
 * Declared `as const` so the keys can be reused as a literal union type.
 */
export const SOURCE_COLORS = {
  qdrant: '#6366f1', // Indigo for vector search
  sparql: '#10b981', // Emerald for SPARQL/Oxigraph
  typedb: '#f59e0b', // Amber for TypeDB
} as const;

/** Union of the source names that have an entry in `SOURCE_COLORS`. */
export type SourceType = keyof typeof SOURCE_COLORS;

/**
 * Context info returned for matched nodes.
 *
 * Every field is optional because it is copied from the matched context
 * node's attributes, which may not define it.
 */
export interface ContextNodeInfo {
  /** Value of the matched context node's `source` attribute. */
  source?: string;
  /** Value of the matched context node's `sources` attribute. */
  sources?: string[];
  /** Value of the matched context node's `sourceColor` attribute. */
  sourceColor?: string;
  /** Value of the matched context node's `score` attribute. */
  score?: number;
}

/**
 * Options accepted by the context-matching helpers in this module.
 */
export interface ContextNodeMatchingOptions {
  /** Enable debug logging (diagnostics are written with `console.log`). */
  debug?: boolean;
}

/**
 * Normalizes a name for matching by lowercasing and trimming.
 *
 * Only primitive values (string, number, bigint, boolean) produce a name.
 * `null`, `undefined`, objects, arrays, functions and symbols yield `''`:
 * stringifying an object gives "[object Object]", which would make every
 * object-valued name compare equal to every other one.
 *
 * @param name - Raw attribute or label value of unknown type
 * @returns The lowercased, trimmed text, or `''` when there is no usable name
 */
export function normalizeName(name: unknown): string {
  switch (typeof name) {
    case 'string':
      return name.toLowerCase().trim();
    case 'number':
    case 'bigint':
    case 'boolean':
      return String(name).toLowerCase().trim();
    default:
      // Covers undefined, null (typeof 'object'), objects, functions, symbols.
      return '';
  }
}

/**
 * Pattern to extract GHCID from heritage custodian URIs.
 * Matches: https://w3id.org/heritage/ghcid/{GHCID}
 *
 * The whole string must match (anchored at both ends), http and https are
 * both accepted, and matching is case-insensitive. The captured GHCID is two
 * letters, a hyphen, then letters, digits or hyphens.
 */
const GHCID_URI_PATTERN = /^https?:\/\/w3id\.org\/heritage\/ghcid\/([A-Z]{2}-[A-Z0-9-]+)$/i;

/**
 * Extracts GHCID from a node URI if it matches the heritage custodian pattern.
 *
 * @param uri - Node identifier to inspect
 * @returns The GHCID in upper case, or null if the URI doesn't contain a GHCID
 *          (including when `uri` is not a string at runtime)
 */
export function extractGhcidFromUri(uri: string): string | null {
  // Node ids come from external data; a missing id must not throw here.
  if (typeof uri !== 'string') return null;

  const match = GHCID_URI_PATTERN.exec(uri);
  return match?.[1]?.toUpperCase() ?? null;
}

/**
 * Extracts potential matching keys from a node's attributes.
 * Returns a set of keys: the raw node id plus normalized "type:value" keys.
 *
 * Handles both:
 * - Qdrant nodes: Have attributes like { ghcid, name, city }
 * - Oxigraph nodes: Have URIs like https://w3id.org/heritage/ghcid/{GHCID} and label
 *
 * Keys produced, in insertion order:
 * - the node id, unprefixed
 * - `ghcid:{GHCID}` parsed from the id when it is a heritage custodian URI
 * - `name:{normalized}` for each of `attributes.name`,
 *   `attributes.custodian_name` and `label` that normalizes to non-empty text
 * - `ghcid:{UPPERCASED}` from `attributes.ghcid`
 * - `isil:{value}` from `attributes.isil_code`
 * - `wikidata:{value}` from `attributes.wikidata_id`
 *
 * @param node - Node to derive keys from
 * @returns Set of matching keys for the node
 */
export function extractMatchingKeys(node: GraphNode): Set<string> {
  const keys = new Set<string>();
  const attrs = node.attributes ?? {};

  // Add direct ID
  keys.add(node.id);

  // Extract GHCID from URI (for Oxigraph nodes)
  // URIs like: https://w3id.org/heritage/ghcid/NL-FR-WOL-M-K
  const ghcidFromUri = extractGhcidFromUri(node.id);
  if (ghcidFromUri) {
    keys.add(`ghcid:${ghcidFromUri}`);
  }

  // Match by name (normalized). Every candidate gets its own key so that a
  // node can match on any of them; the label is the primary one for Oxigraph.
  for (const candidate of [attrs.name, attrs.custodian_name, node.label]) {
    const normalized = normalizeName(candidate);
    if (normalized) {
      keys.add(`name:${normalized}`);
    }
  }

  // Match by GHCID from attributes (for Qdrant nodes)
  if (attrs.ghcid) {
    keys.add(`ghcid:${String(attrs.ghcid).toUpperCase()}`);
  }

  // Match by ISIL
  if (attrs.isil_code) {
    keys.add(`isil:${String(attrs.isil_code)}`);
  }

  // Match by Wikidata ID
  if (attrs.wikidata_id) {
    keys.add(`wikidata:${String(attrs.wikidata_id)}`);
  }

  return keys;
}

/**
 * Builds a set of context node IDs for efficient matching.
 * The set is the union of `extractMatchingKeys` over all context nodes and
 * can be used to check if a graph node matches any context result.
 *
 * @param contextNodes - Array of nodes from RAG context (Qdrant, SPARQL, TypeDB);
 *                       a missing or empty array yields an empty set
 * @param options - Optional configuration; `debug` logs a sample to the console
 * @returns Set of normalized matching keys
 */
export function buildContextNodeIds(
  contextNodes: GraphNode[],
  options: ContextNodeMatchingOptions = {}
): Set<string> {
  const ids = new Set<string>();

  if (!contextNodes || contextNodes.length === 0) {
    return ids;
  }

  for (const node of contextNodes) {
    for (const key of extractMatchingKeys(node)) {
      ids.add(key);
    }
  }

  // Debug logging (the array is known to be non-empty at this point)
  if (options.debug) {
    const sampleNode = contextNodes[0];
    console.log('[Context Matching] Context nodes sample:', {
      totalContextNodes: contextNodes.length,
      sampleId: sampleNode.id,
      sampleLabel: sampleNode.label,
      sampleAttrs: sampleNode.attributes,
      matchingKeysGenerated: Array.from(ids).slice(0, 10),
    });
  }

  return ids;
}

/**
 * Checks if a graph node matches any context result.
 *
 * Handles both:
 * - Qdrant nodes: Have attributes like { ghcid, name, city }
 * - Oxigraph nodes: Have URIs like https://w3id.org/heritage/ghcid/{GHCID} and label
 *
 * The node's keys are derived with `extractMatchingKeys`, the same function
 * used to build `contextNodeIds`, so both sides of the comparison follow
 * identical rules (direct id, GHCID from URI or attribute, every name
 * candidate, ISIL code, Wikidata ID).
 *
 * @param node - A graph node from the full knowledge graph
 * @param contextNodeIds - Set of matching keys from buildContextNodeIds
 * @returns true if the node matches any context result
 */
export function isNodeInContext(
  node: GraphNode,
  contextNodeIds: Set<string>
): boolean {
  // Nothing can match an empty context; skip key extraction entirely.
  if (contextNodeIds.size === 0) return false;

  for (const key of extractMatchingKeys(node)) {
    if (contextNodeIds.has(key)) return true;
  }

  return false;
}

/**
 * Finds the matching context node for a graph node and returns its info.
 *
 * Handles both:
 * - Qdrant nodes: Have attributes like { ghcid, name, city }
 * - Oxigraph nodes: Have URIs like https://w3id.org/heritage/ghcid/{GHCID} and label
 *
 * Two nodes match when their `extractMatchingKeys` sets share at least one
 * key. These are the same rules `isNodeInContext` relies on, so a node
 * reported as "in context" always resolves to a context node here. The first
 * matching context node in array order wins.
 *
 * @param node - A graph node from the full knowledge graph
 * @param contextNodes - Array of nodes from RAG context
 * @returns Context info (source, sources, sourceColor, score) read from the
 *          matched context node's attributes, or null if no match
 */
export function getContextNodeInfo(
  node: GraphNode,
  contextNodes: GraphNode[]
): ContextNodeInfo | null {
  if (!contextNodes || contextNodes.length === 0) return null;

  // Computed once; each candidate's keys are probed against this set.
  const nodeKeys = extractMatchingKeys(node);

  // Find matching context node
  const contextNode = contextNodes.find(candidate => {
    for (const key of extractMatchingKeys(candidate)) {
      if (nodeKeys.has(key)) return true;
    }
    return false;
  });

  if (!contextNode) return null;

  const matchedAttrs = contextNode.attributes;
  return {
    source: matchedAttrs?.source as string | undefined,
    sources: matchedAttrs?.sources as string[] | undefined,
    sourceColor: matchedAttrs?.sourceColor as string | undefined,
    score: matchedAttrs?.score as number | undefined,
  };
}

/**
 * Enhanced node with context highlighting attributes.
 *
 * Extends a graph node's attributes with the flags and source details that
 * `mergeContextHighlighting` writes.
 */
export interface EnhancedGraphNode extends GraphNode {
  attributes: GraphNode['attributes'] & {
    /** True when the node matched a context result. */
    isContextResult?: boolean;
    /** True when the matched source is currently visible; only set on context results. */
    isSourceVisible?: boolean;
    /** True when the node did not match, or its source is not visible. */
    dimmed?: boolean;
    /** `source` attribute copied from the matched context node. */
    source?: string;
    /** `sources` attribute copied from the matched context node. */
    sources?: string[];
    /** `sourceColor` attribute copied from the matched context node. */
    sourceColor?: string;
    /** `score` attribute copied from the matched context node. */
    score?: number;
  };
}

/**
 * Merges context highlighting into full graph data.
 * Nodes matching context results are marked as highlighted;
 * non-matching nodes are marked as dimmed.
 *
 * Input nodes are not mutated: each output node is a shallow copy with a new
 * `attributes` object. Edges are passed through unchanged. When there are no
 * context nodes, nothing matches and every node is returned dimmed.
 *
 * @param graphData - Full graph data from Oxigraph; a missing value yields an empty graph
 * @param contextData - Context graph data from RAG results
 * @param visibleSources - Set of source types currently visible (for filtering)
 * @param options - Optional configuration; `debug` logs matching diagnostics
 * @returns Enhanced graph data with highlighting attributes
 */
export function mergeContextHighlighting(
  graphData: GraphData,
  contextData: GraphData,
  visibleSources: Set<string>,
  options: ContextNodeMatchingOptions = {}
): GraphData {
  if (!graphData) {
    return { nodes: [], edges: [] };
  }

  const contextNodes = contextData?.nodes || [];
  const contextNodeIds = buildContextNodeIds(contextNodes, options);

  let matchedCount = 0;
  const enhancedNodes: EnhancedGraphNode[] = graphData.nodes.map(node => {
    if (!isNodeInContext(node, contextNodeIds)) {
      // Not in context - return with dimmed styling
      return {
        ...node,
        attributes: {
          ...node.attributes,
          isContextResult: false,
          dimmed: true,
        },
      };
    }

    matchedCount++;

    // Node is in context - add highlighting and source info
    const contextInfo = getContextNodeInfo(node, contextNodes);
    const source = contextInfo?.source;
    const sources = contextInfo?.sources;

    // Check if this source is currently visible. A non-empty `sources` list
    // takes precedence over the single `source`; a node with neither stays
    // visible.
    let isSourceVisible = true;
    if (sources && sources.length > 0) {
      isSourceVisible = sources.some(s => visibleSources.has(s));
    } else if (source) {
      isSourceVisible = visibleSources.has(source);
    }

    return {
      ...node,
      attributes: {
        ...node.attributes,
        isContextResult: true,
        isSourceVisible,
        dimmed: !isSourceVisible,
        source,
        sources,
        sourceColor: contextInfo?.sourceColor,
        score: contextInfo?.score,
      },
    };
  });

  // Debug logging
  if (options.debug && graphData.nodes.length > 0) {
    const sampleGraphNode = graphData.nodes[0];
    const sampleContextNode = contextData?.nodes?.[0];
    console.log('[Context Matching] Graph nodes vs context matching:', {
      totalGraphNodes: graphData.nodes.length,
      contextNodeIdsSize: contextNodeIds.size,
      matchedNodes: matchedCount,
      sampleGraphNode: {
        id: sampleGraphNode.id,
        label: sampleGraphNode.label,
        attrs: sampleGraphNode.attributes,
        extractedKeys: Array.from(extractMatchingKeys(sampleGraphNode)),
      },
      sampleContextNode: sampleContextNode ? {
        id: sampleContextNode.id,
        label: sampleContextNode.label,
        attrs: sampleContextNode.attributes,
        extractedKeys: Array.from(extractMatchingKeys(sampleContextNode)),
      } : null,
      contextKeysSample: Array.from(contextNodeIds).slice(0, 15),
    });
  }

  return {
    nodes: enhancedNodes,
    edges: graphData.edges,
  };
}

/**
 * Counts how many graph nodes match context results.
 * Useful for debugging and UI display.
 *
 * @param graphNodes - Nodes of the full graph; a missing array is treated as empty
 * @param contextNodes - Nodes from RAG context
 * @returns `matched` (graph nodes in context), `total` (graph nodes examined)
 *          and `percentage` (matched / total rounded to a whole percent,
 *          0 when there are no graph nodes)
 */
export function countContextMatches(
  graphNodes: GraphNode[],
  contextNodes: GraphNode[]
): { matched: number; total: number; percentage: number } {
  // The sibling helpers tolerate a missing context array; do the same for
  // the graph side instead of throwing.
  const nodes = graphNodes ?? [];
  const contextNodeIds = buildContextNodeIds(contextNodes);

  const matched = nodes.reduce(
    (count, node) => (isNodeInContext(node, contextNodeIds) ? count + 1 : count),
    0
  );

  const total = nodes.length;
  // Guard the division so an empty graph reports 0% rather than NaN.
  const percentage = total > 0 ? Math.round((matched / total) * 100) : 0;

  return { matched, total, percentage };
}