837 lines
26 KiB
TypeScript
837 lines
26 KiB
TypeScript
/**
 * Unit tests for Context Node Matching utilities
 *
 * These tests verify the matching logic between:
 * - Context nodes (from Qdrant, SPARQL, TypeDB) with IDs like "qdrant:123"
 * - Graph nodes (from Oxigraph) with IDs like "http://data.glam.nl/custodian/123"
 */
|
|
|
|
import { describe, it, expect } from 'vitest';
|
|
import {
|
|
normalizeName,
|
|
extractMatchingKeys,
|
|
buildContextNodeIds,
|
|
isNodeInContext,
|
|
getContextNodeInfo,
|
|
mergeContextHighlighting,
|
|
countContextMatches,
|
|
SOURCE_COLORS,
|
|
} from '../../src/utils/contextNodeMatching';
|
|
import type { GraphNode, GraphData } from '../../src/components/database/KnowledgeGraphProjector';
|
|
|
|
// ============================================================================
|
|
// Test Fixtures
|
|
// ============================================================================
|
|
|
|
/**
 * Context-node fixtures mimicking RAG results (Qdrant, SPARQL, TypeDB) —
 * they simulate what comes back from the useMultiDatabaseRAG hook.
 *
 * Each ID is prefixed with the database it came from, and the three entries
 * carry progressively fewer identifiers so different matching keys are covered.
 */
const mockContextNodes: GraphNode[] = [
  // Qdrant hit: name, GHCID, ISIL code and Wikidata ID are all present.
  {
    id: 'qdrant:rijksmuseum-001',
    label: 'Rijksmuseum',
    type: 'custodian',
    entityType: 'Museum',
    attributes: {
      name: 'Rijksmuseum',
      source: 'qdrant',
      score: 0.95,
      ghcid: 'NL-NH-AMS-M-RM',
      isil_code: 'NL-AsdRM',
      wikidata_id: 'Q190804',
    },
  },
  // SPARQL hit: name, GHCID and ISIL code, but no Wikidata ID.
  {
    id: 'sparql:amsterdam-museum-002',
    label: 'Amsterdam Museum',
    type: 'custodian',
    entityType: 'Museum',
    attributes: {
      name: 'Amsterdam Museum',
      source: 'sparql',
      score: 0.88,
      ghcid: 'NL-NH-AMS-M-AM',
      isil_code: 'NL-AsdAM',
    },
  },
  // TypeDB hit: only a `custodian_name` (no `name`, no external identifiers).
  {
    id: 'typedb:stedelijk-003',
    label: 'Stedelijk Museum',
    type: 'custodian',
    entityType: 'Museum',
    attributes: {
      custodian_name: 'Stedelijk Museum Amsterdam',
      source: 'typedb',
      score: 0.82,
    },
  },
];
|
|
|
|
/**
 * Context-node fixture for a custodian found by multiple databases: it carries
 * a `sources` array and a `sourceColor` rather than a single `source` string.
 */
const mockMultiSourceNode: GraphNode = {
  id: 'multi:vangogh-004',
  label: 'Van Gogh Museum',
  type: 'custodian',
  entityType: 'Museum',
  attributes: {
    name: 'Van Gogh Museum',
    sources: ['qdrant', 'sparql'],
    sourceColor: SOURCE_COLORS.qdrant,
    score: 0.91,
    ghcid: 'NL-NH-AMS-M-VGM',
  },
};
|
|
|
|
/**
 * Graph-node fixtures standing in for the full Oxigraph graph.
 * IDs are URIs and attribute names may differ from the context fixtures.
 *
 * Index layout relied on by the tests below:
 *   [0]..[2] are expected to match a context node, [3]..[4] are not.
 */
const mockGraphNodes: GraphNode[] = [
  // [0] Rijksmuseum — expected to match by name
  {
    id: 'http://data.glam.nl/custodian/rijksmuseum',
    label: 'Rijksmuseum Amsterdam',
    type: 'custodian',
    entityType: 'Museum',
    attributes: { name: 'Rijksmuseum', ghcid: 'NL-NH-AMS-M-RM' },
  },
  // [1] Amsterdam Museum — name differs from the context node, GHCID is the same
  {
    id: 'http://data.glam.nl/custodian/amsterdam-museum',
    label: 'Amsterdam Municipal Museum',
    type: 'custodian',
    entityType: 'Museum',
    attributes: { name: 'Het Amsterdam Museum', ghcid: 'NL-NH-AMS-M-AM' },
  },
  // [2] Stedelijk — expected to match through the `custodian_name` attribute
  {
    id: 'http://data.glam.nl/custodian/stedelijk',
    label: 'Stedelijk',
    type: 'custodian',
    entityType: 'Museum',
    attributes: { custodian_name: 'Stedelijk Museum Amsterdam' },
  },
  // [3] Het Scheepvaartmuseum — not present in the context fixtures
  {
    id: 'http://data.glam.nl/custodian/maritiem',
    label: 'Het Scheepvaartmuseum',
    type: 'custodian',
    entityType: 'Museum',
    attributes: { name: 'Het Scheepvaartmuseum', ghcid: 'NL-NH-AMS-M-HSM' },
  },
  // [4] Anne Frank House — not present in the context fixtures
  {
    id: 'http://data.glam.nl/custodian/anne-frank',
    label: 'Anne Frank House',
    type: 'custodian',
    entityType: 'Museum',
    attributes: { name: 'Anne Frank Huis', ghcid: 'NL-NH-AMS-M-AFH' },
  },
];
|
|
|
|
// ============================================================================
|
|
// Tests: normalizeName
|
|
// ============================================================================
|
|
|
|
// normalizeName: lowercasing, trimming, nullish handling and string coercion.
describe('normalizeName', () => {
  it('should lowercase and trim strings', () => {
    const cases = [
      [' Rijksmuseum ', 'rijksmuseum'],
      ['AMSTERDAM MUSEUM', 'amsterdam museum'],
    ] as const;

    for (const [raw, expected] of cases) {
      expect(normalizeName(raw)).toBe(expected);
    }
  });

  it('should handle null and undefined', () => {
    // Both nullish inputs collapse to the empty string.
    for (const missing of [null, undefined]) {
      expect(normalizeName(missing)).toBe('');
    }
  });

  it('should convert non-strings to strings', () => {
    const cases = [
      [123, '123'],
      [true, 'true'],
    ] as const;

    for (const [value, expected] of cases) {
      expect(normalizeName(value)).toBe(expected);
    }
  });

  it('should preserve internal spaces', () => {
    const normalized = normalizeName('Van Gogh Museum');
    expect(normalized).toBe('van gogh museum');
  });
});
|
|
|
|
// ============================================================================
|
|
// Tests: extractMatchingKeys
|
|
// ============================================================================
|
|
|
|
// extractMatchingKeys: one test per key family (raw ID, name:*, ghcid:*, isil:*, wikidata:*).
describe('extractMatchingKeys', () => {
  /** Builds a minimal custodian/Museum node; only id, label and attributes vary per test. */
  const makeNode = (
    id: string,
    label: string,
    attributes: GraphNode['attributes'],
  ): GraphNode => ({
    id,
    label,
    type: 'custodian',
    entityType: 'Museum',
    attributes,
  });

  it('should extract ID', () => {
    const keys = extractMatchingKeys(makeNode('test-id-123', 'Test', {}));
    expect(keys.has('test-id-123')).toBe(true);
  });

  it('should extract name as normalized key', () => {
    const keys = extractMatchingKeys(makeNode('test-id', 'Test Label', { name: 'Rijksmuseum' }));
    expect(keys.has('name:rijksmuseum')).toBe(true);
  });

  it('should extract custodian_name as name key', () => {
    const keys = extractMatchingKeys(
      makeNode('test-id', 'Test Label', { custodian_name: 'Amsterdam Museum' }),
    );
    expect(keys.has('name:amsterdam museum')).toBe(true);
  });

  it('should extract label as name key', () => {
    // No attributes at all: the label is the only source for a name key.
    const keys = extractMatchingKeys(makeNode('test-id', 'Stedelijk Museum', {}));
    expect(keys.has('name:stedelijk museum')).toBe(true);
  });

  it('should extract GHCID', () => {
    const keys = extractMatchingKeys(makeNode('test-id', 'Test', { ghcid: 'NL-NH-AMS-M-RM' }));
    expect(keys.has('ghcid:NL-NH-AMS-M-RM')).toBe(true);
  });

  it('should extract ISIL code', () => {
    const keys = extractMatchingKeys(makeNode('test-id', 'Test', { isil_code: 'NL-AsdRM' }));
    expect(keys.has('isil:NL-AsdRM')).toBe(true);
  });

  it('should extract Wikidata ID', () => {
    const keys = extractMatchingKeys(makeNode('test-id', 'Test', { wikidata_id: 'Q190804' }));
    expect(keys.has('wikidata:Q190804')).toBe(true);
  });

  it('should extract all matching keys from a fully populated node', () => {
    const keys = extractMatchingKeys(mockContextNodes[0]);
    const expectedKeys = [
      'qdrant:rijksmuseum-001',
      'name:rijksmuseum',
      'ghcid:NL-NH-AMS-M-RM',
      'isil:NL-AsdRM',
      'wikidata:Q190804',
    ];

    for (const key of expectedKeys) {
      expect(keys.has(key)).toBe(true);
    }
  });
});
|
|
|
|
// ============================================================================
|
|
// Tests: buildContextNodeIds
|
|
// ============================================================================
|
|
|
|
// buildContextNodeIds: tolerant of empty/nullish input and aggregates keys across nodes.
describe('buildContextNodeIds', () => {
  it('should return empty set for empty input', () => {
    // null/undefined are forced through the type system on purpose to probe
    // the runtime guard.
    const emptyInputs = [
      [],
      null as unknown as GraphNode[],
      undefined as unknown as GraphNode[],
    ];

    for (const input of emptyInputs) {
      expect(buildContextNodeIds(input).size).toBe(0);
    }
  });

  it('should build set from multiple context nodes', () => {
    const ids = buildContextNodeIds(mockContextNodes);

    // Raw IDs of all 3 nodes, followed by their normalized name keys.
    const expectedKeys = [
      'qdrant:rijksmuseum-001',
      'sparql:amsterdam-museum-002',
      'typedb:stedelijk-003',
      'name:rijksmuseum',
      'name:amsterdam museum',
      'name:stedelijk museum amsterdam',
    ];

    for (const key of expectedKeys) {
      expect(ids.has(key)).toBe(true);
    }
  });
});
|
|
|
|
// ============================================================================
|
|
// Tests: isNodeInContext
|
|
// ============================================================================
|
|
|
|
// isNodeInContext: each positive test isolates ONE matching strategy, so a
// regression in that strategy cannot hide behind another key that also matches.
describe('isNodeInContext', () => {
  // Key set built once from the shared context fixtures.
  const contextNodeIds = buildContextNodeIds(mockContextNodes);

  it('should return false for empty context', () => {
    expect(isNodeInContext(mockGraphNodes[0], new Set())).toBe(false);
  });

  it('should match by normalized name', () => {
    // The Rijksmuseum graph fixture shares its GHCID with the context node, so
    // used as-is it would still match even if name matching were broken.
    // Strip the GHCID so the `name` attribute is the only overlapping key
    // (the URI id and the label "Rijksmuseum Amsterdam" do not occur in context).
    const graphNode: GraphNode = {
      ...mockGraphNodes[0],
      attributes: { name: 'Rijksmuseum' },
    };
    expect(isNodeInContext(graphNode, contextNodeIds)).toBe(true);
  });

  it('should match by GHCID when names differ', () => {
    // Amsterdam Museum graph node has different name but same GHCID
    const graphNode = mockGraphNodes[1]; // "Het Amsterdam Museum" vs "Amsterdam Museum"
    expect(isNodeInContext(graphNode, contextNodeIds)).toBe(true);
  });

  it('should match by custodian_name attribute', () => {
    // Stedelijk graph node uses custodian_name and carries no GHCID
    const graphNode = mockGraphNodes[2];
    expect(isNodeInContext(graphNode, contextNodeIds)).toBe(true);
  });

  it('should NOT match nodes not in context', () => {
    // Het Scheepvaartmuseum is not in context
    const graphNode = mockGraphNodes[3];
    expect(isNodeInContext(graphNode, contextNodeIds)).toBe(false);
  });

  it('should NOT match Anne Frank House (not in context)', () => {
    const graphNode = mockGraphNodes[4];
    expect(isNodeInContext(graphNode, contextNodeIds)).toBe(false);
  });
});
|
|
|
|
// ============================================================================
|
|
// Tests: getContextNodeInfo
|
|
// ============================================================================
|
|
|
|
// getContextNodeInfo: null when nothing matches, otherwise the source details
// of the matching context node.
describe('getContextNodeInfo', () => {
  // [0] matches the Qdrant Rijksmuseum context node; [3] matches nothing.
  const [rijksmuseumNode, , , scheepvaartNode] = mockGraphNodes;

  it('should return null for empty context', () => {
    expect(getContextNodeInfo(rijksmuseumNode, [])).toBeNull();
    expect(getContextNodeInfo(rijksmuseumNode, null as unknown as GraphNode[])).toBeNull();
  });

  it('should return null for non-matching node', () => {
    expect(getContextNodeInfo(scheepvaartNode, mockContextNodes)).toBeNull();
  });

  it('should return source info for matching node', () => {
    const info = getContextNodeInfo(rijksmuseumNode, mockContextNodes);

    expect(info).not.toBeNull();
    expect(info?.source).toBe('qdrant');
    expect(info?.score).toBe(0.95);
  });

  it('should return sources array for multi-source node', () => {
    // Graph-side counterpart of the multi-source fixture (same GHCID).
    const vanGoghGraphNode: GraphNode = {
      id: 'http://data.glam.nl/custodian/vangogh',
      label: 'Van Gogh',
      type: 'custodian',
      entityType: 'Museum',
      attributes: {
        name: 'Van Gogh Museum',
        ghcid: 'NL-NH-AMS-M-VGM',
      },
    };

    // Regular context fixtures plus the multi-source node.
    const info = getContextNodeInfo(vanGoghGraphNode, [...mockContextNodes, mockMultiSourceNode]);

    expect(info).not.toBeNull();
    expect(info?.sources).toEqual(['qdrant', 'sparql']);
  });
});
|
|
|
|
// ============================================================================
|
|
// Tests: mergeContextHighlighting
|
|
// ============================================================================
|
|
|
|
// mergeContextHighlighting: annotates graph nodes with context-match flags,
// source details and dimming, honouring the set of visible sources.
describe('mergeContextHighlighting', () => {
  const [firstGraphNode, secondGraphNode] = mockGraphNodes;

  // Full graph: the five graph fixtures joined by a single edge.
  const graphData: GraphData = {
    nodes: mockGraphNodes,
    edges: [
      { id: 'edge-1', source: firstGraphNode.id, target: secondGraphNode.id, label: 'relatedTo' },
    ],
  };

  // RAG context: the three context fixtures, no edges.
  const contextData: GraphData = { nodes: mockContextNodes, edges: [] };

  const allSourcesVisible = new Set(['qdrant', 'sparql', 'typedb']);

  /** Runs the merge against the shared fixtures and returns the resulting nodes. */
  const highlightedNodes = (visibleSources: Set<string> = allSourcesVisible) =>
    mergeContextHighlighting(graphData, contextData, visibleSources).nodes;

  it('should return empty data for null graph', () => {
    const { nodes, edges } = mergeContextHighlighting(
      null as unknown as GraphData,
      contextData,
      allSourcesVisible,
    );

    expect(nodes).toEqual([]);
    expect(edges).toEqual([]);
  });

  it('should mark matching nodes as context results', () => {
    const nodes = highlightedNodes();

    // Indices 0-2 are context results, indices 3-4 are not.
    for (const index of [0, 1, 2]) {
      expect(nodes[index].attributes?.isContextResult).toBe(true);
    }
    for (const index of [3, 4]) {
      expect(nodes[index].attributes?.isContextResult).toBe(false);
    }
  });

  it('should mark non-matching nodes as dimmed', () => {
    const nodes = highlightedNodes();

    // Nodes without a context match are dimmed ...
    for (const index of [3, 4]) {
      expect(nodes[index].attributes?.dimmed).toBe(true);
    }
    // ... while matching nodes stay bright as long as their source is visible.
    for (const index of [0, 1]) {
      expect(nodes[index].attributes?.dimmed).toBe(false);
    }
  });

  it('should add source info to matching nodes', () => {
    const [qdrantMatch, sparqlMatch] = highlightedNodes();

    expect(qdrantMatch.attributes?.source).toBe('qdrant');
    expect(qdrantMatch.attributes?.score).toBe(0.95);

    expect(sparqlMatch.attributes?.source).toBe('sparql');
    expect(sparqlMatch.attributes?.score).toBe(0.88);
  });

  it('should respect source visibility filter', () => {
    // Restrict visibility to Qdrant results only.
    const [qdrantMatch, sparqlMatch] = highlightedNodes(new Set(['qdrant']));

    // The Qdrant-backed node remains visible and undimmed.
    expect(qdrantMatch.attributes?.isSourceVisible).toBe(true);
    expect(qdrantMatch.attributes?.dimmed).toBe(false);

    // The SPARQL-backed node is hidden by the filter and therefore dimmed.
    expect(sparqlMatch.attributes?.isSourceVisible).toBe(false);
    expect(sparqlMatch.attributes?.dimmed).toBe(true);
  });

  it('should preserve edges', () => {
    const { edges } = mergeContextHighlighting(graphData, contextData, allSourcesVisible);
    expect(edges).toEqual(graphData.edges);
  });
});
|
|
|
|
// ============================================================================
|
|
// Tests: countContextMatches
|
|
// ============================================================================
|
|
|
|
// countContextMatches: matched / total / percentage statistics.
describe('countContextMatches', () => {
  it('should return zeros for empty inputs', () => {
    const stats = countContextMatches([], []);
    expect(stats).toEqual({ matched: 0, total: 0, percentage: 0 });
  });

  it('should count matches correctly', () => {
    const { matched, total, percentage } = countContextMatches(mockGraphNodes, mockContextNodes);

    expect(matched).toBe(3); // Rijksmuseum, Amsterdam Museum, Stedelijk
    expect(total).toBe(5); // all graph fixtures
    expect(percentage).toBe(60); // 3 of 5
  });

  it('should return 0% when no matches', () => {
    // A single graph node that shares nothing with the context fixtures.
    const unrelatedNode: GraphNode = {
      id: 'http://data.glam.nl/custodian/unique',
      label: 'Unique Museum',
      type: 'custodian',
      entityType: 'Museum',
      attributes: { name: 'Completely Unique Name' },
    };

    const { matched, percentage } = countContextMatches([unrelatedNode], mockContextNodes);

    expect(matched).toBe(0);
    expect(percentage).toBe(0);
  });

  it('should return 100% when all match', () => {
    // Feeding the context fixtures in as graph nodes: each one matches itself.
    const { matched, percentage } = countContextMatches(mockContextNodes, mockContextNodes);

    expect(matched).toBe(3);
    expect(percentage).toBe(100);
  });
});
|
|
|
|
// ============================================================================
|
|
// Tests: SOURCE_COLORS
|
|
// ============================================================================
|
|
|
|
// SOURCE_COLORS: pins the hex colour assigned to each RAG source.
describe('SOURCE_COLORS', () => {
  it('should have colors for all expected sources', () => {
    const { qdrant, sparql, typedb } = SOURCE_COLORS;

    expect(qdrant).toBe('#6366f1'); // Indigo
    expect(sparql).toBe('#10b981'); // Emerald
    expect(typedb).toBe('#f59e0b'); // Amber
  });
});
|
|
|
|
// ============================================================================
|
|
// Integration Tests: Real-world Scenarios
|
|
// ============================================================================
|
|
|
|
// Integration scenarios: one context node versus one graph node, each pair
// overlapping on exactly one kind of key (or on none).
describe('Integration: Real-world matching scenarios', () => {
  /** Builds the key set from a single context node and tests the graph node against it. */
  const matchesContext = (contextNode: GraphNode, graphNode: GraphNode): boolean => {
    const contextIds = buildContextNodeIds([contextNode]);
    return isNodeInContext(graphNode, contextIds);
  };

  it('should handle case where label differs from name attribute', () => {
    const contextNode: GraphNode = {
      id: 'qdrant:test-001',
      label: 'Rijksmuseum',
      type: 'custodian',
      entityType: 'Museum',
      attributes: { name: 'Rijksmuseum', source: 'qdrant' },
    };

    // Label is different, but the `name` attribute is identical.
    const graphNode: GraphNode = {
      id: 'http://oxigraph/rijksmuseum',
      label: 'Rijksmuseum Amsterdam',
      type: 'custodian',
      entityType: 'Museum',
      attributes: { name: 'Rijksmuseum' },
    };

    expect(matchesContext(contextNode, graphNode)).toBe(true);
  });

  it('should handle matching via ISIL when names completely differ', () => {
    const contextNode: GraphNode = {
      id: 'sparql:kb-001',
      label: 'KB',
      type: 'custodian',
      entityType: 'Library',
      attributes: {
        name: 'Koninklijke Bibliotheek',
        isil_code: 'NL-HaKB',
        source: 'sparql',
      },
    };

    // Neither label nor name overlaps; only the ISIL code is shared.
    const graphNode: GraphNode = {
      id: 'http://oxigraph/national-library',
      label: 'Royal Library',
      type: 'custodian',
      entityType: 'Library',
      attributes: {
        name: 'National Library of the Netherlands',
        isil_code: 'NL-HaKB',
      },
    };

    expect(matchesContext(contextNode, graphNode)).toBe(true);
  });

  it('should handle matching via Wikidata ID', () => {
    const contextNode: GraphNode = {
      id: 'qdrant:eye-001',
      label: 'EYE Film',
      type: 'custodian',
      entityType: 'Museum',
      attributes: {
        name: 'EYE Filmmuseum',
        wikidata_id: 'Q746741',
        source: 'qdrant',
      },
    };

    // Labels and names differ; the Wikidata ID is the shared key.
    const graphNode: GraphNode = {
      id: 'http://oxigraph/eye-filmmuseum',
      label: 'EYE Film Institute',
      type: 'custodian',
      entityType: 'Museum',
      attributes: {
        name: 'Eye Film Institute Netherlands',
        wikidata_id: 'Q746741',
      },
    };

    expect(matchesContext(contextNode, graphNode)).toBe(true);
  });

  it('should NOT match when no identifiers or names overlap', () => {
    const contextNode: GraphNode = {
      id: 'qdrant:museum-a',
      label: 'Museum A',
      type: 'custodian',
      entityType: 'Museum',
      attributes: {
        name: 'Museum Alpha',
        ghcid: 'NL-XX-001',
        source: 'qdrant',
      },
    };

    // Different id, label, name and GHCID: nothing in common.
    const graphNode: GraphNode = {
      id: 'http://oxigraph/museum-b',
      label: 'Museum B',
      type: 'custodian',
      entityType: 'Museum',
      attributes: {
        name: 'Museum Beta',
        ghcid: 'NL-XX-002',
      },
    };

    expect(matchesContext(contextNode, graphNode)).toBe(false);
  });
});
|
|
|
|
// ============================================================================
|
|
// Tests: extractGhcidFromUri
|
|
// ============================================================================
|
|
|
|
import { extractGhcidFromUri } from '../../src/utils/contextNodeMatching';
|
|
|
|
// extractGhcidFromUri: pulls the GHCID out of w3id.org heritage URIs and
// yields null for anything else.
describe('extractGhcidFromUri', () => {
  // Common prefix of the HTTPS heritage-custodian URIs used below.
  const ghcidBase = 'https://w3id.org/heritage/ghcid/';

  it('should extract GHCID from valid heritage custodian URI', () => {
    const ghcid = extractGhcidFromUri(`${ghcidBase}NL-FR-WOL-M-K`);
    expect(ghcid).toBe('NL-FR-WOL-M-K');
  });

  it('should handle various GHCID formats', () => {
    for (const ghcid of ['JP-OS-KAW-M-OGFC', 'CZ-JI-ADA-L-AS', 'NL-NH-AMS-M-RM']) {
      expect(extractGhcidFromUri(`${ghcidBase}${ghcid}`)).toBe(ghcid);
    }
  });

  it('should return null for non-heritage URIs', () => {
    const foreignUris = [
      'http://data.glam.nl/custodian/rijksmuseum',
      'https://example.org/entity/123',
    ];

    for (const uri of foreignUris) {
      expect(extractGhcidFromUri(uri)).toBeNull();
    }
  });

  it('should return null for empty or invalid input', () => {
    for (const input of ['', 'not-a-uri']) {
      expect(extractGhcidFromUri(input)).toBeNull();
    }
  });

  it('should handle HTTP variant (not just HTTPS)', () => {
    const ghcid = extractGhcidFromUri('http://w3id.org/heritage/ghcid/NL-FR-WOL-M-K');
    expect(ghcid).toBe('NL-FR-WOL-M-K');
  });

  it('should normalize GHCID to uppercase', () => {
    // Edge case: a lowercase GHCID inside the URI comes back uppercased.
    const ghcid = extractGhcidFromUri(`${ghcidBase}nl-fr-wol-m-k`);
    expect(ghcid).toBe('NL-FR-WOL-M-K');
  });
});
|
|
|
|
// ============================================================================
|
|
// Tests: URI-based GHCID matching (Oxigraph integration)
|
|
// ============================================================================
|
|
|
|
// URI-based GHCID matching: Oxigraph nodes expose their GHCID only through the
// URI used as `id`, whereas Qdrant context nodes carry it in `attributes.ghcid`.
describe('URI-based GHCID matching', () => {
  /**
   * Mock Oxigraph nodes - these have URIs with embedded GHCIDs but empty attributes.
   * This simulates real Oxigraph graph data structure.
   */
  const mockOxigraphNodes: GraphNode[] = [
    {
      id: 'https://w3id.org/heritage/ghcid/NL-FR-WOL-M-K',
      label: "'t Kiekhuus",
      type: 'subject',
      entityType: 'hc:Custodian',
      attributes: {}, // Empty attributes - typical for Oxigraph nodes
    },
    {
      id: 'https://w3id.org/heritage/ghcid/JP-OS-KAW-M-OGFC',
      label: 'Osaka Golf Course Museum',
      type: 'subject',
      entityType: 'hc:Custodian',
      attributes: {},
    },
    {
      id: 'https://w3id.org/heritage/ghcid/CZ-JI-ADA-L-AS',
      label: 'ADAST, a.s.',
      type: 'subject',
      entityType: 'hc:Custodian',
      attributes: {},
    },
  ];

  /**
   * Mock Qdrant context nodes - these have GHCIDs in attributes.
   * This simulates real Qdrant RAG results.
   */
  const mockQdrantContextNodes: GraphNode[] = [
    {
      id: 'qdrant:123',
      label: "'t Kiekhuus",
      type: 'custodian',
      entityType: 'Museum',
      attributes: {
        ghcid: 'NL-FR-WOL-M-K',
        name: "'t Kiekhuus",
        city: 'Wolvega',
        source: 'qdrant',
        score: 0.92,
      },
    },
    {
      id: 'qdrant:456',
      label: 'Some Other Museum',
      type: 'custodian',
      entityType: 'Museum',
      attributes: {
        ghcid: 'JP-OS-KAW-M-OGFC',
        name: 'Osaka Golf Course Museum',
        source: 'qdrant',
        score: 0.88,
      },
    },
  ];

  it('should match Oxigraph URI-based GHCID with Qdrant attribute-based GHCID', () => {
    const contextIds = buildContextNodeIds(mockQdrantContextNodes);

    // First Oxigraph node should match (NL-FR-WOL-M-K)
    expect(isNodeInContext(mockOxigraphNodes[0], contextIds)).toBe(true);

    // Second Oxigraph node should match (JP-OS-KAW-M-OGFC)
    expect(isNodeInContext(mockOxigraphNodes[1], contextIds)).toBe(true);

    // Third Oxigraph node should NOT match (CZ-JI-ADA-L-AS not in context)
    expect(isNodeInContext(mockOxigraphNodes[2], contextIds)).toBe(false);
  });

  it('should extract matching keys from Oxigraph URI-based nodes', () => {
    const keys = extractMatchingKeys(mockOxigraphNodes[0]);

    // Should include the full URI as direct ID
    expect(keys.has('https://w3id.org/heritage/ghcid/NL-FR-WOL-M-K')).toBe(true);

    // Should include the extracted GHCID
    expect(keys.has('ghcid:NL-FR-WOL-M-K')).toBe(true);

    // Should include the label as name
    expect(keys.has("name:'t kiekhuus")).toBe(true);
  });

  it('should get context node info for matched Oxigraph node', () => {
    const contextInfo = getContextNodeInfo(mockOxigraphNodes[0], mockQdrantContextNodes);

    expect(contextInfo).not.toBeNull();
    expect(contextInfo?.source).toBe('qdrant');
    expect(contextInfo?.score).toBe(0.92);
  });

  it('should return null for unmatched Oxigraph node', () => {
    const contextInfo = getContextNodeInfo(mockOxigraphNodes[2], mockQdrantContextNodes);
    expect(contextInfo).toBeNull();
  });

  it('should handle case-insensitive GHCID matching', () => {
    // Create a context node with lowercase GHCID (edge case)
    const lowercaseContext: GraphNode[] = [
      {
        id: 'qdrant:789',
        label: 'Test Museum',
        type: 'custodian',
        entityType: 'Museum',
        attributes: {
          ghcid: 'nl-fr-wol-m-k', // Lowercase
          source: 'qdrant',
        },
      },
    ];

    const contextIds = buildContextNodeIds(lowercaseContext);

    // Should still match (GHCID matching should be case-insensitive)
    expect(isNodeInContext(mockOxigraphNodes[0], contextIds)).toBe(true);
  });

  it('should merge context highlighting for Oxigraph nodes', () => {
    const graphData: GraphData = {
      nodes: mockOxigraphNodes,
      edges: [],
    };

    const contextData: GraphData = {
      nodes: mockQdrantContextNodes,
      edges: [],
    };

    const visibleSources = new Set(['qdrant', 'sparql', 'typedb']);

    const result = mergeContextHighlighting(graphData, contextData, visibleSources);

    // `attributes` is read with optional chaining, matching the other
    // mergeContextHighlighting tests in this file; a missing `attributes`
    // object then surfaces as a failed expectation instead of a TypeError.

    // First node should be marked as context result
    expect(result.nodes[0].attributes?.isContextResult).toBe(true);
    expect(result.nodes[0].attributes?.dimmed).toBe(false);
    expect(result.nodes[0].attributes?.source).toBe('qdrant');

    // Second node should also be marked as context result
    expect(result.nodes[1].attributes?.isContextResult).toBe(true);

    // Third node should be dimmed (not in context)
    expect(result.nodes[2].attributes?.isContextResult).toBe(false);
    expect(result.nodes[2].attributes?.dimmed).toBe(true);
  });

  it('should count matches correctly for Oxigraph URI-based nodes', () => {
    const { matched, total, percentage } = countContextMatches(
      mockOxigraphNodes,
      mockQdrantContextNodes,
    );

    expect(matched).toBe(2); // Two nodes match by GHCID
    expect(total).toBe(3);
    expect(percentage).toBe(67); // 2/3 = 66.67% → 67%
  });
});
|