/**
 * Unit tests for semantic-cache.ts entity extraction and matching
 *
 * Tests Rule 46: Ontology-Driven Cache Segmentation
 * - Vocabulary-based entity extraction
 * - Structured cache key generation
 * - Entity matching for cache lookup
 * - Topological distance and combined similarity (Phase 2)
 * - Decision introspection via explainLastDecision (Phase 4)
 * - Clarity scoring (Phase 5)
 */
|
|
|
|
import { describe, it, expect, vi, beforeEach } from 'vitest'
|
|
import {
|
|
extractEntitiesFast,
|
|
extractEntitiesWithClarity,
|
|
generateStructuredCacheKey,
|
|
entitiesMatch,
|
|
normalizeQuery,
|
|
topologicalDistance,
|
|
combinedSimilarity,
|
|
calculateClarity,
|
|
explainLastDecision,
|
|
type ExtractedEntities,
|
|
type InstitutionTypeCode,
|
|
} from '../src/lib/semantic-cache'
|
|
|
|
/**
 * extractEntitiesFast: checks that institution type, location (province or
 * city) and intent are pulled out of a raw query string.
 */
describe('extractEntitiesFast', () => {
  // Field accessors so single-field cases read as "query -> expected value".
  const typeOf = (query: string) => extractEntitiesFast(query).institutionType
  const intentOf = (query: string) => extractEntitiesFast(query).intent

  describe('institution type detection', () => {
    it('should detect museum type from "musea"', () => {
      expect(typeOf('Hoeveel musea zijn er in Amsterdam?')).toBe('M')
    })

    it('should detect museum type from "museum"', () => {
      expect(typeOf('Waar is het museum?')).toBe('M')
    })

    // NOTE(review): the title names the singular "archief", but the query only
    // contains the plural "archieven" (same as the next case) - confirm whether
    // a singular input was intended.
    it('should detect archive type from "archief"', () => {
      expect(typeOf('Hoeveel archieven zijn er in Utrecht?')).toBe('A')
    })

    it('should detect archive type from "archieven"', () => {
      expect(typeOf('Toon alle archieven')).toBe('A')
    })

    // NOTE(review): the title names the singular "bibliotheek", but the query
    // only contains the plural "bibliotheken" - confirm the intended input.
    it('should detect library type from "bibliotheek"', () => {
      expect(typeOf('Welke bibliotheken zijn er?')).toBe('L')
    })

    it('should detect library type from "bibliotheken"', () => {
      expect(typeOf('Hoeveel bibliotheken in Groningen?')).toBe('L')
    })

    it('should detect gallery type from "gallerij"', () => {
      expect(typeOf('Kunstgallerij in Amsterdam')).toBe('G')
    })

    it('should detect education type from "universiteit"', () => {
      expect(typeOf('Universiteit Utrecht collecties')).toBe('E')
    })

    it('should detect holy sites from "kerk"', () => {
      expect(typeOf('Welke kerken zijn er in Amsterdam?')).toBe('H')
    })
  })

  describe('location detection - provinces', () => {
    it('should detect Noord-Holland province', () => {
      const { location, locationType } = extractEntitiesFast('musea in noord-holland')
      expect(location).toBe('NH')
      expect(locationType).toBe('province')
    })

    it('should detect Zuid-Holland province', () => {
      const { location, locationType } = extractEntitiesFast('archieven in zuid-holland')
      expect(location).toBe('ZH')
      expect(locationType).toBe('province')
    })

    it('should detect Utrecht province', () => {
      const { location, locationType } = extractEntitiesFast('bibliotheken in utrecht')
      expect(location).toBe('UT')
      expect(locationType).toBe('province')
    })

    it('should detect Gelderland province', () => {
      const { location, locationType } = extractEntitiesFast('musea gelderland')
      expect(location).toBe('GE')
      expect(locationType).toBe('province')
    })

    it('should detect Limburg province', () => {
      const { location, locationType } = extractEntitiesFast('archieven limburg')
      expect(location).toBe('LI')
      expect(locationType).toBe('province')
    })
  })

  describe('location detection - cities', () => {
    it('should detect Amsterdam', () => {
      const { location, locationType } = extractEntitiesFast('musea in amsterdam')
      expect(location).toBe('amsterdam')
      expect(locationType).toBe('city')
    })

    it('should detect Rotterdam', () => {
      const { location, locationType } = extractEntitiesFast('archieven rotterdam')
      expect(location).toBe('rotterdam')
      expect(locationType).toBe('city')
    })

    it('should detect Den Haag', () => {
      const { location, locationType } = extractEntitiesFast('bibliotheken den haag')
      expect(location).toBe('denhaag')
      expect(locationType).toBe('city')
    })

    it('should detect Maastricht', () => {
      const { location, locationType } = extractEntitiesFast('musea maastricht')
      expect(location).toBe('maastricht')
      expect(locationType).toBe('city')
    })
  })

  describe('intent detection', () => {
    it('should detect count intent from "hoeveel"', () => {
      expect(intentOf('Hoeveel musea zijn er?')).toBe('count')
    })

    it('should detect count intent from "aantal"', () => {
      expect(intentOf('Wat is het aantal archieven?')).toBe('count')
    })

    it('should detect list intent from "welke"', () => {
      expect(intentOf('Welke bibliotheken zijn er?')).toBe('list')
    })

    it('should detect list intent from "toon"', () => {
      expect(intentOf('Toon alle musea')).toBe('list')
    })

    it('should detect info intent from "wat is"', () => {
      expect(intentOf('Wat is een archief?')).toBe('info')
    })
  })

  describe('combined entity extraction', () => {
    it('should extract type, location, and intent together', () => {
      const { institutionType, location, locationType, intent } = extractEntitiesFast(
        'Hoeveel musea zijn er in Amsterdam?',
      )
      expect(institutionType).toBe('M')
      expect(location).toBe('amsterdam')
      expect(locationType).toBe('city')
      expect(intent).toBe('count')
    })

    // NOTE(review): the query names a province only (no city), so this does not
    // actually exercise a province-vs-city preference - confirm intended input.
    it('should prefer province over city when province is mentioned', () => {
      const { location, locationType } = extractEntitiesFast('musea in noord-holland')
      expect(location).toBe('NH')
      expect(locationType).toBe('province')
    })
  })
})
|
|
|
|
/**
 * generateStructuredCacheKey: the expected keys below are colon-separated,
 * lowercase, with the subtype dot-appended to the institution type.
 */
describe('generateStructuredCacheKey', () => {
  // Typed wrapper so the inline literals are still checked as ExtractedEntities.
  const keyFor = (entities: ExtractedEntities) => generateStructuredCacheKey(entities)

  it('should generate key with intent, type, and location', () => {
    const key = keyFor({
      institutionType: 'M',
      location: 'amsterdam',
      intent: 'count',
    })
    expect(key).toBe('count:m:amsterdam')
  })

  it('should include subtype when present', () => {
    const key = keyFor({
      institutionType: 'M',
      institutionSubtype: 'ART_MUSEUM',
      location: 'amsterdam',
      intent: 'count',
    })
    expect(key).toBe('count:m.art_museum:amsterdam')
  })

  it('should include record set type when present', () => {
    const key = keyFor({
      institutionType: 'A',
      recordSetType: 'CIVIL_REGISTRY',
      location: 'NH',
      intent: 'list',
    })
    expect(key).toBe('list:a:civil_registry:nh')
  })

  it('should include both subtype and record set type', () => {
    const key = keyFor({
      institutionType: 'A',
      institutionSubtype: 'MUNICIPAL_ARCHIVE',
      recordSetType: 'CIVIL_REGISTRY',
      location: 'amsterdam',
      intent: 'list',
    })
    expect(key).toBe('list:a.municipal_archive:civil_registry:amsterdam')
  })

  it('should use defaults for missing fields', () => {
    // An empty entity set is expected to fall back to query / any / nl.
    expect(keyFor({})).toBe('query:any:nl')
  })

  it('should normalize subtype to snake_case', () => {
    const key = keyFor({
      institutionType: 'M',
      institutionSubtype: 'Art Museum',
      location: 'amsterdam',
    })
    expect(key).toBe('query:m.art_museum:amsterdam')
  })
})
|
|
|
|
/**
 * entitiesMatch(query, cached): the cases below pin an asymmetric rule - every
 * constraint the query carries must be present and equal on the cached entry,
 * while extra constraints on the cached entry do not block a match.
 */
describe('entitiesMatch', () => {
  // Runs the matcher and returns the vitest assertion on its verdict.
  const verdict = (incoming: ExtractedEntities, stored: ExtractedEntities) =>
    expect(entitiesMatch(incoming, stored))

  describe('location matching', () => {
    it('should match when locations are equal', () => {
      verdict(
        { location: 'amsterdam', institutionType: 'M' },
        { location: 'amsterdam', institutionType: 'M' },
      ).toBe(true)
    })

    it('should NOT match when locations differ', () => {
      verdict(
        { location: 'amsterdam', institutionType: 'M' },
        { location: 'rotterdam', institutionType: 'M' },
      ).toBe(false)
    })

    it('should NOT match when query has location but cached does not', () => {
      verdict(
        { location: 'amsterdam', institutionType: 'M' },
        { institutionType: 'M' },
      ).toBe(false)
    })

    it('should match when query has no location but cached does', () => {
      verdict(
        { institutionType: 'M' },
        { location: 'amsterdam', institutionType: 'M' },
      ).toBe(true)
    })
  })

  describe('institution type matching', () => {
    it('should match when types are equal', () => {
      verdict({ institutionType: 'M' }, { institutionType: 'M' }).toBe(true)
    })

    it('should NOT match when types differ', () => {
      verdict({ institutionType: 'M' }, { institutionType: 'A' }).toBe(false)
    })

    it('should NOT match when query has type but cached does not', () => {
      verdict({ institutionType: 'M' }, {}).toBe(false)
    })
  })

  describe('subtype matching (Rule 46)', () => {
    it('should match when subtypes are equal', () => {
      verdict(
        { institutionType: 'M', institutionSubtype: 'ART_MUSEUM' },
        { institutionType: 'M', institutionSubtype: 'ART_MUSEUM' },
      ).toBe(true)
    })

    it('should NOT match when subtypes differ', () => {
      verdict(
        { institutionType: 'M', institutionSubtype: 'ART_MUSEUM' },
        { institutionType: 'M', institutionSubtype: 'HISTORY_MUSEUM' },
      ).toBe(false)
    })

    it('should NOT match when query has subtype but cached does not (prevents kunstmuseum -> generic museum match)', () => {
      verdict(
        {
          institutionType: 'M',
          institutionSubtype: 'ART_MUSEUM',
          location: 'amsterdam',
        },
        {
          // Cached entry is a generic museum response: no subtype recorded.
          institutionType: 'M',
          location: 'amsterdam',
        },
      ).toBe(false)
    })

    it('should match when query has no subtype but cached does (generic matches specific)', () => {
      verdict(
        {
          // Generic museum query: no subtype requested.
          institutionType: 'M',
          location: 'amsterdam',
        },
        {
          institutionType: 'M',
          institutionSubtype: 'ART_MUSEUM',
          location: 'amsterdam',
        },
      ).toBe(true)
    })
  })

  describe('record set type matching', () => {
    it('should match when record set types are equal', () => {
      verdict(
        { institutionType: 'A', recordSetType: 'CIVIL_REGISTRY' },
        { institutionType: 'A', recordSetType: 'CIVIL_REGISTRY' },
      ).toBe(true)
    })

    it('should NOT match when record set types differ', () => {
      verdict(
        { institutionType: 'A', recordSetType: 'CIVIL_REGISTRY' },
        { institutionType: 'A', recordSetType: 'COUNCIL_GOVERNANCE' },
      ).toBe(false)
    })

    it('should NOT match when query has record set type but cached does not', () => {
      verdict(
        {
          institutionType: 'A',
          recordSetType: 'CIVIL_REGISTRY',
          location: 'NH',
        },
        {
          institutionType: 'A',
          location: 'NH',
        },
      ).toBe(false)
    })
  })

  describe('combined matching - geographic false positives', () => {
    it('should prevent Amsterdam vs Rotterdam false positive', () => {
      verdict(
        {
          institutionType: 'M',
          location: 'amsterdam',
          intent: 'count',
        },
        {
          institutionType: 'M',
          location: 'rotterdam',
          intent: 'count',
        },
      ).toBe(false)
    })

    it('should prevent city vs province false positive', () => {
      verdict(
        {
          institutionType: 'M',
          location: 'amsterdam',
          locationType: 'city',
        },
        {
          institutionType: 'M',
          location: 'NH',
          locationType: 'province',
        },
      ).toBe(false)
    })
  })

  describe('combined matching - subtype false positives', () => {
    it('should prevent kunstmuseum vs generic museum false positive', () => {
      verdict(
        {
          institutionType: 'M',
          institutionSubtype: 'ART_MUSEUM',
          location: 'amsterdam',
          intent: 'count',
        },
        {
          // Cached entry carries no subtype.
          institutionType: 'M',
          location: 'amsterdam',
          intent: 'count',
        },
      ).toBe(false)
    })

    it('should prevent burgerlijke stand vs generic archive false positive', () => {
      verdict(
        {
          institutionType: 'A',
          recordSetType: 'CIVIL_REGISTRY',
          location: 'amsterdam',
        },
        {
          // Cached entry carries no record set type.
          institutionType: 'A',
          location: 'amsterdam',
        },
      ).toBe(false)
    })
  })
})
|
|
|
|
/**
 * normalizeQuery: canonicalises a raw query string. The cases below pin
 * lowercasing, trimming, punctuation-to-space replacement and whitespace
 * collapsing.
 */
describe('normalizeQuery', () => {
  it('should lowercase the query', () => {
    // The trailing "?" is punctuation, so it is expected to disappear as well.
    expect(normalizeQuery('Hoeveel MUSEA?')).toBe('hoeveel musea')
  })

  it('should trim whitespace', () => {
    expect(normalizeQuery(' musea ')).toBe('musea')
  })

  it('should replace punctuation with spaces', () => {
    expect(normalizeQuery('musea, archieven, en bibliotheken')).toBe('musea archieven en bibliotheken')
  })

  it('should collapse multiple spaces', () => {
    // The input must contain runs of spaces; with single spaces only, this
    // case would pass even if collapsing were broken.
    expect(normalizeQuery('musea    in   amsterdam')).toBe('musea in amsterdam')
  })
})
|
|
|
|
// ============================================================================
// Phase 2: Topological Distance Tests (Rule 46 Evolution)
// ============================================================================
|
|
|
|
/**
 * topologicalDistance(a, b): distance between two type codes in the
 * institution-type hierarchy. Per the expectations below it is 0 for identical
 * codes, symmetric, and stays within [0, 1].
 */
describe('topologicalDistance', () => {
  it('should return 0 for identical types', () => {
    expect(topologicalDistance('M', 'M')).toBe(0)
    expect(topologicalDistance('A', 'A')).toBe(0)
    expect(topologicalDistance('M.ART', 'M.ART')).toBe(0)
  })

  // Title corrected: it previously claimed 0.25 while the assertion (and the
  // path arithmetic noted next to it) expects 0.5.
  it('should return 0.5 for sibling subtypes (same parent)', () => {
    // M.ART and M.HISTORY are both children of M
    const dist = topologicalDistance('M.ART', 'M.HISTORY')
    expect(dist).toBeCloseTo(0.5, 1) // path: ART -> M -> HISTORY = 2 / 4 = 0.5
  })

  it('should return higher distance for different base types', () => {
    // M and A are siblings under root
    const dist = topologicalDistance('M', 'A')
    expect(dist).toBeCloseTo(0.5, 1) // path: M -> * -> A = 2 / 4 = 0.5
  })

  it('should return even higher distance for subtypes of different base types', () => {
    // M.ART and A.MUNICIPAL are in different branches
    const dist = topologicalDistance('M.ART', 'A.MUNICIPAL')
    expect(dist).toBeGreaterThan(0.5)
  })

  it('should handle unknown types gracefully', () => {
    // Unknown types should be treated as direct children of root; only the
    // [0, 1] range is pinned here, not a specific value.
    const dist = topologicalDistance('UNKNOWN', 'M')
    expect(dist).toBeGreaterThanOrEqual(0)
    expect(dist).toBeLessThanOrEqual(1)
  })

  it('should be symmetric', () => {
    expect(topologicalDistance('M', 'A')).toBe(topologicalDistance('A', 'M'))
    expect(topologicalDistance('M.ART', 'L')).toBe(topologicalDistance('L', 'M.ART'))
  })
})
|
|
|
|
/**
 * combinedSimilarity(embeddingSimilarity, typeA, typeB): blends an embedding
 * score with a type-hierarchy score. The cases pin the pass-through behaviour
 * without types and the penalty applied when the types diverge.
 */
describe('combinedSimilarity', () => {
  it('should return pure embedding similarity when no types provided', () => {
    expect(combinedSimilarity(0.95, undefined, undefined)).toBe(0.95)
  })

  it('should weight embedding similarity at 0.7 by default', () => {
    // Identical types give topological distance 0, i.e. topological similarity 1,
    // so the blend is 0.7 * 0.9 + 0.3 * 1.0 = 0.63 + 0.3 = 0.93.
    const blended = combinedSimilarity(0.9, 'M', 'M')
    expect(blended).toBeCloseTo(0.93, 2)
  })

  it('should penalize different types even with high embedding similarity', () => {
    // Same embedding score on both sides; only the type pair differs.
    const matchingTypes = combinedSimilarity(0.9, 'M', 'M')
    const mismatchedTypes = combinedSimilarity(0.9, 'M', 'A')
    expect(mismatchedTypes).toBeLessThan(matchingTypes)
  })

  it('should heavily penalize cross-branch subtype matches', () => {
    // M.ART vs A.MUNICIPAL sit in different branches; a 0.92 embedding score
    // must still end up below 0.85 once the topological penalty is applied.
    const acrossBranches = combinedSimilarity(0.92, 'M.ART', 'A.MUNICIPAL')
    expect(acrossBranches).toBeLessThan(0.85)
  })
})
|
|
|
|
// ============================================================================
// Phase 5: Clarity Trading Tests (Rule 46 Evolution)
// ============================================================================
|
|
|
|
/**
 * calculateClarity(query, entities?): returns a clarityScore plus a list of
 * ambiguity tags. The cases treat 0.7 as the pass threshold.
 */
describe('calculateClarity', () => {
  describe('ambiguity detection', () => {
    it('should penalize temporal vagueness without dates', () => {
      const { clarityScore, ambiguities } = calculateClarity('oude archieven in Amsterdam')
      expect(clarityScore).toBeLessThan(0.7)
      expect(ambiguities).toContain('temporal_vague')
    })

    it('should NOT penalize temporal terms with dates', () => {
      const { ambiguities } = calculateClarity('archieven uit 1950 in Amsterdam')
      expect(ambiguities).not.toContain('temporal_vague')
    })

    it('should penalize size vagueness', () => {
      const { ambiguities } = calculateClarity('grote musea')
      expect(ambiguities).toContain('size_vague')
    })

    it('should penalize quality vagueness', () => {
      const { ambiguities } = calculateClarity('beste bibliotheken')
      expect(ambiguities).toContain('quality_vague')
    })

    it('should penalize pronouns at start', () => {
      const { ambiguities } = calculateClarity('Het is een archief?')
      expect(ambiguities).toContain('pronoun_start')
    })

    it('should heavily penalize very short queries', () => {
      const { clarityScore, ambiguities } = calculateClarity('musea?')
      expect(clarityScore).toBeLessThan(0.5)
      expect(ambiguities).toContain('too_short')
    })
  })

  describe('clarity boosters', () => {
    it('should boost clarity for specific city', () => {
      const cityScore = calculateClarity('musea in amsterdam').clarityScore
      const countryScore = calculateClarity('musea in nederland').clarityScore
      expect(cityScore).toBeGreaterThan(countryScore)
    })

    it('should boost clarity for specific type', () => {
      // The two queries differ in one word only: "museum" is expected to earn
      // the specific-type boost, the generic "gebouw" is not.
      const typedScore = calculateClarity('Ik zoek een museum om te bezoeken').clarityScore
      const untypedScore = calculateClarity('Ik zoek een gebouw om te bezoeken').clarityScore
      expect(typedScore).toBeGreaterThan(untypedScore)
    })

    it('should boost clarity for clear intent', () => {
      const explicitScore = calculateClarity('hoeveel musea zijn er in amsterdam').clarityScore
      const bareScore = calculateClarity('musea amsterdam').clarityScore
      expect(explicitScore).toBeGreaterThan(bareScore)
    })

    it('should boost clarity for identifiers', () => {
      const { clarityScore } = calculateClarity('wat is ISIL code NL-AsdAM')
      expect(clarityScore).toBeGreaterThanOrEqual(0.7)
    })

    it('should boost clarity for date ranges', () => {
      const { clarityScore } = calculateClarity('archieven 1800-1900 in amsterdam')
      expect(clarityScore).toBeGreaterThanOrEqual(0.7)
    })
  })

  describe('entity-based clarity boost', () => {
    it('should boost clarity when entities are extracted', () => {
      const extracted: ExtractedEntities = {
        institutionType: 'M',
        location: 'amsterdam',
        intent: 'count',
      }
      // Same query text both times; only the supplied entities differ.
      const scoreWith = calculateClarity('hoeveel musea amsterdam', extracted).clarityScore
      const scoreWithout = calculateClarity('hoeveel musea amsterdam').clarityScore
      expect(scoreWith).toBeGreaterThanOrEqual(scoreWithout)
    })

    it('should boost clarity for subtype specificity', () => {
      const extracted: ExtractedEntities = {
        institutionType: 'M',
        institutionSubtype: 'ART_MUSEUM',
        location: 'amsterdam',
      }
      const { clarityScore } = calculateClarity('kunstmusea amsterdam', extracted)
      expect(clarityScore).toBeGreaterThanOrEqual(0.7)
    })
  })

  describe('combined clarity scoring', () => {
    it('should pass threshold for clear, specific queries', () => {
      const { clarityScore, ambiguities } = calculateClarity('Hoeveel musea zijn er in Amsterdam?')
      expect(clarityScore).toBeGreaterThanOrEqual(0.7)
      expect(ambiguities.length).toBe(0)
    })

    it('should fail threshold for vague, short queries', () => {
      const { clarityScore, ambiguities } = calculateClarity('het oude?')
      expect(clarityScore).toBeLessThan(0.7)
      expect(ambiguities.length).toBeGreaterThan(0)
    })
  })
})
|
|
|
|
/**
 * extractEntitiesWithClarity: same entity fields as the fast extractor, plus a
 * numeric clarityScore and (when applicable) a list of ambiguities.
 */
describe('extractEntitiesWithClarity', () => {
  it('should include clarity score in extracted entities', () => {
    const { clarityScore } = extractEntitiesWithClarity('Hoeveel musea zijn er in Amsterdam?')
    expect(clarityScore).toBeDefined()
    expect(typeof clarityScore).toBe('number')
  })

  it('should include ambiguities when present', () => {
    const { ambiguities } = extractEntitiesWithClarity('oude grote musea')
    expect(ambiguities).toBeDefined()
    expect(ambiguities?.length).toBeGreaterThan(0)
  })

  it('should not include ambiguities array for clear queries', () => {
    const { ambiguities } = extractEntitiesWithClarity('Hoeveel archieven zijn er in Rotterdam?')
    // Both "field absent" and "empty array" count as no ambiguities.
    const noneReported = ambiguities === undefined || ambiguities.length === 0
    expect(noneReported).toBe(true)
  })

  it('should still extract institution type', () => {
    expect(extractEntitiesWithClarity('musea in amsterdam').institutionType).toBe('M')
  })

  it('should still extract location', () => {
    expect(extractEntitiesWithClarity('musea in amsterdam').location).toBe('amsterdam')
  })

  it('should still extract intent', () => {
    expect(extractEntitiesWithClarity('hoeveel musea zijn er').intent).toBe('count')
  })
})
|
|
|
|
// ============================================================================
// Phase 4: Message Handler Tests (Smalltalk-Inspired Introspection)
// ============================================================================
|
|
|
|
/**
 * explainLastDecision(): returns the trace of the most recent cache lookup
 * decision, or null when there is none.
 */
describe('explainLastDecision', () => {
  // Title corrected: the previous name promised null, but the assertion never
  // required it. Nothing in this file resets the trace, and other tests may
  // already have populated it, so the null-only case cannot be pinned here.
  // NOTE(review): if the module exposes a reset hook, call it in a beforeEach
  // and tighten this to toBeNull().
  it('should return null or a decision trace object', () => {
    const trace = explainLastDecision()
    // typeof null is also 'object'; the explicit null check documents that the
    // "no lookup yet" outcome is accepted on purpose.
    expect(trace === null || typeof trace === 'object').toBe(true)
  })
})
|