- Add chat.spec.ts for RAG query testing - Add count-queries.spec.ts for aggregation validation - Add map-panel.spec.ts for geographic feature testing - Add cache.spec.ts for response caching verification - Add auth.setup.ts for authentication handling - Configure playwright.config.ts for multi-browser testing - Tests run against production archief.support
212 lines
7.4 KiB
TypeScript
212 lines
7.4 KiB
TypeScript
import { test, expect } from '@playwright/test'
import type { Page } from '@playwright/test'
import { loginAndNavigate, waitForChatReady } from './auth.setup'
|
|
|
|
/**
 * COUNT query tests for ArchiefAssistent
 *
 * These tests verify the RAG system correctly handles COUNT queries
 * for Dutch heritage institutions by province and city.
 *
 * Tests use a sample of queries from the golden dataset.
 */
|
|
|
|
// Province-level COUNT queries: each case asks one question in Dutch and checks
// that the reply contains a number, the institution type and the province name.
test.describe('COUNT Queries - Province Level', () => {
  // Maximum time to wait for the assistant's reply to appear.
  // NOTE(review): this is longer than Playwright's default 30 s per-test
  // timeout; confirm playwright.config.ts raises the test timeout.
  const RESPONSE_TIMEOUT_MS = 45000

  // One entry per test. `institution` is matched against the lower-cased reply,
  // `place` against the reply as returned.
  const provinceCases = [
    {
      title: 'should count archives in Utrecht province',
      question: 'Hoeveel archieven zijn er in Utrecht?',
      institution: /archie[fv]|archives?/,
      place: /Utrecht/i,
    },
    {
      title: 'should count museums in Noord-Holland',
      question: 'Hoeveel musea zijn er in Noord-Holland?',
      institution: /muse[ua]|museums?/,
      place: /Noord-Holland/i,
    },
    {
      title: 'should count libraries in Zuid-Holland',
      question: 'Hoeveel bibliotheken zijn er in Zuid-Holland?',
      institution: /bibliothe[ek]|librar/,
      place: /Zuid-Holland/i,
    },
    {
      title: 'should count archives in Gelderland',
      question: 'Hoeveel archieven zijn er in Gelderland?',
      institution: /archie[fv]|archives?/,
      place: /Gelderland/i,
    },
    {
      title: 'should count museums in Limburg',
      question: 'Hoeveel musea zijn er in Limburg?',
      institution: /muse[ua]|museums?/,
      place: /Limburg/i,
    },
  ]

  test.beforeEach(async ({ page }) => {
    await loginAndNavigate(page)
    await waitForChatReady(page)
  })

  /**
   * Submits a question through the chat UI and returns the reply text.
   *
   * @param page - Playwright page the chat is loaded in.
   * @param question - Text to type into the chat input.
   * @returns Text content of the assistant message created for this question,
   *   or an empty string when the element has no text content.
   */
  async function askQuestion(page: Page, question: string): Promise<string> {
    const assistantMessages = page.getByTestId('assistant-message')

    // Remember how many assistant messages exist before sending, so that a
    // message already on screen is never mistaken for the new reply.
    // Assumes new assistant messages are appended after existing ones.
    const previousCount = await assistantMessages.count()

    await page.getByTestId('chat-input').fill(question)
    await page.getByTestId('send-button').click()

    // The reply to this question is the first message beyond the old count.
    const reply = assistantMessages.nth(previousCount)
    await reply.waitFor({ timeout: RESPONSE_TIMEOUT_MS })

    return (await reply.textContent()) ?? ''
  }

  for (const { title, question, institution, place } of provinceCases) {
    test(title, async ({ page }) => {
      const response = await askQuestion(page, question)

      // Response should contain a number
      expect(response).toMatch(/\d+/)
      // Should mention the institution type (Dutch or English)
      expect(response.toLowerCase()).toMatch(institution)
      // Should mention the province
      expect(response).toMatch(place)
    })
  }
})
|
|
|
|
// City-level COUNT queries: each case asks one question in Dutch and checks
// that the reply contains a number, the institution type and the city name.
test.describe('COUNT Queries - City Level', () => {
  // Maximum time to wait for the assistant's reply to appear.
  // NOTE(review): this is longer than Playwright's default 30 s per-test
  // timeout; confirm playwright.config.ts raises the test timeout.
  const RESPONSE_TIMEOUT_MS = 45000

  // One entry per test. `institution` is matched against the lower-cased reply,
  // `place` against the reply as returned.
  const cityCases = [
    {
      title: 'should count museums in Amsterdam',
      question: 'Hoeveel musea zijn er in Amsterdam?',
      institution: /muse[ua]|museums?/,
      place: /Amsterdam/i,
    },
    {
      title: 'should count archives in Rotterdam',
      question: 'Hoeveel archieven zijn er in Rotterdam?',
      institution: /archie[fv]|archives?/,
      place: /Rotterdam/i,
    },
    {
      title: 'should count libraries in Den Haag',
      question: 'Hoeveel bibliotheken zijn er in Den Haag?',
      institution: /bibliothe[ek]|librar/,
      // Either name for the city is accepted.
      place: /Den Haag|'s-Gravenhage/i,
    },
  ]

  test.beforeEach(async ({ page }) => {
    await loginAndNavigate(page)
    await waitForChatReady(page)
  })

  /**
   * Submits a question through the chat UI and returns the reply text.
   *
   * @param page - Playwright page the chat is loaded in.
   * @param question - Text to type into the chat input.
   * @returns Text content of the assistant message created for this question,
   *   or an empty string when the element has no text content.
   */
  async function askQuestion(page: Page, question: string): Promise<string> {
    const assistantMessages = page.getByTestId('assistant-message')

    // Remember how many assistant messages exist before sending, so that a
    // message already on screen is never mistaken for the new reply.
    // Assumes new assistant messages are appended after existing ones.
    const previousCount = await assistantMessages.count()

    await page.getByTestId('chat-input').fill(question)
    await page.getByTestId('send-button').click()

    // The reply to this question is the first message beyond the old count.
    const reply = assistantMessages.nth(previousCount)
    await reply.waitFor({ timeout: RESPONSE_TIMEOUT_MS })

    return (await reply.textContent()) ?? ''
  }

  for (const { title, question, institution, place } of cityCases) {
    test(title, async ({ page }) => {
      const response = await askQuestion(page, question)

      // Response should contain a number
      expect(response).toMatch(/\d+/)
      // Should mention the institution type (Dutch or English)
      expect(response.toLowerCase()).toMatch(institution)
      // Should mention the city
      expect(response).toMatch(place)
    })
  }
})
|
|
|
|
// COUNT queries phrased in ways other than the plain "Hoeveel ... zijn er in ...?"
// form, to check the assistant still produces a usable answer.
test.describe('COUNT Queries - Alternative Phrasing', () => {
  // Maximum time to wait for the assistant's reply to appear.
  // NOTE(review): this is longer than Playwright's default 30 s per-test
  // timeout; confirm playwright.config.ts raises the test timeout.
  const RESPONSE_TIMEOUT_MS = 45000

  test.beforeEach(async ({ page }) => {
    await loginAndNavigate(page)
    await waitForChatReady(page)
  })

  /**
   * Submits a question through the chat UI and returns the reply text.
   *
   * @param page - Playwright page the chat is loaded in.
   * @param question - Text to type into the chat input.
   * @returns Text content of the assistant message created for this question,
   *   or an empty string when the element has no text content.
   */
  async function askQuestion(page: Page, question: string): Promise<string> {
    const assistantMessages = page.getByTestId('assistant-message')

    // Remember how many assistant messages exist before sending, so that a
    // message already on screen is never mistaken for the new reply.
    // Assumes new assistant messages are appended after existing ones.
    const previousCount = await assistantMessages.count()

    await page.getByTestId('chat-input').fill(question)
    await page.getByTestId('send-button').click()

    // The reply to this question is the first message beyond the old count.
    const reply = assistantMessages.nth(previousCount)
    await reply.waitFor({ timeout: RESPONSE_TIMEOUT_MS })

    return (await reply.textContent()) ?? ''
  }

  test('should handle "wat is het aantal" phrasing', async ({ page }) => {
    const response = await askQuestion(page, 'Wat is het aantal musea in Overijssel?')

    expect(response).toMatch(/\d+/)
    expect(response.toLowerCase()).toMatch(/muse[ua]|museums?/)
  })

  test('should handle "kun je me vertellen" phrasing', async ({ page }) => {
    const response = await askQuestion(
      page,
      'Kun je me vertellen hoeveel archieven er in Friesland zijn?',
    )

    expect(response).toMatch(/\d+/)
    expect(response.toLowerCase()).toMatch(/archie[fv]|archives?/)
  })

  test('should handle informal query style', async ({ page }) => {
    const response = await askQuestion(page, 'Hee, hoeveel musea heeft Zeeland eigenlijk?')

    // Should still get a meaningful response (not an error)
    expect(response.length).toBeGreaterThan(10)
    // Should not be an error message (English or Dutch wording)
    expect(response.toLowerCase()).not.toMatch(/error|fout|probleem/)
  })
})
|
|
|
|
// COUNT queries with awkward input (possibly empty result sets, misspellings,
// abbreviations). These only require that the assistant answers at all.
test.describe('COUNT Queries - Edge Cases', () => {
  // Maximum time to wait for the assistant's reply to appear.
  // NOTE(review): this is longer than Playwright's default 30 s per-test
  // timeout; confirm playwright.config.ts raises the test timeout.
  const RESPONSE_TIMEOUT_MS = 45000

  test.beforeEach(async ({ page }) => {
    await loginAndNavigate(page)
    await waitForChatReady(page)
  })

  /**
   * Submits a question through the chat UI and returns the reply text.
   *
   * @param page - Playwright page the chat is loaded in.
   * @param question - Text to type into the chat input.
   * @returns Text content of the assistant message created for this question,
   *   or an empty string when the element has no text content.
   */
  async function askQuestion(page: Page, question: string): Promise<string> {
    const assistantMessages = page.getByTestId('assistant-message')

    // Remember how many assistant messages exist before sending, so that a
    // message already on screen is never mistaken for the new reply.
    // Assumes new assistant messages are appended after existing ones.
    const previousCount = await assistantMessages.count()

    await page.getByTestId('chat-input').fill(question)
    await page.getByTestId('send-button').click()

    // The reply to this question is the first message beyond the old count.
    const reply = assistantMessages.nth(previousCount)
    await reply.waitFor({ timeout: RESPONSE_TIMEOUT_MS })

    return (await reply.textContent()) ?? ''
  }

  test('should handle province with no institutions gracefully', async ({ page }) => {
    // Query for a type that may have zero results
    const response = await askQuestion(
      page,
      'Hoeveel universiteitsbibliotheken zijn er in Flevoland?',
    )

    // Should get a response (not hang or error)
    expect(response.length).toBeGreaterThan(0)
  })

  test('should handle misspelled province name', async ({ page }) => {
    const response = await askQuestion(page, 'Hoeveel musea zijn er in Noord Hollant?')

    // System should either:
    // 1. Correct the spelling and answer
    // 2. Or indicate it doesn't understand
    expect(response.length).toBeGreaterThan(0)
    expect(response.toLowerCase()).not.toMatch(/error|exception/)
  })

  test('should handle abbreviated province names', async ({ page }) => {
    const response = await askQuestion(page, 'Hoeveel archieven zijn er in NH?')

    // Response should acknowledge the query
    expect(response.length).toBeGreaterThan(0)
  })
})
|