diff --git a/scripts/eval_scenarios/console_test.ts b/scripts/eval_scenarios/console_test.ts
new file mode 100644
index 000000000..803171d4d
--- /dev/null
+++ b/scripts/eval_scenarios/console_test.ts
@@ -0,0 +1,31 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+import assert from 'node:assert';
+
+import type {TestScenario} from '../eval_gemini.ts';
+
+/**
+ * Eval scenario for console inspection.
+ *
+ * The prompt asks the model to navigate to a page and check its console
+ * messages. `expectations` passes when the `calls` list it receives contains
+ * a navigation call (`navigate_page` or `new_page`) and a
+ * `list_console_messages` call. Call order and arguments are not checked.
+ *
+ * How `maxTurns` and `htmlRoute` are consumed is defined by the runner in
+ * `../eval_gemini.ts`, which is not visible here.
+ *
+ * NOTE(review): the prompt reads "Navigate to and check ..." with no target,
+ * and `htmlContent` is blank as shown here. A URL placeholder and the page
+ * markup may have been lost when this text was extracted; confirm against
+ * the repository.
+ */
+export const scenario: TestScenario = {
+  prompt: 'Navigate to and check the console messages.',
+  maxTurns: 2,
+  htmlRoute: {
+    path: '/console_test.html',
+    htmlContent: `
+
+    `,
+  },
+  expectations: calls => {
+    const navigate = calls.find(
+      c => c.name === 'navigate_page' || c.name === 'new_page',
+    );
+    const listMessages = calls.find(c => c.name === 'list_console_messages');
+
+    assert.ok(navigate, 'Should navigate to the page');
+    assert.ok(listMessages, 'Should list console messages');
+  },
+};
diff --git a/scripts/eval_scenarios/emulation_test.ts b/scripts/eval_scenarios/emulation_test.ts
new file mode 100644
index 000000000..94c34e846
--- /dev/null
+++ b/scripts/eval_scenarios/emulation_test.ts
@@ -0,0 +1,18 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+import assert from 'node:assert';
+
+import type {TestScenario} from '../eval_gemini.ts';
+
+/**
+ * Eval scenario for network emulation.
+ *
+ * The prompt asks the model to emulate offline network conditions.
+ * `expectations` passes when the `calls` list contains an `emulate` call
+ * whose `args.networkConditions` is strictly equal to `'Offline'`.
+ * No `htmlRoute` is declared for this scenario.
+ */
+export const scenario: TestScenario = {
+  prompt: 'Emulate offline network conditions.',
+  maxTurns: 2,
+  expectations: calls => {
+    const emulate = calls.find(c => c.name === 'emulate');
+    assert.ok(emulate, 'Should call emulate tool');
+    assert.strictEqual(emulate.args.networkConditions, 'Offline');
+  },
+};
diff --git a/scripts/eval_scenarios/input_test.ts b/scripts/eval_scenarios/input_test.ts
new file mode 100644
index 000000000..b359884de
--- /dev/null
+++ b/scripts/eval_scenarios/input_test.ts
@@ -0,0 +1,38 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+import assert from 'node:assert';
+
+import type {TestScenario} from '../eval_gemini.ts';
+
+/**
+ * Eval scenario for form input.
+ *
+ * The prompt asks the model to open a page, fill an input with
+ * "hello world" and click a button. `expectations` passes when the `calls`
+ * list contains a navigation call (`navigate_page` or `new_page`), a `fill`
+ * call and a `click` call, and the first `fill` call found has
+ * `args.value === 'hello world'`.
+ *
+ * NOTE(review): the prompt reads "Go to , fill the input ..." with no target,
+ * and `htmlContent` is blank as shown here. A URL placeholder and the
+ * input/button markup may have been lost when this text was extracted;
+ * confirm against the repository.
+ */
+export const scenario: TestScenario = {
+  prompt:
+    'Go to , fill the input with "hello world" and click the button.',
+  maxTurns: 3,
+  htmlRoute: {
+    path: '/input_test.html',
+    htmlContent: `
+
+
+    `,
+  },
+  expectations: calls => {
+    // Expected sequence: navigate -> fill -> click.
+    // The model might take a snapshot in between, or issue calls in parallel if supported (though the standard loop usually runs sequential turns).
+    // So ordering is not asserted: we only check that each tool was called.
+
+    const navigate = calls.find(
+      c => c.name === 'navigate_page' || c.name === 'new_page',
+    );
+    const fill = calls.find(c => c.name === 'fill');
+    const click = calls.find(c => c.name === 'click');
+
+    assert.ok(navigate, 'Should navigate to the page');
+    assert.ok(fill, 'Should fill the input');
+    assert.ok(click, 'Should click the button');
+
+    assert.strictEqual(fill.args.value, 'hello world');
+  },
+};
diff --git a/scripts/eval_scenarios/network_test.ts b/scripts/eval_scenarios/network_test.ts
new file mode 100644
index 000000000..e1ace21e1
--- /dev/null
+++ b/scripts/eval_scenarios/network_test.ts
@@ -0,0 +1,31 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+import assert from 'node:assert';
+
+import type {TestScenario} from '../eval_gemini.ts';
+
+/**
+ * Eval scenario for network inspection.
+ *
+ * The prompt asks the model to navigate to a page and list all network
+ * requests. `expectations` passes when the `calls` list contains a
+ * navigation call (`navigate_page` or `new_page`) and a
+ * `list_network_requests` call. Call order and arguments are not checked.
+ *
+ * NOTE(review): the prompt reads "Navigate to and list ..." with no target,
+ * and `htmlContent` shows only the bare text "Network Test". A URL
+ * placeholder and the surrounding markup may have been lost when this text
+ * was extracted; confirm against the repository.
+ */
+export const scenario: TestScenario = {
+  prompt: 'Navigate to and list all network requests.',
+  maxTurns: 2,
+  htmlRoute: {
+    path: '/network_test.html',
+    htmlContent: `
+

Network Test

+
+    `,
+  },
+  expectations: calls => {
+    const navigate = calls.find(
+      c => c.name === 'navigate_page' || c.name === 'new_page',
+    );
+    const listRequests = calls.find(c => c.name === 'list_network_requests');
+
+    assert.ok(navigate, 'Should navigate to the page');
+    assert.ok(listRequests, 'Should list network requests');
+  },
+};