Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions scripts/eval_scenarios/console_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import assert from 'node:assert';

import type {TestScenario} from '../eval_gemini.ts';

/**
 * Eval scenario: ask the model to open a page and inspect its console output.
 *
 * The served page emits one `console.log` and one `console.error`. The
 * scenario passes when the recorded tool calls include a navigation call
 * (`navigate_page` or `new_page`) and a `list_console_messages` call.
 *
 * NOTE(review): `<TEST_URL>` is presumably substituted by the eval harness
 * with the URL serving `htmlRoute` — confirm in `eval_gemini.ts`.
 */
export const scenario: TestScenario = {
  prompt: 'Navigate to <TEST_URL> and check the console messages.',
  maxTurns: 2,
  htmlRoute: {
    path: '/console_test.html',
    // The empty first and last entries produce the leading and trailing
    // newline of the page body.
    htmlContent: [
      '',
      '<script>',
      "console.log('Test log message');",
      "console.error('Test error message');",
      '</script>',
      '',
    ].join('\n'),
  },
  expectations: calls => {
    // Either tool counts as navigating to the page.
    const navigationTools = ['navigate_page', 'new_page'];
    const navigationCall = calls.find(call =>
      navigationTools.includes(call.name),
    );
    const consoleListingCall = calls.find(
      call => call.name === 'list_console_messages',
    );

    // Only presence is verified; ordering and arguments are not checked.
    assert.ok(navigationCall, 'Should navigate to the page');
    assert.ok(consoleListingCall, 'Should list console messages');
  },
};
18 changes: 18 additions & 0 deletions scripts/eval_scenarios/emulation_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import assert from 'node:assert';

import type {TestScenario} from '../eval_gemini.ts';

/**
 * Eval scenario: ask the model to emulate offline network conditions.
 *
 * The scenario passes when the recorded tool calls contain an `emulate` call
 * and the first such call has `args.networkConditions` equal to `'Offline'`.
 */
export const scenario: TestScenario = {
  prompt: 'Emulate offline network conditions.',
  maxTurns: 2,
  expectations: calls => {
    // Only the first `emulate` call is inspected.
    const emulateCall = calls.find(call => call.name === 'emulate');
    assert.ok(emulateCall, 'Should call emulate tool');

    const requestedConditions = emulateCall.args.networkConditions;
    assert.strictEqual(requestedConditions, 'Offline');
  },
};
38 changes: 38 additions & 0 deletions scripts/eval_scenarios/input_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import assert from 'node:assert';

import type {TestScenario} from '../eval_gemini.ts';

/**
 * Eval scenario: ask the model to open a page, type into its text input and
 * click its button.
 *
 * The scenario passes when the recorded tool calls contain a navigation call
 * (`navigate_page` or `new_page`), a `fill` call and a `click` call, and the
 * first `fill` call has `args.value` equal to `'hello world'`.
 *
 * NOTE(review): `<TEST_URL>` is presumably substituted by the eval harness
 * with the URL serving `htmlRoute` — confirm in `eval_gemini.ts`.
 */
export const scenario: TestScenario = {
  prompt:
    'Go to <TEST_URL>, fill the input with "hello world" and click the button.',
  maxTurns: 3,
  htmlRoute: {
    path: '/input_test.html',
    // The empty first and last entries produce the leading and trailing
    // newline of the page body.
    htmlContent: [
      '',
      '<input type="text" id="test-input" />',
      '<button id="test-button">Submit</button>',
      '',
    ].join('\n'),
  },
  expectations: calls => {
    // The natural order is navigate -> fill -> click, but the model may take
    // snapshots in between or batch calls, so the order is deliberately not
    // asserted; only the presence of each tool call is.
    const navigationTools = ['navigate_page', 'new_page'];
    const navigationCall = calls.find(call =>
      navigationTools.includes(call.name),
    );
    const fillCall = calls.find(call => call.name === 'fill');
    const clickCall = calls.find(call => call.name === 'click');

    assert.ok(navigationCall, 'Should navigate to the page');
    assert.ok(fillCall, 'Should fill the input');
    assert.ok(clickCall, 'Should click the button');

    // Only the first `fill` call's value is checked.
    const typedValue = fillCall.args.value;
    assert.strictEqual(typedValue, 'hello world');
  },
};
31 changes: 31 additions & 0 deletions scripts/eval_scenarios/network_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import assert from 'node:assert';

import type {TestScenario} from '../eval_gemini.ts';

/**
 * Eval scenario: ask the model to open a page and list its network requests.
 *
 * The served page fetches its own URL so that at least one request is made.
 * The scenario passes when the recorded tool calls include a navigation call
 * (`navigate_page` or `new_page`) and a `list_network_requests` call.
 *
 * NOTE(review): `<TEST_URL>` is presumably substituted by the eval harness
 * with the URL serving `htmlRoute` — confirm in `eval_gemini.ts`.
 */
export const scenario: TestScenario = {
  prompt: 'Navigate to <TEST_URL> and list all network requests.',
  maxTurns: 2,
  htmlRoute: {
    path: '/network_test.html',
    // The empty first and last entries produce the leading and trailing
    // newline of the page body.
    htmlContent: [
      '',
      '<h1>Network Test</h1>',
      '<script>',
      "fetch('/network_test.html'); // Self fetch to ensure at least one request",
      '</script>',
      '',
    ].join('\n'),
  },
  expectations: calls => {
    // Either tool counts as navigating to the page.
    const navigationTools = ['navigate_page', 'new_page'];
    const navigationCall = calls.find(call =>
      navigationTools.includes(call.name),
    );
    const requestListingCall = calls.find(
      call => call.name === 'list_network_requests',
    );

    // Only presence is verified; ordering and arguments are not checked.
    assert.ok(navigationCall, 'Should navigate to the page');
    assert.ok(requestListingCall, 'Should list network requests');
  },
};