diff --git a/scripts/eval_gemini.ts b/scripts/eval_gemini.ts
index d75a2a532..dbb30dc45 100644
--- a/scripts/eval_gemini.ts
+++ b/scripts/eval_gemini.ts
@@ -142,7 +142,9 @@ async function runSingleScenario(
       name: request.name,
       args: (request.arguments as Record) || {},
     });
-    return originalCallTool(request, schema);
+    const response = await originalCallTool(request, schema);
+    debugLog(`Tool response: ${JSON.stringify(response)}`);
+    return response;
   };
 
   const ai = new GoogleGenAI({apiKey});
diff --git a/scripts/eval_scenarios/select_page_test.ts b/scripts/eval_scenarios/select_page_test.ts
new file mode 100644
index 000000000..a8bfba328
--- /dev/null
+++ b/scripts/eval_scenarios/select_page_test.ts
@@ -0,0 +1,61 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import assert from 'node:assert';
+
+import type {TestScenario} from '../eval_gemini.ts';
+
+/**
+ * Eval scenario for page selection after opening two pages.
+ *
+ * The expectations require exactly three tool calls: two `new_page` calls
+ * followed by one `select_page` call with `pageId` 2 and no explicit
+ * `bringToFront` argument.
+ */
+export const scenario: TestScenario = {
+  prompt:
+    'Open new page and then open new page https://developers.chrome.com. Select the page.',
+  maxTurns: 3,
+  htmlRoute: {
+    path: '/test.html',
+    htmlContent: `
+
+test
+
+    `,
+  },
+  expectations: calls => {
+    assert.strictEqual(calls.length, 3);
+    // strictEqual keeps the actual tool name on the AssertionError, unlike
+    // ok(a === b), which only ever reports `false`.
+    assert.strictEqual(
+      calls[0].name,
+      'new_page',
+      'First call should be new_page',
+    );
+    assert.strictEqual(
+      calls[1].name,
+      'new_page',
+      'Second call should be new_page',
+    );
+    assert.strictEqual(
+      calls[2].name,
+      'select_page',
+      'Third call should be select_page',
+    );
+    assert.strictEqual(
+      calls[2].args.pageId,
+      2,
+      'PageId has to be set to 2. about:blank is 1, the first new page is 2, https://developers.chrome.com is 3.',
+    );
+    // The model should not pass bringToFront explicitly.
+    assert.strictEqual(
+      calls[2].args.bringToFront,
+      undefined,
+      'bringToFront should use the default value.',
+    );
+  },
+};