@@ -18,6 +18,7 @@ import {StdioClientTransport} from '@modelcontextprotocol/sdk/client/stdio.js';
1818
// Absolute path of the directory one level above this module's directory;
// used below as the base for locating the compiled server (`build/src/index.js`).
1919const ROOT_DIR = path . resolve ( import . meta. dirname , '..' ) ;
// The `eval_scenarios` directory that sits next to this module.
2020const SCENARIOS_DIR = path . join ( import . meta. dirname , 'eval_scenarios' ) ;
21+ import { TestServer } from '../build/tests/server.js' ;
2122
2223// Define schema for our test scenarios
2324export interface CapturedFunctionCall {
@@ -29,6 +30,10 @@ export interface TestScenario {
2930 prompt : string ;
3031 maxTurns : number ;
3132 expectations : ( calls : CapturedFunctionCall [ ] ) => void ;
/**
 * Optional HTML fixture for the scenario. When present, the runner starts a
 * `TestServer` on a random port, registers `htmlContent` under `path`, and
 * substitutes the resulting route URL for the `<TEST_URL>` placeholder in
 * `prompt`.
 *
 * NOTE(review): the substitution uses `String.prototype.replace` with a string
 * pattern, so only the first `<TEST_URL>` occurrence in the prompt is replaced.
 */
33+ htmlRoute ?: {
/** Route path the fixture is registered under (passed to `addHtmlRoute` / `getRoute`). */
34+ path : string ;
/** HTML content handed to `addHtmlRoute` for that route. */
35+ htmlContent : string ;
36+ } ;
3237}
3338
3439async function loadScenario ( scenarioPath : string ) : Promise < TestScenario > {
@@ -84,10 +89,24 @@ async function runSingleScenario(
8489
8590 let client : Client | undefined ;
8691 let transport : StdioClientTransport | undefined ;
92+ let server : TestServer | undefined ;
8793
8894 try {
8995 const scenario = await loadScenario ( absolutePath ) ;
9096
97+ if ( scenario . htmlRoute ) {
98+ server = new TestServer ( TestServer . randomPort ( ) ) ;
99+ await server . start ( ) ;
100+ server . addHtmlRoute (
101+ scenario . htmlRoute . path ,
102+ scenario . htmlRoute . htmlContent ,
103+ ) ;
104+ scenario . prompt = scenario . prompt . replace (
105+ '<TEST_URL>' ,
106+ server . getRoute ( scenario . htmlRoute . path ) ,
107+ ) ;
108+ }
109+
91110 // Path to the compiled MCP server
92111 const serverPath = path . join ( ROOT_DIR , 'build/src/index.js' ) ;
93112 if ( ! fs . existsSync ( serverPath ) ) {
@@ -148,7 +167,7 @@ async function runSingleScenario(
148167
149168 const genAI = new GoogleGenerativeAI ( apiKey ) ;
150169 const model = genAI . getGenerativeModel ( {
151- model : 'gemini-3-pro-preview ' ,
170+ model : 'gemini-2.5-flash ' ,
152171 tools : [ { functionDeclarations} ] ,
153172 } ) ;
154173
@@ -167,7 +186,9 @@ async function runSingleScenario(
167186 console . log ( `\n--- Turn 1 (User) ---` ) ;
168187 console . log ( scenario . prompt ) ;
169188
170- let result = await chat . sendMessage ( scenario . prompt ) ;
189+ let result = await chat . sendMessage ( scenario . prompt , {
190+ timeout : 5000 ,
191+ } ) ;
171192 let response = result . response ;
172193
173194 while ( turnCount < scenario . maxTurns ) {
@@ -248,6 +269,7 @@ async function runSingleScenario(
248269 } finally {
249270 await client ?. close ( ) ;
250271 await transport ?. close ( ) ;
272+ await server ?. stop ( ) ;
251273 }
252274}
// Gemini API key read from the GEMINI_API_KEY environment variable;
// `undefined` when the variable is not set.
253275const apiKey = process . env . GEMINI_API_KEY ;
0 commit comments