Skip to content

Commit 5f2f80c

Browse files
committed
handle complex inPage tool responses
1 parent da33cb5 commit 5f2f80c

6 files changed

Lines changed: 759 additions & 21 deletions

File tree

src/McpContext.ts

Lines changed: 161 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,16 @@ import type {
2929
Viewport,
3030
Target,
3131
} from './third_party/index.js';
32-
import type {DevTools} from './third_party/index.js';
33-
import {Locator} from './third_party/index.js';
32+
import type {DevTools, Protocol} from './third_party/index.js';
33+
import {Locator, type ElementHandle} from './third_party/index.js';
3434
import {PredefinedNetworkConditions} from './third_party/index.js';
3535
import {listPages} from './tools/pages.js';
3636
import {CLOSE_PAGE_ERROR} from './tools/ToolDefinition.js';
3737
import type {
3838
Context,
3939
DevToolsData,
4040
SupportedExtensions,
41+
ContextPage,
4142
} from './tools/ToolDefinition.js';
4243
import type {TraceResult} from './trace-processing/parse.js';
4344
import type {
@@ -79,7 +80,7 @@ export class McpContext implements Context {
7980
#extensionServiceWorkers: ExtensionServiceWorker[] = [];
8081

8182
#mcpPages = new Map<Page, McpPage>();
82-
#selectedPage?: McpPage;
83+
#selectedPage?: ContextPage;
8384
#networkCollector: NetworkCollector;
8485
#consoleCollector: ConsoleCollector;
8586
#devtoolsUniverseManager: UniverseManager;
@@ -165,7 +166,10 @@ export class McpContext implements Context {
165166
return context;
166167
}
167168

168-
resolveCdpRequestId(page: McpPage, cdpRequestId: string): number | undefined {
169+
resolveCdpRequestId(
170+
page: ContextPage,
171+
cdpRequestId: string,
172+
): number | undefined {
169173
if (!cdpRequestId) {
170174
this.logger('no network request');
171175
return;
@@ -182,14 +186,14 @@ export class McpContext implements Context {
182186
}
183187

184188
resolveCdpElementId(
185-
page: McpPage,
189+
page: ContextPage,
186190
cdpBackendNodeId: number,
187191
): string | undefined {
188192
if (!cdpBackendNodeId) {
189193
this.logger('no cdpBackendNodeId');
190194
return;
191195
}
192-
const snapshot = page.textSnapshot;
196+
const snapshot = page.getSnapshot();
193197
if (!snapshot) {
194198
this.logger('no text snapshot');
195199
return;
@@ -282,7 +286,7 @@ export class McpContext implements Context {
282286
return this.#networkCollector.getById(page.pptrPage, reqid);
283287
}
284288

285-
async restoreEmulation(page: McpPage) {
289+
async restoreEmulation(page: ContextPage) {
286290
const currentSetting = page.emulationSettings;
287291
await this.emulate(currentSetting, page.pptrPage);
288292
}
@@ -448,7 +452,7 @@ export class McpContext implements Context {
448452
return this.#selectedPage?.pptrPage === page;
449453
}
450454

451-
selectPage(newPage: McpPage): void {
455+
selectPage(newPage: ContextPage): void {
452456
this.#selectedPage = newPage;
453457
this.#updateSelectedPageTimeouts();
454458
}
@@ -681,7 +685,7 @@ export class McpContext implements Context {
681685
return this.#mcpPages.get(page)?.devToolsPage;
682686
}
683687

684-
async getDevToolsData(page: McpPage): Promise<DevToolsData> {
688+
async getDevToolsData(page: ContextPage): Promise<DevToolsData> {
685689
try {
686690
this.logger('Getting DevTools UI data');
687691
const devtoolsPage = this.getDevToolsPage(page.pptrPage);
@@ -718,9 +722,10 @@ export class McpContext implements Context {
718722
* Creates a text snapshot of a page.
719723
*/
720724
async createTextSnapshot(
721-
page: McpPage,
725+
page: ContextPage,
722726
verbose = false,
723727
devtoolsData: DevToolsData | undefined = undefined,
728+
extraHandles?: ElementHandle[],
724729
): Promise<void> {
725730
const rootNode = await page.pptrPage.accessibility.snapshot({
726731
includeIframes: true,
@@ -774,14 +779,159 @@ export class McpContext implements Context {
774779
};
775780

776781
const rootNodeWithId = assignIds(rootNode);
782+
783+
/**
 * Builds a snapshot node for an element handle that should appear in the
 * text snapshot in addition to the accessibility-tree nodes.
 *
 * Relies on `seenUniqueIds`, `uniqueBackendNodeIdToMcpId`, `snapshotId` and
 * `idCounter` from the enclosing scope (declared outside this hunk).
 *
 * @param handle - Element handle to represent in the snapshot.
 * @returns The new node, or `null` when the handle has no backend node id or
 * a node for the same backend node id was already created.
 */
const createExtraNode = async (
784+
handle: ElementHandle,
785+
): Promise<TextSnapshotNode | null> => {
786+
const backendNodeId = await handle.backendNodeId();
787+
if (!backendNodeId) {
788+
return null;
789+
}
790+
// Extra nodes are keyed with a `custom_` prefix in the shared id maps.
const uniqueBackendId = `custom_${backendNodeId}`;
791+
if (seenUniqueIds.has(uniqueBackendId)) {
792+
return null;
793+
}
794+
795+
// Reuse the id previously mapped to this key if there is one; otherwise
// allocate the next `${snapshotId}_${counter}` id and remember it.
let id = '';
796+
if (uniqueBackendNodeIdToMcpId.has(uniqueBackendId)) {
797+
id = uniqueBackendNodeIdToMcpId.get(uniqueBackendId)!;
798+
} else {
799+
id = `${snapshotId}_${idCounter++}`;
800+
uniqueBackendNodeIdToMcpId.set(uniqueBackendId, id);
801+
}
802+
seenUniqueIds.add(uniqueBackendId);
803+
804+
// The element's `localName` is used as the node's role.
// NOTE(review): `tagHandle` is never disposed in this block, unlike the
// handles in findAncestorNode — consider disposing it after jsonValue().
const tagHandle = await handle.getProperty('localName');
805+
const tagValue = await tagHandle.jsonValue();
806+
const extraNode: TextSnapshotNode = {
807+
role: tagValue,
808+
id,
809+
backendNodeId,
810+
children: [],
811+
elementHandle: async () => handle,
812+
};
813+
return extraNode;
814+
};
815+
816+
/**
 * Walks up the DOM from `handle` via `parentElement` and returns the first
 * ancestor that already has a node in `idToNode` (matched by backend node id).
 *
 * Every intermediate ancestor handle created here is disposed before moving
 * on or returning.
 *
 * @param handle - Element whose nearest snapshotted ancestor is wanted.
 * @returns The matching snapshot node, or `null` when the walk reaches a
 * handle that is not an element without finding a match.
 */
const findAncestorNode = async (
817+
handle: ElementHandle,
818+
): Promise<TextSnapshotNode | null> => {
819+
let ancestorHandle = await handle.evaluateHandle(el => el.parentElement);
820+
821+
while (ancestorHandle) {
822+
const ancestorElement = ancestorHandle.asElement();
823+
// A non-element handle ends the walk (presumably `parentElement` evaluated
// to null at the top of the tree — confirm).
if (!ancestorElement) {
824+
await ancestorHandle.dispose();
825+
return null;
826+
}
827+
828+
const ancestorBackendId = await ancestorElement.backendNodeId();
829+
if (ancestorBackendId) {
830+
// NOTE(review): assuming `idToNode` is a Map, `.values().find(...)` relies
// on iterator helper methods — confirm the minimum supported runtime has
// them. This is also a linear scan of all nodes for every ancestor visited.
const ancestorNode = idToNode
831+
.values()
832+
.find(node => node.backendNodeId === ancestorBackendId);
833+
if (ancestorNode) {
834+
await ancestorHandle.dispose();
835+
return ancestorNode;
836+
}
837+
}
838+
839+
// No match at this level: step to the parent and release the current handle.
const nextHandle = await ancestorElement.evaluateHandle(
840+
el => el.parentElement,
841+
);
842+
await ancestorHandle.dispose();
843+
ancestorHandle = nextHandle;
844+
}
845+
return null;
846+
};
847+
848+
/**
 * Collects the backend node ids found under the given node by issuing a CDP
 * `DOM.describeNode` call and walking the returned tree's `children`.
 *
 * Best-effort: any error is logged and the ids gathered so far are returned
 * (an empty set if the CDP call itself failed or no client is available).
 *
 * @param backendNodeId - Backend node id of the subtree root; it is excluded
 * from the result.
 * @returns Set of backend node ids reachable through `children`.
 */
const findDescendantNodes = async (
849+
backendNodeId: number,
850+
): Promise<Set<number>> => {
851+
const descendantIds = new Set<number>();
852+
try {
853+
// Uses a non-public page member to obtain a CDP client, hence the
// suppression directive below.
// @ts-expect-error internal API
854+
const client = page.pptrPage._client();
855+
if (client) {
856+
const {node}: {node: Protocol.DOM.Node} = await client.send(
857+
'DOM.describeNode',
858+
{
859+
backendNodeId,
860+
// Per the CDP docs, -1 requests the entire subtree.
depth: -1,
861+
pierce: true,
862+
},
863+
);
864+
// Recursively records every node id except the root's own.
// NOTE(review): only `children` is traversed. The CDP Node type reports
// shadow roots and iframe documents in other fields (e.g. `shadowRoots`,
// `contentDocument`), so those are not visited here — confirm whether
// that is intended given `pierce: true`.
const collect = (node: Protocol.DOM.Node) => {
865+
if (node.backendNodeId && node.backendNodeId !== backendNodeId) {
866+
descendantIds.add(node.backendNodeId);
867+
}
868+
if (node.children) {
869+
for (const child of node.children) {
870+
collect(child);
871+
}
872+
}
873+
};
874+
collect(node);
875+
}
876+
} catch (e) {
877+
this.logger(
878+
`Failed to collect descendants for backend node ${backendNodeId}`,
879+
e,
880+
);
881+
}
882+
return descendantIds;
883+
};
884+
885+
/**
 * Re-parents direct children of `attachTarget` under `extraNode` when their
 * backend node id is listed in `descendantIds`. Both nodes are mutated:
 * moved children are appended to `extraNode.children` in their original
 * order and `attachTarget.children` is replaced by the remaining children.
 *
 * Only direct children of `attachTarget` are examined; deeper nodes are
 * left where they are.
 *
 * @param attachTarget - Node whose children may be moved.
 * @param extraNode - Node that receives the moved children.
 * @param descendantIds - Backend node ids that belong under `extraNode`.
 * @returns The position, within the updated `attachTarget.children`, where
 * the first moved child used to sit. When nothing was moved, the length of
 * `attachTarget.children`, or 0 if it has no children array.
 */
const moveChildNodes = (
886+
attachTarget: TextSnapshotNode,
887+
extraNode: TextSnapshotNode,
888+
descendantIds: Set<number>,
889+
): number => {
890+
let firstMovedIndex = -1;
891+
if (descendantIds.size > 0 && attachTarget.children) {
892+
const remainingChildren: TextSnapshotNode[] = [];
893+
for (const child of attachTarget.children) {
894+
if (child.backendNodeId && descendantIds.has(child.backendNodeId)) {
895+
// The number of children kept so far is the slot the first moved child
// occupied once the moved ones are removed.
if (firstMovedIndex === -1) {
896+
firstMovedIndex = remainingChildren.length;
897+
}
898+
extraNode.children.push(child);
899+
} else {
900+
remainingChildren.push(child);
901+
}
902+
}
903+
attachTarget.children = remainingChildren;
904+
}
905+
return firstMovedIndex !== -1
906+
? firstMovedIndex
907+
: attachTarget.children
908+
? attachTarget.children.length
909+
: 0;
910+
};
911+
912+
// Store newly supplied handles on the page. When none are supplied, the
// handles already stored on the page (if any) are used by the loop below.
if (extraHandles) {
913+
page.setExtraHandles(extraHandles);
914+
}
915+
// Insert one snapshot node per extra handle into the tree.
for (const handle of page.getExtraHandles() ?? []) {
916+
const extraNode = await createExtraNode(handle);
917+
if (!extraNode) {
918+
continue;
919+
}
920+
idToNode.set(extraNode.id, extraNode);
921+
// Attach under the nearest ancestor already in the snapshot, or under the
// root when there is none.
const attachTarget = (await findAncestorNode(handle)) || rootNodeWithId;
922+
// createExtraNode only returns nodes with a backend node id set, which is
// what the non-null assertion relies on.
const descendantIds = await findDescendantNodes(extraNode.backendNodeId!);
923+
const index = moveChildNodes(attachTarget, extraNode, descendantIds);
924+
// NOTE(review): moveChildNodes treats `attachTarget.children` as possibly
// missing, but this splice does not guard for that — confirm `children`
// is always defined on TextSnapshotNode.
attachTarget.children.splice(index, 0, extraNode);
925+
}
926+
777927
const snapshot: TextSnapshot = {
778928
root: rootNodeWithId,
779929
snapshotId: String(snapshotId),
780930
idToNode,
781931
hasSelectedElement: false,
782932
verbose,
783933
};
784-
page.textSnapshot = snapshot;
934+
page.setSnapshot(snapshot);
785935
const data = devtoolsData ?? (await this.getDevToolsData(page));
786936
if (data?.cdpBackendNodeId) {
787937
snapshot.hasSelectedElement = true;

src/McpPage.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ export class McpPage implements ContextPage {
4040
// Snapshot
4141
textSnapshot: TextSnapshot | null = null;
4242
uniqueBackendNodeIdToMcpId = new Map<string, string>();
43+
extraHandles?: ElementHandle[];
4344

4445
// Emulation
4546
emulationSettings: EmulationSettings = {};
@@ -164,4 +165,20 @@ export class McpPage implements ContextPage {
164165
getAXNodeByUid(uid: string) {
165166
return this.textSnapshot?.idToNode.get(uid);
166167
}
168+
169+
/** Returns the text snapshot currently stored on this page, or `null` if none is set. */
getSnapshot(): TextSnapshot | null {
170+
return this.textSnapshot;
171+
}
172+
173+
/** Replaces the text snapshot stored on this page with `snapshot`. */
setSnapshot(snapshot: TextSnapshot): void {
174+
this.textSnapshot = snapshot;
175+
}
176+
177+
/** Returns the extra element handles stored on this page, or `undefined` if none were set. */
getExtraHandles(): ElementHandle[] | undefined {
178+
return this.extraHandles;
179+
}
180+
181+
/**
 * Replaces the extra element handles stored on this page. The array is kept
 * by reference, not copied.
 */
setExtraHandles(extraHandles: ElementHandle[]): void {
182+
this.extraHandles = extraHandles;
183+
}
167184
}

src/tools/ToolDefinition.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ import type {
2020
TextSnapshotNode,
2121
GeolocationOptions,
2222
ExtensionServiceWorker,
23+
TextSnapshot,
24+
EmulationSettings,
2325
} from '../types.js';
2426
import type {InstalledExtension} from '../utils/ExtensionRegistry.js';
2527
import type {PaginationOptions} from '../utils/types.js';
@@ -220,6 +222,16 @@ export type Context = Readonly<{
220222
triggerExtensionAction(id: string): Promise<void>;
221223
listExtensions(): InstalledExtension[];
222224
getExtension(id: string): InstalledExtension | undefined;
225+
resolveCdpElementId(
226+
page: ContextPage,
227+
cdpBackendNodeId: number,
228+
): string | undefined;
229+
createTextSnapshot(
230+
page: ContextPage,
231+
verbose: boolean,
232+
devtoolsData: DevToolsData | undefined,
233+
extraHandles?: ElementHandle[],
234+
): Promise<void>;
223235
getSelectedMcpPage(): McpPage;
224236
getExtensionServiceWorkers(): ExtensionServiceWorker[];
225237
getExtensionServiceWorkerId(
@@ -250,6 +262,12 @@ export type ContextPage = Readonly<{
250262
options?: {timeout?: number; handleDialog?: 'accept' | 'dismiss' | string},
251263
): Promise<void>;
252264
getInPageTools(): ToolGroup<InPageToolDefinition> | undefined;
265+
getSnapshot(): TextSnapshot | null;
266+
setSnapshot(snapshot: TextSnapshot): void;
267+
getExtraHandles(): ElementHandle[] | undefined;
268+
setExtraHandles(extraHandles: ElementHandle[]): void;
269+
readonly uniqueBackendNodeIdToMcpId: Map<string, string>;
270+
readonly emulationSettings: EmulationSettings;
253271
}>;
254272

255273
export function defineTool<Schema extends zod.ZodRawShape>(

0 commit comments

Comments
 (0)