Skip to content

Commit fed35b9

Browse files
committed
in-page tool output: handle DOM elements and limit depth
1 parent 7fa7857 commit fed35b9

6 files changed

Lines changed: 712 additions & 22 deletions

File tree

src/McpContext.ts

Lines changed: 165 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ import {
1616
type ListenerMap,
1717
type UncaughtError,
1818
} from './PageCollector.js';
19-
import type {DevTools} from './third_party/index.js';
19+
import type {DevTools, Protocol} from './third_party/index.js';
2020
import type {
2121
Browser,
2222
BrowserContext,
@@ -29,11 +29,15 @@ import type {
2929
Viewport,
3030
Target,
3131
} from './third_party/index.js';
32-
import {Locator} from './third_party/index.js';
32+
import {Locator, type ElementHandle} from './third_party/index.js';
3333
import {PredefinedNetworkConditions} from './third_party/index.js';
3434
import {listPages} from './tools/pages.js';
3535
import {CLOSE_PAGE_ERROR} from './tools/ToolDefinition.js';
36-
import type {Context, DevToolsData} from './tools/ToolDefinition.js';
36+
import type {
37+
Context,
38+
DevToolsData,
39+
ContextPage,
40+
} from './tools/ToolDefinition.js';
3741
import type {TraceResult} from './trace-processing/parse.js';
3842
import type {
3943
EmulationSettings,
@@ -74,7 +78,7 @@ export class McpContext implements Context {
7478
#extensionServiceWorkers: ExtensionServiceWorker[] = [];
7579

7680
#mcpPages = new Map<Page, McpPage>();
77-
#selectedPage?: McpPage;
81+
#selectedPage?: ContextPage;
7882
#networkCollector: NetworkCollector;
7983
#consoleCollector: ConsoleCollector;
8084
#devtoolsUniverseManager: UniverseManager;
@@ -159,7 +163,10 @@ export class McpContext implements Context {
159163
return context;
160164
}
161165

162-
resolveCdpRequestId(page: McpPage, cdpRequestId: string): number | undefined {
166+
resolveCdpRequestId(
167+
page: ContextPage,
168+
cdpRequestId: string,
169+
): number | undefined {
163170
if (!cdpRequestId) {
164171
this.logger('no network request');
165172
return;
@@ -176,14 +183,14 @@ export class McpContext implements Context {
176183
}
177184

178185
resolveCdpElementId(
179-
page: McpPage,
186+
page: ContextPage,
180187
cdpBackendNodeId: number,
181188
): string | undefined {
182189
if (!cdpBackendNodeId) {
183190
this.logger('no cdpBackendNodeId');
184191
return;
185192
}
186-
const snapshot = page.textSnapshot;
193+
const snapshot = page.getSnapshot();
187194
if (!snapshot) {
188195
this.logger('no text snapshot');
189196
return;
@@ -276,7 +283,7 @@ export class McpContext implements Context {
276283
return this.#networkCollector.getById(page.pptrPage, reqid);
277284
}
278285

279-
async restoreEmulation(page: McpPage) {
286+
async restoreEmulation(page: ContextPage) {
280287
const currentSetting = page.emulationSettings;
281288
await this.emulate(currentSetting, page.pptrPage);
282289
}
@@ -442,7 +449,7 @@ export class McpContext implements Context {
442449
return this.#selectedPage?.pptrPage === page;
443450
}
444451

445-
selectPage(newPage: McpPage): void {
452+
selectPage(newPage: ContextPage): void {
446453
this.#selectedPage = newPage;
447454
this.#updateSelectedPageTimeouts();
448455
}
@@ -675,7 +682,7 @@ export class McpContext implements Context {
675682
return this.#mcpPages.get(page)?.devToolsPage;
676683
}
677684

678-
async getDevToolsData(page: McpPage): Promise<DevToolsData> {
685+
async getDevToolsData(page: ContextPage): Promise<DevToolsData> {
679686
try {
680687
this.logger('Getting DevTools UI data');
681688
const devtoolsPage = this.getDevToolsPage(page.pptrPage);
@@ -712,9 +719,10 @@ export class McpContext implements Context {
712719
* Creates a text snapshot of a page.
713720
*/
714721
async createTextSnapshot(
715-
page: McpPage,
722+
page: ContextPage,
716723
verbose = false,
717724
devtoolsData: DevToolsData | undefined = undefined,
725+
extraHandles?: ElementHandle[],
718726
): Promise<void> {
719727
const rootNode = await page.pptrPage.accessibility.snapshot({
720728
includeIframes: true,
@@ -768,14 +776,159 @@ export class McpContext implements Context {
768776
};
769777

770778
const rootNodeWithId = assignIds(rootNode);
779+
780+
/**
 * Builds a snapshot node for an element handle.
 *
 * Returns `null` when the handle has no backend node id, or when its
 * `custom_<backendNodeId>` key was already recorded in `seenUniqueIds`.
 * Reuses the id previously stored for that key in
 * `uniqueBackendNodeIdToMcpId`; otherwise allocates a new
 * `<snapshotId>_<counter>` id and stores it.
 *
 * @param handle - Element to represent in the snapshot.
 * @returns The new node (with no children yet), or `null` if skipped.
 */
const createExtraNode = async (
  handle: ElementHandle,
): Promise<TextSnapshotNode | null> => {
  const backendNodeId = await handle.backendNodeId();
  if (!backendNodeId) {
    return null;
  }
  const uniqueBackendId = `custom_${backendNodeId}`;
  if (seenUniqueIds.has(uniqueBackendId)) {
    return null;
  }

  let id = uniqueBackendNodeIdToMcpId.get(uniqueBackendId);
  if (id === undefined) {
    id = `${snapshotId}_${idCounter++}`;
    uniqueBackendNodeIdToMcpId.set(uniqueBackendId, id);
  }
  seenUniqueIds.add(uniqueBackendId);

  // Reads the element's tag name and always releases the temporary property
  // handle, so repeated snapshots do not accumulate remote object handles.
  const readLocalName = async () => {
    const tagHandle = await handle.getProperty('localName');
    try {
      return await tagHandle.jsonValue();
    } finally {
      await tagHandle.dispose();
    }
  };

  const extraNode: TextSnapshotNode = {
    // The tag name is used as the role of the extra node.
    role: await readLocalName(),
    id,
    backendNodeId,
    children: [],
    elementHandle: async () => handle,
  };
  return extraNode;
};
812+
813+
/**
 * Walks up `parentElement` links from `handle` and returns the first
 * ancestor whose backend node id matches a node already registered in
 * `idToNode`.
 *
 * Each intermediate ancestor handle is disposed before moving on, including
 * when a protocol call rejects.
 *
 * @param handle - Element whose ancestors are searched.
 * @returns The matching snapshot node, or `null` once the chain ends (the
 * parent is not an element) without a match.
 */
const findAncestorNode = async (
  handle: ElementHandle,
): Promise<TextSnapshotNode | null> => {
  let ancestorHandle = await handle.evaluateHandle(el => el.parentElement);

  while (ancestorHandle) {
    const currentHandle = ancestorHandle;
    try {
      const ancestorElement = currentHandle.asElement();
      if (!ancestorElement) {
        // parentElement evaluated to a non-element: top of the chain.
        return null;
      }

      const ancestorBackendId = await ancestorElement.backendNodeId();
      if (ancestorBackendId) {
        // Plain iteration avoids relying on iterator-helper methods.
        for (const node of idToNode.values()) {
          if (node.backendNodeId === ancestorBackendId) {
            return node;
          }
        }
      }

      ancestorHandle = await ancestorElement.evaluateHandle(
        el => el.parentElement,
      );
    } finally {
      // Runs on return, on normal advance, and on rejection alike.
      await currentHandle.dispose();
    }
  }
  return null;
};
844+
845+
/**
 * Collects the backend node ids of every DOM descendant of the node
 * identified by `backendNodeId`, using CDP `DOM.describeNode` with an
 * unlimited depth and `pierce: true`.
 *
 * With `pierce` enabled, shadow roots and frame documents are reported under
 * `shadowRoots` and `contentDocument` rather than `children`, so those
 * fields are traversed as well.
 *
 * Failures are logged and yield whatever was collected so far (best effort).
 *
 * @param backendNodeId - Backend id of the subtree root; it is excluded
 * from the result.
 * @returns The set of descendant backend node ids (possibly empty).
 */
const findDescendantNodes = async (
  backendNodeId: number,
): Promise<Set<number>> => {
  const descendantIds = new Set<number>();
  try {
    // @ts-expect-error internal API
    const client = page.pptrPage._client();
    if (client) {
      const {node}: {node: Protocol.DOM.Node} = await client.send(
        'DOM.describeNode',
        {
          backendNodeId,
          depth: -1,
          pierce: true,
        },
      );
      const collect = (current: Protocol.DOM.Node) => {
        if (current.backendNodeId && current.backendNodeId !== backendNodeId) {
          descendantIds.add(current.backendNodeId);
        }
        const nested: Protocol.DOM.Node[] = [
          ...(current.children ?? []),
          ...(current.shadowRoots ?? []),
        ];
        if (current.contentDocument) {
          nested.push(current.contentDocument);
        }
        for (const child of nested) {
          collect(child);
        }
      };
      collect(node);
    }
  } catch (e) {
    this.logger(
      `Failed to collect descendants for backend node ${backendNodeId}`,
      e,
    );
  }
  return descendantIds;
};
881+
882+
/**
 * Re-parents the direct children of `attachTarget` whose backend node id is
 * in `descendantIds` onto `extraNode`, preserving their relative order.
 *
 * @param attachTarget - Node whose `children` array is filtered in place
 * (replaced by the children that were not moved).
 * @param extraNode - Node that receives the moved children.
 * @param descendantIds - Backend node ids considered descendants of
 * `extraNode`.
 * @returns The index in `attachTarget.children` where `extraNode` should be
 * inserted: the position of the first moved child, or the end of the list
 * when nothing moved (0 when the target has no children array).
 */
const moveChildNodes = (
  attachTarget: TextSnapshotNode,
  extraNode: TextSnapshotNode,
  descendantIds: Set<number>,
): number => {
  const siblings = attachTarget.children;
  if (!siblings) {
    return 0;
  }
  if (descendantIds.size === 0) {
    return siblings.length;
  }

  const kept: TextSnapshotNode[] = [];
  let insertAt: number | undefined;
  for (const sibling of siblings) {
    const siblingBackendId = sibling.backendNodeId;
    if (!siblingBackendId || !descendantIds.has(siblingBackendId)) {
      kept.push(sibling);
      continue;
    }
    if (insertAt === undefined) {
      // Remember where the first moved child used to sit.
      insertAt = kept.length;
    }
    extraNode.children.push(sibling);
  }
  attachTarget.children = kept;

  return insertAt === undefined ? kept.length : insertAt;
};
908+
909+
if (extraHandles) {
910+
page.setExtraHandles(extraHandles);
911+
}
912+
for (const handle of page.getExtraHandles() ?? []) {
913+
const extraNode = await createExtraNode(handle);
914+
if (!extraNode) {
915+
continue;
916+
}
917+
idToNode.set(extraNode.id, extraNode);
918+
const attachTarget = (await findAncestorNode(handle)) || rootNodeWithId;
919+
const descendantIds = await findDescendantNodes(extraNode.backendNodeId!);
920+
const index = moveChildNodes(attachTarget, extraNode, descendantIds);
921+
attachTarget.children.splice(index, 0, extraNode);
922+
}
923+
771924
const snapshot: TextSnapshot = {
772925
root: rootNodeWithId,
773926
snapshotId: String(snapshotId),
774927
idToNode,
775928
hasSelectedElement: false,
776929
verbose,
777930
};
778-
page.textSnapshot = snapshot;
931+
page.setSnapshot(snapshot);
779932
const data = devtoolsData ?? (await this.getDevToolsData(page));
780933
if (data?.cdpBackendNodeId) {
781934
snapshot.hasSelectedElement = true;

src/McpPage.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ export class McpPage implements ContextPage {
3939
// Snapshot
4040
textSnapshot: TextSnapshot | null = null;
4141
uniqueBackendNodeIdToMcpId = new Map<string, string>();
42+
extraHandles?: ElementHandle[];
4243

4344
// Emulation
4445
emulationSettings: EmulationSettings = {};
@@ -159,4 +160,20 @@ export class McpPage implements ContextPage {
159160
/**
 * Looks up a node by uid in the current text snapshot's `idToNode` map.
 * Evaluates to `undefined` when there is no snapshot or no entry for `uid`.
 */
getAXNodeByUid(uid: string) {
160161
return this.textSnapshot?.idToNode.get(uid);
161162
}
163+
164+
/** Returns the stored text snapshot, or `null` if none has been set. */
getSnapshot(): TextSnapshot | null {
165+
return this.textSnapshot;
166+
}
167+
168+
/** Replaces the stored text snapshot with `snapshot`. */
setSnapshot(snapshot: TextSnapshot): void {
169+
this.textSnapshot = snapshot;
170+
}
171+
172+
/** Returns the stored extra element handles, or `undefined` if none were set. */
getExtraHandles(): ElementHandle[] | undefined {
173+
return this.extraHandles;
174+
}
175+
176+
/** Replaces the stored extra element handles with `extraHandles`. */
setExtraHandles(extraHandles: ElementHandle[]): void {
177+
this.extraHandles = extraHandles;
178+
}
162179
}

src/tools/ToolDefinition.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ import type {
1919
TextSnapshotNode,
2020
GeolocationOptions,
2121
ExtensionServiceWorker,
22+
TextSnapshot,
23+
EmulationSettings,
2224
} from '../types.js';
2325
import type {InstalledExtension} from '../utils/ExtensionRegistry.js';
2426
import type {PaginationOptions} from '../utils/types.js';
@@ -194,6 +196,16 @@ export type Context = Readonly<{
194196
triggerExtensionAction(id: string): Promise<void>;
195197
listExtensions(): InstalledExtension[];
196198
getExtension(id: string): InstalledExtension | undefined;
199+
resolveCdpElementId(
200+
page: ContextPage,
201+
cdpBackendNodeId: number,
202+
): string | undefined;
203+
createTextSnapshot(
204+
page: ContextPage,
205+
verbose: boolean,
206+
devtoolsData: DevToolsData | undefined,
207+
extraHandles?: ElementHandle[],
208+
): Promise<void>;
197209
getSelectedMcpPage(): McpPage;
198210
getExtensionServiceWorkers(): ExtensionServiceWorker[];
199211
getExtensionServiceWorkerId(
@@ -213,6 +225,12 @@ export type ContextPage = Readonly<{
213225
options?: {timeout?: number},
214226
): Promise<void>;
215227
getInPageTools(): ToolGroup<InPageToolDefinition> | undefined;
228+
getSnapshot(): TextSnapshot | null;
229+
setSnapshot(snapshot: TextSnapshot): void;
230+
getExtraHandles(): ElementHandle[] | undefined;
231+
setExtraHandles(extraHandles: ElementHandle[]): void;
232+
readonly uniqueBackendNodeIdToMcpId: Map<string, string>;
233+
readonly emulationSettings: EmulationSettings;
216234
}>;
217235

218236
export function defineTool<Schema extends zod.ZodRawShape>(

0 commit comments

Comments
 (0)