chore: serialize session entries for tool calls and user actions (#803)

This commit is contained in:
Pavel Feldman
2025-07-31 15:16:56 -07:00
committed by GitHub
parent 2a86ac74e3
commit 3787439fc1
6 changed files with 351 additions and 197 deletions

View File

@@ -57,16 +57,17 @@ export class BrowserServerBackend implements ServerBackend {
const context = this._context!;
const response = new Response(context, schema.name, parsedArguments);
const tool = this._tools.find(tool => tool.schema.name === schema.name)!;
await context.setInputRecorderEnabled(false);
context.setRunningTool(true);
try {
await tool.handle(context, parsedArguments, response);
} catch (error) {
await response.finish();
this._sessionLog?.logResponse(response);
} catch (error: any) {
response.addError(String(error));
} finally {
await context.setInputRecorderEnabled(true);
context.setRunningTool(false);
}
await this._sessionLog?.logResponse(response);
return await response.serialize();
return response.serialize();
}
serverInitialized(version: mcpServer.ClientVersion | undefined) {

View File

@@ -24,13 +24,14 @@ import type { Tool } from './tools/tool.js';
import type { FullConfig } from './config.js';
import type { BrowserContextFactory } from './browserContextFactory.js';
import type * as actions from './actions.js';
import type { Action, SessionLog } from './sessionLog.js';
import type { SessionLog } from './sessionLog.js';
const testDebug = debug('pw:mcp:test');
export class Context {
readonly tools: Tool[];
readonly config: FullConfig;
readonly sessionLog: SessionLog | undefined;
private _browserContextPromise: Promise<{ browserContext: playwright.BrowserContext, close: () => Promise<void> }> | undefined;
private _browserContextFactory: BrowserContextFactory;
private _tabs: Tab[] = [];
@@ -40,14 +41,13 @@ export class Context {
private static _allContexts: Set<Context> = new Set();
private _closeBrowserContextPromise: Promise<void> | undefined;
private _inputRecorder: InputRecorder | undefined;
private _sessionLog: SessionLog | undefined;
private _isRunningTool: boolean = false;
constructor(tools: Tool[], config: FullConfig, browserContextFactory: BrowserContextFactory, sessionLog: SessionLog | undefined) {
this.tools = tools;
this.config = config;
this._browserContextFactory = browserContextFactory;
this._sessionLog = sessionLog;
this.sessionLog = sessionLog;
testDebug('create context');
Context._allContexts.add(this);
}
@@ -93,29 +93,6 @@ export class Context {
return this._currentTab!;
}
async listTabsMarkdown(force: boolean = false): Promise<string[]> {
if (this._tabs.length === 1 && !force)
return [];
if (!this._tabs.length) {
return [
'### Open tabs',
'No open tabs. Use the "browser_navigate" tool to navigate to a page first.',
'',
];
}
const lines: string[] = ['### Open tabs'];
for (let i = 0; i < this._tabs.length; i++) {
const tab = this._tabs[i];
const title = await tab.title();
const url = tab.page.url();
const current = tab === this._currentTab ? ' (current)' : '';
lines.push(`- ${i}:${current} [${title}] (${url})`);
}
lines.push('');
return lines;
}
async closeTab(index: number | undefined): Promise<string> {
const tab = index === undefined ? this._currentTab : this._tabs[index];
@@ -152,8 +129,12 @@ export class Context {
this._closeBrowserContextPromise = undefined;
}
async setInputRecorderEnabled(enabled: boolean) {
await this._inputRecorder?.setEnabled(enabled);
isRunningTool() {
return this._isRunningTool;
}
setRunningTool(isRunningTool: boolean) {
this._isRunningTool = isRunningTool;
}
private async _closeBrowserContextImpl() {
@@ -208,8 +189,8 @@ export class Context {
const result = await this._browserContextFactory.createContext(this.clientVersion!);
const { browserContext } = result;
await this._setupRequestInterception(browserContext);
if (this._sessionLog)
this._inputRecorder = await InputRecorder.create(this._sessionLog, browserContext);
if (this.sessionLog)
await InputRecorder.create(this, browserContext);
for (const page of browserContext.pages())
this._onPageCreated(page);
browserContext.on('page', page => this._onPageCreated(page));
@@ -226,87 +207,54 @@ export class Context {
}
export class InputRecorder {
private _actions: Action[] = [];
private _enabled = false;
private _sessionLog: SessionLog;
private _context: Context;
private _browserContext: playwright.BrowserContext;
private _flushTimer: NodeJS.Timeout | undefined;
private constructor(sessionLog: SessionLog, browserContext: playwright.BrowserContext) {
this._sessionLog = sessionLog;
private constructor(context: Context, browserContext: playwright.BrowserContext) {
this._context = context;
this._browserContext = browserContext;
}
static async create(sessionLog: SessionLog, browserContext: playwright.BrowserContext) {
const recorder = new InputRecorder(sessionLog, browserContext);
static async create(context: Context, browserContext: playwright.BrowserContext) {
const recorder = new InputRecorder(context, browserContext);
await recorder._initialize();
await recorder.setEnabled(true);
return recorder;
}
private async _initialize() {
const sessionLog = this._context.sessionLog!;
await (this._browserContext as any)._enableRecorder({
mode: 'recording',
recorderMode: 'api',
}, {
actionAdded: (page: playwright.Page, data: actions.ActionInContext, code: string) => {
if (!this._enabled)
if (this._context.isRunningTool())
return;
const tab = Tab.forPage(page);
this._actions.push({ ...data, tab, code: code.trim(), timestamp: performance.now() });
this._scheduleFlush();
if (tab)
sessionLog.logUserAction(data.action, tab, code, false);
},
actionUpdated: (page: playwright.Page, data: actions.ActionInContext, code: string) => {
if (!this._enabled)
if (this._context.isRunningTool())
return;
const tab = Tab.forPage(page);
this._actions[this._actions.length - 1] = { ...data, tab, code: code.trim(), timestamp: performance.now() };
this._scheduleFlush();
if (tab)
sessionLog.logUserAction(data.action, tab, code, true);
},
signalAdded: (page: playwright.Page, data: actions.SignalInContext) => {
if (this._context.isRunningTool())
return;
if (data.signal.name !== 'navigation')
return;
const tab = Tab.forPage(page);
this._actions.push({
frame: data.frame,
action: {
name: 'navigate',
url: data.signal.url,
signals: [],
},
startTime: data.timestamp,
endTime: data.timestamp,
tab,
code: `await page.goto('${data.signal.url}');`,
timestamp: performance.now(),
});
this._scheduleFlush();
const navigateAction: actions.Action = {
name: 'navigate',
url: data.signal.url,
signals: [],
};
if (tab)
sessionLog.logUserAction(navigateAction, tab, `await page.goto('${data.signal.url}');`, false);
},
});
}
async setEnabled(enabled: boolean) {
this._enabled = enabled;
if (!enabled)
await this._flush();
}
private _clearTimer() {
if (this._flushTimer) {
clearTimeout(this._flushTimer);
this._flushTimer = undefined;
}
}
private _scheduleFlush() {
this._clearTimer();
this._flushTimer = setTimeout(() => this._flush(), 1000);
}
private async _flush() {
this._clearTimer();
const actions = this._actions;
this._actions = [];
await this._sessionLog.logActions(actions);
}
}

View File

@@ -16,8 +16,7 @@
import { renderModalStates } from './tab.js';
import type { TabSnapshot } from './tab.js';
import type { ModalState } from './tools/tool.js';
import type { Tab, TabSnapshot } from './tab.js';
import type { ImageContent, TextContent } from '@modelcontextprotocol/sdk/types.js';
import type { Context } from './context.js';
@@ -28,7 +27,7 @@ export class Response {
private _context: Context;
private _includeSnapshot = false;
private _includeTabs = false;
private _snapshot: { tabSnapshot?: TabSnapshot, modalState?: ModalState } | undefined;
private _tabSnapshot: TabSnapshot | undefined;
readonly toolName: string;
readonly toolArgs: Record<string, any>;
@@ -81,17 +80,20 @@ export class Response {
this._includeTabs = true;
}
async snapshot(): Promise<{ tabSnapshot?: TabSnapshot, modalState?: ModalState }> {
if (this._snapshot)
return this._snapshot;
async finish() {
// All the async snapshotting post-action is happening here.
// Everything below should race against modal states.
if (this._includeSnapshot && this._context.currentTab())
this._snapshot = await this._context.currentTabOrDie().captureSnapshot();
else
this._snapshot = {};
return this._snapshot;
this._tabSnapshot = await this._context.currentTabOrDie().captureSnapshot();
for (const tab of this._context.tabs())
await tab.updateTitle();
}
async serialize(): Promise<{ content: (TextContent | ImageContent)[], isError?: boolean }> {
tabSnapshot(): TabSnapshot | undefined {
return this._tabSnapshot;
}
serialize(): { content: (TextContent | ImageContent)[], isError?: boolean } {
const response: string[] = [];
// Start with command result.
@@ -112,16 +114,14 @@ ${this._code.join('\n')}
// List browser tabs.
if (this._includeSnapshot || this._includeTabs)
response.push(...(await this._context.listTabsMarkdown(this._includeTabs)));
response.push(...renderTabsMarkdown(this._context.tabs(), this._includeTabs));
// Add snapshot if provided.
const snapshot = await this.snapshot();
if (snapshot?.modalState) {
response.push(...renderModalStates(this._context, [snapshot.modalState]));
if (this._tabSnapshot?.modalStates.length) {
response.push(...renderModalStates(this._context, this._tabSnapshot.modalStates));
response.push('');
}
if (snapshot?.tabSnapshot) {
response.push(renderTabSnapshot(snapshot.tabSnapshot));
} else if (this._tabSnapshot) {
response.push(renderTabSnapshot(this._tabSnapshot));
response.push('');
}
@@ -172,6 +172,28 @@ function renderTabSnapshot(tabSnapshot: TabSnapshot): string {
return lines.join('\n');
}
function renderTabsMarkdown(tabs: Tab[], force: boolean = false): string[] {
if (tabs.length === 1 && !force)
return [];
if (!tabs.length) {
return [
'### Open tabs',
'No open tabs. Use the "browser_navigate" tool to navigate to a page first.',
'',
];
}
const lines: string[] = ['### Open tabs'];
for (let i = 0; i < tabs.length; i++) {
const tab = tabs[i];
const current = tab.isCurrentTab() ? ' (current)' : '';
lines.push(`- ${i}:${current} [${tab.lastTitle()}] (${tab.page.url()})`);
}
lines.push('');
return lines;
}
function trim(text: string, maxLength: number) {
if (text.length <= maxLength)
return text;

View File

@@ -19,17 +19,32 @@ import path from 'path';
import { outputFile } from './config.js';
import { Response } from './response.js';
import { logUnhandledError } from './log.js';
import type { FullConfig } from './config.js';
import type * as actions from './actions.js';
import type { Tab } from './tab.js';
import type { Tab, TabSnapshot } from './tab.js';
export type Action = actions.ActionInContext & { code: string; tab?: Tab | undefined; timestamp: number };
type LogEntry = {
timestamp: number;
toolCall?: {
toolName: string;
toolArgs: Record<string, any>;
result: string;
isError?: boolean;
};
userAction?: actions.Action;
code: string;
tabSnapshot?: TabSnapshot;
};
export class SessionLog {
private _folder: string;
private _file: string;
private _ordinal = 0;
private _lastModified = 0;
private _pendingEntries: LogEntry[] = [];
private _sessionFileQueue = Promise.resolve();
private _flushEntriesTimeout: NodeJS.Timeout | undefined;
constructor(sessionFolder: string) {
this._folder = sessionFolder;
@@ -44,90 +59,118 @@ export class SessionLog {
return new SessionLog(sessionFolder);
}
lastModified() {
return this._lastModified;
logResponse(response: Response) {
const entry: LogEntry = {
timestamp: performance.now(),
toolCall: {
toolName: response.toolName,
toolArgs: response.toolArgs,
result: response.result(),
isError: response.isError(),
},
code: response.code(),
tabSnapshot: response.tabSnapshot(),
};
this._appendEntry(entry);
}
async logResponse(response: Response) {
this._lastModified = performance.now();
const prefix = `${(++this._ordinal).toString().padStart(3, '0')}`;
const lines: string[] = [
`### Tool call: ${response.toolName}`,
`- Args`,
'```json',
JSON.stringify(response.toolArgs, null, 2),
'```',
];
if (response.result()) {
lines.push(
response.isError() ? `- Error` : `- Result`,
'```',
response.result(),
'```');
logUserAction(action: actions.Action, tab: Tab, code: string, isUpdate: boolean) {
code = code.trim();
if (isUpdate) {
const lastEntry = this._pendingEntries[this._pendingEntries.length - 1];
if (lastEntry.userAction?.name === action.name) {
lastEntry.userAction = action;
lastEntry.code = code;
return;
}
}
if (response.code()) {
lines.push(
`- Code`,
'```js',
response.code(),
'```');
if (action.name === 'navigate') {
// Already logged at this location.
const lastEntry = this._pendingEntries[this._pendingEntries.length - 1];
if (lastEntry?.tabSnapshot?.url === action.url)
return;
}
const snapshot = await response.snapshot();
if (snapshot?.tabSnapshot) {
const fileName = `${prefix}.snapshot.yml`;
await fs.promises.writeFile(path.join(this._folder, fileName), snapshot.tabSnapshot?.ariaSnapshot);
lines.push(`- Snapshot: ${fileName}`);
}
for (const image of response.images()) {
const fileName = `${prefix}.screenshot.${extension(image.contentType)}`;
await fs.promises.writeFile(path.join(this._folder, fileName), image.data);
lines.push(`- Screenshot: ${fileName}`);
}
lines.push('', '', '');
await this._appendLines(lines);
const entry: LogEntry = {
timestamp: performance.now(),
userAction: action,
code,
tabSnapshot: {
url: tab.page.url(),
title: '',
ariaSnapshot: action.ariaSnapshot || '',
modalStates: [],
consoleMessages: [],
downloads: [],
},
};
this._appendEntry(entry);
}
async logActions(actions: Action[]) {
// Skip recent navigation, it is a side-effect of the previous action or tool use.
if (actions?.[0]?.action?.name === 'navigate' && actions[0].timestamp - this._lastModified < 1000)
return;
private _appendEntry(entry: LogEntry) {
this._pendingEntries.push(entry);
if (this._flushEntriesTimeout)
clearTimeout(this._flushEntriesTimeout);
this._flushEntriesTimeout = setTimeout(() => this._flushEntries(), 1000);
}
this._lastModified = performance.now();
const lines: string[] = [];
for (const action of actions) {
const prefix = `${(++this._ordinal).toString().padStart(3, '0')}`;
lines.push(
`### User action: ${action.action.name}`,
);
if (action.code) {
private async _flushEntries() {
clearTimeout(this._flushEntriesTimeout);
const entries = this._pendingEntries;
this._pendingEntries = [];
const lines: string[] = [''];
for (const entry of entries) {
const ordinal = (++this._ordinal).toString().padStart(3, '0');
if (entry.toolCall) {
lines.push(
`### Tool call: ${entry.toolCall.toolName}`,
`- Args`,
'```json',
JSON.stringify(entry.toolCall.toolArgs, null, 2),
'```',
);
if (entry.toolCall.result) {
lines.push(
entry.toolCall.isError ? `- Error` : `- Result`,
'```',
entry.toolCall.result,
'```',
);
}
}
if (entry.userAction) {
const actionData = { ...entry.userAction } as any;
delete actionData.ariaSnapshot;
delete actionData.selector;
delete actionData.signals;
lines.push(
`### User action: ${entry.userAction.name}`,
`- Args`,
'```json',
JSON.stringify(actionData, null, 2),
'```',
);
}
if (entry.code) {
lines.push(
`- Code`,
'```js',
action.code,
entry.code,
'```');
}
if (action.action.ariaSnapshot) {
const fileName = `${prefix}.snapshot.yml`;
await fs.promises.writeFile(path.join(this._folder, fileName), action.action.ariaSnapshot);
if (entry.tabSnapshot) {
const fileName = `${ordinal}.snapshot.yml`;
fs.promises.writeFile(path.join(this._folder, fileName), entry.tabSnapshot.ariaSnapshot).catch(logUnhandledError);
lines.push(`- Snapshot: ${fileName}`);
}
lines.push('', '', '');
lines.push('', '');
}
await this._appendLines(lines);
}
private async _appendLines(lines: string[]) {
await fs.promises.appendFile(this._file, lines.join('\n'));
this._sessionFileQueue = this._sessionFileQueue.then(() => fs.promises.appendFile(this._file, lines.join('\n')));
}
}
function extension(contentType: string): 'jpg' | 'png' {
if (contentType === 'image/jpeg')
return 'jpg';
return 'png';
}

View File

@@ -48,6 +48,7 @@ export type TabSnapshot = {
export class Tab extends EventEmitter<TabEventsInterface> {
readonly context: Context;
readonly page: playwright.Page;
private _lastTitle = 'about:blank';
private _consoleMessages: ConsoleMessage[] = [];
private _recentConsoleMessages: ConsoleMessage[] = [];
private _requests: Map<playwright.Request, playwright.Response | null> = new Map();
@@ -137,8 +138,18 @@ export class Tab extends EventEmitter<TabEventsInterface> {
this._onPageClose(this);
}
async title(): Promise<string> {
return await callOnPageNoTrace(this.page, page => page.title());
async updateTitle() {
await this._raceAgainstModalStates(async () => {
this._lastTitle = await callOnPageNoTrace(this.page, page => page.title());
});
}
lastTitle(): string {
return this._lastTitle;
}
isCurrentTab(): boolean {
return this === this.context.currentTab();
}
async waitForLoadState(state: 'load', options?: { timeout?: number }): Promise<void> {
@@ -182,15 +193,15 @@ export class Tab extends EventEmitter<TabEventsInterface> {
return this._requests;
}
async captureSnapshot(): Promise<{ tabSnapshot?: TabSnapshot, modalState?: ModalState }> {
async captureSnapshot(): Promise<TabSnapshot> {
let tabSnapshot: TabSnapshot | undefined;
const modalState = await this._raceAgainstModalStates(async () => {
const modalStates = await this._raceAgainstModalStates(async () => {
const snapshot = await (this.page as PageEx)._snapshotForAI();
tabSnapshot = {
url: this.page.url(),
title: await this.page.title(),
ariaSnapshot: snapshot,
modalStates: this.modalStates(),
modalStates: [],
consoleMessages: [],
downloads: this._downloads,
};
@@ -200,25 +211,32 @@ export class Tab extends EventEmitter<TabEventsInterface> {
tabSnapshot.consoleMessages = this._recentConsoleMessages;
this._recentConsoleMessages = [];
}
return { tabSnapshot, modalState };
return tabSnapshot ?? {
url: this.page.url(),
title: '',
ariaSnapshot: '',
modalStates,
consoleMessages: [],
downloads: [],
};
}
private _javaScriptBlocked(): boolean {
return this._modalStates.some(state => state.type === 'dialog');
}
private async _raceAgainstModalStates(action: () => Promise<void>): Promise<ModalState | undefined> {
private async _raceAgainstModalStates(action: () => Promise<void>): Promise<ModalState[]> {
if (this.modalStates().length)
return this.modalStates()[0];
return this.modalStates();
const promise = new ManualPromise<ModalState>();
const listener = (modalState: ModalState) => promise.resolve(modalState);
const promise = new ManualPromise<ModalState[]>();
const listener = (modalState: ModalState) => promise.resolve([modalState]);
this.once(TabEvents.modalState, listener);
return await Promise.race([
action().then(() => {
this.off(TabEvents.modalState, listener);
return undefined;
return [];
}),
promise,
]);

View File

@@ -47,8 +47,8 @@ test('session log should record tool calls', async ({ startClient, server }, tes
const output = stderr().split('\n').filter(line => line.startsWith('Session: '))[0];
const sessionFolder = output.substring('Session: '.length);
const sessionLog = await fs.promises.readFile(path.join(sessionFolder, 'session.md'), 'utf8');
expect(sessionLog).toBe(`### Tool call: browser_navigate
await expect.poll(() => readSessionLog(sessionFolder)).toBe(`
### Tool call: browser_navigate
- Args
\`\`\`json
{
@@ -76,11 +76,120 @@ await page.getByRole('button', { name: 'Submit' }).click();
\`\`\`
- Snapshot: 002.snapshot.yml
`);
});
test('session log should record user action', async ({ cdpServer, startClient }, testInfo) => {
const browserContext = await cdpServer.start();
const { client, stderr } = await startClient({
args: [
'--save-session',
'--output-dir', testInfo.outputPath('output'),
`--cdp-endpoint=${cdpServer.endpoint}`,
],
});
// Force browser context creation.
await client.callTool({
name: 'browser_snapshot',
});
const [page] = browserContext.pages();
await page.setContent(`
<button>Button 1</button>
<button>Button 2</button>
`);
await page.getByRole('button', { name: 'Button 1' }).click();
const output = stderr().split('\n').filter(line => line.startsWith('Session: '))[0];
const sessionFolder = output.substring('Session: '.length);
await expect.poll(() => readSessionLog(sessionFolder)).toBe(`
### Tool call: browser_snapshot
- Args
\`\`\`json
{}
\`\`\`
- Snapshot: 001.snapshot.yml
### User action: click
- Args
\`\`\`json
{
"name": "click",
"ref": "e2",
"button": "left",
"modifiers": 0,
"clickCount": 1
}
\`\`\`
- Code
\`\`\`js
await page.getByRole('button', { name: 'Button 1' }).click();
\`\`\`
- Snapshot: 002.snapshot.yml
`);
});
test('session log should record tool user actions', async ({ cdpServer, startClient }, testInfo) => {
test('session log should update user action', async ({ cdpServer, startClient }, testInfo) => {
const browserContext = await cdpServer.start();
const { client, stderr } = await startClient({
args: [
'--save-session',
'--output-dir', testInfo.outputPath('output'),
`--cdp-endpoint=${cdpServer.endpoint}`,
],
});
// Force browser context creation.
await client.callTool({
name: 'browser_snapshot',
});
const [page] = browserContext.pages();
await page.setContent(`
<button>Button 1</button>
<button>Button 2</button>
`);
await page.getByRole('button', { name: 'Button 1' }).dblclick();
const output = stderr().split('\n').filter(line => line.startsWith('Session: '))[0];
const sessionFolder = output.substring('Session: '.length);
await expect.poll(() => readSessionLog(sessionFolder)).toBe(`
### Tool call: browser_snapshot
- Args
\`\`\`json
{}
\`\`\`
- Snapshot: 001.snapshot.yml
### User action: click
- Args
\`\`\`json
{
"name": "click",
"ref": "e2",
"button": "left",
"modifiers": 0,
"clickCount": 2
}
\`\`\`
- Code
\`\`\`js
await page.getByRole('button', { name: 'Button 1' }).dblclick();
\`\`\`
- Snapshot: 002.snapshot.yml
`);
});
test('session log should record tool calls and user actions', async ({ cdpServer, startClient }, testInfo) => {
const browserContext = await cdpServer.start();
const { client, stderr } = await startClient({
args: [
@@ -117,8 +226,8 @@ test('session log should record tool user actions', async ({ cdpServer, startCli
const output = stderr().split('\n').filter(line => line.startsWith('Session: '))[0];
const sessionFolder = output.substring('Session: '.length);
const sessionLog = await fs.promises.readFile(path.join(sessionFolder, 'session.md'), 'utf8');
expect(sessionLog).toBe(`### Tool call: browser_snapshot
await expect.poll(() => readSessionLog(sessionFolder)).toBe(`
### Tool call: browser_snapshot
- Args
\`\`\`json
{}
@@ -127,6 +236,16 @@ test('session log should record tool user actions', async ({ cdpServer, startCli
### User action: click
- Args
\`\`\`json
{
"name": "click",
"ref": "e2",
"button": "left",
"modifiers": 0,
"clickCount": 1
}
\`\`\`
- Code
\`\`\`js
await page.getByRole('button', { name: 'Button 1' }).click();
@@ -148,6 +267,9 @@ await page.getByRole('button', { name: 'Button 2' }).click();
\`\`\`
- Snapshot: 003.snapshot.yml
`);
});
async function readSessionLog(sessionFolder: string): Promise<string> {
return await fs.promises.readFile(path.join(sessionFolder, 'session.md'), 'utf8').catch(() => '');
}