Skip to content

Commit 892ccb0

Browse files
Merge pull request #4 from auto-browse/feature_assertions
Feature assertions and reporting improvement
2 parents f0e3e3c + 2e7cb9d commit 892ccb0

27 files changed

+1259
-1004
lines changed

package-lock.json

Lines changed: 15 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@auto-browse/auto-browse",
3-
"version": "0.1.4",
3+
"version": "0.1.5",
44
"description": "AI-powered browser automation",
55
"author": "auto-browse",
66
"homepage": "https://www.auto-browse.com/",
@@ -21,10 +21,10 @@
2121
"format": "prettier --write \"src/**/*.ts\"",
2222
"lint": "eslint \"src/**/*.ts\"",
2323
"postinstall": "playwright install --with-deps",
24-
"clean": "rm -rf dist playwright-report test-results",
24+
"clean": "rm -rf dist",
2525
"check": "npm run format && npm run lint",
2626
"prepublishOnly": "npm run clean && npm run build && npm run format && npm run lint",
27-
"publish-package": "npm run clean && npm run build && npm run format && npm run lint && npm publish --access public --provenance"
27+
"publish-package": "npm run clean && npm run build && npm run format && npm run lint && npm publish --access public"
2828
},
2929
"keywords": [
3030
"playwright",
@@ -55,6 +55,7 @@
5555
"@playwright/test": "1.52.0-alpha-1743011787000",
5656
"dotenv": "^16.4.7",
5757
"playwright": "1.52.0-alpha-1743011787000",
58+
"yaml": "^2.7.1",
5859
"zod": "^3.24.2"
5960
},
6061
"devDependencies": {

src/auto.ts

Lines changed: 114 additions & 81 deletions
Original file line number | Diff line number | Diff line change
@@ -1,41 +1,52 @@
11
import { test as base } from '@playwright/test';
2+
import { z } from 'zod';
23
import { AutoConfig } from './types';
34
import { sessionManager, context } from './browser';
45
import { createReactAgent } from '@langchain/langgraph/prebuilt';
56
import { HumanMessage } from '@langchain/core/messages';
67
import { createLLMModel } from './llm';
78
import {
8-
browser_click,
9-
browser_type,
10-
browser_get_text,
11-
browser_navigate,
12-
browser_snapshot,
13-
browser_hover,
14-
browser_drag,
15-
browser_select_option,
16-
browser_take_screenshot,
17-
browser_go_back,
18-
browser_wait,
19-
browser_press_key,
20-
browser_save_pdf,
21-
browser_choose_file,
22-
browser_go_forward,
23-
browser_assert,
9+
browser_click,
10+
browser_type,
11+
browser_get_text,
12+
browser_navigate,
13+
browser_snapshot,
14+
browser_hover,
15+
browser_drag,
16+
browser_select_option,
17+
browser_take_screenshot,
18+
browser_go_back,
19+
browser_wait,
20+
browser_press_key,
21+
browser_save_pdf,
22+
browser_choose_file,
23+
browser_go_forward,
24+
browser_assert,
25+
browser_page_assert
2426
} from './tools';
2527

28+
// Define response schema
29+
const AutoResponseSchema = z.object({
30+
action: z
31+
.string()
32+
.describe('The type of action performed (assert, click, type, etc)'),
33+
error: z.string().describe('Error message if any, empty string if none'),
34+
output: z.string().describe('Raw output from the action')
35+
});
36+
2637
// Extend base test to automatically track page
2738
export const test = base.extend({
28-
page: async ({ page }, use) => {
29-
sessionManager.setPage(page);
30-
await use(page);
31-
},
39+
page: async ({ page }, use) => {
40+
sessionManager.setPage(page);
41+
await use(page);
42+
}
3243
});
3344

3445
// Initialize the LangChain agent with more detailed instructions
3546
const initializeAgent = () => {
36-
const model = createLLMModel();
47+
const model = createLLMModel();
3748

38-
const prompt = `You are a web automation assistant. When given a natural language instruction:
49+
const prompt = `You are a web automation assistant. When given a natural language instruction:
3950
- Always call the snapshot tool first to analyze the page structure and elements, so you can understand the context ad the elements available on the page to perform the requested action
4051
- For "get" or "get text" instructions, use the getText tool to retrieve content
4152
- For "click" instructions, use the click tool to interact with elements
@@ -51,79 +62,101 @@ const initializeAgent = () => {
5162
- For pressing keys, use the pressKey tool
5263
- For saving PDFs, use the savePDF tool
5364
- For choosing files, use the chooseFile tool
54-
- For verification and assertions, use the assert tool
55-
Return the operation result or content as requested.`;
65+
- While calling the verification and assertion tools, DO NOT assume or make up any expected values. Use the values as provided in the instruction only.
66+
- For verification and assertions like {"isVisible", "hasText", "isEnabled", "isChecked"}, use the browser_assert tool
67+
- For page assertions like {page title, current page url} use the browser_page_assert tools
68+
Return a stringified JSON object with exactly these fields:
69+
{
70+
"action": "<type of action performed>",
71+
"error": "<error message or empty string>",
72+
"output": "<your output message>"
73+
}`;
5674

57-
const agent = createReactAgent({
58-
llm: model,
59-
tools: [
60-
browser_click,
61-
browser_type,
62-
browser_get_text,
63-
browser_navigate,
64-
browser_snapshot,
65-
browser_hover,
66-
browser_drag,
67-
browser_select_option,
68-
browser_take_screenshot,
69-
browser_go_back,
70-
browser_wait,
71-
browser_press_key,
72-
browser_save_pdf,
73-
browser_choose_file,
74-
browser_assert,
75-
browser_go_forward,
76-
],
77-
stateModifier: prompt,
78-
});
75+
const agent = createReactAgent({
76+
llm: model,
77+
tools: [
78+
browser_click,
79+
browser_type,
80+
browser_get_text,
81+
browser_navigate,
82+
browser_snapshot,
83+
browser_hover,
84+
browser_drag,
85+
browser_select_option,
86+
browser_take_screenshot,
87+
browser_go_back,
88+
browser_wait,
89+
browser_press_key,
90+
browser_save_pdf,
91+
browser_choose_file,
92+
browser_assert,
93+
browser_go_forward,
94+
browser_page_assert
95+
],
96+
stateModifier: prompt,
97+
responseFormat: {
98+
prompt: `Return a stringified JSON object with exactly these fields:
99+
{
100+
"action": "<type of action performed>",
101+
"error": "<error message or empty string>",
102+
"output": "<your output message>"
103+
}`,
104+
schema: AutoResponseSchema
105+
}
106+
});
79107

80-
return { agent };
108+
return { agent };
81109
};
82110

83111
// Main auto function that processes instructions
84112
export async function auto(
85-
instruction: string,
86-
config?: AutoConfig,
113+
instruction: string,
114+
config?: AutoConfig
87115
): Promise<any> {
88-
console.log(`[Auto] Processing instruction: "${instruction}"`);
116+
console.log(`[Auto] Processing instruction: "${instruction}"`);
89117

90-
if (config?.page) {
91-
sessionManager.setPage(config.page);
92-
console.log(`[Auto] Page set from config`);
93-
} else {
94-
try {
95-
sessionManager.getPage();
96-
} catch {
97-
// In standalone mode, create a new page
98-
console.log(`[Auto] No existing page, creating new page`);
99-
await context.createPage();
100-
}
118+
if (config?.page) {
119+
sessionManager.setPage(config.page);
120+
console.log(`[Auto] Page set from config`);
121+
} else {
122+
try {
123+
sessionManager.getPage();
124+
} catch {
125+
// In standalone mode, create a new page
126+
console.log(`[Auto] No existing page, creating new page`);
127+
await context.createPage();
101128
}
129+
}
102130

103-
// Create and invoke the agent
104-
console.log(`[Auto] Creating agent for instruction`);
105-
const { agent } = initializeAgent();
106-
const result = await agent.invoke({
107-
messages: [new HumanMessage(instruction)],
108-
});
131+
// Create and invoke the agent
132+
console.log(`[Auto] Creating agent for instruction`);
133+
const { agent } = initializeAgent();
134+
const response = await agent.invoke({
135+
messages: [new HumanMessage(instruction)]
136+
});
137+
const result = response.structuredResponse;
138+
// Process agent result
139+
try {
140+
console.log(`[Auto] Agent response:`, result);
109141

110-
console.log('Agent result:', result);
111-
// Process agent result
112-
const response = result.messages?.[-1]?.content;
113-
console.log(`[Auto] Agent response:`, response);
142+
// Parse and validate the response
143+
const validatedResponse = AutoResponseSchema.parse(result);
114144

115-
if (typeof response === 'string') {
116-
// If it's a success message, return null to match original behavior
117-
if (response.startsWith('Successfully')) {
118-
console.log(`[Auto] Detected success message, returning null`);
119-
return null;
120-
}
121-
console.log(`[Auto] Returning response string`);
122-
return response;
145+
console.log(`[Auto] Action: ${validatedResponse.action}`);
146+
if (validatedResponse.error) {
147+
console.log(`[Auto] Error: ${validatedResponse.error}`);
148+
throw {
149+
error: validatedResponse.error,
150+
output: validatedResponse.output
151+
};
123152
}
124153

125-
console.log(`[Auto] No string response, returning null`);
126-
return null;
154+
// Return the output or null if successful with no output
155+
return validatedResponse.output || null;
156+
} catch (error) {
157+
console.log(`[Auto] Error processing response:`, error);
158+
throw error;
159+
}
127160
}
128161

129162
// Export everything needed for the package

0 commit comments

Comments
 (0)