import asyncio

import pytest
from langchain_anthropic import ChatAnthropic
from langchain_openai import ChatOpenAI
from pydantic import BaseModel

from browser_use.agent.service import Agent
from browser_use.controller.service import Controller


@pytest.fixture
def llm():
	"""Initialize language model for testing"""

	# return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None)
	return ChatOpenAI(model='gpt-4o')
	# return ChatOpenAI(model='gpt-4o-mini')
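	# Note: ChatOpenAI reads the OPENAI_API_KEY environment variable; to try Claude
	# instead, set ANTHROPIC_API_KEY and swap in the commented-out ChatAnthropic line.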


@pytest.fixture
async def agent_with_controller():
	"""Create agent with controller for testing"""
	controller = Controller(keep_open=False)
	print('init controller')
	try:
		yield controller
	finally:
		# make sure the browser is closed even if the test body raised
		if controller.browser:
			controller.browser.close(force=True)


@pytest.mark.asyncio
async def test_ecommerce_interaction(llm, agent_with_controller):
	"""Test complex ecommerce interaction sequence"""
	agent = Agent(
		task="Go to amazon.com, search for 'laptop', filter by 4+ stars, and find the price of the first result",
		llm=llm,
		controller=agent_with_controller,
		save_conversation_path='tmp/test_ecommerce_interaction/conversation',
	)

	history = await agent.run(max_steps=20)

	# Verify sequence of actions
	action_sequence = []
	for h in history:
		action = getattr(h.model_output, 'action', None)
		if action and (getattr(action, 'go_to_url', None) or getattr(action, 'open_tab', None)):
			action_sequence.append('navigate')
		elif action and getattr(action, 'input_text', None):
			action_sequence.append('input')
			# Check that the input is 'laptop'
			inp = action.input_text.text.lower()
			if inp == 'laptop':
				action_sequence.append('input_exact_correct')
			elif 'laptop' in inp:
				action_sequence.append('correct_in_input')
			else:
				action_sequence.append('incorrect_input')

		elif action and getattr(action, 'click_element', None):
			action_sequence.append('click')

		if action is None:
			print(h.result)
			print(h.model_output)

	# Verify essential steps were performed
	assert 'navigate' in action_sequence  # Navigated to Amazon
	assert 'input' in action_sequence  # Entered search term
	assert 'click' in action_sequence  # Clicked search/filter
	assert 'input_exact_correct' in action_sequence or 'correct_in_input' in action_sequence


@pytest.mark.asyncio
async def test_error_recovery(llm, agent_with_controller):
	"""Test agent's ability to recover from errors"""
	agent = Agent(
		task='Navigate to nonexistent-site.com and then recover by going to google.com',
		llm=llm,
		controller=agent_with_controller,
	)

	history = await agent.run(max_steps=10)

	recovery_action = next(
		(
			h
			for h in history
			if h.model_output
			and getattr(h.model_output, 'action', None)
			and getattr(h.model_output.action, 'go_to_url', None)
			and getattr(h.model_output.action.go_to_url, 'url', '').endswith('google.com')  # type: ignore -> pretty weird way to do this
		),
		None,
	)
	assert recovery_action is not None


@pytest.mark.asyncio
async def test_find_contact_email(llm, agent_with_controller):
	"""Test agent's ability to find contact email on a website"""
	agent = Agent(
		task='Go to https://browser-use.com/ and find out the contact email',
		llm=llm,
		controller=agent_with_controller,
	)

	history = await agent.run(max_steps=10)

	# Verify the agent found the contact email
	email_action = next(
		(
			h
			for h in history
			if h.result.extracted_content and 'info@browser-use.com' in h.result.extracted_content
		),
		None,
	)
	assert email_action is not None


@pytest.mark.asyncio
async def test_agent_finds_installation_command(llm, agent_with_controller):
	"""Test agent's ability to find the pip installation command for browser-use on the web"""
	agent = Agent(
		task='Find the pip installation command for the browser-use repo',
		llm=llm,
		controller=agent_with_controller,
	)

	history = await agent.run(max_steps=10)

	# Verify the agent found the correct installation command
	install_command_action = next(
		(
			h
			for h in history
			if h.result.extracted_content
			and 'pip install browser-use' in h.result.extracted_content
		),
		None,
	)
	assert install_command_action is not None


class CaptchaTest(BaseModel):
	name: str
	url: str
	success_text: str
	additional_text: str | None = None
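
# Each CaptchaTest case below points at a public captcha demo page: the URL to visit,
# the success text to look for, and optional extra guidance appended to the task prompt.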


# pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO
@pytest.mark.asyncio
@pytest.mark.parametrize(
	'captcha',
	[
		# good test for num_clicks
		CaptchaTest(
			name='Rotate Captcha',
			url='https://2captcha.com/demo/rotatecaptcha',
			success_text='Captcha is passed successfully',
			additional_text='Use num_clicks with number to click multiple times at once in same direction. click done when image is exact correct position.',
		),
		CaptchaTest(
			name='Text Captcha',
			url='https://2captcha.com/demo/text',
			success_text='Captcha is passed successfully!',
		),
		CaptchaTest(
			name='Basic Captcha',
			url='https://captcha.com/demos/features/captcha-demo.aspx',
			success_text='Correct!',
		),
		CaptchaTest(
			name='MT Captcha',
			url='https://2captcha.com/demo/mtcaptcha',
			success_text='Verified Successfully',
			additional_text='Stop when you solved it successfully.',
		),
	],
)
async def test_captcha_solver(llm, agent_with_controller, captcha: CaptchaTest):
	"""Test agent's ability to solve different types of captchas"""
	agent = Agent(
		# `or ""` avoids appending the literal string 'None' when no additional_text is set
		task=f'Go to {captcha.url} and solve the captcha. {captcha.additional_text or ""}',
		llm=llm,
		controller=agent_with_controller,
	)

	history = await agent.run(max_steps=10)

	# Verify the agent solved the captcha
	solved = False
	for h in history:
		last = h.state.items
		if any(captcha.success_text in item.text for item in last):
			solved = True
			break
	assert solved, f'Failed to solve {captcha.name}'


# python -m pytest tests/test_agent_actions.py -v --capture=no


# pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO
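

# A minimal standalone sketch (not collected by pytest): it exercises the same
# Agent/Controller API used in the tests above, outside the test harness. The helper
# name `_demo` and the task string are illustrative only, and OPENAI_API_KEY must be set.
if __name__ == '__main__':

	async def _demo():
		controller = Controller(keep_open=False)
		agent = Agent(
			task='Go to https://browser-use.com/ and find out the contact email',
			llm=ChatOpenAI(model='gpt-4o'),
			controller=controller,
		)
		try:
			await agent.run(max_steps=10)
		finally:
			if controller.browser:
				controller.browser.close(force=True)

	asyncio.run(_demo())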