Support connecting to the user's own browser

This commit is contained in:
warmshao
2025-01-02 23:19:45 +08:00
parent 863e865446
commit 2024ad300f
5 changed files with 133 additions and 37 deletions

View File

@@ -26,10 +26,10 @@ class CustomSystemPrompt(SystemPrompt):
{
"current_state": {
"prev_action_evaluation": "Success|Failed|Unknown - Analyze the current elements and the image to check if the previous goals/actions are successful like intended by the task. Ignore the action result. The website is the ground truth. Also mention if something unexpected happened like new suggestions in an input field. Shortly state why/why not. Note that the result you output must be consistent with the reasoning you output afterwards. If you consider it to be 'Failed,' you should reflect on this during your thought.",
"import_contents": "Please think about whether there is any content closely related to user\'s instruction or task on the current page? If there is, please output the content. If not, please output \"None\".",
"completed_contents": "Update the task progress. Don\'t output the purpose of any operation. Completed contents is a general summary of the current contents that have been completed. Just summarize the contents that have been actually completed based on the current and the history operations. Please list each completed item individually, such as: 1. Input username. 2. Input Password. 3. Click confirm button",
"import_contents": "Please think about whether there is any content closely related to user\'s instruction on the current page? If there is, please output the content. If not, please output \"None\".",
"completed_contents": "Update the input Task Progress. Completed contents is a general summary of the current contents that have been completed. Just summarize the contents that have been actually completed based on the current page and the history operations. Please list each completed item individually, such as: 1. Input username. 2. Input Password. 3. Click confirm button",
"thought": "Think about the requirements that have been completed in previous operations and the requirements that need to be completed in the next one operation. If the output of prev_action_evaluation is 'Failed', please reflect and output your reflection here. If you think you have entered the wrong page, consider to go back to the previous page in next action.",
"summary": "Please generate a brief natural language description for the operation in next Actions based on your Thought."
"summary": "Please generate a brief natural language description for the operation in next actions based on your Thought."
},
"action": [
{

View File

@@ -13,7 +13,7 @@ from .custom_context import CustomBrowserContext
class CustomBrowser(Browser):
async def new_context(
self, config: BrowserContextConfig = BrowserContextConfig()
self, config: BrowserContextConfig = BrowserContextConfig(), context: CustomBrowserContext = None
) -> BrowserContext:
"""Create a browser context"""
return CustomBrowserContext(config=config, browser=self)
return CustomBrowserContext(config=config, browser=self, context=context)

View File

@@ -20,8 +20,19 @@ logger = logging.getLogger(__name__)
class CustomBrowserContext(BrowserContext):
def __init__(
    self,
    browser: 'Browser',
    config: 'BrowserContextConfig | None' = None,
    context: 'BrowserContext | None' = None,
):
    """Initialise the context, optionally wrapping an existing one.

    Args:
        browser: Browser that owns this context.
        config: Context settings. When omitted, a fresh
            ``BrowserContextConfig()`` is built for this instance, so
            instances never share one default object created at import
            time.
        context: Already-created context to reuse. When it is truthy,
            ``_create_context`` returns it unchanged instead of building
            a new one.
            NOTE(review): callers appear to pass a Playwright context
            (e.g. from ``launch_persistent_context``) rather than this
            package's ``BrowserContext`` - confirm and tighten the
            annotation.
    """
    if config is None:
        # Build the default per call; a default written in the signature
        # is evaluated once and shared by every instance.
        config = BrowserContextConfig()
    super().__init__(browser, config)
    self.context = context
async def _create_context(self, browser: PlaywrightBrowser):
"""Creates a new browser context with anti-detection measures and loads cookies if available."""
if self.context:
return self.context
if self.browser.config.chrome_instance_path and len(browser.contexts) > 0:
# Connect to existing Chrome instance instead of creating new one
context = browser.contexts[0]

View File

@@ -3,10 +3,14 @@
# @Author : wenshao
# @ProjectName: browser-use-webui
# @FileName: test_browser_use.py
import pdb
from dotenv import load_dotenv
load_dotenv()
import sys
sys.path.append(".")
import os
import sys
from pprint import pprint
@@ -74,6 +78,7 @@ async def test_browser_use_org():
async def test_browser_use_custom():
from playwright.async_api import async_playwright
from browser_use.browser.context import BrowserContextWindowSize
from src.browser.custom_browser import CustomBrowser, BrowserConfig
@@ -81,6 +86,7 @@ async def test_browser_use_custom():
from src.controller.custom_controller import CustomController
from src.agent.custom_agent import CustomAgent
from src.agent.custom_prompts import CustomSystemPrompt
from src.browser.custom_context import CustomBrowserContext
window_w, window_h = 1920, 1080
@@ -95,49 +101,91 @@ async def test_browser_use_custom():
llm = utils.get_llm_model(
provider="gemini",
model_name="gemini-2.0-flash-exp",
temperature=0.8,
temperature=1.0,
api_key=os.getenv("GOOGLE_API_KEY", "")
)
browser = CustomBrowser(
config=BrowserConfig(
headless=False,
disable_security=True,
extra_chromium_args=[f'--window-size={window_w},{window_h}'],
)
)
controller = CustomController()
async with await browser.new_context(
config=BrowserContextConfig(
trace_path='./tmp/traces',
save_recording_path="./tmp/record_videos",
use_own_browser = True
disable_security = True
playwright = None
browser_context_ = None
try:
if use_own_browser:
playwright = await async_playwright().start()
chrome_exe = os.getenv("CHROME_PATH", "")
chrome_use_data = os.getenv("CHROME_USER_DATA", "")
browser_context_ = await playwright.chromium.launch_persistent_context(
user_data_dir=chrome_use_data,
executable_path=chrome_exe,
no_viewport=False,
browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
headless=False, # 保持浏览器窗口可见
user_agent=(
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
'(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'
),
java_script_enabled=True,
bypass_csp=disable_security,
ignore_https_errors=disable_security,
record_video_dir="./tmp/record_videos",
record_video_size={'width': window_w, 'height': window_h}
)
else:
browser_context_ = None
browser = CustomBrowser(
config=BrowserConfig(
headless=False,
disable_security=True,
extra_chromium_args=[f'--window-size={window_w},{window_h}'],
)
) as browser_context:
agent = Agent(
task="go to google.com and type 'OpenAI' click search and give me the first url",
llm=llm,
browser_context=browser_context,
controller=controller,
system_prompt_class=CustomSystemPrompt
)
history: AgentHistoryList = await agent.run(max_steps=10)
print('Final Result:')
pprint(history.final_result(), indent=4)
async with await browser.new_context(
config=BrowserContextConfig(
trace_path='./tmp/result_processing',
save_recording_path="./tmp/record_videos",
no_viewport=False,
browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
),
context=browser_context_
) as browser_context:
agent = CustomAgent(
task="go to google.com and type 'OpenAI' click search and give me the first url",
add_infos="", # some hints for llm to complete the task
llm=llm,
browser_context=browser_context,
controller=controller,
system_prompt_class=CustomSystemPrompt
)
history: AgentHistoryList = await agent.run(max_steps=10)
print('\nErrors:')
pprint(history.errors(), indent=4)
print('Final Result:')
pprint(history.final_result(), indent=4)
# e.g. xPaths the model clicked on
print('\nModel Outputs:')
pprint(history.model_actions(), indent=4)
print('\nErrors:')
pprint(history.errors(), indent=4)
print('\nThoughts:')
pprint(history.model_thoughts(), indent=4)
# close browser
await browser.close()
# e.g. xPaths the model clicked on
print('\nModel Outputs:')
pprint(history.model_actions(), indent=4)
print('\nThoughts:')
pprint(history.model_thoughts(), indent=4)
# close browser
except Exception as e:
import traceback
traceback.print_exc()
finally:
# 显式关闭持久化上下文
if browser_context_:
await browser_context_.close()
# 关闭 Playwright 对象
if playwright:
await playwright.stop()
await browser.close()
if __name__ == '__main__':

37
tests/test_playwright.py Normal file
View File

@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
# @Time : 2025/1/2
# @Author : wenshao
# @Email : wenshaoguo1026@gmail.com
# @Project : browser-use-webui
# @FileName: test_playwright.py
import pdb
from dotenv import load_dotenv
load_dotenv()
def test_connect_browser():
    """Manually check that Playwright can drive a locally installed Chrome.

    Launches a persistent context with the executable from the
    ``CHROME_PATH`` environment variable and the profile directory from
    ``CHROME_USER_DATA`` (both default to an empty string), opens the
    Gmail inbox, then blocks on stdin until Enter is pressed before
    closing. This is interactive and is not suitable for unattended runs.
    """
    import os
    from playwright.sync_api import sync_playwright

    chrome_exe = os.getenv("CHROME_PATH", "")
    chrome_use_data = os.getenv("CHROME_USER_DATA", "")
    with sync_playwright() as p:
        # launch_persistent_context returns a browser *context*, not a
        # Browser, hence the local name.
        context = p.chromium.launch_persistent_context(
            user_data_dir=chrome_use_data,
            executable_path=chrome_exe,
            headless=False,  # keep the browser window visible
        )
        try:
            page = context.new_page()
            page.goto("https://mail.google.com/mail/u/0/#inbox")
            page.wait_for_load_state()
            # Prompt text means "Press Enter to close the browser...".
            input("按下回车键以关闭浏览器...")
        finally:
            # Close the context even when navigation or the prompt raises,
            # so the user's profile is shut down cleanly.
            context.close()
if __name__ == '__main__':
test_connect_browser()