Support connecting to the user's own browser

2025-01-02 23:19:45 +08:00
parent 863e865446
commit 2024ad300f
5 changed files with 133 additions and 37 deletions
--- a/src/agent/custom_prompts.py
+++ b/src/agent/custom_prompts.py
@@ -26,10 +26,10 @@ class CustomSystemPrompt(SystemPrompt):
       {
         "current_state": {
           "prev_action_evaluation": "Success|Failed|Unknown - Analyze the current elements and the image to check if the previous goals/actions are successful like intended by the task. Ignore the action result. The website is the ground truth. Also mention if something unexpected happened like new suggestions in an input field. Shortly state why/why not. Note that the result you output must be consistent with the reasoning you output afterwards. If you consider it to be 'Failed,' you should reflect on this during your thought.",
-           "import_contents": "Please think about whether there is any content closely related to user\'s instruction or task on the current page? If there is, please output the content. If not, please output \"None\".",
-           "completed_contents": "Update the task progress. Don\'t output the purpose of any operation. Completed contents is a general summary of the current contents that have been completed. Just summarize the contents that have been actually completed based on the current and the history operations. Please list each completed item individually, such as: 1. Input username. 2. Input Password. 3. Click confirm button",
+           "import_contents": "Please think about whether there is any content closely related to user\'s instruction on the current page? If there is, please output the content. If not, please output \"None\".",
+           "completed_contents": "Update the input Task Progress. Completed contents is a general summary of the current contents that have been completed. Just summarize the contents that have been actually completed based on the current page and the history operations. Please list each completed item individually, such as: 1. Input username. 2. Input Password. 3. Click confirm button",
           "thought": "Think about the requirements that have been completed in previous operations and the requirements that need to be completed in the next one operation. If the output of prev_action_evaluation is 'Failed', please reflect and output your reflection here. If you think you have entered the wrong page, consider to go back to the previous page in next action.",
-           "summary": "Please generate a brief natural language description for the operation in next Actions based on your Thought."
+           "summary": "Please generate a brief natural language description for the operation in next actions based on your Thought."
         },
         "action": [
           {
--- a/src/browser/custom_browser.py
+++ b/src/browser/custom_browser.py
@@ -13,7 +13,7 @@ from .custom_context import CustomBrowserContext
 class CustomBrowser(Browser):

    async def new_context(
-            self, config: BrowserContextConfig = BrowserContextConfig()
+            self, config: BrowserContextConfig = BrowserContextConfig(), context: CustomBrowserContext = None
    ) -> BrowserContext:
        """Create a browser context"""
-        return CustomBrowserContext(config=config, browser=self)
+        return CustomBrowserContext(config=config, browser=self, context=context)
--- a/src/browser/custom_context.py
+++ b/src/browser/custom_context.py
@@ -20,8 +20,19 @@ logger = logging.getLogger(__name__)

 class CustomBrowserContext(BrowserContext):

+    def __init__(
+            self,
+            browser: 'Browser',
+            config: BrowserContextConfig = BrowserContextConfig(),
+            context: BrowserContext = None
+    ):
+        super(CustomBrowserContext, self).__init__(browser, config)
+        self.context = context
+
    async def _create_context(self, browser: PlaywrightBrowser):
        """Creates a new browser context with anti-detection measures and loads cookies if available."""
+        if self.context:
+            return self.context
        if self.browser.config.chrome_instance_path and len(browser.contexts) > 0:
            # Connect to existing Chrome instance instead of creating new one
            context = browser.contexts[0]
--- a/tests/test_browser_use.py
+++ b/tests/test_browser_use.py
@@ -3,10 +3,14 @@
 # @Author  : wenshao
 # @ProjectName: browser-use-webui
 # @FileName: test_browser_use.py
+import pdb
+
 from dotenv import load_dotenv

 load_dotenv()
+import sys

+sys.path.append(".")
 import os
 import sys
 from pprint import pprint
@@ -74,6 +78,7 @@ async def test_browser_use_org():


 async def test_browser_use_custom():
+    from playwright.async_api import async_playwright
    from browser_use.browser.context import BrowserContextWindowSize

    from src.browser.custom_browser import CustomBrowser, BrowserConfig
@@ -81,6 +86,7 @@ async def test_browser_use_custom():
    from src.controller.custom_controller import CustomController
    from src.agent.custom_agent import CustomAgent
    from src.agent.custom_prompts import CustomSystemPrompt
+    from src.browser.custom_context import CustomBrowserContext

    window_w, window_h = 1920, 1080

@@ -95,49 +101,91 @@ async def test_browser_use_custom():
    llm = utils.get_llm_model(
        provider="gemini",
        model_name="gemini-2.0-flash-exp",
-        temperature=0.8,
+        temperature=1.0,
        api_key=os.getenv("GOOGLE_API_KEY", "")
    )

-    browser = CustomBrowser(
-        config=BrowserConfig(
-            headless=False,
-            disable_security=True,
-            extra_chromium_args=[f'--window-size={window_w},{window_h}'],
-        )
-    )
    controller = CustomController()
-    async with await browser.new_context(
-            config=BrowserContextConfig(
-                trace_path='./tmp/traces',
-                save_recording_path="./tmp/record_videos",
+    use_own_browser = True
+    disable_security = True
+    playwright = None
+    browser_context_ = None
+    try:
+        if use_own_browser:
+            playwright = await async_playwright().start()
+            chrome_exe = os.getenv("CHROME_PATH", "")
+            chrome_use_data = os.getenv("CHROME_USER_DATA", "")
+            browser_context_ = await playwright.chromium.launch_persistent_context(
+                user_data_dir=chrome_use_data,
+                executable_path=chrome_exe,
                no_viewport=False,
-                browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
+                headless=False,  # keep the browser window visible
+                user_agent=(
+                    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
+                    '(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'
+                ),
+                java_script_enabled=True,
+                bypass_csp=disable_security,
+                ignore_https_errors=disable_security,
+                record_video_dir="./tmp/record_videos",
+                record_video_size={'width': window_w, 'height': window_h}
+            )
+        else:
+            browser_context_ = None
+
+        browser = CustomBrowser(
+            config=BrowserConfig(
+                headless=False,
+                disable_security=True,
+                extra_chromium_args=[f'--window-size={window_w},{window_h}'],
            )
-    ) as browser_context:
-        agent = Agent(
-            task="go to google.com and type 'OpenAI' click search and give me the first url",
-            llm=llm,
-            browser_context=browser_context,
-            controller=controller,
-            system_prompt_class=CustomSystemPrompt
        )
-        history: AgentHistoryList = await agent.run(max_steps=10)

-        print('Final Result:')
-        pprint(history.final_result(), indent=4)
+        async with await browser.new_context(
+                config=BrowserContextConfig(
+                    trace_path='./tmp/result_processing',
+                    save_recording_path="./tmp/record_videos",
+                    no_viewport=False,
+                    browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
+                ),
+                context=browser_context_
+        ) as browser_context:
+            agent = CustomAgent(
+                task="go to google.com and type 'OpenAI' click search and give me the first url",
+                add_infos="",  # some hints for llm to complete the task
+                llm=llm,
+                browser_context=browser_context,
+                controller=controller,
+                system_prompt_class=CustomSystemPrompt
+            )
+            history: AgentHistoryList = await agent.run(max_steps=10)

-        print('\nErrors:')
-        pprint(history.errors(), indent=4)
+            print('Final Result:')
+            pprint(history.final_result(), indent=4)

-        # e.g. xPaths the model clicked on
-        print('\nModel Outputs:')
-        pprint(history.model_actions(), indent=4)
+            print('\nErrors:')
+            pprint(history.errors(), indent=4)

-        print('\nThoughts:')
-        pprint(history.model_thoughts(), indent=4)
-    # close browser
-    await browser.close()
+            # e.g. xPaths the model clicked on
+            print('\nModel Outputs:')
+            pprint(history.model_actions(), indent=4)
+
+            print('\nThoughts:')
+            pprint(history.model_thoughts(), indent=4)
+            # the browser is closed in the finally block below
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+    finally:
+        # explicitly close the persistent context
+        if browser_context_:
+            await browser_context_.close()
+
+        # stop the Playwright object
+        if playwright:
+            await playwright.stop()
+
+        await browser.close()


 if __name__ == '__main__':
--- a/tests/test_playwright.py
+++ b/tests/test_playwright.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2025/1/2
+# @Author  : wenshao
+# @Email   : wenshaoguo1026@gmail.com
+# @Project : browser-use-webui
+# @FileName: test_playwright.py
+import pdb
+from dotenv import load_dotenv
+
+load_dotenv()
+
+
+def test_connect_browser():
+    import os
+    from playwright.sync_api import sync_playwright
+
+    chrome_exe = os.getenv("CHROME_PATH", "")
+    chrome_use_data = os.getenv("CHROME_USER_DATA", "")
+
+    with sync_playwright() as p:
+        browser = p.chromium.launch_persistent_context(
+            user_data_dir=chrome_use_data,
+            executable_path=chrome_exe,
+            headless=False  # keep the browser window visible
+        )
+
+        page = browser.new_page()
+        page.goto("https://mail.google.com/mail/u/0/#inbox")
+        page.wait_for_load_state()
+
+        input("按下回车键以关闭浏览器...")
+
+        browser.close()
+
+
+if __name__ == '__main__':
+    test_connect_browser()