stream function only

2025-01-10 01:13:19 +07:00
parent d466231034
commit cae23eb16e
7 changed files with 401 additions and 352 deletions
--- a/.gradio/certificate.pem
+++ b/.gradio/certificate.pem
@@ -0,0 +1,31 @@
+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----
--- a/src/agent/custom_agent.py
+++ b/src/agent/custom_agent.py
@@ -69,6 +69,11 @@ class CustomAgent(Agent):
            max_actions_per_step: int = 10,
            tool_call_in_content: bool = True,
    ):
+        # Store tool_call_in_content before calling parent's __init__
+        self.tool_call_in_content = tool_call_in_content
+        self.add_infos = add_infos
+        
+        # Call parent's __init__ without tool_call_in_content
        super().__init__(
            task=task,
            llm=llm,
@@ -85,9 +90,9 @@ class CustomAgent(Agent):
            include_attributes=include_attributes,
            max_error_length=max_error_length,
            max_actions_per_step=max_actions_per_step,
-            tool_call_in_content=tool_call_in_content,
        )
-        self.add_infos = add_infos
+        
+        # Initialize message manager with tool_call_in_content
        self.message_manager = CustomMassageManager(
            llm=self.llm,
            task=self.task,
@@ -97,7 +102,7 @@ class CustomAgent(Agent):
            include_attributes=self.include_attributes,
            max_error_length=self.max_error_length,
            max_actions_per_step=self.max_actions_per_step,
-            tool_call_in_content=tool_call_in_content,
+            tool_call_in_content=self.tool_call_in_content,
        )

    def _setup_action_models(self) -> None:
--- a/src/agent/custom_massage_manager.py
+++ b/src/agent/custom_massage_manager.py
@@ -12,7 +12,8 @@ from typing import List, Optional, Type
 from browser_use.agent.message_manager.service import MessageManager
 from browser_use.agent.message_manager.views import MessageHistory
 from browser_use.agent.prompts import SystemPrompt
-from browser_use.agent.views import ActionResult, AgentStepInfo
+from browser_use.agent.views import ActionResult
+from .custom_views import CustomAgentStepInfo
 from browser_use.browser.views import BrowserState
 from langchain_core.language_models import BaseChatModel
 from langchain_core.messages import (
@@ -40,6 +41,7 @@ class CustomMassageManager(MessageManager):
            max_actions_per_step: int = 10,
            tool_call_in_content: bool = False,
    ):
+        self.tool_call_in_content = tool_call_in_content
        super().__init__(
            llm=llm,
            task=task,
@@ -51,13 +53,17 @@ class CustomMassageManager(MessageManager):
            include_attributes=include_attributes,
            max_error_length=max_error_length,
            max_actions_per_step=max_actions_per_step,
-            tool_call_in_content=tool_call_in_content,
        )

        # Custom: Move Task info to state_message
        self.history = MessageHistory()
        self._add_message_with_tokens(self.system_prompt)
-        tool_calls = [
+        tool_calls = self._create_tool_calls()
+        example_tool_call = self._create_example_tool_call(tool_calls)
+        self._add_message_with_tokens(example_tool_call)
+
+    def _create_tool_calls(self):
+        return [
            {
                'name': 'CustomAgentOutput',
                'args': {
@@ -74,25 +80,25 @@ class CustomMassageManager(MessageManager):
                'type': 'tool_call',
            }
        ]
+
+    def _create_example_tool_call(self, tool_calls):
        if self.tool_call_in_content:
            # openai throws error if tool_calls are not responded -> move to content
-            example_tool_call = AIMessage(
+            return AIMessage(
                content=f'{tool_calls}',
                tool_calls=[],
            )
        else:
-            example_tool_call = AIMessage(
+            return AIMessage(
                content=f'',
                tool_calls=tool_calls,
            )

-        self._add_message_with_tokens(example_tool_call)
-
    def add_state_message(
-            self,
-            state: BrowserState,
-            result: Optional[List[ActionResult]] = None,
-            step_info: Optional[AgentStepInfo] = None,
+        self,
+        state: BrowserState,
+        result: Optional[List[ActionResult]] = None,
+        step_info: Optional[CustomAgentStepInfo] = None,
    ) -> None:
        """Add browser state as human message"""

@@ -105,7 +111,7 @@ class CustomMassageManager(MessageManager):
                        self._add_message_with_tokens(msg)
                    if r.error:
                        msg = HumanMessage(
-                            content=str(r.error)[-self.max_error_length:]
+                            content=str(r.error)[-self.max_error_length :]
                        )
                        self._add_message_with_tokens(msg)
                    result = None  # if result in history, we dont want to add it again
--- a/src/browser/custom_browser.py
+++ b/src/browser/custom_browser.py
@@ -17,4 +17,4 @@ class CustomBrowser(Browser):
        context: CustomBrowserContext = None,
    ) -> BrowserContext:
        """Create a browser context"""
-        return CustomBrowserContext(config=config, browser=self, context=context)
+        return CustomBrowserContext(config=config, browser=self, context=context)
--- a/src/browser/custom_context.py
+++ b/src/browser/custom_context.py
@@ -3,55 +3,66 @@
 # @Author  : wenshao
 # @Email   : wenshaoguo1026@gmail.com
 # @Project : browser-use-webui
-# @FileName: context.py
+# @FileName: merged_context.py

+import asyncio
+import base64
 import json
 import logging
 import os
+from typing import TYPE_CHECKING

+from playwright.async_api import Browser as PlaywrightBrowser, Page, BrowserContext as PlaywrightContext
 from browser_use.browser.browser import Browser
 from browser_use.browser.context import BrowserContext, BrowserContextConfig
-from playwright.async_api import Browser as PlaywrightBrowser
+
+if TYPE_CHECKING:
+    from .custom_browser import CustomBrowser

 logger = logging.getLogger(__name__)

-
 class CustomBrowserContext(BrowserContext):
    def __init__(
        self,
-        browser: "Browser",
+        browser: "CustomBrowser",  # Forward declaration for CustomBrowser
        config: BrowserContextConfig = BrowserContextConfig(),
-        context: BrowserContext = None,
+        context: PlaywrightContext = None
    ):
        super(CustomBrowserContext, self).__init__(browser=browser, config=config)
-        self.context = context
+        self._impl_context = context  # Rename to avoid confusion
+        self._page = None
+        self.session = None  # Add session attribute

-    async def _create_context(self, browser: PlaywrightBrowser):
+    @property
+    def impl_context(self) -> PlaywrightContext:
+        """Returns the underlying Playwright context implementation"""
+        return self._impl_context
+
+    async def _create_context(self, browser: PlaywrightBrowser = None):
        """Creates a new browser context with anti-detection measures and loads cookies if available."""
-        # If we have a context, return it directly
-        if self.context:
-            return self.context
-        if self.browser.config.chrome_instance_path and len(browser.contexts) > 0:
-            # Connect to existing Chrome instance instead of creating new one
-            context = browser.contexts[0]
-        else:
-            # Original code for creating new context
-            context = await browser.new_context(
-                viewport=self.config.browser_window_size,
-                no_viewport=False,
-                user_agent=(
-                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
-                    "(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"
-                ),
-                java_script_enabled=True,
-                bypass_csp=self.config.disable_security,
-                ignore_https_errors=self.config.disable_security,
-                record_video_dir=self.config.save_recording_path,
-                record_video_size=self.config.browser_window_size,  # set record video size, same as windows size
-            )
+        if self._impl_context:
+            return self._impl_context
+
+        # If a Playwright browser is not provided, get it from our custom browser
+        pw_browser = browser or await self.browser.get_playwright_browser()
+        
+        context_args = {
+            'viewport': self.config.browser_window_size,
+            'no_viewport': False, 
+            'bypass_csp': self.config.disable_security,
+            'ignore_https_errors': self.config.disable_security
+        }
+        
+        if self.config.save_recording_path:
+            context_args.update({
+                'record_video_dir': self.config.save_recording_path,
+                'record_video_size': self.config.browser_window_size
+            })
+
+        self._impl_context = await pw_browser.new_context(**context_args)

        if self.config.trace_path:
-            await context.tracing.start(screenshots=True, snapshots=True, sources=True)
+            await self._impl_context.tracing.start(screenshots=True, snapshots=True, sources=True)

        # Load cookies if they exist
        if self.config.cookies_file and os.path.exists(self.config.cookies_file):
@@ -60,10 +71,10 @@ class CustomBrowserContext(BrowserContext):
                logger.info(
                    f"Loaded {len(cookies)} cookies from {self.config.cookies_file}"
                )
-                await context.add_cookies(cookies)
+                await self._impl_context.add_cookies(cookies)

        # Expose anti-detection scripts
-        await context.add_init_script(
+        await self._impl_context.add_init_script(
            """
            // Webdriver property
            Object.defineProperty(navigator, 'webdriver', {
@@ -93,4 +104,42 @@ class CustomBrowserContext(BrowserContext):
            """
        )

-        return context
+        # Create an initial page
+        self._page = await self._impl_context.new_page()
+        await self._page.goto('about:blank')  # Ensure page is ready
+        
+        return self._impl_context
+
+    async def new_page(self) -> Page:
+        """Creates and returns a new page in this context"""
+        if not self._impl_context:
+            await self._create_context()
+        return await self._impl_context.new_page()
+
+    async def __aenter__(self):
+        if not self._impl_context:
+            await self._create_context()
+        return self
+
+    async def __aexit__(self, *args):
+        if self._impl_context:
+            await self._impl_context.close()
+            self._impl_context = None
+
+    @property
+    def pages(self):
+        """Returns list of pages in context"""
+        return self._impl_context.pages if self._impl_context else []
+
+    async def get_state(self, **kwargs):
+        if self._impl_context:
+            pages = self._impl_context.pages
+            if pages:
+                return await super().get_state(**kwargs)
+        return None
+
+    async def get_pages(self):
+        """Get pages in a way that works"""
+        if not self._impl_context:
+            return []
+        return self._impl_context.pages
--- a/src/utils/stream_utils.py
+++ b/src/utils/stream_utils.py
@@ -28,18 +28,4 @@ async def capture_screenshot(browser_context: BrowserContext) -> str:
        except Exception as e:
            return f"<div class='error'>Screenshot failed: {str(e)}</div>"
    except Exception as e:
-        return f"<div class='error'>Screenshot error: {str(e)}</div>"
-
-async def stream_browser_view(browser_context: BrowserContext) -> AsyncGenerator[str, None]:
-    """Stream browser view to the UI"""
-    try:
-        while True:
-            try:
-                screenshot_html = await capture_screenshot(browser_context)
-                yield screenshot_html
-                await asyncio.sleep(0.2)  # 5 FPS
-            except Exception as e:
-                yield f"<div class='error'>Screenshot error: {str(e)}</div>"
-                await asyncio.sleep(1)  # Wait before retrying
-    except Exception as e:
-        yield f"<div class='error'>Stream error: {str(e)}</div>"
+        return f"<div class='error'>Screenshot error: {str(e)}</div>"
--- a/webui.py
+++ b/webui.py
@@ -6,28 +6,23 @@
 # @FileName: webui.py

 import pdb
+import glob

 from dotenv import load_dotenv
-
 load_dotenv()
 import argparse
-import os
-
 import gradio as gr
-import argparse
-
-
-from gradio.themes import Base, Default, Soft, Monochrome, Glass, Origin, Citrus, Ocean
+import os
 import asyncio
-import os, glob
-from browser_use.agent.service import Agent
+from playwright.async_api import async_playwright
 from browser_use.browser.browser import Browser, BrowserConfig
 from browser_use.browser.context import (
    BrowserContextConfig,
    BrowserContextWindowSize,
 )
-from playwright.async_api import async_playwright
-
+from browser_use.agent.service import Agent
+from src.browser.custom_browser import CustomBrowser
+from src.controller.custom_controller import CustomController
 from src.agent.custom_agent import CustomAgent
 from src.agent.custom_prompts import CustomSystemPrompt
 from src.browser.custom_browser import CustomBrowser
@@ -35,8 +30,10 @@ from src.browser.custom_context import BrowserContextConfig
 from src.controller.custom_controller import CustomController
 from src.utils import utils
 from src.utils.utils import update_model_dropdown
+from src.utils.file_utils import get_latest_files
 from src.utils.stream_utils import capture_screenshot

+
 async def run_browser_agent(
        agent_type,
        llm_provider,
@@ -50,86 +47,84 @@ async def run_browser_agent(
        window_w,
        window_h,
        save_recording_path,
-        save_trace_path,
        enable_recording,
        task,
        add_infos,
        max_steps,
        use_vision,
        max_actions_per_step,
-        tool_call_in_content
+        tool_call_in_content,
+        browser_context=None  
 ):
-    # Disable recording if the checkbox is unchecked
-    if not enable_recording:
-        save_recording_path = None
-
-    # Ensure the recording directory exists if recording is enabled
-    if save_recording_path:
-        os.makedirs(save_recording_path, exist_ok=True)
-
-    # Get the list of existing videos before the agent runs
-    existing_videos = set()
-    if save_recording_path:
-        existing_videos = set(
-            glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4"))
-            + glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]"))
+    """Run the browser agent with proper browser context initialization"""
+    browser = None
+    try:
+        if browser_context is None:
+            browser = CustomBrowser(
+                config=BrowserConfig(
+                    headless=headless,
+                    disable_security=disable_security,
+                    extra_chromium_args=[f'--window-size={window_w},{window_h}'],
+                )
+            )
+            browser_context = await browser.new_context(
+                config=BrowserContextConfig(
+                    trace_path='./tmp/traces',
+                    save_recording_path=save_recording_path if enable_recording else None,
+                    no_viewport=False,
+                    browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
+                )
+            )
+            
+        # Run the agent
+        llm = utils.get_llm_model(
+            provider=llm_provider,
+            model_name=llm_model_name,
+            temperature=llm_temperature,
+            base_url=llm_base_url,
+            api_key=llm_api_key,
        )

-    # Run the agent
-    llm = utils.get_llm_model(
-        provider=llm_provider,
-        model_name=llm_model_name,
-        temperature=llm_temperature,
-        base_url=llm_base_url,
-        api_key=llm_api_key,
-    )
-    if agent_type == "org":
-        final_result, errors, model_actions, model_thoughts = await run_org_agent(
-            llm=llm,
-            headless=headless,
-            disable_security=disable_security,
-            window_w=window_w,
-            window_h=window_h,
-            save_recording_path=save_recording_path,
-            save_trace_path=save_trace_path,
-            task=task,
-            max_steps=max_steps,
-            use_vision=use_vision,
-            max_actions_per_step=max_actions_per_step,
-            tool_call_in_content=tool_call_in_content
-        )
-    elif agent_type == "custom":
-        final_result, errors, model_actions, model_thoughts = await run_custom_agent(
-            llm=llm,
-            use_own_browser=use_own_browser,
-            headless=headless,
-            disable_security=disable_security,
-            window_w=window_w,
-            window_h=window_h,
-            save_recording_path=save_recording_path,
-            save_trace_path=save_trace_path,
-            task=task,
-            add_infos=add_infos,
-            max_steps=max_steps,
-            use_vision=use_vision,
-            max_actions_per_step=max_actions_per_step,
-            tool_call_in_content=tool_call_in_content
-        )
-    else:
-        raise ValueError(f"Invalid agent type: {agent_type}")
-
-    # Get the list of videos after the agent runs (if recording is enabled)
-    latest_video = None
-    if save_recording_path:
-        new_videos = set(
-            glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4"))
-            + glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]"))
-        )
-        if new_videos - existing_videos:
-            latest_video = list(new_videos - existing_videos)[0]  # Get the first new video
-
-    return final_result, errors, model_actions, model_thoughts, latest_video
+        if agent_type == "org":
+            result = await run_org_agent(
+                llm=llm,
+                headless=headless,
+                disable_security=disable_security,
+                window_w=window_w,
+                window_h=window_h,
+                save_recording_path=save_recording_path,
+                task=task,
+                max_steps=max_steps,
+                use_vision=use_vision,
+                max_actions_per_step=max_actions_per_step,
+                tool_call_in_content=tool_call_in_content,
+                browser_context=browser_context,
+            )
+        elif agent_type == "custom":
+            result = await run_custom_agent(
+                llm=llm,
+                use_own_browser=use_own_browser,
+                headless=headless,
+                disable_security=disable_security,
+                window_w=window_w,
+                window_h=window_h,
+                save_recording_path=save_recording_path,
+                task=task,
+                add_infos=add_infos,
+                max_steps=max_steps,
+                use_vision=use_vision,
+                max_actions_per_step=max_actions_per_step,
+                tool_call_in_content=tool_call_in_content,
+                browser_context=browser_context,
+            )
+        else:
+            raise ValueError(f"Invalid agent type: {agent_type}")
+        
+        return result

+    finally:
+        if browser:
+            await browser.close()

 async def run_org_agent(
        llm,
@@ -138,48 +133,65 @@ async def run_org_agent(
        window_w,
        window_h,
        save_recording_path,
-        save_trace_path,
        task,
        max_steps,
        use_vision,
        max_actions_per_step,
-        tool_call_in_content
-
+        tool_call_in_content,
+        browser_context=None,  # receive context
 ):
-    browser = Browser(
-        config=BrowserConfig(
-            headless=headless,
-            disable_security=disable_security,
-            extra_chromium_args=[f"--window-size={window_w},{window_h}"],
-        )
-    )
-    async with await browser.new_context(
-            config=BrowserContextConfig(
-                trace_path=save_trace_path if save_trace_path else None,
-                save_recording_path=save_recording_path if save_recording_path else None,
-                no_viewport=False,
-                browser_window_size=BrowserContextWindowSize(
-                    width=window_w, height=window_h
-                ),
+    browser = None
+    if browser_context is None:
+        browser = Browser(
+            config=BrowserConfig(
+                headless=False,  # Force non-headless for streaming
+                disable_security=disable_security,
+                extra_chromium_args=[f'--window-size={window_w},{window_h}'],
            )
-    ) as browser_context:
+        )
+        async with await browser.new_context(
+                config=BrowserContextConfig(
+                    trace_path='./tmp/traces',
+                    save_recording_path=save_recording_path if save_recording_path else None,
+                    no_viewport=False,
+                    browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
+                )
+        ) as browser_context_in:
+            agent = Agent(
+                task=task,
+                llm=llm,
+                use_vision=use_vision,
+                browser_context=browser_context_in,
+            )
+            history = await agent.run(max_steps=max_steps)
+            
+            final_result = history.final_result()
+            errors = history.errors()
+            model_actions = history.model_actions()
+            model_thoughts = history.model_thoughts()
+        
+        recorded_files = get_latest_files(save_recording_path)
+        trace_file = get_latest_files(save_recording_path + "/../traces")
+        
+        await browser.close()
+        return final_result, errors, model_actions, model_thoughts, recorded_files.get('.webm'), trace_file.get('.zip')
+    else:
+        # Reuse existing context
        agent = Agent(
            task=task,
            llm=llm,
            use_vision=use_vision,
-            browser_context=browser_context,
            max_actions_per_step=max_actions_per_step,
-            tool_call_in_content=tool_call_in_content
+            browser_context=browser_context,
        )
        history = await agent.run(max_steps=max_steps)
-
        final_result = history.final_result()
        errors = history.errors()
        model_actions = history.model_actions()
        model_thoughts = history.model_thoughts()
-    await browser.close()
-    return final_result, errors, model_actions, model_thoughts
-
+        recorded_files = get_latest_files(save_recording_path)
+        trace_file = get_latest_files(save_recording_path + "/../traces")
+        return final_result, errors, model_actions, model_thoughts, recorded_files.get('.webm'), trace_file.get('.zip')

 async def run_custom_agent(
        llm,
@@ -189,17 +201,17 @@ async def run_custom_agent(
        window_w,
        window_h,
        save_recording_path,
-        save_trace_path,
        task,
        add_infos,
        max_steps,
        use_vision,
        max_actions_per_step,
-        tool_call_in_content
+        tool_call_in_content,
+        browser_context=None,  # receive context
 ):
    controller = CustomController()
    playwright = None
-    browser_context_ = None
+    browser = None
    try:
        if use_own_browser:
            playwright = await async_playwright().start()
@@ -210,12 +222,12 @@ async def run_custom_agent(
                chrome_exe = None
            elif not os.path.exists(chrome_exe):
                raise ValueError(f"Chrome executable not found at {chrome_exe}")
-
+            
            if chrome_use_data == "":
                chrome_use_data = None

            browser_context_ = await playwright.chromium.launch_persistent_context(
-                user_data_dir=chrome_use_data,
+                user_data_dir=chrome_use_data if chrome_use_data else "",
                executable_path=chrome_exe,
                no_viewport=False,
                headless=headless,  # 保持浏览器窗口可见
@@ -232,26 +244,8 @@ async def run_custom_agent(
        else:
            browser_context_ = None

-        browser = CustomBrowser(
-            config=BrowserConfig(
-                headless=headless,
-                disable_security=disable_security,
-                extra_chromium_args=[f"--window-size={window_w},{window_h}"],
-            )
-        )
-        async with await browser.new_context(
-                config=BrowserContextConfig(
-                    trace_path=save_trace_path if save_trace_path else None,
-                    save_recording_path=save_recording_path
-                    if save_recording_path
-                    else None,
-                    no_viewport=False,
-                    browser_window_size=BrowserContextWindowSize(
-                        width=window_w, height=window_h
-                    ),
-                ),
-                context=browser_context_,
-        ) as browser_context:
+        if browser_context is not None:
+            # Reuse context
            agent = CustomAgent(
                task=task,
                add_infos=add_infos,
@@ -264,11 +258,50 @@ async def run_custom_agent(
                tool_call_in_content=tool_call_in_content
            )
            history = await agent.run(max_steps=max_steps)
-
            final_result = history.final_result()
            errors = history.errors()
            model_actions = history.model_actions()
            model_thoughts = history.model_thoughts()
+            recorded_files = get_latest_files(save_recording_path)
+            trace_file = get_latest_files(save_recording_path + "/../traces")
+            return final_result, errors, model_actions, model_thoughts, recorded_files.get('.webm'), trace_file.get('.zip')
+        else:
+            browser = CustomBrowser(
+                config=BrowserConfig(
+                    headless=headless,
+                    disable_security=disable_security,
+                    extra_chromium_args=[f'--window-size={window_w},{window_h}'],
+                )
+            )
+            async with await browser.new_context(
+                    config=BrowserContextConfig(
+                        trace_path='./tmp/result_processing',
+                        save_recording_path=save_recording_path if save_recording_path else None,
+                        no_viewport=False,
+                        browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
+                    ),
+                    context=browser_context_
+            ) as browser_context_in:
+                agent = CustomAgent(
+                    task=task,
+                    add_infos=add_infos,
+                    use_vision=use_vision,
+                    llm=llm,
+                    browser_context=browser_context_in,
+                    controller=controller,
+                    system_prompt_class=CustomSystemPrompt,
+                    max_actions_per_step=max_actions_per_step,
+                    tool_call_in_content=tool_call_in_content
+                )
+                history = await agent.run(max_steps=max_steps)
+
+                final_result = history.final_result()
+                errors = history.errors()
+                model_actions = history.model_actions()
+                model_thoughts = history.model_thoughts()
+                
+                recorded_files = get_latest_files(save_recording_path)
+                trace_file = get_latest_files(save_recording_path + "/../traces")

    except Exception as e:
        import traceback
@@ -278,6 +311,8 @@ async def run_custom_agent(
        errors = str(e) + "\n" + traceback.format_exc()
        model_actions = ""
        model_thoughts = ""
+        recorded_files = {}
+        trace_file = {}
    finally:
        # 显式关闭持久化上下文
        if browser_context_:
@@ -286,20 +321,9 @@ async def run_custom_agent(
        # 关闭 Playwright 对象
        if playwright:
            await playwright.stop()
-        await browser.close()
-    return final_result, errors, model_actions, model_thoughts
-
-# Define the theme map globally
-theme_map = {
-    "Default": Default(),
-    "Soft": Soft(),
-    "Monochrome": Monochrome(),
-    "Glass": Glass(),
-    "Origin": Origin(),
-    "Citrus": Citrus(),
-    "Ocean": Ocean(),
-    "Base": Base()
-}
+        if browser:
+            await browser.close()
+    return final_result, errors, model_actions, model_thoughts, trace_file.get('.webm'), recorded_files.get('.zip')

 async def run_with_stream(
    agent_type,
@@ -314,8 +338,6 @@ async def run_with_stream(
    window_w,
    window_h,
    save_recording_path,
-    save_trace_path,
-    enable_recording,
    task,
    add_infos,
    max_steps,
@@ -338,7 +360,7 @@ async def run_with_stream(
        # Create a new browser context
        async with await browser.new_context(
            config=BrowserContextConfig(
-                trace_path=save_trace_path,
+                trace_path="./tmp/traces",
                save_recording_path=save_recording_path,
                no_viewport=False,
                browser_window_size=BrowserContextWindowSize(
@@ -349,26 +371,25 @@ async def run_with_stream(
            # Run the browser agent in the background
            agent_task = asyncio.create_task(
                run_browser_agent(
-                    agent_type,
-                    llm_provider,
-                    llm_model_name,
-                    llm_temperature,
-                    llm_base_url,
-                    llm_api_key,
-                    use_own_browser,
-                    headless,
-                    disable_security,
-                    window_w,
-                    window_h,
-                    save_recording_path,
-                    save_trace_path,
-                    enable_recording,
-                    task,
-                    add_infos,
-                    max_steps,
-                    use_vision,
-                    max_actions_per_step,
-                    tool_call_in_content,
+                    agent_type=agent_type,
+                    llm_provider=llm_provider,
+                    llm_model_name=llm_model_name,
+                    llm_temperature=llm_temperature,
+                    llm_base_url=llm_base_url,
+                    llm_api_key=llm_api_key,
+                    use_own_browser=use_own_browser,
+                    headless=headless,
+                    disable_security=disable_security,
+                    window_w=window_w,
+                    window_h=window_h,
+                    save_recording_path=save_recording_path,
+                    enable_recording=True,  # always on: run_with_stream no longer takes an enable_recording input
+                    task=task,
+                    add_infos=add_infos,
+                    max_steps=max_steps,
+                    use_vision=use_vision,
+                    max_actions_per_step=max_actions_per_step,
+                    tool_call_in_content=tool_call_in_content,
                    browser_context=browser_context  # Explicit keyword argument
                )
            )
@@ -439,6 +460,21 @@ async def run_with_stream(
        if browser:
            await browser.close()

+from gradio.themes import Citrus, Default, Glass, Monochrome, Ocean, Origin, Soft, Base
+
+# Define the theme map globally
+theme_map = {
+    "Default": Default(),
+    "Soft": Soft(),
+    "Monochrome": Monochrome(),
+    "Glass": Glass(),
+    "Origin": Origin(),
+    "Citrus": Citrus(),
+    "Ocean": Ocean(),
+    "Base": Base()
+}
+
+# Create the Gradio UI
 def create_ui(theme_name="Ocean"):
    css = """
    .gradio-container {
@@ -457,19 +493,8 @@ def create_ui(theme_name="Ocean"):
    }
    """

-    js = """
-    function refresh() {
-        const url = new URL(window.location);
-        if (url.searchParams.get('__theme') !== 'dark') {
-            url.searchParams.set('__theme', 'dark');
-            window.location.href = url.href;
-        }
-    }
-    """
-
-    with gr.Blocks(
-            title="Browser Use WebUI", theme=theme_map[theme_name], css=css, js=js
-    ) as demo:
+    with gr.Blocks(title="Browser Use WebUI", theme=theme_map[theme_name], css=css) as demo:
+        # Header
        with gr.Row():
            gr.Markdown(
                """
@@ -550,7 +575,7 @@ def create_ui(theme_name="Ocean"):
                            value=os.getenv(f"{llm_provider.value.upper()}_API_KEY", ""),  # Default to .env value
                            info="Your API key (leave blank to use .env)"
                        )
-
+                    
            with gr.TabItem("🌐 Browser Settings", id=3):
                with gr.Group():
                    with gr.Row():
@@ -595,128 +620,75 @@ def create_ui(theme_name="Ocean"):
                        interactive=True,  # Allow editing only if recording is enabled
                    )

-                    save_trace_path = gr.Textbox(
-                        label="Trace Path",
-                        placeholder="e.g. ./tmp/traces",
-                        value="./tmp/traces",
-                        info="Path to save Agent traces",
-                        interactive=True,
-                    )
-
            with gr.TabItem("🤖 Run Agent", id=4):
                task = gr.Textbox(
-                    label="Task Description",
                    lines=4,
-                    placeholder="Enter your task here...",
                    value="go to google.com and type 'OpenAI' click search and give me the first url",
                    info="Describe what you want the agent to do",
                )
-                add_infos = gr.Textbox(
-                    label="Additional Information",
-                    lines=3,
-                    placeholder="Add any helpful context or instructions...",
-                    info="Optional hints to help the LLM complete the task",
+                add_infos = gr.Textbox(lines=3, label="Additional Information")
+
+            # Results
+            with gr.Tab("📊 Results"):
+                browser_view = gr.HTML(
+                    value="<div>Waiting for browser session...</div>",
+                    label="Live Browser View",
                )
+                final_result_output = gr.Textbox(label="Final Result", lines=3)
+                errors_output = gr.Textbox(label="Errors", lines=3)
+                model_actions_output = gr.Textbox(label="Model Actions", lines=3)
+                model_thoughts_output = gr.Textbox(label="Model Thoughts", lines=3)
+                recording_file = gr.Video(label="Latest Recording")
+                trace_file = gr.File(label="Trace File")
+        with gr.Row():
+            run_button = gr.Button("▶️ Run Agent", variant="primary")

-                with gr.Row():
-                    run_button = gr.Button("▶️ Run Agent", variant="primary", scale=2)
-                    stop_button = gr.Button("⏹️ Stop", variant="stop", scale=1)
-
-            with gr.TabItem("📊 Results", id=5):
-                recording_display = gr.Video(label="Latest Recording")
-
-                with gr.Group():
-                    gr.Markdown("### Results")
-                    with gr.Row():
-                        with gr.Column():
-                            final_result_output = gr.Textbox(
-                                label="Final Result", lines=3, show_label=True
-                            )
-                        with gr.Column():
-                            errors_output = gr.Textbox(
-                                label="Errors", lines=3, show_label=True
-                            )
-                    with gr.Row():
-                        with gr.Column():
-                            model_actions_output = gr.Textbox(
-                                label="Model Actions", lines=3, show_label=True
-                            )
-                        with gr.Column():
-                            model_thoughts_output = gr.Textbox(
-                                label="Model Thoughts", lines=3, show_label=True
-                            )
-
-            with gr.TabItem("🎥 Recordings", id=6):
-                def list_recordings(save_recording_path):
-                    if not os.path.exists(save_recording_path):
-                        return []
-
-                    # Get all video files
-                    recordings = glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4")) + glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]"))
-
-                    # Sort recordings by creation time (oldest first)
-                    recordings.sort(key=os.path.getctime)
-
-                    # Add numbering to the recordings
-                    numbered_recordings = []
-                    for idx, recording in enumerate(recordings, start=1):
-                        filename = os.path.basename(recording)
-                        numbered_recordings.append((recording, f"{idx}. {filename}"))
-
-                    return numbered_recordings
-
-                recordings_gallery = gr.Gallery(
-                    label="Recordings",
-                    value=list_recordings("./tmp/record_videos"),
-                    columns=3,
-                    height="auto",
-                    object_fit="contain"
-                )
-
-                refresh_button = gr.Button("🔄 Refresh Recordings", variant="secondary")
-                refresh_button.click(
-                    fn=list_recordings,
-                    inputs=save_recording_path,
-                    outputs=recordings_gallery
-                )
-
-        # Attach the callback to the LLM provider dropdown
-        llm_provider.change(
-            lambda provider, api_key, base_url: update_model_dropdown(provider, api_key, base_url),
-            inputs=[llm_provider, llm_api_key, llm_base_url],
-            outputs=llm_model_name
-        )
-
-        # Add this after defining the components
-        enable_recording.change(
-            lambda enabled: gr.update(interactive=enabled),
-            inputs=enable_recording,
-            outputs=save_recording_path
-        )
-
-        # Run button click handler
+        # Button logic
        run_button.click(
            fn=run_with_stream,
            inputs=[
-                agent_type, llm_provider, llm_model_name, llm_temperature, llm_base_url, llm_api_key,
-                use_own_browser, headless, disable_security, window_w, window_h, save_recording_path, save_trace_path,
-                enable_recording, task, add_infos, max_steps, use_vision, max_actions_per_step, tool_call_in_content
+                agent_type,
+                llm_provider,
+                llm_model_name,
+                llm_temperature,
+                llm_base_url,
+                llm_api_key,
+                use_own_browser,
+                headless,
+                disable_security,
+                window_w,
+                window_h,
+                save_recording_path,
+                task,
+                add_infos,
+                max_steps,
+                use_vision,
+                max_actions_per_step,
+                tool_call_in_content,
            ],
-            outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output, recording_display],
+            outputs=[
+                browser_view,
+                final_result_output,
+                errors_output,
+                model_actions_output,
+                model_thoughts_output,
+                recording_file,
+                trace_file
+            ],
+            queue=True,
        )

    return demo

-def main():
+
+if __name__ == "__main__":
+    import argparse
+
    parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent")
-    parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
-    parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
-    parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI")
-    parser.add_argument("--dark-mode", action="store_true", help="Enable dark mode")
+    parser.add_argument("--ip", type=str, default="0.0.0.0", help="IP address to bind to")
+    parser.add_argument("--port", type=int, default=7860, help="Port to listen on")
+    parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys())
    args = parser.parse_args()

-    demo = create_ui(theme_name=args.theme)
-    demo.launch(server_name=args.ip, server_port=args.port)
-
-if __name__ == '__main__':
-    main()
+    ui = create_ui(theme_name=args.theme)
+    ui.launch(server_name=args.ip, server_port=args.port, share=True)