new stream function without the need to modify custom context

2025-01-12 19:57:24 +07:00
parent 585800f3c7
commit 92069a5bb4
4 changed files with 191 additions and 154 deletions
--- a/src/browser/custom_browser.py
+++ b/src/browser/custom_browser.py
@@ -4,6 +4,16 @@
 # @ProjectName: browser-use-webui
 # @FileName: browser.py

+import asyncio
+
+from playwright.async_api import Browser as PlaywrightBrowser
+from playwright.async_api import (
+	BrowserContext as PlaywrightBrowserContext,
+)
+from playwright.async_api import (
+	Playwright,
+	async_playwright,
+)
 from browser_use.browser.browser import Browser
 from browser_use.browser.context import BrowserContext, BrowserContextConfig
 from playwright.async_api import BrowserContext as PlaywrightBrowserContext
@@ -15,36 +25,102 @@ from .custom_context import CustomBrowserContext
 logger = logging.getLogger(__name__)

 class CustomBrowser(Browser):
-    _global_context = None

    async def new_context(
        self,
-        config: BrowserContextConfig = BrowserContextConfig(),
-        context: PlaywrightBrowserContext = None,
+        config: BrowserContextConfig = BrowserContextConfig()
    ) -> CustomBrowserContext:
-        """Create a browser context with persistence support"""
-        persistence_config = BrowserPersistenceConfig.from_env()
-        
-        if persistence_config.persistent_session:
-            if CustomBrowser._global_context is not None:
-                logger.info("Reusing existing persistent browser context")
-                return CustomBrowser._global_context
-            
-            context_instance = CustomBrowserContext(config=config, browser=self, context=context)
-            CustomBrowser._global_context = context_instance
-            logger.info("Created new persistent browser context")
-            return context_instance
-        
-        logger.info("Creating non-persistent browser context")
-        return CustomBrowserContext(config=config, browser=self, context=context)
+        return CustomBrowserContext(config=config, browser=self)
+
+    async def _setup_browser(self, playwright: Playwright) -> PlaywrightBrowser:
+        """Sets up and returns a Playwright Browser instance with anti-detection measures."""
+        if self.config.wss_url:
+            browser = await playwright.chromium.connect(self.config.wss_url)
+            return browser
+        elif self.config.chrome_instance_path:
+            import subprocess
+
+            import requests
+
+            try:
+                # Check if browser is already running
+                response = requests.get('http://localhost:9222/json/version', timeout=2)
+                if response.status_code == 200:
+                    logger.info('Reusing existing Chrome instance')
+                    browser = await playwright.chromium.connect_over_cdp(
+                        endpoint_url='http://localhost:9222',
+                        timeout=20000,  # 20 second timeout for connection
+                    )
+                    return browser
+            except requests.ConnectionError:
+                logger.debug('No existing Chrome instance found, starting a new one')
+
+            # Start a new Chrome instance
+            subprocess.Popen(
+                [
+                    self.config.chrome_instance_path,
+                    '--remote-debugging-port=9222',
+                ],
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+            )
+
+            # Attempt to connect again after starting a new instance
+            for _ in range(10):
+                try:
+                    response = requests.get('http://localhost:9222/json/version', timeout=2)
+                    if response.status_code == 200:
+                        break
+                except requests.ConnectionError:
+                    pass
+                await asyncio.sleep(1)
+
+            try:
+                browser = await playwright.chromium.connect_over_cdp(
+                    endpoint_url='http://localhost:9222',
+                    timeout=20000,  # 20 second timeout for connection
+                )
+                return browser
+            except Exception as e:
+                logger.error(f'Failed to start a new Chrome instance.: {str(e)}')
+                raise RuntimeError(
+                    ' To start chrome in Debug mode, you need to close all existing Chrome instances and try again otherwise we can not connect to the instance.'
+                )

-    async def close(self):
-        """Override close to respect persistence setting"""
-        persistence_config = BrowserPersistenceConfig.from_env()
-        if not persistence_config.persistent_session:
-            if CustomBrowser._global_context is not None:
-                await CustomBrowser._global_context.close()
-                CustomBrowser._global_context = None
-            await super().close()
        else:
-            logger.info("Skipping browser close due to persistent session")
+            try:
+                disable_security_args = []
+                if self.config.disable_security:
+                    disable_security_args = [
+                        '--disable-web-security',
+                        '--disable-site-isolation-trials',
+                        '--disable-features=IsolateOrigins,site-per-process',
+                    ]
+
+                browser = await playwright.chromium.launch(
+                    headless=self.config.headless,
+                    args=[
+                             '--no-sandbox',
+                             '--disable-blink-features=AutomationControlled',
+                             '--disable-infobars',
+                             '--disable-background-timer-throttling',
+                             '--disable-popup-blocking',
+                             '--disable-backgrounding-occluded-windows',
+                             '--disable-renderer-backgrounding',
+                             '--disable-window-activation',
+                             '--disable-focus-on-load',
+                             '--no-first-run',
+                             '--no-default-browser-check',
+                             '--no-startup-window',
+                             '--window-position=0,0',
+                             # '--window-size=1280,1000',
+                         ]
+                         + disable_security_args
+                         + self.config.extra_chromium_args,
+                    proxy=self.config.proxy,
+                )
+
+                return browser
+            except Exception as e:
+                logger.error(f'Failed to initialize Playwright browser: {str(e)}')
+                raise
--- a/src/browser/custom_context.py
+++ b/src/browser/custom_context.py
@@ -15,7 +15,6 @@ from playwright.async_api import Browser as PlaywrightBrowser
 from playwright.async_api import BrowserContext as PlaywrightBrowserContext

 from .config import BrowserPersistenceConfig
-
 logger = logging.getLogger(__name__)


@@ -23,33 +22,21 @@ class CustomBrowserContext(BrowserContext):
    def __init__(
        self,
        browser: "Browser",
-        config: BrowserContextConfig = BrowserContextConfig(),
-        context: PlaywrightBrowserContext = None,
+        config: BrowserContextConfig = BrowserContextConfig()
    ):
        super(CustomBrowserContext, self).__init__(browser=browser, config=config)
-        self.context = context
-        self._page = None
-        self._persistence_config = BrowserPersistenceConfig.from_env()
-
-    @property
-    def impl_context(self) -> PlaywrightBrowserContext:
-        """Returns the underlying Playwright context implementation"""
-        if self.context is None:
-            raise RuntimeError("Failed to create or retrieve a browser context.")
-        return self.context

    async def _create_context(self, browser: PlaywrightBrowser) -> PlaywrightBrowserContext:
        """Creates a new browser context with anti-detection measures and loads cookies if available."""
-        if self.context:
-            return self.context
+        # Choose the Playwright context to configure: reuse an existing one or create a new one

        # Check if we should use existing context for persistence
-        if self._persistence_config.persistent_session and len(browser.contexts) > 0:
-            logger.info("Using existing persistent context.")
-            self.context = browser.contexts[0]
+        if self.browser.config.chrome_instance_path and len(browser.contexts) > 0:
+            # Connect to existing Chrome instance instead of creating new one
+            context = browser.contexts[0]
        else:
-            logger.info("Creating a new browser context.")
-            self.context = await browser.new_context(
+            # Original code for creating new context
+            context = await browser.new_context(
                viewport=self.config.browser_window_size,
                no_viewport=False,
                user_agent=(
@@ -63,19 +50,20 @@ class CustomBrowserContext(BrowserContext):
                record_video_size=self.config.browser_window_size,
            )

-        # Handle tracing
        if self.config.trace_path:
-            await self.context.tracing.start(screenshots=True, snapshots=True, sources=True)
+            await context.tracing.start(screenshots=True, snapshots=True, sources=True)

        # Load cookies if they exist
        if self.config.cookies_file and os.path.exists(self.config.cookies_file):
            with open(self.config.cookies_file, "r") as f:
                cookies = json.load(f)
-                logger.info(f"Loaded {len(cookies)} cookies from {self.config.cookies_file}.")
-                await self.context.add_cookies(cookies)
+                logger.info(
+                    f"Loaded {len(cookies)} cookies from {self.config.cookies_file}"
+                )
+                await context.add_cookies(cookies)

        # Expose anti-detection scripts
-        await self.context.add_init_script(
+        await context.add_init_script(
            """
            // Webdriver property
            Object.defineProperty(navigator, 'webdriver', {
@@ -105,41 +93,4 @@ class CustomBrowserContext(BrowserContext):
            """
        )

-        # Create initial page if none exists
-        if not self.context.pages:
-            self._page = await self.context.new_page()
-            await self._page.goto('about:blank')
-
-        return self.context
-
-    async def new_page(self):
-        """Creates and returns a new page in this context."""
-        if not self.context:
-            await self._create_context(await self.browser.get_playwright_browser())
-        return await self.context.new_page()
-
-    async def get_current_page(self):
-        """Returns the current page or creates one if none exists."""
-        if not self.context:
-            await self._create_context(await self.browser.get_playwright_browser())
-        if not self.context:
-            raise RuntimeError("Browser context is not initialized.")
-        pages = self.context.pages
-        if not pages:
-            logger.warning("No existing pages in the context. Creating a new page.")
-            return await self.context.new_page()
-        return pages[0]
-
-    async def close(self):
-        """Override close to respect persistence setting."""
-        if not self._persistence_config.persistent_session and self.context:
-            await self.context.close()
-            self.context = None
-
-    @property
-    def pages(self):
-        """Returns list of pages in the context."""
-        if not self.context:
-            logger.warning("Attempting to access pages but context is not initialized.")
-            return []
-        return self.context.pages
+        return context
--- a/src/utils/utils.py
+++ b/src/utils/utils.py
@@ -15,6 +15,7 @@ from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_ollama import ChatOllama
 from langchain_openai import AzureChatOpenAI, ChatOpenAI
 import gradio as gr
+from src.browser.custom_context import CustomBrowserContext

 def get_llm_model(provider: str, **kwargs):
    """
@@ -164,36 +165,30 @@ def get_latest_files(directory: str, file_types: list = ['.webm', '.zip']) -> Di
            print(f"Error getting latest {file_type} file: {e}")
            
    return latest_files
-
-async def capture_screenshot(browser_context) -> str:
+async def capture_screenshot(browser_context: CustomBrowserContext) -> str:
    """Capture and encode a screenshot"""
+    latest_screenshot = ""
    try:
-        # Get the implementation context - handle both direct Playwright context and wrapped context
-        context = browser_context
-        if hasattr(browser_context, 'context'):
-            context = browser_context.context
-        
-        if not context:
-            return "<div>No browser context available</div>"
-            
-        # Get all pages
-        pages = context.pages
-        if not pages:
-            return "<div>Waiting for page to be available...</div>"
+        # Extract the Playwright browser instance
+        playwright_browser = browser_context.browser.playwright_browser  # Ensure this is correct.

-        # Use the first non-blank page or fallback to first page
-        active_page = None
-        for page in pages:
-            if page.url != 'about:blank':
-                active_page = page
-                break
-        
-        if not active_page and pages:
+        # Check if the browser instance is valid and if an existing context can be reused
+        if playwright_browser and playwright_browser.contexts:
+            playwright_context = playwright_browser.contexts[0]
+        else:
+            return latest_screenshot
+
+        # Access pages in the context
+        if playwright_context:
+            pages = playwright_context.pages
+
+        # Default to the first page, then prefer a non-blank page (the last non-blank page wins)
+        if pages:
            active_page = pages[0]
-            
-        if not active_page:
-            return "<div>No active page available</div>"
-
+            for page in pages:
+                if page.url != "about:blank":
+                    active_page = page
+        
        # Take screenshot
        try:
            screenshot = await active_page.screenshot(
@@ -202,9 +197,9 @@ async def capture_screenshot(browser_context) -> str:
                scale="css"
            )
            encoded = base64.b64encode(screenshot).decode('utf-8')
-            return f'<img src="data:image/jpeg;base64,{encoded}" style="width:100%; max-width:1200px; border:1px solid #ccc;">'
+            return f'<img src="data:image/jpeg;base64,{encoded}" style="width:80vw; height:90vh ; border:1px solid #ccc;">'
        except Exception as e:
-            return f"<div class='error'>Screenshot failed: {str(e)}</div>"
-            
+            return f"<div class='error' style='width:80vw; height:90vh'>Screenshot failed: {str(e)}</div>"
+
    except Exception as e:
-        return f"<div class='error'>Screenshot error: {str(e)}</div>"
+        return f"<div class='error' style='width:80vw; height:90vh'>Screenshot error: {str(e)}</div>"
--- a/webui.py
+++ b/webui.py
@@ -58,6 +58,9 @@ async def run_browser_agent(
        use_vision,
        max_actions_per_step,
        tool_call_in_content,
+        browser,
+        browser_context,
+        playwright
 ):
    # Disable recording if the checkbox is unchecked
    if not enable_recording:
@@ -84,7 +87,7 @@ async def run_browser_agent(
        api_key=llm_api_key,
    )
    if agent_type == "org":
-        final_result, errors, model_actions, model_thoughts = await run_org_agent(
+        final_result, errors, model_actions, model_thoughts, recorded_files, trace_file = await run_org_agent(
            llm=llm,
            headless=headless,
            disable_security=disable_security,
@@ -97,9 +100,12 @@ async def run_browser_agent(
            use_vision=use_vision,
            max_actions_per_step=max_actions_per_step,
            tool_call_in_content=tool_call_in_content,
+            browser=browser,
+            browser_context=browser_context,
+            playwright=playwright
        )
    elif agent_type == "custom":
-        final_result, errors, model_actions, model_thoughts = await run_custom_agent(
+        final_result, errors, model_actions, model_thoughts, recorded_files, trace_file = await run_custom_agent(
            llm=llm,
            use_own_browser=use_own_browser,
            headless=headless,
@@ -113,7 +119,10 @@ async def run_browser_agent(
            max_steps=max_steps,
            use_vision=use_vision,
            max_actions_per_step=max_actions_per_step,
-            tool_call_in_content=tool_call_in_content
+            tool_call_in_content=tool_call_in_content,
+            browser=browser,
+            browser_context=browser_context,
+            playwright=playwright
        )
    else:
        raise ValueError(f"Invalid agent type: {agent_type}")
@@ -142,7 +151,10 @@ async def run_org_agent(
        max_steps,
        use_vision,
        max_actions_per_step,
-        tool_call_in_content
+        tool_call_in_content,
+        browser,
+        browser_context,
+        playwright
 ):
    browser = Browser(
        config=BrowserConfig(
@@ -196,17 +208,18 @@ async def run_custom_agent(
        max_steps,
        use_vision,
        max_actions_per_step,
-        tool_call_in_content
-):
-    global _global_browser, _global_browser_context, _global_playwright
-    
+        tool_call_in_content,
+        browser,
+        browser_context,
+        playwright
+):    
    controller = CustomController()
    persistence_config = BrowserPersistenceConfig.from_env()
    
    try:
        # Initialize global browser if needed
-        if _global_browser is None:
-            _global_browser = CustomBrowser(
+        if browser is None:
+            browser = CustomBrowser(
                config=BrowserConfig(
                    headless=headless,
                    disable_security=disable_security,
@@ -216,12 +229,12 @@ async def run_custom_agent(

        # Handle browser context based on configuration
        if use_own_browser:
-            if _global_browser_context is None:
-                _global_playwright = await async_playwright().start()
+            if browser_context is None:
+                playwright = await async_playwright().start()
                chrome_exe = os.getenv("CHROME_PATH", "")
                chrome_use_data = os.getenv("CHROME_USER_DATA", "")

-                browser_context = await _global_playwright.chromium.launch_persistent_context(
+                browser_context = await playwright.chromium.launch_persistent_context(
                    user_data_dir=chrome_use_data,
                    executable_path=chrome_exe,
                    no_viewport=False,
@@ -236,7 +249,7 @@ async def run_custom_agent(
                    record_video_dir=save_recording_path if save_recording_path else None,
                    record_video_size={"width": window_w, "height": window_h},
                )
-                _global_browser_context = await _global_browser.new_context(
+                browser_context = await browser.new_context(
                    config=BrowserContextConfig(
                        trace_path=save_trace_path if save_trace_path else None,
                        save_recording_path=save_recording_path if save_recording_path else None,
@@ -245,11 +258,10 @@ async def run_custom_agent(
                            width=window_w, height=window_h
                        ),
                    ),
-                    context=browser_context,
                )
        else:
-            if _global_browser_context is None:
-                _global_browser_context = await _global_browser.new_context(
+            if browser_context is None:
+                browser_context = await browser.new_context(
                    config=BrowserContextConfig(
                        trace_path=save_trace_path if save_trace_path else None,
                        save_recording_path=save_recording_path if save_recording_path else None,
@@ -266,7 +278,7 @@ async def run_custom_agent(
            add_infos=add_infos,
            use_vision=use_vision,
            llm=llm,
-            browser_context=_global_browser_context,
+            browser_context=browser_context,
            controller=controller,
            system_prompt_class=CustomSystemPrompt,
            max_actions_per_step=max_actions_per_step,
@@ -292,17 +304,17 @@ async def run_custom_agent(
    finally:
        # Handle cleanup based on persistence configuration
        if not persistence_config.persistent_session:
-            if _global_browser_context:
-                await _global_browser_context.close()
-                _global_browser_context = None
+            if browser_context:
+                await browser_context.close()
+                browser_context = None

-            if _global_playwright:
-                await _global_playwright.stop()
-                _global_playwright = None
+            if playwright:
+                await playwright.stop()
+                playwright = None

-            if _global_browser:
-                await _global_browser.close()
-                _global_browser = None
+            if browser:
+                await browser.close()
+                browser = None
    return final_result, errors, model_actions, model_thoughts, trace_file.get('.webm'), recorded_files.get('.zip')

 async def run_with_stream(
@@ -325,7 +337,7 @@ async def run_with_stream(
    max_steps,
    use_vision,
    max_actions_per_step,
-    tool_call_in_content,
+    tool_call_in_content
 ):
    """Wrapper to run the agent and handle streaming."""
    global _global_browser, _global_browser_context
@@ -376,12 +388,15 @@ async def run_with_stream(
                max_steps=max_steps,
                use_vision=use_vision,
                max_actions_per_step=max_actions_per_step,
-                tool_call_in_content=tool_call_in_content
+                tool_call_in_content=tool_call_in_content,
+                browser=_global_browser,
+                browser_context=_global_browser_context,
+                playwright=_global_playwright if use_own_browser else None
            )
        )

        # Initialize values for streaming
-        html_content = "<div>Using browser...</div>"
+        html_content = "<div style='width:80vw; height:90vh'>Using browser...</div>"
        final_result = errors = model_actions = model_thoughts = ""
        recording = trace = None

@@ -390,7 +405,7 @@ async def run_with_stream(
            try:
                html_content = await capture_screenshot(_global_browser_context)
            except Exception as e:
-                html_content = f"<div class='error'>Screenshot error: {str(e)}</div>"
+                html_content = f"<div class='error' style='width:80vw; height:90vh'>Screenshot error: {str(e)}</div>"
            
            yield [
                html_content,
@@ -426,7 +441,7 @@ async def run_with_stream(
    except Exception as e:
        import traceback
        yield [
-            f"<div class='error'>Browser error: {str(e)}</div>",
+            f"<div class='error' style='width:80vw; height:90vh'>Browser error: {str(e)}</div>",
            "",
            f"Error: {str(e)}\n{traceback.format_exc()}",
            "",
@@ -625,14 +640,14 @@ def create_ui(theme_name="Ocean"):
                    placeholder="Add any helpful context or instructions...",
                    info="Optional hints to help the LLM complete the task",
                )
-                
+
                with gr.Row():
                    run_button = gr.Button("▶️ Run Agent", variant="primary", scale=2)
                    stop_button = gr.Button("⏹️ Stop", variant="stop", scale=1)
                    
                with gr.Row():
                    browser_view = gr.HTML(
-                        value="<div>Waiting for browser session...</div>",
+                        value="<div style='width:80vw; height:90vh'>Waiting for browser session...</div>",
                        label="Live Browser View",
                )