resolve conflict with new webui version - implement docker

2025-01-10 21:47:16 +07:00
parent 97b8cec17a 1ab85f0633
commit ed3a04553d
10 changed files with 679 additions and 326 deletions
--- a/.env.example
+++ b/.env.example
@@ -17,5 +17,17 @@ ANONYMIZED_TELEMETRY=true
 # LogLevel: Set to debug to enable verbose logging, set to result to get results only. Available: result | debug | info
 BROWSER_USE_LOGGING_LEVEL=info

+# Chrome settings
 CHROME_PATH=
-CHROME_USER_DATA=
+CHROME_USER_DATA=
+CHROME_DEBUGGING_PORT=9222
+CHROME_DEBUGGING_HOST=localhost
+CHROME_PERSISTENT_SESSION=false  # Set to true to keep browser open between AI tasks
+
+# Display settings
+RESOLUTION=1920x1080x24  # Format: WIDTHxHEIGHTxDEPTH
+RESOLUTION_WIDTH=1920    # Width in pixels
+RESOLUTION_HEIGHT=1080   # Height in pixels
+
+# VNC settings
+VNC_PASSWORD=vncpassword  # Default VNC password; change it before exposing the VNC ports
--- a/Dockerfile
+++ b/Dockerfile
@@ -0,0 +1,82 @@
+FROM python:3.11-slim
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    wget \
+    gnupg \
+    curl \
+    unzip \
+    xvfb \
+    libgconf-2-4 \
+    libxss1 \
+    libnss3 \
+    libnspr4 \
+    libasound2 \
+    libatk1.0-0 \
+    libatk-bridge2.0-0 \
+    libcups2 \
+    libdbus-1-3 \
+    libdrm2 \
+    libgbm1 \
+    libgtk-3-0 \
+    libxcomposite1 \
+    libxdamage1 \
+    libxfixes3 \
+    libxrandr2 \
+    xdg-utils \
+    fonts-liberation \
+    dbus \
+    xauth \
+    xvfb \
+    x11vnc \
+    tigervnc-tools \
+    supervisor \
+    net-tools \
+    procps \
+    git \
+    python3-numpy \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install noVNC
+RUN git clone https://github.com/novnc/noVNC.git /opt/novnc \
+    && git clone https://github.com/novnc/websockify /opt/novnc/utils/websockify \
+    && ln -s /opt/novnc/vnc.html /opt/novnc/index.html
+
+# Install Chrome
+RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \
+    && echo "deb http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list \
+    && apt-get update \
+    && apt-get install -y google-chrome-stable \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set up working directory
+WORKDIR /app
+
+# Copy requirements and install Python dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Install Playwright and browsers with system dependencies
+ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
+RUN playwright install --with-deps chromium
+RUN playwright install-deps
+
+# Copy the application code
+COPY . .
+
+# Set environment variables
+ENV PYTHONUNBUFFERED=1
+ENV BROWSER_USE_LOGGING_LEVEL=info
+ENV CHROME_PATH=/usr/bin/google-chrome
+ENV ANONYMIZED_TELEMETRY=false
+ENV DISPLAY=:99
+ENV RESOLUTION=1920x1080x24
+ENV VNC_PASSWORD=vncpassword
+
+# Set up supervisor configuration
+RUN mkdir -p /var/log/supervisor
+COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
+
+EXPOSE 7788 6080 5900
+
+CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
--- a/README.md
+++ b/README.md
@@ -17,9 +17,13 @@ We would like to officially thank [WarmShao](https://github.com/warmshao) for hi

 **Custom Browser Support:** You can use your own browser with our tool, eliminating the need to re-login to sites or deal with other authentication challenges. This feature also supports high-definition screen recording.

-<video src="https://github.com/user-attachments/assets/56bc7080-f2e3-4367-af22-6bf2245ff6cb" controls="controls"  >Your browser does not support playing this video!</video>
+**Persistent Browser Sessions:** You can choose to keep the browser window open between AI tasks, allowing you to see the complete history and state of AI interactions.

-## Installation Guide
+<video src="https://github.com/user-attachments/assets/56bc7080-f2e3-4367-af22-6bf2245ff6cb" controls="controls">Your browser does not support playing this video!</video>
+
+## Installation Options
+
+### Option 1: Local Installation

 Read the [quickstart guide](https://docs.browser-use.com/quickstart#prepare-the-environment) or follow the steps below to get started.

@@ -49,84 +53,132 @@ Then install playwright:
 playwright install
 ```

+### Option 2: Docker Installation
+
+1. **Prerequisites:**
+   - Docker and Docker Compose installed on your system
+   - Git to clone the repository
+
+2. **Setup:**
+   ```bash
+   # Clone the repository
+   git clone https://github.com/browser-use/web-ui.git
+   cd web-ui
+
+   # Copy and configure environment variables
+   cp .env.example .env
+   # Edit .env with your preferred text editor and add your API keys
+   ```
+
+3. **Run with Docker:**
+   ```bash
+   # Build and start the container with default settings (browser closes after AI tasks)
+   docker compose up --build
+
+   # Or run with persistent browser (browser stays open between AI tasks)
+   CHROME_PERSISTENT_SESSION=true docker compose up --build
+   ```
+
+4. **Access the Application:**
+   - WebUI: `http://localhost:7788`
+   - VNC Viewer (to see browser interactions): `http://localhost:6080/vnc.html`
+   
+   Default VNC password is "vncpassword". You can change it by setting the `VNC_PASSWORD` environment variable in your `.env` file.
+
+
 ## Usage

-1.  **Run the WebUI:**
+### Local Setup
+1.  Copy `.env.example` to `.env` (`cp .env.example .env`) and set your environment variables, including API keys for the LLM.
+2.  **Run the WebUI:**
    ```bash
    python webui.py --ip 127.0.0.1 --port 7788
    ```
-2.  **Access the WebUI:** Open your web browser and navigate to `http://127.0.0.1:7788`.
-3.  **Using Your Own Browser:**
-    - Close all chrome windows
+   WebUI options:
+   - `--ip`: The IP address to bind the WebUI to. Default is `127.0.0.1`.
+   - `--port`: The port to bind the WebUI to. Default is `7788`.
+   - `--theme`: The theme for the user interface. Default is `Ocean`.
+     - **Default**: The standard theme with a balanced design.
+     - **Soft**: A gentle, muted color scheme for a relaxed viewing experience.
+     - **Monochrome**: A grayscale theme with minimal color for simplicity and focus.
+     - **Glass**: A sleek, semi-transparent design for a modern appearance.
+     - **Origin**: A classic, retro-inspired theme for a nostalgic feel.
+     - **Citrus**: A vibrant, citrus-inspired palette with bright and fresh colors.
+     - **Ocean** (default): A blue, ocean-inspired theme providing a calming effect.
+   - `--dark-mode`: Enables dark mode for the user interface.
+3.  **Access the WebUI:** Open your web browser and navigate to `http://127.0.0.1:7788`.
+4.  **Using Your Own Browser (Optional):**
+    - Set `CHROME_PATH` to the executable path of your browser and `CHROME_USER_DATA` to the user data directory of your browser.
+      - Windows
+        ```env
+         CHROME_PATH="C:\Program Files\Google\Chrome\Application\chrome.exe"
+         CHROME_USER_DATA="C:\Users\YourUsername\AppData\Local\Google\Chrome\User Data"
+        ```
+        > Note: Replace `YourUsername` with your actual Windows username.
+      - Mac
+        ```env
+         CHROME_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
+         CHROME_USER_DATA="~/Library/Application Support/Google/Chrome/Profile 1"
+        ```
+    - Close all Chrome windows
    - Open the WebUI in a non-Chrome browser, such as Firefox or Edge. This is important because the persistent browser context will use the Chrome data when running the agent.
    - Check the "Use Own Browser" option within the Browser Settings.
+5. **Keep Browser Open (Optional):**
+    - Set `CHROME_PERSISTENT_SESSION=true` in the `.env` file.

-### Options:
+### Docker Setup
+1. **Environment Variables:**
+   - All configuration is done through the `.env` file
+   - Available environment variables:
+     ```
+     # LLM API Keys
+     OPENAI_API_KEY=your_key_here
+     ANTHROPIC_API_KEY=your_key_here
+     GOOGLE_API_KEY=your_key_here

-### `--theme`
+     # Browser Settings
+     CHROME_PERSISTENT_SESSION=true   # Set to true to keep browser open between AI tasks
+     RESOLUTION=1920x1080x24         # Custom resolution format: WIDTHxHEIGHTxDEPTH
+     RESOLUTION_WIDTH=1920           # Custom width in pixels
+     RESOLUTION_HEIGHT=1080          # Custom height in pixels

- **Type**: `str`
- **Default**: `Ocean`
- **Description**: Specifies the theme for the user interface.
- **Options**:
-  The available themes are defined in the `theme_map` dictionary. Below are the options you can choose from:
-  - **Default**: The standard theme with a balanced design.
-  - **Soft**: A gentle, muted color scheme for a relaxed viewing experience.
-  - **Monochrome**: A grayscale theme with minimal color for simplicity and focus.
-  - **Glass**: A sleek, semi-transparent design for a modern appearance.
-  - **Origin**: A classic, retro-inspired theme for a nostalgic feel.
-  - **Citrus**: A vibrant, citrus-inspired palette with bright and fresh colors.
-  - **Ocean** (default): A blue, ocean-inspired theme providing a calming effect.
+     # VNC Settings
+     VNC_PASSWORD=your_vnc_password  # Optional, defaults to "vncpassword"
+     ```

-**Example**:
+2. **Browser Persistence Modes:**
+   - **Default Mode (CHROME_PERSISTENT_SESSION=false):**
+     - Browser opens and closes with each AI task
+     - Clean state for each interaction
+     - Lower resource usage

-```bash
-python webui.py --ip 127.0.0.1 --port 7788 --theme Glass
-```
+   - **Persistent Mode (CHROME_PERSISTENT_SESSION=true):**
+     - Browser stays open between AI tasks
+     - Maintains history and state
+     - Allows viewing previous AI interactions
+     - Set in `.env` file or via environment variable when starting container

-### `--dark-mode`
+3. **Viewing Browser Interactions:**
+   - Access the noVNC viewer at `http://localhost:6080/vnc.html`
+   - Enter the VNC password (default: "vncpassword" or what you set in VNC_PASSWORD)
+   - You can now see all browser interactions in real-time

- **Type**: `boolean`
- **Default**: Disabled
- **Description**: Enables dark mode for the user interface. This is a simple toggle; including the flag activates dark mode, while omitting it keeps the interface in light mode.
- **Options**:
-  - **Enabled (`--dark-mode`)**: Activates dark mode, switching the interface to a dark color scheme for better visibility in low-light environments.
-  - **Disabled (default)**: Keeps the interface in the default light mode.
+4. **Container Management:**
+   ```bash
+   # Start with persistent browser
+   CHROME_PERSISTENT_SESSION=true docker compose up -d

-**Example**:
+   # Start with default mode (browser closes after tasks)
+   docker compose up -d

-```bash
-python webui.py --ip 127.0.0.1 --port 7788 --dark-mode
-```
+   # View logs
+   docker compose logs -f

-## (Optional) Configure Environment Variables
-
-Copy `.env.example` to `.env` and set your environment variables, including API keys for the LLM. With
-
-```bash
-cp .env.example .env
-```
-
-**If using your own browser:** - Set `CHROME_PATH` to the executable path of your browser and `CHROME_USER_DATA` to the user data directory of your browser.
-
-You can just copy examples down below to your `.env` file.
-
-### Windows
-
-```env
-CHROME_PATH="C:\Program Files\Google\Chrome\Application\chrome.exe"
-CHROME_USER_DATA="C:\Users\YourUsername\AppData\Local\Google\Chrome\User Data"
-```
-
-> Note: Replace `YourUsername` with your actual Windows username for Windows systems.
-
-### Mac
-
-```env
-CHROME_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
-CHROME_USER_DATA="~/Library/Application Support/Google/Chrome/Profile 1"
-```
+   # Stop the container
+   docker compose down
+   ```

 ## Changelog

- [x] **2025/01/06:** Thanks to @richard-devbot, a New and Well-Designed WebUI is released. [Video tutorial demo](https://github.com/warmshao/browser-use-webui/issues/1#issuecomment-2573393113).
+- [x] **2025/01/10:** Thanks to @casistack, we now have a Docker setup option and support for keeping the browser open between tasks. [Video tutorial demo](https://github.com/browser-use/web-ui/issues/1#issuecomment-2582511750).
+- [x] **2025/01/06:** Thanks to @richard-devbot, a new and well-designed WebUI has been released. [Video tutorial demo](https://github.com/warmshao/browser-use-webui/issues/1#issuecomment-2573393113).
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -0,0 +1,51 @@
+services:
+  browser-use-webui:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    ports:
+      - "7788:7788"  # WebUI (Gradio) port
+      - "6080:6080"  # noVNC web interface
+      - "5900:5900"  # VNC port
+      - "9222:9222"  # Chrome remote debugging port
+    environment:
+      - OPENAI_ENDPOINT=${OPENAI_ENDPOINT:-https://api.openai.com/v1}
+      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
+      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
+      - GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
+      - AZURE_OPENAI_ENDPOINT=${AZURE_OPENAI_ENDPOINT:-}
+      - AZURE_OPENAI_API_KEY=${AZURE_OPENAI_API_KEY:-}
+      - DEEPSEEK_ENDPOINT=${DEEPSEEK_ENDPOINT:-https://api.deepseek.com}
+      - DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY:-}
+      - BROWSER_USE_LOGGING_LEVEL=${BROWSER_USE_LOGGING_LEVEL:-info}
+      - ANONYMIZED_TELEMETRY=false
+      - CHROME_PATH=/usr/bin/google-chrome
+      - CHROME_USER_DATA=/app/data/chrome_data
+      - CHROME_PERSISTENT_SESSION=${CHROME_PERSISTENT_SESSION:-false}
+      - DISPLAY=:99
+      - PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
+      - RESOLUTION=${RESOLUTION:-1920x1080x24}
+      - RESOLUTION_WIDTH=${RESOLUTION_WIDTH:-1920}
+      - RESOLUTION_HEIGHT=${RESOLUTION_HEIGHT:-1080}
+      - VNC_PASSWORD=${VNC_PASSWORD:-vncpassword}
+      - PERSISTENT_BROWSER_PORT=9222
+      - PERSISTENT_BROWSER_HOST=localhost
+      - CHROME_DEBUGGING_PORT=9222
+      - CHROME_DEBUGGING_HOST=localhost
+    volumes:
+      - ./data:/app/data
+      - ./data/chrome_data:/app/data/chrome_data
+      - /tmp/.X11-unix:/tmp/.X11-unix
+    restart: unless-stopped
+    shm_size: '2gb'
+    cap_add:
+      - SYS_ADMIN
+    security_opt:
+      - seccomp=unconfined
+    tmpfs:
+      - /tmp
+    healthcheck:
+      test: ["CMD", "nc", "-z", "localhost", "5900"]
+      interval: 10s
+      timeout: 5s
+      retries: 3
--- a/src/browser/config.py
+++ b/src/browser/config.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2025/1/6
+# @Author  : wenshao
+# @ProjectName: browser-use-webui
+# @FileName: config.py
+
+import os
+from dataclasses import dataclass
+from typing import Optional
+
+
+@dataclass
+class BrowserPersistenceConfig:
+    """Configuration for browser persistence"""
+
+    persistent_session: bool = False
+    user_data_dir: Optional[str] = None
+    debugging_port: Optional[int] = None
+    debugging_host: Optional[str] = None
+
+    @classmethod
+    def from_env(cls) -> "BrowserPersistenceConfig":
+        """Create config from environment variables"""
+        return cls(
+            persistent_session=os.getenv("CHROME_PERSISTENT_SESSION", "").lower()
+            == "true",
+            user_data_dir=os.getenv("CHROME_USER_DATA"),
+            debugging_port=int(os.getenv("CHROME_DEBUGGING_PORT", "9222")),
+            debugging_host=os.getenv("CHROME_DEBUGGING_HOST", "localhost"),
+        )
--- a/src/browser/custom_browser.py
+++ b/src/browser/custom_browser.py
@@ -6,15 +6,45 @@

 from browser_use.browser.browser import Browser
 from browser_use.browser.context import BrowserContext, BrowserContextConfig
+from playwright.async_api import BrowserContext as PlaywrightBrowserContext
+import logging

+from .config import BrowserPersistenceConfig
 from .custom_context import CustomBrowserContext

+logger = logging.getLogger(__name__)

 class CustomBrowser(Browser):
+    _global_context = None
+
    async def new_context(
        self,
        config: BrowserContextConfig = BrowserContextConfig(),
-        context: CustomBrowserContext = None,
-    ) -> BrowserContext:
-        """Create a browser context"""
+        context: PlaywrightBrowserContext = None,
+    ) -> CustomBrowserContext:
+        """Create a browser context with persistence support"""
+        persistence_config = BrowserPersistenceConfig.from_env()
+        
+        if persistence_config.persistent_session:
+            if CustomBrowser._global_context is not None:
+                logger.info("Reusing existing persistent browser context")
+                return CustomBrowser._global_context
+            
+            context_instance = CustomBrowserContext(config=config, browser=self, context=context)
+            CustomBrowser._global_context = context_instance
+            logger.info("Created new persistent browser context")
+            return context_instance
+        
+        logger.info("Creating non-persistent browser context")
        return CustomBrowserContext(config=config, browser=self, context=context)
+
+    async def close(self):
+        """Override close to respect persistence setting"""
+        persistence_config = BrowserPersistenceConfig.from_env()
+        if not persistence_config.persistent_session:
+            if CustomBrowser._global_context is not None:
+                await CustomBrowser._global_context.close()
+                CustomBrowser._global_context = None
+            await super().close()
+        else:
+            logger.info("Skipping browser close due to persistent session")
--- a/src/browser/custom_context.py
+++ b/src/browser/custom_context.py
@@ -9,84 +9,77 @@ import json
 import logging
 import os

-from playwright.async_api import Browser as PlaywrightBrowser, Page, BrowserContext as PlaywrightContext
 from browser_use.browser.browser import Browser
 from browser_use.browser.context import BrowserContext, BrowserContextConfig
+from playwright.async_api import Browser as PlaywrightBrowser
+from playwright.async_api import BrowserContext as PlaywrightBrowserContext
+
+from .config import BrowserPersistenceConfig

 logger = logging.getLogger(__name__)
+
+
 class CustomBrowserContext(BrowserContext):
    def __init__(
        self,
-        browser: "CustomBrowser",  # Forward declaration for CustomBrowser
+        browser: "Browser",
        config: BrowserContextConfig = BrowserContextConfig(),
-        context: PlaywrightContext = None
+        context: PlaywrightBrowserContext = None,
    ):
        super(CustomBrowserContext, self).__init__(browser=browser, config=config)
-        self.context = context  # Rename to avoid confusion
+        self.context = context
        self._page = None
+        self._persistence_config = BrowserPersistenceConfig.from_env()

    @property
-    def impl_context(self) -> PlaywrightContext:
+    def impl_context(self) -> PlaywrightBrowserContext:
        """Returns the underlying Playwright context implementation"""
        return self.context

-    async def _create_context(self, browser: PlaywrightBrowser = None):
+    async def _create_context(self, browser: PlaywrightBrowser) -> PlaywrightBrowserContext:
        """Creates a new browser context with anti-detection measures and loads cookies if available."""
        if self.context:
+            logger.info("Browser context already exists, returning existing context.")
            return self.context

-        # If a Playwright browser is not provided, get it from our custom browser
-        pw_browser = browser or await self.browser.get_playwright_browser()
-        
-        context_args = {
-            'viewport': self.config.browser_window_size,
-            'no_viewport': False, 
-            'bypass_csp': self.config.disable_security,
-            'ignore_https_errors': self.config.disable_security
-        }
-        
-        if self.config.save_recording_path:
-            context_args.update({
-                'record_video_dir': self.config.save_recording_path,
-                'record_video_size': self.config.browser_window_size
-            })
-
-        self.context = await pw_browser.new_context(**context_args)
+        # Check for persistent context
+        if self._persistence_config.persistent_session and len(browser.contexts) > 0:
+            logger.info("Using existing persistent context.")
+            self.context = browser.contexts[0]
+        else:
+            logger.info("Creating a new browser context.")
+            self.context = await browser.new_context(
+                viewport=self.config.browser_window_size,
+                no_viewport=False,
+                user_agent=(
+                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+                    "(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"
+                ),
+                java_script_enabled=True,
+                bypass_csp=self.config.disable_security,
+                ignore_https_errors=self.config.disable_security,
+                record_video_dir=self.config.save_recording_path,
+                record_video_size=self.config.browser_window_size,
+            )

+        # Handle tracing
        if self.config.trace_path:
            await self.context.tracing.start(screenshots=True, snapshots=True, sources=True)

-        # Load cookies if they exist
+        # Load cookies
        if self.config.cookies_file and os.path.exists(self.config.cookies_file):
            with open(self.config.cookies_file, "r") as f:
                cookies = json.load(f)
-                logger.info(
-                    f"Loaded {len(cookies)} cookies from {self.config.cookies_file}"
-                )
+                logger.info(f"Loaded {len(cookies)} cookies from {self.config.cookies_file}.")
                await self.context.add_cookies(cookies)

-        # Expose anti-detection scripts
+        # Inject anti-detection scripts
        await self.context.add_init_script(
            """
-            // Webdriver property
-            Object.defineProperty(navigator, 'webdriver', {
-                get: () => undefined
-            });
-
-            // Languages
-            Object.defineProperty(navigator, 'languages', {
-                get: () => ['en-US', 'en']
-            });
-
-            // Plugins
-            Object.defineProperty(navigator, 'plugins', {
-                get: () => [1, 2, 3, 4, 5]
-            });
-
-            // Chrome runtime
+            Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
+            Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
+            Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
            window.chrome = { runtime: {} };
-
-            // Permissions
            const originalQuery = window.navigator.permissions.query;
            window.navigator.permissions.query = (parameters) => (
                parameters.name === 'notifications' ?
@@ -96,42 +89,39 @@ class CustomBrowserContext(BrowserContext):
            """
        )

-        # Create an initial page
-        self._page = await self.context.new_page()
-        await self._page.goto('about:blank')  # Ensure page is ready
-        
+        # Create initial page if none exists
+        if not self.context.pages:
+            self._page = await self.context.new_page()
+            await self._page.goto('about:blank')
+
        return self.context

-    async def new_page(self) -> Page:
-        """Creates and returns a new page in this context"""
+    async def new_page(self):
+        """Creates and returns a new page in this context."""
        if not self.context:
-            await self._create_context()
+            await self._create_context(await self.browser.get_playwright_browser())
        return await self.context.new_page()

-    async def __aenter__(self):
+    async def get_current_page(self):
+        """Returns the current page or creates one if none exists."""
        if not self.context:
-            await self._create_context()
-        return self
+            await self._create_context(await self.browser.get_playwright_browser())
+        pages = self.context.pages
+        if not pages:
+            logger.warning("No existing pages in the context. Creating a new page.")
+            return await self.context.new_page()
+        return pages[0]

-    async def __aexit__(self, *args):
-        if self.context:
+    async def close(self):
+        """Override close to respect persistence setting."""
+        if not self._persistence_config.persistent_session and self.context:
            await self.context.close()
            self.context = None

    @property
    def pages(self):
-        """Returns list of pages in context"""
-        return self.context.pages if self.context else []
-
-    async def get_state(self, **kwargs):
-        if self.context:
-            pages = self.context.pages
-            if pages:
-                return await super().get_state(**kwargs)
-        return None
-
-    async def get_pages(self):
-        """Get pages in a way that works"""
+        """Returns list of pages in the context."""
        if not self.context:
+            logger.warning("Attempting to access pages but context is not initialized.")
            return []
        return self.context.pages
--- a/src/utils/stream_utils.py
+++ b/src/utils/stream_utils.py
@@ -3,24 +3,38 @@ import asyncio
 from typing import AsyncGenerator
 from playwright.async_api import BrowserContext, Error as PlaywrightError

-async def capture_screenshot(browser_context: BrowserContext) -> str:
+async def capture_screenshot(browser_context) -> str:
    """Capture and encode a screenshot"""
    try:
-        # Get the implementation context
-        context = getattr(browser_context, 'impl_context', None)
+        # Get the implementation context - handle both direct Playwright context and wrapped context
+        context = browser_context
+        if hasattr(browser_context, 'context'):
+            context = browser_context.context
+        
        if not context:
-            return "<div>No browser context implementation available</div>"
+            return "<div>No browser context available</div>"
            
        # Get all pages
-        all_pages = context.pages
-        if not all_pages:
+        pages = context.pages
+        if not pages:
            return "<div>Waiting for page to be available...</div>"
-        # Use the first page
-        for page in all_pages:
+
+        # Use the first non-blank page or fallback to first page
+        active_page = None
+        for page in pages:
            if page.url != 'about:blank':
+                active_page = page
                break
+        
+        if not active_page and pages:
+            active_page = pages[0]
+            
+        if not active_page:
+            return "<div>No active page available</div>"
+
+        # Take screenshot
        try:
-            screenshot = await page.screenshot(
+            screenshot = await active_page.screenshot(
                type='jpeg',
                quality=75,
                scale="css"
@@ -29,5 +43,6 @@ async def capture_screenshot(browser_context: BrowserContext) -> str:
            return f'<img src="data:image/jpeg;base64,{encoded}" style="width:100%; max-width:1200px; border:1px solid #ccc;">'
        except Exception as e:
            return f"<div class='error'>Screenshot failed: {str(e)}</div>"
+            
    except Exception as e:
        return f"<div class='error'>Screenshot error: {str(e)}</div>"
--- a/supervisord.conf
+++ b/supervisord.conf
@@ -0,0 +1,83 @@
+[supervisord]
+nodaemon=true
+logfile=/dev/stdout
+logfile_maxbytes=0
+loglevel=debug
+
+[program:xvfb]
+command=Xvfb :99 -screen 0 %(ENV_RESOLUTION)s -ac +extension GLX +render -noreset
+autorestart=true
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+priority=100
+startsecs=3
+
+[program:vnc_setup]
+command=bash -c "mkdir -p ~/.vnc && echo '%(ENV_VNC_PASSWORD)s' | vncpasswd -f > ~/.vnc/passwd && chmod 600 ~/.vnc/passwd && ls -la ~/.vnc/passwd"
+autorestart=false
+startsecs=0
+priority=150
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+
+[program:x11vnc]
+command=bash -c "sleep 3 && DISPLAY=:99 x11vnc -display :99 -forever -shared -rfbauth /root/.vnc/passwd -rfbport 5900 -bg -o /var/log/x11vnc.log"
+autorestart=true
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+priority=200
+startretries=5
+startsecs=5
+depends_on=vnc_setup
+
+[program:x11vnc_log]
+command=tail -f /var/log/x11vnc.log
+autorestart=true
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+priority=250
+
+[program:novnc]
+command=bash -c "sleep 5 && cd /opt/novnc && ./utils/novnc_proxy --vnc localhost:5900 --listen 0.0.0.0:6080 --web /opt/novnc"
+autorestart=true
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+priority=300
+startretries=5
+startsecs=3
+depends_on=x11vnc
+
+[program:persistent_browser]
+command=bash -c 'if [ "%(ENV_CHROME_PERSISTENT_SESSION)s" = "true" ]; then mkdir -p /app/data/chrome_data && sleep 8 && google-chrome --user-data-dir=/app/data/chrome_data --window-position=0,0 --window-size=%(ENV_RESOLUTION_WIDTH)s,%(ENV_RESOLUTION_HEIGHT)s --start-maximized --no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-setuid-sandbox --no-first-run --no-default-browser-check --no-experiments --ignore-certificate-errors --remote-debugging-port=9222 --remote-debugging-address=0.0.0.0 "data:text/html,<html><body style=\"background: #f0f0f0; margin: 0; display: flex; justify-content: center; align-items: center; height: 100vh; font-family: Arial;\"><h1>Browser Ready for AI Interaction</h1></body></html>"; else echo "Persistent browser disabled"; fi'
+autorestart=%(ENV_CHROME_PERSISTENT_SESSION)s
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+priority=350
+startretries=3
+startsecs=3
+depends_on=novnc
+
+[program:webui]
+command=python webui.py --ip 0.0.0.0 --port 7788
+directory=/app
+autorestart=true
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+priority=400
+startretries=3
+startsecs=3
+depends_on=persistent_browser 
--- a/webui.py
+++ b/webui.py
@@ -30,6 +30,16 @@ from src.browser.custom_context import BrowserContextConfig
 from src.controller.custom_controller import CustomController
 from src.utils import utils
 from src.utils.utils import update_model_dropdown
+from src.browser.config import BrowserPersistenceConfig
+from src.browser.custom_browser import CustomBrowser
+from src.browser.custom_context import CustomBrowserContext
+from browser_use.browser.browser import BrowserConfig
+from browser_use.browser.context import BrowserContextConfig, BrowserContextWindowSize
+
+# Global variables for persistence
+_global_browser = None
+_global_browser_context = None
+_global_playwright = None
 from src.utils.file_utils import get_latest_files
 from src.utils.stream_utils import capture_screenshot

@@ -196,121 +206,113 @@ async def run_custom_agent(
        tool_call_in_content,
        browser_context=None,  # receive context
 ):
+    global _global_browser, _global_browser_context, _global_playwright
+    
    controller = CustomController()
    playwright = None
    browser = None
+    persistence_config = BrowserPersistenceConfig.from_env()
+    
    try:
-        if use_own_browser:
-            playwright = await async_playwright().start()
-            chrome_exe = os.getenv("CHROME_PATH", "")
-            chrome_use_data = os.getenv("CHROME_USER_DATA", "")
-
-            if chrome_exe == "":
-                chrome_exe = None
-            elif not os.path.exists(chrome_exe):
-                raise ValueError(f"Chrome executable not found at {chrome_exe}")
-
-            if chrome_use_data == "":
-                chrome_use_data = None
-
-            browser_context_ = await playwright.chromium.launch_persistent_context(
-                user_data_dir=chrome_use_data if chrome_use_data else "",
-                executable_path=chrome_exe,
-                no_viewport=False,
-                headless=headless,  # 保持浏览器窗口可见
-                user_agent=(
-                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
-                    "(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"
-                ),
-                java_script_enabled=True,
-                bypass_csp=disable_security,
-                ignore_https_errors=disable_security,
-                record_video_dir=save_recording_path if save_recording_path else None,
-                record_video_size={"width": window_w, "height": window_h},
-            )
-        else:
-            browser_context_ = None
-
-        if browser_context is not None:
-            # Reuse context
-            agent = CustomAgent(
-                task=task,
-                add_infos=add_infos,
-                use_vision=use_vision,
-                llm=llm,
-                browser_context=browser_context,
-                controller=controller,
-                system_prompt_class=CustomSystemPrompt
-            )
-            history = await agent.run(max_steps=max_steps)
-            final_result = history.final_result()
-            errors = history.errors()
-            model_actions = history.model_actions()
-            model_thoughts = history.model_thoughts()
-            recorded_files = get_latest_files(save_recording_path)
-            trace_file = get_latest_files(save_recording_path + "/../traces")
-            return final_result, errors, model_actions, model_thoughts, recorded_files.get('.webm'), trace_file.get('.zip')
-        else:
-            browser = CustomBrowser(
+        # Initialize global browser if needed
+        if _global_browser is None:
+            _global_browser = CustomBrowser(
                config=BrowserConfig(
                    headless=headless,
                    disable_security=disable_security,
                    extra_chromium_args=[f"--window-size={window_w},{window_h}"],
                )
            )
-            async with await browser.new_context(
+
+        # Create the shared browser context once; with use_own_browser it wraps a locally launched Chrome persistent context
+        if use_own_browser:
+            if _global_browser_context is None:
+                _global_playwright = await async_playwright().start()
+                chrome_exe = os.getenv("CHROME_PATH", "")
+                chrome_use_data = os.getenv("CHROME_USER_DATA", "")
+
+                browser_context = await _global_playwright.chromium.launch_persistent_context(
+                    user_data_dir=chrome_use_data,
+                    executable_path=chrome_exe,
+                    no_viewport=False,
+                    headless=headless,
+                    user_agent=(
+                        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+                        "(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"
+                    ),
+                    java_script_enabled=True,
+                    bypass_csp=disable_security,
+                    ignore_https_errors=disable_security,
+                    record_video_dir=save_recording_path if save_recording_path else None,
+                    record_video_size={"width": window_w, "height": window_h},
+                )
+                _global_browser_context = await _global_browser.new_context(
                    config=BrowserContextConfig(
                        trace_path=save_trace_path if save_trace_path else None,
-                        save_recording_path=save_recording_path
-                        if save_recording_path
-                        else None,
+                        save_recording_path=save_recording_path if save_recording_path else None,
                        no_viewport=False,
                        browser_window_size=BrowserContextWindowSize(
                            width=window_w, height=window_h
                        ),
                    ),
-                    context=browser_context_,
-            ) as browser_context:
-                agent = CustomAgent(
-                    task=task,
-                    add_infos=add_infos,
-                    use_vision=use_vision,
-                    llm=llm,
-                    browser_context=browser_context,
-                    controller=controller,
-                    system_prompt_class=CustomSystemPrompt,
-                    max_actions_per_step=max_actions_per_step,
-                    tool_call_in_content=tool_call_in_content
+                    context=browser_context,
                )
-                history = await agent.run(max_steps=max_steps)
-                final_result = history.final_result()
-                errors = history.errors()
-                model_actions = history.model_actions()
-                model_thoughts = history.model_thoughts()
-                recorded_files = get_latest_files(save_recording_path)
-                trace_file = get_latest_files(save_recording_path + "/../traces")
-                return final_result, errors, model_actions, model_thoughts, recorded_files.get('.webm'), trace_file.get('.zip')
+        else:
+            if _global_browser_context is None:
+                _global_browser_context = await _global_browser.new_context(
+                    config=BrowserContextConfig(
+                        trace_path=save_trace_path if save_trace_path else None,
+                        save_recording_path=save_recording_path if save_recording_path else None,
+                        no_viewport=False,
+                        browser_window_size=BrowserContextWindowSize(
+                            width=window_w, height=window_h
+                        ),
+                    ),
+                )
+
+        # Create and run agent
+        agent = CustomAgent(
+            task=task,
+            add_infos=add_infos,
+            use_vision=use_vision,
+            llm=llm,
+            browser_context=_global_browser_context,
+            controller=controller,
+            system_prompt_class=CustomSystemPrompt,
+            max_actions_per_step=max_actions_per_step,
+            tool_call_in_content=tool_call_in_content
+        )
+        history = await agent.run(max_steps=max_steps)
+
+        final_result = history.final_result()
+        errors = history.errors()
+        model_actions = history.model_actions()
+        model_thoughts = history.model_thoughts()
+        recorded_files = get_latest_files(save_recording_path)
+        trace_file = get_latest_files(save_trace_path)        

    except Exception as e:
        import traceback
-
        traceback.print_exc()
-        final_result = ""
        errors = str(e) + "\n" + traceback.format_exc()
        model_actions = ""
        model_thoughts = ""
        recorded_files = {}
        trace_file = {}
    finally:
-        # 显式关闭持久化上下文
-        if browser_context_:
-            await browser_context_.close()
+        # Tear down the shared context, Playwright and browser unless a persistent session is configured
+        if not persistence_config.persistent_session:
+            if _global_browser_context:
+                await _global_browser_context.close()
+                _global_browser_context = None

-        # 关闭 Playwright 对象
-        if playwright:
-            await playwright.stop()
-        if browser:
-            await browser.close()
+            if _global_playwright:
+                await _global_playwright.stop()
+                _global_playwright = None
+
+            if _global_browser:
+                await _global_browser.close()
+                _global_browser = None
    return final_result, errors, model_actions, model_thoughts, trace_file.get('.webm'), recorded_files.get('.zip')

 async def run_with_stream(
@@ -336,95 +338,71 @@ async def run_with_stream(
    tool_call_in_content,
 ):
    """Wrapper to run the agent and handle streaming."""
-    browser = None
+    global _global_browser, _global_browser_context
+    
    try:
-        # Initialize the browser
-        browser = CustomBrowser(
-            config=BrowserConfig(
-                headless=False,
-                disable_security=disable_security,
-                extra_chromium_args=[f"--window-size={window_w},{window_h}"],
-            )
-        )
-
-        # Create a new browser context
-        async with await browser.new_context(
-            config=BrowserContextConfig(
-                trace_path=save_trace_path if save_trace_path else None,
-                save_recording_path=save_recording_path if save_recording_path else None,
-                no_viewport=False,
-                browser_window_size=BrowserContextWindowSize(
-                    width=window_w, height=window_h
-                ),
-            )
-        ) as browser_context:
-            # Run the browser agent in the background
-            agent_task = asyncio.create_task(
-                run_browser_agent(
-                    agent_type=agent_type,
-                    llm_provider=llm_provider,
-                    llm_model_name=llm_model_name,
-                    llm_temperature=llm_temperature,
-                    llm_base_url=llm_base_url,
-                    llm_api_key=llm_api_key,
-                    use_own_browser=use_own_browser,
-                    headless=headless,
+        # Initialize the global browser if it doesn't exist
+        if _global_browser is None:
+            _global_browser = CustomBrowser(
+                config=BrowserConfig(
+                    headless=False,
                    disable_security=disable_security,
-                    window_w=window_w,
-                    window_h=window_h,
-                    save_recording_path=save_recording_path,
-                    save_trace_path=save_trace_path,
-                    enable_recording=enable_recording,
-                    task=task,
-                    add_infos=add_infos,
-                    max_steps=max_steps,
-                    use_vision=use_vision,
-                    max_actions_per_step=max_actions_per_step,
-                    tool_call_in_content=tool_call_in_content,
-                    browser_context=browser_context  # Explicit keyword argument
+                    extra_chromium_args=[f"--window-size={window_w},{window_h}"],
                )
            )

-            # Initialize values for streaming
-            html_content = "<div>Starting browser...</div>"
-            final_result = errors = model_actions = model_thoughts = ""
-            recording = trace = None
+        # Create or reuse browser context
+        if _global_browser_context is None:
+            _global_browser_context = await _global_browser.new_context(
+                config=BrowserContextConfig(
+                    trace_path=save_trace_path if save_trace_path else None,
+                    save_recording_path=save_recording_path if save_recording_path else None,
+                    no_viewport=False,
+                    browser_window_size=BrowserContextWindowSize(
+                        width=window_w, height=window_h
+                    ),
+                )
+            )

-            # Periodically update the stream while the agent task is running
-            while not agent_task.done():
-                try:
-                    html_content = await capture_screenshot(browser_context)
-                except Exception as e:
-                    html_content = f"<div class='error'>Screenshot error: {str(e)}</div>"
-                
-                yield [
-                    html_content,
-                    final_result,
-                    errors,
-                    model_actions,
-                    model_thoughts,
-                    recording,
-                    trace,
-                ]
-                await asyncio.sleep(0.01)
+        # Run the browser agent in the background
+        agent_task = asyncio.create_task(
+            run_browser_agent(
+                agent_type=agent_type,
+                llm_provider=llm_provider,
+                llm_model_name=llm_model_name,
+                llm_temperature=llm_temperature,
+                llm_base_url=llm_base_url,
+                llm_api_key=llm_api_key,
+                use_own_browser=use_own_browser,
+                headless=headless,
+                disable_security=disable_security,
+                window_w=window_w,
+                window_h=window_h,
+                save_recording_path=save_recording_path,
+                save_trace_path=save_trace_path,
+                enable_recording=enable_recording,
+                task=task,
+                add_infos=add_infos,
+                max_steps=max_steps,
+                use_vision=use_vision,
+                max_actions_per_step=max_actions_per_step,
+                tool_call_in_content=tool_call_in_content,
+                browser_context=_global_browser_context
+            )
+        )

-            # Once the agent task completes, get the results
+        # Initialize values for streaming
+        html_content = "<div>Using browser...</div>"
+        final_result = errors = model_actions = model_thoughts = ""
+        recording = trace = None
+
+        # Periodically update the stream while the agent task is running
+        while not agent_task.done():
            try:
-                result = await agent_task
-                if isinstance(result, tuple) and len(result) == 6:
-                    (
-                        final_result,
-                        errors,
-                        model_actions,
-                        model_thoughts,
-                        recording,
-                        trace,
-                    ) = result
-                else:
-                    errors = "Unexpected result format from agent"
+                html_content = await capture_screenshot(_global_browser_context)
            except Exception as e:
-                errors = f"Agent error: {str(e)}"
-
+                html_content = f"<div class='error'>Screenshot error: {str(e)}</div>"
+            
            yield [
                html_content,
                final_result,
@@ -434,10 +412,30 @@ async def run_with_stream(
                recording,
                trace,
            ]
+            await asyncio.sleep(0.01)
+
+        # Once the agent task completes, get the results
+        try:
+            result = await agent_task
+            if isinstance(result, tuple) and len(result) == 6:
+                final_result, errors, model_actions, model_thoughts, recording, trace = result
+            else:
+                errors = "Unexpected result format from agent"
+        except Exception as e:
+            errors = f"Agent error: {str(e)}"
+
+        yield [
+            html_content,
+            final_result,
+            errors,
+            model_actions,
+            model_thoughts,
+            recording,
+            trace,
+        ]

    except Exception as e:
        import traceback
-
        yield [
            f"<div class='error'>Browser error: {str(e)}</div>",
            "",
@@ -447,9 +445,30 @@ async def run_with_stream(
            None,
            None,
        ]
+
+# Entry point: parse CLI options, launch the UI, and close the shared browser context and browser on exit
+def main():
+    async def cleanup():
+        global _global_browser, _global_browser_context
+        if _global_browser_context:
+            await _global_browser_context.close()
+        if _global_browser:
+            await _global_browser.close()
+        _global_browser = None
+        _global_browser_context = None
+
+    parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent")
+    parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
+    parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
+    parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI")
+    parser.add_argument("--dark-mode", action="store_true", help="Enable dark mode")
+    args = parser.parse_args()
+
+    try:
+        demo = create_ui(theme_name=args.theme)
+        demo.launch(server_name=args.ip, server_port=args.port)
    finally:
-        if browser:
-            await browser.close()
+        asyncio.get_event_loop().run_until_complete(cleanup())

 from gradio.themes import Citrus, Default, Glass, Monochrome, Ocean, Origin, Soft, Base

@@ -733,16 +752,5 @@ def create_ui(theme_name="Ocean"):

    return demo

-def main():
-    parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent")
-    parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
-    parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
-    parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI")
-    parser.add_argument("--dark-mode", action="store_true", help="Enable dark mode")
-    args = parser.parse_args()
-
-    demo = create_ui(theme_name=args.theme)
-    demo.launch(server_name=args.ip, server_port=args.port)
-
 if __name__ == '__main__':
    main()