resolve conflict with new webui version - implement docker
This commit is contained in:
14
.env.example
14
.env.example
@@ -17,5 +17,17 @@ ANONYMIZED_TELEMETRY=true
|
||||
# LogLevel: Set to debug to enable verbose logging, set to result to get results only. Available: result | debug | info
|
||||
BROWSER_USE_LOGGING_LEVEL=info
|
||||
|
||||
# Chrome settings
|
||||
CHROME_PATH=
|
||||
CHROME_USER_DATA=
|
||||
CHROME_USER_DATA=
|
||||
CHROME_DEBUGGING_PORT=9222
|
||||
CHROME_DEBUGGING_HOST=localhost
|
||||
CHROME_PERSISTENT_SESSION=false # Set to true to keep browser open between AI tasks
|
||||
|
||||
# Display settings
|
||||
RESOLUTION=1920x1080x24 # Format: WIDTHxHEIGHTxDEPTH
|
||||
RESOLUTION_WIDTH=1920 # Width in pixels
|
||||
RESOLUTION_HEIGHT=1080 # Height in pixels
|
||||
|
||||
# VNC settings
|
||||
VNC_PASSWORD=yourvncpassword
|
||||
82
Dockerfile
Normal file
82
Dockerfile
Normal file
@@ -0,0 +1,82 @@
|
||||
FROM python:3.11-slim
|
||||
|
||||
# Install system dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
wget \
|
||||
gnupg \
|
||||
curl \
|
||||
unzip \
|
||||
xvfb \
|
||||
libgconf-2-4 \
|
||||
libxss1 \
|
||||
libnss3 \
|
||||
libnspr4 \
|
||||
libasound2 \
|
||||
libatk1.0-0 \
|
||||
libatk-bridge2.0-0 \
|
||||
libcups2 \
|
||||
libdbus-1-3 \
|
||||
libdrm2 \
|
||||
libgbm1 \
|
||||
libgtk-3-0 \
|
||||
libxcomposite1 \
|
||||
libxdamage1 \
|
||||
libxfixes3 \
|
||||
libxrandr2 \
|
||||
xdg-utils \
|
||||
fonts-liberation \
|
||||
dbus \
|
||||
xauth \
|
||||
xvfb \
|
||||
x11vnc \
|
||||
tigervnc-tools \
|
||||
supervisor \
|
||||
net-tools \
|
||||
procps \
|
||||
git \
|
||||
python3-numpy \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install noVNC
|
||||
RUN git clone https://github.com/novnc/noVNC.git /opt/novnc \
|
||||
&& git clone https://github.com/novnc/websockify /opt/novnc/utils/websockify \
|
||||
&& ln -s /opt/novnc/vnc.html /opt/novnc/index.html
|
||||
|
||||
# Install Chrome
|
||||
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \
|
||||
&& echo "deb http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list \
|
||||
&& apt-get update \
|
||||
&& apt-get install -y google-chrome-stable \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Set up working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Copy requirements and install Python dependencies
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Install Playwright and browsers with system dependencies
|
||||
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
|
||||
RUN playwright install --with-deps chromium
|
||||
RUN playwright install-deps
|
||||
|
||||
# Copy the application code
|
||||
COPY . .
|
||||
|
||||
# Set environment variables
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
ENV BROWSER_USE_LOGGING_LEVEL=info
|
||||
ENV CHROME_PATH=/usr/bin/google-chrome
|
||||
ENV ANONYMIZED_TELEMETRY=false
|
||||
ENV DISPLAY=:99
|
||||
ENV RESOLUTION=1920x1080x24
|
||||
ENV VNC_PASSWORD=vncpassword
|
||||
|
||||
# Set up supervisor configuration
|
||||
RUN mkdir -p /var/log/supervisor
|
||||
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
||||
|
||||
EXPOSE 7788 6080 5900
|
||||
|
||||
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
|
||||
178
README.md
178
README.md
@@ -17,9 +17,13 @@ We would like to officially thank [WarmShao](https://github.com/warmshao) for hi
|
||||
|
||||
**Custom Browser Support:** You can use your own browser with our tool, eliminating the need to re-login to sites or deal with other authentication challenges. This feature also supports high-definition screen recording.
|
||||
|
||||
<video src="https://github.com/user-attachments/assets/56bc7080-f2e3-4367-af22-6bf2245ff6cb" controls="controls" >Your browser does not support playing this video!</video>
|
||||
**Persistent Browser Sessions:** You can choose to keep the browser window open between AI tasks, allowing you to see the complete history and state of AI interactions.
|
||||
|
||||
## Installation Guide
|
||||
<video src="https://github.com/user-attachments/assets/56bc7080-f2e3-4367-af22-6bf2245ff6cb" controls="controls">Your browser does not support playing this video!</video>
|
||||
|
||||
## Installation Options
|
||||
|
||||
### Option 1: Local Installation
|
||||
|
||||
Read the [quickstart guide](https://docs.browser-use.com/quickstart#prepare-the-environment) or follow the steps below to get started.
|
||||
|
||||
@@ -49,84 +53,132 @@ Then install playwright:
|
||||
playwright install
|
||||
```
|
||||
|
||||
### Option 2: Docker Installation
|
||||
|
||||
1. **Prerequisites:**
|
||||
- Docker and Docker Compose installed on your system
|
||||
- Git to clone the repository
|
||||
|
||||
2. **Setup:**
|
||||
```bash
|
||||
# Clone the repository
|
||||
git clone https://github.com/browser-use/web-ui.git
|
||||
cd web-ui
|
||||
|
||||
# Copy and configure environment variables
|
||||
cp .env.example .env
|
||||
# Edit .env with your preferred text editor and add your API keys
|
||||
```
|
||||
|
||||
3. **Run with Docker:**
|
||||
```bash
|
||||
# Build and start the container with default settings (browser closes after AI tasks)
|
||||
docker compose up --build
|
||||
|
||||
# Or run with persistent browser (browser stays open between AI tasks)
|
||||
CHROME_PERSISTENT_SESSION=true docker compose up --build
|
||||
```
|
||||
|
||||
4. **Access the Application:**
|
||||
- WebUI: `http://localhost:7788`
|
||||
- VNC Viewer (to see browser interactions): `http://localhost:6080/vnc.html`
|
||||
|
||||
Default VNC password is "vncpassword". You can change it by setting the `VNC_PASSWORD` environment variable in your `.env` file.
|
||||
|
||||
|
||||
## Usage
|
||||
|
||||
1. **Run the WebUI:**
|
||||
### Local Setup
|
||||
1. Copy `.env.example` to `.env` and set your environment variables, including API keys for the LLM. `cp .env.example .env`
|
||||
2. **Run the WebUI:**
|
||||
```bash
|
||||
python webui.py --ip 127.0.0.1 --port 7788
|
||||
```
|
||||
2. **Access the WebUI:** Open your web browser and navigate to `http://127.0.0.1:7788`.
|
||||
3. **Using Your Own Browser:**
|
||||
- Close all chrome windows
|
||||
4. WebUI options:
|
||||
- `--ip`: The IP address to bind the WebUI to. Default is `127.0.0.1`.
|
||||
- `--port`: The port to bind the WebUI to. Default is `7788`.
|
||||
- `--theme`: The theme for the user interface. Default is `Ocean`.
|
||||
- **Default**: The standard theme with a balanced design.
|
||||
- **Soft**: A gentle, muted color scheme for a relaxed viewing experience.
|
||||
- **Monochrome**: A grayscale theme with minimal color for simplicity and focus.
|
||||
- **Glass**: A sleek, semi-transparent design for a modern appearance.
|
||||
- **Origin**: A classic, retro-inspired theme for a nostalgic feel.
|
||||
- **Citrus**: A vibrant, citrus-inspired palette with bright and fresh colors.
|
||||
- **Ocean** (default): A blue, ocean-inspired theme providing a calming effect.
|
||||
- `--dark-mode`: Enables dark mode for the user interface.
|
||||
3. **Access the WebUI:** Open your web browser and navigate to `http://127.0.0.1:7788`.
|
||||
4. **Using Your Own Browser (Optional):**
|
||||
- Set `CHROME_PATH` to the executable path of your browser and `CHROME_USER_DATA` to the user data directory of your browser.
|
||||
- Windows
|
||||
```env
|
||||
CHROME_PATH="C:\Program Files\Google\Chrome\Application\chrome.exe"
|
||||
CHROME_USER_DATA="C:\Users\YourUsername\AppData\Local\Google\Chrome\User Data"
|
||||
```
|
||||
> Note: Replace `YourUsername` with your actual Windows username for Windows systems.
|
||||
- Mac
|
||||
```env
|
||||
CHROME_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
|
||||
CHROME_USER_DATA="~/Library/Application Support/Google/Chrome/Profile 1"
|
||||
```
|
||||
- Close all Chrome windows
|
||||
- Open the WebUI in a non-Chrome browser, such as Firefox or Edge. This is important because the persistent browser context will use the Chrome data when running the agent.
|
||||
- Check the "Use Own Browser" option within the Browser Settings.
|
||||
5. **Keep Browser Open (Optional):**
|
||||
- Set `CHROME_PERSISTENT_SESSION=true` in the `.env` file.
|
||||
|
||||
### Options:
|
||||
### Docker Setup
|
||||
1. **Environment Variables:**
|
||||
- All configuration is done through the `.env` file
|
||||
- Available environment variables:
|
||||
```
|
||||
# LLM API Keys
|
||||
OPENAI_API_KEY=your_key_here
|
||||
ANTHROPIC_API_KEY=your_key_here
|
||||
GOOGLE_API_KEY=your_key_here
|
||||
|
||||
### `--theme`
|
||||
# Browser Settings
|
||||
CHROME_PERSISTENT_SESSION=true # Set to true to keep browser open between AI tasks
|
||||
RESOLUTION=1920x1080x24 # Custom resolution format: WIDTHxHEIGHTxDEPTH
|
||||
RESOLUTION_WIDTH=1920 # Custom width in pixels
|
||||
RESOLUTION_HEIGHT=1080 # Custom height in pixels
|
||||
|
||||
- **Type**: `str`
|
||||
- **Default**: `Ocean`
|
||||
- **Description**: Specifies the theme for the user interface.
|
||||
- **Options**:
|
||||
The available themes are defined in the `theme_map` dictionary. Below are the options you can choose from:
|
||||
- **Default**: The standard theme with a balanced design.
|
||||
- **Soft**: A gentle, muted color scheme for a relaxed viewing experience.
|
||||
- **Monochrome**: A grayscale theme with minimal color for simplicity and focus.
|
||||
- **Glass**: A sleek, semi-transparent design for a modern appearance.
|
||||
- **Origin**: A classic, retro-inspired theme for a nostalgic feel.
|
||||
- **Citrus**: A vibrant, citrus-inspired palette with bright and fresh colors.
|
||||
- **Ocean** (default): A blue, ocean-inspired theme providing a calming effect.
|
||||
# VNC Settings
|
||||
VNC_PASSWORD=your_vnc_password # Optional, defaults to "vncpassword"
|
||||
```
|
||||
|
||||
**Example**:
|
||||
2. **Browser Persistence Modes:**
|
||||
- **Default Mode (CHROME_PERSISTENT_SESSION=false):**
|
||||
- Browser opens and closes with each AI task
|
||||
- Clean state for each interaction
|
||||
- Lower resource usage
|
||||
|
||||
```bash
|
||||
python webui.py --ip 127.0.0.1 --port 7788 --theme Glass
|
||||
```
|
||||
- **Persistent Mode (CHROME_PERSISTENT_SESSION=true):**
|
||||
- Browser stays open between AI tasks
|
||||
- Maintains history and state
|
||||
- Allows viewing previous AI interactions
|
||||
- Set in `.env` file or via environment variable when starting container
|
||||
|
||||
### `--dark-mode`
|
||||
3. **Viewing Browser Interactions:**
|
||||
- Access the noVNC viewer at `http://localhost:6080/vnc.html`
|
||||
- Enter the VNC password (default: "vncpassword" or what you set in VNC_PASSWORD)
|
||||
- You can now see all browser interactions in real-time
|
||||
|
||||
- **Type**: `boolean`
|
||||
- **Default**: Disabled
|
||||
- **Description**: Enables dark mode for the user interface. This is a simple toggle; including the flag activates dark mode, while omitting it keeps the interface in light mode.
|
||||
- **Options**:
|
||||
- **Enabled (`--dark-mode`)**: Activates dark mode, switching the interface to a dark color scheme for better visibility in low-light environments.
|
||||
- **Disabled (default)**: Keeps the interface in the default light mode.
|
||||
4. **Container Management:**
|
||||
```bash
|
||||
# Start with persistent browser
|
||||
CHROME_PERSISTENT_SESSION=true docker compose up -d
|
||||
|
||||
**Example**:
|
||||
# Start with default mode (browser closes after tasks)
|
||||
docker compose up -d
|
||||
|
||||
```bash
|
||||
python webui.py --ip 127.0.0.1 --port 7788 --dark-mode
|
||||
```
|
||||
# View logs
|
||||
docker compose logs -f
|
||||
|
||||
## (Optional) Configure Environment Variables
|
||||
|
||||
Copy `.env.example` to `.env` and set your environment variables, including API keys for the LLM, with:
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
**If using your own browser:** - Set `CHROME_PATH` to the executable path of your browser and `CHROME_USER_DATA` to the user data directory of your browser.
|
||||
|
||||
You can just copy examples down below to your `.env` file.
|
||||
|
||||
### Windows
|
||||
|
||||
```env
|
||||
CHROME_PATH="C:\Program Files\Google\Chrome\Application\chrome.exe"
|
||||
CHROME_USER_DATA="C:\Users\YourUsername\AppData\Local\Google\Chrome\User Data"
|
||||
```
|
||||
|
||||
> Note: Replace `YourUsername` with your actual Windows username for Windows systems.
|
||||
|
||||
### Mac
|
||||
|
||||
```env
|
||||
CHROME_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
|
||||
CHROME_USER_DATA="~/Library/Application Support/Google/Chrome/Profile 1"
|
||||
```
|
||||
# Stop the container
|
||||
docker compose down
|
||||
```
|
||||
|
||||
## Changelog
|
||||
|
||||
- [x] **2025/01/06:** Thanks to @richard-devbot, a New and Well-Designed WebUI is released. [Video tutorial demo](https://github.com/warmshao/browser-use-webui/issues/1#issuecomment-2573393113).
|
||||
- [x] **2025/01/10:** Thanks to @casistack, we now have a Docker setup option and support for keeping the browser open between tasks. [Video tutorial demo](https://github.com/browser-use/web-ui/issues/1#issuecomment-2582511750).
|
||||
- [x] **2025/01/06:** Thanks to @richard-devbot. A New and Well-Designed WebUI is released. [Video tutorial demo](https://github.com/warmshao/browser-use-webui/issues/1#issuecomment-2573393113).
|
||||
51
docker-compose.yml
Normal file
51
docker-compose.yml
Normal file
@@ -0,0 +1,51 @@
|
||||
services:
|
||||
browser-use-webui:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- "7788:7788" # Gradio default port
|
||||
- "6080:6080" # noVNC web interface
|
||||
- "5900:5900" # VNC port
|
||||
- "9222:9222" # Chrome remote debugging port
|
||||
environment:
|
||||
- OPENAI_ENDPOINT=${OPENAI_ENDPOINT:-https://api.openai.com/v1}
|
||||
- OPENAI_API_KEY=${OPENAI_API_KEY:-}
|
||||
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
|
||||
- GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
|
||||
- AZURE_OPENAI_ENDPOINT=${AZURE_OPENAI_ENDPOINT:-}
|
||||
- AZURE_OPENAI_API_KEY=${AZURE_OPENAI_API_KEY:-}
|
||||
- DEEPSEEK_ENDPOINT=${DEEPSEEK_ENDPOINT:-https://api.deepseek.com}
|
||||
- DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY:-}
|
||||
- BROWSER_USE_LOGGING_LEVEL=${BROWSER_USE_LOGGING_LEVEL:-info}
|
||||
- ANONYMIZED_TELEMETRY=false
|
||||
- CHROME_PATH=/usr/bin/google-chrome
|
||||
- CHROME_USER_DATA=/app/data/chrome_data
|
||||
- CHROME_PERSISTENT_SESSION=${CHROME_PERSISTENT_SESSION:-false}
|
||||
- DISPLAY=:99
|
||||
- PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
|
||||
- RESOLUTION=${RESOLUTION:-1920x1080x24}
|
||||
- RESOLUTION_WIDTH=${RESOLUTION_WIDTH:-1920}
|
||||
- RESOLUTION_HEIGHT=${RESOLUTION_HEIGHT:-1080}
|
||||
- VNC_PASSWORD=${VNC_PASSWORD:-vncpassword}
|
||||
- PERSISTENT_BROWSER_PORT=9222
|
||||
- PERSISTENT_BROWSER_HOST=localhost
|
||||
- CHROME_DEBUGGING_PORT=9222
|
||||
- CHROME_DEBUGGING_HOST=localhost
|
||||
volumes:
|
||||
- ./data:/app/data
|
||||
- ./data/chrome_data:/app/data/chrome_data
|
||||
- /tmp/.X11-unix:/tmp/.X11-unix
|
||||
restart: unless-stopped
|
||||
shm_size: '2gb'
|
||||
cap_add:
|
||||
- SYS_ADMIN
|
||||
security_opt:
|
||||
- seccomp=unconfined
|
||||
tmpfs:
|
||||
- /tmp
|
||||
healthcheck:
|
||||
test: ["CMD", "nc", "-z", "localhost", "5900"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
30
src/browser/config.py
Normal file
30
src/browser/config.py
Normal file
@@ -0,0 +1,30 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Time : 2025/1/6
|
||||
# @Author : wenshao
|
||||
# @ProjectName: browser-use-webui
|
||||
# @FileName: config.py
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@dataclass
class BrowserPersistenceConfig:
    """Configuration for browser persistence.

    Instances are normally built from environment variables with
    :meth:`from_env`; constructing the class directly leaves every field at
    its "not configured" default.
    """

    # True when CHROME_PERSISTENT_SESSION equals "true" (case-insensitive).
    persistent_session: bool = False
    # Raw value of CHROME_USER_DATA; None when the variable is unset.
    user_data_dir: Optional[str] = None
    # CHROME_DEBUGGING_PORT as an int; from_env falls back to 9222.
    debugging_port: Optional[int] = None
    # CHROME_DEBUGGING_HOST; from_env falls back to "localhost".
    debugging_host: Optional[str] = None

    @classmethod
    def from_env(cls) -> "BrowserPersistenceConfig":
        """Create config from environment variables.

        Reads CHROME_PERSISTENT_SESSION, CHROME_USER_DATA,
        CHROME_DEBUGGING_PORT and CHROME_DEBUGGING_HOST.

        Returns:
            A new ``BrowserPersistenceConfig`` populated from the environment.

        Raises:
            ValueError: If CHROME_DEBUGGING_PORT is set to a non-blank value
                that is not an integer.
        """
        persistent_flag = os.getenv("CHROME_PERSISTENT_SESSION", "").strip().lower()

        # A variable that is present but blank (e.g. a bare ``KEY=`` line in
        # a .env file) is treated like an unset one, so the defaults still
        # apply instead of int("") raising or an empty host being returned.
        port_text = os.getenv("CHROME_DEBUGGING_PORT", "").strip() or "9222"
        host_text = os.getenv("CHROME_DEBUGGING_HOST", "").strip() or "localhost"

        return cls(
            persistent_session=persistent_flag == "true",
            user_data_dir=os.getenv("CHROME_USER_DATA"),
            debugging_port=int(port_text),
            debugging_host=host_text,
        )
|
||||
@@ -6,15 +6,45 @@
|
||||
|
||||
from browser_use.browser.browser import Browser
|
||||
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
||||
from playwright.async_api import BrowserContext as PlaywrightBrowserContext
|
||||
import logging
|
||||
|
||||
from .config import BrowserPersistenceConfig
|
||||
from .custom_context import CustomBrowserContext
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class CustomBrowser(Browser):
|
||||
_global_context = None
|
||||
|
||||
async def new_context(
|
||||
self,
|
||||
config: BrowserContextConfig = BrowserContextConfig(),
|
||||
context: CustomBrowserContext = None,
|
||||
) -> BrowserContext:
|
||||
"""Create a browser context"""
|
||||
context: PlaywrightBrowserContext = None,
|
||||
) -> CustomBrowserContext:
|
||||
"""Create a browser context with persistence support"""
|
||||
persistence_config = BrowserPersistenceConfig.from_env()
|
||||
|
||||
if persistence_config.persistent_session:
|
||||
if CustomBrowser._global_context is not None:
|
||||
logger.info("Reusing existing persistent browser context")
|
||||
return CustomBrowser._global_context
|
||||
|
||||
context_instance = CustomBrowserContext(config=config, browser=self, context=context)
|
||||
CustomBrowser._global_context = context_instance
|
||||
logger.info("Created new persistent browser context")
|
||||
return context_instance
|
||||
|
||||
logger.info("Creating non-persistent browser context")
|
||||
return CustomBrowserContext(config=config, browser=self, context=context)
|
||||
|
||||
async def close(self):
|
||||
"""Override close to respect persistence setting"""
|
||||
persistence_config = BrowserPersistenceConfig.from_env()
|
||||
if not persistence_config.persistent_session:
|
||||
if CustomBrowser._global_context is not None:
|
||||
await CustomBrowser._global_context.close()
|
||||
CustomBrowser._global_context = None
|
||||
await super().close()
|
||||
else:
|
||||
logger.info("Skipping browser close due to persistent session")
|
||||
@@ -9,84 +9,77 @@ import json
|
||||
import logging
|
||||
import os
|
||||
|
||||
from playwright.async_api import Browser as PlaywrightBrowser, Page, BrowserContext as PlaywrightContext
|
||||
from browser_use.browser.browser import Browser
|
||||
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
||||
from playwright.async_api import Browser as PlaywrightBrowser
|
||||
from playwright.async_api import BrowserContext as PlaywrightBrowserContext
|
||||
|
||||
from .config import BrowserPersistenceConfig
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CustomBrowserContext(BrowserContext):
|
||||
def __init__(
|
||||
self,
|
||||
browser: "CustomBrowser", # Forward declaration for CustomBrowser
|
||||
browser: "Browser",
|
||||
config: BrowserContextConfig = BrowserContextConfig(),
|
||||
context: PlaywrightContext = None
|
||||
context: PlaywrightBrowserContext = None,
|
||||
):
|
||||
super(CustomBrowserContext, self).__init__(browser=browser, config=config)
|
||||
self.context = context # Rename to avoid confusion
|
||||
self.context = context
|
||||
self._page = None
|
||||
self._persistence_config = BrowserPersistenceConfig.from_env()
|
||||
|
||||
@property
|
||||
def impl_context(self) -> PlaywrightContext:
|
||||
def impl_context(self) -> PlaywrightBrowserContext:
|
||||
"""Returns the underlying Playwright context implementation"""
|
||||
return self.context
|
||||
|
||||
async def _create_context(self, browser: PlaywrightBrowser = None):
|
||||
async def _create_context(self, browser: PlaywrightBrowser) -> PlaywrightBrowserContext:
|
||||
"""Creates a new browser context with anti-detection measures and loads cookies if available."""
|
||||
if self.context:
|
||||
logger.info("Browser context already exists, returning existing context.")
|
||||
return self.context
|
||||
|
||||
# If a Playwright browser is not provided, get it from our custom browser
|
||||
pw_browser = browser or await self.browser.get_playwright_browser()
|
||||
|
||||
context_args = {
|
||||
'viewport': self.config.browser_window_size,
|
||||
'no_viewport': False,
|
||||
'bypass_csp': self.config.disable_security,
|
||||
'ignore_https_errors': self.config.disable_security
|
||||
}
|
||||
|
||||
if self.config.save_recording_path:
|
||||
context_args.update({
|
||||
'record_video_dir': self.config.save_recording_path,
|
||||
'record_video_size': self.config.browser_window_size
|
||||
})
|
||||
|
||||
self.context = await pw_browser.new_context(**context_args)
|
||||
# Check for persistent context
|
||||
if self._persistence_config.persistent_session and len(browser.contexts) > 0:
|
||||
logger.info("Using existing persistent context.")
|
||||
self.context = browser.contexts[0]
|
||||
else:
|
||||
logger.info("Creating a new browser context.")
|
||||
self.context = await browser.new_context(
|
||||
viewport=self.config.browser_window_size,
|
||||
no_viewport=False,
|
||||
user_agent=(
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
|
||||
"(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"
|
||||
),
|
||||
java_script_enabled=True,
|
||||
bypass_csp=self.config.disable_security,
|
||||
ignore_https_errors=self.config.disable_security,
|
||||
record_video_dir=self.config.save_recording_path,
|
||||
record_video_size=self.config.browser_window_size,
|
||||
)
|
||||
|
||||
# Handle tracing
|
||||
if self.config.trace_path:
|
||||
await self.context.tracing.start(screenshots=True, snapshots=True, sources=True)
|
||||
|
||||
# Load cookies if they exist
|
||||
# Load cookies
|
||||
if self.config.cookies_file and os.path.exists(self.config.cookies_file):
|
||||
with open(self.config.cookies_file, "r") as f:
|
||||
cookies = json.load(f)
|
||||
logger.info(
|
||||
f"Loaded {len(cookies)} cookies from {self.config.cookies_file}"
|
||||
)
|
||||
logger.info(f"Loaded {len(cookies)} cookies from {self.config.cookies_file}.")
|
||||
await self.context.add_cookies(cookies)
|
||||
|
||||
# Expose anti-detection scripts
|
||||
# Inject anti-detection scripts
|
||||
await self.context.add_init_script(
|
||||
"""
|
||||
// Webdriver property
|
||||
Object.defineProperty(navigator, 'webdriver', {
|
||||
get: () => undefined
|
||||
});
|
||||
|
||||
// Languages
|
||||
Object.defineProperty(navigator, 'languages', {
|
||||
get: () => ['en-US', 'en']
|
||||
});
|
||||
|
||||
// Plugins
|
||||
Object.defineProperty(navigator, 'plugins', {
|
||||
get: () => [1, 2, 3, 4, 5]
|
||||
});
|
||||
|
||||
// Chrome runtime
|
||||
Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
|
||||
Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
|
||||
Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
|
||||
window.chrome = { runtime: {} };
|
||||
|
||||
// Permissions
|
||||
const originalQuery = window.navigator.permissions.query;
|
||||
window.navigator.permissions.query = (parameters) => (
|
||||
parameters.name === 'notifications' ?
|
||||
@@ -96,42 +89,39 @@ class CustomBrowserContext(BrowserContext):
|
||||
"""
|
||||
)
|
||||
|
||||
# Create an initial page
|
||||
self._page = await self.context.new_page()
|
||||
await self._page.goto('about:blank') # Ensure page is ready
|
||||
|
||||
# Create initial page if none exists
|
||||
if not self.context.pages:
|
||||
self._page = await self.context.new_page()
|
||||
await self._page.goto('about:blank')
|
||||
|
||||
return self.context
|
||||
|
||||
async def new_page(self) -> Page:
|
||||
"""Creates and returns a new page in this context"""
|
||||
async def new_page(self):
|
||||
"""Creates and returns a new page in this context."""
|
||||
if not self.context:
|
||||
await self._create_context()
|
||||
await self._create_context(await self.browser.get_playwright_browser())
|
||||
return await self.context.new_page()
|
||||
|
||||
async def __aenter__(self):
|
||||
async def get_current_page(self):
|
||||
"""Returns the current page or creates one if none exists."""
|
||||
if not self.context:
|
||||
await self._create_context()
|
||||
return self
|
||||
await self._create_context(await self.browser.get_playwright_browser())
|
||||
pages = self.context.pages
|
||||
if not pages:
|
||||
logger.warning("No existing pages in the context. Creating a new page.")
|
||||
return await self.context.new_page()
|
||||
return pages[0]
|
||||
|
||||
async def __aexit__(self, *args):
|
||||
if self.context:
|
||||
async def close(self):
|
||||
"""Override close to respect persistence setting."""
|
||||
if not self._persistence_config.persistent_session and self.context:
|
||||
await self.context.close()
|
||||
self.context = None
|
||||
|
||||
@property
|
||||
def pages(self):
|
||||
"""Returns list of pages in context"""
|
||||
return self.context.pages if self.context else []
|
||||
|
||||
async def get_state(self, **kwargs):
|
||||
if self.context:
|
||||
pages = self.context.pages
|
||||
if pages:
|
||||
return await super().get_state(**kwargs)
|
||||
return None
|
||||
|
||||
async def get_pages(self):
|
||||
"""Get pages in a way that works"""
|
||||
"""Returns list of pages in the context."""
|
||||
if not self.context:
|
||||
logger.warning("Attempting to access pages but context is not initialized.")
|
||||
return []
|
||||
return self.context.pages
|
||||
|
||||
@@ -3,24 +3,38 @@ import asyncio
|
||||
from typing import AsyncGenerator
|
||||
from playwright.async_api import BrowserContext, Error as PlaywrightError
|
||||
|
||||
async def capture_screenshot(browser_context: BrowserContext) -> str:
|
||||
async def capture_screenshot(browser_context) -> str:
|
||||
"""Capture and encode a screenshot"""
|
||||
try:
|
||||
# Get the implementation context
|
||||
context = getattr(browser_context, 'impl_context', None)
|
||||
# Get the implementation context - handle both direct Playwright context and wrapped context
|
||||
context = browser_context
|
||||
if hasattr(browser_context, 'context'):
|
||||
context = browser_context.context
|
||||
|
||||
if not context:
|
||||
return "<div>No browser context implementation available</div>"
|
||||
return "<div>No browser context available</div>"
|
||||
|
||||
# Get all pages
|
||||
all_pages = context.pages
|
||||
if not all_pages:
|
||||
pages = context.pages
|
||||
if not pages:
|
||||
return "<div>Waiting for page to be available...</div>"
|
||||
# Use the first page
|
||||
for page in all_pages:
|
||||
|
||||
# Use the first non-blank page or fallback to first page
|
||||
active_page = None
|
||||
for page in pages:
|
||||
if page.url != 'about:blank':
|
||||
active_page = page
|
||||
break
|
||||
|
||||
if not active_page and pages:
|
||||
active_page = pages[0]
|
||||
|
||||
if not active_page:
|
||||
return "<div>No active page available</div>"
|
||||
|
||||
# Take screenshot
|
||||
try:
|
||||
screenshot = await page.screenshot(
|
||||
screenshot = await active_page.screenshot(
|
||||
type='jpeg',
|
||||
quality=75,
|
||||
scale="css"
|
||||
@@ -29,5 +43,6 @@ async def capture_screenshot(browser_context: BrowserContext) -> str:
|
||||
return f'<img src="data:image/jpeg;base64,{encoded}" style="width:100%; max-width:1200px; border:1px solid #ccc;">'
|
||||
except Exception as e:
|
||||
return f"<div class='error'>Screenshot failed: {str(e)}</div>"
|
||||
|
||||
except Exception as e:
|
||||
return f"<div class='error'>Screenshot error: {str(e)}</div>"
|
||||
83
supervisord.conf
Normal file
83
supervisord.conf
Normal file
@@ -0,0 +1,83 @@
|
||||
[supervisord]
|
||||
nodaemon=true
|
||||
logfile=/dev/stdout
|
||||
logfile_maxbytes=0
|
||||
loglevel=debug
|
||||
|
||||
[program:xvfb]
|
||||
command=Xvfb :99 -screen 0 %(ENV_RESOLUTION)s -ac +extension GLX +render -noreset
|
||||
autorestart=true
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
priority=100
|
||||
startsecs=3
|
||||
|
||||
[program:vnc_setup]
|
||||
command=bash -c "mkdir -p ~/.vnc && echo '%(ENV_VNC_PASSWORD)s' | vncpasswd -f > ~/.vnc/passwd && chmod 600 ~/.vnc/passwd && ls -la ~/.vnc/passwd"
|
||||
autorestart=false
|
||||
startsecs=0
|
||||
priority=150
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
|
||||
[program:x11vnc]
|
||||
command=bash -c "sleep 3 && DISPLAY=:99 x11vnc -display :99 -forever -shared -rfbauth /root/.vnc/passwd -rfbport 5900 -bg -o /var/log/x11vnc.log"
|
||||
autorestart=true
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
priority=200
|
||||
startretries=5
|
||||
startsecs=5
|
||||
depends_on=vnc_setup
|
||||
|
||||
[program:x11vnc_log]
|
||||
command=tail -f /var/log/x11vnc.log
|
||||
autorestart=true
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
priority=250
|
||||
|
||||
[program:novnc]
|
||||
command=bash -c "sleep 5 && cd /opt/novnc && ./utils/novnc_proxy --vnc localhost:5900 --listen 0.0.0.0:6080 --web /opt/novnc"
|
||||
autorestart=true
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
priority=300
|
||||
startretries=5
|
||||
startsecs=3
|
||||
depends_on=x11vnc
|
||||
|
||||
[program:persistent_browser]
|
||||
command=bash -c 'if [ "%(ENV_CHROME_PERSISTENT_SESSION)s" = "true" ]; then mkdir -p /app/data/chrome_data && sleep 8 && google-chrome --user-data-dir=/app/data/chrome_data --window-position=0,0 --window-size=%(ENV_RESOLUTION_WIDTH)s,%(ENV_RESOLUTION_HEIGHT)s --start-maximized --no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-setuid-sandbox --no-first-run --no-default-browser-check --no-experiments --ignore-certificate-errors --remote-debugging-port=9222 --remote-debugging-address=0.0.0.0 "data:text/html,<html><body style=\"background: #f0f0f0; margin: 0; display: flex; justify-content: center; align-items: center; height: 100vh; font-family: Arial;\"><h1>Browser Ready for AI Interaction</h1></body></html>"; else echo "Persistent browser disabled"; fi'
|
||||
autorestart=%(ENV_CHROME_PERSISTENT_SESSION)s
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
priority=350
|
||||
startretries=3
|
||||
startsecs=3
|
||||
depends_on=novnc
|
||||
|
||||
[program:webui]
|
||||
command=python webui.py --ip 0.0.0.0 --port 7788
|
||||
directory=/app
|
||||
autorestart=true
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
priority=400
|
||||
startretries=3
|
||||
startsecs=3
|
||||
depends_on=persistent_browser
|
||||
368
webui.py
368
webui.py
@@ -30,6 +30,16 @@ from src.browser.custom_context import BrowserContextConfig
|
||||
from src.controller.custom_controller import CustomController
|
||||
from src.utils import utils
|
||||
from src.utils.utils import update_model_dropdown
|
||||
from src.browser.config import BrowserPersistenceConfig
|
||||
from src.browser.custom_browser import CustomBrowser
|
||||
from src.browser.custom_context import CustomBrowserContext
|
||||
from browser_use.browser.browser import BrowserConfig
|
||||
from browser_use.browser.context import BrowserContextConfig, BrowserContextWindowSize
|
||||
|
||||
# Global variables for persistence
|
||||
_global_browser = None
|
||||
_global_browser_context = None
|
||||
_global_playwright = None
|
||||
from src.utils.file_utils import get_latest_files
|
||||
from src.utils.stream_utils import capture_screenshot
|
||||
|
||||
@@ -196,121 +206,113 @@ async def run_custom_agent(
|
||||
tool_call_in_content,
|
||||
browser_context=None, # receive context
|
||||
):
|
||||
global _global_browser, _global_browser_context, _global_playwright
|
||||
|
||||
controller = CustomController()
|
||||
playwright = None
|
||||
browser = None
|
||||
persistence_config = BrowserPersistenceConfig.from_env()
|
||||
|
||||
try:
|
||||
if use_own_browser:
|
||||
playwright = await async_playwright().start()
|
||||
chrome_exe = os.getenv("CHROME_PATH", "")
|
||||
chrome_use_data = os.getenv("CHROME_USER_DATA", "")
|
||||
|
||||
if chrome_exe == "":
|
||||
chrome_exe = None
|
||||
elif not os.path.exists(chrome_exe):
|
||||
raise ValueError(f"Chrome executable not found at {chrome_exe}")
|
||||
|
||||
if chrome_use_data == "":
|
||||
chrome_use_data = None
|
||||
|
||||
browser_context_ = await playwright.chromium.launch_persistent_context(
|
||||
user_data_dir=chrome_use_data if chrome_use_data else "",
|
||||
executable_path=chrome_exe,
|
||||
no_viewport=False,
|
||||
headless=headless, # 保持浏览器窗口可见
|
||||
user_agent=(
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
|
||||
"(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"
|
||||
),
|
||||
java_script_enabled=True,
|
||||
bypass_csp=disable_security,
|
||||
ignore_https_errors=disable_security,
|
||||
record_video_dir=save_recording_path if save_recording_path else None,
|
||||
record_video_size={"width": window_w, "height": window_h},
|
||||
)
|
||||
else:
|
||||
browser_context_ = None
|
||||
|
||||
if browser_context is not None:
|
||||
# Reuse context
|
||||
agent = CustomAgent(
|
||||
task=task,
|
||||
add_infos=add_infos,
|
||||
use_vision=use_vision,
|
||||
llm=llm,
|
||||
browser_context=browser_context,
|
||||
controller=controller,
|
||||
system_prompt_class=CustomSystemPrompt
|
||||
)
|
||||
history = await agent.run(max_steps=max_steps)
|
||||
final_result = history.final_result()
|
||||
errors = history.errors()
|
||||
model_actions = history.model_actions()
|
||||
model_thoughts = history.model_thoughts()
|
||||
recorded_files = get_latest_files(save_recording_path)
|
||||
trace_file = get_latest_files(save_recording_path + "/../traces")
|
||||
return final_result, errors, model_actions, model_thoughts, recorded_files.get('.webm'), trace_file.get('.zip')
|
||||
else:
|
||||
browser = CustomBrowser(
|
||||
# Initialize global browser if needed
|
||||
if _global_browser is None:
|
||||
_global_browser = CustomBrowser(
|
||||
config=BrowserConfig(
|
||||
headless=headless,
|
||||
disable_security=disable_security,
|
||||
extra_chromium_args=[f"--window-size={window_w},{window_h}"],
|
||||
)
|
||||
)
|
||||
async with await browser.new_context(
|
||||
|
||||
# Handle browser context based on configuration
|
||||
if use_own_browser:
|
||||
if _global_browser_context is None:
|
||||
_global_playwright = await async_playwright().start()
|
||||
chrome_exe = os.getenv("CHROME_PATH", "")
|
||||
chrome_use_data = os.getenv("CHROME_USER_DATA", "")
|
||||
|
||||
browser_context = await _global_playwright.chromium.launch_persistent_context(
|
||||
user_data_dir=chrome_use_data,
|
||||
executable_path=chrome_exe,
|
||||
no_viewport=False,
|
||||
headless=headless,
|
||||
user_agent=(
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
|
||||
"(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"
|
||||
),
|
||||
java_script_enabled=True,
|
||||
bypass_csp=disable_security,
|
||||
ignore_https_errors=disable_security,
|
||||
record_video_dir=save_recording_path if save_recording_path else None,
|
||||
record_video_size={"width": window_w, "height": window_h},
|
||||
)
|
||||
_global_browser_context = await _global_browser.new_context(
|
||||
config=BrowserContextConfig(
|
||||
trace_path=save_trace_path if save_trace_path else None,
|
||||
save_recording_path=save_recording_path
|
||||
if save_recording_path
|
||||
else None,
|
||||
save_recording_path=save_recording_path if save_recording_path else None,
|
||||
no_viewport=False,
|
||||
browser_window_size=BrowserContextWindowSize(
|
||||
width=window_w, height=window_h
|
||||
),
|
||||
),
|
||||
context=browser_context_,
|
||||
) as browser_context:
|
||||
agent = CustomAgent(
|
||||
task=task,
|
||||
add_infos=add_infos,
|
||||
use_vision=use_vision,
|
||||
llm=llm,
|
||||
browser_context=browser_context,
|
||||
controller=controller,
|
||||
system_prompt_class=CustomSystemPrompt,
|
||||
max_actions_per_step=max_actions_per_step,
|
||||
tool_call_in_content=tool_call_in_content
|
||||
context=browser_context,
|
||||
)
|
||||
history = await agent.run(max_steps=max_steps)
|
||||
final_result = history.final_result()
|
||||
errors = history.errors()
|
||||
model_actions = history.model_actions()
|
||||
model_thoughts = history.model_thoughts()
|
||||
recorded_files = get_latest_files(save_recording_path)
|
||||
trace_file = get_latest_files(save_recording_path + "/../traces")
|
||||
return final_result, errors, model_actions, model_thoughts, recorded_files.get('.webm'), trace_file.get('.zip')
|
||||
else:
|
||||
if _global_browser_context is None:
|
||||
_global_browser_context = await _global_browser.new_context(
|
||||
config=BrowserContextConfig(
|
||||
trace_path=save_trace_path if save_trace_path else None,
|
||||
save_recording_path=save_recording_path if save_recording_path else None,
|
||||
no_viewport=False,
|
||||
browser_window_size=BrowserContextWindowSize(
|
||||
width=window_w, height=window_h
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
# Create and run agent
|
||||
agent = CustomAgent(
|
||||
task=task,
|
||||
add_infos=add_infos,
|
||||
use_vision=use_vision,
|
||||
llm=llm,
|
||||
browser_context=_global_browser_context,
|
||||
controller=controller,
|
||||
system_prompt_class=CustomSystemPrompt,
|
||||
max_actions_per_step=max_actions_per_step,
|
||||
tool_call_in_content=tool_call_in_content
|
||||
)
|
||||
history = await agent.run(max_steps=max_steps)
|
||||
|
||||
final_result = history.final_result()
|
||||
errors = history.errors()
|
||||
model_actions = history.model_actions()
|
||||
model_thoughts = history.model_thoughts()
|
||||
recorded_files = get_latest_files(save_recording_path)
|
||||
trace_file = get_latest_files(save_trace_path)
|
||||
|
||||
except Exception as e:
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
final_result = ""
|
||||
errors = str(e) + "\n" + traceback.format_exc()
|
||||
model_actions = ""
|
||||
model_thoughts = ""
|
||||
recorded_files = {}
|
||||
trace_file = {}
|
||||
finally:
|
||||
# 显式关闭持久化上下文
|
||||
if browser_context_:
|
||||
await browser_context_.close()
|
||||
# Handle cleanup based on persistence configuration
|
||||
if not persistence_config.persistent_session:
|
||||
if _global_browser_context:
|
||||
await _global_browser_context.close()
|
||||
_global_browser_context = None
|
||||
|
||||
# 关闭 Playwright 对象
|
||||
if playwright:
|
||||
await playwright.stop()
|
||||
if browser:
|
||||
await browser.close()
|
||||
if _global_playwright:
|
||||
await _global_playwright.stop()
|
||||
_global_playwright = None
|
||||
|
||||
if _global_browser:
|
||||
await _global_browser.close()
|
||||
_global_browser = None
|
||||
return final_result, errors, model_actions, model_thoughts, trace_file.get('.webm'), recorded_files.get('.zip')
|
||||
|
||||
async def run_with_stream(
|
||||
@@ -336,95 +338,71 @@ async def run_with_stream(
|
||||
tool_call_in_content,
|
||||
):
|
||||
"""Wrapper to run the agent and handle streaming."""
|
||||
browser = None
|
||||
global _global_browser, _global_browser_context
|
||||
|
||||
try:
|
||||
# Initialize the browser
|
||||
browser = CustomBrowser(
|
||||
config=BrowserConfig(
|
||||
headless=False,
|
||||
disable_security=disable_security,
|
||||
extra_chromium_args=[f"--window-size={window_w},{window_h}"],
|
||||
)
|
||||
)
|
||||
|
||||
# Create a new browser context
|
||||
async with await browser.new_context(
|
||||
config=BrowserContextConfig(
|
||||
trace_path=save_trace_path if save_trace_path else None,
|
||||
save_recording_path=save_recording_path if save_recording_path else None,
|
||||
no_viewport=False,
|
||||
browser_window_size=BrowserContextWindowSize(
|
||||
width=window_w, height=window_h
|
||||
),
|
||||
)
|
||||
) as browser_context:
|
||||
# Run the browser agent in the background
|
||||
agent_task = asyncio.create_task(
|
||||
run_browser_agent(
|
||||
agent_type=agent_type,
|
||||
llm_provider=llm_provider,
|
||||
llm_model_name=llm_model_name,
|
||||
llm_temperature=llm_temperature,
|
||||
llm_base_url=llm_base_url,
|
||||
llm_api_key=llm_api_key,
|
||||
use_own_browser=use_own_browser,
|
||||
headless=headless,
|
||||
# Initialize the global browser if it doesn't exist
|
||||
if _global_browser is None:
|
||||
_global_browser = CustomBrowser(
|
||||
config=BrowserConfig(
|
||||
headless=False,
|
||||
disable_security=disable_security,
|
||||
window_w=window_w,
|
||||
window_h=window_h,
|
||||
save_recording_path=save_recording_path,
|
||||
save_trace_path=save_trace_path,
|
||||
enable_recording=enable_recording,
|
||||
task=task,
|
||||
add_infos=add_infos,
|
||||
max_steps=max_steps,
|
||||
use_vision=use_vision,
|
||||
max_actions_per_step=max_actions_per_step,
|
||||
tool_call_in_content=tool_call_in_content,
|
||||
browser_context=browser_context # Explicit keyword argument
|
||||
extra_chromium_args=[f"--window-size={window_w},{window_h}"],
|
||||
)
|
||||
)
|
||||
|
||||
# Initialize values for streaming
|
||||
html_content = "<div>Starting browser...</div>"
|
||||
final_result = errors = model_actions = model_thoughts = ""
|
||||
recording = trace = None
|
||||
# Create or reuse browser context
|
||||
if _global_browser_context is None:
|
||||
_global_browser_context = await _global_browser.new_context(
|
||||
config=BrowserContextConfig(
|
||||
trace_path=save_trace_path if save_trace_path else None,
|
||||
save_recording_path=save_recording_path if save_recording_path else None,
|
||||
no_viewport=False,
|
||||
browser_window_size=BrowserContextWindowSize(
|
||||
width=window_w, height=window_h
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
# Periodically update the stream while the agent task is running
|
||||
while not agent_task.done():
|
||||
try:
|
||||
html_content = await capture_screenshot(browser_context)
|
||||
except Exception as e:
|
||||
html_content = f"<div class='error'>Screenshot error: {str(e)}</div>"
|
||||
|
||||
yield [
|
||||
html_content,
|
||||
final_result,
|
||||
errors,
|
||||
model_actions,
|
||||
model_thoughts,
|
||||
recording,
|
||||
trace,
|
||||
]
|
||||
await asyncio.sleep(0.01)
|
||||
# Run the browser agent in the background
|
||||
agent_task = asyncio.create_task(
|
||||
run_browser_agent(
|
||||
agent_type=agent_type,
|
||||
llm_provider=llm_provider,
|
||||
llm_model_name=llm_model_name,
|
||||
llm_temperature=llm_temperature,
|
||||
llm_base_url=llm_base_url,
|
||||
llm_api_key=llm_api_key,
|
||||
use_own_browser=use_own_browser,
|
||||
headless=headless,
|
||||
disable_security=disable_security,
|
||||
window_w=window_w,
|
||||
window_h=window_h,
|
||||
save_recording_path=save_recording_path,
|
||||
save_trace_path=save_trace_path,
|
||||
enable_recording=enable_recording,
|
||||
task=task,
|
||||
add_infos=add_infos,
|
||||
max_steps=max_steps,
|
||||
use_vision=use_vision,
|
||||
max_actions_per_step=max_actions_per_step,
|
||||
tool_call_in_content=tool_call_in_content,
|
||||
browser_context=_global_browser_context
|
||||
)
|
||||
)
|
||||
|
||||
# Once the agent task completes, get the results
|
||||
# Initialize values for streaming
|
||||
html_content = "<div>Using browser...</div>"
|
||||
final_result = errors = model_actions = model_thoughts = ""
|
||||
recording = trace = None
|
||||
|
||||
# Periodically update the stream while the agent task is running
|
||||
while not agent_task.done():
|
||||
try:
|
||||
result = await agent_task
|
||||
if isinstance(result, tuple) and len(result) == 6:
|
||||
(
|
||||
final_result,
|
||||
errors,
|
||||
model_actions,
|
||||
model_thoughts,
|
||||
recording,
|
||||
trace,
|
||||
) = result
|
||||
else:
|
||||
errors = "Unexpected result format from agent"
|
||||
html_content = await capture_screenshot(_global_browser_context)
|
||||
except Exception as e:
|
||||
errors = f"Agent error: {str(e)}"
|
||||
|
||||
html_content = f"<div class='error'>Screenshot error: {str(e)}</div>"
|
||||
|
||||
yield [
|
||||
html_content,
|
||||
final_result,
|
||||
@@ -434,10 +412,30 @@ async def run_with_stream(
|
||||
recording,
|
||||
trace,
|
||||
]
|
||||
await asyncio.sleep(0.01)
|
||||
|
||||
# Once the agent task completes, get the results
|
||||
try:
|
||||
result = await agent_task
|
||||
if isinstance(result, tuple) and len(result) == 6:
|
||||
final_result, errors, model_actions, model_thoughts, recording, trace = result
|
||||
else:
|
||||
errors = "Unexpected result format from agent"
|
||||
except Exception as e:
|
||||
errors = f"Agent error: {str(e)}"
|
||||
|
||||
yield [
|
||||
html_content,
|
||||
final_result,
|
||||
errors,
|
||||
model_actions,
|
||||
model_thoughts,
|
||||
recording,
|
||||
trace,
|
||||
]
|
||||
|
||||
except Exception as e:
|
||||
import traceback
|
||||
|
||||
yield [
|
||||
f"<div class='error'>Browser error: {str(e)}</div>",
|
||||
"",
|
||||
@@ -447,9 +445,30 @@ async def run_with_stream(
|
||||
None,
|
||||
None,
|
||||
]
|
||||
|
||||
# Update the main function to handle cleanup
|
||||
def main():
|
||||
async def cleanup():
|
||||
global _global_browser, _global_browser_context
|
||||
if _global_browser_context:
|
||||
await _global_browser_context.close()
|
||||
if _global_browser:
|
||||
await _global_browser.close()
|
||||
_global_browser = None
|
||||
_global_browser_context = None
|
||||
|
||||
parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent")
|
||||
parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
|
||||
parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
|
||||
parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI")
|
||||
parser.add_argument("--dark-mode", action="store_true", help="Enable dark mode")
|
||||
args = parser.parse_args()
|
||||
|
||||
try:
|
||||
demo = create_ui(theme_name=args.theme)
|
||||
demo.launch(server_name=args.ip, server_port=args.port)
|
||||
finally:
|
||||
if browser:
|
||||
await browser.close()
|
||||
asyncio.get_event_loop().run_until_complete(cleanup())
|
||||
|
||||
from gradio.themes import Citrus, Default, Glass, Monochrome, Ocean, Origin, Soft, Base
|
||||
|
||||
@@ -733,16 +752,5 @@ def create_ui(theme_name="Ocean"):
|
||||
|
||||
return demo
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent")
|
||||
parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
|
||||
parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
|
||||
parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI")
|
||||
parser.add_argument("--dark-mode", action="store_true", help="Enable dark mode")
|
||||
args = parser.parse_args()
|
||||
|
||||
demo = create_ui(theme_name=args.theme)
|
||||
demo.launch(server_name=args.ip, server_port=args.port)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
Reference in New Issue
Block a user