diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..8b09300
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,11 @@
+{
+ "python.analysis.typeCheckingMode": "basic",
+ "[python]": {
+ "editor.defaultFormatter": "charliermarsh.ruff",
+ "editor.formatOnSave": true,
+ "editor.codeActionsOnSave": {
+ "source.fixAll.ruff": "explicit",
+ "source.organizeImports.ruff": "explicit"
+ }
+ }
+}
diff --git a/README.md b/README.md
index 5d6363e..9d9eb6c 100644
--- a/README.md
+++ b/README.md
@@ -1,43 +1,53 @@
-# Browser-Use WebUI
+
-## Background
+
-This project builds upon the foundation of the [browser-use](https://github.com/browser-use/browser-use), which is designed to make websites accessible for AI agents. We have enhanced the original capabilities by providing:
+[GitHub Stars](https://github.com/browser-use/web-ui/stargazers)
+[Discord](https://link.browser-use.com/discord)
+[Documentation](https://docs.browser-use.com)
+[WarmShao on X](https://x.com/warmshao)
-1. **A Brand New WebUI:** We offer a comprehensive web interface that supports a wide range of `browser-use` functionalities. This UI is designed to be user-friendly and enables easy interaction with the browser agent.
+This project builds upon the foundation of the [browser-use](https://github.com/browser-use/browser-use), which is designed to make websites accessible for AI agents.
-2. **Expanded LLM Support:** We've integrated support for various Large Language Models (LLMs), including: Gemini, OpenAI, Azure OpenAI, Anthropic, DeepSeek, Ollama etc. And we plan to add support for even more models in the future.
+We would like to officially thank [WarmShao](https://github.com/warmshao) for his contribution to this project.
-3. **Custom Browser Support:** You can use your own browser with our tool, eliminating the need to re-login to sites or deal with other authentication challenges. This feature also supports high-definition screen recording.
+**WebUI:** Built on Gradio, it supports most `browser-use` functionality. This UI is designed to be user-friendly and enables easy interaction with the browser agent.
-4. **Customized Agent:** We've implemented a custom agent that enhances `browser-use` with Optimized prompts.
+**Expanded LLM Support:** We've integrated support for various Large Language Models (LLMs), including Gemini, OpenAI, Azure OpenAI, Anthropic, DeepSeek, and Ollama, and we plan to add support for even more models in the future.
-
+**Custom Browser Support:** You can use your own browser with our tool, eliminating the need to re-login to sites or deal with other authentication challenges. This feature also supports high-definition screen recording.
-**Changelog**
-- [x] **2025/01/06:** Thanks to @richard-devbot, a New and Well-Designed WebUI is released. [Video tutorial demo](https://github.com/warmshao/browser-use-webui/issues/1#issuecomment-2573393113).
+
+## Installation Guide
-## Environment Installation
+Read the [quickstart guide](https://docs.browser-use.com/quickstart#prepare-the-environment) or follow the steps below to get started.
-1. **Python Version:** Ensure you have Python 3.11 or higher installed.
-2. **Install `browser-use`:**
- ```bash
- pip install browser-use
- ```
-3. **Install Playwright:**
- ```bash
- playwright install
- ```
-4. **Install Dependencies:**
- ```bash
- pip install -r requirements.txt
- ```
-5. **Configure Environment Variables:**
- - Copy `.env.example` to `.env` and set your environment variables, including API keys for the LLM.
- - **If using your own browser:**
- - Set `CHROME_PATH` to the executable path of your browser (e.g., `C:\Program Files\Google\Chrome\Application\chrome.exe` on Windows).
- - Set `CHROME_USER_DATA` to the user data directory of your browser (e.g.,`C:\Users\\AppData\Local\Google\Chrome\User Data`).
+> Python 3.11 or higher is required.
+
+First, we recommend using [uv](https://docs.astral.sh/uv/) to set up the Python environment.
+
+```bash
+uv venv --python 3.11
+```
+
+and activate it with:
+
+```bash
+source .venv/bin/activate
+```
+
+Install the dependencies:
+
+```bash
+uv pip install -r requirements.txt
+```
+
+Then install playwright:
+
+```bash
+playwright install
+```
## Usage
@@ -50,3 +60,35 @@ This project builds upon the foundation of the [browser-use](https://github.com/
- Close all chrome windows
- Open the WebUI in a non-Chrome browser, such as Firefox or Edge. This is important because the persistent browser context will use the Chrome data when running the agent.
- Check the "Use Own Browser" option within the Browser Settings.
+
+## (Optional) Configure Environment Variables
+
+Copy `.env.example` to `.env`, then set your environment variables, including API keys for the LLM. To copy the file, run:
+
+```bash
+cp .env.example .env
+```
+
+**If using your own browser:** Set `CHROME_PATH` to the executable path of your browser and `CHROME_USER_DATA` to the user data directory of your browser.
+
+You can copy the examples below into your `.env` file.
+
+### Windows
+
+```env
+CHROME_PATH="C:\Program Files\Google\Chrome\Application\chrome.exe"
+CHROME_USER_DATA="C:\Users\YourUsername\AppData\Local\Google\Chrome\User Data"
+```
+
+> Note: Replace `YourUsername` with your actual Windows username.
+
+### Mac
+
+```env
+CHROME_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
+CHROME_USER_DATA="~/Library/Application Support/Google/Chrome/Profile 1"
+```
+
+## Changelog
+
+- [x] **2025/01/06:** Thanks to @richard-devbot, a New and Well-Designed WebUI is released. [Video tutorial demo](https://github.com/warmshao/browser-use-webui/issues/1#issuecomment-2573393113).
diff --git a/assets/web-ui.png b/assets/web-ui.png
new file mode 100644
index 0000000..383fffc
Binary files /dev/null and b/assets/web-ui.png differ
diff --git a/requirements.txt b/requirements.txt
index cdda0d1..1471909 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,5 +2,4 @@ browser-use
langchain-google-genai
pyperclip
gradio
-langchain-ollama
-
+langchain-ollama
\ No newline at end of file
diff --git a/webui.py b/webui.py
index eef1e3c..a1e81b4 100644
--- a/webui.py
+++ b/webui.py
@@ -4,62 +4,56 @@
# @Email : wenshaoguo1026@gmail.com
# @Project : browser-use-webui
# @FileName: webui.py
-import pdb
from dotenv import load_dotenv
load_dotenv()
import argparse
-
-import asyncio
+import os
import gradio as gr
-import asyncio
-import os
-from pprint import pprint
-from typing import List, Dict, Any
-
-from playwright.async_api import async_playwright
+from browser_use.agent.service import Agent
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import (
- BrowserContext,
BrowserContextConfig,
BrowserContextWindowSize,
)
-from browser_use.agent.service import Agent
+from playwright.async_api import async_playwright
-from src.browser.custom_browser import CustomBrowser, BrowserConfig
-from src.browser.custom_context import BrowserContext, BrowserContextConfig
-from src.controller.custom_controller import CustomController
from src.agent.custom_agent import CustomAgent
from src.agent.custom_prompts import CustomSystemPrompt
-
+from src.browser.custom_browser import BrowserConfig, CustomBrowser
+from src.browser.custom_context import BrowserContextConfig
+from src.controller.custom_controller import CustomController
from src.utils import utils
+
async def run_browser_agent(
- agent_type,
- llm_provider,
- llm_model_name,
- llm_temperature,
- llm_base_url,
- llm_api_key,
- use_own_browser,
- headless,
- disable_security,
- window_w,
- window_h,
- save_recording_path,
- task,
- add_infos,
- max_steps,
- use_vision
+ agent_type,
+ llm_provider,
+ llm_model_name,
+ llm_temperature,
+ llm_base_url,
+ llm_api_key,
+ use_own_browser,
+ headless,
+ disable_security,
+ window_w,
+ window_h,
+ save_recording_path,
+ task,
+ add_infos,
+ max_steps,
+ use_vision,
):
# Ensure the recording directory exists
os.makedirs(save_recording_path, exist_ok=True)
# Get the list of existing videos before the agent runs
- existing_videos = set(glob.glob(os.path.join(save_recording_path, '*.[mM][pP]4')) +
- glob.glob(os.path.join(save_recording_path, '*.[wW][eE][bB][mM]')))
+ existing_videos = set(
+ glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4"))
+ + glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]"))
+ )
# Run the agent
llm = utils.get_llm_model(
@@ -67,7 +61,7 @@ async def run_browser_agent(
model_name=llm_model_name,
temperature=llm_temperature,
base_url=llm_base_url,
- api_key=llm_api_key
+ api_key=llm_api_key,
)
if agent_type == "org":
final_result, errors, model_actions, model_thoughts = await run_org_agent(
@@ -79,7 +73,7 @@ async def run_browser_agent(
save_recording_path=save_recording_path,
task=task,
max_steps=max_steps,
- use_vision=use_vision
+ use_vision=use_vision,
)
elif agent_type == "custom":
final_result, errors, model_actions, model_thoughts = await run_custom_agent(
@@ -93,14 +87,16 @@ async def run_browser_agent(
task=task,
add_infos=add_infos,
max_steps=max_steps,
- use_vision=use_vision
+ use_vision=use_vision,
)
else:
raise ValueError(f"Invalid agent type: {agent_type}")
# Get the list of videos after the agent runs
- new_videos = set(glob.glob(os.path.join(save_recording_path, '*.[mM][pP]4')) +
- glob.glob(os.path.join(save_recording_path, '*.[wW][eE][bB][mM]')))
+ new_videos = set(
+ glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4"))
+ + glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]"))
+ )
# Find the newly created video
latest_video = None
@@ -109,31 +105,34 @@ async def run_browser_agent(
return final_result, errors, model_actions, model_thoughts, latest_video
+
async def run_org_agent(
- llm,
- headless,
- disable_security,
- window_w,
- window_h,
- save_recording_path,
- task,
- max_steps,
- use_vision
+ llm,
+ headless,
+ disable_security,
+ window_w,
+ window_h,
+ save_recording_path,
+ task,
+ max_steps,
+ use_vision,
):
browser = Browser(
config=BrowserConfig(
headless=headless,
disable_security=disable_security,
- extra_chromium_args=[f'--window-size={window_w},{window_h}'],
+ extra_chromium_args=[f"--window-size={window_w},{window_h}"],
)
)
async with await browser.new_context(
- config=BrowserContextConfig(
- trace_path='./tmp/traces',
- save_recording_path=save_recording_path if save_recording_path else None,
- no_viewport=False,
- browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
- )
+ config=BrowserContextConfig(
+ trace_path="./tmp/traces",
+ save_recording_path=save_recording_path if save_recording_path else None,
+ no_viewport=False,
+ browser_window_size=BrowserContextWindowSize(
+ width=window_w, height=window_h
+ ),
+ )
) as browser_context:
agent = Agent(
task=task,
@@ -150,18 +149,19 @@ async def run_org_agent(
await browser.close()
return final_result, errors, model_actions, model_thoughts
+
async def run_custom_agent(
- llm,
- use_own_browser,
- headless,
- disable_security,
- window_w,
- window_h,
- save_recording_path,
- task,
- add_infos,
- max_steps,
- use_vision
+ llm,
+ use_own_browser,
+ headless,
+ disable_security,
+ window_w,
+ window_h,
+ save_recording_path,
+ task,
+ add_infos,
+ max_steps,
+ use_vision,
):
controller = CustomController()
playwright = None
@@ -177,14 +177,14 @@ async def run_custom_agent(
no_viewport=False,
headless=headless, # 保持浏览器窗口可见
user_agent=(
- 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
- '(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+ "(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"
),
java_script_enabled=True,
bypass_csp=disable_security,
ignore_https_errors=disable_security,
record_video_dir=save_recording_path if save_recording_path else None,
- record_video_size={'width': window_w, 'height': window_h}
+ record_video_size={"width": window_w, "height": window_h},
)
else:
browser_context_ = None
@@ -193,17 +193,21 @@ async def run_custom_agent(
config=BrowserConfig(
headless=headless,
disable_security=disable_security,
- extra_chromium_args=[f'--window-size={window_w},{window_h}'],
+ extra_chromium_args=[f"--window-size={window_w},{window_h}"],
)
)
async with await browser.new_context(
- config=BrowserContextConfig(
- trace_path='./tmp/result_processing',
- save_recording_path=save_recording_path if save_recording_path else None,
- no_viewport=False,
- browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
+ config=BrowserContextConfig(
+ trace_path="./tmp/result_processing",
+ save_recording_path=save_recording_path
+ if save_recording_path
+ else None,
+ no_viewport=False,
+ browser_window_size=BrowserContextWindowSize(
+ width=window_w, height=window_h
),
- context=browser_context_
+ ),
+ context=browser_context_,
) as browser_context:
agent = CustomAgent(
task=task,
@@ -212,7 +216,7 @@ async def run_custom_agent(
llm=llm,
browser_context=browser_context,
controller=controller,
- system_prompt_class=CustomSystemPrompt
+ system_prompt_class=CustomSystemPrompt,
)
history = await agent.run(max_steps=max_steps)
@@ -223,6 +227,7 @@ async def run_custom_agent(
except Exception as e:
import traceback
+
traceback.print_exc()
final_result = ""
errors = str(e) + "\n" + traceback.format_exc()
@@ -240,10 +245,9 @@ async def run_custom_agent(
return final_result, errors, model_actions, model_thoughts
-import argparse
-import gradio as gr
-from gradio.themes import Base, Default, Soft, Monochrome, Glass, Origin, Citrus, Ocean
-import os, glob
+import glob
+
+from gradio.themes import Citrus, Default, Glass, Monochrome, Ocean, Origin, Soft
# Define the theme map globally
theme_map = {
@@ -253,9 +257,10 @@ theme_map = {
"Glass": Glass(),
"Origin": Origin(),
"Citrus": Citrus(),
- "Ocean": Ocean()
+ "Ocean": Ocean(),
}
+
def create_ui(theme_name="Ocean"):
css = """
.gradio-container {
@@ -283,25 +288,27 @@ def create_ui(theme_name="Ocean"):
}
}
"""
-
- with gr.Blocks(title="Browser Use WebUI", theme=theme_map[theme_name], css=css, js=js) as demo:
+
+ with gr.Blocks(
+ title="Browser Use WebUI", theme=theme_map[theme_name], css=css, js=js
+ ) as demo:
with gr.Row():
gr.Markdown(
"""
# 🌐 Browser Use WebUI
### Control your browser with AI assistance
""",
- elem_classes=["header-text"]
+ elem_classes=["header-text"],
)
-
+
with gr.Tabs() as tabs:
- with gr.TabItem("🤖 Agent Settings", id=1):
+ with gr.TabItem("⚙️ Agent Settings", id=1):
with gr.Group():
agent_type = gr.Radio(
["org", "custom"],
label="Agent Type",
value="custom",
- info="Select the type of agent to use"
+ info="Select the type of agent to use",
)
max_steps = gr.Slider(
minimum=1,
@@ -309,26 +316,33 @@ def create_ui(theme_name="Ocean"):
value=100,
step=1,
label="Max Run Steps",
- info="Maximum number of steps the agent will take"
+ info="Maximum number of steps the agent will take",
)
use_vision = gr.Checkbox(
label="Use Vision",
value=True,
- info="Enable visual processing capabilities"
+ info="Enable visual processing capabilities",
)
with gr.TabItem("🔧 LLM Configuration", id=2):
with gr.Group():
llm_provider = gr.Dropdown(
- ["anthropic", "openai", "gemini", "azure_openai", "deepseek", "ollama"],
+ [
+ "anthropic",
+ "openai",
+ "gemini",
+ "azure_openai",
+ "deepseek",
+ "ollama",
+ ],
label="LLM Provider",
- value="gemini",
- info="Select your preferred language model provider"
+ value="openai",
+ info="Select your preferred language model provider",
)
llm_model_name = gr.Textbox(
label="Model Name",
- value="gemini-2.0-flash-exp",
- info="Specify the model to use"
+ value="gpt-4o",
+ info="Specify the model to use",
)
llm_temperature = gr.Slider(
minimum=0.0,
@@ -336,17 +350,14 @@ def create_ui(theme_name="Ocean"):
value=1.0,
step=0.1,
label="Temperature",
- info="Controls randomness in model outputs"
+ info="Controls randomness in model outputs",
)
with gr.Row():
llm_base_url = gr.Textbox(
- label="Base URL",
- info="API endpoint URL (if required)"
+ label="Base URL", info="API endpoint URL (if required)"
)
llm_api_key = gr.Textbox(
- label="API Key",
- type="password",
- info="Your API key"
+ label="API Key", type="password", info="Your API key"
)
with gr.TabItem("🌐 Browser Settings", id=3):
@@ -355,51 +366,51 @@ def create_ui(theme_name="Ocean"):
use_own_browser = gr.Checkbox(
label="Use Own Browser",
value=False,
- info="Use your existing browser instance"
+ info="Use your existing browser instance",
)
headless = gr.Checkbox(
label="Headless Mode",
value=False,
- info="Run browser without GUI"
+ info="Run browser without GUI",
)
disable_security = gr.Checkbox(
label="Disable Security",
value=True,
- info="Disable browser security features"
+ info="Disable browser security features",
)
-
+
with gr.Row():
window_w = gr.Number(
label="Window Width",
- value=1920,
- info="Browser window width"
+ value=1280,
+ info="Browser window width",
)
window_h = gr.Number(
label="Window Height",
- value=1080,
- info="Browser window height"
+ value=1100,
+ info="Browser window height",
)
-
+
save_recording_path = gr.Textbox(
label="Recording Path",
placeholder="e.g. ./tmp/record_videos",
value="./tmp/record_videos",
- info="Path to save browser recordings"
+ info="Path to save browser recordings",
)
- with gr.TabItem("📝 Task Settings", id=4):
+ with gr.TabItem("🤖 Run Agent", id=4):
task = gr.Textbox(
label="Task Description",
lines=4,
placeholder="Enter your task here...",
value="go to google.com and type 'OpenAI' click search and give me the first url",
- info="Describe what you want the agent to do"
+ info="Describe what you want the agent to do",
)
add_infos = gr.Textbox(
label="Additional Information",
lines=3,
placeholder="Add any helpful context or instructions...",
- info="Optional hints to help the LLM complete the task"
+ info="Optional hints to help the LLM complete the task",
)
with gr.Row():
@@ -414,54 +425,74 @@ def create_ui(theme_name="Ocean"):
with gr.Row():
with gr.Column():
final_result_output = gr.Textbox(
- label="Final Result",
- lines=3,
- show_label=True
+ label="Final Result", lines=3, show_label=True
)
with gr.Column():
errors_output = gr.Textbox(
- label="Errors",
- lines=3,
- show_label=True
+ label="Errors", lines=3, show_label=True
)
with gr.Row():
with gr.Column():
model_actions_output = gr.Textbox(
- label="Model Actions",
- lines=3,
- show_label=True
+ label="Model Actions", lines=3, show_label=True
)
with gr.Column():
model_thoughts_output = gr.Textbox(
- label="Model Thoughts",
- lines=3,
- show_label=True
+ label="Model Thoughts", lines=3, show_label=True
)
# Run button click handler
run_button.click(
fn=run_browser_agent,
inputs=[
- agent_type, llm_provider, llm_model_name, llm_temperature,
- llm_base_url, llm_api_key, use_own_browser, headless,
- disable_security, window_w, window_h, save_recording_path,
- task, add_infos, max_steps, use_vision
+ agent_type,
+ llm_provider,
+ llm_model_name,
+ llm_temperature,
+ llm_base_url,
+ llm_api_key,
+ use_own_browser,
+ headless,
+ disable_security,
+ window_w,
+ window_h,
+ save_recording_path,
+ task,
+ add_infos,
+ max_steps,
+ use_vision,
+ ],
+ outputs=[
+ final_result_output,
+ errors_output,
+ model_actions_output,
+ model_thoughts_output,
+ recording_display,
],
- outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output, recording_display]
)
return demo
+
def main():
parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent")
- parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
+ parser.add_argument(
+ "--ip", type=str, default="127.0.0.1", help="IP address to bind to"
+ )
parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
- parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI")
+ parser.add_argument(
+ "--theme",
+ type=str,
+ default="Ocean",
+ choices=theme_map.keys(),
+ help="Theme to use for the UI",
+ )
parser.add_argument("--dark-mode", action="store_true", help="Enable dark mode")
args = parser.parse_args()
demo = create_ui(theme_name=args.theme)
demo.launch(server_name=args.ip, server_port=args.port)
-if __name__ == '__main__':
+
+if __name__ == "__main__":
main()