stream function only

This commit is contained in:
katiue
2025-01-10 01:13:19 +07:00
parent d466231034
commit cae23eb16e
7 changed files with 401 additions and 352 deletions

31
.gradio/certificate.pem Normal file
View File

@@ -0,0 +1,31 @@
-----BEGIN CERTIFICATE-----
MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
-----END CERTIFICATE-----

View File

@@ -69,6 +69,11 @@ class CustomAgent(Agent):
max_actions_per_step: int = 10,
tool_call_in_content: bool = True,
):
# Store tool_call_in_content before calling parent's __init__
self.tool_call_in_content = tool_call_in_content
self.add_infos = add_infos
# Call parent's __init__ without tool_call_in_content
super().__init__(
task=task,
llm=llm,
@@ -85,9 +90,9 @@ class CustomAgent(Agent):
include_attributes=include_attributes,
max_error_length=max_error_length,
max_actions_per_step=max_actions_per_step,
tool_call_in_content=tool_call_in_content,
)
self.add_infos = add_infos
# Initialize message manager with tool_call_in_content
self.message_manager = CustomMassageManager(
llm=self.llm,
task=self.task,
@@ -97,7 +102,7 @@ class CustomAgent(Agent):
include_attributes=self.include_attributes,
max_error_length=self.max_error_length,
max_actions_per_step=self.max_actions_per_step,
tool_call_in_content=tool_call_in_content,
tool_call_in_content=self.tool_call_in_content,
)
def _setup_action_models(self) -> None:

View File

@@ -12,7 +12,8 @@ from typing import List, Optional, Type
from browser_use.agent.message_manager.service import MessageManager
from browser_use.agent.message_manager.views import MessageHistory
from browser_use.agent.prompts import SystemPrompt
from browser_use.agent.views import ActionResult, AgentStepInfo
from browser_use.agent.views import ActionResult
from .custom_views import CustomAgentStepInfo
from browser_use.browser.views import BrowserState
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import (
@@ -40,6 +41,7 @@ class CustomMassageManager(MessageManager):
max_actions_per_step: int = 10,
tool_call_in_content: bool = False,
):
self.tool_call_in_content = tool_call_in_content
super().__init__(
llm=llm,
task=task,
@@ -51,13 +53,17 @@ class CustomMassageManager(MessageManager):
include_attributes=include_attributes,
max_error_length=max_error_length,
max_actions_per_step=max_actions_per_step,
tool_call_in_content=tool_call_in_content,
)
# Custom: Move Task info to state_message
self.history = MessageHistory()
self._add_message_with_tokens(self.system_prompt)
tool_calls = [
tool_calls = self._create_tool_calls()
example_tool_call = self._create_example_tool_call(tool_calls)
self._add_message_with_tokens(example_tool_call)
def _create_tool_calls(self):
return [
{
'name': 'CustomAgentOutput',
'args': {
@@ -74,25 +80,25 @@ class CustomMassageManager(MessageManager):
'type': 'tool_call',
}
]
def _create_example_tool_call(self, tool_calls):
if self.tool_call_in_content:
# openai throws error if tool_calls are not responded -> move to content
example_tool_call = AIMessage(
return AIMessage(
content=f'{tool_calls}',
tool_calls=[],
)
else:
example_tool_call = AIMessage(
return AIMessage(
content=f'',
tool_calls=tool_calls,
)
self._add_message_with_tokens(example_tool_call)
def add_state_message(
self,
state: BrowserState,
result: Optional[List[ActionResult]] = None,
step_info: Optional[AgentStepInfo] = None,
self,
state: BrowserState,
result: Optional[List[ActionResult]] = None,
step_info: Optional[CustomAgentStepInfo] = None,
) -> None:
"""Add browser state as human message"""
@@ -105,7 +111,7 @@ class CustomMassageManager(MessageManager):
self._add_message_with_tokens(msg)
if r.error:
msg = HumanMessage(
content=str(r.error)[-self.max_error_length:]
content=str(r.error)[-self.max_error_length :]
)
self._add_message_with_tokens(msg)
result = None # if result in history, we dont want to add it again

View File

@@ -17,4 +17,4 @@ class CustomBrowser(Browser):
context: CustomBrowserContext = None,
) -> BrowserContext:
"""Create a browser context"""
return CustomBrowserContext(config=config, browser=self, context=context)
return CustomBrowserContext(config=config, browser=self, context=context)

View File

@@ -3,55 +3,66 @@
# @Author : wenshao
# @Email : wenshaoguo1026@gmail.com
# @Project : browser-use-webui
# @FileName: context.py
# @FileName: merged_context.py
import asyncio
import base64
import json
import logging
import os
from typing import TYPE_CHECKING
from playwright.async_api import Browser as PlaywrightBrowser, Page, BrowserContext as PlaywrightContext
from browser_use.browser.browser import Browser
from browser_use.browser.context import BrowserContext, BrowserContextConfig
from playwright.async_api import Browser as PlaywrightBrowser
if TYPE_CHECKING:
from .custom_browser import CustomBrowser
logger = logging.getLogger(__name__)
class CustomBrowserContext(BrowserContext):
def __init__(
self,
browser: "Browser",
browser: "CustomBrowser", # Forward declaration for CustomBrowser
config: BrowserContextConfig = BrowserContextConfig(),
context: BrowserContext = None,
context: PlaywrightContext = None
):
super(CustomBrowserContext, self).__init__(browser=browser, config=config)
self.context = context
self._impl_context = context # Rename to avoid confusion
self._page = None
self.session = None # Add session attribute
async def _create_context(self, browser: PlaywrightBrowser):
@property
def impl_context(self) -> PlaywrightContext:
    """Expose the wrapped Playwright context object.

    Returns:
        The value currently held in ``self._impl_context``. This is ``None``
        when no context was passed to the constructor and none has been
        created yet, and again after ``__aexit__`` has run.
    """
    wrapped = self._impl_context
    return wrapped
async def _create_context(self, browser: PlaywrightBrowser = None):
"""Creates a new browser context with anti-detection measures and loads cookies if available."""
# If we have a context, return it directly
if self.context:
return self.context
if self.browser.config.chrome_instance_path and len(browser.contexts) > 0:
# Connect to existing Chrome instance instead of creating new one
context = browser.contexts[0]
else:
# Original code for creating new context
context = await browser.new_context(
viewport=self.config.browser_window_size,
no_viewport=False,
user_agent=(
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"
),
java_script_enabled=True,
bypass_csp=self.config.disable_security,
ignore_https_errors=self.config.disable_security,
record_video_dir=self.config.save_recording_path,
record_video_size=self.config.browser_window_size, # set record video size, same as windows size
)
if self._impl_context:
return self._impl_context
# If a Playwright browser is not provided, get it from our custom browser
pw_browser = browser or await self.browser.get_playwright_browser()
context_args = {
'viewport': self.config.browser_window_size,
'no_viewport': False,
'bypass_csp': self.config.disable_security,
'ignore_https_errors': self.config.disable_security
}
if self.config.save_recording_path:
context_args.update({
'record_video_dir': self.config.save_recording_path,
'record_video_size': self.config.browser_window_size
})
self._impl_context = await pw_browser.new_context(**context_args)
if self.config.trace_path:
await context.tracing.start(screenshots=True, snapshots=True, sources=True)
await self._impl_context.tracing.start(screenshots=True, snapshots=True, sources=True)
# Load cookies if they exist
if self.config.cookies_file and os.path.exists(self.config.cookies_file):
@@ -60,10 +71,10 @@ class CustomBrowserContext(BrowserContext):
logger.info(
f"Loaded {len(cookies)} cookies from {self.config.cookies_file}"
)
await context.add_cookies(cookies)
await self._impl_context.add_cookies(cookies)
# Expose anti-detection scripts
await context.add_init_script(
await self._impl_context.add_init_script(
"""
// Webdriver property
Object.defineProperty(navigator, 'webdriver', {
@@ -93,4 +104,42 @@ class CustomBrowserContext(BrowserContext):
"""
)
return context
# Create an initial page
self._page = await self._impl_context.new_page()
await self._page.goto('about:blank') # Ensure page is ready
return self._impl_context
async def new_page(self) -> Page:
    """Open a fresh page in this context.

    The underlying context is created on demand via ``_create_context()``
    when ``self._impl_context`` is still unset.

    Returns:
        The page object returned by the underlying context's ``new_page()``.
    """
    context_ready = bool(self._impl_context)
    if not context_ready:
        # Lazily build the context; _create_context() stores it on self.
        await self._create_context()
    page = await self._impl_context.new_page()
    return page
async def __aenter__(self):
    """Enter the async context manager.

    Creates the underlying context through ``_create_context()`` unless one
    is already present, then returns this object so it can be bound by
    ``async with ... as ctx``.
    """
    if self._impl_context:
        return self
    await self._create_context()
    return self
async def __aexit__(self, *args):
    """Leave the async context manager and close the underlying context.

    Args:
        *args: The exception triple supplied by ``async with``; ignored.

    Any exception raised by the context's ``close()`` still propagates, but
    the cached references are always dropped first. That way a later
    ``__aenter__`` or ``new_page`` builds a new context instead of reusing
    one that is closed or half-closed.
    """
    context = self._impl_context
    if not context:
        return
    try:
        await context.close()
    finally:
        # Clear state even when close() fails. The page created in
        # _create_context() belongs to this context and is dead once the
        # context is closed, so drop it as well.
        self._impl_context = None
        self._page = None
@property
def pages(self):
    """List the pages of the underlying context.

    Returns:
        ``self._impl_context.pages`` when a context exists, otherwise a new
        empty list.
    """
    context = self._impl_context
    if not context:
        return []
    return context.pages
async def get_state(self, **kwargs):
    """Return the browser state, or ``None`` when there is nothing to inspect.

    Args:
        **kwargs: Forwarded unchanged to the parent class's ``get_state``.

    Returns:
        The parent implementation's result when a context exists and has at
        least one page; ``None`` otherwise.
    """
    context = self._impl_context
    if not context:
        return None
    if not context.pages:
        # No open page: skip the parent call entirely.
        return None
    return await super().get_state(**kwargs)
async def get_pages(self):
    """Awaitable counterpart to reading the context's page list.

    Returns:
        ``self._impl_context.pages`` when a context exists, otherwise a new
        empty list.
    """
    context = self._impl_context
    return context.pages if context else []

View File

@@ -28,18 +28,4 @@ async def capture_screenshot(browser_context: BrowserContext) -> str:
except Exception as e:
return f"<div class='error'>Screenshot failed: {str(e)}</div>"
except Exception as e:
return f"<div class='error'>Screenshot error: {str(e)}</div>"
async def stream_browser_view(browser_context: BrowserContext) -> AsyncGenerator[str, None]:
"""Stream browser view to the UI"""
try:
while True:
try:
screenshot_html = await capture_screenshot(browser_context)
yield screenshot_html
await asyncio.sleep(0.2) # 5 FPS
except Exception as e:
yield f"<div class='error'>Screenshot error: {str(e)}</div>"
await asyncio.sleep(1) # Wait before retrying
except Exception as e:
yield f"<div class='error'>Stream error: {str(e)}</div>"
return f"<div class='error'>Screenshot error: {str(e)}</div>"

550
webui.py
View File

@@ -6,28 +6,23 @@
# @FileName: webui.py
import pdb
import glob
from dotenv import load_dotenv
load_dotenv()
import argparse
import os
import gradio as gr
import argparse
from gradio.themes import Base, Default, Soft, Monochrome, Glass, Origin, Citrus, Ocean
import os
import asyncio
import os, glob
from browser_use.agent.service import Agent
from playwright.async_api import async_playwright
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import (
BrowserContextConfig,
BrowserContextWindowSize,
)
from playwright.async_api import async_playwright
from browser_use.agent.service import Agent
from src.browser.custom_browser import CustomBrowser
from src.controller.custom_controller import CustomController
from src.agent.custom_agent import CustomAgent
from src.agent.custom_prompts import CustomSystemPrompt
from src.browser.custom_browser import CustomBrowser
@@ -35,8 +30,10 @@ from src.browser.custom_context import BrowserContextConfig
from src.controller.custom_controller import CustomController
from src.utils import utils
from src.utils.utils import update_model_dropdown
from src.utils.file_utils import get_latest_files
from src.utils.stream_utils import capture_screenshot
async def run_browser_agent(
agent_type,
llm_provider,
@@ -50,86 +47,84 @@ async def run_browser_agent(
window_w,
window_h,
save_recording_path,
save_trace_path,
enable_recording,
task,
add_infos,
max_steps,
use_vision,
max_actions_per_step,
tool_call_in_content
tool_call_in_content,
browser_context=None
):
# Disable recording if the checkbox is unchecked
if not enable_recording:
save_recording_path = None
# Ensure the recording directory exists if recording is enabled
if save_recording_path:
os.makedirs(save_recording_path, exist_ok=True)
# Get the list of existing videos before the agent runs
existing_videos = set()
if save_recording_path:
existing_videos = set(
glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4"))
+ glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]"))
"""Run the browser agent with proper browser context initialization"""
browser = None
try:
if browser_context is None:
browser = CustomBrowser(
config=BrowserConfig(
headless=headless,
disable_security=disable_security,
extra_chromium_args=[f'--window-size={window_w},{window_h}'],
)
)
browser_context = await browser.new_context(
config=BrowserContextConfig(
trace_path='./tmp/traces',
save_recording_path=save_recording_path if enable_recording else None,
no_viewport=False,
browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
)
)
# Run the agent
llm = utils.get_llm_model(
provider=llm_provider,
model_name=llm_model_name,
temperature=llm_temperature,
base_url=llm_base_url,
api_key=llm_api_key,
)
# Run the agent
llm = utils.get_llm_model(
provider=llm_provider,
model_name=llm_model_name,
temperature=llm_temperature,
base_url=llm_base_url,
api_key=llm_api_key,
)
if agent_type == "org":
final_result, errors, model_actions, model_thoughts = await run_org_agent(
llm=llm,
headless=headless,
disable_security=disable_security,
window_w=window_w,
window_h=window_h,
save_recording_path=save_recording_path,
save_trace_path=save_trace_path,
task=task,
max_steps=max_steps,
use_vision=use_vision,
max_actions_per_step=max_actions_per_step,
tool_call_in_content=tool_call_in_content
)
elif agent_type == "custom":
final_result, errors, model_actions, model_thoughts = await run_custom_agent(
llm=llm,
use_own_browser=use_own_browser,
headless=headless,
disable_security=disable_security,
window_w=window_w,
window_h=window_h,
save_recording_path=save_recording_path,
save_trace_path=save_trace_path,
task=task,
add_infos=add_infos,
max_steps=max_steps,
use_vision=use_vision,
max_actions_per_step=max_actions_per_step,
tool_call_in_content=tool_call_in_content
)
else:
raise ValueError(f"Invalid agent type: {agent_type}")
# Get the list of videos after the agent runs (if recording is enabled)
latest_video = None
if save_recording_path:
new_videos = set(
glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4"))
+ glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]"))
)
if new_videos - existing_videos:
latest_video = list(new_videos - existing_videos)[0] # Get the first new video
return final_result, errors, model_actions, model_thoughts, latest_video
if agent_type == "org":
result = await run_org_agent(
llm=llm,
headless=headless,
disable_security=disable_security,
window_w=window_w,
window_h=window_h,
save_recording_path=save_recording_path,
task=task,
max_steps=max_steps,
use_vision=use_vision,
max_actions_per_step=max_actions_per_step,
tool_call_in_content=tool_call_in_content,
browser_context=browser_context,
)
elif agent_type == "custom":
result = await run_custom_agent(
llm=llm,
use_own_browser=use_own_browser,
headless=headless,
disable_security=disable_security,
window_w=window_w,
window_h=window_h,
save_recording_path=save_recording_path,
task=task,
add_infos=add_infos,
max_steps=max_steps,
use_vision=use_vision,
max_actions_per_step=max_actions_per_step,
tool_call_in_content=tool_call_in_content,
browser_context=browser_context,
)
else:
raise ValueError(f"Invalid agent type: {agent_type}")
return result
finally:
if browser:
await browser.close()
async def run_org_agent(
llm,
@@ -138,48 +133,65 @@ async def run_org_agent(
window_w,
window_h,
save_recording_path,
save_trace_path,
task,
max_steps,
use_vision,
max_actions_per_step,
tool_call_in_content
tool_call_in_content,
browser_context=None, # receive context
):
browser = Browser(
config=BrowserConfig(
headless=headless,
disable_security=disable_security,
extra_chromium_args=[f"--window-size={window_w},{window_h}"],
)
)
async with await browser.new_context(
config=BrowserContextConfig(
trace_path=save_trace_path if save_trace_path else None,
save_recording_path=save_recording_path if save_recording_path else None,
no_viewport=False,
browser_window_size=BrowserContextWindowSize(
width=window_w, height=window_h
),
browser = None
if browser_context is None:
browser = Browser(
config=BrowserConfig(
headless=False, # Force non-headless for streaming
disable_security=disable_security,
extra_chromium_args=[f'--window-size={window_w},{window_h}'],
)
) as browser_context:
)
async with await browser.new_context(
config=BrowserContextConfig(
trace_path='./tmp/traces',
save_recording_path=save_recording_path if save_recording_path else None,
no_viewport=False,
browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
)
) as browser_context_in:
agent = Agent(
task=task,
llm=llm,
use_vision=use_vision,
browser_context=browser_context_in,
)
history = await agent.run(max_steps=max_steps)
final_result = history.final_result()
errors = history.errors()
model_actions = history.model_actions()
model_thoughts = history.model_thoughts()
recorded_files = get_latest_files(save_recording_path)
trace_file = get_latest_files(save_recording_path + "/../traces")
await browser.close()
return final_result, errors, model_actions, model_thoughts, recorded_files.get('.webm'), trace_file.get('.zip')
else:
# Reuse existing context
agent = Agent(
task=task,
llm=llm,
use_vision=use_vision,
browser_context=browser_context,
max_actions_per_step=max_actions_per_step,
tool_call_in_content=tool_call_in_content
browser_context=browser_context,
)
history = await agent.run(max_steps=max_steps)
final_result = history.final_result()
errors = history.errors()
model_actions = history.model_actions()
model_thoughts = history.model_thoughts()
await browser.close()
return final_result, errors, model_actions, model_thoughts
recorded_files = get_latest_files(save_recording_path)
trace_file = get_latest_files(save_recording_path + "/../traces")
return final_result, errors, model_actions, model_thoughts, recorded_files.get('.webm'), trace_file.get('.zip')
async def run_custom_agent(
llm,
@@ -189,17 +201,17 @@ async def run_custom_agent(
window_w,
window_h,
save_recording_path,
save_trace_path,
task,
add_infos,
max_steps,
use_vision,
max_actions_per_step,
tool_call_in_content
tool_call_in_content,
browser_context=None, # receive context
):
controller = CustomController()
playwright = None
browser_context_ = None
browser = None
try:
if use_own_browser:
playwright = await async_playwright().start()
@@ -210,12 +222,12 @@ async def run_custom_agent(
chrome_exe = None
elif not os.path.exists(chrome_exe):
raise ValueError(f"Chrome executable not found at {chrome_exe}")
if chrome_use_data == "":
chrome_use_data = None
browser_context_ = await playwright.chromium.launch_persistent_context(
user_data_dir=chrome_use_data,
user_data_dir=chrome_use_data if chrome_use_data else "",
executable_path=chrome_exe,
no_viewport=False,
headless=headless, # 保持浏览器窗口可见
@@ -232,26 +244,8 @@ async def run_custom_agent(
else:
browser_context_ = None
browser = CustomBrowser(
config=BrowserConfig(
headless=headless,
disable_security=disable_security,
extra_chromium_args=[f"--window-size={window_w},{window_h}"],
)
)
async with await browser.new_context(
config=BrowserContextConfig(
trace_path=save_trace_path if save_trace_path else None,
save_recording_path=save_recording_path
if save_recording_path
else None,
no_viewport=False,
browser_window_size=BrowserContextWindowSize(
width=window_w, height=window_h
),
),
context=browser_context_,
) as browser_context:
if browser_context is not None:
# Reuse context
agent = CustomAgent(
task=task,
add_infos=add_infos,
@@ -264,11 +258,50 @@ async def run_custom_agent(
tool_call_in_content=tool_call_in_content
)
history = await agent.run(max_steps=max_steps)
final_result = history.final_result()
errors = history.errors()
model_actions = history.model_actions()
model_thoughts = history.model_thoughts()
recorded_files = get_latest_files(save_recording_path)
trace_file = get_latest_files(save_recording_path + "/../traces")
return final_result, errors, model_actions, model_thoughts, recorded_files.get('.webm'), trace_file.get('.zip')
else:
browser = CustomBrowser(
config=BrowserConfig(
headless=headless,
disable_security=disable_security,
extra_chromium_args=[f'--window-size={window_w},{window_h}'],
)
)
async with await browser.new_context(
config=BrowserContextConfig(
trace_path='./tmp/result_processing',
save_recording_path=save_recording_path if save_recording_path else None,
no_viewport=False,
browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
),
context=browser_context_
) as browser_context_in:
agent = CustomAgent(
task=task,
add_infos=add_infos,
use_vision=use_vision,
llm=llm,
browser_context=browser_context_in,
controller=controller,
system_prompt_class=CustomSystemPrompt,
max_actions_per_step=max_actions_per_step,
tool_call_in_content=tool_call_in_content
)
history = await agent.run(max_steps=max_steps)
final_result = history.final_result()
errors = history.errors()
model_actions = history.model_actions()
model_thoughts = history.model_thoughts()
recorded_files = get_latest_files(save_recording_path)
trace_file = get_latest_files(save_recording_path + "/../traces")
except Exception as e:
import traceback
@@ -278,6 +311,8 @@ async def run_custom_agent(
errors = str(e) + "\n" + traceback.format_exc()
model_actions = ""
model_thoughts = ""
recorded_files = {}
trace_file = {}
finally:
# Explicitly close the persistent context
if browser_context_:
@@ -286,20 +321,9 @@ async def run_custom_agent(
# Stop the Playwright object
if playwright:
await playwright.stop()
await browser.close()
return final_result, errors, model_actions, model_thoughts
# Define the theme map globally
theme_map = {
"Default": Default(),
"Soft": Soft(),
"Monochrome": Monochrome(),
"Glass": Glass(),
"Origin": Origin(),
"Citrus": Citrus(),
"Ocean": Ocean(),
"Base": Base()
}
if browser:
await browser.close()
return final_result, errors, model_actions, model_thoughts, trace_file.get('.webm'), recorded_files.get('.zip')
async def run_with_stream(
agent_type,
@@ -314,8 +338,6 @@ async def run_with_stream(
window_w,
window_h,
save_recording_path,
save_trace_path,
enable_recording,
task,
add_infos,
max_steps,
@@ -338,7 +360,7 @@ async def run_with_stream(
# Create a new browser context
async with await browser.new_context(
config=BrowserContextConfig(
trace_path=save_trace_path,
trace_path="./tmp/traces",
save_recording_path=save_recording_path,
no_viewport=False,
browser_window_size=BrowserContextWindowSize(
@@ -349,26 +371,25 @@ async def run_with_stream(
# Run the browser agent in the background
agent_task = asyncio.create_task(
run_browser_agent(
agent_type,
llm_provider,
llm_model_name,
llm_temperature,
llm_base_url,
llm_api_key,
use_own_browser,
headless,
disable_security,
window_w,
window_h,
save_recording_path,
save_trace_path,
enable_recording,
task,
add_infos,
max_steps,
use_vision,
max_actions_per_step,
tool_call_in_content,
agent_type=agent_type,
llm_provider=llm_provider,
llm_model_name=llm_model_name,
llm_temperature=llm_temperature,
llm_base_url=llm_base_url,
llm_api_key=llm_api_key,
use_own_browser=use_own_browser,
headless=headless,
disable_security=disable_security,
window_w=window_w,
window_h=window_h,
save_recording_path=save_recording_path,
enable_recording=True, # Add this parameter
task=task,
add_infos=add_infos,
max_steps=max_steps,
use_vision=use_vision,
max_actions_per_step=max_actions_per_step,
tool_call_in_content=tool_call_in_content,
browser_context=browser_context # Explicit keyword argument
)
)
@@ -439,6 +460,21 @@ async def run_with_stream(
if browser:
await browser.close()
from gradio.themes import Citrus, Default, Glass, Monochrome, Ocean, Origin, Soft, Base
# Define the theme map globally
theme_map = {
"Default": Default(),
"Soft": Soft(),
"Monochrome": Monochrome(),
"Glass": Glass(),
"Origin": Origin(),
"Citrus": Citrus(),
"Ocean": Ocean(),
"Base": Base()
}
# Create the Gradio UI
def create_ui(theme_name="Ocean"):
css = """
.gradio-container {
@@ -457,19 +493,8 @@ def create_ui(theme_name="Ocean"):
}
"""
js = """
function refresh() {
const url = new URL(window.location);
if (url.searchParams.get('__theme') !== 'dark') {
url.searchParams.set('__theme', 'dark');
window.location.href = url.href;
}
}
"""
with gr.Blocks(
title="Browser Use WebUI", theme=theme_map[theme_name], css=css, js=js
) as demo:
with gr.Blocks(title="Browser Use WebUI", theme=theme_map[theme_name], css=css) as demo:
# Header
with gr.Row():
gr.Markdown(
"""
@@ -550,7 +575,7 @@ def create_ui(theme_name="Ocean"):
value=os.getenv(f"{llm_provider.value.upper()}_API_KEY", ""), # Default to .env value
info="Your API key (leave blank to use .env)"
)
with gr.TabItem("🌐 Browser Settings", id=3):
with gr.Group():
with gr.Row():
@@ -595,128 +620,75 @@ def create_ui(theme_name="Ocean"):
interactive=True, # Allow editing only if recording is enabled
)
save_trace_path = gr.Textbox(
label="Trace Path",
placeholder="e.g. ./tmp/traces",
value="./tmp/traces",
info="Path to save Agent traces",
interactive=True,
)
with gr.TabItem("🤖 Run Agent", id=4):
task = gr.Textbox(
label="Task Description",
lines=4,
placeholder="Enter your task here...",
value="go to google.com and type 'OpenAI' click search and give me the first url",
info="Describe what you want the agent to do",
)
add_infos = gr.Textbox(
label="Additional Information",
lines=3,
placeholder="Add any helpful context or instructions...",
info="Optional hints to help the LLM complete the task",
add_infos = gr.Textbox(lines=3, label="Additional Information")
# Results
with gr.Tab("📊 Results"):
browser_view = gr.HTML(
value="<div>Waiting for browser session...</div>",
label="Live Browser View",
)
final_result_output = gr.Textbox(label="Final Result", lines=3)
errors_output = gr.Textbox(label="Errors", lines=3)
model_actions_output = gr.Textbox(label="Model Actions", lines=3)
model_thoughts_output = gr.Textbox(label="Model Thoughts", lines=3)
recording_file = gr.Video(label="Latest Recording")
trace_file = gr.File(label="Trace File")
with gr.Row():
run_button = gr.Button("▶️ Run Agent", variant="primary")
with gr.Row():
run_button = gr.Button("▶️ Run Agent", variant="primary", scale=2)
stop_button = gr.Button("⏹️ Stop", variant="stop", scale=1)
with gr.TabItem("📊 Results", id=5):
recording_display = gr.Video(label="Latest Recording")
with gr.Group():
gr.Markdown("### Results")
with gr.Row():
with gr.Column():
final_result_output = gr.Textbox(
label="Final Result", lines=3, show_label=True
)
with gr.Column():
errors_output = gr.Textbox(
label="Errors", lines=3, show_label=True
)
with gr.Row():
with gr.Column():
model_actions_output = gr.Textbox(
label="Model Actions", lines=3, show_label=True
)
with gr.Column():
model_thoughts_output = gr.Textbox(
label="Model Thoughts", lines=3, show_label=True
)
with gr.TabItem("🎥 Recordings", id=6):
def list_recordings(save_recording_path):
if not os.path.exists(save_recording_path):
return []
# Get all video files
recordings = glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4")) + glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]"))
# Sort recordings by creation time (oldest first)
recordings.sort(key=os.path.getctime)
# Add numbering to the recordings
numbered_recordings = []
for idx, recording in enumerate(recordings, start=1):
filename = os.path.basename(recording)
numbered_recordings.append((recording, f"{idx}. {filename}"))
return numbered_recordings
recordings_gallery = gr.Gallery(
label="Recordings",
value=list_recordings("./tmp/record_videos"),
columns=3,
height="auto",
object_fit="contain"
)
refresh_button = gr.Button("🔄 Refresh Recordings", variant="secondary")
refresh_button.click(
fn=list_recordings,
inputs=save_recording_path,
outputs=recordings_gallery
)
# Attach the callback to the LLM provider dropdown
llm_provider.change(
lambda provider, api_key, base_url: update_model_dropdown(provider, api_key, base_url),
inputs=[llm_provider, llm_api_key, llm_base_url],
outputs=llm_model_name
)
# Add this after defining the components
enable_recording.change(
lambda enabled: gr.update(interactive=enabled),
inputs=enable_recording,
outputs=save_recording_path
)
# Run button click handler
# Button logic
run_button.click(
fn=run_with_stream,
inputs=[
agent_type, llm_provider, llm_model_name, llm_temperature, llm_base_url, llm_api_key,
use_own_browser, headless, disable_security, window_w, window_h, save_recording_path, save_trace_path,
enable_recording, task, add_infos, max_steps, use_vision, max_actions_per_step, tool_call_in_content
agent_type,
llm_provider,
llm_model_name,
llm_temperature,
llm_base_url,
llm_api_key,
use_own_browser,
headless,
disable_security,
window_w,
window_h,
save_recording_path,
task,
add_infos,
max_steps,
use_vision,
max_actions_per_step,
tool_call_in_content,
],
outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output, recording_display],
outputs=[
browser_view,
final_result_output,
errors_output,
model_actions_output,
model_thoughts_output,
recording_file,
trace_file
],
queue=True,
)
return demo
def main():
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent")
parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI")
parser.add_argument("--dark-mode", action="store_true", help="Enable dark mode")
parser.add_argument("--ip", type=str, default="0.0.0.0", help="IP address to bind to")
parser.add_argument("--port", type=int, default=7860, help="Port to listen on")
parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys())
args = parser.parse_args()
demo = create_ui(theme_name=args.theme)
demo.launch(server_name=args.ip, server_port=args.port)
if __name__ == '__main__':
main()
ui = create_ui(theme_name=args.theme)
ui.launch(server_name=args.ip, server_port=args.port, share=True)