Merge pull request #83 from vvincent1234/fix/own_browser_on_mac
Fix using own browser on mac
This commit is contained in:
@@ -9,6 +9,10 @@ import logging
|
||||
import pdb
|
||||
import traceback
|
||||
from typing import Optional, Type
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
import os
|
||||
import base64
|
||||
import io
|
||||
|
||||
from browser_use.agent.prompts import SystemPrompt
|
||||
from browser_use.agent.service import Agent
|
||||
@@ -227,6 +231,119 @@ class CustomAgent(Agent):
|
||||
)
|
||||
if state:
|
||||
self._make_history_item(model_output, state, result)
|
||||
def create_history_gif(
    self,
    output_path: str = 'agent_history.gif',
    duration: int = 3000,
    show_goals: bool = True,
    show_task: bool = True,
    show_logo: bool = False,
    font_size: int = 40,
    title_font_size: int = 56,
    goal_font_size: int = 44,
    margin: int = 40,
    line_spacing: float = 1.5,
) -> None:
    """Create a GIF from the agent's history with overlaid task and goal text.

    Args:
        output_path: File the animated GIF is saved to.
        duration: Forwarded as ``duration`` to ``Image.save`` (Pillow
            interprets it as milliseconds per frame).
        show_goals: When true, each screenshot that has a model output is
            passed through ``_add_overlay_to_image`` with that step's
            ``current_state.thought``.
        show_task: When true (and ``self.task`` is set), a frame built by
            ``_create_task_frame`` is placed before the step frames.
        show_logo: When true, ``./static/browser-use.png`` is loaded, scaled
            to a height of 150px and handed to the frame helpers. A failure
            to load it is logged and otherwise ignored.
        font_size: Size of the regular font.
        title_font_size: Size of the title font.
        goal_font_size: Kept for interface compatibility. The helpers called
            here receive only the regular and title fonts, so this value
            does not influence the output.
        margin: Forwarded to ``_add_overlay_to_image``.
        line_spacing: Forwarded to ``_create_task_frame``.

    Returns:
        None. When the history is empty or its first entry has no
        screenshot, a warning is logged and no file is written.
    """
    import platform

    steps = self.history.history
    if not steps:
        logger.warning('No history to create GIF from')
        return

    # The task frame is built from the first screenshot, so it must exist
    if not steps[0].state.screenshot:
        logger.warning('No history or first screenshot to create GIF from')
        return

    # Try to load nicer fonts, in order of preference
    font_candidates = ['Helvetica', 'Arial', 'DejaVuSans', 'Verdana']
    if platform.system() == "Windows":
        # Need to specify the abs font path on Windows
        font_dir = os.getenv("WIN_FONT_DIR", "C:\\Windows\\Fonts")
        font_candidates = [os.path.join(font_dir, name + ".ttf") for name in font_candidates]

    for candidate in font_candidates:
        try:
            regular_font = ImageFont.truetype(candidate, font_size)
            title_font = ImageFont.truetype(candidate, title_font_size)
        except OSError:
            continue
        break
    else:
        # None of the preferred fonts could be opened
        regular_font = ImageFont.load_default()
        title_font = ImageFont.load_default()

    # Load logo if requested
    logo = None
    if show_logo:
        try:
            # resize() returns a new image, so the file handle can be released
            # as soon as the scaled copy exists
            with Image.open('./static/browser-use.png') as raw_logo:
                logo_height = 150
                aspect_ratio = raw_logo.width / raw_logo.height
                logo_width = int(logo_height * aspect_ratio)
                logo = raw_logo.resize((logo_width, logo_height), Image.Resampling.LANCZOS)
        except Exception as e:
            logger.warning(f'Could not load logo: {e}')

    images = []

    # Create task frame if requested
    if show_task and self.task:
        images.append(
            self._create_task_frame(
                self.task,
                steps[0].state.screenshot,
                title_font,
                regular_font,
                logo,
                line_spacing,
            )
        )

    # Process each history item; the step number counts skipped items as well
    for step_number, item in enumerate(steps, 1):
        screenshot = item.state.screenshot
        if not screenshot:
            continue

        # Convert base64 screenshot to PIL Image
        image = Image.open(io.BytesIO(base64.b64decode(screenshot)))

        if show_goals and item.model_output:
            image = self._add_overlay_to_image(
                image=image,
                step_number=step_number,
                goal_text=item.model_output.current_state.thought,
                regular_font=regular_font,
                title_font=title_font,
                margin=margin,
                logo=logo,
            )

        images.append(image)

    if not images:
        logger.warning('No images found in history to create GIF')
        return

    # Save the GIF: the first frame carries the remaining ones as appended frames
    images[0].save(
        output_path,
        save_all=True,
        append_images=images[1:],
        duration=duration,
        loop=0,
        optimize=False,
    )
    logger.info(f'Created GIF at {output_path}')
|
||||
|
||||
async def run(self, max_steps: int = 100) -> AgentHistoryList:
|
||||
"""Execute the task with maximum number of steps"""
|
||||
@@ -283,3 +400,6 @@ class CustomAgent(Agent):
|
||||
|
||||
if not self.injected_browser and self.browser:
|
||||
await self.browser.close()
|
||||
|
||||
if self.generate_gif:
|
||||
self.create_history_gif()
|
||||
|
||||
@@ -4,6 +4,16 @@
|
||||
# @ProjectName: browser-use-webui
|
||||
# @FileName: browser.py
|
||||
|
||||
import asyncio
|
||||
|
||||
from playwright.async_api import Browser as PlaywrightBrowser
|
||||
from playwright.async_api import (
|
||||
BrowserContext as PlaywrightBrowserContext,
|
||||
)
|
||||
from playwright.async_api import (
|
||||
Playwright,
|
||||
async_playwright,
|
||||
)
|
||||
from browser_use.browser.browser import Browser
|
||||
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
||||
from playwright.async_api import BrowserContext as PlaywrightBrowserContext
|
||||
@@ -15,36 +25,102 @@ from .custom_context import CustomBrowserContext
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class CustomBrowser(Browser):
|
||||
_global_context = None
|
||||
|
||||
async def new_context(
|
||||
self,
|
||||
config: BrowserContextConfig = BrowserContextConfig(),
|
||||
context: PlaywrightBrowserContext = None,
|
||||
config: BrowserContextConfig = BrowserContextConfig()
|
||||
) -> CustomBrowserContext:
|
||||
"""Create a browser context with persistence support"""
|
||||
persistence_config = BrowserPersistenceConfig.from_env()
|
||||
|
||||
if persistence_config.persistent_session:
|
||||
if CustomBrowser._global_context is not None:
|
||||
logger.info("Reusing existing persistent browser context")
|
||||
return CustomBrowser._global_context
|
||||
|
||||
context_instance = CustomBrowserContext(config=config, browser=self, context=context)
|
||||
CustomBrowser._global_context = context_instance
|
||||
logger.info("Created new persistent browser context")
|
||||
return context_instance
|
||||
|
||||
logger.info("Creating non-persistent browser context")
|
||||
return CustomBrowserContext(config=config, browser=self, context=context)
|
||||
return CustomBrowserContext(config=config, browser=self)
|
||||
|
||||
async def _setup_browser(self, playwright: Playwright) -> PlaywrightBrowser:
|
||||
"""Sets up and returns a Playwright Browser instance with anti-detection measures."""
|
||||
if self.config.wss_url:
|
||||
browser = await playwright.chromium.connect(self.config.wss_url)
|
||||
return browser
|
||||
elif self.config.chrome_instance_path:
|
||||
import subprocess
|
||||
|
||||
import requests
|
||||
|
||||
try:
|
||||
# Check if browser is already running
|
||||
response = requests.get('http://localhost:9222/json/version', timeout=2)
|
||||
if response.status_code == 200:
|
||||
logger.info('Reusing existing Chrome instance')
|
||||
browser = await playwright.chromium.connect_over_cdp(
|
||||
endpoint_url='http://localhost:9222',
|
||||
timeout=20000, # 20 second timeout for connection
|
||||
)
|
||||
return browser
|
||||
except requests.ConnectionError:
|
||||
logger.debug('No existing Chrome instance found, starting a new one')
|
||||
|
||||
# Start a new Chrome instance
|
||||
subprocess.Popen(
|
||||
[
|
||||
self.config.chrome_instance_path,
|
||||
'--remote-debugging-port=9222',
|
||||
],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
)
|
||||
|
||||
# Attempt to connect again after starting a new instance
|
||||
for _ in range(10):
|
||||
try:
|
||||
response = requests.get('http://localhost:9222/json/version', timeout=2)
|
||||
if response.status_code == 200:
|
||||
break
|
||||
except requests.ConnectionError:
|
||||
pass
|
||||
await asyncio.sleep(1)
|
||||
|
||||
try:
|
||||
browser = await playwright.chromium.connect_over_cdp(
|
||||
endpoint_url='http://localhost:9222',
|
||||
timeout=20000, # 20 second timeout for connection
|
||||
)
|
||||
return browser
|
||||
except Exception as e:
|
||||
logger.error(f'Failed to start a new Chrome instance.: {str(e)}')
|
||||
raise RuntimeError(
|
||||
' To start chrome in Debug mode, you need to close all existing Chrome instances and try again otherwise we can not connect to the instance.'
|
||||
)
|
||||
|
||||
async def close(self):
|
||||
"""Override close to respect persistence setting"""
|
||||
persistence_config = BrowserPersistenceConfig.from_env()
|
||||
if not persistence_config.persistent_session:
|
||||
if CustomBrowser._global_context is not None:
|
||||
await CustomBrowser._global_context.close()
|
||||
CustomBrowser._global_context = None
|
||||
await super().close()
|
||||
else:
|
||||
logger.info("Skipping browser close due to persistent session")
|
||||
try:
|
||||
disable_security_args = []
|
||||
if self.config.disable_security:
|
||||
disable_security_args = [
|
||||
'--disable-web-security',
|
||||
'--disable-site-isolation-trials',
|
||||
'--disable-features=IsolateOrigins,site-per-process',
|
||||
]
|
||||
|
||||
browser = await playwright.chromium.launch(
|
||||
headless=self.config.headless,
|
||||
args=[
|
||||
'--no-sandbox',
|
||||
'--disable-blink-features=AutomationControlled',
|
||||
'--disable-infobars',
|
||||
'--disable-background-timer-throttling',
|
||||
'--disable-popup-blocking',
|
||||
'--disable-backgrounding-occluded-windows',
|
||||
'--disable-renderer-backgrounding',
|
||||
'--disable-window-activation',
|
||||
'--disable-focus-on-load',
|
||||
'--no-first-run',
|
||||
'--no-default-browser-check',
|
||||
'--no-startup-window',
|
||||
'--window-position=0,0',
|
||||
# '--window-size=1280,1000',
|
||||
]
|
||||
+ disable_security_args
|
||||
+ self.config.extra_chromium_args,
|
||||
proxy=self.config.proxy,
|
||||
)
|
||||
|
||||
return browser
|
||||
except Exception as e:
|
||||
logger.error(f'Failed to initialize Playwright browser: {str(e)}')
|
||||
raise
|
||||
|
||||
@@ -22,22 +22,17 @@ class CustomBrowserContext(BrowserContext):
|
||||
def __init__(
|
||||
self,
|
||||
browser: "Browser",
|
||||
config: BrowserContextConfig = BrowserContextConfig(),
|
||||
context: PlaywrightBrowserContext = None,
|
||||
config: BrowserContextConfig = BrowserContextConfig()
|
||||
):
|
||||
super(CustomBrowserContext, self).__init__(browser=browser, config=config)
|
||||
self.context = context
|
||||
self._persistence_config = BrowserPersistenceConfig.from_env()
|
||||
|
||||
async def _create_context(self, browser: PlaywrightBrowser) -> PlaywrightBrowserContext:
|
||||
"""Creates a new browser context with anti-detection measures and loads cookies if available."""
|
||||
# If we have a context, return it directly
|
||||
if self.context:
|
||||
return self.context
|
||||
|
||||
# Check if we should use existing context for persistence
|
||||
if self._persistence_config.persistent_session and len(browser.contexts) > 0:
|
||||
logger.info("Using existing persistent context")
|
||||
if self.browser.config.chrome_instance_path and len(browser.contexts) > 0:
|
||||
# Connect to existing Chrome instance instead of creating new one
|
||||
context = browser.contexts[0]
|
||||
else:
|
||||
# Original code for creating new context
|
||||
@@ -99,8 +94,3 @@ class CustomBrowserContext(BrowserContext):
|
||||
)
|
||||
|
||||
return context
|
||||
|
||||
async def close(self):
    """Close the context unless a persistent session is configured.

    When ``self._persistence_config.persistent_session`` is truthy the
    parent ``close`` is not invoked and the call returns immediately.
    """
    if self._persistence_config.persistent_session:
        return
    await super().close()
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
# @Author : wenshao
|
||||
# @ProjectName: browser-use-webui
|
||||
# @FileName: test_browser_use.py
|
||||
import pdb
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
@@ -28,20 +29,29 @@ async def test_browser_use_org():
|
||||
BrowserContextWindowSize,
|
||||
)
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="azure_openai",
|
||||
# model_name="gpt-4o",
|
||||
# temperature=0.8,
|
||||
# base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
||||
# api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
||||
# )
|
||||
|
||||
llm = utils.get_llm_model(
|
||||
provider="azure_openai",
|
||||
model_name="gpt-4o",
|
||||
temperature=0.8,
|
||||
base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
||||
api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
||||
provider="deepseek",
|
||||
model_name="deepseek-chat",
|
||||
temperature=0.8
|
||||
)
|
||||
|
||||
window_w, window_h = 1920, 1080
|
||||
use_vision = False
|
||||
chrome_path = os.getenv("CHROME_PATH", None)
|
||||
|
||||
browser = Browser(
|
||||
config=BrowserConfig(
|
||||
headless=False,
|
||||
disable_security=True,
|
||||
chrome_instance_path=chrome_path,
|
||||
extra_chromium_args=[f"--window-size={window_w},{window_h}"],
|
||||
)
|
||||
)
|
||||
@@ -59,6 +69,7 @@ async def test_browser_use_org():
|
||||
task="go to google.com and type 'OpenAI' click search and give me the first url",
|
||||
llm=llm,
|
||||
browser_context=browser_context,
|
||||
use_vision=use_vision
|
||||
)
|
||||
history: AgentHistoryList = await agent.run(max_steps=10)
|
||||
|
||||
@@ -208,6 +219,122 @@ async def test_browser_use_custom():
|
||||
await browser.close()
|
||||
|
||||
|
||||
async def test_browser_use_custom_v2():
    """Run ``CustomAgent`` through ``CustomBrowser`` and print the outcome.

    A manual smoke script rather than an asserting test: it builds a
    DeepSeek LLM, opens a ``CustomBrowser`` (pointed at the Chrome binary
    named by ``CHROME_PATH`` when ``use_own_browser`` is on), runs a fixed
    search task for at most 10 steps and pretty-prints the final result,
    errors, model actions and model thoughts.

    Any exception is printed with its traceback instead of being raised.
    The browser context and the browser are closed in all cases.
    """
    from browser_use.browser.browser import BrowserConfig
    from browser_use.browser.context import BrowserContextWindowSize

    from src.agent.custom_agent import CustomAgent
    from src.agent.custom_prompts import CustomSystemPrompt
    from src.browser.custom_browser import CustomBrowser
    from src.browser.custom_context import BrowserContextConfig
    from src.controller.custom_controller import CustomController

    window_w, window_h = 1920, 1080

    # Alternative providers previously used with this script:
    #   azure_openai / gpt-4o (AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_API_KEY)
    #   gemini / gemini-2.0-flash-exp (GOOGLE_API_KEY)
    #   ollama / qwen2.5:7b
    llm = utils.get_llm_model(
        provider="deepseek",
        model_name="deepseek-chat",
        temperature=0.8
    )

    controller = CustomController()
    use_own_browser = True
    disable_security = True
    use_vision = False  # Set to False when using DeepSeek
    tool_call_in_content = True  # Set to True when using Ollama
    max_actions_per_step = 1

    # Declared before the try block so the cleanup can tell what was created
    browser = None
    browser_context = None

    try:
        # An unset or empty CHROME_PATH means "no own browser binary"
        chrome_path = (os.getenv("CHROME_PATH") or None) if use_own_browser else None

        browser = CustomBrowser(
            config=BrowserConfig(
                headless=False,
                disable_security=disable_security,
                chrome_instance_path=chrome_path,
                extra_chromium_args=[f"--window-size={window_w},{window_h}"],
            )
        )
        browser_context = await browser.new_context(
            config=BrowserContextConfig(
                trace_path="./tmp/traces",
                save_recording_path="./tmp/record_videos",
                no_viewport=False,
                browser_window_size=BrowserContextWindowSize(
                    width=window_w, height=window_h
                ),
            )
        )
        agent = CustomAgent(
            task="go to google.com and type 'OpenAI' click search and give me the first url",
            add_infos="",  # some hints for llm to complete the task
            llm=llm,
            browser=browser,
            browser_context=browser_context,
            controller=controller,
            system_prompt_class=CustomSystemPrompt,
            use_vision=use_vision,
            tool_call_in_content=tool_call_in_content,
            max_actions_per_step=max_actions_per_step
        )
        history: AgentHistoryList = await agent.run(max_steps=10)

        print("Final Result:")
        pprint(history.final_result(), indent=4)

        print("\nErrors:")
        pprint(history.errors(), indent=4)

        # e.g. xPaths the model clicked on
        print("\nModel Outputs:")
        pprint(history.model_actions(), indent=4)

        print("\nThoughts:")
        pprint(history.model_thoughts(), indent=4)
    except Exception:
        import traceback

        traceback.print_exc()
    finally:
        # Explicitly close the context first, then the browser that created it
        if browser_context:
            await browser_context.close()
        if browser:
            await browser.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
# asyncio.run(test_browser_use_org())
|
||||
asyncio.run(test_browser_use_custom())
|
||||
# asyncio.run(test_browser_use_custom())
|
||||
asyncio.run(test_browser_use_custom_v2())
|
||||
|
||||
173
webui.py
173
webui.py
@@ -44,7 +44,6 @@ from browser_use.browser.context import BrowserContextConfig, BrowserContextWind
|
||||
# Global variables for persistence
|
||||
_global_browser = None
|
||||
_global_browser_context = None
|
||||
_global_playwright = None
|
||||
|
||||
async def run_browser_agent(
|
||||
agent_type,
|
||||
@@ -54,6 +53,7 @@ async def run_browser_agent(
|
||||
llm_base_url,
|
||||
llm_api_key,
|
||||
use_own_browser,
|
||||
keep_browser_open,
|
||||
headless,
|
||||
disable_security,
|
||||
window_w,
|
||||
@@ -95,6 +95,8 @@ async def run_browser_agent(
|
||||
if agent_type == "org":
|
||||
final_result, errors, model_actions, model_thoughts = await run_org_agent(
|
||||
llm=llm,
|
||||
use_own_browser=use_own_browser,
|
||||
keep_browser_open=keep_browser_open,
|
||||
headless=headless,
|
||||
disable_security=disable_security,
|
||||
window_w=window_w,
|
||||
@@ -111,6 +113,7 @@ async def run_browser_agent(
|
||||
final_result, errors, model_actions, model_thoughts = await run_custom_agent(
|
||||
llm=llm,
|
||||
use_own_browser=use_own_browser,
|
||||
keep_browser_open=keep_browser_open,
|
||||
headless=headless,
|
||||
disable_security=disable_security,
|
||||
window_w=window_w,
|
||||
@@ -142,6 +145,8 @@ async def run_browser_agent(
|
||||
|
||||
async def run_org_agent(
|
||||
llm,
|
||||
use_own_browser,
|
||||
keep_browser_open,
|
||||
headless,
|
||||
disable_security,
|
||||
window_w,
|
||||
@@ -155,28 +160,43 @@ async def run_org_agent(
|
||||
tool_call_in_content
|
||||
|
||||
):
|
||||
browser = Browser(
|
||||
config=BrowserConfig(
|
||||
headless=headless,
|
||||
disable_security=disable_security,
|
||||
extra_chromium_args=[f"--window-size={window_w},{window_h}"],
|
||||
)
|
||||
)
|
||||
async with await browser.new_context(
|
||||
config=BrowserContextConfig(
|
||||
trace_path=save_trace_path if save_trace_path else None,
|
||||
save_recording_path=save_recording_path if save_recording_path else None,
|
||||
no_viewport=False,
|
||||
browser_window_size=BrowserContextWindowSize(
|
||||
width=window_w, height=window_h
|
||||
),
|
||||
try:
|
||||
global _global_browser, _global_browser_context
|
||||
if use_own_browser:
|
||||
chrome_path = os.getenv("CHROME_PATH", None)
|
||||
if chrome_path == "":
|
||||
chrome_path = None
|
||||
else:
|
||||
chrome_path = None
|
||||
|
||||
if _global_browser is None:
|
||||
_global_browser = Browser(
|
||||
config=BrowserConfig(
|
||||
headless=headless,
|
||||
disable_security=disable_security,
|
||||
chrome_instance_path=chrome_path,
|
||||
extra_chromium_args=[f"--window-size={window_w},{window_h}"],
|
||||
)
|
||||
)
|
||||
) as browser_context:
|
||||
|
||||
if _global_browser_context is None:
|
||||
_global_browser_context = await _global_browser.new_context(
|
||||
config=BrowserContextConfig(
|
||||
trace_path=save_trace_path if save_trace_path else None,
|
||||
save_recording_path=save_recording_path if save_recording_path else None,
|
||||
no_viewport=False,
|
||||
browser_window_size=BrowserContextWindowSize(
|
||||
width=window_w, height=window_h
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
agent = Agent(
|
||||
task=task,
|
||||
llm=llm,
|
||||
use_vision=use_vision,
|
||||
browser_context=browser_context,
|
||||
browser=_global_browser,
|
||||
browser_context=_global_browser_context,
|
||||
max_actions_per_step=max_actions_per_step,
|
||||
tool_call_in_content=tool_call_in_content
|
||||
)
|
||||
@@ -186,13 +206,28 @@ async def run_org_agent(
|
||||
errors = history.errors()
|
||||
model_actions = history.model_actions()
|
||||
model_thoughts = history.model_thoughts()
|
||||
await browser.close()
|
||||
return final_result, errors, model_actions, model_thoughts
|
||||
return final_result, errors, model_actions, model_thoughts
|
||||
except Exception as e:
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
errors = str(e) + "\n" + traceback.format_exc()
|
||||
return '', errors, '', ''
|
||||
finally:
|
||||
# Handle cleanup based on persistence configuration
|
||||
if not keep_browser_open:
|
||||
if _global_browser_context:
|
||||
await _global_browser_context.close()
|
||||
_global_browser_context = None
|
||||
|
||||
if _global_browser:
|
||||
await _global_browser.close()
|
||||
_global_browser = None
|
||||
|
||||
|
||||
async def run_custom_agent(
|
||||
llm,
|
||||
use_own_browser,
|
||||
keep_browser_open,
|
||||
headless,
|
||||
disable_security,
|
||||
window_w,
|
||||
@@ -206,67 +241,40 @@ async def run_custom_agent(
|
||||
max_actions_per_step,
|
||||
tool_call_in_content
|
||||
):
|
||||
global _global_browser, _global_browser_context, _global_playwright
|
||||
|
||||
controller = CustomController()
|
||||
persistence_config = BrowserPersistenceConfig.from_env()
|
||||
|
||||
try:
|
||||
global _global_browser, _global_browser_context
|
||||
|
||||
if use_own_browser:
|
||||
chrome_path = os.getenv("CHROME_PATH", None)
|
||||
if chrome_path == "":
|
||||
chrome_path = None
|
||||
else:
|
||||
chrome_path = None
|
||||
|
||||
controller = CustomController()
|
||||
|
||||
# Initialize global browser if needed
|
||||
if _global_browser is None:
|
||||
_global_browser = CustomBrowser(
|
||||
config=BrowserConfig(
|
||||
headless=headless,
|
||||
disable_security=disable_security,
|
||||
chrome_instance_path=chrome_path,
|
||||
extra_chromium_args=[f"--window-size={window_w},{window_h}"],
|
||||
)
|
||||
)
|
||||
|
||||
# Handle browser context based on configuration
|
||||
if use_own_browser:
|
||||
if _global_browser_context is None:
|
||||
_global_playwright = await async_playwright().start()
|
||||
chrome_exe = os.getenv("CHROME_PATH", "")
|
||||
chrome_use_data = os.getenv("CHROME_USER_DATA", "")
|
||||
|
||||
browser_context = await _global_playwright.chromium.launch_persistent_context(
|
||||
user_data_dir=chrome_use_data,
|
||||
executable_path=chrome_exe,
|
||||
if _global_browser_context is None:
|
||||
_global_browser_context = await _global_browser.new_context(
|
||||
config=BrowserContextConfig(
|
||||
trace_path=save_trace_path if save_trace_path else None,
|
||||
save_recording_path=save_recording_path if save_recording_path else None,
|
||||
no_viewport=False,
|
||||
headless=headless,
|
||||
user_agent=(
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
|
||||
"(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"
|
||||
),
|
||||
java_script_enabled=True,
|
||||
bypass_csp=disable_security,
|
||||
ignore_https_errors=disable_security,
|
||||
record_video_dir=save_recording_path if save_recording_path else None,
|
||||
record_video_size={"width": window_w, "height": window_h},
|
||||
)
|
||||
_global_browser_context = await _global_browser.new_context(
|
||||
config=BrowserContextConfig(
|
||||
trace_path=save_trace_path if save_trace_path else None,
|
||||
save_recording_path=save_recording_path if save_recording_path else None,
|
||||
no_viewport=False,
|
||||
browser_window_size=BrowserContextWindowSize(
|
||||
width=window_w, height=window_h
|
||||
),
|
||||
),
|
||||
context=browser_context,
|
||||
)
|
||||
else:
|
||||
if _global_browser_context is None:
|
||||
_global_browser_context = await _global_browser.new_context(
|
||||
config=BrowserContextConfig(
|
||||
trace_path=save_trace_path if save_trace_path else None,
|
||||
save_recording_path=save_recording_path if save_recording_path else None,
|
||||
no_viewport=False,
|
||||
browser_window_size=BrowserContextWindowSize(
|
||||
width=window_w, height=window_h
|
||||
),
|
||||
browser_window_size=BrowserContextWindowSize(
|
||||
width=window_w, height=window_h
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
# Create and run agent
|
||||
agent = CustomAgent(
|
||||
@@ -274,6 +282,7 @@ async def run_custom_agent(
|
||||
add_infos=add_infos,
|
||||
use_vision=use_vision,
|
||||
llm=llm,
|
||||
browser=_global_browser,
|
||||
browser_context=_global_browser_context,
|
||||
controller=controller,
|
||||
system_prompt_class=CustomSystemPrompt,
|
||||
@@ -286,28 +295,24 @@ async def run_custom_agent(
|
||||
errors = history.errors()
|
||||
model_actions = history.model_actions()
|
||||
model_thoughts = history.model_thoughts()
|
||||
return final_result, errors, model_actions, model_thoughts
|
||||
|
||||
except Exception as e:
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
errors = str(e) + "\n" + traceback.format_exc()
|
||||
|
||||
return '', errors, '', ''
|
||||
finally:
|
||||
# Handle cleanup based on persistence configuration
|
||||
if not persistence_config.persistent_session:
|
||||
if not keep_browser_open:
|
||||
if _global_browser_context:
|
||||
await _global_browser_context.close()
|
||||
_global_browser_context = None
|
||||
|
||||
if _global_playwright:
|
||||
await _global_playwright.stop()
|
||||
_global_playwright = None
|
||||
|
||||
if _global_browser:
|
||||
await _global_browser.close()
|
||||
_global_browser = None
|
||||
|
||||
return final_result, errors, model_actions, model_thoughts
|
||||
|
||||
# Define the theme map globally
|
||||
theme_map = {
|
||||
@@ -321,6 +326,16 @@ theme_map = {
|
||||
"Base": Base()
|
||||
}
|
||||
|
||||
async def close_global_browser():
    """Close the shared browser context and browser, and reset both globals.

    The context is closed before the browser. Both module-level handles are
    cleared before anything is closed, and the browser is closed even if
    closing the context raises, so a failed close cannot leave a stale
    reference behind or skip the browser shutdown. The exception from a
    failed close still propagates to the caller.
    """
    global _global_browser, _global_browser_context

    # Detach first: whatever happens below, later runs start from a clean slate
    context, _global_browser_context = _global_browser_context, None
    browser, _global_browser = _global_browser, None

    try:
        if context:
            await context.close()
    finally:
        if browser:
            await browser.close()
|
||||
|
||||
def create_ui(theme_name="Ocean"):
|
||||
css = """
|
||||
@@ -443,6 +458,11 @@ def create_ui(theme_name="Ocean"):
|
||||
value=False,
|
||||
info="Use your existing browser instance",
|
||||
)
|
||||
keep_browser_open = gr.Checkbox(
|
||||
label="Keep Browser Open",
|
||||
value=os.getenv("CHROME_PERSISTENT_SESSION", "False").lower() == "true",
|
||||
info="Keep Browser Open between Tasks",
|
||||
)
|
||||
headless = gr.Checkbox(
|
||||
label="Headless Mode",
|
||||
value=False,
|
||||
@@ -578,12 +598,15 @@ def create_ui(theme_name="Ocean"):
|
||||
outputs=save_recording_path
|
||||
)
|
||||
|
||||
use_own_browser.change(fn=close_global_browser)
|
||||
keep_browser_open.change(fn=close_global_browser)
|
||||
|
||||
# Run button click handler
|
||||
run_button.click(
|
||||
fn=run_browser_agent,
|
||||
inputs=[
|
||||
agent_type, llm_provider, llm_model_name, llm_temperature, llm_base_url, llm_api_key,
|
||||
use_own_browser, headless, disable_security, window_w, window_h, save_recording_path, save_trace_path,
|
||||
use_own_browser, keep_browser_open, headless, disable_security, window_w, window_h, save_recording_path, save_trace_path,
|
||||
enable_recording, task, add_infos, max_steps, use_vision, max_actions_per_step, tool_call_in_content
|
||||
],
|
||||
outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output, recording_display],
|
||||
|
||||
Reference in New Issue
Block a user