Fix macOS being unable to use the user's own browser

This commit is contained in:
vvincent1234
2025-01-11 16:30:31 +08:00
parent b00ebf5ee4
commit db73db1f7c
4 changed files with 337 additions and 121 deletions

View File

@@ -4,6 +4,16 @@
# @ProjectName: browser-use-webui
# @FileName: browser.py
import asyncio
from playwright.async_api import Browser as PlaywrightBrowser
from playwright.async_api import (
BrowserContext as PlaywrightBrowserContext,
)
from playwright.async_api import (
Playwright,
async_playwright,
)
from browser_use.browser.browser import Browser
from browser_use.browser.context import BrowserContext, BrowserContextConfig
from playwright.async_api import BrowserContext as PlaywrightBrowserContext
@@ -15,36 +25,102 @@ from .custom_context import CustomBrowserContext
logger = logging.getLogger(__name__)
class CustomBrowser(Browser):
_global_context = None
async def new_context(
self,
config: BrowserContextConfig = BrowserContextConfig(),
context: PlaywrightBrowserContext = None,
config: BrowserContextConfig = BrowserContextConfig()
) -> CustomBrowserContext:
"""Create a browser context with persistence support"""
persistence_config = BrowserPersistenceConfig.from_env()
if persistence_config.persistent_session:
if CustomBrowser._global_context is not None:
logger.info("Reusing existing persistent browser context")
return CustomBrowser._global_context
context_instance = CustomBrowserContext(config=config, browser=self, context=context)
CustomBrowser._global_context = context_instance
logger.info("Created new persistent browser context")
return context_instance
logger.info("Creating non-persistent browser context")
return CustomBrowserContext(config=config, browser=self, context=context)
return CustomBrowserContext(config=config, browser=self)
async def _setup_browser(self, playwright: Playwright) -> PlaywrightBrowser:
"""Sets up and returns a Playwright Browser instance with anti-detection measures."""
if self.config.wss_url:
browser = await playwright.chromium.connect(self.config.wss_url)
return browser
elif self.config.chrome_instance_path:
import subprocess
import requests
try:
# Check if browser is already running
response = requests.get('http://localhost:9222/json/version', timeout=2)
if response.status_code == 200:
logger.info('Reusing existing Chrome instance')
browser = await playwright.chromium.connect_over_cdp(
endpoint_url='http://localhost:9222',
timeout=20000, # 20 second timeout for connection
)
return browser
except requests.ConnectionError:
logger.debug('No existing Chrome instance found, starting a new one')
# Start a new Chrome instance
subprocess.Popen(
[
self.config.chrome_instance_path,
'--remote-debugging-port=9222',
],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
# Attempt to connect again after starting a new instance
for _ in range(10):
try:
response = requests.get('http://localhost:9222/json/version', timeout=2)
if response.status_code == 200:
break
except requests.ConnectionError:
pass
await asyncio.sleep(1)
try:
browser = await playwright.chromium.connect_over_cdp(
endpoint_url='http://localhost:9222',
timeout=20000, # 20 second timeout for connection
)
return browser
except Exception as e:
logger.error(f'Failed to start a new Chrome instance.: {str(e)}')
raise RuntimeError(
' To start chrome in Debug mode, you need to close all existing Chrome instances and try again otherwise we can not connect to the instance.'
)
async def close(self):
"""Override close to respect persistence setting"""
persistence_config = BrowserPersistenceConfig.from_env()
if not persistence_config.persistent_session:
if CustomBrowser._global_context is not None:
await CustomBrowser._global_context.close()
CustomBrowser._global_context = None
await super().close()
else:
logger.info("Skipping browser close due to persistent session")
try:
disable_security_args = []
if self.config.disable_security:
disable_security_args = [
'--disable-web-security',
'--disable-site-isolation-trials',
'--disable-features=IsolateOrigins,site-per-process',
]
browser = await playwright.chromium.launch(
headless=self.config.headless,
args=[
'--no-sandbox',
'--disable-blink-features=AutomationControlled',
'--disable-infobars',
'--disable-background-timer-throttling',
'--disable-popup-blocking',
'--disable-backgrounding-occluded-windows',
'--disable-renderer-backgrounding',
'--disable-window-activation',
'--disable-focus-on-load',
'--no-first-run',
'--no-default-browser-check',
'--no-startup-window',
'--window-position=0,0',
# '--window-size=1280,1000',
]
+ disable_security_args
+ self.config.extra_chromium_args,
proxy=self.config.proxy,
)
return browser
except Exception as e:
logger.error(f'Failed to initialize Playwright browser: {str(e)}')
raise

View File

@@ -22,22 +22,17 @@ class CustomBrowserContext(BrowserContext):
def __init__(
self,
browser: "Browser",
config: BrowserContextConfig = BrowserContextConfig(),
context: PlaywrightBrowserContext = None,
config: BrowserContextConfig = BrowserContextConfig()
):
super(CustomBrowserContext, self).__init__(browser=browser, config=config)
self.context = context
self._persistence_config = BrowserPersistenceConfig.from_env()
async def _create_context(self, browser: PlaywrightBrowser) -> PlaywrightBrowserContext:
"""Creates a new browser context with anti-detection measures and loads cookies if available."""
# If we have a context, return it directly
if self.context:
return self.context
# Check if we should use existing context for persistence
if self._persistence_config.persistent_session and len(browser.contexts) > 0:
logger.info("Using existing persistent context")
if self.browser.config.chrome_instance_path and len(browser.contexts) > 0:
# Connect to existing Chrome instance instead of creating new one
context = browser.contexts[0]
else:
# Original code for creating new context
@@ -99,8 +94,3 @@ class CustomBrowserContext(BrowserContext):
)
return context
async def close(self):
"""Override close to respect persistence setting"""
if not self._persistence_config.persistent_session:
await super().close()

View File

@@ -3,6 +3,7 @@
# @Author : wenshao
# @ProjectName: browser-use-webui
# @FileName: test_browser_use.py
import pdb
from dotenv import load_dotenv
@@ -28,20 +29,29 @@ async def test_browser_use_org():
BrowserContextWindowSize,
)
# llm = utils.get_llm_model(
# provider="azure_openai",
# model_name="gpt-4o",
# temperature=0.8,
# base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
# api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
# )
llm = utils.get_llm_model(
provider="azure_openai",
model_name="gpt-4o",
temperature=0.8,
base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
provider="deepseek",
model_name="deepseek-chat",
temperature=0.8
)
window_w, window_h = 1920, 1080
use_vision = False
chrome_path = os.getenv("CHROME_PATH", None)
browser = Browser(
config=BrowserConfig(
headless=False,
disable_security=True,
chrome_instance_path=chrome_path,
extra_chromium_args=[f"--window-size={window_w},{window_h}"],
)
)
@@ -59,6 +69,7 @@ async def test_browser_use_org():
task="go to google.com and type 'OpenAI' click search and give me the first url",
llm=llm,
browser_context=browser_context,
use_vision=use_vision
)
history: AgentHistoryList = await agent.run(max_steps=10)
@@ -208,6 +219,122 @@ async def test_browser_use_custom():
await browser.close()
async def test_browser_use_custom_v2():
    """Run ``CustomAgent`` on a ``CustomBrowser`` and print the run history.

    When ``use_own_browser`` is enabled, the value of the ``CHROME_PATH``
    environment variable is passed to ``BrowserConfig`` as
    ``chrome_instance_path``; an unset or empty variable is passed as ``None``.

    Any exception raised while setting up or running the agent is printed
    with its traceback instead of being propagated. The browser context and
    the browser are closed afterwards in every case.
    """
    from browser_use.browser.context import BrowserContextWindowSize
    from browser_use.browser.browser import BrowserConfig

    from src.agent.custom_agent import CustomAgent
    from src.agent.custom_prompts import CustomSystemPrompt
    from src.browser.custom_browser import CustomBrowser
    from src.browser.custom_context import BrowserContextConfig
    from src.controller.custom_controller import CustomController

    window_w, window_h = 1920, 1080

    # llm = utils.get_llm_model(
    #     provider="azure_openai",
    #     model_name="gpt-4o",
    #     temperature=0.8,
    #     base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
    #     api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
    # )

    # llm = utils.get_llm_model(
    #     provider="gemini",
    #     model_name="gemini-2.0-flash-exp",
    #     temperature=1.0,
    #     api_key=os.getenv("GOOGLE_API_KEY", "")
    # )

    llm = utils.get_llm_model(
        provider="deepseek",
        model_name="deepseek-chat",
        temperature=0.8
    )

    # llm = utils.get_llm_model(
    #     provider="ollama", model_name="qwen2.5:7b", temperature=0.8
    # )

    controller = CustomController()
    use_own_browser = True
    disable_security = True
    use_vision = False  # Set to False when using DeepSeek
    tool_call_in_content = True  # Set to True when using Ollama
    max_actions_per_step = 1

    # Pre-declared so the cleanup below can tell what was actually created.
    browser = None
    browser_context = None

    try:
        # An empty CHROME_PATH is treated the same as an unset one.
        chrome_path = (os.getenv("CHROME_PATH") or None) if use_own_browser else None

        browser = CustomBrowser(
            config=BrowserConfig(
                headless=False,
                disable_security=disable_security,
                chrome_instance_path=chrome_path,
                extra_chromium_args=[f"--window-size={window_w},{window_h}"],
            )
        )
        browser_context = await browser.new_context(
            config=BrowserContextConfig(
                trace_path="./tmp/traces",
                save_recording_path="./tmp/record_videos",
                no_viewport=False,
                browser_window_size=BrowserContextWindowSize(
                    width=window_w, height=window_h
                ),
            )
        )
        agent = CustomAgent(
            task="go to google.com and type 'OpenAI' click search and give me the first url",
            add_infos="",  # some hints for llm to complete the task
            llm=llm,
            browser=browser,
            browser_context=browser_context,
            controller=controller,
            system_prompt_class=CustomSystemPrompt,
            use_vision=use_vision,
            tool_call_in_content=tool_call_in_content,
            max_actions_per_step=max_actions_per_step
        )
        history: AgentHistoryList = await agent.run(max_steps=10)

        print("Final Result:")
        pprint(history.final_result(), indent=4)

        print("\nErrors:")
        pprint(history.errors(), indent=4)

        # e.g. xPaths the model clicked on
        print("\nModel Outputs:")
        pprint(history.model_actions(), indent=4)

        print("\nThoughts:")
        pprint(history.model_thoughts(), indent=4)
    except Exception:
        import traceback

        traceback.print_exc()
    finally:
        # Explicitly close the browser context first; the nested ``finally``
        # guarantees the browser is still closed if that close raises.
        try:
            if browser_context:
                await browser_context.close()
        finally:
            if browser:
                await browser.close()
if __name__ == "__main__":
# asyncio.run(test_browser_use_org())
asyncio.run(test_browser_use_custom())
# asyncio.run(test_browser_use_custom())
asyncio.run(test_browser_use_custom_v2())

173
webui.py
View File

@@ -44,7 +44,6 @@ from browser_use.browser.context import BrowserContextConfig, BrowserContextWind
# Global variables for persistence
_global_browser = None
_global_browser_context = None
_global_playwright = None
async def run_browser_agent(
agent_type,
@@ -54,6 +53,7 @@ async def run_browser_agent(
llm_base_url,
llm_api_key,
use_own_browser,
keep_browser_open,
headless,
disable_security,
window_w,
@@ -95,6 +95,8 @@ async def run_browser_agent(
if agent_type == "org":
final_result, errors, model_actions, model_thoughts = await run_org_agent(
llm=llm,
use_own_browser=use_own_browser,
keep_browser_open=keep_browser_open,
headless=headless,
disable_security=disable_security,
window_w=window_w,
@@ -111,6 +113,7 @@ async def run_browser_agent(
final_result, errors, model_actions, model_thoughts = await run_custom_agent(
llm=llm,
use_own_browser=use_own_browser,
keep_browser_open=keep_browser_open,
headless=headless,
disable_security=disable_security,
window_w=window_w,
@@ -142,6 +145,8 @@ async def run_browser_agent(
async def run_org_agent(
llm,
use_own_browser,
keep_browser_open,
headless,
disable_security,
window_w,
@@ -155,28 +160,43 @@ async def run_org_agent(
tool_call_in_content
):
browser = Browser(
config=BrowserConfig(
headless=headless,
disable_security=disable_security,
extra_chromium_args=[f"--window-size={window_w},{window_h}"],
)
)
async with await browser.new_context(
config=BrowserContextConfig(
trace_path=save_trace_path if save_trace_path else None,
save_recording_path=save_recording_path if save_recording_path else None,
no_viewport=False,
browser_window_size=BrowserContextWindowSize(
width=window_w, height=window_h
),
try:
global _global_browser, _global_browser_context
if use_own_browser:
chrome_path = os.getenv("CHROME_PATH", None)
if chrome_path == "":
chrome_path = None
else:
chrome_path = None
if _global_browser is None:
_global_browser = Browser(
config=BrowserConfig(
headless=headless,
disable_security=disable_security,
chrome_instance_path=chrome_path,
extra_chromium_args=[f"--window-size={window_w},{window_h}"],
)
)
) as browser_context:
if _global_browser_context is None:
_global_browser_context = await _global_browser.new_context(
config=BrowserContextConfig(
trace_path=save_trace_path if save_trace_path else None,
save_recording_path=save_recording_path if save_recording_path else None,
no_viewport=False,
browser_window_size=BrowserContextWindowSize(
width=window_w, height=window_h
),
)
)
agent = Agent(
task=task,
llm=llm,
use_vision=use_vision,
browser_context=browser_context,
browser=_global_browser,
browser_context=_global_browser_context,
max_actions_per_step=max_actions_per_step,
tool_call_in_content=tool_call_in_content
)
@@ -186,13 +206,28 @@ async def run_org_agent(
errors = history.errors()
model_actions = history.model_actions()
model_thoughts = history.model_thoughts()
await browser.close()
return final_result, errors, model_actions, model_thoughts
return final_result, errors, model_actions, model_thoughts
except Exception as e:
import traceback
traceback.print_exc()
errors = str(e) + "\n" + traceback.format_exc()
return '', errors, '', ''
finally:
# Handle cleanup based on persistence configuration
if not keep_browser_open:
if _global_browser_context:
await _global_browser_context.close()
_global_browser_context = None
if _global_browser:
await _global_browser.close()
_global_browser = None
async def run_custom_agent(
llm,
use_own_browser,
keep_browser_open,
headless,
disable_security,
window_w,
@@ -206,67 +241,40 @@ async def run_custom_agent(
max_actions_per_step,
tool_call_in_content
):
global _global_browser, _global_browser_context, _global_playwright
controller = CustomController()
persistence_config = BrowserPersistenceConfig.from_env()
try:
global _global_browser, _global_browser_context
if use_own_browser:
chrome_path = os.getenv("CHROME_PATH", None)
if chrome_path == "":
chrome_path = None
else:
chrome_path = None
controller = CustomController()
# Initialize global browser if needed
if _global_browser is None:
_global_browser = CustomBrowser(
config=BrowserConfig(
headless=headless,
disable_security=disable_security,
chrome_instance_path=chrome_path,
extra_chromium_args=[f"--window-size={window_w},{window_h}"],
)
)
# Handle browser context based on configuration
if use_own_browser:
if _global_browser_context is None:
_global_playwright = await async_playwright().start()
chrome_exe = os.getenv("CHROME_PATH", "")
chrome_use_data = os.getenv("CHROME_USER_DATA", "")
browser_context = await _global_playwright.chromium.launch_persistent_context(
user_data_dir=chrome_use_data,
executable_path=chrome_exe,
if _global_browser_context is None:
_global_browser_context = await _global_browser.new_context(
config=BrowserContextConfig(
trace_path=save_trace_path if save_trace_path else None,
save_recording_path=save_recording_path if save_recording_path else None,
no_viewport=False,
headless=headless,
user_agent=(
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"
),
java_script_enabled=True,
bypass_csp=disable_security,
ignore_https_errors=disable_security,
record_video_dir=save_recording_path if save_recording_path else None,
record_video_size={"width": window_w, "height": window_h},
)
_global_browser_context = await _global_browser.new_context(
config=BrowserContextConfig(
trace_path=save_trace_path if save_trace_path else None,
save_recording_path=save_recording_path if save_recording_path else None,
no_viewport=False,
browser_window_size=BrowserContextWindowSize(
width=window_w, height=window_h
),
),
context=browser_context,
)
else:
if _global_browser_context is None:
_global_browser_context = await _global_browser.new_context(
config=BrowserContextConfig(
trace_path=save_trace_path if save_trace_path else None,
save_recording_path=save_recording_path if save_recording_path else None,
no_viewport=False,
browser_window_size=BrowserContextWindowSize(
width=window_w, height=window_h
),
browser_window_size=BrowserContextWindowSize(
width=window_w, height=window_h
),
)
)
# Create and run agent
agent = CustomAgent(
@@ -274,6 +282,7 @@ async def run_custom_agent(
add_infos=add_infos,
use_vision=use_vision,
llm=llm,
browser=_global_browser,
browser_context=_global_browser_context,
controller=controller,
system_prompt_class=CustomSystemPrompt,
@@ -286,28 +295,24 @@ async def run_custom_agent(
errors = history.errors()
model_actions = history.model_actions()
model_thoughts = history.model_thoughts()
return final_result, errors, model_actions, model_thoughts
except Exception as e:
import traceback
traceback.print_exc()
errors = str(e) + "\n" + traceback.format_exc()
return '', errors, '', ''
finally:
# Handle cleanup based on persistence configuration
if not persistence_config.persistent_session:
if not keep_browser_open:
if _global_browser_context:
await _global_browser_context.close()
_global_browser_context = None
if _global_playwright:
await _global_playwright.stop()
_global_playwright = None
if _global_browser:
await _global_browser.close()
_global_browser = None
return final_result, errors, model_actions, model_thoughts
# Define the theme map globally
theme_map = {
@@ -321,6 +326,16 @@ theme_map = {
"Base": Base()
}
async def close_global_browser():
    """Close and forget the module-level browser context and browser.

    The globals ``_global_browser_context`` and ``_global_browser`` are reset
    to ``None`` before anything is closed, so a ``close()`` that raises cannot
    leave a stale handle behind. The browser is closed even when closing the
    context fails; the exception from a failing ``close()`` still propagates
    to the caller.
    """
    global _global_browser, _global_browser_context

    # Detach both handles up front; only the local references are used below.
    context, _global_browser_context = _global_browser_context, None
    browser, _global_browser = _global_browser, None

    try:
        if context:
            await context.close()
    finally:
        if browser:
            await browser.close()
def create_ui(theme_name="Ocean"):
css = """
@@ -443,6 +458,11 @@ def create_ui(theme_name="Ocean"):
value=False,
info="Use your existing browser instance",
)
keep_browser_open = gr.Checkbox(
label="Keep Browser Open",
value=os.getenv("CHROME_PERSISTENT_SESSION", "False").lower() == "true",
info="Keep Browser Open between Tasks",
)
headless = gr.Checkbox(
label="Headless Mode",
value=False,
@@ -578,12 +598,15 @@ def create_ui(theme_name="Ocean"):
outputs=save_recording_path
)
use_own_browser.change(fn=close_global_browser)
keep_browser_open.change(fn=close_global_browser)
# Run button click handler
run_button.click(
fn=run_browser_agent,
inputs=[
agent_type, llm_provider, llm_model_name, llm_temperature, llm_base_url, llm_api_key,
use_own_browser, headless, disable_security, window_w, window_h, save_recording_path, save_trace_path,
use_own_browser, keep_browser_open, headless, disable_security, window_w, window_h, save_recording_path, save_trace_path,
enable_recording, task, add_infos, max_steps, use_vision, max_actions_per_step, tool_call_in_content
],
outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output, recording_display],