new stream function without the need to modify custom context

This commit is contained in:
katiue
2025-01-12 19:57:24 +07:00
parent 585800f3c7
commit 92069a5bb4
4 changed files with 191 additions and 154 deletions

View File

@@ -4,6 +4,16 @@
# @ProjectName: browser-use-webui
# @FileName: browser.py
import asyncio
from playwright.async_api import Browser as PlaywrightBrowser
from playwright.async_api import (
BrowserContext as PlaywrightBrowserContext,
)
from playwright.async_api import (
Playwright,
async_playwright,
)
from browser_use.browser.browser import Browser
from browser_use.browser.context import BrowserContext, BrowserContextConfig
from playwright.async_api import BrowserContext as PlaywrightBrowserContext
@@ -15,36 +25,102 @@ from .custom_context import CustomBrowserContext
logger = logging.getLogger(__name__)
# Browser subclass that hands out CustomBrowserContext objects and performs
# its own Playwright browser setup (remote websocket, local Chrome over CDP,
# or a freshly launched Chromium).
# NOTE(review): this text appears to be a rendered diff: indentation is lost
# and old/new lines are interleaved (`config` is declared twice in
# `new_context`, two `return` statements follow each other, and `close` sits
# in the middle of `_setup_browser`). Confirm statement order and nesting
# against the real file before relying on it.
class CustomBrowser(Browser):
# Class-level reference to the context that is reused while persistent
# sessions are enabled; read and written by new_context() and close().
_global_context = None
async def new_context(
self,
# NOTE(review): the default BrowserContextConfig() is evaluated once, when
# the function is defined, so callers that omit `config` share one object.
config: BrowserContextConfig = BrowserContextConfig(),
# Optional pre-built Playwright context forwarded to CustomBrowserContext.
context: PlaywrightBrowserContext = None,
config: BrowserContextConfig = BrowserContextConfig()
) -> CustomBrowserContext:
"""Create a browser context with persistence support"""
# Persistence settings are re-read from the environment on every call.
persistence_config = BrowserPersistenceConfig.from_env()
if persistence_config.persistent_session:
# Persistent mode: hand back the shared context if one already exists,
# otherwise build one and remember it on the class.
if CustomBrowser._global_context is not None:
logger.info("Reusing existing persistent browser context")
return CustomBrowser._global_context
context_instance = CustomBrowserContext(config=config, browser=self, context=context)
CustomBrowser._global_context = context_instance
logger.info("Created new persistent browser context")
return context_instance
# Non-persistent mode: every call gets a fresh wrapper.
logger.info("Creating non-persistent browser context")
return CustomBrowserContext(config=config, browser=self, context=context)
# NOTE(review): second return directly after the one above; presumably the
# replacement line from the newer version of this method.
return CustomBrowserContext(config=config, browser=self)
async def _setup_browser(self, playwright: Playwright) -> PlaywrightBrowser:
"""Sets up and returns a Playwright Browser instance with anti-detection measures."""
# Case 1: a websocket URL is configured, so attach to that remote browser.
if self.config.wss_url:
browser = await playwright.chromium.connect(self.config.wss_url)
return browser
# Case 2: a local Chrome executable is configured, so attach to it over CDP
# on the fixed debugging port 9222, starting it first if nothing answers.
elif self.config.chrome_instance_path:
import subprocess
import requests
try:
# Check if browser is already running
# NOTE(review): requests.get is a synchronous call made inside a
# coroutine; it blocks the event loop for up to the 2 s timeout.
response = requests.get('http://localhost:9222/json/version', timeout=2)
if response.status_code == 200:
logger.info('Reusing existing Chrome instance')
browser = await playwright.chromium.connect_over_cdp(
endpoint_url='http://localhost:9222',
timeout=20000, # 20 second timeout for connection
)
return browser
# Only a refused/failed connection is treated as "not running yet".
except requests.ConnectionError:
logger.debug('No existing Chrome instance found, starting a new one')
# Start a new Chrome instance
# Output of the spawned process is discarded; the Popen handle is not kept.
subprocess.Popen(
[
self.config.chrome_instance_path,
'--remote-debugging-port=9222',
],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
# Attempt to connect again after starting a new instance
# Poll the version endpoint up to 10 times, sleeping 1 s between tries.
for _ in range(10):
try:
response = requests.get('http://localhost:9222/json/version', timeout=2)
if response.status_code == 200:
break
except requests.ConnectionError:
pass
await asyncio.sleep(1)
# Connect regardless of whether the polling loop saw a 200 response.
try:
browser = await playwright.chromium.connect_over_cdp(
endpoint_url='http://localhost:9222',
timeout=20000, # 20 second timeout for connection
)
return browser
except Exception as e:
# Any connection failure is logged and surfaced as a RuntimeError that
# tells the user to close running Chrome instances.
logger.error(f'Failed to start a new Chrome instance.: {str(e)}')
raise RuntimeError(
' To start chrome in Debug mode, you need to close all existing Chrome instances and try again otherwise we can not connect to the instance.'
)
# NOTE(review): `close` appears here in the middle of `_setup_browser`;
# presumably lines from the older version of the class. Nesting of its
# statements is not recoverable from this view.
async def close(self):
"""Override close to respect persistence setting"""
persistence_config = BrowserPersistenceConfig.from_env()
if not persistence_config.persistent_session:
# Persistence off: close and forget the shared context, and delegate to
# the base-class close().
if CustomBrowser._global_context is not None:
await CustomBrowser._global_context.close()
CustomBrowser._global_context = None
await super().close()
else:
# Persistence on: the browser is intentionally left running.
logger.info("Skipping browser close due to persistent session")
# Case 3 of _setup_browser (no wss_url, no chrome_instance_path): launch a
# new Chromium through Playwright.
try:
# Extra flags that relax web security, added only when configured.
disable_security_args = []
if self.config.disable_security:
disable_security_args = [
'--disable-web-security',
'--disable-site-isolation-trials',
'--disable-features=IsolateOrigins,site-per-process',
]
browser = await playwright.chromium.launch(
headless=self.config.headless,
# Fixed flag list, followed by the optional security flags and any
# user-supplied extra_chromium_args.
args=[
'--no-sandbox',
'--disable-blink-features=AutomationControlled',
'--disable-infobars',
'--disable-background-timer-throttling',
'--disable-popup-blocking',
'--disable-backgrounding-occluded-windows',
'--disable-renderer-backgrounding',
'--disable-window-activation',
'--disable-focus-on-load',
'--no-first-run',
'--no-default-browser-check',
'--no-startup-window',
'--window-position=0,0',
# '--window-size=1280,1000',
]
+ disable_security_args
+ self.config.extra_chromium_args,
proxy=self.config.proxy,
)
return browser
except Exception as e:
# Log the launch failure, then re-raise the original exception unchanged.
logger.error(f'Failed to initialize Playwright browser: {str(e)}')
raise

View File

@@ -15,7 +15,6 @@ from playwright.async_api import Browser as PlaywrightBrowser
from playwright.async_api import BrowserContext as PlaywrightBrowserContext
from .config import BrowserPersistenceConfig
logger = logging.getLogger(__name__)
@@ -23,33 +22,21 @@ class CustomBrowserContext(BrowserContext):
# Build the context wrapper: forwards `browser` and `config` to the base
# class and records an optional pre-existing Playwright context.
# NOTE(review): `config` appears twice in this signature; this looks like
# old and new diff lines shown together. Confirm against the real file.
def __init__(
self,
browser: "Browser",
# NOTE(review): the default BrowserContextConfig() is created once at
# definition time and shared by all calls that omit `config`.
config: BrowserContextConfig = BrowserContextConfig(),
context: PlaywrightBrowserContext = None,
config: BrowserContextConfig = BrowserContextConfig()
):
super(CustomBrowserContext, self).__init__(browser=browser, config=config)
# Playwright context to reuse; None when none was supplied.
self.context = context
# Set to None here; not assigned anywhere else in this method.
self._page = None
# Persistence settings are read from the environment once per instance.
self._persistence_config = BrowserPersistenceConfig.from_env()
# Read-only accessor for `self.context`.
@property
def impl_context(self) -> PlaywrightBrowserContext:
"""Returns the underlying Playwright context implementation"""
# Raises RuntimeError when `self.context` is still None.
if self.context is None:
raise RuntimeError("Failed to create or retrieve a browser context.")
return self.context
async def _create_context(self, browser: PlaywrightBrowser) -> PlaywrightBrowserContext:
"""Creates a new browser context with anti-detection measures and loads cookies if available."""
if self.context:
return self.context
# If we have a context, return it directly
# Check if we should use existing context for persistence
if self._persistence_config.persistent_session and len(browser.contexts) > 0:
logger.info("Using existing persistent context.")
self.context = browser.contexts[0]
if self.browser.config.chrome_instance_path and len(browser.contexts) > 0:
# Connect to existing Chrome instance instead of creating new one
context = browser.contexts[0]
else:
logger.info("Creating a new browser context.")
self.context = await browser.new_context(
# Original code for creating new context
context = await browser.new_context(
viewport=self.config.browser_window_size,
no_viewport=False,
user_agent=(
@@ -63,19 +50,20 @@ class CustomBrowserContext(BrowserContext):
record_video_size=self.config.browser_window_size,
)
# Handle tracing
if self.config.trace_path:
await self.context.tracing.start(screenshots=True, snapshots=True, sources=True)
await context.tracing.start(screenshots=True, snapshots=True, sources=True)
# Load cookies if they exist
if self.config.cookies_file and os.path.exists(self.config.cookies_file):
with open(self.config.cookies_file, "r") as f:
cookies = json.load(f)
logger.info(f"Loaded {len(cookies)} cookies from {self.config.cookies_file}.")
await self.context.add_cookies(cookies)
logger.info(
f"Loaded {len(cookies)} cookies from {self.config.cookies_file}"
)
await context.add_cookies(cookies)
# Expose anti-detection scripts
await self.context.add_init_script(
await context.add_init_script(
"""
// Webdriver property
Object.defineProperty(navigator, 'webdriver', {
@@ -105,41 +93,4 @@ class CustomBrowserContext(BrowserContext):
"""
)
# Create initial page if none exists
if not self.context.pages:
self._page = await self.context.new_page()
await self._page.goto('about:blank')
return self.context
async def new_page(self):
"""Creates and returns a new page in this context."""
# Build the context on first use: fetch the Playwright browser from the
# owning browser object and pass it to _create_context().
if not self.context:
await self._create_context(await self.browser.get_playwright_browser())
# Returns whatever the context's own new_page() call produces.
return await self.context.new_page()
async def get_current_page(self):
"""Returns the current page or creates one if none exists."""
# Build the context on first use.
if not self.context:
await self._create_context(await self.browser.get_playwright_browser())
# Raises RuntimeError if the context is still missing after that attempt.
if not self.context:
raise RuntimeError("Browser context is not initialized.")
pages = self.context.pages
# No pages yet: open one and return it.
if not pages:
logger.warning("No existing pages in the context. Creating a new page.")
return await self.context.new_page()
# "Current" here means the first entry of the context's page list.
return pages[0]
async def close(self):
"""Override close to respect persistence setting."""
# The context is closed only when persistent sessions are disabled and a
# context actually exists; otherwise nothing is closed.
if not self._persistence_config.persistent_session and self.context:
await self.context.close()
# NOTE(review): indentation is lost in this view, so it is unclear whether
# this reset happens only inside the guard above. Confirm.
self.context = None
# Read-only view of the wrapped context's pages.
@property
def pages(self):
"""Returns list of pages in the context."""
# No context yet: warn and return an empty list; nothing is created here.
if not self.context:
logger.warning("Attempting to access pages but context is not initialized.")
return []
return self.context.pages
return context

View File

@@ -15,6 +15,7 @@ from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_ollama import ChatOllama
from langchain_openai import AzureChatOpenAI, ChatOpenAI
import gradio as gr
from src.browser.custom_context import CustomBrowserContext
def get_llm_model(provider: str, **kwargs):
"""
@@ -164,36 +165,30 @@ def get_latest_files(directory: str, file_types: list = ['.webm', '.zip']) -> Di
print(f"Error getting latest {file_type} file: {e}")
return latest_files
async def capture_screenshot(browser_context) -> str:
async def capture_screenshot(browser_context: CustomBrowserContext) -> str:
"""Capture and encode a screenshot"""
latest_screenshot = ""
try:
# Get the implementation context - handle both direct Playwright context and wrapped context
context = browser_context
if hasattr(browser_context, 'context'):
context = browser_context.context
if not context:
return "<div>No browser context available</div>"
# Get all pages
pages = context.pages
if not pages:
return "<div>Waiting for page to be available...</div>"
# Extract the Playwright browser instance
playwright_browser = browser_context.browser.playwright_browser # Ensure this is correct.
# Use the first non-blank page or fallback to first page
active_page = None
for page in pages:
if page.url != 'about:blank':
active_page = page
break
if not active_page and pages:
# Check if the browser instance is valid and if an existing context can be reused
if playwright_browser and playwright_browser.contexts:
playwright_context = playwright_browser.contexts[0]
else:
return latest_screenshot
# Access pages in the context
if playwright_context:
pages = playwright_context.pages
# Use an existing page or create a new one if none exist
if pages:
active_page = pages[0]
if not active_page:
return "<div>No active page available</div>"
for page in pages:
if page.url != "about:blank":
active_page = page
# Take screenshot
try:
screenshot = await active_page.screenshot(
@@ -202,9 +197,9 @@ async def capture_screenshot(browser_context) -> str:
scale="css"
)
encoded = base64.b64encode(screenshot).decode('utf-8')
return f'<img src="data:image/jpeg;base64,{encoded}" style="width:100%; max-width:1200px; border:1px solid #ccc;">'
return f'<img src="data:image/jpeg;base64,{encoded}" style="width:80vw; height:90vh ; border:1px solid #ccc;">'
except Exception as e:
return f"<div class='error'>Screenshot failed: {str(e)}</div>"
return f"<div class='error' style='width:80vw; height:90vh'>Screenshot failed: {str(e)}</div>"
except Exception as e:
return f"<div class='error'>Screenshot error: {str(e)}</div>"
return f"<div class='error' style='width:80vw; height:90vh'>Screenshot error: {str(e)}</div>"

View File

@@ -58,6 +58,9 @@ async def run_browser_agent(
use_vision,
max_actions_per_step,
tool_call_in_content,
browser,
browser_context,
playwright
):
# Disable recording if the checkbox is unchecked
if not enable_recording:
@@ -84,7 +87,7 @@ async def run_browser_agent(
api_key=llm_api_key,
)
if agent_type == "org":
final_result, errors, model_actions, model_thoughts = await run_org_agent(
final_result, errors, model_actions, model_thoughts, recorded_files, trace_file = await run_org_agent(
llm=llm,
headless=headless,
disable_security=disable_security,
@@ -97,9 +100,12 @@ async def run_browser_agent(
use_vision=use_vision,
max_actions_per_step=max_actions_per_step,
tool_call_in_content=tool_call_in_content,
browser=browser,
browser_context=browser_context,
playwright=playwright
)
elif agent_type == "custom":
final_result, errors, model_actions, model_thoughts = await run_custom_agent(
final_result, errors, model_actions, model_thoughts, recorded_files, trace_file = await run_custom_agent(
llm=llm,
use_own_browser=use_own_browser,
headless=headless,
@@ -113,7 +119,10 @@ async def run_browser_agent(
max_steps=max_steps,
use_vision=use_vision,
max_actions_per_step=max_actions_per_step,
tool_call_in_content=tool_call_in_content
tool_call_in_content=tool_call_in_content,
browser=browser,
browser_context=browser_context,
playwright=playwright
)
else:
raise ValueError(f"Invalid agent type: {agent_type}")
@@ -142,7 +151,10 @@ async def run_org_agent(
max_steps,
use_vision,
max_actions_per_step,
tool_call_in_content
tool_call_in_content,
browser,
browser_context,
playwright
):
browser = Browser(
config=BrowserConfig(
@@ -196,17 +208,18 @@ async def run_custom_agent(
max_steps,
use_vision,
max_actions_per_step,
tool_call_in_content
):
global _global_browser, _global_browser_context, _global_playwright
tool_call_in_content,
browser,
browser_context,
playwright
):
controller = CustomController()
persistence_config = BrowserPersistenceConfig.from_env()
try:
# Initialize global browser if needed
if _global_browser is None:
_global_browser = CustomBrowser(
if browser is None:
browser = CustomBrowser(
config=BrowserConfig(
headless=headless,
disable_security=disable_security,
@@ -216,12 +229,12 @@ async def run_custom_agent(
# Handle browser context based on configuration
if use_own_browser:
if _global_browser_context is None:
_global_playwright = await async_playwright().start()
if browser_context is None:
playwright = await async_playwright().start()
chrome_exe = os.getenv("CHROME_PATH", "")
chrome_use_data = os.getenv("CHROME_USER_DATA", "")
browser_context = await _global_playwright.chromium.launch_persistent_context(
browser_context = await playwright.chromium.launch_persistent_context(
user_data_dir=chrome_use_data,
executable_path=chrome_exe,
no_viewport=False,
@@ -236,7 +249,7 @@ async def run_custom_agent(
record_video_dir=save_recording_path if save_recording_path else None,
record_video_size={"width": window_w, "height": window_h},
)
_global_browser_context = await _global_browser.new_context(
browser_context = await browser.new_context(
config=BrowserContextConfig(
trace_path=save_trace_path if save_trace_path else None,
save_recording_path=save_recording_path if save_recording_path else None,
@@ -245,11 +258,10 @@ async def run_custom_agent(
width=window_w, height=window_h
),
),
context=browser_context,
)
else:
if _global_browser_context is None:
_global_browser_context = await _global_browser.new_context(
if browser_context is None:
browser_context = await browser.new_context(
config=BrowserContextConfig(
trace_path=save_trace_path if save_trace_path else None,
save_recording_path=save_recording_path if save_recording_path else None,
@@ -266,7 +278,7 @@ async def run_custom_agent(
add_infos=add_infos,
use_vision=use_vision,
llm=llm,
browser_context=_global_browser_context,
browser_context=browser_context,
controller=controller,
system_prompt_class=CustomSystemPrompt,
max_actions_per_step=max_actions_per_step,
@@ -292,17 +304,17 @@ async def run_custom_agent(
finally:
# Handle cleanup based on persistence configuration
if not persistence_config.persistent_session:
if _global_browser_context:
await _global_browser_context.close()
_global_browser_context = None
if browser_context:
await browser_context.close()
browser_context = None
if _global_playwright:
await _global_playwright.stop()
_global_playwright = None
if playwright:
await playwright.stop()
playwright = None
if _global_browser:
await _global_browser.close()
_global_browser = None
if browser:
await browser.close()
browser = None
return final_result, errors, model_actions, model_thoughts, trace_file.get('.webm'), recorded_files.get('.zip')
async def run_with_stream(
@@ -325,7 +337,7 @@ async def run_with_stream(
max_steps,
use_vision,
max_actions_per_step,
tool_call_in_content,
tool_call_in_content
):
"""Wrapper to run the agent and handle streaming."""
global _global_browser, _global_browser_context
@@ -376,12 +388,15 @@ async def run_with_stream(
max_steps=max_steps,
use_vision=use_vision,
max_actions_per_step=max_actions_per_step,
tool_call_in_content=tool_call_in_content
tool_call_in_content=tool_call_in_content,
browser=_global_browser,
browser_context=_global_browser_context,
playwright=_global_playwright if use_own_browser else None
)
)
# Initialize values for streaming
html_content = "<div>Using browser...</div>"
html_content = "<div style='width:80vw; height:90vh'>Using browser...</div>"
final_result = errors = model_actions = model_thoughts = ""
recording = trace = None
@@ -390,7 +405,7 @@ async def run_with_stream(
try:
html_content = await capture_screenshot(_global_browser_context)
except Exception as e:
html_content = f"<div class='error'>Screenshot error: {str(e)}</div>"
html_content = f"<div class='error' style='width:80vw; height:90vh'>Screenshot error: {str(e)}</div>"
yield [
html_content,
@@ -426,7 +441,7 @@ async def run_with_stream(
except Exception as e:
import traceback
yield [
f"<div class='error'>Browser error: {str(e)}</div>",
f"<div class='error' style='width:80vw; height:90vh'>Browser error: {str(e)}</div>",
"",
f"Error: {str(e)}\n{traceback.format_exc()}",
"",
@@ -625,14 +640,14 @@ def create_ui(theme_name="Ocean"):
placeholder="Add any helpful context or instructions...",
info="Optional hints to help the LLM complete the task",
)
with gr.Row():
run_button = gr.Button("▶️ Run Agent", variant="primary", scale=2)
stop_button = gr.Button("⏹️ Stop", variant="stop", scale=1)
with gr.Row():
browser_view = gr.HTML(
value="<div>Waiting for browser session...</div>",
value="<div style='width:80vw; height:90vh'>Waiting for browser session...</div>",
label="Live Browser View",
)