feat: adapt to new version of browser-use
requirements.txt
@@ -1,5 +1,5 @@
-browser-use==0.1.17
-langchain-google-genai
+browser-use>=0.1.18
+langchain-google-genai>=2.0.8
 pyperclip
 gradio
 langchain-ollama
src/agent/custom_agent.py
@@ -6,6 +6,8 @@
 import json
 import logging
 import pdb
 import traceback
 from typing import Optional, Type

 from browser_use.agent.prompts import SystemPrompt
@@ -37,51 +39,53 @@ logger = logging.getLogger(__name__)

 class CustomAgent(Agent):
     def __init__(
-        self,
-        task: str,
-        llm: BaseChatModel,
-        add_infos: str = "",
-        browser: Browser | None = None,
-        browser_context: BrowserContext | None = None,
-        controller: Controller = Controller(),
-        use_vision: bool = True,
-        save_conversation_path: Optional[str] = None,
-        max_failures: int = 5,
-        retry_delay: int = 10,
-        system_prompt_class: Type[SystemPrompt] = SystemPrompt,
-        max_input_tokens: int = 128000,
-        validate_output: bool = False,
-        include_attributes: list[str] = [
-            "title",
-            "type",
-            "name",
-            "role",
-            "tabindex",
-            "aria-label",
-            "placeholder",
-            "value",
-            "alt",
-            "aria-expanded",
-        ],
-        max_error_length: int = 400,
-        max_actions_per_step: int = 10,
+        self,
+        task: str,
+        llm: BaseChatModel,
+        add_infos: str = "",
+        browser: Browser | None = None,
+        browser_context: BrowserContext | None = None,
+        controller: Controller = Controller(),
+        use_vision: bool = True,
+        save_conversation_path: Optional[str] = None,
+        max_failures: int = 5,
+        retry_delay: int = 10,
+        system_prompt_class: Type[SystemPrompt] = SystemPrompt,
+        max_input_tokens: int = 128000,
+        validate_output: bool = False,
+        include_attributes: list[str] = [
+            "title",
+            "type",
+            "name",
+            "role",
+            "tabindex",
+            "aria-label",
+            "placeholder",
+            "value",
+            "alt",
+            "aria-expanded",
+        ],
+        max_error_length: int = 400,
+        max_actions_per_step: int = 10,
+        tool_call_in_content: bool = True,
     ):
         super().__init__(
-            task,
-            llm,
-            browser,
-            browser_context,
-            controller,
-            use_vision,
-            save_conversation_path,
-            max_failures,
-            retry_delay,
-            system_prompt_class,
-            max_input_tokens,
-            validate_output,
-            include_attributes,
-            max_error_length,
-            max_actions_per_step,
+            task=task,
+            llm=llm,
+            browser=browser,
+            browser_context=browser_context,
+            controller=controller,
+            use_vision=use_vision,
+            save_conversation_path=save_conversation_path,
+            max_failures=max_failures,
+            retry_delay=retry_delay,
+            system_prompt_class=system_prompt_class,
+            max_input_tokens=max_input_tokens,
+            validate_output=validate_output,
+            include_attributes=include_attributes,
+            max_error_length=max_error_length,
+            max_actions_per_step=max_actions_per_step,
+            tool_call_in_content=tool_call_in_content,
         )
         self.add_infos = add_infos
         self.message_manager = CustomMassageManager(
@@ -93,6 +97,7 @@ class CustomAgent(Agent):
             include_attributes=self.include_attributes,
             max_error_length=self.max_error_length,
             max_actions_per_step=self.max_actions_per_step,
+            tool_call_in_content=tool_call_in_content,
         )

     def _setup_action_models(self) -> None:
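Note: the super().__init__ call switches from positional to keyword arguments. A minimal sketch of why this matters, using a hypothetical base class (not browser-use code): if upstream inserts or reorders a parameter, positional forwarding silently binds values to the wrong names, while keyword forwarding keeps working or fails loudly.

# Hypothetical upstream change: a new parameter lands between task and llm.
class AgentV1:
    def __init__(self, task, llm):
        self.task, self.llm = task, llm

class AgentV2:
    def __init__(self, task, injected_extra=None, llm=None):
        self.task, self.llm = task, llm

llm = object()  # stand-in for a chat model
a = AgentV2("buy milk", llm)           # positional: llm silently lands in injected_extra
b = AgentV2(task="buy milk", llm=llm)  # keyword: still binds to the right parameter
assert a.llm is None and b.llm is llm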
@@ -122,7 +127,7 @@ class CustomAgent(Agent):
         )

     def update_step_info(
-        self, model_output: CustomAgentOutput, step_info: CustomAgentStepInfo = None
+        self, model_output: CustomAgentOutput, step_info: CustomAgentStepInfo = None
     ):
         """
         update step info
@@ -133,9 +138,9 @@ class CustomAgent(Agent):
         step_info.step_number += 1
         important_contents = model_output.current_state.important_contents
         if (
-            important_contents
-            and "None" not in important_contents
-            and important_contents not in step_info.memory
+            important_contents
+            and "None" not in important_contents
+            and important_contents not in step_info.memory
         ):
             step_info.memory += important_contents + "\n"
@@ -146,16 +151,35 @@ class CustomAgent(Agent):
     @time_execution_async("--get_next_action")
     async def get_next_action(self, input_messages: list[BaseMessage]) -> AgentOutput:
         """Get next action from LLM based on current state"""
+        try:
+            structured_llm = self.llm.with_structured_output(self.AgentOutput, include_raw=True)
+            response: dict[str, Any] = await structured_llm.ainvoke(input_messages)  # type: ignore

-        ret = self.llm.invoke(input_messages)
-        parsed_json = json.loads(ret.content.replace("```json", "").replace("```", ""))
-        parsed: AgentOutput = self.AgentOutput(**parsed_json)
-        # cut the number of actions to max_actions_per_step
-        parsed.action = parsed.action[: self.max_actions_per_step]
-        self._log_response(parsed)
-        self.n_steps += 1
+            parsed: AgentOutput = response['parsed']
+            # cut the number of actions to max_actions_per_step
+            parsed.action = parsed.action[: self.max_actions_per_step]
+            self._log_response(parsed)
+            self.n_steps += 1

-        return parsed
+            return parsed
+        except Exception as e:
+            # If something goes wrong, try to invoke the LLM again without structured output,
+            # and manually parse the response. Temporary solution for DeepSeek.
+            ret = self.llm.invoke(input_messages)
+            if isinstance(ret.content, list):
+                parsed_json = json.loads(ret.content[0].replace("```json", "").replace("```", ""))
+            else:
+                parsed_json = json.loads(ret.content.replace("```json", "").replace("```", ""))
+            parsed: AgentOutput = self.AgentOutput(**parsed_json)
+            if parsed is None:
+                raise ValueError(f'Could not parse response.')
+
+            # cut the number of actions to max_actions_per_step
+            parsed.action = parsed.action[: self.max_actions_per_step]
+            self._log_response(parsed)
+            self.n_steps += 1
+
+            return parsed

     @time_execution_async("--step")
     async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None:
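Note: get_next_action now prefers LangChain's structured-output path and keeps the manual JSON parse only as a fallback. A minimal sketch of the pattern, assuming a LangChain chat model and a pydantic AgentOutput schema (names reused from the diff for illustration):

import json

async def next_action(llm, AgentOutput, messages):
    try:
        # include_raw=True makes LangChain return {"raw": ..., "parsed": ..., "parsing_error": ...}
        structured_llm = llm.with_structured_output(AgentOutput, include_raw=True)
        response = await structured_llm.ainvoke(messages)
        return response["parsed"]
    except Exception:
        # Fallback for providers with weak tool-calling support (DeepSeek in this commit):
        # strip markdown code fences and parse the JSON body by hand.
        content = llm.invoke(messages).content
        if isinstance(content, list):
            content = content[0]
        return AgentOutput(**json.loads(content.replace("```json", "").replace("```", "")))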
@@ -233,7 +257,7 @@ class CustomAgent(Agent):

             if self.history.is_done():
                 if (
-                    self.validate_output and step < max_steps - 1
+                    self.validate_output and step < max_steps - 1
                 ):  # if last step, we don't need to validate
                     if not await self._validate_output():
                         continue
src/agent/custom_massage_manager.py
@@ -17,6 +17,7 @@ from browser_use.browser.views import BrowserState
 from langchain_core.language_models import BaseChatModel
 from langchain_core.messages import (
     HumanMessage,
+    AIMessage
 )

 from .custom_prompts import CustomAgentMessagePrompt
@@ -26,40 +27,70 @@ logger = logging.getLogger(__name__)

 class CustomMassageManager(MessageManager):
     def __init__(
-        self,
-        llm: BaseChatModel,
-        task: str,
-        action_descriptions: str,
-        system_prompt_class: Type[SystemPrompt],
-        max_input_tokens: int = 128000,
-        estimated_tokens_per_character: int = 3,
-        image_tokens: int = 800,
-        include_attributes: list[str] = [],
-        max_error_length: int = 400,
-        max_actions_per_step: int = 10,
+        self,
+        llm: BaseChatModel,
+        task: str,
+        action_descriptions: str,
+        system_prompt_class: Type[SystemPrompt],
+        max_input_tokens: int = 128000,
+        estimated_tokens_per_character: int = 3,
+        image_tokens: int = 800,
+        include_attributes: list[str] = [],
+        max_error_length: int = 400,
+        max_actions_per_step: int = 10,
+        tool_call_in_content: bool = False,
     ):
         super().__init__(
-            llm,
-            task,
-            action_descriptions,
-            system_prompt_class,
-            max_input_tokens,
-            estimated_tokens_per_character,
-            image_tokens,
-            include_attributes,
-            max_error_length,
-            max_actions_per_step,
+            llm=llm,
+            task=task,
+            action_descriptions=action_descriptions,
+            system_prompt_class=system_prompt_class,
+            max_input_tokens=max_input_tokens,
+            estimated_tokens_per_character=estimated_tokens_per_character,
+            image_tokens=image_tokens,
+            include_attributes=include_attributes,
+            max_error_length=max_error_length,
+            max_actions_per_step=max_actions_per_step,
+            tool_call_in_content=tool_call_in_content,
         )

-        # Move Task info to state_message
+        # Custom: Move Task info to state_message
         self.history = MessageHistory()
         self._add_message_with_tokens(self.system_prompt)
+        tool_calls = [
+            {
+                'name': 'AgentOutput',
+                'args': {
+                    'current_state': {
+                        'evaluation_previous_goal': 'Unknown - No previous actions to evaluate.',
+                        'memory': '',
+                        'next_goal': 'Obtain task from user',
+                    },
+                    'action': [],
+                },
+                'id': '',
+                'type': 'tool_call',
+            }
+        ]
+        if self.tool_call_in_content:
+            # openai throws error if tool_calls are not responded -> move to content
+            example_tool_call = AIMessage(
+                content=f'{tool_calls}',
+                tool_calls=[],
+            )
+        else:
+            example_tool_call = AIMessage(
+                content=f'',
+                tool_calls=tool_calls,
+            )
+
+        self._add_message_with_tokens(example_tool_call)

     def add_state_message(
-        self,
-        state: BrowserState,
-        result: Optional[List[ActionResult]] = None,
-        step_info: Optional[AgentStepInfo] = None,
+        self,
+        state: BrowserState,
+        result: Optional[List[ActionResult]] = None,
+        step_info: Optional[AgentStepInfo] = None,
     ) -> None:
         """Add browser state as human message"""
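Note: the seeded example message above exists because backends differ in how they accept tool calls: per the commit's own comment, OpenAI-style APIs reject a history containing an unanswered tool call, and some local backends cannot consume tool-call messages at all, so tool_call_in_content chooses between embedding the call as plain text or as a native tool call. A minimal sketch of the two shapes, using langchain-core's AIMessage:

from langchain_core.messages import AIMessage

tool_calls = [{"name": "AgentOutput", "args": {"action": []}, "id": "", "type": "tool_call"}]

# Plain-text encoding: safe for providers that reject dangling tool calls.
as_content = AIMessage(content=f"{tool_calls}", tool_calls=[])
# Native encoding: for providers that understand tool-call messages.
as_tool_call = AIMessage(content="", tool_calls=tool_calls)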
@@ -72,7 +103,7 @@ class CustomMassageManager(MessageManager):
                     self._add_message_with_tokens(msg)
                 if r.error:
                     msg = HumanMessage(
-                        content=str(r.error)[-self.max_error_length :]
+                        content=str(r.error)[-self.max_error_length:]
                     )
                     self._add_message_with_tokens(msg)
         result = None  # if result in history, we don't want to add it again
src/agent/custom_prompts.py
@@ -24,7 +24,7 @@ class CustomSystemPrompt(SystemPrompt):
    {
        "current_state": {
            "prev_action_evaluation": "Success|Failed|Unknown - Analyze the current elements and the image to check if the previous goals/actions are successful like intended by the task. Ignore the action result. The website is the ground truth. Also mention if something unexpected happened like new suggestions in an input field. Shortly state why/why not. Note that the result you output must be consistent with the reasoning you output afterwards. If you consider it to be 'Failed,' you should reflect on this during your thought.",
-            "important_contents": "Output important contents closely related to user\'s instruction or task on the current page. If there is, please output the contents. If not, please output \"None\".",
+            "important_contents": "Output important contents closely related to user\'s instruction or task on the current page. If there is, please output the contents. If not, please output empty string ''.",
            "completed_contents": "Update the input Task Progress. Completed contents is a general summary of the current contents that have been completed. Just summarize the contents that have been actually completed based on the current page and the history operations. Please list each completed item individually, such as: 1. Input username. 2. Input Password. 3. Click confirm button",
            "thought": "Think about the requirements that have been completed in previous operations and the requirements that need to be completed in the next one operation. If the output of prev_action_evaluation is 'Failed', please reflect and output your reflection here. If you think you have entered the wrong page, consider to go back to the previous page in next action.",
            "summary": "Please generate a brief natural language description for the operation in next actions based on your Thought."
@@ -148,12 +148,12 @@ class CustomSystemPrompt(SystemPrompt):

 class CustomAgentMessagePrompt:
     def __init__(
-        self,
-        state: BrowserState,
-        result: Optional[List[ActionResult]] = None,
-        include_attributes: list[str] = [],
-        max_error_length: int = 400,
-        step_info: Optional[CustomAgentStepInfo] = None,
+        self,
+        state: BrowserState,
+        result: Optional[List[ActionResult]] = None,
+        include_attributes: list[str] = [],
+        max_error_length: int = 400,
+        step_info: Optional[CustomAgentStepInfo] = None,
     ):
         self.state = state
         self.result = result
@@ -183,7 +183,7 @@ class CustomAgentMessagePrompt:
                state_description += f"\nResult of action {i + 1}/{len(self.result)}: {result.extracted_content}"
                if result.error:
                    # only use last 300 characters of error
-                    error = result.error[-self.max_error_length :]
+                    error = result.error[-self.max_error_length:]
                    state_description += (
                        f"\nError of action {i + 1}/{len(self.result)}: ...{error}"
                    )
src/browser/custom_context.py
@@ -23,11 +23,12 @@ class CustomBrowserContext(BrowserContext):
        config: BrowserContextConfig = BrowserContextConfig(),
        context: BrowserContext = None,
    ):
-        super(CustomBrowserContext, self).__init__(browser, config)
+        super(CustomBrowserContext, self).__init__(browser=browser, config=config)
        self.context = context

    async def _create_context(self, browser: PlaywrightBrowser):
        """Creates a new browser context with anti-detection measures and loads cookies if available."""
+        # If we have a context, return it directly
+        if self.context:
+            return self.context
        if self.browser.config.chrome_instance_path and len(browser.contexts) > 0:
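Note: the new early return is what lets callers inject an already-created Playwright context (browser_context_ in webui.py below) and have the agent reuse it instead of spawning a fresh profile. Sketch of the control flow, simplified from the method above and not the full browser-use API:

async def _create_context(self, browser):
    if self.context:
        # An externally created context was injected; reuse it as-is.
        return self.context
    # Otherwise fall through to creating a new context with the configured
    # recording, security, and window-size options.
    ...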
@@ -46,7 +47,7 @@ class CustomBrowserContext(BrowserContext):
            bypass_csp=self.config.disable_security,
            ignore_https_errors=self.config.disable_security,
            record_video_dir=self.config.save_recording_path,
-            record_video_size=self.config.browser_window_size,  # set record video size
+            record_video_size=self.config.browser_window_size,  # set record video size, same as window size
        )

        if self.config.trace_path:
src/utils/utils.py
@@ -86,6 +86,7 @@ def get_llm_model(provider: str, **kwargs):
        return ChatOllama(
            model=kwargs.get("model_name", "qwen2.5:7b"),
            temperature=kwargs.get("temperature", 0.0),
+            num_ctx=128000,
        )
    elif provider == "azure_openai":
        if not kwargs.get("base_url", ""):
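Note: num_ctx is the Ollama-specific context-window size; without it Ollama serves most models with a small default window (historically 2048 tokens), which would truncate the agent's long prompts. Setting 128000 assumes the model and local memory can handle it; a more conservative sketch:

from langchain_ollama import ChatOllama

# Assumes a local Ollama server with qwen2.5:7b pulled.
llm = ChatOllama(model="qwen2.5:7b", temperature=0.0, num_ctx=32768)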
tests/test_browser_use.py
@@ -80,11 +80,12 @@ async def test_browser_use_org():

async def test_browser_use_custom():
    from browser_use.browser.context import BrowserContextWindowSize
+    from browser_use.browser.browser import BrowserConfig
    from playwright.async_api import async_playwright

    from src.agent.custom_agent import CustomAgent
    from src.agent.custom_prompts import CustomSystemPrompt
-    from src.browser.custom_browser import BrowserConfig, CustomBrowser
+    from src.browser.custom_browser import CustomBrowser
    from src.browser.custom_context import BrowserContextConfig
    from src.controller.custom_controller import CustomController
@@ -95,15 +96,15 @@ async def test_browser_use_custom():
    #     model_name="gpt-4o",
    #     temperature=0.8,
    #     base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
-    #     api_key=os.getenv("AZURE_OPENAI_API_KEY", "")
+    #     api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
    # )

-    # llm = utils.get_llm_model(
-    #     provider="gemini",
-    #     model_name="gemini-2.0-flash-exp",
-    #     temperature=1.0,
-    #     api_key=os.getenv("GOOGLE_API_KEY", "")
-    # )
+    llm = utils.get_llm_model(
+        provider="gemini",
+        model_name="gemini-2.0-flash-exp",
+        temperature=1.0,
+        api_key=os.getenv("GOOGLE_API_KEY", "")
+    )

    # llm = utils.get_llm_model(
    #     provider="deepseek",
@@ -111,14 +112,16 @@ async def test_browser_use_custom():
    #     temperature=0.8
    # )

-    llm = utils.get_llm_model(
-        provider="ollama", model_name="qwen2.5:7b", temperature=0.8
-    )
+    # llm = utils.get_llm_model(
+    #     provider="ollama", model_name="qwen2.5:7b", temperature=0.8
+    # )

    controller = CustomController()
    use_own_browser = False
    disable_security = True
-    use_vision = False
+    use_vision = True  # Set to False when using DeepSeek
+    tool_call_in_content = True  # Set to True when using Ollama
+    max_actions_per_step = 1
    playwright = None
    browser_context_ = None
    try:
@@ -171,6 +174,8 @@ async def test_browser_use_custom():
            controller=controller,
            system_prompt_class=CustomSystemPrompt,
            use_vision=use_vision,
+            tool_call_in_content=tool_call_in_content,
+            max_actions_per_step=max_actions_per_step
        )
        history: AgentHistoryList = await agent.run(max_steps=10)
webui.py
@@ -29,22 +29,24 @@ from src.utils import utils


async def run_browser_agent(
-    agent_type,
-    llm_provider,
-    llm_model_name,
-    llm_temperature,
-    llm_base_url,
-    llm_api_key,
-    use_own_browser,
-    headless,
-    disable_security,
-    window_w,
-    window_h,
-    save_recording_path,
-    task,
-    add_infos,
-    max_steps,
-    use_vision,
+    agent_type,
+    llm_provider,
+    llm_model_name,
+    llm_temperature,
+    llm_base_url,
+    llm_api_key,
+    use_own_browser,
+    headless,
+    disable_security,
+    window_w,
+    window_h,
+    save_recording_path,
+    task,
+    add_infos,
+    max_steps,
+    use_vision,
+    max_actions_per_step,
+    tool_call_in_content
):
    # Ensure the recording directory exists
    os.makedirs(save_recording_path, exist_ok=True)
@@ -74,6 +76,8 @@ async def run_browser_agent(
            task=task,
            max_steps=max_steps,
            use_vision=use_vision,
+            max_actions_per_step=max_actions_per_step,
+            tool_call_in_content=tool_call_in_content
        )
    elif agent_type == "custom":
        final_result, errors, model_actions, model_thoughts = await run_custom_agent(
@@ -88,6 +92,8 @@ async def run_browser_agent(
            add_infos=add_infos,
            max_steps=max_steps,
            use_vision=use_vision,
+            max_actions_per_step=max_actions_per_step,
+            tool_call_in_content=tool_call_in_content
        )
    else:
        raise ValueError(f"Invalid agent type: {agent_type}")
@@ -107,15 +113,18 @@ async def run_browser_agent(


async def run_org_agent(
-    llm,
-    headless,
-    disable_security,
-    window_w,
-    window_h,
-    save_recording_path,
-    task,
-    max_steps,
-    use_vision,
+    llm,
+    headless,
+    disable_security,
+    window_w,
+    window_h,
+    save_recording_path,
+    task,
+    max_steps,
+    use_vision,
+    max_actions_per_step,
+    tool_call_in_content
+
):
    browser = Browser(
        config=BrowserConfig(
@@ -125,20 +134,22 @@ async def run_org_agent(
        )
    )
    async with await browser.new_context(
-        config=BrowserContextConfig(
-            trace_path="./tmp/traces",
-            save_recording_path=save_recording_path if save_recording_path else None,
-            no_viewport=False,
-            browser_window_size=BrowserContextWindowSize(
-                width=window_w, height=window_h
-            ),
-        )
+        config=BrowserContextConfig(
+            trace_path="./tmp/traces",
+            save_recording_path=save_recording_path if save_recording_path else None,
+            no_viewport=False,
+            browser_window_size=BrowserContextWindowSize(
+                width=window_w, height=window_h
+            ),
+        )
    ) as browser_context:
        agent = Agent(
            task=task,
            llm=llm,
            use_vision=use_vision,
            browser_context=browser_context,
+            max_actions_per_step=max_actions_per_step,
+            tool_call_in_content=tool_call_in_content
        )
        history = await agent.run(max_steps=max_steps)
@@ -151,17 +162,19 @@ async def run_org_agent(


async def run_custom_agent(
-    llm,
-    use_own_browser,
-    headless,
-    disable_security,
-    window_w,
-    window_h,
-    save_recording_path,
-    task,
-    add_infos,
-    max_steps,
-    use_vision,
+    llm,
+    use_own_browser,
+    headless,
+    disable_security,
+    window_w,
+    window_h,
+    save_recording_path,
+    task,
+    add_infos,
+    max_steps,
+    use_vision,
+    max_actions_per_step,
+    tool_call_in_content
):
    controller = CustomController()
    playwright = None
@@ -197,17 +210,17 @@ async def run_custom_agent(
            )
        )
        async with await browser.new_context(
-            config=BrowserContextConfig(
-                trace_path="./tmp/result_processing",
-                save_recording_path=save_recording_path
-                if save_recording_path
-                else None,
-                no_viewport=False,
-                browser_window_size=BrowserContextWindowSize(
-                    width=window_w, height=window_h
-                ),
-            ),
-            context=browser_context_,
+            config=BrowserContextConfig(
+                trace_path="./tmp/result_processing",
+                save_recording_path=save_recording_path
+                if save_recording_path
+                else None,
+                no_viewport=False,
+                browser_window_size=BrowserContextWindowSize(
+                    width=window_w, height=window_h
+                ),
+            ),
+            context=browser_context_,
        ) as browser_context:
            agent = CustomAgent(
                task=task,
@@ -217,6 +230,8 @@ async def run_custom_agent(
                browser_context=browser_context,
                controller=controller,
                system_prompt_class=CustomSystemPrompt,
+                max_actions_per_step=max_actions_per_step,
+                tool_call_in_content=tool_call_in_content
            )
            history = await agent.run(max_steps=max_steps)
@@ -290,7 +305,7 @@ def create_ui(theme_name="Ocean"):
    """

    with gr.Blocks(
-        title="Browser Use WebUI", theme=theme_map[theme_name], css=css, js=js
+        title="Browser Use WebUI", theme=theme_map[theme_name], css=css, js=js
    ) as demo:
        with gr.Row():
            gr.Markdown(
@@ -318,11 +333,24 @@ def create_ui(theme_name="Ocean"):
                        label="Max Run Steps",
                        info="Maximum number of steps the agent will take",
                    )
+                    max_actions_per_step = gr.Slider(
+                        minimum=1,
+                        maximum=20,
+                        value=10,
+                        step=1,
+                        label="Max Actions per Step",
+                        info="Maximum number of actions the agent will take per step",
+                    )
                    use_vision = gr.Checkbox(
                        label="Use Vision",
                        value=True,
                        info="Enable visual processing capabilities",
                    )
+                    tool_call_in_content = gr.Checkbox(
+                        label="Use Tool Calls in Content",
+                        value=True,
+                        info="Enable Tool Calls in content",
+                    )

            with gr.TabItem("🔧 LLM Configuration", id=2):
                with gr.Group():
@@ -461,6 +489,8 @@ def create_ui(theme_name="Ocean"):
                add_infos,
                max_steps,
                use_vision,
+                max_actions_per_step,
+                tool_call_in_content
            ],
            outputs=[
                final_result_output,