Merge pull request #642 from k-w-lee/improve-use-cases

Improve use cases
This commit is contained in:
Magnus Müller
2025-02-11 19:10:30 -08:00
committed by GitHub
9 changed files with 119 additions and 61 deletions

View File

@@ -0,0 +1,15 @@
# Use Cases of Browser-Use
| File Name | Description |
|-----------|------------|
| `captcha.py` | Automates CAPTCHA solving on a demo website. |
| `check_appointment.py` | Checks for available visa appointment slots on the Greece MFA website. |
| `find_and_apply_to_jobs.py` | Searches for job listings, evaluates relevance based on a CV, and applies automatically. |
| `online_coding_agent.py` | Implements a multi-agent system for online code editors, with separate agents for coding and execution. |
| `post-twitter.py` | Provides a template for automated posting on X (Twitter), including new tweets, tagging, and replies. |
| `scrolling_page.py` | Automates webpage scrolling with various scrolling actions and text search functionality. |
| `twitter_post_using_cookies.py` | Automates posting on X (Twitter) using stored authentication cookies. |
| `web_voyager_agent.py` | A general-purpose web navigation agent for tasks like flight booking and course searching. |

View File

@@ -1,7 +1,11 @@
"""
Simple try of the agent.
Goal: Automates CAPTCHA solving on a demo website.
Simple try of the agent.
@dev You need to add OPENAI_API_KEY to your environment variables.
NOTE: CAPTCHAs are hard. This example works, but CAPTCHAs inside iframes, for example, do not.
For this example it helps to zoom in.
"""
import os
@@ -10,23 +14,23 @@ import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import asyncio
from langchain_openai import ChatOpenAI
from browser_use import Agent
from dotenv import load_dotenv
# NOTE: captchas are hard. For this example it works. But e.g. for iframes it does not.
# for this example it helps to zoom in.
llm = ChatOpenAI(model='gpt-4o')
agent = Agent(
task='go to https://captcha.com/demos/features/captcha-demo.aspx and solve the captcha',
llm=llm,
)
# Load environment variables
load_dotenv()
if not os.getenv('OPENAI_API_KEY'):
raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
async def main():
await agent.run()
input('Press Enter to exit')
llm = ChatOpenAI(model='gpt-4o')
agent = Agent(
task='go to https://captcha.com/demos/features/captcha-demo.aspx and solve the captcha',
llm=llm,
)
await agent.run()
input('Press Enter to exit')
asyncio.run(main())
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,29 +1,36 @@
# Goal: Checks for available visa appointment slots on the Greece MFA website.
import asyncio
import os
import dotenv
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, SecretStr
from browser_use.agent.service import Agent
from browser_use.controller.service import Controller
dotenv.load_dotenv()
# Load environment variables
load_dotenv()
if not os.getenv('OPENAI_API_KEY'):
raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
controller = Controller()
# Parameter model handed to the 'Go to the webpage' controller action as `param_model`.
class WebpageInfo(BaseModel):
"""Model for webpage link."""
# Default target: a reservations page on appointment.mfa.gr.
link: str = 'https://appointment.mfa.gr/en/reservations/aero/ireland-grcon-dub/'
# Decorated with controller.action(...) using the description 'Go to the webpage';
# the argument is described by the WebpageInfo model.
@controller.action('Go to the webpage', param_model=WebpageInfo)
def go_to_webpage(webpage_info: WebpageInfo):
"""Returns the webpage link."""
# No navigation happens in this function itself; it only hands back the link string.
return webpage_info.link
async def main():
"""Main function to execute the agent task."""
task = (
'Go to the Greece MFA webpage via the link I provided you.'
'Check the visa appointment dates. If there is no available date in this month, check the next month.'
@@ -33,8 +40,8 @@ async def main():
model = ChatOpenAI(model='gpt-4o-mini', api_key=SecretStr(os.getenv('OPENAI_API_KEY', '')))
agent = Agent(task, model, controller=controller, use_vision=True)
result = await agent.run()
await agent.run()
if __name__ == '__main__':
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,34 +1,35 @@
"""
Find and apply to jobs.
Goal: Searches for job listings, evaluates relevance based on a CV, and applies automatically.
@dev You need to add OPENAI_API_KEY to your environment variables.
You also need to install PyPDF2 to read PDF files: pip install PyPDF2
"""
import csv
import os
import re
import sys
from pathlib import Path
from PyPDF2 import PdfReader
from browser_use.browser.browser import Browser, BrowserConfig
import logging
from typing import List, Optional
import asyncio
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import asyncio
from typing import List, Optional
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain_openai import AzureChatOpenAI, ChatOpenAI
from pydantic import BaseModel, SecretStr
from browser_use import ActionResult, Agent, Controller
from browser_use.browser.context import BrowserContext
from browser_use.browser.browser import Browser, BrowserConfig
# Validate required environment variables
load_dotenv()
import logging
required_env_vars = ["AZURE_OPENAI_KEY", "AZURE_OPENAI_ENDPOINT"]
for var in required_env_vars:
if not os.getenv(var):
raise ValueError(f"{var} is not set. Please add it to your environment variables.")
logger = logging.getLogger(__name__)
# full screen mode
@@ -99,7 +100,7 @@ async def upload_cv(index: int, browser: BrowserContext):
try:
await file_upload_el.set_input_files(path)
msg = f'Successfully uploaded file to index {index}'
msg = f'Successfully uploaded file "{path}" to index {index}'
logger.info(msg)
return ActionResult(extracted_content=msg)
except Exception as e:
@@ -155,5 +156,5 @@ async def main():
await asyncio.gather(*[agent.run() for agent in agents])
if __name__ == '__main__':
asyncio.run(main())
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,13 +1,20 @@
# Goal: Implements a multi-agent system for online code editors, with separate agents for coding and execution.
import os
import sys
from langchain_openai import ChatOpenAI
import asyncio
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import asyncio
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from browser_use import Agent, Browser, Controller
from browser_use import Agent, Browser
# Load environment variables
load_dotenv()
if not os.getenv('OPENAI_API_KEY'):
raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
async def main():
browser = Browser()
@@ -35,4 +42,5 @@ async def main():
await executor.run()
await coder.run()
asyncio.run(main())
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,4 +1,6 @@
"""
Goal: Provides a template for automated posting on X (Twitter), including new tweets, tagging, and replies.
X Posting Template using browser-use
----------------------------------------
@@ -20,18 +22,21 @@ Any issues, contact me on X @defichemist95
import os
import sys
from typing import Optional
from dataclasses import dataclass
from dotenv import load_dotenv
load_dotenv()
import asyncio
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import asyncio
from dataclasses import dataclass
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use import Agent, Controller
# Load environment variables
load_dotenv()
if not os.getenv('OPENAI_API_KEY'):
raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
# ============ Configuration Section ============
@dataclass
@@ -114,9 +119,9 @@ async def post_tweet(agent: Agent):
print(f"Error posting tweet: {str(e)}")
def main():
async def main():
agent = create_twitter_agent(config)
asyncio.run(post_tweet(agent))
await agent.run()
if __name__ == "__main__":
main()
asyncio.run(main())

View File

@@ -1,15 +1,21 @@
# Goal: Automates webpage scrolling with various scrolling actions and text search functionality.
import os
import sys
from browser_use.browser.browser import Browser, BrowserConfig
import asyncio
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import asyncio
from langchain_openai import ChatOpenAI
from browser_use import Agent
from dotenv import load_dotenv
from browser_use.browser.browser import Browser, BrowserConfig
# Load environment variables
load_dotenv()
if not os.getenv('OPENAI_API_KEY'):
raise ValueError('OPENAI_API_KEY is not set')
"""
Example: Using the 'Scroll down' action.
@@ -32,5 +38,5 @@ async def main():
await agent.run()
if __name__ == '__main__':
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,3 +1,5 @@
# Goal: Automates posting on X (Twitter) using stored authentication cookies.
import asyncio
import os
@@ -8,7 +10,6 @@ from pydantic import SecretStr
from browser_use import Agent
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import BrowserContext, BrowserContextConfig
from browser_use.controller.service import Controller
load_dotenv()
api_key = os.getenv('GEMINI_API_KEY')
@@ -27,7 +28,7 @@ file_path = os.path.join(os.path.dirname(__file__), 'twitter_cookies.txt')
context = BrowserContext(browser=browser, config=BrowserContextConfig(cookies_file=file_path))
async def run_search():
async def main():
agent = Agent(
browser_context=context,
task=('go to https://x.com. write a new post with the text "browser-use ftw", and submit it'),
@@ -38,5 +39,5 @@ async def run_search():
input('Press Enter to close the browser...')
if __name__ == '__main__':
asyncio.run(run_search())
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -1,17 +1,28 @@
# Goal: A general-purpose web navigation agent for tasks like flight booking and course searching.
import os
import sys
import asyncio
# Adjust Python path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import asyncio
import os
from langchain_openai import AzureChatOpenAI
from dotenv import load_dotenv
from pydantic import SecretStr
from langchain_openai import AzureChatOpenAI
from browser_use.agent.service import Agent
from browser_use.browser.browser import Browser, BrowserConfig, BrowserContextConfig
# Load environment variables
load_dotenv()
# Validate required environment variables
required_env_vars = ["AZURE_OPENAI_KEY", "AZURE_OPENAI_ENDPOINT"]
for var in required_env_vars:
if not os.getenv(var):
raise ValueError(f"{var} is not set. Please add it to your environment variables.")
browser = Browser(
config=BrowserConfig(
headless=False, # This is True in production
@@ -57,5 +68,5 @@ async def main():
history.save_to_file('./tmp/history.json')
if __name__ == '__main__':
if __name__ == "__main__":
asyncio.run(main())