Merge pull request #642 from k-w-lee/improve-use-cases

Improve use cases
2025-02-18 01:18:20 +03:00 · 2025-02-11 19:10:30 -08:00
parent a1c7f83908 6822ace171
commit f67222ed67
9 changed files with 119 additions and 61 deletions
--- a/examples/use-cases/README.md
+++ b/examples/use-cases/README.md
@@ -0,0 +1,15 @@
+# Use Cases of Browser-Use
+
+| File Name | Description |
+|-----------|------------|
+| `captcha.py` | Automates CAPTCHA solving on a demo website. |
+| `check_appointment.py` | Checks for available visa appointment slots on the Greece MFA website. |
+| `find_and_apply_to_jobs.py` | Searches for job listings, evaluates relevance based on a CV, and applies automatically. |
+| `online_coding_agent.py` | Implements a multi-agent system for online code editors, with separate agents for coding and execution. |
+| `post-twitter.py` | Provides a template for automated posting on X (Twitter), including new tweets, tagging, and replies. |
+| `scrolling_page.py` | Automates webpage scrolling with various scrolling actions and text search functionality. |
+| `twitter_post_using_cookies.py` | Automates posting on X (Twitter) using stored authentication cookies. |
+| `web_voyager_agent.py` | A general-purpose web navigation agent for tasks like flight booking and course searching. |
+
+
+
--- a/examples/use-cases/captcha.py
+++ b/examples/use-cases/captcha.py
@@ -1,7 +1,11 @@
 """
-Simple try of the agent.
+Goal: Automates CAPTCHA solving on a demo website.

+
+Simple try of the agent.
@dev You need to add OPENAI_API_KEY to your environment variables.
+NOTE: captchas are hard. It works for this example, but it does not work for e.g. captchas inside iframes.
+For this example it helps to zoom in.
 """

 import os
@@ -10,23 +14,23 @@ import sys
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

 import asyncio
-
 from langchain_openai import ChatOpenAI
-
 from browser_use import Agent
+from dotenv import load_dotenv

-# NOTE: captchas are hard. For this example it works. But e.g. for iframes it does not.
-# for this example it helps to zoom in.
-llm = ChatOpenAI(model='gpt-4o')
-agent = Agent(
-	task='go to https://captcha.com/demos/features/captcha-demo.aspx and solve the captcha',
-	llm=llm,
-)
-
+# Load environment variables
+load_dotenv()
+if not os.getenv('OPENAI_API_KEY'):
+    raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')

 async def main():
-	await agent.run()
-	input('Press Enter to exit')
+    llm = ChatOpenAI(model='gpt-4o')
+    agent = Agent(
+        task='go to https://captcha.com/demos/features/captcha-demo.aspx and solve the captcha',
+        llm=llm,
+    )
+    await agent.run()
+    input('Press Enter to exit')  # keep the browser open until the user confirms

-
-asyncio.run(main())
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/examples/use-cases/check_appointment.py
+++ b/examples/use-cases/check_appointment.py
@@ -1,29 +1,36 @@
+# Goal: Checks for available visa appointment slots on the Greece MFA website.
+
 import asyncio
 import os

-import dotenv
+from dotenv import load_dotenv
 from langchain_openai import ChatOpenAI
 from pydantic import BaseModel, SecretStr

 from browser_use.agent.service import Agent
 from browser_use.controller.service import Controller

-dotenv.load_dotenv()
-
+# Load environment variables
+load_dotenv()
+if not os.getenv('OPENAI_API_KEY'):
+    raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')

 controller = Controller()


 class WebpageInfo(BaseModel):
+	"""Model for webpage link."""
 	link: str = 'https://appointment.mfa.gr/en/reservations/aero/ireland-grcon-dub/'


@controller.action('Go to the webpage', param_model=WebpageInfo)
 def go_to_webpage(webpage_info: WebpageInfo):
+	"""Returns the webpage link."""
 	return webpage_info.link


 async def main():
+	"""Main function to execute the agent task."""
 	task = (
 		'Go to the Greece MFA webpage via the link I provided you.'
 		'Check the visa appointment dates. If there is no available date in this month, check the next month.'
@@ -33,8 +40,8 @@ async def main():
 	model = ChatOpenAI(model='gpt-4o-mini', api_key=SecretStr(os.getenv('OPENAI_API_KEY', '')))
 	agent = Agent(task, model, controller=controller, use_vision=True)

-	result = await agent.run()
+	await agent.run()


-if __name__ == '__main__':
+if __name__ == "__main__":
 	asyncio.run(main())
--- a/examples/use-cases/find_and_apply_to_jobs.py
+++ b/examples/use-cases/find_and_apply_to_jobs.py
@@ -1,34 +1,35 @@
 """
-Find and apply to jobs.
+Goal: Searches for job listings, evaluates relevance based on a CV, and applies automatically.

@dev You need to add OPENAI_API_KEY to your environment variables.
-
 Also you have to install PyPDF2 to read pdf files: pip install PyPDF2
 """

 import csv
 import os
-import re
 import sys
 from pathlib import Path
-
-from PyPDF2 import PdfReader
-
-from browser_use.browser.browser import Browser, BrowserConfig
+import logging
+from typing import List, Optional
+import asyncio

 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-import asyncio
-from typing import List, Optional

 from dotenv import load_dotenv
+from PyPDF2 import PdfReader
 from langchain_openai import AzureChatOpenAI, ChatOpenAI
 from pydantic import BaseModel, SecretStr

 from browser_use import ActionResult, Agent, Controller
 from browser_use.browser.context import BrowserContext
+from browser_use.browser.browser import Browser, BrowserConfig

+# Validate required environment variables
 load_dotenv()
-import logging
+required_env_vars = ["AZURE_OPENAI_KEY", "AZURE_OPENAI_ENDPOINT"]
+for var in required_env_vars:
+    if not os.getenv(var):
+        raise ValueError(f"{var} is not set. Please add it to your environment variables.")

 logger = logging.getLogger(__name__)
 # full screen mode
@@ -99,7 +100,7 @@ async def upload_cv(index: int, browser: BrowserContext):

 	try:
 		await file_upload_el.set_input_files(path)
-		msg = f'Successfully uploaded file to index {index}'
+		msg = f'Successfully uploaded file "{path}" to index {index}'
 		logger.info(msg)
 		return ActionResult(extracted_content=msg)
 	except Exception as e:
@@ -155,5 +156,5 @@ async def main():
 	await asyncio.gather(*[agent.run() for agent in agents])


-if __name__ == '__main__':
-	asyncio.run(main())
+if __name__ == "__main__":
+	asyncio.run(main())
--- a/examples/use-cases/online_coding_agent.py
+++ b/examples/use-cases/online_coding_agent.py
@@ -1,13 +1,20 @@
+# Goal: Implements a multi-agent system for online code editors, with separate agents for coding and execution.
+
 import os
 import sys
-
-from langchain_openai import ChatOpenAI
+import asyncio

 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-import asyncio
+from langchain_openai import ChatOpenAI
+from dotenv import load_dotenv

-from browser_use import Agent, Browser, Controller
+from browser_use import Agent, Browser
+
+# Load environment variables
+load_dotenv()
+if not os.getenv('OPENAI_API_KEY'):
+    raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')

 async def main():
    browser = Browser()
@@ -35,4 +42,5 @@ async def main():
        await executor.run()
        await coder.run()

-asyncio.run(main())
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/examples/use-cases/post-twitter.py
+++ b/examples/use-cases/post-twitter.py
@@ -1,4 +1,6 @@
 """
+Goal: Provides a template for automated posting on X (Twitter), including new tweets, tagging, and replies.
+
 X Posting Template using browser-use
 ----------------------------------------

@@ -20,18 +22,21 @@ Any issues, contact me on X @defichemist95
 import os
 import sys
 from typing import Optional
-from dataclasses import dataclass
-from dotenv import load_dotenv
-
-load_dotenv()
+import asyncio

 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-import asyncio
+from dataclasses import dataclass
+from dotenv import load_dotenv
 from langchain_openai import ChatOpenAI
+
 from browser_use.browser.browser import Browser, BrowserConfig
 from browser_use import Agent, Controller

+# Load environment variables
+load_dotenv()
+if not os.getenv('OPENAI_API_KEY'):
+    raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')

 # ============ Configuration Section ============
@dataclass
@@ -114,9 +119,9 @@ async def post_tweet(agent: Agent):
        print(f"Error posting tweet: {str(e)}")


-def main():
+async def main():
     agent = create_twitter_agent(config)
-    asyncio.run(post_tweet(agent))
+    await post_tweet(agent)  # run via post_tweet so its error reporting is not bypassed

 if __name__ == "__main__":
-    main()
+    asyncio.run(main())
--- a/examples/use-cases/scrolling_page.py
+++ b/examples/use-cases/scrolling_page.py
@@ -1,15 +1,21 @@
+# Goal: Automates webpage scrolling with various scrolling actions and text search functionality.
+
 import os
 import sys
-
-from browser_use.browser.browser import Browser, BrowserConfig
+import asyncio

 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-import asyncio
-
 from langchain_openai import ChatOpenAI
-
 from browser_use import Agent
+from dotenv import load_dotenv
+
+from browser_use.browser.browser import Browser, BrowserConfig
+
+# Load environment variables
+load_dotenv()
+if not os.getenv('OPENAI_API_KEY'):
+	raise ValueError('OPENAI_API_KEY is not set')

 """
 Example: Using the 'Scroll down' action.
@@ -32,5 +38,5 @@ async def main():
 	await agent.run()


-if __name__ == '__main__':
+if __name__ == "__main__":
 	asyncio.run(main())
--- a/examples/use-cases/twitter_post_using_cookies.py
+++ b/examples/use-cases/twitter_post_using_cookies.py
@@ -1,3 +1,5 @@
+# Goal: Automates posting on X (Twitter) using stored authentication cookies.
+
 import asyncio
 import os

@@ -8,7 +10,6 @@ from pydantic import SecretStr
 from browser_use import Agent
 from browser_use.browser.browser import Browser, BrowserConfig
 from browser_use.browser.context import BrowserContext, BrowserContextConfig
-from browser_use.controller.service import Controller

 load_dotenv()
 api_key = os.getenv('GEMINI_API_KEY')
@@ -27,7 +28,7 @@ file_path = os.path.join(os.path.dirname(__file__), 'twitter_cookies.txt')
 context = BrowserContext(browser=browser, config=BrowserContextConfig(cookies_file=file_path))


-async def run_search():
+async def main():
 	agent = Agent(
 		browser_context=context,
 		task=('go to https://x.com. write a new post with the text "browser-use ftw", and submit it'),
@@ -38,5 +39,5 @@ async def run_search():
 	input('Press Enter to close the browser...')


-if __name__ == '__main__':
-	asyncio.run(run_search())
+if __name__ == "__main__":
+	asyncio.run(main())
--- a/examples/use-cases/web_voyager_agent.py
+++ b/examples/use-cases/web_voyager_agent.py
@@ -1,17 +1,28 @@
+# Goal: A general-purpose web navigation agent for tasks like flight booking and course searching.
+
 import os
 import sys
+import asyncio

+# Adjust Python path
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-import asyncio
-import os
-
-from langchain_openai import AzureChatOpenAI
+from dotenv import load_dotenv
 from pydantic import SecretStr
+from langchain_openai import AzureChatOpenAI

 from browser_use.agent.service import Agent
 from browser_use.browser.browser import Browser, BrowserConfig, BrowserContextConfig

+# Load environment variables
+load_dotenv()
+
+# Validate required environment variables
+required_env_vars = ["AZURE_OPENAI_KEY", "AZURE_OPENAI_ENDPOINT"]
+for var in required_env_vars:
+    if not os.getenv(var):
+        raise ValueError(f"{var} is not set. Please add it to your environment variables.")
+
 browser = Browser(
 	config=BrowserConfig(
 		headless=False,  # This is True in production
@@ -57,5 +68,5 @@ async def main():
 	history.save_to_file('./tmp/history.json')


-if __name__ == '__main__':
+if __name__ == "__main__":
 	asyncio.run(main())