mirror of https://github.com/UmutAlihan/custom_agent_tutorial.git (synced 2024-05-19 15:57:30 +03:00)
First Commit
README.md (new file, 47 lines)
# Setting Up and Running the Custom Agent Script

### Prerequisites

1. **Install Anaconda:**

   Download Anaconda from [https://www.anaconda.com/](https://www.anaconda.com/).

2. **Create a Virtual Environment:**

   ```bash
   conda create -n agent_env python=3.10 pip
   ```

3. **Activate the Virtual Environment:**

   ```bash
   conda activate agent_env
   ```
### Clone and Navigate to the Repository

1. **Clone the Repo:**

   ```bash
   git clone https://github.com/john-adeojo/custom_agent_tutorial.git
   ```

2. **Navigate to the Repo:**

   ```bash
   cd /path/to/your-repo/custom_agent_tutorial
   ```

3. **Install Requirements:**

   ```bash
   pip install -r requirements.txt
   ```
### Configure API Keys

1. **Open the `config.yaml`:**

   ```bash
   nano config.yaml
   ```

2. **Enter API Keys:**

   - **Serper API Key:** Get it from [https://serper.dev/](https://serper.dev/)
   - **OpenAI API Key:** Get it from [https://openai.com/](https://openai.com/)
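With both keys entered, `config.yaml` should contain exactly these two entries (placeholder values shown, not real keys):

```yaml
OPENAI_API_KEY: "your-openai-api-key"
SERPER_DEV_API_KEY: "your-serper-dev-api-key"
```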
### Run Your Query

```bash
python app.py "YOUR QUERY"
```
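Alternatively, `agent.py` in this commit has its own entry point that prompts for a query interactively:

```bash
python agent.py
```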
agent.py (new file, 161 lines)
import os
import json

import requests
import yaml
from termcolor import colored

from prompts import planning_agent_prompt, integration_agent_prompt
from search import WebSearcher


def load_config(file_path):
    # Load key/value pairs from the YAML config into environment variables.
    with open(file_path, 'r') as file:
        config = yaml.safe_load(file)
        for key, value in config.items():
            os.environ[key] = value


class Agent:
    def __init__(self, model, tool, temperature=0, max_tokens=1000,
                 planning_agent_prompt=None, integration_agent_prompt=None, verbose=False):
        load_config('config.yaml')
        self.api_key = os.getenv("OPENAI_API_KEY")
        self.url = 'https://api.openai.com/v1/chat/completions'
        self.headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {self.api_key}'
        }
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.tool = tool
        self.tool_specs = tool.__doc__  # the tool's docstring doubles as its spec for the planning prompt
        self.planning_agent_prompt = planning_agent_prompt
        self.integration_agent_prompt = integration_agent_prompt
        self.model = model
        self.verbose = verbose

    def run_planning_agent(self, query, plan=None, outputs=None, feedback=None):
        # Build the planning prompt from the previous plan, the tool outputs, and any feedback.
        system_prompt = self.planning_agent_prompt.format(
            outputs=outputs,
            plan=plan,
            feedback=feedback,
            tool_specs=self.tool_specs
        )

        data = {
            "model": self.model,
            "messages": [{"role": "user", "content": query},
                         {"role": "system", "content": system_prompt}],
            "temperature": self.temperature,
            "max_tokens": self.max_tokens
        }

        json_data = json.dumps(data)
        response = requests.post(self.url, headers=self.headers, data=json_data, timeout=180)
        response_dict = response.json()
        content = response_dict['choices'][0]['message']['content']
        print(colored(f"Planning Agent: {content}", 'green'))

        return content

    def run_integration_agent(self, query, plan, outputs):
        # Build the integration prompt from the plan and the tool outputs.
        system_prompt = self.integration_agent_prompt.format(
            outputs=outputs,
            plan=plan
        )

        data = {
            "model": self.model,
            "messages": [{"role": "user", "content": query},
                         {"role": "system", "content": system_prompt}],
            "temperature": self.temperature,
            "max_tokens": self.max_tokens
        }

        json_data = json.dumps(data)
        response = requests.post(self.url, headers=self.headers, data=json_data, timeout=180)
        response_dict = response.json()
        content = response_dict['choices'][0]['message']['content']
        print(colored(f"Integration Agent: {content}", 'blue'))

        return content

    def check_response(self, response, query):
        # Force a function call so the model returns a structured yes/no verdict.
        tools = [
            {
                "type": "function",
                "function": {
                    "name": "response_checker",
                    "description": "Check if the response meets the requirements",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "meets_requirements": {
                                "type": "string",
                                "description": """Check if the response meets the requirements of the query based on the following:
                                1. The response should be relevant to the query.
                                2. The response should be coherent and well-structured with citations.
                                3. The response should be comprehensive and address the query in its entirety.
                                Return 'yes' if the response meets the requirements and 'no' otherwise.
                                """
                            },
                        },
                        "required": ["meets_requirements"]
                    }
                }
            }
        ]

        data = {
            "model": self.model,
            "messages": [{"role": "user", "content": f"Response: {response} \n Query: {query}"}],
            "temperature": 0,
            "tools": tools,
            "tool_choice": "required"
        }

        json_data = json.dumps(data)
        response = requests.post(self.url, headers=self.headers, data=json_data, timeout=180)
        response_dict = response.json()

        tool_calls = response_dict['choices'][0]['message']['tool_calls'][0]
        arguments_json = json.loads(tool_calls['function']['arguments'])
        meets_requirements = arguments_json['meets_requirements']

        return meets_requirements == 'yes'

    def execute(self, max_iterations=5):
        # Loop: plan -> search -> integrate -> check, until the response passes or the cap is hit.
        query = input("Enter your query: ")
        tool = self.tool(model=self.model, verbose=self.verbose)
        meets_requirements = False
        plan = None
        outputs = None
        response = None
        iterations = 0

        while not meets_requirements and iterations < max_iterations:
            iterations += 1
            plan = self.run_planning_agent(query, plan=plan, outputs=outputs, feedback=response)
            outputs = tool.use_tool(plan=plan, query=query)
            response = self.run_integration_agent(query, plan, outputs)
            meets_requirements = self.check_response(response, query)

        print(colored(f"Final Response: {response}", 'cyan'))


if __name__ == '__main__':
    agent = Agent(model="gpt-3.5-turbo",
                  tool=WebSearcher,
                  planning_agent_prompt=planning_agent_prompt,
                  integration_agent_prompt=integration_agent_prompt,
                  verbose=True
                  )
    agent.execute()
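`Agent` treats its tool as a plug-in: it reads the tool class's docstring as `tool_specs` for the planning prompt, instantiates the class with `model` and `verbose`, and calls `use_tool(plan=..., query=...)` expecting a dict mapping sources to content. A minimal sketch of a stand-in tool that satisfies this contract (the `EchoTool` name and behaviour are invented for illustration, not part of the repository):

```python
class EchoTool:
    """EchoTool: echoes the plan and query back instead of searching the web."""

    def __init__(self, model, verbose=False):
        # Agent instantiates the tool with exactly these two arguments.
        self.model = model
        self.verbose = verbose

    def use_tool(self, plan=None, query=None):
        # Mimic WebSearcher's output shape: {source: content}.
        return {"echo://local": f"Plan: {plan}\nQuery: {query}"}
```

Passing `tool=EchoTool` instead of `tool=WebSearcher` in the `__main__` block exercises the plan/integrate/check loop without any Serper calls.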
architecture/architecture.drawio (new file, 52 lines)
<mxfile host="app.diagrams.net" modified="2024-05-14T11:14:08.035Z" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36" etag="VuY0quERgOPRJ4QZz9Re" version="24.3.1" type="device">
  <diagram name="Page-1" id="rU8JHjLior_zJRwLBU05">
    <mxGraphModel dx="2072" dy="1084" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="1169" pageHeight="1654" math="0" shadow="0">
      <root>
        <mxCell id="0" />
        <mxCell id="1" parent="0" />
        <mxCell id="Qcd6xKoqnelJGYbhtqcp-2" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Overall Workflow&lt;/font&gt;" style="swimlane;whiteSpace=wrap;html=1;startSize=40;" vertex="1" parent="1">
          <mxGeometry x="60" y="130" width="530" height="340" as="geometry" />
        </mxCell>
        <mxCell id="Qcd6xKoqnelJGYbhtqcp-4" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Agents&amp;nbsp;&lt;/font&gt;&lt;div&gt;&lt;font style=&quot;font-size: 24px;&quot;&gt;powered by LLMs&lt;/font&gt;&lt;/div&gt;" style="whiteSpace=wrap;html=1;aspect=fixed;" vertex="1" parent="Qcd6xKoqnelJGYbhtqcp-2">
          <mxGeometry x="290" y="87.5" width="165" height="165" as="geometry" />
        </mxCell>
        <mxCell id="Qcd6xKoqnelJGYbhtqcp-23" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Web Search Tool&lt;/font&gt;" style="whiteSpace=wrap;html=1;aspect=fixed;fillColor=#e3c800;fontColor=#000000;strokeColor=#B09500;" vertex="1" parent="Qcd6xKoqnelJGYbhtqcp-2">
          <mxGeometry x="63.75" y="87.5" width="165" height="165" as="geometry" />
        </mxCell>
        <mxCell id="Qcd6xKoqnelJGYbhtqcp-5" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Agents&amp;nbsp;&lt;/font&gt;" style="swimlane;whiteSpace=wrap;html=1;startSize=40;" vertex="1" parent="1">
          <mxGeometry x="60" y="590" width="960" height="500" as="geometry" />
        </mxCell>
        <mxCell id="Qcd6xKoqnelJGYbhtqcp-9" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="Qcd6xKoqnelJGYbhtqcp-5" source="Qcd6xKoqnelJGYbhtqcp-6" target="Qcd6xKoqnelJGYbhtqcp-3">
          <mxGeometry relative="1" as="geometry">
            <mxPoint x="380" y="336" as="targetPoint" />
          </mxGeometry>
        </mxCell>
        <mxCell id="Qcd6xKoqnelJGYbhtqcp-6" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Planning Agent&lt;/font&gt;" style="whiteSpace=wrap;html=1;aspect=fixed;align=center;fillColor=#008a00;fontColor=#ffffff;strokeColor=#005700;" vertex="1" parent="Qcd6xKoqnelJGYbhtqcp-5">
          <mxGeometry x="30" y="220" width="232.5" height="232.5" as="geometry" />
        </mxCell>
        <mxCell id="Qcd6xKoqnelJGYbhtqcp-10" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=0;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" edge="1" parent="Qcd6xKoqnelJGYbhtqcp-5" source="Qcd6xKoqnelJGYbhtqcp-7" target="Qcd6xKoqnelJGYbhtqcp-6">
          <mxGeometry relative="1" as="geometry">
            <Array as="points">
              <mxPoint x="763" y="130" />
              <mxPoint x="146" y="130" />
            </Array>
          </mxGeometry>
        </mxCell>
        <mxCell id="Qcd6xKoqnelJGYbhtqcp-11" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Feedback&lt;/font&gt;" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" vertex="1" connectable="0" parent="Qcd6xKoqnelJGYbhtqcp-10">
          <mxGeometry x="0.0146" y="-3" relative="1" as="geometry">
            <mxPoint x="-1" as="offset" />
          </mxGeometry>
        </mxCell>
        <mxCell id="Qcd6xKoqnelJGYbhtqcp-7" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Integration Agent&lt;/font&gt;" style="whiteSpace=wrap;html=1;aspect=fixed;fillColor=#a20025;fontColor=#ffffff;strokeColor=#6F0000;" vertex="1" parent="Qcd6xKoqnelJGYbhtqcp-5">
          <mxGeometry x="640" y="213.75" width="245" height="245" as="geometry" />
        </mxCell>
        <mxCell id="Qcd6xKoqnelJGYbhtqcp-22" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" edge="1" parent="Qcd6xKoqnelJGYbhtqcp-5" source="Qcd6xKoqnelJGYbhtqcp-3" target="Qcd6xKoqnelJGYbhtqcp-7">
          <mxGeometry relative="1" as="geometry" />
        </mxCell>
        <mxCell id="Qcd6xKoqnelJGYbhtqcp-3" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Web Search Tool&lt;/font&gt;" style="whiteSpace=wrap;html=1;aspect=fixed;fillColor=#e3c800;fontColor=#000000;strokeColor=#B09500;" vertex="1" parent="Qcd6xKoqnelJGYbhtqcp-5">
          <mxGeometry x="370" y="253.75" width="165" height="165" as="geometry" />
        </mxCell>
      </root>
    </mxGraphModel>
  </diagram>
</mxfile>
architecture/architecture.drawio.png (new binary file, 90 KiB, not shown)
config.yaml (new file, 2 lines)
OPENAI_API_KEY: "your-openai-api-key"
SERPER_DEV_API_KEY: "your-serper-dev-api-key"
prompts.py (new file, 23 lines)
planning_agent_prompt = ("You are an AI planning agent working with an integration agent. You have access to specialised tools. When addressing queries, you should follow this two-step methodology:\n"
                         "Step 1: Thought. Begin by contemplating the problem thoroughly and devising a plan of action.\n"
                         "Step 2: Action. Clearly state the inputs you will use with any tools necessary to address the problem. This preparation is essential for executing your plan effectively.\n"
                         "You must ensure your plan takes into account any feedback (if available).\n\n"
                         "Here are the outputs from the tools you have used: {outputs}\n\n"
                         "Here is your previous plan: {plan}\n\n"
                         "Here is the feedback: {feedback}\n\n"
                         "Here are the specifications of your tools:\n"
                         "{tool_specs}\n"
                         "Continue this process until you have gathered enough information to comprehensively answer the query."
                         )

integration_agent_prompt = ("You are an AI Integration Agent working with a planning agent. Your job is to synthesise the outputs from the planning agent into a coherent response.\n"
                            "You must do this by considering the plan, the outputs from the tools, and the original query.\n"
                            "If any of the information is not sufficient, you should provide feedback to the planning agent to refine the plan.\n"
                            "If the information is sufficient, you should provide a comprehensive response to the query with appropriate citations.\n"
                            "Your response to the query must be based on the outputs from the tools.\n"
                            "The output of the tool is a dictionary where the key is the URL source of the info and the value is the content of that URL.\n"
                            "You should use the source in citations.\n"
                            "Here are the outputs from the tool: {outputs}\n\n"
                            "Here is the plan from the planning agent: {plan}\n\n"
                            )
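Both prompts are plain `str.format` templates, so every `{placeholder}` must be supplied when the prompt is built. A minimal sketch of how `agent.py` fills the planning prompt on a first iteration (the `tool_specs` string below is illustrative, not the real `WebSearcher` docstring):

```python
from prompts import planning_agent_prompt

# On the first loop iteration there is no previous plan, tool output, or feedback yet,
# so agent.py passes None for all three.
system_prompt = planning_agent_prompt.format(
    outputs=None,
    plan=None,
    feedback=None,
    tool_specs="WebSearcher: generates a search query, fetches results, scrapes the best page."
)
print(system_prompt)
```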
requirements.txt (new file, 4 lines)
termcolor==2.4.0
PyYAML==6.0.1
requests==2.31.0
beautifulsoup4==4.12.3
search.py (new file, 224 lines)
import os
import json

import requests
import yaml
from bs4 import BeautifulSoup
from termcolor import colored


def load_config(file_path):
    # Load key/value pairs from the YAML config into environment variables.
    with open(file_path, 'r') as file:
        config = yaml.safe_load(file)
        for key, value in config.items():
            os.environ[key] = value


class WebSearcher:
    """
    A class that encapsulates methods for generating search queries, fetching search results,
    determining the best search pages, and scraping web content using the OpenAI API and other web services.

    This class is designed to interact with the OpenAI API to leverage its capabilities for generating
    search queries based on a provided plan and query. It integrates with the serper.dev API to fetch
    search results and then uses a combination of these results and additional OpenAI API calls to determine
    the most relevant web pages. Finally, it scrapes the content of the chosen best page.

    Methods:
        __init__(self, model, verbose=False): Initializes the WebSearcher instance, loads API keys from a
            configuration file, and sets up headers for HTTP requests.
        generate_searches(self, plan: str, query: str) -> str: Generates a search query based on the provided plan and query.
        get_search_page(self, search_results: str, plan: str, query: str) -> str: Determines the best search page URL
            based on the results and context.
        format_results(self, organic_results: list) -> str: Formats the search results into a more readable form.
        fetch_search_results(self, search_queries: str) -> str: Fetches detailed search results from the serper.dev API.
        scrape_website_content(self, website_url: str) -> dict: Scrapes and returns the content of the given website URL.
        use_tool(self, plan: str = None, query: str = None) -> dict: Orchestrates the other methods to perform a
            complete search-and-retrieve operation based on the specified plan and query.

    Usage Example:
        searcher = WebSearcher(model="gpt-3.5-turbo", verbose=True)
        results_dict = searcher.use_tool(plan="Research new AI techniques", query="Latest trends in AI")

    results_dict will contain the URL as a key and the scraped content from that URL as the value.
    """

    def __init__(self, model, verbose=False):
        load_config('config.yaml')
        self.api_key = os.getenv("OPENAI_API_KEY")
        self.url = 'https://api.openai.com/v1/chat/completions'
        self.headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {self.api_key}'
        }
        self.model = model
        self.verbose = verbose

    def generate_searches(self, plan, query):
        # Force a function call so the model returns a structured search query.
        tools = [
            {
                "type": "function",
                "function": {
                    "name": "fetch_search_results",
                    "description": "Fetch search results based on the search query",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "search_engine_queries": {
                                "type": "string",
                                "description": "The most suitable search query for the plan"
                            },
                        },
                        "required": ["search_engine_queries"]
                    }
                }
            }
        ]

        data = {
            "model": self.model,
            "messages": [{"role": "user", "content": f"Query:{query}\n\n Plan:{plan}"}],
            "temperature": 0,
            "tools": tools,
            "tool_choice": "required"
        }

        json_data = json.dumps(data)
        response = requests.post(self.url, headers=self.headers, data=json_data, timeout=180)
        response_dict = response.json()

        tool_calls = response_dict['choices'][0]['message']['tool_calls'][0]
        arguments_json = json.loads(tool_calls['function']['arguments'])
        search_queries = arguments_json['search_engine_queries']
        print(colored(f"Search Engine Queries: {search_queries}", 'yellow'))

        return search_queries

    def get_search_page(self, search_results, plan, query):
        # Ask the model to pick the single best URL from the formatted search results.
        tools = [
            {
                "type": "function",
                "function": {
                    "name": "decide_best_pages",
                    "description": "Decide the best pages to visit based on the search results",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "best_search_page": {
                                "type": "string",
                                "description": "The URL link of the best search page based on the Search Results, Plan and Query. Do not select pdf files."
                            },
                        },
                        "required": ["best_search_page"]
                    }
                }
            }
        ]

        data = {
            "model": self.model,
            "messages": [{"role": "user", "content": f"Query:{query}\n\n Plan:{plan} \n\n Search Results:{search_results}"}],
            "temperature": 0,
            "tools": tools,
            "tool_choice": "required"
        }

        json_data = json.dumps(data)
        response = requests.post(self.url, headers=self.headers, data=json_data, timeout=180)
        response_dict = response.json()

        tool_calls = response_dict['choices'][0]['message']['tool_calls'][0]
        arguments_json = json.loads(tool_calls['function']['arguments'])
        best_page = arguments_json['best_search_page']
        print(colored(f"Best Page: {best_page}", 'yellow'))

        return best_page

    def format_results(self, organic_results):
        # Flatten serper.dev organic results into "Title / Link / Snippet" blocks.
        result_strings = []
        for result in organic_results:
            title = result.get('title', 'No Title')
            link = result.get('link', '#')
            snippet = result.get('snippet', 'No snippet available.')
            result_strings.append(f"Title: {title}\nLink: {link}\nSnippet: {snippet}\n---")

        return '\n'.join(result_strings)

    def fetch_search_results(self, search_queries: str):
        search_url = "https://google.serper.dev/search"
        headers = {
            'Content-Type': 'application/json',
            'X-API-KEY': os.environ['SERPER_DEV_API_KEY']  # Ensure this environment variable is set with your API key
        }
        payload = json.dumps({"q": search_queries})

        # Attempt to make the HTTP POST request
        try:
            response = requests.post(search_url, headers=headers, data=payload, timeout=180)
            response.raise_for_status()  # Raise an HTTPError for bad responses (4XX, 5XX)
            results = response.json()

            # Check if 'organic' results are in the response
            if 'organic' in results:
                return self.format_results(results['organic'])
            else:
                return "No organic results found."

        except requests.exceptions.HTTPError as http_err:
            return f"HTTP error occurred: {http_err}"
        except requests.exceptions.RequestException as req_err:
            return f"Request exception occurred: {req_err}"
        except KeyError as key_err:
            return f"Key error in handling response: {key_err}"

    def scrape_website_content(self, website_url):
        # Browser-like headers reduce the chance of the request being blocked.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Accept-Language': 'en-US,en;q=0.9',
            'Referer': 'https://www.google.com/',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Accept-Encoding': 'gzip, deflate, br'
        }

        try:
            # Making a GET request to the website
            response = requests.get(website_url, headers=headers, timeout=15)
            response.raise_for_status()  # This will raise an exception for HTTP errors

            # Parsing the page content using BeautifulSoup
            soup = BeautifulSoup(response.content, 'html.parser')
            text = soup.get_text(separator='\n')
            # Cleaning up the text: removing excess whitespace
            clean_text = '\n'.join([line.strip() for line in text.splitlines() if line.strip()])

            return {website_url: clean_text}

        except requests.exceptions.RequestException as e:
            print(f"Error retrieving content from {website_url}: {e}")
            return {website_url: f"Failed to retrieve content due to an error: {e}"}

    def use_tool(self, plan=None, query=None):
        # Full pipeline: query generation -> search -> page selection -> scrape.
        search_queries = self.generate_searches(plan, query)
        search_results = self.fetch_search_results(search_queries)
        best_page = self.get_search_page(search_results, plan, query)
        results_dict = self.scrape_website_content(best_page)

        if self.verbose:
            print(colored(f"SEARCH RESULTS {search_results}", 'yellow'))
            print(colored(f"RESULTS DICT {results_dict}", 'yellow'))

        return results_dict


if __name__ == '__main__':
    search = WebSearcher(model="gpt-3.5-turbo", verbose=True)
    search.use_tool(plan="Find the best way to cook a turkey",
                    query="How long should I cook a turkey for?")
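`format_results` relies only on the `title`, `link`, and `snippet` keys of serper.dev's organic results, so it can be checked offline with hand-made entries (the data below is invented for illustration; `config.yaml` must still exist because `WebSearcher.__init__` loads it):

```python
from search import WebSearcher

searcher = WebSearcher(model="gpt-3.5-turbo")  # loads config.yaml on init
fake_organic = [
    {"title": "Example Page", "link": "https://example.com", "snippet": "An example snippet."},
    {"link": "https://example.org"},  # missing keys fall back to the defaults
]
print(searcher.format_results(fake_organic))
```

Each entry renders as a `Title / Link / Snippet` block separated by `---`, which is the text the page-selection call sees.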