First Commit

This commit is contained in:
John Adeojo
2024-05-14 20:14:13 +01:00
committed by GitHub
commit 5af3211916
8 changed files with 513 additions and 0 deletions

47
README.md Normal file
View File

@@ -0,0 +1,47 @@
# Setting Up and Running Custom Agent Script
### Prerequisites
1. **Install Anaconda:**
Download Anaconda from [https://www.anaconda.com/](https://www.anaconda.com/).
2. **Create a Virtual Environment:**
```bash
conda create -n crew_env python=3.10 pip
```
3. **Activate the Virtual Environment:**
```bash
conda activate crew_env
```
### Clone and Navigate to the Repository
1. **Clone the Repo:**
```bash
git clone https://github.com/john-adeojo/custom_agent_tutorial.git
```
2. **Navigate to the Repo:**
```bash
cd /path/to/your-repo/custom_agent_tutorial
```
3. **Install Requirements:**
```bash
pip install -r requirements.txt
```
### Configure API Keys
1. **Open the `config.yaml`:**
```bash
nano config.yaml
```
2. **Enter API Keys:**
- **Serper API Key:** Get it from [https://serper.dev/](https://serper.dev/)
- **OpenAI API Key:** Get it from [https://openai.com/](https://openai.com/)
### Run Your Query
```bash
python app.py "YOUR QUERY"
```

161
agent.py Normal file
View File

@@ -0,0 +1,161 @@
import os
import yaml
import json
import requests
from termcolor import colored
from prompts import planning_agent_prompt, integration_agent_prompt
from search import WebSearcher
def load_config(file_path):
    """Load a YAML config file and export each key/value pair as an
    environment variable.

    Args:
        file_path: Path to a YAML file of flat key/value pairs
            (e.g. config.yaml holding OPENAI_API_KEY).
    """
    with open(file_path, 'r') as file:
        config = yaml.safe_load(file)
    for key, value in config.items():
        # os.environ only accepts strings; coerce so an unquoted YAML
        # value (int/bool) doesn't raise TypeError.
        os.environ[key] = str(value)
class Agent:
    """Orchestrates a planning agent and an integration agent over the
    OpenAI chat-completions API.

    The planning agent proposes tool inputs, the tool gathers information,
    the integration agent synthesises a response, and a forced function
    call checks whether that response satisfies the original query.
    """

    def __init__(self, model, tool, temperature=0, max_tokens=1000,
                 planning_agent_prompt=None, integration_agent_prompt=None,
                 verbose=False):
        """
        Args:
            model: OpenAI chat model name, e.g. "gpt-3.5-turbo".
            tool: Tool *class* (not instance); instantiated in execute().
            temperature: Sampling temperature for both agents.
            max_tokens: Completion token cap for both agents.
            planning_agent_prompt: Format string expecting {outputs}, {plan},
                {feedback} and {tool_specs}.
            integration_agent_prompt: Format string expecting {outputs} and {plan}.
            verbose: Forwarded to the tool for extra logging.
        """
        load_config('config.yaml')  # populates OPENAI_API_KEY from config.yaml
        self.api_key = os.getenv("OPENAI_API_KEY")
        self.url = 'https://api.openai.com/v1/chat/completions'
        self.headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {self.api_key}'
        }
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.tool = tool
        # The tool's class docstring doubles as its spec for the planner.
        self.tool_specs = tool.__doc__
        self.planning_agent_prompt = planning_agent_prompt
        self.integration_agent_prompt = integration_agent_prompt
        self.model = model
        self.verbose = verbose

    def run_planning_agent(self, query, plan=None, outputs=None, feedback=None):
        """Ask the planning agent for a (possibly revised) plan.

        Returns:
            The plan text produced by the model.
        """
        system_prompt = self.planning_agent_prompt.format(
            outputs=outputs,
            plan=plan,
            feedback=feedback,
            tool_specs=self.tool_specs
        )
        data = {
            "model": self.model,
            "messages": [{"role": "user", "content": query},
                         {"role": "system", "content": system_prompt}],
            "temperature": self.temperature,
            "max_tokens": self.max_tokens
        }
        response = requests.post(self.url, headers=self.headers,
                                 data=json.dumps(data), timeout=180)
        response_dict = response.json()
        content = response_dict['choices'][0]['message']['content']
        print(colored(f"Planning Agent: {content}", 'green'))
        return content

    def run_integration_agent(self, query, plan, outputs):
        """Ask the integration agent to synthesise tool outputs and plan
        into a final answer.

        Returns:
            The synthesised response text.
        """
        system_prompt = self.integration_agent_prompt.format(
            outputs=outputs,
            plan=plan
        )
        data = {
            "model": self.model,
            "messages": [{"role": "user", "content": query},
                         {"role": "system", "content": system_prompt}],
            "temperature": self.temperature,
            "max_tokens": self.max_tokens
        }
        response = requests.post(self.url, headers=self.headers,
                                 data=json.dumps(data), timeout=180)
        response_dict = response.json()
        content = response_dict['choices'][0]['message']['content']
        print(colored(f"Integration Agent: {content}", 'blue'))
        return content

    def check_response(self, response, query):
        """Ask the model whether `response` satisfies `query` via a forced
        function call.

        Returns:
            True when the model answers 'yes', False otherwise.
        """
        tools = [
            {
                "type": "function",
                "function": {
                    # Typo fixes: original spelled these 'respose_checker'
                    # and 'Checck'.
                    "name": "response_checker",
                    "description": "Check if the response meets the requirements",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "meets_requirements": {
                                "type": "string",
                                "description": """Check if the response meets the requirements of the query based on the following:
                                1. The response should be relevant to the query.
                                2. The response should be coherent and well-structured with citations.
                                3. The response should be comprehensive and address the query in its entirety.
                                Return 'yes' if the response meets the requirements and 'no' otherwise.
                                """
                            },
                        },
                        "required": ["meets_requirements"]
                    }
                }
            }
        ]
        data = {
            "model": self.model,
            "messages": [{"role": "user", "content": f"Response: {response} \n Query: {query}"}],
            "temperature": 0,
            "tools": tools,
            "tool_choice": "required"
        }
        api_response = requests.post(self.url, headers=self.headers,
                                     data=json.dumps(data), timeout=180)
        response_dict = api_response.json()
        # The forced tool call carries the verdict in its JSON arguments.
        tool_call = response_dict['choices'][0]['message']['tool_calls'][0]
        arguments = json.loads(tool_call['function']['arguments'])
        return arguments['meets_requirements'] == 'yes'

    def execute(self, iterations=5):
        """Run the plan -> tool -> integrate -> check loop interactively.

        Args:
            iterations: Maximum number of refinement rounds before giving up.
        """
        query = input("Enter your query: ")
        tool = self.tool(model=self.model, verbose=self.verbose)
        meets_requirements = False
        plan = None
        outputs = None
        response = None
        attempts = 0
        # BUG FIX: the original reassigned `iterations` to 0 and compared
        # against a hard-coded 5, so the parameter had no effect.
        while not meets_requirements and attempts < iterations:
            attempts += 1
            plan = self.run_planning_agent(query, plan=plan, outputs=outputs,
                                           feedback=response)
            outputs = tool.use_tool(plan=plan, query=query)
            response = self.run_integration_agent(query, plan, outputs)
            meets_requirements = self.check_response(response, query)
        print(colored(f"Final Response: {response}", 'cyan'))
if __name__ == '__main__':
    # Entry point: wire the prompt templates and the WebSearcher tool class
    # into an Agent, then run the interactive query loop.
    agent = Agent(model="gpt-3.5-turbo",
                  tool=WebSearcher,
                  planning_agent_prompt=planning_agent_prompt,
                  integration_agent_prompt=integration_agent_prompt,
                  verbose=True
                  )
    agent.execute()

View File

@@ -0,0 +1,52 @@
<mxfile host="app.diagrams.net" modified="2024-05-14T11:14:08.035Z" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36" etag="VuY0quERgOPRJ4QZz9Re" version="24.3.1" type="device">
<diagram name="Page-1" id="rU8JHjLior_zJRwLBU05">
<mxGraphModel dx="2072" dy="1084" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="1169" pageHeight="1654" math="0" shadow="0">
<root>
<mxCell id="0" />
<mxCell id="1" parent="0" />
<mxCell id="Qcd6xKoqnelJGYbhtqcp-2" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Overall Workflow&lt;/font&gt;" style="swimlane;whiteSpace=wrap;html=1;startSize=40;" vertex="1" parent="1">
<mxGeometry x="60" y="130" width="530" height="340" as="geometry" />
</mxCell>
<mxCell id="Qcd6xKoqnelJGYbhtqcp-4" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Agents&amp;nbsp;&lt;/font&gt;&lt;div&gt;&lt;font style=&quot;font-size: 24px;&quot;&gt;powered by LLMs&lt;/font&gt;&lt;/div&gt;" style="whiteSpace=wrap;html=1;aspect=fixed;" vertex="1" parent="Qcd6xKoqnelJGYbhtqcp-2">
<mxGeometry x="290" y="87.5" width="165" height="165" as="geometry" />
</mxCell>
<mxCell id="Qcd6xKoqnelJGYbhtqcp-23" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Web Search Tool&lt;/font&gt;" style="whiteSpace=wrap;html=1;aspect=fixed;fillColor=#e3c800;fontColor=#000000;strokeColor=#B09500;" vertex="1" parent="Qcd6xKoqnelJGYbhtqcp-2">
<mxGeometry x="63.75" y="87.5" width="165" height="165" as="geometry" />
</mxCell>
<mxCell id="Qcd6xKoqnelJGYbhtqcp-5" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Agents&amp;nbsp;&lt;/font&gt;" style="swimlane;whiteSpace=wrap;html=1;startSize=40;" vertex="1" parent="1">
<mxGeometry x="60" y="590" width="960" height="500" as="geometry" />
</mxCell>
<mxCell id="Qcd6xKoqnelJGYbhtqcp-9" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="Qcd6xKoqnelJGYbhtqcp-5" source="Qcd6xKoqnelJGYbhtqcp-6" target="Qcd6xKoqnelJGYbhtqcp-3">
<mxGeometry relative="1" as="geometry">
<mxPoint x="380" y="336" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="Qcd6xKoqnelJGYbhtqcp-6" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Planning Agent&lt;/font&gt;" style="whiteSpace=wrap;html=1;aspect=fixed;align=center;fillColor=#008a00;fontColor=#ffffff;strokeColor=#005700;" vertex="1" parent="Qcd6xKoqnelJGYbhtqcp-5">
<mxGeometry x="30" y="220" width="232.5" height="232.5" as="geometry" />
</mxCell>
<mxCell id="Qcd6xKoqnelJGYbhtqcp-10" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=0;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" edge="1" parent="Qcd6xKoqnelJGYbhtqcp-5" source="Qcd6xKoqnelJGYbhtqcp-7" target="Qcd6xKoqnelJGYbhtqcp-6">
<mxGeometry relative="1" as="geometry">
<Array as="points">
<mxPoint x="763" y="130" />
<mxPoint x="146" y="130" />
</Array>
</mxGeometry>
</mxCell>
<mxCell id="Qcd6xKoqnelJGYbhtqcp-11" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Feedback&lt;/font&gt;" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" vertex="1" connectable="0" parent="Qcd6xKoqnelJGYbhtqcp-10">
<mxGeometry x="0.0146" y="-3" relative="1" as="geometry">
<mxPoint x="-1" as="offset" />
</mxGeometry>
</mxCell>
<mxCell id="Qcd6xKoqnelJGYbhtqcp-7" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Integration Agent&lt;/font&gt;" style="whiteSpace=wrap;html=1;aspect=fixed;fillColor=#a20025;fontColor=#ffffff;strokeColor=#6F0000;" vertex="1" parent="Qcd6xKoqnelJGYbhtqcp-5">
<mxGeometry x="640" y="213.75" width="245" height="245" as="geometry" />
</mxCell>
<mxCell id="Qcd6xKoqnelJGYbhtqcp-22" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" edge="1" parent="Qcd6xKoqnelJGYbhtqcp-5" source="Qcd6xKoqnelJGYbhtqcp-3" target="Qcd6xKoqnelJGYbhtqcp-7">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="Qcd6xKoqnelJGYbhtqcp-3" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Web Search Tool&lt;/font&gt;" style="whiteSpace=wrap;html=1;aspect=fixed;fillColor=#e3c800;fontColor=#000000;strokeColor=#B09500;" vertex="1" parent="Qcd6xKoqnelJGYbhtqcp-5">
<mxGeometry x="370" y="253.75" width="165" height="165" as="geometry" />
</mxCell>
</root>
</mxGraphModel>
</diagram>
</mxfile>

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 KiB

2
config.yaml Normal file
View File

@@ -0,0 +1,2 @@
# SECURITY: never commit real keys. The keys previously committed on these
# lines are exposed and must be revoked/rotated. Fill in your own values.
OPENAI_API_KEY: "YOUR_OPENAI_API_KEY"
SERPER_DEV_API_KEY: "YOUR_SERPER_DEV_API_KEY"

23
prompts.py Normal file
View File

@@ -0,0 +1,23 @@
# System prompt for the planning agent. Placeholders filled by
# Agent.run_planning_agent: {outputs}, {plan}, {feedback}, {tool_specs}.
# Fixes vs. original: missing newline after the Step 1 sentence (it ran
# straight into "Step 2"), stray period after "\n\n", and casing/spacing.
planning_agent_prompt = (
    "You are an AI planning agent working with an integration agent. You have access to specialised tools. When addressing queries, you should follow this two-step methodology:\n"
    "Step 1: Thought. Begin by contemplating the problem thoroughly and devising a plan of action.\n"
    "Step 2: Action. Clearly state the inputs you will use with any tools necessary to address the problem. This preparation is essential for executing your plan effectively.\n"
    "You must ensure your plan takes into account any feedback (if available).\n\n"
    "Here are the outputs from the tools you have used: {outputs}\n\n"
    "Here is your previous plan: {plan}\n\n"
    "Here is the feedback: {feedback}\n\n"
    "Here are the specifications of your tools:\n"
    "{tool_specs}\n"
    "Continue this process until you have gathered enough information to comprehensively answer the query."
)
# System prompt for the integration agent. Placeholders filled by
# Agent.run_integration_agent: {outputs}, {plan}.
# Fix vs. original: typo "comprehenisve" -> "comprehensive".
integration_agent_prompt = (
    "You are an AI Integration Agent working with a planning agent. Your job is to synthesise the outputs from the planning agent into a coherent response.\n"
    "You must do this by considering the plan, the outputs from tools, and the original query.\n"
    "If any of the information is not sufficient, you should provide feedback to the planning agent to refine the plan.\n"
    "If the information is sufficient, you should provide a comprehensive response to the query with appropriate citations. \n"
    "Your response to the query must be based on the outputs from the tools\n"
    "The output of the tool is a dictionary where the \n"
    "key is the URL source of the info and the value is the content of the URL \n"
    "You should use the source in citation \n"
    "Here are the outputs from the tool: {outputs}\n\n"
    "Here is the plan from the planning agent: {plan}\n\n"
)

4
requirements.txt Normal file
View File

@@ -0,0 +1,4 @@
termcolor==2.4.0
PyYAML==6.0.1
requests==2.31.0
beautifulsoup4==4.12.3

224
search.py Normal file
View File

@@ -0,0 +1,224 @@
import requests
from bs4 import BeautifulSoup
import json
import yaml
from termcolor import colored
import os
def load_config(file_path):
    """Load a YAML config file and export each key/value pair as an
    environment variable.

    Args:
        file_path: Path to a YAML file of flat key/value pairs
            (e.g. config.yaml holding OPENAI_API_KEY and SERPER_DEV_API_KEY).
    """
    with open(file_path, 'r') as file:
        config = yaml.safe_load(file)
    for key, value in config.items():
        # os.environ only accepts strings; coerce so an unquoted YAML
        # value (int/bool) doesn't raise TypeError.
        os.environ[key] = str(value)
class WebSearcher:
    """
    A web search tool that generates search queries, fetches search results,
    selects the best result page, and scrapes its content.

    It uses the OpenAI chat-completions API (via forced function calls) to
    generate a search query from a plan/query pair and to pick the best
    result URL, the serper.dev API to fetch search results, and
    BeautifulSoup to scrape the chosen page.

    Methods:
        __init__(model, verbose=False): Load API keys from config.yaml and set up headers.
        generate_searches(plan, query): Ask the model for the best search query.
        fetch_search_results(search_queries): Query serper.dev and format the organic results.
        get_search_page(search_results, plan, query): Ask the model for the best result URL.
        format_results(organic_results): Render organic results as readable text.
        scrape_website_content(website_url): Return {url: page_text} for a URL.
        use_tool(plan=None, query=None): Full search-and-retrieve pipeline.

    Usage Example:
        searcher = WebSearcher(model="gpt-3.5-turbo", verbose=True)
        results_dict = searcher.use_tool(plan="Research new AI techniques", query="Latest trends in AI")
        # results_dict maps the chosen URL to the scraped content of that URL.
    """

    def __init__(self, model, verbose=False):
        """
        Args:
            model: OpenAI chat model name used for query generation and page selection.
            verbose: When True, use_tool prints intermediate search results.
        """
        load_config('config.yaml')  # populates OPENAI_API_KEY / SERPER_DEV_API_KEY
        self.api_key = os.getenv("OPENAI_API_KEY")
        self.url = 'https://api.openai.com/v1/chat/completions'
        self.headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {self.api_key}'
        }
        self.model = model
        self.verbose = verbose

    def generate_searches(self, plan, query):
        """Ask the model (via a forced function call) for the single most
        suitable search-engine query for the plan/query pair.

        Returns:
            The generated search query string.
        """
        tools = [
            {
                "type": "function",
                "function": {
                    "name": "fetch_search_results",
                    "description": "Fetch search results based on the search query",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "search_engine_queries": {
                                "type": "string",
                                "description": "The most suitable search query for the plan"
                            },
                        },
                        "required": ["search_engine_queries"]
                    }
                }
            }
        ]
        data = {
            "model": self.model,
            "messages": [{"role": "user", "content": f"Query:{query}\n\n Plan:{plan}"}],
            "temperature": 0,
            "tools": tools,
            "tool_choice": "required"
        }
        # timeout added for consistency with agent.py (original could hang forever)
        response = requests.post(self.url, headers=self.headers,
                                 data=json.dumps(data), timeout=180)
        response_dict = response.json()
        tool_call = response_dict['choices'][0]['message']['tool_calls'][0]
        arguments = json.loads(tool_call['function']['arguments'])
        search_queries = arguments['search_engine_queries']
        print(colored(f"Search Engine Queries: {search_queries}", 'yellow'))
        return search_queries

    def get_search_page(self, search_results, plan, query):
        """Ask the model (via a forced function call) to pick the best
        result URL given the formatted search results, plan and query.

        Returns:
            The chosen page URL.
        """
        tools = [
            {
                "type": "function",
                "function": {
                    "name": "decide_best_pages",
                    "description": "Decide the best pages to visit based on the search results",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "best_search_page": {
                                "type": "string",
                                "description": "The URL link of best search page based on the Search Results, Plan and Query. Do not select pdf files."
                            },
                        },
                        "required": ["best_search_page"]
                    }
                }
            }
        ]
        data = {
            "model": self.model,
            "messages": [{"role": "user", "content": f"Query:{query}\n\n Plan:{plan} \n\n Search Results:{search_results}"}],
            "temperature": 0,
            "tools": tools,
            "tool_choice": "required"
        }
        # timeout added for consistency with agent.py (original could hang forever)
        response = requests.post(self.url, headers=self.headers,
                                 data=json.dumps(data), timeout=180)
        response_dict = response.json()
        tool_call = response_dict['choices'][0]['message']['tool_calls'][0]
        arguments = json.loads(tool_call['function']['arguments'])
        best_page = arguments['best_search_page']
        print(colored(f"Best Pages: {best_page}", 'yellow'))
        return best_page

    def format_results(self, organic_results):
        """Render serper.dev organic results as "Title/Link/Snippet" blocks
        separated by "---" lines; missing fields get placeholder text."""
        result_strings = []
        for result in organic_results:
            title = result.get('title', 'No Title')
            link = result.get('link', '#')
            snippet = result.get('snippet', 'No snippet available.')
            result_strings.append(f"Title: {title}\nLink: {link}\nSnippet: {snippet}\n---")
        return '\n'.join(result_strings)

    def fetch_search_results(self, search_queries: str):
        """Fetch search results for `search_queries` from the serper.dev API.

        Returns:
            Formatted organic results on success, or a human-readable error
            string on failure (callers pass the string on to the model either way).
        """
        search_url = "https://google.serper.dev/search"
        headers = {
            'Content-Type': 'application/json',
            'X-API-KEY': os.environ['SERPER_DEV_API_KEY']  # Ensure this environment variable is set with your API key
        }
        payload = json.dumps({"q": search_queries})
        try:
            response = requests.post(search_url, headers=headers, data=payload,
                                     timeout=180)
            response.raise_for_status()  # Raise an HTTPError for bad responses (4XX, 5XX)
            results = response.json()
            if 'organic' in results:
                return self.format_results(results['organic'])
            return "No organic results found."
        except requests.exceptions.HTTPError as http_err:
            return f"HTTP error occurred: {http_err}"
        except requests.exceptions.RequestException as req_err:
            return f"Request exception occurred: {req_err}"
        except KeyError as key_err:
            return f"Key error in handling response: {key_err}"

    def scrape_website_content(self, website_url):
        """Fetch `website_url` and return {url: cleaned_page_text}.

        On any request failure returns {url: error_message} instead of
        raising, so the pipeline can continue.
        """
        # Browser-like headers reduce the chance of being blocked by sites
        # that reject the default requests User-Agent.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Accept-Language': 'en-US,en;q=0.9',
            'Referer': 'https://www.google.com/',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Accept-Encoding': 'gzip, deflate, br'
        }
        try:
            response = requests.get(website_url, headers=headers, timeout=15)
            response.raise_for_status()  # This will raise an exception for HTTP errors
            soup = BeautifulSoup(response.content, 'html.parser')
            text = soup.get_text(separator='\n')
            # Cleaning up the text: removing excess whitespace
            clean_text = '\n'.join([line.strip() for line in text.splitlines() if line.strip()])
            return {website_url: clean_text}
        except requests.exceptions.RequestException as e:
            print(f"Error retrieving content from {website_url}: {e}")
            return {website_url: f"Failed to retrieve content due to an error: {e}"}

    def use_tool(self, plan=None, query=None):
        """Run the full pipeline: plan/query -> search query -> search
        results -> best page -> scraped content.

        Returns:
            dict mapping the chosen URL to its scraped content (or an error
            message from scrape_website_content).
        """
        # BUG FIX: the original built a second WebSearcher(self.model) here,
        # silently dropping the configured verbose flag; use self directly.
        search_queries = self.generate_searches(plan, query)
        search_results = self.fetch_search_results(search_queries)
        best_page = self.get_search_page(search_results, plan, query)
        results_dict = self.scrape_website_content(best_page)
        if self.verbose:
            print(colored(f"SEARCH RESULTS {search_results}", 'yellow'))
            print(colored(f"RESULTS DICT {results_dict}", 'yellow'))
        return results_dict
if __name__ == '__main__':
    # BUG FIX: WebSearcher.__init__ requires a `model` argument, so the
    # original `WebSearcher()` raised TypeError; use_tool was also called
    # with no plan/query. Use the example pair the author left in comments.
    search = WebSearcher(model="gpt-3.5-turbo", verbose=True)
    search.use_tool(plan="Find the best way to cook a turkey",
                    query="How long should I cook a turkey for?")