mirror of https://github.com/UmutAlihan/custom_agent_tutorial.git (synced 2024-05-19 15:57:30 +03:00)
First Commit
README.md (new file, 47 lines)
# Setting Up and Running the Custom Agent Script

### Prerequisites

1. **Install Anaconda:**

   Download Anaconda from [https://www.anaconda.com/](https://www.anaconda.com/).

2. **Create a Virtual Environment:**

   ```bash
   conda create -n agent_env python=3.10 pip
   ```

3. **Activate the Virtual Environment:**

   ```bash
   conda activate agent_env
   ```
### Clone and Navigate to the Repository

1. **Clone the Repo:**

   ```bash
   git clone https://github.com/john-adeojo/custom_agent_tutorial.git
   ```

2. **Navigate to the Repo:**

   ```bash
   cd /path/to/your-repo/custom_agent_tutorial
   ```

3. **Install Requirements:**

   ```bash
   pip install -r requirements.txt
   ```
### Configure API Keys

1. **Open the `config.yaml`:**

   ```bash
   nano config.yaml
   ```

2. **Enter API Keys:**

   - **Serper API Key:** Get it from [https://serper.dev/](https://serper.dev/)
   - **OpenAI API Key:** Get it from [https://openai.com/](https://openai.com/)
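With both keys entered, `config.yaml` should contain exactly these two entries (placeholder values shown, not real keys):

```yaml
OPENAI_API_KEY: "your-openai-api-key"
SERPER_DEV_API_KEY: "your-serper-dev-api-key"
```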
### Run Your Query

```bash
python app.py "YOUR QUERY"
```
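Alternatively, `agent.py` in this commit has its own entry point that prompts for a query interactively:

```bash
python agent.py
```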
agent.py (new file, 161 lines)
import os
import json

import requests
import yaml
from termcolor import colored

from prompts import planning_agent_prompt, integration_agent_prompt
from search import WebSearcher


def load_config(file_path):
    # Load key/value pairs from the YAML config into environment variables.
    with open(file_path, 'r') as file:
        config = yaml.safe_load(file)
        for key, value in config.items():
            os.environ[key] = value


class Agent:
    def __init__(self, model, tool, temperature=0, max_tokens=1000,
                 planning_agent_prompt=None, integration_agent_prompt=None, verbose=False):
        load_config('config.yaml')
        self.api_key = os.getenv("OPENAI_API_KEY")
        self.url = 'https://api.openai.com/v1/chat/completions'
        self.headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {self.api_key}'
        }
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.tool = tool
        self.tool_specs = tool.__doc__  # the tool's docstring doubles as its spec for the planning prompt
        self.planning_agent_prompt = planning_agent_prompt
        self.integration_agent_prompt = integration_agent_prompt
        self.model = model
        self.verbose = verbose

    def run_planning_agent(self, query, plan=None, outputs=None, feedback=None):
        # Build the planning prompt from the previous plan, the tool outputs, and any feedback.
        system_prompt = self.planning_agent_prompt.format(
            outputs=outputs,
            plan=plan,
            feedback=feedback,
            tool_specs=self.tool_specs
        )

        data = {
            "model": self.model,
            "messages": [{"role": "user", "content": query},
                         {"role": "system", "content": system_prompt}],
            "temperature": self.temperature,
            "max_tokens": self.max_tokens
        }

        json_data = json.dumps(data)
        response = requests.post(self.url, headers=self.headers, data=json_data, timeout=180)
        response_dict = response.json()
        content = response_dict['choices'][0]['message']['content']
        print(colored(f"Planning Agent: {content}", 'green'))

        return content

    def run_integration_agent(self, query, plan, outputs):
        # Build the integration prompt from the plan and the tool outputs.
        system_prompt = self.integration_agent_prompt.format(
            outputs=outputs,
            plan=plan
        )

        data = {
            "model": self.model,
            "messages": [{"role": "user", "content": query},
                         {"role": "system", "content": system_prompt}],
            "temperature": self.temperature,
            "max_tokens": self.max_tokens
        }

        json_data = json.dumps(data)
        response = requests.post(self.url, headers=self.headers, data=json_data, timeout=180)
        response_dict = response.json()
        content = response_dict['choices'][0]['message']['content']
        print(colored(f"Integration Agent: {content}", 'blue'))

        return content

    def check_response(self, response, query):
        # Force a function call so the model returns a structured yes/no verdict.
        tools = [
            {
                "type": "function",
                "function": {
                    "name": "response_checker",
                    "description": "Check if the response meets the requirements",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "meets_requirements": {
                                "type": "string",
                                "description": """Check if the response meets the requirements of the query based on the following:
                                1. The response should be relevant to the query.
                                2. The response should be coherent and well-structured with citations.
                                3. The response should be comprehensive and address the query in its entirety.
                                Return 'yes' if the response meets the requirements and 'no' otherwise.
                                """
                            },
                        },
                        "required": ["meets_requirements"]
                    }
                }
            }
        ]

        data = {
            "model": self.model,
            "messages": [{"role": "user", "content": f"Response: {response} \n Query: {query}"}],
            "temperature": 0,
            "tools": tools,
            "tool_choice": "required"
        }

        json_data = json.dumps(data)
        response = requests.post(self.url, headers=self.headers, data=json_data, timeout=180)
        response_dict = response.json()

        tool_calls = response_dict['choices'][0]['message']['tool_calls'][0]
        arguments_json = json.loads(tool_calls['function']['arguments'])
        meets_requirements = arguments_json['meets_requirements']

        return meets_requirements == 'yes'

    def execute(self, max_iterations=5):
        # Loop: plan -> search -> integrate -> check, until the response passes or the cap is hit.
        query = input("Enter your query: ")
        tool = self.tool(model=self.model, verbose=self.verbose)
        meets_requirements = False
        plan = None
        outputs = None
        response = None
        iterations = 0

        while not meets_requirements and iterations < max_iterations:
            iterations += 1
            plan = self.run_planning_agent(query, plan=plan, outputs=outputs, feedback=response)
            outputs = tool.use_tool(plan=plan, query=query)
            response = self.run_integration_agent(query, plan, outputs)
            meets_requirements = self.check_response(response, query)

        print(colored(f"Final Response: {response}", 'cyan'))


if __name__ == '__main__':
    agent = Agent(model="gpt-3.5-turbo",
                  tool=WebSearcher,
                  planning_agent_prompt=planning_agent_prompt,
                  integration_agent_prompt=integration_agent_prompt,
                  verbose=True
                  )
    agent.execute()
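`Agent` treats its tool as a plug-in: it reads the tool class's docstring as `tool_specs` for the planning prompt, instantiates the class with `model` and `verbose`, and calls `use_tool(plan=..., query=...)` expecting a dict mapping sources to content. A minimal sketch of a stand-in tool that satisfies this contract (the `EchoTool` name and behaviour are invented for illustration, not part of the repository):

```python
class EchoTool:
    """EchoTool: echoes the plan and query back instead of searching the web."""

    def __init__(self, model, verbose=False):
        # Agent instantiates the tool with exactly these two arguments.
        self.model = model
        self.verbose = verbose

    def use_tool(self, plan=None, query=None):
        # Mimic WebSearcher's output shape: {source: content}.
        return {"echo://local": f"Plan: {plan}\nQuery: {query}"}
```

Passing `tool=EchoTool` instead of `tool=WebSearcher` in the `__main__` block exercises the plan/integrate/check loop without any Serper calls.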
architecture/architecture.drawio (new file, 52 lines)
<mxfile host="app.diagrams.net" modified="2024-05-14T11:14:08.035Z" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36" etag="VuY0quERgOPRJ4QZz9Re" version="24.3.1" type="device">
  <diagram name="Page-1" id="rU8JHjLior_zJRwLBU05">
    <mxGraphModel dx="2072" dy="1084" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="1169" pageHeight="1654" math="0" shadow="0">
      <root>
        <mxCell id="0" />
        <mxCell id="1" parent="0" />
        <mxCell id="Qcd6xKoqnelJGYbhtqcp-2" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Overall Workflow&lt;/font&gt;" style="swimlane;whiteSpace=wrap;html=1;startSize=40;" vertex="1" parent="1">
          <mxGeometry x="60" y="130" width="530" height="340" as="geometry" />
        </mxCell>
        <mxCell id="Qcd6xKoqnelJGYbhtqcp-4" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Agents&amp;nbsp;&lt;/font&gt;&lt;div&gt;&lt;font style=&quot;font-size: 24px;&quot;&gt;powered by LLMs&lt;/font&gt;&lt;/div&gt;" style="whiteSpace=wrap;html=1;aspect=fixed;" vertex="1" parent="Qcd6xKoqnelJGYbhtqcp-2">
          <mxGeometry x="290" y="87.5" width="165" height="165" as="geometry" />
        </mxCell>
        <mxCell id="Qcd6xKoqnelJGYbhtqcp-23" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Web Search Tool&lt;/font&gt;" style="whiteSpace=wrap;html=1;aspect=fixed;fillColor=#e3c800;fontColor=#000000;strokeColor=#B09500;" vertex="1" parent="Qcd6xKoqnelJGYbhtqcp-2">
          <mxGeometry x="63.75" y="87.5" width="165" height="165" as="geometry" />
        </mxCell>
        <mxCell id="Qcd6xKoqnelJGYbhtqcp-5" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Agents&amp;nbsp;&lt;/font&gt;" style="swimlane;whiteSpace=wrap;html=1;startSize=40;" vertex="1" parent="1">
          <mxGeometry x="60" y="590" width="960" height="500" as="geometry" />
        </mxCell>
        <mxCell id="Qcd6xKoqnelJGYbhtqcp-9" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="Qcd6xKoqnelJGYbhtqcp-5" source="Qcd6xKoqnelJGYbhtqcp-6" target="Qcd6xKoqnelJGYbhtqcp-3">
          <mxGeometry relative="1" as="geometry">
            <mxPoint x="380" y="336" as="targetPoint" />
          </mxGeometry>
        </mxCell>
        <mxCell id="Qcd6xKoqnelJGYbhtqcp-6" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Planning Agent&lt;/font&gt;" style="whiteSpace=wrap;html=1;aspect=fixed;align=center;fillColor=#008a00;fontColor=#ffffff;strokeColor=#005700;" vertex="1" parent="Qcd6xKoqnelJGYbhtqcp-5">
          <mxGeometry x="30" y="220" width="232.5" height="232.5" as="geometry" />
        </mxCell>
        <mxCell id="Qcd6xKoqnelJGYbhtqcp-10" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=0;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" edge="1" parent="Qcd6xKoqnelJGYbhtqcp-5" source="Qcd6xKoqnelJGYbhtqcp-7" target="Qcd6xKoqnelJGYbhtqcp-6">
          <mxGeometry relative="1" as="geometry">
            <Array as="points">
              <mxPoint x="763" y="130" />
              <mxPoint x="146" y="130" />
            </Array>
          </mxGeometry>
        </mxCell>
        <mxCell id="Qcd6xKoqnelJGYbhtqcp-11" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Feedback&lt;/font&gt;" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" vertex="1" connectable="0" parent="Qcd6xKoqnelJGYbhtqcp-10">
          <mxGeometry x="0.0146" y="-3" relative="1" as="geometry">
            <mxPoint x="-1" as="offset" />
          </mxGeometry>
        </mxCell>
        <mxCell id="Qcd6xKoqnelJGYbhtqcp-7" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Integration Agent&lt;/font&gt;" style="whiteSpace=wrap;html=1;aspect=fixed;fillColor=#a20025;fontColor=#ffffff;strokeColor=#6F0000;" vertex="1" parent="Qcd6xKoqnelJGYbhtqcp-5">
          <mxGeometry x="640" y="213.75" width="245" height="245" as="geometry" />
        </mxCell>
        <mxCell id="Qcd6xKoqnelJGYbhtqcp-22" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" edge="1" parent="Qcd6xKoqnelJGYbhtqcp-5" source="Qcd6xKoqnelJGYbhtqcp-3" target="Qcd6xKoqnelJGYbhtqcp-7">
          <mxGeometry relative="1" as="geometry" />
        </mxCell>
        <mxCell id="Qcd6xKoqnelJGYbhtqcp-3" value="&lt;font style=&quot;font-size: 24px;&quot;&gt;Web Search Tool&lt;/font&gt;" style="whiteSpace=wrap;html=1;aspect=fixed;fillColor=#e3c800;fontColor=#000000;strokeColor=#B09500;" vertex="1" parent="Qcd6xKoqnelJGYbhtqcp-5">
          <mxGeometry x="370" y="253.75" width="165" height="165" as="geometry" />
        </mxCell>
      </root>
    </mxGraphModel>
  </diagram>
</mxfile>
architecture/architecture.drawio.png (new binary file, 90 KiB, not shown)
config.yaml (new file, 2 lines)
OPENAI_API_KEY: "your-openai-api-key"
SERPER_DEV_API_KEY: "your-serper-dev-api-key"
prompts.py (new file, 23 lines)
planning_agent_prompt = ("You are an AI planning agent working with an integration agent. You have access to specialised tools. When addressing queries, you should follow this two-step methodology:\n"
                         "Step 1: Thought. Begin by contemplating the problem thoroughly and devising a plan of action.\n"
                         "Step 2: Action. Clearly state the inputs you will use with any tools necessary to address the problem. This preparation is essential for executing your plan effectively.\n"
                         "You must ensure your plan takes into account any feedback (if available).\n\n"
                         "Here are the outputs from the tools you have used: {outputs}\n\n"
                         "Here is your previous plan: {plan}\n\n"
                         "Here is the feedback: {feedback}\n\n"
                         "Here are the specifications of your tools:\n"
                         "{tool_specs}\n"
                         "Continue this process until you have gathered enough information to comprehensively answer the query."
                         )

integration_agent_prompt = ("You are an AI Integration Agent working with a planning agent. Your job is to synthesise the outputs from the planning agent into a coherent response.\n"
                            "You must do this by considering the plan, the outputs from the tools, and the original query.\n"
                            "If any of the information is not sufficient, you should provide feedback to the planning agent to refine the plan.\n"
                            "If the information is sufficient, you should provide a comprehensive response to the query with appropriate citations.\n"
                            "Your response to the query must be based on the outputs from the tools.\n"
                            "The output of the tool is a dictionary where the key is the URL source of the info and the value is the content of that URL.\n"
                            "You should use the source in citations.\n"
                            "Here are the outputs from the tool: {outputs}\n\n"
                            "Here is the plan from the planning agent: {plan}\n\n"
                            )
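Both prompts are plain `str.format` templates, so every `{placeholder}` must be supplied when the prompt is built. A minimal sketch of how `agent.py` fills the planning prompt on a first iteration (the `tool_specs` string below is illustrative, not the real `WebSearcher` docstring):

```python
from prompts import planning_agent_prompt

# On the first loop iteration there is no previous plan, tool output, or feedback yet,
# so agent.py passes None for all three.
system_prompt = planning_agent_prompt.format(
    outputs=None,
    plan=None,
    feedback=None,
    tool_specs="WebSearcher: generates a search query, fetches results, scrapes the best page."
)
print(system_prompt)
```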
requirements.txt (new file, 4 lines)
termcolor==2.4.0
PyYAML==6.0.1
requests==2.31.0
beautifulsoup4==4.12.3
search.py (new file, 224 lines)
import os
import json

import requests
import yaml
from bs4 import BeautifulSoup
from termcolor import colored


def load_config(file_path):
    # Load key/value pairs from the YAML config into environment variables.
    with open(file_path, 'r') as file:
        config = yaml.safe_load(file)
        for key, value in config.items():
            os.environ[key] = value


class WebSearcher:
    """
    A class that encapsulates methods for generating search queries, fetching search results,
    determining the best search pages, and scraping web content using the OpenAI API and other web services.

    This class is designed to interact with the OpenAI API to leverage its capabilities for generating
    search queries based on a provided plan and query. It integrates with the serper.dev API to fetch
    search results and then uses a combination of these results and additional OpenAI API calls to determine
    the most relevant web pages. Finally, it scrapes the content of the chosen best page.

    Methods:
        __init__(self, model, verbose=False): Initializes the WebSearcher instance, loads API keys from a
            configuration file, and sets up headers for HTTP requests.
        generate_searches(self, plan: str, query: str) -> str: Generates a search query based on the provided plan and query.
        get_search_page(self, search_results: str, plan: str, query: str) -> str: Determines the best search page URL
            based on the results and context.
        format_results(self, organic_results: list) -> str: Formats the search results into a more readable form.
        fetch_search_results(self, search_queries: str) -> str: Fetches detailed search results from the serper.dev API.
        scrape_website_content(self, website_url: str) -> dict: Scrapes and returns the content of the given website URL.
        use_tool(self, plan: str = None, query: str = None) -> dict: Orchestrates the other methods to perform a
            complete search-and-retrieve operation based on the specified plan and query.

    Usage Example:
        searcher = WebSearcher(model="gpt-3.5-turbo", verbose=True)
        results_dict = searcher.use_tool(plan="Research new AI techniques", query="Latest trends in AI")

    results_dict will contain the URL as a key and the scraped content from that URL as the value.
    """

    def __init__(self, model, verbose=False):
        load_config('config.yaml')
        self.api_key = os.getenv("OPENAI_API_KEY")
        self.url = 'https://api.openai.com/v1/chat/completions'
        self.headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {self.api_key}'
        }
        self.model = model
        self.verbose = verbose

    def generate_searches(self, plan, query):
        # Force a function call so the model returns a structured search query.
        tools = [
            {
                "type": "function",
                "function": {
                    "name": "fetch_search_results",
                    "description": "Fetch search results based on the search query",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "search_engine_queries": {
                                "type": "string",
                                "description": "The most suitable search query for the plan"
                            },
                        },
                        "required": ["search_engine_queries"]
                    }
                }
            }
        ]

        data = {
            "model": self.model,
            "messages": [{"role": "user", "content": f"Query:{query}\n\n Plan:{plan}"}],
            "temperature": 0,
            "tools": tools,
            "tool_choice": "required"
        }

        json_data = json.dumps(data)
        response = requests.post(self.url, headers=self.headers, data=json_data, timeout=180)
        response_dict = response.json()

        tool_calls = response_dict['choices'][0]['message']['tool_calls'][0]
        arguments_json = json.loads(tool_calls['function']['arguments'])
        search_queries = arguments_json['search_engine_queries']
        print(colored(f"Search Engine Queries: {search_queries}", 'yellow'))

        return search_queries

    def get_search_page(self, search_results, plan, query):
        # Ask the model to pick the single best URL from the formatted search results.
        tools = [
            {
                "type": "function",
                "function": {
                    "name": "decide_best_pages",
                    "description": "Decide the best pages to visit based on the search results",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "best_search_page": {
                                "type": "string",
                                "description": "The URL link of the best search page based on the Search Results, Plan and Query. Do not select pdf files."
                            },
                        },
                        "required": ["best_search_page"]
                    }
                }
            }
        ]

        data = {
            "model": self.model,
            "messages": [{"role": "user", "content": f"Query:{query}\n\n Plan:{plan} \n\n Search Results:{search_results}"}],
            "temperature": 0,
            "tools": tools,
            "tool_choice": "required"
        }

        json_data = json.dumps(data)
        response = requests.post(self.url, headers=self.headers, data=json_data, timeout=180)
        response_dict = response.json()

        tool_calls = response_dict['choices'][0]['message']['tool_calls'][0]
        arguments_json = json.loads(tool_calls['function']['arguments'])
        best_page = arguments_json['best_search_page']
        print(colored(f"Best Page: {best_page}", 'yellow'))

        return best_page

    def format_results(self, organic_results):
        # Flatten serper.dev organic results into "Title / Link / Snippet" blocks.
        result_strings = []
        for result in organic_results:
            title = result.get('title', 'No Title')
            link = result.get('link', '#')
            snippet = result.get('snippet', 'No snippet available.')
            result_strings.append(f"Title: {title}\nLink: {link}\nSnippet: {snippet}\n---")

        return '\n'.join(result_strings)

    def fetch_search_results(self, search_queries: str):
        search_url = "https://google.serper.dev/search"
        headers = {
            'Content-Type': 'application/json',
            'X-API-KEY': os.environ['SERPER_DEV_API_KEY']  # Ensure this environment variable is set with your API key
        }
        payload = json.dumps({"q": search_queries})

        # Attempt to make the HTTP POST request
        try:
            response = requests.post(search_url, headers=headers, data=payload, timeout=180)
            response.raise_for_status()  # Raise an HTTPError for bad responses (4XX, 5XX)
            results = response.json()

            # Check if 'organic' results are in the response
            if 'organic' in results:
                return self.format_results(results['organic'])
            else:
                return "No organic results found."

        except requests.exceptions.HTTPError as http_err:
            return f"HTTP error occurred: {http_err}"
        except requests.exceptions.RequestException as req_err:
            return f"Request exception occurred: {req_err}"
        except KeyError as key_err:
            return f"Key error in handling response: {key_err}"

    def scrape_website_content(self, website_url):
        # Browser-like headers reduce the chance of the request being blocked.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Accept-Language': 'en-US,en;q=0.9',
            'Referer': 'https://www.google.com/',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Accept-Encoding': 'gzip, deflate, br'
        }

        try:
            # Making a GET request to the website
            response = requests.get(website_url, headers=headers, timeout=15)
            response.raise_for_status()  # This will raise an exception for HTTP errors

            # Parsing the page content using BeautifulSoup
            soup = BeautifulSoup(response.content, 'html.parser')
            text = soup.get_text(separator='\n')
            # Cleaning up the text: removing excess whitespace
            clean_text = '\n'.join([line.strip() for line in text.splitlines() if line.strip()])

            return {website_url: clean_text}

        except requests.exceptions.RequestException as e:
            print(f"Error retrieving content from {website_url}: {e}")
            return {website_url: f"Failed to retrieve content due to an error: {e}"}

    def use_tool(self, plan=None, query=None):
        # Full pipeline: query generation -> search -> page selection -> scrape.
        search_queries = self.generate_searches(plan, query)
        search_results = self.fetch_search_results(search_queries)
        best_page = self.get_search_page(search_results, plan, query)
        results_dict = self.scrape_website_content(best_page)

        if self.verbose:
            print(colored(f"SEARCH RESULTS {search_results}", 'yellow'))
            print(colored(f"RESULTS DICT {results_dict}", 'yellow'))

        return results_dict


if __name__ == '__main__':
    search = WebSearcher(model="gpt-3.5-turbo", verbose=True)
    search.use_tool(plan="Find the best way to cook a turkey",
                    query="How long should I cook a turkey for?")
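`format_results` relies only on the `title`, `link`, and `snippet` keys of serper.dev's organic results, so it can be checked offline with hand-made entries (the data below is invented for illustration; `config.yaml` must still exist because `WebSearcher.__init__` loads it):

```python
from search import WebSearcher

searcher = WebSearcher(model="gpt-3.5-turbo")  # loads config.yaml on init
fake_organic = [
    {"title": "Example Page", "link": "https://example.com", "snippet": "An example snippet."},
    {"link": "https://example.org"},  # missing keys fall back to the defaults
]
print(searcher.format_results(fake_organic))
```

Each entry renders as a `Title / Link / Snippet` block separated by `---`, which is the text the page-selection call sees.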