commit c76a8011765117e8292f1261ae1368189cf949e7 Author: SamuelSchmidgall Date: Tue Jan 7 21:02:04 2025 -0500 initial commit diff --git a/LICENSE b/LICENSE new file mode 100755 index 0000000..23833fb --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Samuel Schmidgall + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100755 index 0000000..52ab68e --- /dev/null +++ b/README.md @@ -0,0 +1,174 @@ +# Agent Laboratory: Using LLM Agents as Research Assistants + + +

+ Demonstration of the flow of Agent Laboratory +

+ +

+ 【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】 +

+ +

+ 【🌐 Website | 💻 Software | 🎥 Video | 📚 Example Paper | 📰 Citation】 +

+ +## 📖 Overview + +- **Agent Laboratory** is an end-to-end autonomous research workflow meant to assist **you** as the human researcher toward **implementing your research ideas**. Agent Laboratory consists of specialized agents driven by large language models to support you through the entire research workflow—from conducting literature reviews and formulating plans to executing experiments and writing comprehensive reports. +- This system is not designed to replace your creativity but to complement it, enabling you to focus on ideation and critical thinking while automating repetitive and time-intensive tasks like coding and documentation. By accommodating varying levels of computational resources and human involvement, Agent Laboratory aims to accelerate scientific discovery and optimize your research productivity. + +

+ Demonstration of the flow of Agent Laboratory +

+ +### 🔬 How does Agent Laboratory work? + +- Agent Laboratory consists of three primary phases that systematically guide the research process: (1) Literature Review, (2) Experimentation, and (3) Report Writing. During each phase, specialized agents driven by LLMs collaborate to accomplish distinct objectives, integrating external tools like arXiv, Hugging Face, Python, and LaTeX to optimize outcomes. This structured workflow begins with the independent collection and analysis of relevant research papers, progresses through collaborative planning and data preparation, and results in automated experimentation and comprehensive report generation. Details on specific agent roles and their contributions across these phases are discussed in the paper. + +

+ Demonstration of the flow of Agent Laboratory +

+ +## 🖥️ Installation + +### Python venv option + + +1. **Clone the GitHub Repository**: Begin by cloning the repository using the command: +```bash +git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git +``` + +2. **Set up and Activate Python Environment** +```bash +python -m venv venv_agent_lab +``` +- Now activate this environment: +```bash +source venv_agent_lab/bin/activate +``` + +3. **Install required libraries** +```bash +pip install -r requirements.txt +``` + +4. **Install pdflatex [OPTIONAL]** +```bash +sudo apt install pdflatex +``` +- This enables latex source to be compiled by the agents. +- **[IMPORTANT]** If this step cannot be run due to not having sudo access, pdf compiling can be turned off via running Agent Laboratory via setting the `--compile-latex` flag to false: `--compile-latex "false"` + + + +5. **Now run Agent Laboratory!** + +`python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA"` + +or, if you don't have pdflatex installed + +`python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile-latex "false"` + +### Co-Pilot mode + +To run Agent Laboratory in copilot mode, simply set the copilot-mode flag to `"true"` + +`python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --copilot-mode "true"` + +----- +## Tips for better research outcomes + + +#### [Tip #1] 📝 Make sure to write extensive notes! 📝 + +**Writing extensive notes is important** for helping your agent understand what you're looking to accomplish in your project, as well as any style preferences. Notes can include any experiments you want the agents to perform, providing API keys, certain plots or figures you want included, or anything you want the agent to know when performing research. + +This is also your opportunity to let the agent know **what compute resources it has access to**, e.g. 
GPUs (how many, what type of GPU, how many GBs), CPUs (how many cores, what type of CPUs), storage limitations, and hardware specs. + +In order to add notes, you must modify the task_notes_LLM structure inside of `ai_lab_repo.py`. Provided below is an example set of notes used for some of our experiments. + + +``` +task_notes_LLM = [ + {"phases": ["plan formulation"], + "note": f"You should come up with a plan for TWO experiments."}, + + {"phases": ["plan formulation", "data preparation", "running experiments"], + "note": "Please use gpt-4o-mini for your experiments."}, + + {"phases": ["running experiments"], + "note": f'Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n'}, + + {"phases": ["running experiments"], + "note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."}, + + {"phases": ["running experiments"], + "note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."}, + + {"phases": ["data preparation", "running experiments"], + "note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"}, + + {"phases": ["data preparation", "running experiments"], + "note": "Generate figures with very colorful and artistic design."}, + ] +``` + +-------- + +#### [Tip #2] 🚀 Using more powerful models generally leads to better research 🚀 + +When conducting research, **the choice of model can significantly impact the quality of results**. 
More powerful models tend to have higher accuracy, better reasoning capabilities, and better report generation. If computational resources allow, prioritize the use of advanced models such as o1-(mini/preview) or similar state-of-the-art large language models. + +However, **it’s important to balance performance and cost-effectiveness**. While powerful models may yield better results, they are often more expensive and time-consuming to run. Consider using them selectively—for instance, for key experiments or final analyses—while relying on smaller, more efficient models for iterative tasks or initial prototyping. + +When resources are limited, **optimize by fine-tuning smaller models** on your specific dataset or combining pre-trained models with task-specific prompts to achieve the desired balance between performance and computational efficiency. + +----- + +#### [Tip #3] ✅ You can load previous saves from checkpoints ✅ + +**If you lose progress, internet connection, or if a subtask fails, you can always load from a previous state.** All of your progress is saved by default in the `state_saves` variable, which stores each individual checkpoint. Just pass the following arguments when running `ai_lab_repo.py` + +`python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH"` + +----- + + + +#### [Tip #4] 🈯 If you are running in a language other than English 🈲 + +If you are running Agent Laboratory in a language other than English, no problem, just make sure to provide a language flag to the agents to perform research in your preferred language. Note that we have not extensively studied running Agent Laboratory in other languages, so be sure to report any problems you encounter. 
+ +For example, if you are running in Chinese: + +`python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文"` + +---- + + +#### [Tip #5] 🌟 There is a lot of room for improvement 🌟 + +There is a lot of room to improve this codebase, so if you end up making changes and want to help the community, please feel free to share the changes you've made! We hope this tool helps you! + + +## 📜 License + +Source Code Licensing: Our project's source code is licensed under the MIT License. This license permits the use, modification, and distribution of the code, subject to certain conditions outlined in the MIT License. + +## 📬 Contact + +If you would like to get in touch, feel free to reach out to [sschmi46@jhu.edu](mailto:sschmi46@jhu.edu) + +## Reference / Bibtex + + + +```bibtex +@preprint{schmidgall2025AgentLaboratory, + title={Agent Laboratory: Using LLM Agents as Research Assistants}, + author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang, Liu, Zicheng and Barsoum, Emad}, + year={2025} +} +``` \ No newline at end of file diff --git a/agents.py b/agents.py new file mode 100755 index 0000000..1822ef3 --- /dev/null +++ b/agents.py @@ -0,0 +1,694 @@ +from utils import * +from tools import * +from inference import * + + +def extract_json_between_markers(llm_output): + # Regular expression pattern to find JSON content between ```json and ``` + json_pattern = r"```json(.*?)```" + matches = re.findall(json_pattern, llm_output, re.DOTALL) + + if not matches: + # Fallback: Try to find any JSON-like content in the output + json_pattern = r"\{.*?\}" + matches = re.findall(json_pattern, llm_output, re.DOTALL) + + for json_string in matches: + json_string = json_string.strip() + try: + parsed_json = json.loads(json_string) + return parsed_json + except json.JSONDecodeError: + # Attempt to fix common JSON issues + try: + # Remove invalid 
control characters + json_string_clean = re.sub(r"[\x00-\x1F\x7F]", "", json_string) + parsed_json = json.loads(json_string_clean) + return parsed_json + except json.JSONDecodeError: + continue # Try next match + + return None # No valid JSON found + + + +def get_score(outlined_plan, latex, reward_model_llm, reviewer_type=None, attempts=3, openai_api_key=None): + e = str() + for _attempt in range(attempts): + try: + # todo: have a reward function here + # template inherited from the AI Scientist (good work on this prompt Sakana AI team :D) + template_instructions = """ + Respond in the following format: + + THOUGHT: + + + REVIEW JSON: + ```json + + ``` + + In , first briefly discuss your intuitions and reasoning for the evaluation. + Detail your high-level arguments, necessary choices and desired outcomes of the review. + Do not make generic comments here, but be specific to your current paper. + Treat this as the note-taking phase of your review. + + In , provide the review in JSON format with the following fields in the order: + - "Summary": A summary of the paper content and its contributions. + - "Strengths": A list of strengths of the paper. + - "Weaknesses": A list of weaknesses of the paper. + - "Originality": A rating from 1 to 4 (low, medium, high, very high). + - "Quality": A rating from 1 to 4 (low, medium, high, very high). + - "Clarity": A rating from 1 to 4 (low, medium, high, very high). + - "Significance": A rating from 1 to 4 (low, medium, high, very high). + - "Questions": A set of clarifying questions to be answered by the paper authors. + - "Limitations": A set of limitations and potential negative societal impacts of the work. + - "Ethical Concerns": A boolean value indicating whether there are ethical concerns. + - "Soundness": A rating from 1 to 4 (poor, fair, good, excellent). + - "Presentation": A rating from 1 to 4 (poor, fair, good, excellent). + - "Contribution": A rating from 1 to 4 (poor, fair, good, excellent). 
+ - "Overall": A rating from 1 to 10 (very strong reject to award quality). + - "Confidence": A rating from 1 to 5 (low, medium, high, very high, absolute). + - "Decision": A decision that has to be one of the following: Accept, Reject. + + For the "Decision" field, don't use Weak Accept, Borderline Accept, Borderline Reject, or Strong Reject. Instead, only use Accept or Reject. + This JSON will be automatically parsed, so ensure the format is precise. + """ + neurips_form = (""" + ## Review Form + Below is a description of the questions you will be asked on the review form for each paper and some guidelines on what to consider when answering these questions. + When writing your review, please keep in mind that after decisions have been made, reviews and meta-reviews of accepted papers and opted-in rejected papers will be made public. + + 1. Summary: Briefly summarize the paper and its contributions. This is not the place to critique the paper; the authors should generally agree with a well-written summary. + - Strengths and Weaknesses: Please provide a thorough assessment of the strengths and weaknesses of the paper, touching on each of the following dimensions: + - Originality: Are the tasks or methods new? Is the work a novel combination of well-known techniques? (This can be valuable!) Is it clear how this work differs from previous contributions? Is related work adequately cited + - Quality: Is the submission technically sound? Are claims well supported (e.g., by theoretical analysis or experimental results)? Are the methods used appropriate? Is this a complete piece of work or work in progress? Are the authors careful and honest about evaluating both the strengths and weaknesses of their work + - Clarity: Is the submission clearly written? Is it well organized? (If not, please make constructive suggestions for improving its clarity.) Does it adequately inform the reader? 
(Note that a superbly written paper provides enough information for an expert reader to reproduce its results.) + - Significance: Are the results important? Are others (researchers or practitioners) likely to use the ideas or build on them? Does the submission address a difficult task in a better way than previous work? Does it advance the state of the art in a demonstrable way? Does it provide unique data, unique conclusions about existing data, or a unique theoretical or experimental approach? + + 2. Questions: Please list up and carefully describe any questions and suggestions for the authors. Think of the things where a response from the author can change your opinion, clarify a confusion or address a limitation. This can be very important for a productive rebuttal and discussion phase with the authors. + + 3. Limitations: Have the authors adequately addressed the limitations and potential negative societal impact of their work? If not, please include constructive suggestions for improvement. + In general, authors should be rewarded rather than punished for being up front about the limitations of their work and any potential negative societal impact. You are encouraged to think through whether any critical points are missing and provide these as feedback for the authors. + + 4. Ethical concerns: If there are ethical issues with this paper, please flag the paper for an ethics review. For guidance on when this is appropriate, please review the NeurIPS ethics guidelines. + + 5. Soundness: Please assign the paper a numerical rating on the following scale to indicate the soundness of the technical claims, experimental and research methodology and on whether the central claims of the paper are adequately supported with evidence. + 4: excellent + 3: good + 2: fair + 1: poor + + 6. Presentation: Please assign the paper a numerical rating on the following scale to indicate the quality of the presentation. 
This should take into account the writing style and clarity, as well as contextualization relative to prior work. + 4: excellent + 3: good + 2: fair + 1: poor + + 7. Contribution: Please assign the paper a numerical rating on the following scale to indicate the quality of the overall contribution this paper makes to the research area being studied. Are the questions being asked important? Does the paper bring a significant originality of ideas and/or execution? Are the results valuable to share with the broader NeurIPS community. + 4: excellent + 3: good + 2: fair + 1: poor + + 8. Overall: Please provide an "overall score" for this submission. Choices: + 10: Award quality: Technically flawless paper with groundbreaking impact on one or more areas of AI, with exceptionally strong evaluation, reproducibility, and resources, and no unaddressed ethical considerations. + 9: Very Strong Accept: Technically flawless paper with groundbreaking impact on at least one area of AI and excellent impact on multiple areas of AI, with flawless evaluation, resources, and reproducibility, and no unaddressed ethical considerations. + 8: Strong Accept: Technically strong paper with, with novel ideas, excellent impact on at least one area of AI or high-to-excellent impact on multiple areas of AI, with excellent evaluation, resources, and reproducibility, and no unaddressed ethical considerations. + 7: Accept: Technically solid paper, with high impact on at least one sub-area of AI or moderate-to-high impact on more than one area of AI, with good-to-excellent evaluation, resources, reproducibility, and no unaddressed ethical considerations. + 6: Weak Accept: Technically solid, moderate-to-high impact paper, with no major concerns with respect to evaluation, resources, reproducibility, ethical considerations. + 5: Borderline accept: Technically solid paper where reasons to accept outweigh reasons to reject, e.g., limited evaluation. Please use sparingly. 
+ 4: Borderline reject: Technically solid paper where reasons to reject, e.g., limited evaluation, outweigh reasons to accept, e.g., good evaluation. Please use sparingly. + 3: Reject: For instance, a paper with technical flaws, weak evaluation, inadequate reproducibility and incompletely addressed ethical considerations. + 2: Strong Reject: For instance, a paper with major technical flaws, and/or poor evaluation, limited impact, poor reproducibility and mostly unaddressed ethical considerations. + 1: Very Strong Reject: For instance, a paper with trivial results or unaddressed ethical considerations + + 9. Confidence: Please provide a "confidence score" for your assessment of this submission to indicate how confident you are in your evaluation. Choices: + 5: You are absolutely certain about your assessment. You are very familiar with the related work and checked the math/other details carefully. + 4: You are confident in your assessment, but not absolutely certain. It is unlikely, but not impossible, that you did not understand some parts of the submission or that you are unfamiliar with some pieces of related work. + 3: You are fairly confident in your assessment. It is possible that you did not understand some parts of the submission or that you are unfamiliar with some pieces of related work. Math/other details were not carefully checked. + 2: You are willing to defend your assessment, but it is quite likely that you did not understand the central parts of the submission or that you are unfamiliar with some pieces of related work. Math/other details were not carefully checked. + 1: Your assessment is an educated guess. The submission is not in your area or the submission was difficult to understand. Math/other details were not carefully checked. + + You must make sure that all sections are properly created: abstract, introduction, methods, results, and discussion. Points must be reduced from your scores if any of these are missing. 
+ """ + template_instructions) + if reviewer_type is None: reviewer_type = "" + sys = ( + "You are an AI researcher who is reviewing a paper that was submitted to a prestigious ML venue. " + f"Be critical and cautious in your decision. {reviewer_type}\n" + ) + neurips_form + scoring = query_model( + model_str=f"{reward_model_llm}", + system_prompt=sys, + openai_api_key=openai_api_key, + prompt=( + f"Outlined in the following text is the research plan that the machine learning engineer was tasked with building: {outlined_plan}\n\n" + f"The following text is the research latex that the model produced: \n{latex}\n\n"), temp=0.0) + review_json = extract_json_between_markers(scoring) + + overall = int(review_json["Overall"]) / 10 + soundness = int(review_json["Soundness"]) / 4 + confidence = int(review_json["Confidence"]) / 5 + contribution = int(review_json["Contribution"]) / 4 + presentation = int(review_json["Presentation"]) / 4 + clarity = int(review_json["Clarity"]) / 4 + originality = int(review_json["Originality"]) / 4 + quality = int(review_json["Quality"]) / 4 + significance = int(review_json["Significance"]) / 4 + + clarity_weight = 0.1 + quality_weight = 0.1 + overall_weight = 1.0 + soundness_weight = 0.1 + confidence_weight = 0.1 + originality_weight = 0.1 + significance_weight = 0.1 + contribution_weight = 0.4 + presentation_weight = 0.2 + + # max possible + max_score = ( + clarity_weight + quality_weight + overall_weight + soundness_weight + confidence_weight + originality_weight + significance_weight + contribution_weight + presentation_weight) + + performance = (( + soundness_weight * soundness + presentation_weight * presentation + confidence_weight * confidence + contribution_weight * contribution + overall_weight * overall + originality_weight * originality + significance * significance_weight + clarity_weight * clarity + quality_weight * quality) / max_score) * 10 + return performance, f"The performance of your submission is: {performance}" + 
scoring, True + except Exception as e: + print(e) + return None, str(e), False + return 0, e + + +class ReviewersAgent: + def __init__(self, model="gpt-4o-mini", notes=None, openai_api_key=None): + if notes is None: self.notes = [] + else: self.notes = notes + self.model = model + self.openai_api_key = openai_api_key + + def inference(self, plan, report): + reviewer_1 = "You are a harsh but fair reviewer and expect good experiments that lead to insights for the research topic." + review_1 = get_score(outlined_plan=plan, latex=report, reward_model_llm=self.model, reviewer_type=reviewer_1, openai_api_key=self.openai_api_key) + + reviewer_2 = "You are a harsh and critical but fair fair reviewer who is looking for idea that would be impactful in the field." + review_2 = get_score(outlined_plan=plan, latex=report, reward_model_llm=self.model, reviewer_type=reviewer_2, openai_api_key=self.openai_api_key) + + reviewer_3 = "You are a harsh but fair open-minded reviewer that is looking for novel ideas that have not been proposed before." 
+ review_3 = get_score(outlined_plan=plan, latex=report, reward_model_llm=self.model, reviewer_type=reviewer_3, openai_api_key=self.openai_api_key) + + return f"Reviewer #1:\n{review_1}, \nReviewer #2:\n{review_2}, \nReviewer #3:\n{review_3}" + + +class BaseAgent: + def __init__(self, model="gpt-4o-mini", notes=None, max_steps=100, openai_api_key=None): + if notes is None: self.notes = [] + else: self.notes = notes + self.max_steps = max_steps + self.model = model + self.phases = [] + self.plan = str() + self.report = str() + self.history = list() + self.prev_comm = str() + self.prev_report = str() + self.exp_results = str() + self.dataset_code = str() + self.results_code = str() + self.lit_review_sum = str() + self.interpretation = str() + self.prev_exp_results = str() + self.reviewer_response = str() + self.prev_results_code = str() + self.prev_interpretation = str() + self.openai_api_key = openai_api_key + + self.second_round = False + self.max_hist_len = 15 + + def set_model_backbone(self, model): + self.model = model + + @staticmethod + def clean_text(text): + """ + Fix minor corrections + :return: (str) corrected text + """ + text = text.replace("```\n", "```") + return text + + def inference(self, research_topic, phase, step, feedback="", temp=None): + sys_prompt = f"""You are {self.role_description()} \nTask instructions: {self.phase_prompt(phase)}\n{self.command_descriptions(phase)}"""#\n{self.example_command(phase)} + context = self.context(phase) + history_str = "\n".join([_[1] for _ in self.history]) + phase_notes = [_note for _note in self.notes if phase in _note["phases"]] + notes_str = f"Notes for the task objective: {phase_notes}\n" if len(phase_notes) > 0 else "" + complete_str = str() + if step/(self.max_steps-1) > 0.7: complete_str = "You must finish this task and submit as soon as possible!" 
+ prompt = ( + f"""{context}\n{'~' * 10}\nHistory: {history_str}\n{'~' * 10}\n""" + f"Current Step #{step}, Phase: {phase}\n{complete_str}\n" + f"[Objective] Your goal is to perform research on the following topic: {research_topic}\n" + f"Feedback: {feedback}\nNotes: {notes_str}\nYour previous command was: {self.prev_comm}. Make sure your new output is very different.\nPlease produce a single command below:\n") + model_resp = query_model(model_str=self.model, system_prompt=sys_prompt, prompt=prompt, temp=temp, openai_api_key=self.openai_api_key) + print("^"*50, phase, "^"*50) + model_resp = self.clean_text(model_resp) + self.prev_comm = model_resp + steps_exp = None + if feedback is not None and "```EXPIRATION" in feedback: + steps_exp = int(feedback.split("\n")[0].replace("```EXPIRATION ", "")) + feedback = extract_prompt(feedback, "EXPIRATION") + self.history.append((steps_exp, f"Step #{step}, Phase: {phase}, Feedback: {feedback}, Your response: {model_resp}")) + # remove histories that have expiration dates + for _i in reversed(range(len(self.history))): + if self.history[_i][0] is not None: + self.history[_i] = self.history[_i] = self.history[_i][0] - 1, self.history[_i][1] + if self.history[_i][0] < 0: + self.history.pop(_i) + if len(self.history) >= self.max_hist_len: + self.history.pop(0) + return model_resp + + def reset(self): + self.history.clear() # Clear the deque + self.prev_comm = "" + + def context(self, phase): + raise NotImplementedError("Subclasses should implement this method.") + + def phase_prompt(self, phase): + raise NotImplementedError("Subclasses should implement this method.") + + def role_description(self): + raise NotImplementedError("Subclasses should implement this method.") + + def command_descriptions(self, phase): + raise NotImplementedError("Subclasses should implement this method.") + + def example_command(self, phase): + raise NotImplementedError("Subclasses should implement this method.") + + +class ProfessorAgent(BaseAgent): + 
def __init__(self, model="gpt4omini", notes=None, max_steps=100, openai_api_key=None):
    """Set up the professor agent; it only takes part in the "report writing" phase."""
    super().__init__(model, notes, max_steps, openai_api_key)
    self.phases = ["report writing"]


def generate_readme(self):
    """Ask the model for a README for the project and return it.

    The system prompt embeds the role description and ``self.report``; the
    user prompt embeds the text part (index 1) of every history entry.
    The literal opening fence "```markdown" is stripped from the response.
    """
    system_message = f"""You are {self.role_description()} \n Here is the written paper \n{self.report}. Task instructions: Your goal is to integrate all of the knowledge, code, reports, and notes provided to you and generate a readme.md for a github repository."""
    past_turns = "\n".join(entry[1] for entry in self.history)
    divider = "~" * 10
    user_message = f"History: {past_turns}\n{divider}\nPlease produce the readme below in markdown:\n"
    readme = query_model(
        model_str=self.model,
        system_prompt=system_message,
        prompt=user_message,
        openai_api_key=self.openai_api_key,
    )
    return readme.replace("```markdown", "")


def context(self, phase):
    """Return the extra context for ``phase``; the professor always gets none."""
    # A richer "report writing" context used to be sketched here as
    # commented-out code; it is disabled, so every phase yields "".
    return ""


def example_command(self, phase):
    """Return the prompt text showing how to format a command.

    Raises:
        Exception: if ``phase`` is not one of ``self.phases``.
    """
    if phase not in self.phases:
        raise Exception(f"Invalid phase: {phase}")
    dialogue_usage = "You can produce dialogue using the following command: ```DIALOGUE\ndialogue here\n```\n where dialogue here is the actual dialogue you will send and DIALOGUE is just the word DIALOGUE.\n"
    fence_reminder = "When performing a command, make sure to include the three ticks (```) at the top and bottom ```COMMAND\n\n``` where COMMAND is the specific command you want to run (e.g. REPORT, DIALOGUE).\n"
    return dialogue_usage + fence_reminder


def command_descriptions(self, phase):
    """Return the prompt text describing the LATEX submission command.

    Raises:
        Exception: if ``phase`` is not one of ``self.phases``.
    """
    if phase not in self.phases:
        raise Exception(f"Invalid phase: {phase}")
    pieces = [
        "When you believe a good report has been arrived at between you and the PhD student you can use the following command to end the dialogue and submit the plan ```LATEX\nreport here\n```\n where report here is the actual report written in compilable latex to be transmitted and LATEX is just the word LATEX.\n",
        "Your report should include numbers, relevant metrics to the experiment (e.g. accuracy or loss) and measures of significance. You must propagate this information accurately. You must also submit the report promptly. Do not delay too long.\n",
        "You must be incredibly detailed about what you did for the experiment and all of the findings.\n",
    ]
    return "".join(pieces)


def phase_prompt(self, phase):
    """Return the task instructions for ``phase``.

    Raises:
        Exception: if ``phase`` is not one of ``self.phases``.
    """
    if phase not in self.phases:
        raise Exception(f"Invalid phase: {phase}")
    return (
        "You are directing a PhD student to help them write a report in latex based on results from an experiment, and you interact with them through dialogue.\n"
        "Your goal is to write a report in latex for an experiment. You should read through the code, read through the interpretation, and look at the results to understand what occurred. You should then discuss with the PhD student how they can write up the results and give their feedback to improve their thoughts.\n"
    )


def role_description(self):
    """Return the persona string used in this agent's prompts."""
    return "a computer science professor at a top university."
def _second_round_context(agent):
    """Return the summary of the previous research round for ``agent``.

    Returns "" when ``agent.second_round`` is falsy. Otherwise the previous
    experiment code, results, interpretation, report and reviewer response
    are read from the agent and rendered as one string.

    NOTE(review): the ``second_round`` / ``prev_*`` / ``reviewer_response``
    attributes are not assigned in these classes; they are presumably set by
    the base class or the workflow -- confirm.
    """
    if not agent.second_round:
        return ""
    # Built as ONE string: a trailing comma after the first literal would
    # turn this into a tuple instead.
    return (
        "The following are results from the previous experiments\n"
        f"Previous Experiment code: {agent.prev_results_code}\n"
        f"Previous Results: {agent.prev_exp_results}\n"
        f"Previous Interpretation of results: {agent.prev_interpretation}\n"
        f"Previous Report: {agent.prev_report}\n"
        f"{agent.reviewer_response}\n\n\n"
    )


class PostdocAgent(BaseAgent):
    """Agent for the "plan formulation" and "results interpretation" phases."""

    def __init__(self, model="gpt4omini", notes=None, max_steps=100, openai_api_key=None):
        super().__init__(model, notes, max_steps, openai_api_key)
        self.phases = ["plan formulation", "results interpretation"]

    def context(self, phase):
        """Return the context text for ``phase`` as a single string.

        Always returns ``str`` (never a tuple), so the value can be
        interpolated directly into a prompt. Unknown phases yield "".
        """
        sr_str = _second_round_context(self)
        if phase == "plan formulation":
            return sr_str + f"Current Literature Review: {self.lit_review_sum}"
        if phase == "results interpretation":
            return sr_str + (
                f"Current Literature Review: {self.lit_review_sum}\n"
                f"Current Plan: {self.plan}\n"
                f"Current Dataset code: {self.dataset_code}\n"
                f"Current Experiment code: {self.results_code}\n"
                f"Current Results: {self.exp_results}"
            )
        return ""

    def example_command(self, phase):
        """Return example-command text for ``phase`` (none is defined, so "").

        Raises:
            Exception: if ``phase`` is not one of ``self.phases``.
        """
        if phase not in self.phases:
            raise Exception(f"Invalid phase: {phase}")
        # An empty string rather than an empty tuple, matching the string
        # returned by the other prompt-building methods.
        return ""

    def command_descriptions(self, phase):
        """Return the prompt text describing the commands available in ``phase``.

        Raises:
            Exception: if ``phase`` is not one of ``self.phases``.
        """
        if phase not in self.phases:
            raise Exception(f"Invalid phase: {phase}")
        if phase == "plan formulation":
            return (
                "You can produce dialogue using the following command: ```DIALOGUE\ndialogue here\n```\n where dialogue here is the actual dialogue you will send and DIALOGUE is just the word DIALOGUE.\n"
                "When you believe a good plan has been arrived at between you and the PhD student you can use the following command to end the dialogue and submit the plan ```PLAN\nplan here\n```\n where plan here is the actual plan to be transmitted and PLAN is just the word PLAN. Plan here should provide a clear outline for how to achieve the task, including what machine learning models to use and implement, what types of datasets should be searched for and used to train the model, and the exact details of the experiment.\n"
                "You can only use a SINGLE command per inference turn. Do not use more than one command per inference. If you use multiple commands, then only one of them will be executed, NOT BOTH.\n"
                "Make sure not to produce too much dialogue and to submit an plan in reasonable time."
                "When performing a command, make sure to include the three ticks (```) at the top and bottom ```COMMAND\ntext\n``` where COMMAND is the specific command you want to run (e.g. PLAN, DIALOGUE).\n"
            )
        elif phase == "results interpretation":
            return (
                "When you believe a good interpretation has been arrived at between you and the PhD student you can use the following command to end the dialogue and submit the plan ```INTERPRETATION\ninterpretation here\n```\n where interpretation here is the actual interpretation to be transmitted and INTERPRETATION is just the word INTERPRETATION. Please provide an INTERPRETATION in a reasonable amount of time.\n"
                "You can produce dialogue using the following command: ```DIALOGUE\ndialogue here\n```\n where dialogue here is the actual dialogue you will send and DIALOGUE is just the word DIALOGUE.\n"
                "You must submit the interpretation during this phase in a reasonable amount of time. Do not delay the submission."
                "When performing a command, make sure to include the three ticks (```) at the top and bottom ```COMMAND\ntext\n``` where COMMAND is the specific command you want to run (e.g. INTERPRETATION, DIALOGUE).\n"
            )

    def phase_prompt(self, phase):
        """Return the task instructions for ``phase``.

        Raises:
            Exception: if ``phase`` is not one of ``self.phases``.
        """
        if phase not in self.phases:
            raise Exception(f"Invalid phase: {phase}")
        if phase == "plan formulation":
            phase_str = (
                "You are directing a PhD student to help them come up with a good plan, and you interact with them through dialogue.\n"
                "Your goal is to produce plans that would make good experiments for the given topic. You should aim for a very simple experiment that showcases your plan, not a complex one. You should integrate the provided literature review and come up with plans on how to expand and build on these works for the given topic. Your plans should provide a clear outline for how to achieve the task, including what machine learning models to use and implement, what types of datasets should be searched for and used to train the model, and the exact details of the experiment.\n"
            )
        elif phase == "results interpretation":
            phase_str = (
                "You are directing a PhD student to help them come up with an interpretation for results from an experiment, and you interact with them through dialogue.\n"
                "Your goal is to interpret results from experiments that were previously run. You should read through the code and look at the results to understand what occurred. You should then discuss with the PhD student how they can interpret the results and give their feedback to improve their thoughts. You should integrate the provided literature review, code, and plans to come up with an exciting interpretation that could make a compelling paper. Your plans should provide a clear outline that can be used to write an academic paper.\n"
                "Your interpretation should include numbers, relevant metrics to the experiment (e.g. accuracy or loss) and measures of significance. You must propagate this information accurately. You must also complete this in a reasonable amount of time and then submit your results.\n"
            )
        return phase_str

    def role_description(self):
        """Return the persona string used in this agent's prompts."""
        return "a computer science postdoctoral student at a top university."


class MLEngineerAgent(BaseAgent):
    """Agent for the "data preparation" and "running experiments" phases."""

    def __init__(self, model="gpt4omini", notes=None, max_steps=100, openai_api_key=None):
        super().__init__(model, notes, max_steps, openai_api_key)
        self.phases = [
            "data preparation",
            "running experiments",
        ]

    def context(self, phase):
        """Return the context text for ``phase`` as a single string.

        Only "data preparation" has context (literature review and plan);
        every other phase, including "running experiments", yields "".
        """
        if phase == "data preparation":
            # The plan is included once; it used to be emitted twice.
            return _second_round_context(self) + (
                f"Current Literature Review: {self.lit_review_sum}\n"
                f"Current Plan: {self.plan}"
            )
        return ""

    def example_command(self, phase):
        """Return example-command text for ``phase`` (none is defined, so "").

        Raises:
            Exception: if ``phase`` is not one of ``self.phases``.
        """
        if phase not in self.phases:
            raise Exception(f"Invalid phase: {phase}")
        return ""

    def command_descriptions(self, phase):
        """Return the prompt text describing the commands available in ``phase``.

        Only "data preparation" defines commands; other valid phases get "".

        Raises:
            Exception: if ``phase`` is not one of ``self.phases``.
        """
        if phase not in self.phases:
            raise Exception(f"Invalid phase: {phase}")
        if phase == "data preparation":
            return (
                "You can produce code using the following command: ```python\ncode here\n```\n where code here is the actual code you will execute in a Python terminal, and python is just the word python. Try to incorporate some print functions. Do not use any classes or functions. If your code returns any errors, they will be provided to you, and you are also able to see print statements. You will receive all print statement results from the code. Make sure function variables are created inside the function or passed as a function parameter.\n"  # Try to avoid creating functions.
                "You can produce dialogue using the following command: ```DIALOGUE\ndialogue here\n```\n where dialogue here is the actual dialogue you will send, and DIALOGUE is just the word DIALOGUE.\n"
                "You also have access to HuggingFace datasets. You can search the datasets repository using the following command: ```SEARCH_HF\nsearch query here\n``` where search query here is the query used to search HuggingFace datasets, and SEARCH_HF is the word SEARCH_HF. This will return a list of HuggingFace dataset descriptions which can be loaded into Python using the datasets library. Your code MUST use an external HuggingFace directory.\n"
                "You MUST use a HuggingFace dataset in your code. DO NOT CREATE A MAIN FUNCTION. Try to make the code very simple.\n"
                "You can only use a SINGLE command per inference turn. Do not use more than one command per inference. If you use multiple commands, then only one of them will be executed, NOT BOTH.\n"
                "When performing a command, make sure to include the three ticks (```) at the top and bottom ```COMMAND\ntext\n``` where COMMAND is the specific command you want to run (e.g. python, DIALOGUE, SEARCH_HF).\n"
            )
        return ""

    def phase_prompt(self, phase):
        """Return the task instructions for ``phase``.

        "running experiments" is a valid phase with no instructions of its
        own, so it yields "" instead of failing on an unassigned local.

        Raises:
            Exception: if ``phase`` is not one of ``self.phases``.
        """
        if phase not in self.phases:
            raise Exception(f"Invalid phase: {phase}")
        phase_str = ""
        if phase == "data preparation":
            phase_str = (
                "You are a machine learning engineer being directed by a PhD student who will help you write the code, and you can interact with them through dialogue.\n"
                "Your goal is to produce code that prepares the data for the provided experiment. You should aim for simple code to prepare the data, not complex code. You should integrate the provided literature review and the plan and come up with code to prepare data for this experiment.\n"
            )
        return phase_str

    def role_description(self):
        """Return the persona string used in this agent's prompts."""
        return "a machine learning engineer working at a top university."


class PhDStudentAgent(BaseAgent):
    """Agent that takes part in every phase and accumulates the literature review."""

    def __init__(self, model="gpt4omini", notes=None, max_steps=100, openai_api_key=None):
        super().__init__(model, notes, max_steps, openai_api_key)
        self.phases = [
            "literature review",
            "plan formulation",
            "data preparation",
            "running experiments",
            "results interpretation",
            "report writing",
            "report refinement",
        ]
        # Entries appended elsewhere; phase_prompt reads each entry's "arxiv_id".
        self.lit_review = []

    def context(self, phase):
        """Return the context text for ``phase`` as a single string.

        "running experiments" and "report writing" have no active branch
        (their context blocks were commented out) and yield "".
        """
        sr_str = _second_round_context(self)
        if phase == "plan formulation":
            return sr_str + f"Current Literature Review: {self.lit_review_sum}"
        if phase == "data preparation":
            return sr_str + (
                f"Current Literature Review: {self.lit_review_sum}\n"
                f"Current Plan: {self.plan}"
            )
        if phase == "results interpretation":
            return sr_str + (
                f"Current Literature Review: {self.lit_review_sum}\n"
                f"Current Plan: {self.plan}\n"
                f"Current Dataset code: {self.dataset_code}\n"
                f"Current Experiment code: {self.results_code}\n"
                f"Current Results: {self.exp_results}"
            )
        if phase == "report refinement":
            return sr_str + (
                f"Current Literature Review: {self.lit_review_sum}\n"
                f"Current Plan: {self.plan}\n"
                f"Current Dataset code: {self.dataset_code}\n"
                f"Current Experiment code: {self.results_code}\n"
                f"Current Results: {self.exp_results}\n"
                f"Current Interpretation of results: {self.interpretation}"
            )
        if phase == "literature review":
            return sr_str
        return ""

    def requirements_txt(self):
        """Ask the model for a requirements.txt for the project and return its raw response.

        The user prompt embeds the text part (index 1) of every history entry.
        """
        sys_prompt = f"""You are {self.role_description()} \nTask instructions: Your goal is to integrate all of the knowledge, code, reports, and notes provided to you and generate a requirements.txt for a github repository for all of the code."""
        history_str = "\n".join(entry[1] for entry in self.history)
        prompt = (
            f"""History: {history_str}\n{'~' * 10}\n"""
            f"Please produce the requirements.txt below in markdown:\n")
        model_resp = query_model(model_str=self.model, system_prompt=sys_prompt, prompt=prompt, openai_api_key=self.openai_api_key)
        return model_resp

    def example_command(self, phase):
        """Return example-command text for ``phase`` (none is defined, so "").

        Raises:
            Exception: if ``phase`` is not one of ``self.phases``.
        """
        if phase not in self.phases:
            raise Exception(f"Invalid phase: {phase}")
        return ""

    def command_descriptions(self, phase):
        """Return the prompt text describing the commands available in ``phase``.

        Phases without commands ("running experiments", "report writing",
        "report refinement") yield "".

        Raises:
            Exception: if ``phase`` is not one of ``self.phases``.
        """
        if phase not in self.phases:
            raise Exception(f"Invalid phase: {phase}")
        if phase == "literature review":
            return (
                "To collect paper summaries, use the following command: ```SUMMARY\nSEARCH QUERY\n```\n where SEARCH QUERY is a string that will be used to find papers with semantically similar content and SUMMARY is just the word SUMMARY. Make sure your search queries are very short.\n"
                "To get the full paper text for an arXiv paper, use the following command: ```FULL_TEXT\narXiv paper ID\n```\n where arXiv paper ID is the ID of the arXiv paper (which can be found by using the SUMMARY command), and FULL_TEXT is just the word FULL_TEXT. Make sure to read the full text using the FULL_TEXT command before adding it to your list of relevant papers.\n"
                "If you believe a paper is relevant to the research project proposal, you can add it to the official review after reading using the following command: ```ADD_PAPER\narXiv_paper_ID\nPAPER_SUMMARY\n```\nwhere arXiv_paper_ID is the ID of the arXiv paper, PAPER_SUMMARY is a brief summary of the paper, and ADD_PAPER is just the word ADD_PAPER. You can only add one paper at a time. \n"
                "Make sure to use ADD_PAPER when you see a relevant paper. DO NOT use SUMMARY too many times."
                "You can only use a single command per inference turn. Do not use more than one command per inference. If you use multiple commands, then only one of them will be executed, not both.\n"
                "Make sure to extensively discuss the experimental results in your summary.\n"
                "When performing a command, make sure to include the three ticks (```) at the top and bottom ```COMMAND\ntext\n``` where COMMAND is the specific command you want to run (e.g. ADD_PAPER, FULL_TEXT, SUMMARY). Do not use the word COMMAND make sure to use the actual command, e.g. your command should look exactly like this: ```ADD_PAPER\ntext\n``` (where the command could be from ADD_PAPER, FULL_TEXT, SUMMARY)\n")
        elif phase == "plan formulation":
            return (
                "You can produce dialogue using the following command: ```DIALOGUE\ndialogue here\n```\n where 'dialogue here' is the actual dialogue you will send and DIALOGUE is just the word DIALOGUE.\n"
                "You can only use a single command per inference turn. Do not use more than one command per inference. If you use multiple commands, then only one of them will be executed, not both.\n"
                "When performing a command, make sure to include the three ticks (```) at the top and bottom ```COMMAND\ntext\n``` where COMMAND is the specific command you want to run (e.g. DIALOGUE).\n"
            )
        elif phase == "data preparation":
            return (
                "You can produce dialogue using the following command: ```DIALOGUE\ndialogue here\n```\n where 'dialogue here' is the actual dialogue you will send and DIALOGUE is just the word DIALOGUE.\n"
                "When you and the ML engineer have finalized your dataset preparation code and are ready to submit the final code, please use the following command: ```SUBMIT_CODE\ncode here\n```\n where 'code here' is the finalized code you will send and SUBMIT_CODE is just the word SUBMIT_CODE. Do not use any classes or functions. The submitted code must have a HuggingFace dataset import and must use an external HuggingFace dataset. If your code returns any errors, they will be provided to you, and you are also able to see print statements. Make sure function variables are created inside the function or passed as a function parameter. DO NOT CREATE A MAIN FUNCTION.\n"
                "Make sure to submit code in a reasonable amount of time. Do not make the code too complex, try to make it simple. Do not take too long to submit code. Submit the code early. You should submit the code ASAP.\n"
                "You can only use a single command per inference turn. Do not use more than one command per inference. If you use multiple commands, then only one of them will be executed, not both.\n"
                "When performing a command, make sure to include the three ticks (```) at the top and bottom ```COMMAND\ntext\n``` where COMMAND is the specific command you want to run (e.g. SUBMIT_CODE, DIALOGUE).\n")
        elif phase == "results interpretation":
            return (
                "You can produce dialogue using the following command: ```DIALOGUE\ndialogue here\n```\n where 'dialogue here' is the actual dialogue you will send and DIALOGUE is just the word DIALOGUE.\n"
                "When performing a command, make sure to include the three ticks (```) at the top and bottom ```COMMAND\ntext\n``` where COMMAND is the specific command you want to run (e.g. DIALOGUE).\n"
            )
        return ""

    def phase_prompt(self, phase):
        """Return the task instructions for ``phase``.

        For "literature review" the arXiv IDs already in ``self.lit_review``
        are appended when the list is non-empty. Phases without instructions
        ("running experiments", "report writing") yield "".

        Raises:
            Exception: if ``phase`` is not one of ``self.phases``.
        """
        if phase not in self.phases:
            raise Exception(f"Invalid phase: {phase}")

        if phase == "literature review":
            phase_str = (
                "Your goal is to perform a literature review for the presented task and add papers to the literature review.\n"
                "You have access to arXiv and can perform two search operations: (1) finding many different paper summaries from a search query and (2) getting a single full paper text for an arXiv paper.\n"
            )
            if self.lit_review:
                phase_str += "Papers in your review so far: " + " ".join(_paper["arxiv_id"] for _paper in self.lit_review)
        elif phase == "plan formulation":
            phase_str = (
                "You are a PhD student being directed by a postdoc who will help you come up with a good plan, and you interact with them through dialogue.\n"
                "Your goal is to produce plans that would make good experiments for the given topic. You should aim for a very simple experiment that showcases your plan, not a complex one. You should integrate the provided literature review and come up with plans on how to expand and build on these works for the given topic. Your plans should provide a clear outline for how to achieve the task, including what machine learning models to use and implement, what types of datasets should be searched for and used to train the model, and the exact details of the experiment.\n"
            )
        elif phase == "data preparation":
            phase_str = (
                "You are a PhD student directing a machine learning engineer, where the machine learning engineer will be writing the code, and you can interact with them through dialogue.\n"
                "Your goal is to help the ML engineer produce code that prepares the data for the provided experiment. You should aim for very simple code to prepare the data, not complex code. You should integrate the provided literature review and the plan and come up with code to prepare data for this experiment.\n"
            )
        elif phase == "results interpretation":
            phase_str = (
                "You are a PhD student being directed by a postdoc who will help you come up with an interpretation for results from an experiment, and you interact with them through dialogue.\n"
                "Your goal is to interpret results from experiments that were previously run. You should read through the code and look at the results to understand what occurred. You should then discuss with the postdoc your interpretation and use their feedback to improve your thoughts. You should integrate the provided literature review, code, and plans to come up with an exciting interpretation that could make a compelling paper. Your plans should provide a clear outline that can be used to write an academic paper.\n"
                "Your interpretation should include numbers, relevant metrics to the experiment (e.g. accuracy or loss) and measures of significance. You must propagate this information accurately.\n"
                "You must submit the interpretation during this phase in a reasonable amount of time. Do not delay the submission."
            )
        elif phase == "report refinement":
            phase_str = (
                "You are a PhD student who has submitted their paper to an ML conference called ICLR. Your goal was to write a research paper and get high scores from the reviewers so that it get accepted to the conference.\n"
            )
        else:
            phase_str = ""
        return phase_str

    def role_description(self):
        """Return the persona string used in this agent's prompts."""
        return "a computer science PhD student at a top university."
+ + def add_review(self, review, arx_eng): + try: + arxiv_id, review_text = review.strip().split("\n", 1) + full_text = arx_eng.retrieve_full_paper_text(arxiv_id) + review_entry = { + "arxiv_id": arxiv_id, + "full_text": full_text, + "summary": review_text, + } + self.lit_review.append(review_entry) + return f"Successfully added paper {arxiv_id}", full_text + except Exception as e: + return f"Error trying to add review -- bad formatting, try again: {str(e)}", "" + + def format_review(self): + return "Provided here is a literature review on this topic:\n" + "\n".join( + f"arXiv ID: {_l['arxiv_id']}, Summary: {_l['summary']}" + for _l in self.lit_review) + + + diff --git a/ai_lab_repo.py b/ai_lab_repo.py new file mode 100755 index 0000000..58f10cd --- /dev/null +++ b/ai_lab_repo.py @@ -0,0 +1,722 @@ +from agents import * +from copy import copy +from common_imports import * +from mlesolver import MLESolver +from torch.backends.mkl import verbose + +import argparse +import pickle + +DEFAULT_LLM_BACKBONE = "o1-mini" + + +class LaboratoryWorkflow: + def __init__(self, research_topic, openai_api_key, max_steps=100, num_papers_lit_review=5, agent_model_backbone=f"{DEFAULT_LLM_BACKBONE}", notes=list(), human_in_loop_flag=None, compile_pdf=True, mlesolver_max_steps=3, papersolver_max_steps=5): + """ + Initialize laboratory workflow + @param research_topic: (str) description of research idea to explore + @param max_steps: (int) max number of steps for each phase, i.e. 
compute tolerance budget + @param num_papers_lit_review: (int) number of papers to include in the lit review + @param agent_model_backbone: (str or dict) model backbone to use for agents + @param notes: (list) notes for agent to follow during tasks + """ + + self.notes = notes + self.max_steps = max_steps + self.compile_pdf = compile_pdf + self.openai_api_key = openai_api_key + self.research_topic = research_topic + self.model_backbone = agent_model_backbone + self.num_papers_lit_review = num_papers_lit_review + + self.print_cost = True + self.review_override = True # should review be overridden? + self.review_ovrd_steps = 0 # review steps so far + self.arxiv_paper_exp_time = 3 + self.reference_papers = list() + + ########################################## + ####### COMPUTE BUDGET PARAMETERS ######## + ########################################## + self.num_ref_papers = 1 + self.review_total_steps = 0 # num steps to take if overridden + self.arxiv_num_summaries = 5 + self.mlesolver_max_steps = mlesolver_max_steps + self.papersolver_max_steps = papersolver_max_steps + + self.phases = [ + ("literature review", ["literature review"]), + ("plan formulation", ["plan formulation"]), + ("experimentation", ["data preparation", "running experiments"]), + ("results interpretation", ["results interpretation", "report writing", "report refinement"]), + ] + self.phase_status = dict() + for phase, subtasks in self.phases: + for subtask in subtasks: + self.phase_status[subtask] = False + + self.phase_models = dict() + if type(agent_model_backbone) == str: + for phase, subtasks in self.phases: + for subtask in subtasks: + self.phase_models[subtask] = agent_model_backbone + elif type(agent_model_backbone) == dict: + # todo: check if valid + self.phase_models = agent_model_backbone + + + self.human_in_loop_flag = human_in_loop_flag + + self.statistics_per_phase = { + "literature review": {"time": 0.0, "steps": 0.0,}, + "plan formulation": {"time": 0.0, "steps": 0.0,}, + "data 
preparation": {"time": 0.0, "steps": 0.0,}, + "running experiments": {"time": 0.0, "steps": 0.0,}, + "results interpretation": {"time": 0.0, "steps": 0.0,}, + "report writing": {"time": 0.0, "steps": 0.0,}, + "report refinement": {"time": 0.0, "steps": 0.0,}, + } + + self.save = True + self.verbose = True + self.reviewers = ReviewersAgent(model=self.model_backbone, notes=self.notes, openai_api_key=self.openai_api_key) + self.phd = PhDStudentAgent(model=self.model_backbone, notes=self.notes, max_steps=self.max_steps, openai_api_key=self.openai_api_key) + self.postdoc = PostdocAgent(model=self.model_backbone, notes=self.notes, max_steps=self.max_steps, openai_api_key=self.openai_api_key) + self.professor = ProfessorAgent(model=self.model_backbone, notes=self.notes, max_steps=self.max_steps, openai_api_key=self.openai_api_key) + self.ml_engineer = MLEngineerAgent(model=self.model_backbone, notes=self.notes, max_steps=self.max_steps, openai_api_key=self.openai_api_key) + + # remove previous files + remove_figures() + remove_directory("research_dir") + # make src and research directory + if not os.path.exists("state_saves"): + os.mkdir(os.path.join(".", "state_saves")) + os.mkdir(os.path.join(".", "research_dir")) + os.mkdir(os.path.join("./research_dir", "src")) + os.mkdir(os.path.join("./research_dir", "tex")) + + def set_model(self, model): + self.set_agent_attr("model", model) + self.reviewers.model = model + + def save_state(self, phase): + """ + Save state for phase + @param phase: (str) phase string + @return: None + """ + phase = phase.replace(" ", "_") + with open(f"state_saves/{phase}.pkl", "wb") as f: + pickle.dump(self, f) + + def set_agent_attr(self, attr, obj): + """ + Set attribute for all agents + @param attr: (str) agent attribute + @param obj: (object) object attribute + @return: None + """ + setattr(self.phd, attr, obj) + setattr(self.postdoc, attr, obj) + setattr(self.professor, attr, obj) + setattr(self.ml_engineer, attr, obj) + + def 
reset_agents(self): + """ + Reset all agent states + @return: None + """ + self.phd.reset() + self.postdoc.reset() + self.professor.reset() + self.ml_engineer.reset() + + def perform_research(self): + """ + Loop through all research phases + @return: None + """ + for phase, subtasks in self.phases: + phase_start_time = time.time() # Start timing the phase + if self.verbose: print(f"{'*'*50}\nBeginning phase: {phase}\n{'*'*50}") + for subtask in subtasks: + if self.verbose: print(f"{'&'*30}\nBeginning subtask: {subtask}\n{'&'*30}") + if type(self.phase_models) == dict: + if subtask in self.phase_models: + self.set_model(self.phase_models[subtask]) + else: self.set_model(f"{DEFAULT_LLM_BACKBONE}") + if (subtask not in self.phase_status or not self.phase_status[subtask]) and subtask == "literature review": + repeat = True + while repeat: repeat = self.literature_review() + self.phase_status[subtask] = True + if (subtask not in self.phase_status or not self.phase_status[subtask]) and subtask == "plan formulation": + repeat = True + while repeat: repeat = self.plan_formulation() + self.phase_status[subtask] = True + if (subtask not in self.phase_status or not self.phase_status[subtask]) and subtask == "data preparation": + repeat = True + while repeat: repeat = self.data_preparation() + self.phase_status[subtask] = True + if (subtask not in self.phase_status or not self.phase_status[subtask]) and subtask == "running experiments": + repeat = True + while repeat: repeat = self.running_experiments() + self.phase_status[subtask] = True + if (subtask not in self.phase_status or not self.phase_status[subtask]) and subtask == "results interpretation": + repeat = True + while repeat: repeat = self.results_interpretation() + self.phase_status[subtask] = True + if (subtask not in self.phase_status or not self.phase_status[subtask]) and subtask == "report writing": + repeat = True + while repeat: repeat = self.report_writing() + self.phase_status[subtask] = True + if (subtask not 
in self.phase_status or not self.phase_status[subtask]) and subtask == "report refinement": + return_to_exp_phase = self.report_refinement() + + if not return_to_exp_phase: + if self.save: self.save_state(subtask) + return + + self.set_agent_attr("second_round", return_to_exp_phase) + self.set_agent_attr("prev_report", copy(self.phd.report)) + self.set_agent_attr("prev_exp_results", copy(self.phd.exp_results)) + self.set_agent_attr("prev_results_code", copy(self.phd.results_code)) + self.set_agent_attr("prev_interpretation", copy(self.phd.interpretation)) + + self.phase_status["plan formulation"] = False + self.phase_status["data preparation"] = False + self.phase_status["running experiments"] = False + self.phase_status["results interpretation"] = False + self.phase_status["report writing"] = False + self.phase_status["report refinement"] = False + self.perform_research() + if self.save: self.save_state(subtask) + # Calculate and print the duration of the phase + phase_end_time = time.time() + phase_duration = phase_end_time - phase_start_time + print(f"Subtask '{subtask}' completed in {phase_duration:.2f} seconds.") + self.statistics_per_phase[subtask]["time"] = phase_duration + + def report_refinement(self): + """ + Perform report refinement phase + @return: (bool) whether to repeat the phase + """ + reviews = self.reviewers.inference(self.phd.plan, self.phd.report) + print("Reviews:", reviews) + if self.human_in_loop_flag["report refinement"]: + print(f"Provided are reviews from a set of three reviewers: {reviews}") + input("Would you like to be completed with the project or should the agents go back and improve their experimental results?\n (y) for go back (n) for complete project: ") + else: + review_prompt = f"Provided are reviews from a set of three reviewers: {reviews}. 
Would you like to be completed with the project or do you want to go back to the planning phase and improve your experiments?\n Type y and nothing else to go back, type n and nothing else for complete project." + self.phd.phases.append("report refinement") + if self.review_override: + if self.review_total_steps == self.review_ovrd_steps: + response = "n" + else: + response = "y" + self.review_ovrd_steps += 1 + else: + response = self.phd.inference( + research_topic=self.research_topic, phase="report refinement", feedback=review_prompt, step=0) + if len(response) == 0: + raise Exception("Model did not respond") + response = response.lower().strip()[0] + if response == "n": + if verbose: print("*"*40, "\n", "REVIEW COMPLETE", "\n", "*"*40) + return False + elif response == "y": + self.set_agent_attr("reviewer_response", f"Provided are reviews from a set of three reviewers: {reviews}.") + return True + else: raise Exception("Model did not respond") + + def report_writing(self): + """ + Perform report writing phase + @return: (bool) whether to repeat the phase + """ + # experiment notes + report_notes = [_note["note"] for _note in self.ml_engineer.notes if "report writing" in _note["phases"]] + report_notes = f"Notes for the task objective: {report_notes}\n" if len(report_notes) > 0 else "" + # instantiate mle-solver + from papersolver import PaperSolver + self.reference_papers = [] + solver = PaperSolver(notes=report_notes, max_steps=self.papersolver_max_steps, plan=lab.phd.plan, exp_code=lab.phd.results_code, exp_results=lab.phd.exp_results, insights=lab.phd.interpretation, lit_review=lab.phd.lit_review, ref_papers=self.reference_papers, topic=research_topic, openai_api_key=self.openai_api_key, llm_str=self.model_backbone["report writing"], compile_pdf=compile_pdf) + # run initialization for solver + solver.initial_solve() + # run solver for N mle optimization steps + for _ in range(self.papersolver_max_steps): + solver.solve() + # get best report results + report = 
"\n".join(solver.best_report[0][0]) + score = solver.best_report[0][1] + if self.verbose: print(f"Report writing completed, reward function score: {score}") + if self.human_in_loop_flag["report writing"]: + retry = self.human_in_loop("report writing", report) + if retry: return retry + self.set_agent_attr("report", report) + readme = self.professor.generate_readme() + save_to_file("./research_dir", "readme.md", readme) + save_to_file("./research_dir", "report.txt", report) + self.reset_agents() + return False + + def results_interpretation(self): + """ + Perform results interpretation phase + @return: (bool) whether to repeat the phase + """ + max_tries = self.max_steps + dialogue = str() + # iterate until max num tries to complete task is exhausted + for _i in range(max_tries): + resp = self.postdoc.inference(self.research_topic, "results interpretation", feedback=dialogue, step=_i) + if self.verbose: print("Postdoc: ", resp, "\n~~~~~~~~~~~") + dialogue = str() + if "```DIALOGUE" in resp: + dialogue = extract_prompt(resp, "DIALOGUE") + dialogue = f"The following is dialogue produced by the postdoctoral researcher: {dialogue}" + if self.verbose: print("#"*40, "\n", "Postdoc Dialogue:", dialogue, "\n", "#"*40) + if "```INTERPRETATION" in resp: + interpretation = extract_prompt(resp, "INTERPRETATION") + if self.human_in_loop_flag["results interpretation"]: + retry = self.human_in_loop("results interpretation", interpretation) + if retry: return retry + self.set_agent_attr("interpretation", interpretation) + # reset agent state + self.reset_agents() + self.statistics_per_phase["results interpretation"]["steps"] = _i + return False + resp = self.phd.inference(self.research_topic, "results interpretation", feedback=dialogue, step=_i) + if self.verbose: print("PhD Student: ", resp, "\n~~~~~~~~~~~") + dialogue = str() + if "```DIALOGUE" in resp: + dialogue = extract_prompt(resp, "DIALOGUE") + dialogue = f"The following is dialogue produced by the PhD student: {dialogue}" 
+ if self.verbose: print("#"*40, "\n", "PhD Dialogue:", dialogue, "#"*40, "\n") + raise Exception("Max tries during phase: Results Interpretation") + + def running_experiments(self): + """ + Perform running experiments phase + @return: (bool) whether to repeat the phase + """ + # experiment notes + experiment_notes = [_note["note"] for _note in self.ml_engineer.notes if "running experiments" in _note["phases"]] + experiment_notes = f"Notes for the task objective: {experiment_notes}\n" if len(experiment_notes) > 0 else "" + # instantiate mle-solver + solver = MLESolver(dataset_code=self.ml_engineer.dataset_code, notes=experiment_notes, insights=self.ml_engineer.lit_review_sum, max_steps=self.mlesolver_max_steps, plan=self.ml_engineer.plan, openai_api_key=self.openai_api_key, llm_str=self.model_backbone["running experiments"]) + # run initialization for solver + solver.initial_solve() + # run solver for N mle optimization steps + for _ in range(self.mlesolver_max_steps-1): + solver.solve() + # get best code results + code = "\n".join(solver.best_codes[0][0]) + # regenerate figures from top code + execute_code(code) + score = solver.best_codes[0][1] + exp_results = solver.best_codes[0][2] + if self.verbose: print(f"Running experiments completed, reward function score: {score}") + if self.human_in_loop_flag["running experiments"]: + retry = self.human_in_loop("data preparation", code) + if retry: return retry + save_to_file("./research_dir/src", "run_experiments.py", code) + self.set_agent_attr("results_code", code) + self.set_agent_attr("exp_results", exp_results) + # reset agent state + self.reset_agents() + return False + + def data_preparation(self): + """ + Perform data preparation phase + @return: (bool) whether to repeat the phase + """ + max_tries = self.max_steps + ml_feedback = str() + ml_dialogue = str() + phd_feedback = str() + ml_command = str() + hf_engine = HFDataSearch() + # iterate until max num tries to complete task is exhausted + for _i in 
range(max_tries): + if ml_feedback != "": + ml_feedback_in = "Feedback provided to the ML agent: " + ml_feedback + else: ml_feedback_in = "" + resp = self.phd.inference(self.research_topic, "data preparation", feedback=f"{ml_dialogue}\nFeedback from previous command: {phd_feedback}\n{ml_command}{ml_feedback_in}", step=_i) + #if self.verbose: print("PhD Student: ", resp, "\n~~~~~~~~~~~") + phd_feedback = str() + phd_dialogue = str() + if "```DIALOGUE" in resp: + dialogue = extract_prompt(resp, "DIALOGUE") + phd_dialogue = f"\nThe following is dialogue produced by the PhD Student: {dialogue}\n" + if self.verbose: print("#"*40, f"\nThe following is dialogue produced by the PhD Student: {dialogue}", "\n", "#"*40) + if "```SUBMIT_CODE" in resp: + final_code = extract_prompt(resp, "SUBMIT_CODE") + code_resp = execute_code(final_code, timeout=60) + if self.verbose: print("!"*100, "\n", f"CODE RESPONSE: {code_resp}")#print("!"*100, "\n", self.phd.dataset_code, "\n", "$"*100, "\n", final_code, "\n", "!"*100, "\n", f"CODE RESPONSE: {code_resp}") + phd_feedback += f"\nCode Response: {code_resp}\n" + if "[CODE EXECUTION ERROR]" in code_resp: + phd_feedback += "\nERROR: Final code had an error and could not be submitted! 
You must address and fix this error.\n" + else: + if self.human_in_loop_flag["data preparation"]: + retry = self.human_in_loop("data preparation", final_code) + if retry: return retry + save_to_file("./research_dir/src", "load_data.py", final_code) + self.set_agent_attr("dataset_code", final_code) + # reset agent state + self.reset_agents() + self.statistics_per_phase["data preparation"]["steps"] = _i + return False + + if ml_feedback != "": + ml_feedback_in = "Feedback from previous command: " + ml_feedback + else: + ml_feedback_in = "" + resp = self.ml_engineer.inference( + self.research_topic, "data preparation", + feedback=f"{phd_dialogue}\n{ml_feedback_in}", step=_i) + #if self.verbose: print("ML Engineer: ", resp, "\n~~~~~~~~~~~") + ml_feedback = str() + ml_dialogue = str() + ml_command = str() + if "```DIALOGUE" in resp: + dialogue = extract_prompt(resp, "DIALOGUE") + ml_dialogue = f"\nThe following is dialogue produced by the ML Engineer: {dialogue}\n" + if self.verbose: print("#" * 40, f"\nThe following is dialogue produced by the ML Engineer: {dialogue}", "#" * 40, "\n") + if "```python" in resp: + code = extract_prompt(resp, "python") + code = self.ml_engineer.dataset_code + "\n" + code + code_resp = execute_code(code, timeout=120) + ml_command = f"Code produced by the ML agent:\n{code}" + ml_feedback += f"\nCode Response: {code_resp}\n" + if self.verbose: print("!"*100, "\n", f"CODE RESPONSE: {code_resp}") + if "```SEARCH_HF" in resp: + hf_query = extract_prompt(resp, "SEARCH_HF") + hf_res = "\n".join(hf_engine.results_str(hf_engine.retrieve_ds(hf_query))) + ml_command = f"HF search command produced by the ML agent:\n{hf_query}" + ml_feedback += f"Huggingface results: {hf_res}\n" + raise Exception("Max tries during phase: Data Preparation") + + def plan_formulation(self): + """ + Perform plan formulation phase + @return: (bool) whether to repeat the phase + """ + max_tries = self.max_steps + dialogue = str() + # iterate until max num tries to complete 
task is exhausted + for _i in range(max_tries): + # inference postdoc to + resp = self.postdoc.inference(self.research_topic, "plan formulation", feedback=dialogue, step=_i) + if self.verbose: print("Postdoc: ", resp, "\n~~~~~~~~~~~") + dialogue = str() + + if "```DIALOGUE" in resp: + dialogue = extract_prompt(resp, "DIALOGUE") + dialogue = f"The following is dialogue produced by the postdoctoral researcher: {dialogue}" + if self.verbose: print("#"*40, "\n", "Postdoc Dialogue:", dialogue, "\n", "#"*40) + + if "```PLAN" in resp: + plan = extract_prompt(resp, "PLAN") + if self.human_in_loop_flag["plan formulation"]: + retry = self.human_in_loop("plan formulation", plan) + if retry: return retry + self.set_agent_attr("plan", plan) + # reset agent state + self.reset_agents() + self.statistics_per_phase["plan formulation"]["steps"] = _i + return False + + resp = self.phd.inference(self.research_topic, "plan formulation", feedback=dialogue, step=_i) + if self.verbose: print("PhD Student: ", resp, "\n~~~~~~~~~~~") + + dialogue = str() + if "```DIALOGUE" in resp: + dialogue = extract_prompt(resp, "DIALOGUE") + dialogue = f"The following is dialogue produced by the PhD student: {dialogue}" + if self.verbose: print("#"*40, "\n", "PhD Dialogue:", dialogue, "#"*40, "\n") + raise Exception("Max tries during phase: Plan Formulation") + + def literature_review(self): + """ + Perform literature review phase + @return: (bool) whether to repeat the phase + """ + arx_eng = ArxivSearch() + max_tries = self.max_steps * 5 # lit review often requires extra steps + # get initial response from PhD agent + resp = self.phd.inference(self.research_topic, "literature review", step=0, temp=0.8) + if self.verbose: print(resp, "\n~~~~~~~~~~~") + # iterate until max num tries to complete task is exhausted + for _i in range(max_tries): + feedback = str() + + # grab summary of papers from arxiv + if "```SUMMARY" in resp: + query = extract_prompt(resp, "SUMMARY") + papers = 
arx_eng.find_papers_by_str(query, N=self.arxiv_num_summaries) + feedback = f"You requested arXiv papers related to the query {query}, here was the response\n{papers}" + + # grab full text from arxiv ID + elif "```FULL_TEXT" in resp: + query = extract_prompt(resp, "FULL_TEXT") + # expiration timer so that paper does not remain in context too long + arxiv_paper = f"```EXPIRATION {self.arxiv_paper_exp_time}\n" + arx_eng.retrieve_full_paper_text(query) + "```" + feedback = arxiv_paper + + # if add paper, extract and add to lit review, provide feedback + elif "```ADD_PAPER" in resp: + query = extract_prompt(resp, "ADD_PAPER") + feedback, text = self.phd.add_review(query, arx_eng) + if len(self.reference_papers) < self.num_ref_papers: + self.reference_papers.append(text) + + # completion condition + if len(self.phd.lit_review) >= self.num_papers_lit_review: + # generate formal review + lit_review_sum = self.phd.format_review() + # if human in loop -> check if human is happy with the produced review + if self.human_in_loop_flag["literature review"]: + retry = self.human_in_loop("literature review", lit_review_sum) + # if not happy, repeat the process with human feedback + if retry: + self.phd.lit_review = [] + return retry + # otherwise, return lit review and move on to next stage + if self.verbose: print(self.phd.lit_review_sum) + # set agent + self.set_agent_attr("lit_review_sum", lit_review_sum) + # reset agent state + self.reset_agents() + self.statistics_per_phase["literature review"]["steps"] = _i + return False + resp = self.phd.inference(self.research_topic, "literature review", feedback=feedback, step=_i + 1, temp=0.8) + if self.verbose: print(resp, "\n~~~~~~~~~~~") + raise Exception("Max tries during phase: Literature Review") + + def human_in_loop(self, phase, phase_prod): + """ + Get human feedback for phase output + @param phase: (str) current phase + @param phase_prod: (str) current phase result + @return: (bool) whether to repeat the loop + """ + 
print("\n\n\n\n\n") + print(f"Presented is the result of the phase [{phase}]: {phase_prod}") + y_or_no = None + # repeat until a valid answer is provided + while y_or_no not in ["y", "n"]: + y_or_no = input("\n\n\nAre you happy with the presented content? Respond Y or N: ").strip().lower() + # if person is happy with feedback, move on to next stage + if y_or_no == "y": pass + # if not ask for feedback and repeat + elif y_or_no == "n": + # ask the human for feedback + notes_for_agent = input("Please provide notes for the agent so that they can try again and improve performance: ") + # reset agent state + self.reset_agents() + # add suggestions to the notes + self.notes.append({ + "phases": [phase], + "note": notes_for_agent}) + return True + else: print("Invalid response, type Y or N") + return False + + + +def parse_arguments(): + parser = argparse.ArgumentParser(description="AgentLaboratory Research Workflow") + + parser.add_argument( + '--copilot-mode', + type=str, + default="True", + help='Enable human interaction mode.' + ) + + parser.add_argument( + '--load-existing', + type=str, + default="False", + help='Do not load existing state; start a new workflow.' + ) + + parser.add_argument( + '--load-existing-path', + type=str, + help='Path to load existing state; start a new workflow, e.g. state_saves/results_interpretation.pkl' + ) + + parser.add_argument( + '--research-topic', + type=str, + help='Specify the research topic.' + ) + + parser.add_argument( + '--api-key', + type=str, + help='Provide the OpenAI API key.' + ) + + parser.add_argument( + '--compile-latex', + type=str, + default="True", + help='Compile latex into pdf during paper writing phase. Disable if you can not install pdflatex.' + ) + + parser.add_argument( + '--llm-backend', + type=str, + default="o1-mini", + help='Backend LLM to use for agents in Agent Laboratory.' + ) + + parser.add_argument( + '--language', + type=str, + default="English", + help='Language to operate Agent Laboratory in.' 
def _parse_int_arg(args, arg_name):
    """Return ``getattr(args, arg_name)`` converted to ``int``.

    The CLI declares its numeric options as strings, so the conversion is
    done here. The error names the option that actually failed.

    @param args: parsed argparse namespace
    @param arg_name: (str) attribute name on ``args``
    @return: (int) parsed value
    @raise ValueError: if the value cannot be interpreted as an integer
    """
    raw_value = getattr(args, arg_name)
    try:
        return int(raw_value)
    except (TypeError, ValueError) as err:
        raise ValueError(f"args.{arg_name} must be a valid integer!") from err


if __name__ == "__main__":
    # Entry point: read the CLI options, assemble the per-phase notes and
    # configuration tables, then build (or un-pickle) the workflow and run it.
    args = parse_arguments()

    llm_backend = args.llm_backend
    # Boolean options arrive as the strings "true"/"false".
    human_mode = args.copilot_mode.lower() == "true"
    compile_pdf = args.compile_latex.lower() == "true"
    load_existing = args.load_existing.lower() == "true"

    num_papers_lit_review = _parse_int_arg(args, "num_papers_lit_review")
    papersolver_max_steps = _parse_int_arg(args, "papersolver_max_steps")
    mlesolver_max_steps = _parse_int_arg(args, "mlesolver_max_steps")

    # The environment variable takes precedence over --api-key (unchanged).
    # There is deliberately no placeholder fallback: a fake key would make the
    # check below unreachable and would be pasted verbatim into the notes the
    # agents receive further down.
    api_key = os.getenv('OPENAI_API_KEY') or args.api_key
    if not api_key:
        raise ValueError("API key must be provided via --api-key or the OPENAI_API_KEY environment variable.")

    ##########################################################
    # Research question that the agents are going to explore #
    ##########################################################
    if human_mode or args.research_topic is None:
        research_topic = input("Please name an experiment idea for AgentLaboratory to perform: ")
    else:
        research_topic = args.research_topic

    # Each note is shown to the agents during the listed phases.
    # NOTE(review): the API key is embedded in prompt text on purpose so that
    # generated experiment code can call the OpenAI API; treat logs as secret.
    task_notes_LLM = [
        {"phases": ["plan formulation"],
         "note": "You should come up with a plan for TWO experiments."},

        {"phases": ["plan formulation", "data preparation", "running experiments"],
         "note": "Please use gpt-4o-mini for your experiments."},

        {"phases": ["running experiments"],
         "note": f'Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n'},

        {"phases": ["running experiments"],
         "note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."},

        {"phases": ["running experiments"],
         "note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."},

        {"phases": ["data preparation", "running experiments"],
         "note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"},

        {"phases": ["data preparation", "running experiments"],
         "note": "Generate figures with very colorful and artistic design."},
    ]

    task_notes_LLM.append(
        {"phases": ["literature review", "plan formulation", "data preparation", "running experiments", "results interpretation", "report writing", "report refinement"],
         "note": f"You should always write in the following language to converse and to write the report {args.language}"},
    )

    ####################################################
    ###  Stages where human input will be requested  ###
    ####################################################
    human_in_loop = {
        "literature review": human_mode,
        "plan formulation": human_mode,
        "data preparation": human_mode,
        "running experiments": human_mode,
        "results interpretation": human_mode,
        "report writing": human_mode,
        "report refinement": human_mode,
    }

    ###################################################
    ###  LLM Backend used for the different phases  ###
    ###################################################
    agent_models = {
        "literature review": llm_backend,
        "plan formulation": llm_backend,
        "data preparation": llm_backend,
        "running experiments": llm_backend,
        "report writing": llm_backend,
        "results interpretation": llm_backend,
        # Every other table in this script calls the last phase
        # "report refinement"; the original "paper refinement" spelling is
        # kept as well in case the workflow looks it up under that name.
        # NOTE(review): confirm which key LaboratoryWorkflow reads, then drop
        # the other one.
        "report refinement": llm_backend,
        "paper refinement": llm_backend,
    }

    if load_existing:
        load_path = args.load_existing_path
        if load_path is None:
            raise ValueError("Please provide path to load existing state.")
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only load state files that this tool wrote itself.
        with open(load_path, "rb") as f:
            lab = pickle.load(f)
    else:
        lab = LaboratoryWorkflow(
            research_topic=research_topic,
            notes=task_notes_LLM,
            agent_model_backbone=agent_models,
            human_in_loop_flag=human_in_loop,
            openai_api_key=api_key,
            compile_pdf=compile_pdf,
            num_papers_lit_review=num_papers_lit_review,
            papersolver_max_steps=papersolver_max_steps,
            mlesolver_max_steps=mlesolver_max_steps,
        )

    lab.perform_research()
b/common_imports.py new file mode 100755 index 0000000..7d968f3 --- /dev/null +++ b/common_imports.py @@ -0,0 +1,113 @@ +# General-purpose imports +import os +import sys +import json +import time +import re +import math +import logging +import random +import shutil +import pathlib +import argparse +import itertools +import datetime +import collections +import subprocess + +# Data manipulation and analysis +import pandas as pd +import numpy as np +import csv +import json +import yaml +import h5py +import sqlite3 +import pickle + +# Visualization +import matplotlib.pyplot as plt +import seaborn as sns +import plotly.express as px +import plotly.graph_objects as go + +# Hugging Face & Transformers +import transformers + +# Deep learning frameworks +import torch +import torch.nn as nn +import torch.optim as optim +import torch.nn.functional as F +from torch.utils.data import DataLoader, Dataset, random_split +import tensorflow as tf +#import keras + +# NLP Libraries +import tiktoken +import nltk +from nltk.tokenize import word_tokenize, sent_tokenize +from nltk.corpus import stopwords +from nltk.stem import PorterStemmer, WordNetLemmatizer +import spacy +import sacremoses +# Diffusers for image generation and stable diffusion +import diffusers +from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler + +# Performance acceleration libraries +import accelerate +from accelerate import Accelerator + +# Hugging Face Hub utilities +import huggingface_hub +from huggingface_hub import HfApi, notebook_login + +# Scikit-learn for machine learning +import sklearn +from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV +from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix +from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder +from sklearn.decomposition import PCA +from sklearn.cluster import KMeans +from sklearn.svm import SVC +from 
# Running token totals for this process, keyed by canonical model name.
# query_model() adds to them; curr_cost_est() reads them.
TOKENS_IN = dict()
TOKENS_OUT = dict()

# Every accepted spelling of a model name -> the canonical name used for
# dispatch and for the token counters above.
_MODEL_ALIASES = {
    "gpt-4o-mini": "gpt-4o-mini",
    "gpt4omini": "gpt-4o-mini",
    "gpt-4omini": "gpt-4o-mini",
    "gpt4o-mini": "gpt-4o-mini",
    "gpt4o": "gpt-4o",
    "gpt-4o": "gpt-4o",
    "o1-mini": "o1-mini",
    "o1-preview": "o1-preview",
    "claude-3.5-sonnet": "claude-3.5-sonnet",
}

# Model identifier sent to the current OpenAI client for each canonical name.
_OPENAI_SNAPSHOTS = {
    "gpt-4o-mini": "gpt-4o-mini-2024-07-18",
    "gpt-4o": "gpt-4o-2024-08-06",
    "o1-mini": "o1-mini-2024-09-12",
    "o1-preview": "o1-preview",
}

# These models are sent one user message (system prompt + prompt concatenated)
# and never receive a temperature.
_SINGLE_MESSAGE_MODELS = ("o1-mini", "o1-preview")

_CLAUDE_MODEL = "claude-3.5-sonnet"
# The Anthropic Messages API requires max_tokens on every request.
# NOTE(review): 8192 is assumed to be the output ceiling of
# claude-3-5-sonnet-latest - confirm against the current model card.
_CLAUDE_MAX_TOKENS = 8192


def curr_cost_est():
    """Estimate the USD spent so far from the recorded token counts.

    Prices are hard-coded per-token rates and only approximate. A model that
    has no entry in the tables contributes nothing instead of raising, so an
    unknown name can never break inference.

    @return: (float) estimated cost in USD; 0 when nothing has been recorded
    """
    costmap_in = {
        "gpt-4o": 2.50 / 1000000,
        "gpt-4o-mini": 0.150 / 1000000,
        "o1-preview": 15.00 / 1000000,
        "o1-mini": 3.00 / 1000000,
        "claude-3-5-sonnet": 3.00 / 1000000,
        # Spelling under which query_model() actually records Claude usage.
        "claude-3.5-sonnet": 3.00 / 1000000,
    }
    costmap_out = {
        "gpt-4o": 10.00 / 1000000,
        "gpt-4o-mini": 0.6 / 1000000,
        "o1-preview": 60.00 / 1000000,
        "o1-mini": 12.00 / 1000000,
        # Anthropic lists Claude 3.5 Sonnet output at $15 per million tokens.
        "claude-3-5-sonnet": 15.00 / 1000000,
        "claude-3.5-sonnet": 15.00 / 1000000,
    }
    cost_in = sum(costmap_in.get(model, 0.0) * count for model, count in TOKENS_IN.items())
    cost_out = sum(costmap_out.get(model, 0.0) * count for model, count in TOKENS_OUT.items())
    return cost_in + cost_out


def _query_openai(model, prompt, system_prompt, temp, version):
    """Send one chat request to OpenAI and return the reply text."""
    if model in _SINGLE_MESSAGE_MODELS:
        messages = [{"role": "user", "content": system_prompt + prompt}]
        extra = {}
    else:
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}]
        extra = {} if temp is None else {"temperature": temp}
    if version == "0.28":
        # Legacy SDK: module-level call, addressed by canonical model name.
        completion = openai.ChatCompletion.create(model=f"{model}", messages=messages, **extra)
    else:
        client = OpenAI()
        completion = client.chat.completions.create(
            model=_OPENAI_SNAPSHOTS[model], messages=messages, **extra)
    return completion.choices[0].message.content


def _query_anthropic(prompt, system_prompt):
    """Send one request to Claude 3.5 Sonnet and return the reply text."""
    client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
    message = client.messages.create(
        model="claude-3-5-sonnet-latest",
        max_tokens=_CLAUDE_MAX_TOKENS,
        system=system_prompt,
        messages=[{"role": "user", "content": prompt}])
    return json.loads(message.to_json())["content"][0]["text"]


def _record_usage(model, prompt_text, answer):
    """Add the token counts of one exchange to TOKENS_IN / TOKENS_OUT.

    Best effort: a tokenizer problem is reported and skipped so that a reply
    that has already been paid for is never thrown away.
    """
    try:
        # o1 and Claude usage is counted with the gpt-4o tokenizer, which is
        # an approximation.
        if model in ("o1-preview", "o1-mini", _CLAUDE_MODEL):
            encoding = tiktoken.encoding_for_model("gpt-4o")
        else:
            encoding = tiktoken.encoding_for_model(model)
        tokens_in = len(encoding.encode(prompt_text))
        tokens_out = len(encoding.encode(answer))
    except Exception as e:
        print("Token accounting skipped:", e)
        return
    TOKENS_IN[model] = TOKENS_IN.get(model, 0) + tokens_in
    TOKENS_OUT[model] = TOKENS_OUT.get(model, 0) + tokens_out


def query_model(model_str, prompt, system_prompt, openai_api_key=None, anthropic_api_key=None, tries=5, timeout=5.0, temp=None, print_cost=True, version="1.5"):
    """Query a supported LLM and return its text reply.

    @param model_str: (str) model name; any key of _MODEL_ALIASES
    @param prompt: (str) user prompt
    @param system_prompt: (str) system prompt (prepended to the user prompt
        for the o1 models)
    @param openai_api_key: (str) OpenAI key; falls back to OPENAI_API_KEY
    @param anthropic_api_key: (str) Anthropic key; exported to
        ANTHROPIC_API_KEY when given
    @param tries: (int) number of request attempts
    @param timeout: (float) seconds to sleep after a failed attempt
    @param temp: (float) sampling temperature; only sent to gpt-4o models
    @param print_cost: (bool) print the running cost estimate on success
    @param version: (str) "0.28" selects the legacy OpenAI SDK call style
    @return: (str) the model reply
    @raise ValueError: model_str is not a supported model (raised at once;
        retrying cannot fix it)
    @raise Exception: no API key is available, or every attempt failed
    """
    model = _MODEL_ALIASES.get(model_str)
    if model is None:
        raise ValueError(f"Unsupported model_str {model_str!r}; expected one of {sorted(_MODEL_ALIASES)}")

    if openai_api_key is None:
        openai_api_key = os.getenv('OPENAI_API_KEY')
    if openai_api_key is None and anthropic_api_key is None:
        raise Exception("No API key provided in query_model function")
    if openai_api_key is not None:
        openai.api_key = openai_api_key
        os.environ["OPENAI_API_KEY"] = openai_api_key
    if anthropic_api_key is not None:
        os.environ["ANTHROPIC_API_KEY"] = anthropic_api_key
    if model == _CLAUDE_MODEL and "ANTHROPIC_API_KEY" not in os.environ:
        # Configuration error: fail now instead of sleeping through retries.
        raise Exception("No Anthropic API key provided in query_model function")

    last_error = None
    for _ in range(tries):
        try:
            if model == _CLAUDE_MODEL:
                answer = _query_anthropic(prompt, system_prompt)
            else:
                answer = _query_openai(model, prompt, system_prompt, temp, version)
        except Exception as e:
            last_error = e
            print("Inference Exception:", e)
            time.sleep(timeout)
            continue
        # Accounting happens outside the retry scope: a bookkeeping problem
        # must not trigger another paid request.
        _record_usage(model, system_prompt + prompt, answer)
        if print_cost:
            print(f"Current experiment cost = ${curr_cost_est()}, ** Approximate values, may not reflect true cost")
        return answer
    raise Exception("Max retries: timeout") from last_error
b/mlesolver.py @@ -0,0 +1,575 @@ +import random +from copy import copy +from copy import deepcopy +from common_imports import * +from abc import abstractmethod + + +from tools import * +from inference import * +from pathlib import Path + + +from contextlib import contextmanager +import sys, os + +@contextmanager +def suppress_stdout(): + with open(os.devnull, "w") as devnull: + old_stdout = sys.stdout + sys.stdout = devnull + try: + yield + finally: + sys.stdout = old_stdout + + +os.environ["JOBLIB_VERBOSITY"] = "0" +logging.basicConfig(level=logging.WARNING) +warnings.filterwarnings("ignore") +warnings.simplefilter(action='ignore', category=FutureWarning) +import logging +logging.getLogger('sklearn.model_selection').setLevel(logging.WARNING) + + +GLOBAL_REPAIR_ATTEMPTS = 2 + + +class Command: + def __init__(self): + self.cmd_type = "OTHER" + + @abstractmethod + def docstring(self) -> str: + pass + + @abstractmethod + def execute_command(self, *args) -> str: + pass + + @abstractmethod + def matches_command(self, cmd_str) -> bool: + pass + + @abstractmethod + def parse_command(self, cmd_str) -> tuple: + pass + + +""" +@@@@@@@@@@@@@@@@@@ +@@ CODING TOOLS @@ +@@@@@@@@@@@@@@@@@@ +""" + +class Replace(Command): + def __init__(self): + super().__init__() + self.cmd_type = "CODE-replace" + + def docstring(self) -> str: + return ( + "============= REWRITE CODE EDITING TOOL =============\n" + "You also have access to a code replacing tool. \n" + "This tool allows you to entirely re-write/replace all of the current code and erase all existing code.\n" + "You can use this tool via the following command: ```REPLACE\n\n```, where REPLACE is the word REPLACE and will be the new code that is replacing the entire set of old code. This tool is useful if you want to make very significant changes, such as entirely changing the model, or the learning process. 
Before changing the existing code to be your new code, your new code will be tested and if it returns an error it will not replace the existing code. Try limiting the use of rewriting and aim for editing the code more." + ) + + def execute_command(self, *args) -> str: + # args[0] -> new code + args = args[0] + return args[0] + + def matches_command(self, cmd_str) -> bool: + if "```REPLACE" in cmd_str: return True + return False + + def parse_command(self, *args) -> tuple: + new_code = extract_prompt(args[0], "REPLACE") + code_exec = f"{args[1]}\n{new_code}" + code_ret = execute_code(code_exec) + if "[CODE EXECUTION ERROR]" in code_ret: return False, (None, code_ret,) + return True, (new_code.split("\n"), code_ret) + + + +class Edit(Command): + def __init__(self): + super().__init__() + self.cmd_type = "CODE-edit" + + def docstring(self) -> str: + return ( + "============= CODE EDITING TOOL =============\n" + "You also have access to a code editing tool. \n" + "This tool allows you to replace lines indexed n through m (n:m) of the current code with as many lines of new code as you want to add. This removal is inclusive meaning that line n and m and everything between n and m is removed. This will be the primary way that you interact with code. \n" + "You can edit code using the following command: ```EDIT N M\n\n``` EDIT is the word EDIT, N is the first line index you want to replace and M the the last line index you want to replace (everything inbetween will also be removed), and will be the new code that is replacing the old code. Before changing the existing code to be your new code, your new code will be tested and if it returns an error it will not replace the existing code. Your changes should significantly change the functionality of the code." 
def get_score(outlined_plan, code, code_return, REWARD_MODEL_LLM, attempts=3, openai_api_key=None):
    """Ask an LLM reward model to grade a code submission against the plan.

    The reward model is queried up to ``attempts`` times; an attempt fails
    when the query raises or when no float can be parsed out of the reply's
    SCORE block.

    @param outlined_plan: (str) research plan the code was meant to implement
    @param code: (str) the submitted code
    @param code_return: (str) output produced by running the code
    @param REWARD_MODEL_LLM: (str) model name passed to query_model
    @param attempts: (int) maximum number of scoring attempts
    @param openai_api_key: (str) API key forwarded to query_model
    @return: (tuple) always three items:
        - score: (float) parsed score, or None when every attempt failed
        - message: (str) human-readable result, or the last error text
        - is_valid: (bool) True when a score was obtained
    """
    # todo: have a reward function here
    # NOTE(review): the SCORE format description below reads as if a
    # placeholder between the fences was lost; confirm the intended wording.
    reward_system_prompt = (
        f"You are a professor agent who is serving as an expert reward model that can read a research plan, research code, and code output and are able to determine how well a model followed the plan, built the code, and got the proper output scored from 0 to 1 as a float.\n\n"
        f"You must structure your score exactly in the following way: ```SCORE\n\n``` where SCORE is just the word score, is a floating point number between 0 and 1 representing how well the model followed the plan, built the code, and got the proper output."
    )
    reward_prompt = (
        f"Outlined in the following text is the research plan that the machine learning engineer was tasked with building: {outlined_plan}\n\n"
        f"The following text is the research code that the model produced: \n{code}\n\n"
        f"The following is the output from the model: {code_return}\n\n")

    last_error = "No scoring attempts were made."
    for _attempt in range(attempts):
        try:
            scoring = query_model(
                model_str=f"{REWARD_MODEL_LLM}",
                system_prompt=reward_system_prompt,
                openai_api_key=openai_api_key,
                prompt=reward_prompt, temp=0.6)
            performance = float(extract_prompt(text=scoring, word="SCORE"))
        except Exception as err:
            # Remember why this attempt failed and try again; the text is
            # copied out because `err` is unbound once the handler exits.
            last_error = str(err)
            continue
        return performance, f"The performance of your submission is: {performance}", True
    # Same failure shape as a single failed attempt, so callers can always
    # unpack three values.
    return None, last_error, False
+ ) + model_resp = query_model( + openai_api_key=openai_api_key, + model_str=f"{REPAIR_LLM}", + system_prompt=repair_sys, + prompt=f"Provided here is the error: {error}\n\nProvided below is the code:\n\n{code}", temp=0.8) + return extract_prompt(model_resp, "python") + elif ctype == "edit": + repair_sys = ( + "You are an automated code repair tool.\n" + "Your goal is to take in code and an error and repair the code to make sure the same error does not repeat itself, and also to remove any other potential errors from the code without affecting the code output.\n" + "Your output should match the original code as closely as possible.\n" + + "============= CODE EDITING TOOL =============\n" + "You have access to a code editing tool. \n" + "This tool allows you to replace lines indexed n through m (n:m) of the current code with as many lines of new code as you want to add. This removal is inclusive meaning that line n and m and everything between n and m is removed. This will be the primary way that you interact with code. \n" + "You can edit code using the following command: ```EDIT N M\n\n``` EDIT is the word EDIT, N is the first line index you want to replace and M the the last line index you want to replace (everything inbetween will also be removed), and will be the new code that is replacing the old code. Before changing the existing code to be your new code, your new code will be tested and if it returns an error it will not replace the existing code.\n" + "Please use the code editing tool to fix this code." + "Do not forget the opening ```EDIT N M and the closing ```." 
+ "Your output should look like the following\n\n```EDIT N M\n\n```" + ) + model_resp = query_model( + openai_api_key=openai_api_key, + model_str=f"{REPAIR_LLM}", + system_prompt=repair_sys, + prompt=f"Provided here is the error: {error}\n\nProvided below is the code:\n\n{code}", temp=0.2) + return model_resp + + +class MLESolver: + def __init__(self, dataset_code, openai_api_key=None, notes=None, max_steps=10, insights=None, plan=None, llm_str=None): + if notes is None: self.notes = [] + else: self.notes = notes + self.dataset_code = dataset_code + if plan is None: self.plan = "" + else: self.plan = plan + self.llm_str = llm_str + self.verbose = False + self.max_codes = 2 + self.st_hist_len = 2 + self.min_gen_trials = 2 + self.code_lines = str() + self.st_history = list() + self.insights = insights + self.code_reflect = str() + self.max_steps = max_steps + self.prev_code_ret = str() + self.should_execute_code = True + self.openai_api_key = openai_api_key + + def initial_solve(self): + """ + Initialize the solver and get an initial set of code and a return + @return: None + """ + # @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + # @@ Initial CodeGen Commands @@ + # @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + self.best_score = None + self.commands = [Replace()] + self.model = f"{self.llm_str}" + init_code, init_return, self.best_score = self.gen_initial_code() + self.best_codes = [(copy(init_code), self.best_score, init_return) for _ in range(1)] + + self.code_lines = init_code + self.model = f"{self.llm_str}" + self.commands = [Edit(), Replace()] + self.prev_working_code = copy(self.code_lines) + + @staticmethod + def clean_text(text): + text = text.replace("```\n", "```") + text = text.replace("```python\n", "```REPLACE\n") + return text + + def gen_initial_code(self): + num_attempts = 0 + error_hist = list() + while True: + if num_attempts == 0: + err = str() + err_hist = str() + else: + err = f"The following was the previous command generated: {model_resp}. 
This was the error return {cmd_str}. You should make sure not to repeat this error and to solve the presented problem." + error_hist.append(err) + if len(error_hist) == 5: _ = error_hist.pop(0) + err = "\n".join(error_hist) + err_hist = "The following is a history of your previous errors\n" + err + "\nDO NOT REPEAT THESE." + model_resp = query_model( + openai_api_key=self.openai_api_key, + model_str=self.model, + system_prompt=self.system_prompt(), + prompt=f"{err_hist}\nYou should now use ```REPLACE to create initial code to solve the challenge. Now please enter the ```REPLACE command below:\n ", temp=1.0) + model_resp = self.clean_text(model_resp) + cmd_str, code_lines, prev_code_ret, should_execute_code, score = self.process_command(model_resp) + print(f"@@@ INIT ATTEMPT: Command Exec // Attempt {num_attempts}: ", str(cmd_str).replace("\n", " | ")) + print(f"$$$ Score: {score}") + if score is not None: break + num_attempts += 1 + return code_lines, prev_code_ret, score + + def solve(self): + num_attempts = 0 + best_pkg = None + top_score = None + self.prev_code_ret = None + self.should_execute_code = False + while True: + if len(self.commands) == 2: cmd_app_str = "You must output either the ```EDIT or ```REPLACE command immediately. 
" + else: cmd_app_str = "" + model_resp = query_model( + openai_api_key=self.openai_api_key, + model_str=self.model, + system_prompt=self.system_prompt(), + prompt=f"The following is your history:{self.history_str()}\n\n{cmd_app_str}Now please enter a command: ", temp=1.0) + model_resp = self.clean_text(model_resp) + self.code_lines = copy(random.choice(self.best_codes)[0]) + cmd_str, code_lines, prev_code_ret, should_execute_code, score = self.process_command(model_resp) + self.st_history.append([model_resp, prev_code_ret, code_lines, cmd_str]) + if len(self.st_history) > self.st_hist_len: self.st_history.pop(0) + if score is not None: + if top_score is None: + best_pkg = copy(code_lines), copy(prev_code_ret), copy(should_execute_code), copy(model_resp), copy(cmd_str) + top_score = score + elif score > top_score: + best_pkg = copy(code_lines), copy(prev_code_ret), copy(should_execute_code), copy(model_resp), copy(cmd_str) + top_score = score + print(f"@@@ Command Exec // Attempt {num_attempts}: ", str(cmd_str).replace("\n", " | ")) + print(f"$$$ Score: {score}") + if num_attempts >= self.min_gen_trials and top_score is not None: break + num_attempts += 1 + self.code_lines, self.prev_code_ret, self.should_execute_code, model_resp, cmd_str = best_pkg + # add top scoring code that was successful to the best codes + if top_score > self.best_codes[-1][1]: + # replace the lowest scoring one + if len(self.best_codes) >= self.max_codes: + self.best_codes.pop(-1) + self.code_reflect = self.reflect_code() + self.best_codes.append((copy(self.code_lines), copy(top_score), self.prev_code_ret)) + # sort by score, to make sure lowest are removed in future + self.best_codes.sort(key=lambda x: x[1], reverse=True) + return model_resp, cmd_str + + def reflect_code(self): + """ + Provide a reflection on produced behavior for next execution + @return: (str) language model-produced reflection + """ + code_strs = ("$"*40 + "\n\n").join([self.generate_code_lines(_code[0]) + f"\nCode 
Return {_code[1]}" for _code in self.best_codes]) + code_strs = f"Please reflect on the following sets of code: {code_strs} and come up with generalizable insights that will help you improve your performance on this benchmark." + syst = self.system_prompt(commands=False) + code_strs + return query_model(prompt="Please reflect on ideas for how to improve your current code. Examine the provided code and think very specifically (with precise ideas) on how to improve performance, which methods to use, how to improve generalization on the test set with line-by-line examples below:\n", system_prompt=syst, model_str=f"{self.llm_str}", openai_api_key=self.openai_api_key) + + def process_command(self, model_resp): + """ + Take command from language model and execute if valid + @param model_resp: (str) language model output + @return: (tuple) tuple containing the following items + - cmd_str: (str) code execution return and success flag + - code_lines: (list) list of code lines as strings + - prev_code_ret: (str) output from running code + - should_execute_code: (bool) did the code change, if so we need to re-execute it + - score: (float) score of model + """ + prev_code_ret = self.prev_code_ret + should_execute_code = self.should_execute_code + code_lines = copy(self.code_lines) + remove_figures() + with suppress_stdout(): # shhh + for cmd in self.commands: + if cmd.matches_command(model_resp): + # attempt to execute the code edit command + if cmd.cmd_type == "CODE-edit": + score = None + failed = True + code_err = str() + for _tries in range(GLOBAL_REPAIR_ATTEMPTS): + success, args = cmd.parse_command(model_resp, copy(self.code_lines), self.dataset_code) + if success: + cmd_return = cmd.execute_command(args) + code_err = f"Return from executing code: {cmd_return[2]}" + if cmd_return[0]: # if success + code_lines = copy(cmd_return[1]) + score, cmd_str, is_valid = get_score(self.plan, "\n".join(code_lines), cmd_return[2], openai_api_key=self.openai_api_key, 
REWARD_MODEL_LLM=self.llm_str) + if is_valid: + failed = False + break + code_err += f"\nReturn from executing code on real test set {cmd_str}" + repaired_code = code_repair(model_resp, code_err, REPAIR_LLM=self.llm_str, ctype="edit", openai_api_key=self.openai_api_key) + model_resp = repaired_code + print(f" * Attempting repair // try {_tries}*") + if failed: + cmd_str = f"Code editing FAILED due to the following error: {code_err}. Code was reverted back to original state before edits." + print("$$$$ CODE EDIT (failed)") + else: + cmd_str = "Code was successfully edited." + prev_code_ret = copy(cmd_return[2]) + print("$$$$ CODE EDIT (success)") + should_execute_code = True + return cmd_str, code_lines, prev_code_ret, should_execute_code, score + # attempt to execute the code replace command + elif cmd.cmd_type == "CODE-replace": # DONE + score = None + failed = True + code_err = str() + for _tries in range(GLOBAL_REPAIR_ATTEMPTS): + success, args = cmd.parse_command(model_resp, self.dataset_code) + code_err = f"Return from executing code: {args[1]}" + if success: + code_lines = copy(args[0]) + score, cmd_str, is_valid = get_score(self.plan, "\n".join(code_lines), args[1], openai_api_key=self.openai_api_key, REWARD_MODEL_LLM=self.llm_str) + if is_valid: + failed = False + break + code_err += f"\nReturn from executing code on real test set {cmd_str}" + repaired_code = code_repair(extract_prompt(model_resp, "REPLACE", ), code_err, ctype="replace", openai_api_key=self.openai_api_key, REPAIR_LLM=self.llm_str) + repaired_code = f"```REPLACE\n{repaired_code}\n```" + model_resp = repaired_code + print(f" * Attempting repair // try {_tries}*") + if failed: + cmd_str = f"Code replacement FAILED due to the following error: {code_err}. Code was reverted back to original state before edits." + print("$$$$ CODE REPLACE (failed)") + else: + cmd_str = "Code was successfully replaced." 
def system_prompt(self, commands=True):
    """
    Produce a system prompt for the mle-solver to solve ml problems
    @param commands: (bool) whether to append the command descriptions;
        every other section is always included
    @return: (str) system prompt
    """
    prompt = (
        # ROLE DESCRIPTION
        f"{self.role_description()}.\n"
        # TASK INSTRUCTIONS
        f"The following are your task instructions: {self.phase_prompt()}\n"
        # LIT REVIEW INSIGHTS
        f"Provided below are some insights from a literature review summary:\n{self.insights}\n"
        # CODE INSIGHTS
        f"{self.code_reflect}"
        # NOTES
        f"The following are notes, instructions, and general tips for you: {self.notes}"
        # PLAN DESCRIPTION
        f"You are given a machine learning research task described, where the plan is described as follows: {self.plan}\n"
        # DATASET DESCRIPTION
        f"{self.generate_dataset_descr_prompt()}"
        # Create Figures
        f"You should also try generating at least two figures to showcase the results, titled Figure_1.png and Figure_2.png\n"
        f"Your method MUST not get 0% accuracy. If it does, you have done something wrong and must correct this. Make sure to check your accuracy calculation is correct.\n"
        # transition
        f"Your goal is to solve the research plan as well as possible. You will receive a score after you write the code and should aim to maximize the score by following the plan instructions and writing high quality code.\n"
        f"Before each experiment please include a print statement explaining exactly what the results are meant to show in great detail before printing the results out.\n"
    )
    # The command section is appended in its own statement. Written as a
    # trailing `... if commands else ""` inside the parentheses, the
    # condition would apply to the whole concatenated literal and
    # commands=False would yield an empty prompt.
    if commands:
        # COMMAND SET
        prompt += f"The following are commands you have access to: {self.command_descriptions()}\n. You should try to have a diversity of command responses if appropriate. Do not repeat the same commend too many times. Please consider looking through your history and not repeating commands too many times."
    return prompt
This step is mostly meant to reflect in order to help your future self fix the error better. Do not provide entirely new code but provide suggestions on how to fix the bug using LINE EDITS." + elif os.path.exists("submission.csv"): + self.prev_working_code = copy(self.code_lines) + grade_return = get_score(self.plan, "\n".join(self.prev_working_code), code_return, openai_api_key=self.openai_api_key)[0] + # print() accepts no REWARD_MODEL_LLM keyword; passing it raised TypeError as soon as a submission was scored + print(f"@@@@ SUBMISSION: model score {grade_return}") + f"Your code was properly submitted and you have just received a grade for your model.\nYour score was {grade_return}.\n\n" + reflect_prompt = f"This is your code: {code_str}\n\nYour code successfully returned a submission csv. Consider further improving your technique through advanced learning techniques, data augmentation, or hyperparamter tuning to increase the score. Please provide a detailed reflection on how to improve your performance, which lines in the code could be improved upon, and exactly (line by line) how you hope to improve this in the next update. This step is mostly meant to reflect in order to help your future self." + + for file in os.listdir("."): + # delete through the os API rather than a shell string so unusual file names cannot be misparsed by the shell + if file.endswith(".csv") and os.path.isfile(file): + os.remove(file) + else: + print("@@@@ No return") + reflect_prompt = f"This is your code: {code_str}\n\nYour code did not return an error, but also did not successfully submit a submission csv file. Please reflect on how you can improve your submission for the next cycle to submit a file and obtain a high score." + elif not self.should_execute_code: + code_return = "No changes were made to the code." + reflect_prompt = "Reflect on your future plans and next steps to improve the code." 
+ reflection = self.reflection(reflect_prompt, code_str, code_return) + return f"Code return: {code_return}\n\nReflection: {reflection}" + + def reflection(self, reflect_prompt, code_str, code_return): + """ + Reflect on your future plans and next steps to improve the code + @param reflect_prompt: (str) reflection prompt + @param code_str: (str) code string + @return: (str) reflection string + """ + refl = query_model(prompt=reflect_prompt, system_prompt=self.system_prompt(commands=False), model_str=f"{self.llm_str}", openai_api_key=self.openai_api_key) + return f"During the previous execution, the following code was run: \n\n{code_str}\n\nThis code returned the following: \n{code_return}\nThe following is your reflection from this feedback {refl}\n" + + def generate_dataset_descr_prompt(self): + """ + Generate description prompt for kaggle dataset + @param data_loader: (DataLoader) data loader + @return: (str) description prompt + """ + return f"\n- The following dataset code will be added to the beginning of your code always, so this does not need to be rewritten: {self.dataset_code}" + + def phase_prompt(self,): + """ + Describe system role and general tips for mle-solver + @return: (str) system role + """ + phase_str = ( + "You are an ML engineer and you will be writing the code for a research project.\n" + "Your goal is to produce code that obtains final results for a set of research experiments. You should aim for simple code to collect all results, not complex code. You should integrate the provided literature review and the plan to make sure you are implementing everything outlined in the plan. The dataset code will be added to the beginning of your code always, so this does not need to be rewritten. Make sure you do not write functions, only loose code.\n" + "I would recommend writing smaller code so you do not run out of time but make sure to work on all points in the plan in the same code. 
You code should run every experiment outlined in the plan for a single code.\n" + # no comma between these literals: a comma here turns phase_str into a tuple instead of one concatenated string + "You cannot pip install new libraries, but many machine learning libraries already work. If you wish to use a language model in your code, please use the following:\nAnything you decide to print inside your code will be provided to you as input, and you will be able to see that part of the code. Using print statements is useful for figuring out what is wrong and understanding your code better." + ) + return phase_str + + def role_description(self): + """ + Provide role description + @return: (str) role description + """ + return "You are an expert machine learning engineer working at a top university to write code to solve machine learning research challenges using your machine learning expertise." + + @staticmethod + def _common_code_errors(): + """ + Some general tips to avoid common code errors, also TF has many errors so we avoid this and ask to use pytorch + @return: (str) common code errors + """ + return ( + "Make sure to import everything that you are using.\n" + "Reflect on the code before writing it to make sure there are no bugs or compilation issues.\n" + "YOU MUST USE COMMANDS PROPERLY. Do not use the word COMMAND for the command that is incorrect. You must use an actual command (e.g. EDIT, REPLACE...) NOT THE WORD COMMAND. Do not make this mistake.\n" + "Under no circumstances should you use tensorflow or keras. Only use pytorch for scikitlearn for deep learning.\n" + ) + + def command_descriptions(self): + """ + Provide command descriptions + @return: (str) command descriptions + """ + cmd_strings = "\n".join([_cmd.docstring() for _cmd in self.commands]) + return f"\nYou also have access to tools which can be interacted with using the following structure: ```COMMAND\n\n```, where COMMAND is whichever command you want to run (e.g. EDIT, REPLACE...), is information used for the command, such as code to run or a search query, and ``` are meant to encapsulate the command. 
``` must be included as part of the command both at the beginning and at the end of the code. DO NOT FORGOT TO HAVE ``` AT THE TOP AND BOTTOM OF CODE. and this structure must be followed to execute a command correctly. YOU CAN ONLY EXECUTE A SINGLE COMMAND AT A TIME! Do not try to perform multiple commands EVER only one. {self._common_code_errors()}" + cmd_strings + + def run_code(self): + """ + Actually execute the code that was generated + @return: (str) code return + """ + if self.prev_code_ret is not None: + return self.prev_code_ret + elif self.should_execute_code: + return execute_code("\n".join(self.code_lines)) + return "Changes have not yet been made to the code." + + + + diff --git a/papersolver.py b/papersolver.py new file mode 100755 index 0000000..18e7a95 --- /dev/null +++ b/papersolver.py @@ -0,0 +1,587 @@ +import random +import string +from utils import * +from tools import * +from copy import copy +from inference import * +from pathlib import Path +from copy import deepcopy +from common_imports import * +from agents import get_score +from abc import abstractmethod + +from contextlib import contextmanager +import sys, os + +@contextmanager +def suppress_stdout(): + with open(os.devnull, "w") as devnull: + old_stdout = sys.stdout + sys.stdout = devnull + try: + yield + finally: + sys.stdout = old_stdout + +class Command: + def __init__(self): + self.cmd_type = "OTHER" + + @abstractmethod + def docstring(self) -> str: + pass + + @abstractmethod + def execute_command(self, *args) -> str: + pass + + @abstractmethod + def matches_command(self, cmd_str) -> bool: + pass + + @abstractmethod + def parse_command(self, cmd_str) -> tuple: + pass + + +def execute_latex(): + return True + + +""" +@@@@@@@@@@@@@@@@@@ +@@ SEARCH TOOLS @@ +@@@@@@@@@@@@@@@@@@ +""" + +class Arxiv(Command): + def __init__(self): + super().__init__() + self.arxiv_eng = ArxivSearch() + self.num_papers_per_search = 10 + self.cmd_type = "SEARCH-arxiv" + + def docstring(self) -> str: + return 
( + "============= ARXIV SEARCH TOOL =============" + "You also have access to machine learning paper from Arxiv. " + "To search for summaries of papers on arxiv you can use the following command: ```SUMMARY\n\n```\n where is a string that will be used as the search query to find papers with semantically similar content and SUMMARY is just the word SUMMARY.\n" + "To get the full paper text for an arXiv paper, use the following command: ```FULL_TEXT\n\n```\n where is the ID of the arXiv paper (which can be found by using the SUMMARY command), and FULL_TEXT is just the word FULL_TEXT. Make sure to read the full text using the FULL_TEXT command before adding it to your list of relevant papers.\n" + "When you read arxiv paper, make sure to take note of the techniques they are using to solve their problem as well as the hyperparameters and implementation details. These are very important for successfully solving machine learning problems." + ) + + def execute_command(self, *args) -> str: + # args[0] -> command + # args[1] -> query + if args[0] == "SUMMARY": + return self.arxiv_eng.find_papers_by_str(args[1], self.num_papers_per_search) + elif args[0] == "FULL_TEXT": + return self.arxiv_eng.retrieve_full_paper_text(args[1]) + raise Exception("Invalid Arxiv Search") + + def matches_command(self, cmd_str) -> bool: + if "```SUMMARY" in cmd_str: return True + elif "```FULL_TEXT" in cmd_str: return True + return False + + def parse_command(self, *args) -> tuple: + # args[0] -> raw model response; returns (success, (command word, payload lines)) + sum_text = extract_prompt(args[0], "SUMMARY").split("\n") + full_text = extract_prompt(args[0], "FULL_TEXT").split("\n") + # str.split always yields at least one element, so test for non-empty content instead of list length + if not any(sum_text) and not any(full_text): return False, None + if any(sum_text): return True, ("SUMMARY", sum_text,) + # FULL_TEXT must carry its own payload (it previously returned the SUMMARY lines) + return True, ("FULL_TEXT", full_text,) + + +""" +@@@@@@@@@@@@@@@@@@@ +@@ WRITING TOOLS @@ +@@@@@@@@@@@@@@@@@@@ +""" + +class PaperReplace(Command): + def __init__(self): + super().__init__() + self.cmd_type = "PAPER-replace" + + def docstring(self) 
-> str: + return ( + "============= PAPER REPLACING TOOL =============\n" + "You also have access to a paper replacing tool. \n" + "This tool allows you to entirely re-write/replace all of the current latex and erase all existing latex.\n" + "You can use this tool via the following command: ```REPLACE\n\n```, where REPLACE is the word REPLACE and will be the new latex that is replacing the entire set of old latex. This tool is useful if you want to make very significant changes, such as entirely changing the model, or the learning process. Before changing the existing latex to be your new latex, your new latex will be tested and if it returns an error it will not replace the existing latex. Try limiting the use of rewriting and aim for editing the latex more." + ) + + def execute_command(self, *args) -> str: + # args[0] -> new latex + args = args[0] + return args[0] + + def matches_command(self, cmd_str) -> bool: + if "```REPLACE" in cmd_str: return True + return False + + def parse_command(self, *args) -> tuple: + new_latex = extract_prompt(args[0], "REPLACE") + latex_ret = compile_latex(new_latex, compile=args[1]) + if "[CODE EXECUTION ERROR]" in latex_ret: return False, (None, latex_ret,) + return True, (new_latex.split("\n"), latex_ret) + + + +class PaperEdit(Command): + def __init__(self): + super().__init__() + self.cmd_type = "PAPER-edit" + + def docstring(self) -> str: + return ( + "============= PAPER EDITING TOOL =============\n" + "You also have access to a paper editing tool. \n" + "This tool allows you to replace lines indexed n through m (n:m) of the current latex with as many lines of new latex as you want to add. This removal is inclusive meaning that line n and m and everything between n and m is removed. This will be the primary way that you interact with latex. 
\n" + "You can edit latex using the following command: ```EDIT N M\n\n``` EDIT is the word EDIT, N is the first line index you want to replace and M the the last line index you want to replace (everything inbetween will also be removed), and will be the new latex that is replacing the old latex. Before changing the existing latex to be your new latex, your new latex will be tested and if it returns an error it will not replace the existing latex. Your changes should significantly change the latex. You should write new paragraphs and update old ones. Try using the edit command often. Make sure to generate lots of text. You should also avoid editing lines 0 0, and should edit the main text of the paragraphs, such as editing lines in the middle of the text body." + ) + + def execute_command(self, *args) -> str: + # args[0] -> N (int) + # args[1] -> M (int) + # args[2] -> old latex + # args[3] -> new lines to replace + try: + args = args[0] + current_latex = args[2] + lines_to_add = list(reversed(args[3])) + lines_to_replace = list(reversed(range(args[0], args[1]+1))) + for _ln in lines_to_replace: + current_latex.pop(_ln) + for _line in lines_to_add: + current_latex.insert(args[0], _line) + new_latex = "\n".join(current_latex) + latex_exec = f"{new_latex}" + latex_ret = compile_latex(latex_exec, compile=args[4]) + if "error" in latex_ret.lower(): return (False, None, latex_ret) + return (True, current_latex, latex_ret) + except Exception as e: + return (False, None, str(e)) + + def matches_command(self, cmd_str) -> bool: + if "```EDIT" in cmd_str: return True + return False + + def parse_command(self, *args) -> tuple: + cmd_str, latexlines = args[0], args[1] + success = True + try: + text = extract_prompt(cmd_str, "EDIT").split("\n") + if len(text) == 0: return False, (None, None, None, None) + lines_to_edit = text[0].split(" ") + if len(lines_to_edit) != 2: return False, (None, None, None, None) + lines_to_edit = [int(_) for _ in lines_to_edit] + if len(text[1:]) == 
0: return False, (None, None, None, None) + return success, (lines_to_edit[0], lines_to_edit[1], latexlines, text[1:]) + except Exception as e: + return False, (None, None, None, None) + + + + +# Modified version of section tips from the AI scientist paper! +# Good work guys :) https://github.com/SakanaAI/AI-Scientist/blob/main/ai_scientist/perform_writeup.py +per_section_tips = { + "abstract": """ +- TL;DR of the paper +- What are we trying to do and why is it relevant? +- Why is this hard? +- How do we solve it (i.e. our contribution!) +- How do we verify that we solved it (e.g. Experiments and results) +- This must only be a single paragraph not more. + +Please make sure the abstract reads smoothly and is well-motivated. This should be one continuous paragraph with no breaks between the lines. +""", + "introduction": """ +- Longer version of the Abstract, i.e. of the entire paper +- What are we trying to do and why is it relevant? +- Why is this hard? +- How do we solve it (i.e. our contribution!) +- How do we verify that we solved it (e.g. Experiments and results) +- New trend: specifically list your contributions as bullet points +- Extra space? Future work! +""", + "related work": """ +- Academic siblings of our work, i.e. alternative attempts in literature at trying to solve the same problem. +- Goal is to “Compare and contrast” - how does their approach differ in either assumptions or method? If their method is applicable to our Problem Setting I expect a comparison in the experimental section. If not, there needs to be a clear statement why a given method is not applicable. +- Note: Just describing what another paper is doing is not enough. We need to compare and contrast. +""", + "background": """ +- Academic Ancestors of our work, i.e. all concepts and prior work that are required for understanding our method. +- Usually includes a subsection, Problem Setting, which formally introduces the problem setting and notation (Formalism) for our method. 
Highlights any specific assumptions that are made that are unusual. +- Make sure to use mathematical notation when necessary. +- Note: If our paper introduces a novel problem setting as part of its contributions, it's best to have a separate Section. +""", + "methods": """ +- What we do. Why we do it. All described using the general Formalism introduced in the Problem Setting and building on top of the concepts / foundations introduced in Background. +- Make sure you clearly report precise mathematical equations in the methods section and the precise methodology. +""", + "experimental setup": """ +- How do we test that our stuff works? Introduces a specific instantiation of the Problem Setting and specific implementation details of our Method for this Problem Setting. +- Do not imagine unknown hardware details. +- Includes a description of the dataset, evaluation metrics, important hyperparameters, and implementation details. +""", + "results": """ +- Shows the results of running Method on our problem described in Experimental Setup. +- Includes statements on hyperparameters and other potential issues of fairness. +- Only includes results that have actually been run and saved in the logs. Do not hallucinate results that don't exist. +- Make sure you clearly and numerically report experimental results in the results section. +- If results exist: compares to baselines and includes statistics and confidence intervals. +- If results exist: includes ablation studies to show that specific parts of the method are relevant. +- Discusses limitations of the method. +- Make sure to include all the results from the experiments, and include all relevant figures. +""", + "discussion": """ +- Brief recap of the entire paper. +- To keep going with the analogy, you can think of future work as (potential) academic offspring. 
+""", +} + +class PaperSolver: + def __init__(self, llm_str, notes=None, max_steps=10, insights=None, plan=None, exp_code=None, exp_results=None, lit_review=None, ref_papers=None, topic=None, openai_api_key=None, compile_pdf=True): + if notes is None: self.notes = [] + else: self.notes = notes + if plan is None: self.plan = "" + else: self.plan = plan + if exp_code is None: self.exp_code = "" + else: self.exp_code = exp_code + if exp_results is None: self.exp_results = "" + else: self.exp_results = exp_results + if lit_review is None: self.lit_review = "" + else: self.lit_review = lit_review + if insights is None: self.insights = "" + else: self.insights = insights + if ref_papers is None: self.ref_papers = "" + else: self.ref_papers = ref_papers + if topic is None: self.topic = "" + else: self.topic = topic + self.compile_pdf = compile_pdf + self.llm_str = llm_str + self.notes = notes + self.max_papers = 1 + self.st_hist_len = 10 + self.min_gen_trials = 2 + self.max_steps = max_steps + self.paper_lines = str() + self.prev_paper_ret = str() + self.section_related_work = {} + self.openai_api_key = openai_api_key + + def solve(self): + num_attempts = 0 + best_pkg = None + top_score = None + self.prev_paper_ret = None + while True: + self.paper_lines = copy(random.choice(self.best_report)[0]) + model_resp = query_model( + model_str=self.model, + system_prompt=self.system_prompt(), + prompt=f"\nNow please enter a command: ", + temp=1.0, + openai_api_key=self.openai_api_key) + #print(model_resp) + model_resp = self.clean_text(model_resp) + cmd_str, paper_lines, prev_paper_ret, score = self.process_command(model_resp) + if score is not None: + if top_score is None: + best_pkg = copy(paper_lines), copy(prev_paper_ret), copy(model_resp), copy(cmd_str) + top_score = score + elif score > top_score: + best_pkg = copy(paper_lines), copy(prev_paper_ret), copy(model_resp), copy(cmd_str) + top_score = score + if num_attempts >= self.min_gen_trials and top_score is not None: break 
+ print(f"@@@ Command Exec // Attempt {num_attempts}: ", str(cmd_str).replace("\n", " | ")) + print(f"$$$ Score: {score}") + num_attempts += 1 + self.paper_lines, self.prev_paper_ret, model_resp, cmd_str = best_pkg + # add top scoring paper that was successful to the best papers + if top_score > self.best_report[-1][1]: + # replace the lowest scoring one + if len(self.best_report) >= self.max_papers: + self.best_report.pop(-1) + self.best_report.append((copy(self.paper_lines), copy(top_score), self.prev_paper_ret)) + # sort by score, to make sure lowest are removed in future + self.best_report.sort(key=lambda x: x[1], reverse=True) + return model_resp, cmd_str + + def initial_solve(self): + """ + Initialize the solver and get an initial set of papers and a return + @return: None + """ + # @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + # @@ Initial PaperGen Commands @@ + # @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + self.best_score = None + self.commands = [PaperReplace()] + self.model = f"{self.llm_str}" + init_report, init_return, self.best_score = self.gen_initial_report() + self.best_report = [(copy(init_report), self.best_score, init_return) for _ in range(1)] + + self.paper_lines = init_report + self.model = f"{self.llm_str}" + self.commands = [PaperEdit()] #, Replace()] + self.prev_working_report = copy(self.paper_lines) + + @staticmethod + def clean_text(text): + text = text.replace("```\n", "```") + return text + + def gen_initial_report(self): + num_attempts = 0 + arx = ArxivSearch() + section_scaffold = str() + # 1. Abstract 2. Introduction, 3. Background, 4. Methods, 5. Experimental Setup 6. Results, and 7. 
Discussion + for _section in ["scaffold", "abstract", "introduction", "related work", "background", "methods", "experimental setup", "results", "discussion"]: + section_complete = False + if _section in ["introduction", "related work", "background", "methods", "discussion"]: + attempts = 0 + papers = str() + first_attempt = True + while len(papers) == 0: + att_str = str() + if attempts > 5: + break + if not first_attempt: + att_str = "This is not your first attempt please try to come up with a simpler search query." + search_query = query_model(model_str=f"{self.llm_str}", prompt=f"Given the following research topic {self.topic} and research plan: \n\n{self.plan}\n\nPlease come up with a search query to find relevant papers on arXiv. Respond only with the search query and nothing else. This should be a a string that will be used to find papers with semantically similar content. {att_str}", system_prompt=f"You are a research paper finder. You must find papers for the section {_section}. Query must be text nothing else.", openai_api_key=self.openai_api_key) + # str.replace returns a new string, so the result has to be assigned back for the quotes to actually be stripped + search_query = search_query.replace('"', '') + papers = arx.find_papers_by_str(query=search_query, N=10) + first_attempt = False + attempts += 1 + if len(papers) != 0: + self.section_related_work[_section] = papers + while not section_complete: + section_scaffold_temp = copy(section_scaffold) + if num_attempts == 0: err = str() + else: err = f"The following was the previous command generated: {model_resp}. This was the error return {cmd_str}. You should make sure not to repeat this error and to solve the presented problem." + if _section == "scaffold": + prompt = f"{err}\nNow please enter the ```REPLACE command to create the scaffold:\n " + else: + rp = str() + if _section in self.section_related_work: + rp = f"Here are related papers you can cite: {self.section_related_work[_section]}. You can cite them just by putting the arxiv ID in parentheses, e.g. 
(arXiv 2308.11483v1)\n" + prompt = f"{err}\n{rp}\nNow please enter the ```REPLACE command to create the designated section, make sure to only write the text for that section and nothing else. Do not include packages or section titles, just the section content:\n " + model_resp = query_model( + model_str=self.model, + system_prompt=self.system_prompt(section=_section), + prompt=f"{prompt}", + temp=0.8, + openai_api_key=self.openai_api_key) + model_resp = self.clean_text(model_resp) + if _section == "scaffold": + # minimal scaffold (some other sections can be combined) + for _sect in ["[ABSTRACT HERE]", "[INTRODUCTION HERE]", "[METHODS HERE]", "[RESULTS HERE]", "[DISCUSSION HERE]"]: + if _sect not in model_resp: + cmd_str = "Error: scaffold section placeholders were not present (e.g. [ABSTRACT HERE])." + print("@@@ INIT ATTEMPT:", cmd_str) + continue + elif _section != "scaffold": + new_text = extract_prompt(model_resp, "REPLACE") + section_scaffold_temp = section_scaffold_temp.replace(f"[{_section.upper()} HERE]", new_text) + model_resp = '```REPLACE\n' + copy(section_scaffold_temp) + '\n```' + if "documentclass{article}" in new_text or "usepackage{" in new_text: + cmd_str = "Error: You must not include packages or documentclass in the text! Your latex must only include the section text, equations, and tables." 
+ print("@@@ INIT ATTEMPT:", cmd_str) + continue + cmd_str, latex_lines, prev_latex_ret, score = self.process_command(model_resp, scoring=False) + print(f"@@@ INIT ATTEMPT: Command Exec // Attempt {num_attempts}: ", str(cmd_str).replace("\n", " | ")) + #print(f"$$$ Score: {score}") + if score is not None: + section_complete = True + section_scaffold = "\n".join(latex_lines) + num_attempts += 1 + self.paper_lines = section_scaffold.split("\n") + print("$"*10, f"SCAFFOLD [{_section}] CREATED", "$"*10) + print("$"*10, "SCAFFOLD CREATED", "$"*10) + return latex_lines, prev_latex_ret, score + + def process_command(self, model_resp, scoring=True): + """ + Take command from language model and execute if valid + @param model_resp: (str) language model output + @return: (tuple) tuple containing the following items + - cmd_str: (str) paper execution return and success flag + - paper_lines: (list) list of paper lines as strings + - prev_paper_ret: (str) output from running paper + - score: (float) score of model + """ + cmd_str = None + score = None + prev_paper_ret = self.prev_paper_ret + paper_lines = copy(self.paper_lines) + if "\\includegraphics[width=\\textwidth]{Figure_1.png}" in model_resp or "\\includegraphics[width=\\textwidth]{Figure_2.png}" in model_resp: + cwd = os.getcwd() + model_resp = model_resp.replace("\\includegraphics[width=\\textwidth]{Figure_1.png}", "\\includegraphics[width=\\textwidth]{" + cwd + "/Figure_1.png}") + model_resp = model_resp.replace("\\includegraphics[width=\\textwidth]{Figure_2.png}", "\\includegraphics[width=\\textwidth]{" + cwd + "/Figure_2.png}") + for cmd in self.commands: + if cmd.matches_command(model_resp): + # attempt to execute the paper edit command + if cmd.cmd_type == "PAPER-edit": # DONE + score = None + failed = True + success, args = cmd.parse_command(model_resp, paper_lines) + paper_err = f"Return from executing latex: {args[1]}" + if success: + # True, current_latex, latex_ret + args = cmd.execute_command((args[0], 
args[1], paper_lines, args[3], self.compile_pdf)) + success = success and args[0] + if not success: pass + else: + paper_lines = copy(args[1]) # + if scoring: + score, cmd_str, is_valid = get_score(self.plan, "\n".join(paper_lines), reward_model_llm=self.llm_str) + else: + score, cmd_str, is_valid = 0.0, "Paper scored successfully", True + if is_valid: failed = False + paper_err += f"\nReturn from executing latex: {cmd_str}" + print("$$$$ PAPER EDIT (success)") + if failed: + cmd_str = f"Paper edit FAILED due to the following error: {paper_err}. Paper was reverted back to original state before edits." + print("$$$$ PAPER EDIT (failed)") + else: + cmd_str = "Paper was successfully edited." + paper_lines = copy(args[1]) + prev_paper_ret = copy(args[2]) + print("$$$$ PAPER EDIT (success)") + elif cmd.cmd_type == "PAPER-replace": # DONE + score = None + failed = True + success, args = cmd.parse_command(model_resp, self.compile_pdf) + paper_err = f"Return from executing latex: {args[1]}" + if success: + paper_lines = copy(args[0]) # + if scoring: + score, cmd_str, is_valid = get_score(self.plan, "\n".join(paper_lines), reward_model_llm=self.llm_str) + else: + score, cmd_str, is_valid = 0.0, "Paper scored successfully", True + if is_valid: failed = False + paper_err += f"\nReturn from executing code on real test set {cmd_str}" + if failed: + cmd_str = f"Paper replacement FAILED due to the following error: {paper_err}. Paper was reverted back to original state before edits." + print("$$$$ PAPER REPLACE (failed)") + else: + cmd_str = "Paper was successfully replaced." 
+ paper_lines = copy(args[0]) + prev_paper_ret = copy(args[1]) + print("$$$$ PAPER REPLACE (success)") + return cmd_str, paper_lines, prev_paper_ret, score + + def generate_paper_lines(self, code): + """ + Generate well-formatted code lines with line numbers + @param code: (list) list of code line strings + @return: (str) code lines formatted with line numbers + """ + codestr = str() + for _index in range(len(code)): + codestr += f"{_index} |{code[_index]}\n" + return codestr + + def system_prompt(self, commands=True, section=None): + """ + Produce a system prompt for the paper-solver + @param commands: (bool) whether to use command prompt + @return: (str) system prompt + """ + if section == "abstract": length = "This section should be ONLY 1 paragraph." + else: length = "This section should be approximately 2-4 paragraphs and so your output should be several paragraphs of latex." + methods_str = str() + if section == "methods": + fig1_text="""\n\\begin{figure}[h] +\\caption{} +\\centering +\\includegraphics[width=\\textwidth]{Figure_1.png} +\\label{fig:fig1} +\\end{figure}\n""" + # Figure 2 needs its own label; reusing fig:fig1 makes LaTeX report a multiply defined label + fig2_text="""\n\\begin{figure}[h] +\\caption{} +\\centering +\\includegraphics[width=\\textwidth]{Figure_2.png} +\\label{fig:fig2} +\\end{figure}\n""" + if os.path.exists("Figure_1.png") and os.path.exists("Figure_2.png"): + methods_str += f"You ABSOLUTELY must without fail also include Figure_1.png and Figure_2.png in your paper using {fig1_text} and {fig2_text} on a new line. Make sure to place these figures in separate locations." 
+ elif os.path.exists("Figure_1.png"): + methods_str += f"You ABSOLUTELY must without fail also include Figure_1.png in your paper using {fig1_text} on a new line.\n" + elif os.path.exists("Figure_2.png"): + methods_str += f"You ABSOLUTELY must without fail also include Figure_2.png in your paper using {fig2_text} on a new line.\n" + if section is not None and section == "scaffold": section_cmd = f"Your objective right now is to only build the scaffolding for the paper. You should not include any text in the body of the paper, but should have an empty scaffold for each of the sections. Where the sections go, write [ABSTRACT HERE] for abstract, and write [INTRODUCTION HERE] for the introduction... etc. Your paper should have the following sections: 1. Abstract 2. Introduction, 3. Background, 4. Related Work 5. Methods, 6. Experimental Setup 7. Results, and 8. Discussion. Just create the scaffolding as compilable latex. Your title should start with Research Report: [title here] where title here is a title you choose. For author write Agent Laboratory." + elif section is not None: section_cmd = f"Your only goal is to generate latex for the following {section}. DO NOT INCLUDE ANY PACKAGES OR ANY SECTION COMMANDS. DO NOT INCLUDE A TITLE OR DATE ONLY TEXT. You only have to generate text for this specific section and do not have to output anything else. {length} I repeat DO NOT INCLUDE ANY PACKAGES OR ANY SECTION COMMANDS. DO NOT INCLUDE A TITLE OR DATE ONLY TEXT. Use as many equations as you find necessary. You should include mathematical equations, numbers, and tables where necessary. Remember that to include a percentage sign % you must add a backslash \% or else it will become a comment. 
Here are some tips {per_section_tips[section]} {methods_str}.\n\n" + else: section_cmd = "" + paper_len = sum([i.strip(string.punctuation).isalpha() for i in ("".join(self.paper_lines)).split()]) + #paper_len2 = len(("".join(self.paper_lines)).split()) + if paper_len < 4000: paper_progress = f"The current length of the paper is {paper_len} words, you must increase this by {4000-paper_len} words." + else: paper_progress = "" + print(paper_progress) + cmd_set = f"The following are commands you have access to: {self.command_descriptions()}\n." if commands else "" + if len(self.ref_papers) == 0: ref_papers = "" + else: + refpapers = '\n'.join(self.ref_papers) + ref_papers = f"Here is a reference paper that is high quality:\n{refpapers}\n\n\n" + lit_review_str = str(self.lit_review)[:20000] + #print(len(f"{self.exp_results}"), len(f"{self.exp_code}"), len(f"{self.plan}"), len(f"{self.lit_review}"), len(f"{self.role_description()}"), len(f"{self.phase_prompt()}"), len(f"{self.generate_paper_lines(self.paper_lines)}"), len(f"{section_cmd}"), len(f"{cmd_set}"), len(f"{ref_papers}")) + return ( + f"{ref_papers}" + # ROLE DESCRIPTION + f"{self.role_description()}.\n" + # TASK INSTRUCTIONS + f"The following are your task instructions: {self.phase_prompt()}\n" + # NOTES + f"The following are notes, instructions, and general tips for you: {self.notes}" + # LIT REVIEW + f"The following literature review was provided for the paper:\n{lit_review_str}\n" + # PLAN DESCRIPTION + f"You are given a paper report writing task. 
The original research plan was described as follows: {self.plan}\n" + # EXPERIMENT CODE + f"A team of research wrote the following code, following this plan: {self.exp_code}\n" + # EXPERIMENT RESULTS + f"After running this code, the following results were observed: {self.exp_results}\n" + # EXPERIMENT RESULT INSIGHTS + f"Provided was an interpretation of the experimental results:\n{self.insights}\n" + f"Your writing style should be boring and objective.\n" + # transition + f"Your goal is to write a research paper as well as possible. You will receive a score after you write the paper and should aim to maximize the score by writing a high quality research paper. The paper length should be 8 pages or 4000 words in total. It should be quite long and comprehensive. Remember, the paper MUST BE LONG. {paper_progress}\n" + # COMMAND SET + f"{cmd_set}\n" + # PAPER + f"Provided here is your current paper {self.generate_paper_lines(self.paper_lines)}" + # optional section command + f"{section_cmd}" + ) + + def command_descriptions(self): + """ + Provide command descriptions + @return: (str) command descriptions + """ + cmd_strings = "\n".join([_cmd.docstring() for _cmd in self.commands]) + return f"\nYou also have access to tools which can be interacted with using the following structure: ```COMMAND\n\n```, where COMMAND is whichever command you want to run (e.g. EDIT,...), is information used for the command and ``` are meant to encapsulate the command. ``` must be included as part of the command both at the beginning and at the end of the command. DO NOT FORGOT TO HAVE ``` AT THE TOP AND BOTTOM OF COMMAND. and this structure must be followed to execute a command correctly. YOU CAN ONLY EXECUTE A SINGLE COMMAND AT A TIME! Do not try to perform multiple commands EVER only one." 
+ cmd_strings + + def role_description(self): + """ + Provide role description + @return: (str) role description + """ + return "You are a computer science PhD student at a top university who has submitted their paper to an ML conference called ICLR. Your goal was to write a research paper and get high scores from the reviewers so that it get accepted to the conference. Your paper should be approximately 8 pages and around 4000 words. Your article should ONLY CONTAIN EIGHT sections as follows: 1. Abstract 2. Introduction, 3. Background, 4. Related Work 5. Methods, 6. Experimental Setup 7. Results, and 8. Discussion.\n" + + + def phase_prompt(self,): + """ + Describe system role and general tips for mle-solver + @return: (str) system role + """ + phase_str = ( + "You are a PhD student who has submitted their paper to an ML conference called ICLR. Your goal was to write a research paper and get high scores from the reviewers so that it get accepted to the conference.\n" + ) + return phase_str + + + diff --git a/readme/README-arabic.md b/readme/README-arabic.md new file mode 100755 index 0000000..e858199 --- /dev/null +++ b/readme/README-arabic.md @@ -0,0 +1,162 @@ +# مختبر الوكيل: استخدام وكلاء النماذج اللغوية الكبيرة كمساعدين بحثيين + +

+ Demonstration of the flow of Agent Laboratory +

+ + +

+ 【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】 +

+ + +

+ 【🌐 الموقع الإلكتروني | 💻 البرمجيات | 🎥 الفيديو | 📚 مثال على ورقة بحثية | 📰 الاستشهاد】 +

+ +## 📖 نظرة عامة + +- **مختبر الوكيل** هو سير عمل بحثي مستقل من البداية للنهاية مصمم لمساعدتك كباحث بشري في **تنفيذ أفكار بحثك**. يتكون مختبر الوكيل من وكلاء متخصصين مدفوعين بنماذج لغوية كبيرة لدعمك طوال سير العمل البحثي بالكامل — من إجراء مراجعات الأدبيات وصياغة الخطط إلى تنفيذ التجارب وكتابة تقارير شاملة. +- هذا النظام ليس مصممًا لاستبدال إبداعك بل لتكملته، مما يتيح لك التركيز على توليد الأفكار والتفكير النقدي بينما يقوم بأتمتة المهام المتكررة والتي تستغرق وقتًا طويلاً مثل البرمجة والتوثيق. من خلال استيعاب مستويات مختلفة من الموارد الحاسوبية والمشاركة البشرية، يهدف مختبر الوكيل إلى تسريع الاكتشافات العلمية وتحسين إنتاجيتك البحثية. + +

+ Demonstration of the flow of Agent Laboratory +

+ +### 🔬 كيف يعمل مختبر الوكيل؟ + +- يتكون مختبر الوكيل من ثلاث مراحل رئيسية توجه عملية البحث بشكل منهجي: (1) مراجعة الأدبيات، (2) التجارب، و(3) كتابة التقارير. خلال كل مرحلة، يتعاون وكلاء متخصصون مدفوعون بنماذج لغوية كبيرة لتحقيق أهداف مميزة، مع دمج أدوات خارجية مثل arXiv، Hugging Face، Python، وLaTeX لتحسين النتائج. يبدأ سير العمل هذا بجمع وتحليل مستقل للأوراق البحثية ذات الصلة، يتقدم من خلال التخطيط التعاوني وإعداد البيانات، وينتهي بتنفيذ التجارب تلقائيًا وتوليد تقارير شاملة. يتم مناقشة تفاصيل أدوار الوكلاء المحددة ومساهماتهم عبر هذه المراحل في الورقة البحثية. + +

+ Demonstration of the flow of Agent Laboratory +

+ +## 🖥️ التثبيت + + +### خيار البيئة الافتراضية للبايثون + +1. **استنساخ مستودع GitHub**: ابدأ باستنساخ المستودع باستخدام الأمر: + ```bash + git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git + ``` + +2. **إعداد وتفعيل بيئة البايثون** + ```bash + python -m venv venv_agent_lab + ``` + + - الآن قم بتفعيل هذه البيئة: + ```bash + source venv_agent_lab/bin/activate + ``` + +3. **تثبيت المكتبات المطلوبة** + ```bash + pip install -r requirements.txt + ``` + +4. **تثبيت pdflatex [اختياري]** + ```bash + sudo apt install pdflatex + ``` + + - هذا يمكن الوكلاء من تجميع مصدر LaTeX. + - **[مهم]** إذا لم تتمكن من تشغيل هذه الخطوة بسبب عدم وجود صلاحيات sudo، يمكن إيقاف تجميع PDF عن طريق تشغيل مختبر الوكيل مع تعيين العلم --compile_latex إلى false: + ```bash + --compile_latex=False + ``` + +5. **الآن قم بتشغيل مختبر الوكيل!** + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" + ``` + + أو، إذا لم يكن لديك pdflatex مثبتًا + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False + ``` + +----- +## نصائح لتحقيق نتائج بحثية أفضل + +#### [نصيحة #1] 📝 تأكد من كتابة ملاحظات شاملة! 📝 + +**كتابة ملاحظات شاملة أمر مهم** لمساعدة وكيلك على فهم ما تسعى إلى تحقيقه في مشروعك، بالإضافة إلى أي تفضيلات أسلوبية. يمكن أن تشمل الملاحظات أي تجارب ترغب في أن يقوم الوكلاء بتنفيذها، توفير مفاتيح API، بعض الرسوم البيانية أو الأشكال التي ترغب في تضمينها، أو أي شيء تريد أن يعرفه الوكيل عند إجراء البحث. + +هذه أيضًا فرصتك لإعلام الوكيل **بالموارد الحاسوبية التي يمكنه الوصول إليها**، مثل وحدات معالجة الرسومات (عددها، نوعها، حجم الذاكرة)، وحدات المعالجة المركزية (عدد النوى، نوعها)، قيود التخزين، ومواصفات الأجهزة. + +لإضافة ملاحظات، يجب تعديل هيكل task_notes_LLM داخل ملف ai_lab_repo.py. فيما يلي مثال على مجموعة من الملاحظات المستخدمة لبعض تجاربنا. 
+ +```python +task_notes_LLM = [ + {"phases": ["plan formulation"], + "note": f"You should come up with a plan for TWO experiments."}, + + {"phases": ["plan formulation", "data preparation", "running experiments"], + "note": "Please use gpt-4o-mini for your experiments."}, + + {"phases": ["running experiments"], + "note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n"}, + + {"phases": ["running experiments"], + "note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."}, + + {"phases": ["running experiments"], + "note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."}, + + {"phases": ["data preparation", "running experiments"], + "note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"}, + + {"phases": ["data preparation", "running experiments"], + "note": "Generate figures with very colorful and artistic design."}, + ] +``` + +-------- + +#### [نصيحة #2] 🚀 استخدام نماذج أكثر قوة يؤدي عمومًا إلى أبحاث أفضل 🚀 + +عند إجراء البحث، **يمكن أن يؤثر اختيار النموذج بشكل كبير على جودة النتائج**. النماذج الأكثر قوة تميل إلى أن تكون أكثر دقة، ولديها قدرات تفكير أفضل، وتوليد تقارير أفضل. إذا سمحت الموارد الحاسوبية، أعطِ الأولوية لاستخدام النماذج المتقدمة مثل o1-(mini/preview) أو نماذج لغوية كبيرة حديثة مماثلة. + +ومع ذلك، **من المهم تحقيق التوازن بين الأداء والفعالية من حيث التكلفة**. 
بينما قد تؤدي النماذج القوية إلى نتائج أفضل، فهي غالبًا ما تكون أكثر تكلفة وتستغرق وقتًا أطول للتشغيل. فكر في استخدامها بشكل انتقائي — على سبيل المثال، للتجارب الرئيسية أو التحليلات النهائية — بينما تعتمد على نماذج أصغر وأكثر كفاءة للمهام التكرارية أو النمذجة الأولية. + +عندما تكون الموارد محدودة، **قم بتحسين الأداء عن طريق ضبط النماذج الأصغر** على مجموعة البيانات الخاصة بك أو عن طريق دمج النماذج المدربة مسبقًا مع مطالبات محددة بالمهام لتحقيق التوازن المطلوب بين الأداء والكفاءة الحاسوبية. + +----- + +#### [نصيحة #3] ✅ يمكنك تحميل الحفظات السابقة من نقاط التفتيش ✅ + +**إذا فقدت تقدمك، أو انقطعت اتصال الإنترنت، أو فشلت مهمة فرعية، يمكنك دائمًا التحميل من حالة سابقة.** يتم حفظ كل تقدمك افتراضيًا في متغير state_saves، الذي يخزن كل نقطة تفتيش فردية. فقط مرر الحجج التالية عند تشغيل ai_lab_repo.py + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH" +``` + +----- + +#### [نصيحة #4] 🈯 إذا كنت تعمل بلغة غير الإنجليزية 🈲 + +إذا كنت تشغل مختبر الوكيل بلغة غير الإنجليزية، لا مشكلة، فقط تأكد من توفير علم اللغة للوكلاء لأداء البحث بلغتك المفضلة. لاحظ أننا لم ندرس تشغيل مختبر الوكيل بلغات أخرى بشكل موسع، لذا تأكد من الإبلاغ عن أي مشكلات تواجهها. + +على سبيل المثال، إذا كنت تعمل بالصينية: + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文" +``` + +---- + +#### [نصيحة #5] 🌟 هناك الكثير من المجال للتحسين 🌟 + +هناك الكثير من المجال لتحسين قاعدة الشيفرة هذه، لذا إذا قمت بإجراء تغييرات وترغب في مساعدة المجتمع، لا تتردد في مشاركة التغييرات التي قمت بها! نأمل أن تساعدك هذه الأداة! 
+ +## المرجع / Bibtex + +```bibtex +@preprint{schmidgall2025AgentLaboratory, + title={Agent Laboratory: Using LLM Agents as Research Assistants}, + author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad}, + year={2025} +} +``` \ No newline at end of file diff --git a/readme/README-bengali.md b/readme/README-bengali.md new file mode 100755 index 0000000..c00685a --- /dev/null +++ b/readme/README-bengali.md @@ -0,0 +1,156 @@ +# এজেন্ট ল্যাবরেটরি: গবেষণা সহকারী হিসেবে LLM এজেন্ট ব্যবহার + +

+ Demonstration of the flow of Agent Laboratory +

+ +

+ 【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】 +

+ +

+ 【🌐 Website | 💻 Software | 🎥 Video | 📚 Example Paper | 📰 Citation】 +

+ +## 📖 ওভারভিউ + +- **এজেন্ট ল্যাবরেটরি** একটি এন্ড-টু-এন্ড স্বায়ত্তশাসিত গবেষণা ওয়ার্কফ্লো যা **আপনাকে** মানব গবেষক হিসেবে **আপনার গবেষণা ধারণাগুলি বাস্তবায়নে** সহায়তা করার জন্য ডিজাইন করা হয়েছে। এজেন্ট ল্যাবরেটরি বড় ভাষা মডেল দ্বারা চালিত বিশেষায়িত এজেন্টের সমন্বয়ে গঠিত যা আপনাকে সম্পূর্ণ গবেষণা ওয়ার্কফ্লো জুড়ে সহায়তা করে—সাহিত্য পর্যালোচনা পরিচালনা থেকে পরিকল্পনা গঠন, পরীক্ষা সম্পাদন এবং বিস্তৃত প্রতিবেদন লেখা পর্যন্ত। +- এই সিস্টেমটি আপনার সৃজনশীলতাকে প্রতিস্থাপন করার জন্য ডিজাইন করা হয়নি বরং এটি সম্পূরক করার জন্য, আপনাকে ধারণা গঠন এবং সমালোচনামূলক চিন্তাভাবনায় মনোনিবেশ করার পাশাপাশি কোডিং এবং ডকুমেন্টেশন মত পুনরাবৃত্তিমূলক এবং সময়সাপেক্ষ কাজগুলি স্বয়ংক্রিয়করণের সুযোগ দেয়। বিভিন্ন স্তরের গণনামূলক সম্পদ এবং মানব সম্পৃক্ততাকে সমন্বিত করে, এজেন্ট ল্যাবরেটরি বৈজ্ঞানিক আবিষ্কারকে ত্বরান্বিত করা এবং আপনার গবেষণা উৎপাদনশীলতাকে সর্বাধিক করতে লক্ষ্য রাখে। + +

+ Demonstration of the flow of Agent Laboratory +

+ +### 🔬 এজেন্ট ল্যাবরেটরি কীভাবে কাজ করে? + +- এজেন্ট ল্যাবরেটরি তিনটি প্রধান পর্যায় নিয়ে গঠিত যা পদ্ধতিগতভাবে গবেষণা প্রক্রিয়াকে নির্দেশ করে: (১) সাহিত্য পর্যালোচনা, (২) পরীক্ষা, এবং (৩) প্রতিবেদন লেখা। প্রতিটি পর্যায়ে, LLM দ্বারা চালিত বিশেষায়িত এজেন্টরা পৃথক লক্ষ্য অর্জনের জন্য সহযোগিতা করে, ফলাফল অপ্টিমাইজ করার জন্য arXiv, Hugging Face, Python এবং LaTeX এর মত বহিরাগত সরঞ্জামগুলিকে সংহত করে। এই কাঠামোবদ্ধ ওয়ার্কফ্লো প্রাসঙ্গিক গবেষণা পত্রের স্বাধীন সংগ্রহ এবং বিশ্লেষণ দিয়ে শুরু হয়, সহযোগিতামূলক পরিকল্পনা এবং তথ্য প্রস্তুতির মাধ্যমে অগ্রসর হয়, এবং স্বয়ংক্রিয় পরীক্ষণ এবং বিস্তৃত প্রতিবেদন তৈরিতে শেষ হয়। এই পর্যায়গুলির জুড়ে নির্দিষ্ট এজেন্ট ভূমিকা এবং তাদের অবদান সম্পর্কে বিস্তারিত গবেষণাপত্রে আলোচনা করা হয়েছে। + +

+ Demonstration of the flow of Agent Laboratory +

+ +## 🖥️ ইনস্টলেশন + +### পাইথন venv বিকল্প + +1. **GitHub রিপোজিটরি ক্লোন করুন**: কমান্ডটি ব্যবহার করে রিপোজিটরিটি ক্লোন করা শুরু করুন: + ```bash + git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git + ``` + +2. **পাইথন পরিবেশ সেট আপ এবং সক্রিয় করুন** + ```bash + python -m venv venv_agent_lab + ``` + + - এখন এই পরিবেশটি সক্রিয় করুন: + ```bash + source venv_agent_lab/bin/activate + ``` + +3. **প্রয়োজনীয় লাইব্রেরিগুলি ইনস্টল করুন** + ```bash + pip install -r requirements.txt + ``` + +4. **pdflatex ইনস্টল করুন [ঐচ্ছিক]** + ```bash + sudo apt install pdflatex + ``` + + - এটি এজেন্ট দ্বারা ল্যাটেক্স সোর্স কম্পাইল করা সক্ষম করে। + - **[গুরুত্বপূর্ণ]** যদি sudo অ্যাক্সেস না থাকার কারণে এই ধাপটি চালানো না যায়, তাহলে --compile_latex ফ্ল্যাগটি false এ সেট করে এজেন্ট ল্যাবরেটরি চালিয়ে pdf কম্পাইলিং বন্ধ করা যেতে পারে: --compile_latex=False + +5. **এখন এজেন্ট ল্যাবরেটরি চালান!** + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" + ``` + অথবা, যদি আপনি pdflatex ইনস্টল না করে থাকেন + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False + ``` + +----- + +## গবেষণার ফলাফল উন্নত করার টিপস + +#### [টিপ #১] 📝 ব্যাপক নোট লেখার বিষয়টি নিশ্চিত করুন! 
📝 + +**ব্যাপক নোট লেখা গুরুত্বপূর্ণ** কারণ এটি আপনার এজেন্টকে আপনার প্রকল্পে আপনি কী অর্জন করতে চাইছেন তা বোঝাতে এবং যে কোনও স্টাইল পছন্দ রয়েছে তা বুঝতে সাহায্য করে। নোটগুলিতে যে কোনও পরীক্ষা আপনি এজেন্টদের সম্পাদন করতে চান, API কী সরবরাহ করা, আপনি যে নির্দিষ্ট প্লট বা চিত্র অন্তর্ভুক্ত করতে চান, অথবা গবেষণা পরিচালনা করার সময় এজেন্টকে যা কিছু জানাতে চান তা অন্তর্ভুক্ত থাকতে পারে। + +এটি এছাড়াও আপনার সুযোগ আপনার এজেন্টকে জানানোর **কোন কম্পিউট সম্পদগুলিতে এটি প্রবেশাধিকার রয়েছে**, উদাহরণস্বরূপ, GPUs (কতগুলো, কোন ধরণের GPU, কতগুলো GB), CPUs (কতগুলো কোর, কোন ধরণের CPU), স্টোরেজ সীমাবদ্ধতা, এবং হার্ডওয়্যার স্পেসিফিকেশন। + +নোট যুক্ত করার জন্য, আপনাকে ai_lab_repo.py এর ভিতরে task_notes_LLM গঠনটি পরিবর্তন করতে হবে। নীচে কিছু পরীক্ষার জন্য ব্যবহৃত নোটগুলির একটি উদাহরণ দেওয়া হল। + +```python +task_notes_LLM = [ + {"phases": ["plan formulation"], + "note": f"You should come up with a plan for TWO experiments."}, + + {"phases": ["plan formulation", "data preparation", "running experiments"], + "note": "Please use gpt-4o-mini for your experiments."}, + + {"phases": ["running experiments"], + "note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n"}, + + {"phases": ["running experiments"], + "note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."}, + + {"phases": ["running experiments"], + "note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. 
Do not use much more than this unless you have to or are running the final tests."}, + + {"phases": ["data preparation", "running experiments"], + "note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"}, + + {"phases": ["data preparation", "running experiments"], + "note": "Generate figures with very colorful and artistic design."}, + ] +``` + +-------- + +#### [টিপ #২] 🚀 আরও শক্তিশালী মডেলগুলি সাধারণত আরও ভাল গবেষণার দিকে নিয়ে যায় 🚀 + +গবেষণা পরিচালনার সময়, **মডেলের নির্বাচন ফলাফলের গুণমানকে উল্লেখযোগ্যভাবে প্রভাবিত করতে পারে**। আরও শক্তিশালী মডেলগুলির সাধারণত উচ্চতর নির্ভুলতা, উন্নত যুক্তিবিদ্যা ক্ষমতা, এবং উন্নত প্রতিবেদন তৈরির ক্ষমতা থাকে। যদি গণনামূলক সম্পদ অনুমতি দেয়, তাহলে o1-(mini/preview) বা অনুরূপ অত্যাধুনিক বড় ভাষা মডেলগুলির মতো উন্নত মডেলগুলির ব্যবহারে অগ্রাধিকার দিন। + +তবে, **কর্মক্ষমতা এবং ব্যয়-কার্যকারিতা মধ্যে ভারসাম্য বজায় রাখা গুরুত্বপূর্ণ**। শক্তিশালী মডেলগুলি যদিও ভাল ফলাফল দিতে পারে, তবে এগুলি প্রায়শই চালাতে বেশি ব্যয়বহুল এবং সময়সাপেক্ষ হয়। সেগুলি নির্বাচিতভাবে ব্যবহার করার কথা বিবেচনা করুন—উদাহরণস্বরূপ, মূল পরীক্ষাগুলি বা চূড়ান্ত বিশ্লেষণের জন্য—এবং পুনরাবৃত্তিমূলক কাজ বা প্রাথমিক প্রোটোটাইপিংয়ের জন্য ছোট, আরও দক্ষ মডেলগুলির উপর নির্ভর করে। + +যখন সম্পদ সীমিত থাকে, **আপনার নির্দিষ্ট ডেটাসেটে ছোট মডেলগুলিকে সূক্ষ্ম-সংশোধন করে বা কার্য-নির্দিষ্ট প্রম্পটগুলির সাথে পূর্ব-প্রশিক্ষিত মডেলগুলিকে সংযোজন করে কর্মক্ষমতা এবং গণনামূলক দক্ষতার মধ্যে কাঙ্ক্ষিত ভারসাম্য অর্জন করুন**। + +----- + +#### [টিপ #৩] ✅ আপনি চেকপয়েন্টগুলি থেকে পূর্ববর্তী সেভগুলি লোড করতে পারেন ✅ + +**যদি আপনি অগ্রগতি হারান, ইন্টারনেট সংযোগ হারান, বা যদি একটি উপ-কাজ ব্যর্থ হয়, তবে আপনি সর্বদা পূর্ববর্তী অবস্থান থেকে লোড করতে পারেন।** আপনার সমস্ত অগ্রগতি ডিফল্টভাবে state_saves ভেরিয়েবলে সংরক্ষিত থাকে, যা প্রতিটি পৃথক চেকপয়েন্ট সংরক্ষণ করে। ai_lab_repo.py চালানোর সময় কেবল নিম্নলিখিত আর্গুমেন্টগুলি প্রদান করুন + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing 
True --load-existing-path "save_states/LOAD_PATH" +``` + +----- + +#### [টিপ #৪] 🈯 আপনি যদি ইংরেজির বাইরে অন্য কোনো ভাষায় চালাচ্ছেন 🈲 + +আপনি যদি এজেন্ট ল্যাবরেটরি ইংরেজির বাইরে অন্য কোনো ভাষায় চালাচ্ছেন, সমস্যা নেই, কেবল নিশ্চিত করুন যে আপনি এজেন্টদের আপনার পছন্দের ভাষায় গবেষণা সম্পাদনের জন্য একটি ভাষা ফ্ল্যাগ সরবরাহ করেছেন। লক্ষ্য করুন যে আমরা অন্যান্য ভাষায় এজেন্ট ল্যাবরেটরি চালানোর ব্যাপকভাবে অধ্যয়ন করি নি, তাই আপনি যে কোনও সমস্যা সম্মুখীন হলে তা রিপোর্ট করতে ভুলবেন না। + +উদাহরণস্বরূপ, আপনি যদি চীনা ভাষায় চালাচ্ছেন: + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文" +``` + +---- + +#### [টিপ #৫] 🌟 উন্নতির জন্য অনেক জায়গা রয়েছে 🌟 + +এই কোডবেস উন্নত করার জন্য অনেক সুযোগ রয়েছে, তাই আপনি যদি পরিবর্তন করতে পারেন এবং কমিউনিটির সাহায্য করতে চান, তবে দয়া করে আপনার করা পরিবর্তনগুলি ভাগ করতে দ্বিধা করবেন না! আমরা আশা করি এই টুলটি আপনাকে সাহায্য করবে! + +## রেফারেন্স / Bibtex + +```bibtex +@preprint{schmidgall2025AgentLaboratory, + title={Agent Laboratory: Using LLM Agents as Research Assistants}, + author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad}, + year={2025} +} +``` \ No newline at end of file diff --git a/readme/README-chinese.md b/readme/README-chinese.md new file mode 100755 index 0000000..583a885 --- /dev/null +++ b/readme/README-chinese.md @@ -0,0 +1,150 @@ +# Agent Laboratory: 使用大型语言模型代理作为研究助理 + +

+ Demonstration of the flow of Agent Laboratory +

+ +

+ 【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】 +

+ +

+ 【🌐 网站 | 💻 软件 | 🎥 视频 | 📚 示例论文 | 📰 引用】 +

+ +## 📖 概述 + +- **Agent Laboratory** 是一个端到端的自主研究工作流程,旨在协助**您**作为人类研究人员**实现您的研究想法**。Agent Laboratory 由由大型语言模型驱动的专业代理组成,支持您完成整个研究工作流程——从进行文献综述和制定计划,到执行实验和撰写综合报告。 +- 该系统并非旨在取代您的创造力,而是为了补充它,使您能够专注于创意和批判性思维,同时自动化重复且耗时的任务,如编码和文档编写。通过适应不同水平的计算资源和人类参与,Agent Laboratory 旨在加速科学发现并优化您的研究生产力。 + +

+ Demonstration of the flow of Agent Laboratory +

+ +### 🔬 Agent Laboratory 如何工作? + +- Agent Laboratory 包含三个主要阶段,系统地引导研究过程:(1)文献综述,(2)实验,(3)报告撰写。在每个阶段,由大型语言模型驱动的专业代理协作完成不同的目标,整合了如 arXiv、Hugging Face、Python 和 LaTeX 等外部工具以优化结果。这一结构化的工作流程始于独立收集和分析相关研究论文,经过协作计划和数据准备,最终实现自动化实验和综合报告生成。论文中讨论了具体代理角色及其在这些阶段的贡献。 + +

+ Demonstration of the flow of Agent Laboratory +

+ +## 🖥️ 安装 + + +### Python 虚拟环境选项 + +1. **克隆 GitHub 仓库**:首先使用以下命令克隆仓库: + ```bash + git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git + ``` + +2. **设置并激活 Python 环境** + ```bash + python -m venv venv_agent_lab + ``` + - 现在激活此环境: + ```bash + source venv_agent_lab/bin/activate + ``` + +3. **安装所需库** + ```bash + pip install -r requirements.txt + ``` + +4. **安装 pdflatex [可选]** + ```bash + sudo apt install pdflatex + ``` + - 这使得代理能够编译 latex 源代码。 + - **[重要]** 如果由于没有 sudo 权限而无法运行此步骤,可以通过将 `--compile_latex` 标志设置为 false 来关闭 pdf 编译:`--compile_latex=False` + +5. **现在运行 Agent Laboratory!** + + `python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA"` + + 或者,如果您没有安装 pdflatex + + `python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False` + +----- + +## 提高研究成果的技巧 + +#### [技巧 #1] 📝 确保写下详尽的笔记! 📝 + +**写下详尽的笔记非常重要**,帮助您的代理理解您在项目中希望实现的目标,以及任何风格偏好。笔记可以包括您希望代理执行的任何实验、提供 API 密钥、希望包含的特定图表或图形,或任何您希望代理在进行研究时了解的内容。 + +这也是您让代理知道**它可以访问的计算资源**的机会,例如 GPU(数量、类型、内存大小)、CPU(核心数量、类型)、存储限制和硬件规格。 + +为了添加笔记,您必须修改 `ai_lab_repo.py` 中的 `task_notes_LLM` 结构。以下是我们的一些实验中使用的笔记示例。 + +``` +task_notes_LLM = [ + {"phases": ["plan formulation"], + "note": f"You should come up with a plan for TWO experiments."}, + + {"phases": ["plan formulation", "data preparation", "running experiments"], + "note": "Please use gpt-4o-mini for your experiments."}, + + {"phases": ["running experiments"], + "note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n"}, + + {"phases": ["running experiments"], + "note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. 
Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."}, + + {"phases": ["running experiments"], + "note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."}, + + {"phases": ["data preparation", "running experiments"], + "note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"}, + + {"phases": ["data preparation", "running experiments"], + "note": "Generate figures with very colorful and artistic design."}, + ] +``` + +-------- + +#### [技巧 #2] 🚀 使用更强大的模型通常会带来更好的研究 🚀 + +在进行研究时,**模型的选择会显著影响结果的质量**。更强大的模型往往具有更高的准确性、更好的推理能力和更优秀的报告生成能力。如果计算资源允许,优先使用先进的模型,如 o1-(mini/preview) 或类似的最先进大型语言模型。 + +然而,**在性能和成本效益之间取得平衡也很重要**。虽然强大的模型可能会产生更好的结果,但它们通常更昂贵且运行时间更长。考虑选择性地使用它们,例如用于关键实验或最终分析,同时在迭代任务或初步原型设计中依赖较小、更高效的模型。 + +当资源有限时,**通过在您的特定数据集上微调较小的模型或将预训练模型与特定任务的提示相结合来优化,以实现性能与计算效率之间的理想平衡**。 + +----- + +#### [技巧 #3] ✅ 您可以从检查点加载之前的保存 ✅ + +**如果您丢失了进度、互联网连接中断或子任务失败,您始终可以从先前的状态加载。** 您的所有进度默认保存在 `state_saves` 变量中,该变量存储每个单独的检查点。只需在运行 `ai_lab_repo.py` 时传递以下参数 + +`python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH"` + +----- + +#### [技巧 #4] 🈯 如果您使用非英语语言运行 🈲 + +如果您使用非英语语言运行 Agent Laboratory,没问题,只需确保向代理提供一个语言标志,以便用您喜欢的语言进行研究。请注意,我们尚未广泛研究使用其他语言运行 Agent Laboratory,因此请务必报告您遇到的任何问题。 + +例如,如果您使用中文运行: + +`python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文"` + +---- + +#### [技巧 #5] 🌟 还有很大的改进空间 🌟 + +这个代码库还有很大的改进空间,因此如果您进行了更改并希望帮助社区,请随时分享您所做的更改!我们希望这个工具对您有帮助! 
+ +## 参考文献 / Bibtex + +```bibtex +@preprint{schmidgall2025AgentLaboratory, + title={Agent Laboratory: Using LLM Agents as Research Assistants}, + author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad}, + year={2025} +} +``` \ No newline at end of file diff --git a/readme/README-farsi.md b/readme/README-farsi.md new file mode 100755 index 0000000..611cd1c --- /dev/null +++ b/readme/README-farsi.md @@ -0,0 +1,160 @@ +# آزمایشگاه ایجینت ها: استفاده از نمایندگان مدل‌های زبانی بزرگ به عنوان دستیار برای محققان + +

+ Demonstration of the flow of Agent Laboratory +

+ + +

+ 【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】 +

+ +

+ 【🌐 Website | 💻 Software | 🎥 Video | 📚 Example Paper | 📰 Citation】 +

+ +## 📖 نمای کلی + +- **آزمایشگاه ایجینت ها** یک سیستم کاملا اتوماتیک برای کارهای تحقیقاتی است که به منظور کمک به **شما** به عنوان پژوهشگر انسانی برای **اجرای ایده‌های تحقیقاتی خود** طراحی شده است. آزمایشگاه ایجینت ها شامل نمایندگان تخصصی است که توسط مدل‌های زبان بزرگ هدایت می‌شوند تا در تمام مراحل تحقیق از انجام مطالعه و تدوین برنامه‌ها تا اجرای آزمایش‌ها و نوشتن گزارش‌های جامع از شما حمایت کنند. +- این سیستم برای جایگزینی خلاقیت شما طراحی نشده است، بلکه برای تکمیل آن است، به شما این امکان را می‌دهد که بر ایده‌پردازی و تفکر انتقادی تمرکز کنید در حالی که وظایف تکراری و زمان‌بر مانند کدنویسی و مستندسازی خودکار می‌شوند. با پذیرش سطوح مختلف منابع محاسباتی و مشارکت انسانی، آزمایشگاه ایجنت ها هدف دارد تا کشف علمی را تسریع کرده و بهره‌وری تحقیقاتی شما را بهینه کند. + +

+ Demonstration of the flow of Agent Laboratory +

+ +### 🔬 آزمایشگاه ایجنت ها چگونه کار می‌کند؟ + +- آزمایشگاه ایجنت ها شامل سه مرحله اصلی است که به طور سیستماتیک فرآیند تحقیق را هدایت می‌کنند: (1) مرور ادبیات، (2) آزمایش‌گری، و (3) نوشتن گزارش. در هر مرحله، عوامل تخصصی هدایت‌شده توسط مدل‌های زبان بزرگ با هم همکاری می‌کنند تا اهداف متمایز را محقق کنند و ابزارهای خارجی مانند arXiv، Hugging Face، Python، و LaTeX را برای بهینه‌سازی نتایج ادغام می‌کنند. این جریان کاری ساختاریافته با جمع‌آوری و تحلیل مستقل مقالات تحقیقاتی مرتبط آغاز می‌شود، از طریق برنامه‌ریزی مشارکتی و آماده‌سازی داده‌ها پیش می‌رود، و به آزمایش‌گری خودکار و تولید گزارش جامع منتهی می‌شود. جزئیات نقش‌های خاص عوامل و مشارکت‌های آن‌ها در این مراحل در مقاله مورد بحث قرار گرفته است. + +

+ Demonstration of the flow of Agent Laboratory +

+ +## 🖥️ نصب + +### گزینه محیط مجازی پایتون (venv) + +1. **کلون کردن مخزن گیت‌هاب**: با استفاده از دستور زیر، مخزن را کلون کنید: + ```bash + git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git + ``` + +2. **تنظیم و فعال‌سازی محیط پایتون** + ```bash + python -m venv venv_agent_lab + ``` + + - این محیط را فعال کنید: + ```bash + source venv_agent_lab/bin/activate + ``` + +3. **نصب کتابخانه‌های مورد نیاز** + ```bash + pip install -r requirements.txt + ``` + +4. **نصب pdflatex [اختیاری]** + ```bash + sudo apt install pdflatex + ``` + + - این امکان را می‌دهد تا منبع LaTeX توسط عوامل کامپایل شود. + - **[مهم]** اگر به دلیل نداشتن دسترسی sudo نمی‌توانید این مرحله را اجرا کنید، می‌توانید کامپایل PDF را با اجرای آزمایشگاه ایجنت ها و تنظیم فلگ --compile_latex به false غیرفعال کنید: + ``` + --compile_latex=False + ``` + +5. **اکنون آزمایشگاه ایجنت ها را اجرا کنید!** + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" + ``` + + یا اگر pdflatex نصب نکرده‌اید: + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False + ``` + +----- +## نکات برای نتایج بهتر تحقیق + +#### [نکته #1] 📝 حتماً یادداشت‌های گسترده‌ای بنویسید! 📝 + +**نوشتن یادداشت‌های دقیق مهم است** تا به ایجنت ها شما در درک آنچه می‌خواهید در پروژه‌تان انجام دهید و همچنین هرگونه ترجیحات سبک کمک کند. یادداشت‌ها می‌توانند شامل هر آزمایشی باشند که می‌خواهید عوامل انجام دهند، ارائه کلیدهای API، نمودارها یا شکل‌های خاصی که می‌خواهید گنجانده شوند، یا هر چیزی که می‌خواهید ایجنت ها هنگام انجام تحقیق بداند. + +این همچنین فرصت شماست تا به ایجنت ها اطلاع دهید **به چه منابع محاسباتی دسترسی دارد**، مثلاً GPUها (تعداد، نوع GPU، میزان GB)، CPUها (تعداد هسته، نوع CPUها)، محدودیت‌های ذخیره‌سازی، و مشخصات سخت‌افزاری. + +برای افزودن یادداشت‌ها، باید ساختار task_notes_LLM را در داخل ai_lab_repo.py تغییر دهید. 
در زیر نمونه‌ای از مجموعه یادداشت‌هایی که برای برخی از آزمایش‌های ما استفاده شده است ارائه شده است. + +```python +task_notes_LLM = [ + {"phases": ["plan formulation"], + "note": f"You should come up with a plan for TWO experiments."}, + + {"phases": ["plan formulation", "data preparation", "running experiments"], + "note": "Please use gpt-4o-mini for your experiments."}, + + {"phases": ["running experiments"], + "note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n"}, + + {"phases": ["running experiments"], + "note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."}, + + {"phases": ["running experiments"], + "note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."}, + + {"phases": ["data preparation", "running experiments"], + "note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"}, + + {"phases": ["data preparation", "running experiments"], + "note": "Generate figures with very colorful and artistic design."}, + ] +``` + +-------- + +#### [نکته #2] 🚀 استفاده از مدل‌های قدرتمندتر به طور کلی منجر به تحقیقات بهتر می‌شود 🚀 + +هنگام انجام تحقیقات، **انتخاب مدل می‌تواند به طور قابل توجهی بر کیفیت نتایج تأثیر بگذارد**. مدل‌های قدرتمندتر معمولاً دقت بالاتری دارند، قابلیت‌های استدلال بهتری ارائه می‌دهند و گزارش‌های بهتری تولید می‌کنند. 
اگر منابع محاسباتی اجازه می‌دهد، استفاده از مدل‌های پیشرفته مانند o1-(mini/preview) یا مدل‌های زبان بزرگ مشابه پیشرفته را در اولویت قرار دهید. + +با این حال، **مهم است که تعادل بین عملکرد و هزینه را رعایت کنید**. در حالی که مدل‌های قدرتمند ممکن است نتایج بهتری ارائه دهند، اغلب هزینه‌بر و زمان‌بر هستند. در نظر بگیرید که از آن‌ها به صورت انتخابی استفاده کنید — به عنوان مثال، برای آزمایش‌های کلیدی یا تحلیل‌های نهایی — در حالی که برای وظایف تکراری یا نمونه‌سازی اولیه از مدل‌های کوچک‌تر و کارآمدتر استفاده کنید. + +وقتی منابع محدود هستند، **با تنظیم دقیق مدل‌های کوچک‌تر بر روی مجموعه داده‌های خاص خود یا ترکیب مدل‌های پیش‌آموزش‌دیده با پرامپت‌های خاص وظیفه‌ای بهینه‌سازی کنید** تا تعادل مطلوب بین عملکرد و کارایی محاسباتی را به دست آورید. + +----- + +#### [نکته #3] ✅ می‌توانید ذخیره‌های قبلی را از نقاط بازگشت بارگذاری کنید ✅ + +**اگر پیشرفت خود را از دست دادید، اتصال اینترنت قطع شد، یا یک زیروظیفه شکست خورد، همیشه می‌توانید از وضعیت قبلی بارگذاری کنید.** تمام پیشرفت‌های شما به طور پیش‌فرض در متغیر state_saves ذخیره می‌شوند که هر نقطه بازگشت را ذخیره می‌کند. فقط هنگام اجرای ai_lab_repo.py از آرگومان‌های زیر استفاده کنید: + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH" +``` + +----- + +#### [نکته #4] 🈯 اگر به زبانی غیر از انگلیسی اجرا می‌کنید 🈲 + +اگر آزمایشگاه ایجنت ها را به زبانی غیر از انگلیسی اجرا می‌کنید، مشکلی نیست، فقط مطمئن شوید که پرچم زبان را به عوامل ارائه دهید تا به زبان مورد نظر شما تحقیق انجام دهند. توجه داشته باشید که ما به طور گسترده‌ای اجرای آزمایشگاه ایجنت ها را به زبان‌های دیگر مطالعه نکرده‌ایم، بنابراین حتماً هر مشکلی که با آن مواجه شدید را گزارش دهید. 
+ +برای مثال، اگر به زبان چینی اجرا می‌کنید: + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文" +``` + +---- + +#### [نکته #5] 🌟 جای پیشرفت زیادی وجود دارد 🌟 + +جای پیشرفت زیادی برای بهبود این کدبیس وجود دارد، بنابراین اگر در نهایت تغییراتی ایجاد کردید و می‌خواهید به جامعه کمک کنید، لطفاً تغییراتی که ایجاد کرده‌اید را به اشتراک بگذارید! امیدواریم این ابزار به شما کمک کند! + +## مراجع / Bibtex + +```bibtex +@preprint{schmidgall2025AgentLaboratory, + title={Agent Laboratory: Using LLM Agents as Research Assistants}, + author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad}, + year={2025} +} +``` diff --git a/readme/README-filipino.md b/readme/README-filipino.md new file mode 100755 index 0000000..a12bc88 --- /dev/null +++ b/readme/README-filipino.md @@ -0,0 +1,157 @@ +# Agent Laboratory: Paggamit ng LLM Agents bilang mga Tagapag-Asistang Pang-research + +

+ Demonstrasyon ng daloy ng AgentClinic +

+ +

+ 【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】 +

+ +

+ 【🌐 Website | 💻 Software | 🎥 Video | 📚 Example Paper | 📰 Citation】 +

+ +## 📖 Pangkalahatang-ideya + +- **Agent Laboratory** ay isang end-to-end na autonomous na workflow sa pananaliksik na nilalayong tulungan **ikaw** bilang isang human researcher sa **pagpapatupad ng iyong mga ideya sa pananaliksik**. Binubuo ang Agent Laboratory ng mga espesyalistang ahente na pinapagana ng malalaking modelo ng wika upang suportahan ka sa buong workflow ng pananaliksik—mula sa pagsasagawa ng mga pagsusuri sa literatura at pagbuo ng mga plano hanggang sa pagpapatupad ng mga eksperimento at pagsulat ng komprehensibong mga ulat. +- Ang sistemang ito ay hindi dinisenyo upang palitan ang iyong pagkamalikhain kundi upang kumpletuhin ito, na nagbibigay-daan sa iyo na magpokus sa ideasyon at kritikal na pag-iisip habang ina-automate ang mga paulit-ulit at matagal na gawain tulad ng pag-cocode at dokumentasyon. Sa pamamagitan ng pag-aakma sa iba't ibang antas ng computational na mga mapagkukunan at partisipasyon ng tao, layunin ng Agent Laboratory na pabilisin ang siyentipikong pagtuklas at i-optimize ang iyong produktibidad sa pananaliksik. + +

+ Demonstrasyon ng daloy ng AgentClinic +

+ +### 🔬 Paano gumagana ang Agent Laboratory? + +- Binubuo ang Agent Laboratory ng tatlong pangunahing yugto na sistematikong ginagabayan ang proseso ng pananaliksik: (1) Pagsusuri ng Literatura, (2) Eksperimentasyon, at (3) Pagsulat ng Ulat. Sa bawat yugto, ang mga espesyalistang ahente na pinapagana ng LLMs ay nagtutulungan upang makamit ang mga natatanging layunin, na nag-iintegrate ng mga panlabas na kagamitan tulad ng arXiv, Hugging Face, Python, at LaTeX upang i-optimize ang mga resulta. Nagsisimula ang estrukturadong workflow na ito sa malayang koleksyon at pagsusuri ng mga kaugnay na papel sa pananaliksik, sumusulong sa pamamagitan ng kolaboratibong pagpaplano at paghahanda ng datos, at nagreresulta sa automated na eksperimento at komprehensibong paggawa ng ulat. Ang mga detalye tungkol sa mga tiyak na papel ng ahente at kanilang mga kontribusyon sa mga yugtong ito ay tinalakay sa papel. + +

+ Demonstrasyon ng daloy ng AgentClinic +

+ +## 🖥️ Pag-install + +### Python venv na opsyon + +1. **I-clone ang GitHub Repository**: Magsimula sa pamamagitan ng pag-clone ng repository gamit ang utos: + ```bash + git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git + ``` + +2. **I-set up at I-activate ang Python Environment** + ```bash + python -m venv venv_agent_lab + ``` + - Ngayon i-activate ang environment na ito: + ```bash + source venv_agent_lab/bin/activate + ``` + +3. **I-install ang mga kinakailangang library** + ```bash + pip install -r requirements.txt + ``` + +4. **I-install ang pdflatex [OPTIONAL]** + ```bash + sudo apt install pdflatex + ``` + - Pinapayagan nitong ma-compile ng mga ahente ang latex source. + - **[MAHALAGA]** Kung hindi maisagawa ang hakbang na ito dahil sa kawalan ng sudo access, maaaring i-off ang pdf compiling sa pamamagitan ng pagpapatakbo ng Agent Laboratory gamit ang pag-set ng `--compile_latex` flag sa false: + ```bash + --compile_latex=False + ``` + +5. **Ngayon patakbuhin ang Agent Laboratory!** + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" + ``` + o, kung wala kang naka-install na pdflatex + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False + ``` + +----- + +## Mga Tip para sa Mas Mabuting Resulta ng Pananaliksik + +#### [Tip #1] 📝 Tiyaking sumulat ng malawak na mga tala! 📝 + +**Mahalaga ang pagsusulat ng malawak na mga tala** upang matulungan ang iyong ahente na maunawaan kung ano ang nais mong makamit sa iyong proyekto, pati na rin ang anumang mga paboritong estilo. Maaaring kabilang sa mga tala ang anumang mga eksperimento na nais mong isagawa ng mga ahente, pagbibigay ng mga API key, tiyak na mga plot o figure na nais mong isama, o anumang nais mong malaman ng ahente kapag nagsasagawa ng pananaliksik. 
+ +Ito rin ang iyong pagkakataon upang ipaalam sa ahente **kung anong mga compute resources ang mayroon ito**, halimbawa, GPUs (ilan, anong uri ng GPU, ilang GBs), CPUs (ilang cores, anong uri ng CPUs), mga limitasyon sa storage, at mga specs ng hardware. + +Upang magdagdag ng mga tala, kailangan mong baguhin ang `task_notes_LLM` na istraktura sa loob ng `ai_lab_repo.py`. Ibinigay sa ibaba ang isang halimbawa ng mga tala na ginamit para sa ilan sa aming mga eksperimento. + +```python +task_notes_LLM = [ + {"phases": ["plan formulation"], + "note": f"You should come up with a plan for TWO experiments."}, + + {"phases": ["plan formulation", "data preparation", "running experiments"], + "note": "Please use gpt-4o-mini for your experiments."}, + + {"phases": ["running experiments"], + "note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n"}, + + {"phases": ["running experiments"], + "note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."}, + + {"phases": ["running experiments"], + "note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."}, + + {"phases": ["data preparation", "running experiments"], + "note": "You are running on a MacBook laptop. 
You can use 'mps' with PyTorch"}, + + {"phases": ["data preparation", "running experiments"], + "note": "Generate figures with very colorful and artistic design."}, + ] +``` + +-------- + +#### [Tip #2] 🚀 Ang paggamit ng mas malalakas na mga modelo ay karaniwang nagdudulot ng mas magagandang pananaliksik 🚀 + +Kapag nagsasagawa ng pananaliksik, **ang pagpili ng modelo ay maaaring malaki ang epekto sa kalidad ng mga resulta**. Ang mas malalakas na mga modelo ay karaniwang may mas mataas na katumpakan, mas mahusay na kakayahan sa pag-iisip, at mas magaling na paggawa ng ulat. Kung pinapayagan ng mga computational na mapagkukunan, bigyang prioridad ang paggamit ng mga advanced na modelo tulad ng o1-(mini/preview) o katulad na mga state-of-the-art na malalaking modelo ng wika. + +Gayunpaman, **mahalagang balansehin ang pagganap at pagiging cost-effective**. Habang ang mga malalakas na modelo ay maaaring magbigay ng mas magagandang resulta, madalas silang mas mahal at mas matagal patakbuhin. Isaalang-alang ang paggamit ng mga ito nang selektibo—halimbawa, para sa mga pangunahing eksperimento o panghuling pagsusuri—habang umaasa sa mas maliit, mas mahusay na mga modelo para sa mga iteratibong gawain o paunang prototyping. + +Kapag limitado ang mga mapagkukunan, **i-optimize sa pamamagitan ng fine-tuning ng mas maliliit na mga modelo** sa iyong partikular na dataset o pagsasama ng mga pre-trained na modelo sa mga task-specific na prompt upang makamit ang nais na balanse sa pagitan ng pagganap at computational na kahusayan. + +----- + +#### [Tip #3] ✅ Maaari kang mag-load ng mga naunang save mula sa mga checkpoint ✅ + +**Kung mawalan ka ng progreso, koneksyon sa internet, o kung mabigo ang isang subtask, maaari mong laging i-load mula sa isang naunang estado.** Ang lahat ng iyong progreso ay naka-save bilang default sa `state_saves` variable, na nag-iimbak ng bawat indibidwal na checkpoint. 
Ibigay lamang ang mga sumusunod na argumento kapag nagpapatakbo ng `ai_lab_repo.py`: + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH" +``` + +----- + +#### [Tip #4] 🈯 Kung ikaw ay nagpapatakbo sa isang wika maliban sa Ingles 🈲 + +Kung nagpapatakbo ka ng Agent Laboratory sa isang wika maliban sa Ingles, walang problema, siguraduhing magbigay ng language flag sa mga ahente upang magsagawa ng pananaliksik sa iyong nais na wika. Tandaan na hindi pa namin lubusang pinag-aralan ang pagpapatakbo ng Agent Laboratory sa ibang mga wika, kaya siguraduhing iulat ang anumang mga problemang iyong makaharap. + +Halimbawa, kung nagpapatakbo ka sa Chinese: + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文" +``` + +---- + +#### [Tip #5] 🌟 Mayroong maraming puwang para sa pagpapabuti 🌟 + +Mayroong maraming puwang upang mapabuti ang codebase na ito, kaya kung ikaw ay gagawa ng mga pagbabago at nais makatulong sa komunidad, huwag mag-atubiling ibahagi ang mga pagbabagong iyong ginawa! Inaasahan naming makakatulong ang tool na ito sa iyo! + +## Reference / Bibtex + +```bibtex +@preprint{schmidgall2025AgentLaboratory, + title={Agent Laboratory: Using LLM Agents as Research Assistants}, + author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad}, + year={2025} +} +``` \ No newline at end of file diff --git a/readme/README-french.md b/readme/README-french.md new file mode 100755 index 0000000..ed57aa9 --- /dev/null +++ b/readme/README-french.md @@ -0,0 +1,158 @@ +# Laboratoire d'Agent : Utilisation des agents LLM comme assistants de recherche + +

+ Démonstration du flux de AgentClinic +

+ + +

+ 【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】 +

+ +

+ 【🌐 Site Web | 💻 Logiciel | 🎥 Vidéo | 📚 Article Exemple | 📰 Citation】 +

+ +## 📖 Aperçu + +- **Laboratoire d'Agent** est un flux de travail de recherche autonome de bout en bout destiné à vous assister en tant que chercheur humain dans **la mise en œuvre de vos idées de recherche**. Le Laboratoire d'Agent est composé d'agents spécialisés alimentés par de grands modèles de langage pour vous soutenir tout au long du processus de recherche—de la réalisation des revues de littérature et de la formulation de plans à l'exécution des expériences et à la rédaction de rapports complets. +- Ce système n'est pas conçu pour remplacer votre créativité, mais pour la compléter, vous permettant de vous concentrer sur l’idéation et la pensée critique tout en automatisant les tâches répétitives et chronophages telles que la programmation et la documentation. En s'adaptant à différents niveaux de ressources informatiques et d'implication humaine, le Laboratoire d'Agent vise à accélérer la découverte scientifique et à optimiser votre productivité en recherche. + +

+ Démonstration du flux de AgentClinic +

+ +### 🔬 Comment fonctionne le Laboratoire d'Agent ? + +- Le Laboratoire d'Agent se compose de trois phases principales qui guident systématiquement le processus de recherche : (1) Revue de littérature, (2) Expérimentation et (3) Rédaction de rapports. Pendant chaque phase, des agents spécialisés alimentés par des LLM collaborent pour atteindre des objectifs distincts, en intégrant des outils externes tels qu'arXiv, Hugging Face, Python et LaTeX afin d'optimiser les résultats. Ce flux de travail structuré commence par la collecte et l'analyse indépendantes des articles de recherche pertinents, progresse par la planification collaborative et la préparation des données, et aboutit à l'expérimentation automatisée et à la génération de rapports complets. Les détails sur les rôles spécifiques des agents et leurs contributions au cours de ces phases sont abordés dans l'article. + +

+ Démonstration du flux de AgentClinic +

+ +## 🖥️ Installation + +### Option d'environnement virtuel Python + +1. **Cloner le dépôt GitHub** : Commencez par cloner le dépôt en utilisant la commande : + ```bash + git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git + ``` + +2. **Configurer et activer l'environnement Python** + ```bash + python -m venv venv_agent_lab + ``` + + - Activez maintenant cet environnement : + ```bash + source venv_agent_lab/bin/activate + ``` + +3. **Installer les bibliothèques requises** + ```bash + pip install -r requirements.txt + ``` + +4. **Installer pdflatex [OPTIONNEL]** + ```bash + sudo apt install pdflatex + ``` + + - Cela permet aux agents de compiler le code source LaTeX. + - **[IMPORTANT]** Si cette étape ne peut pas être exécutée en raison de l'absence d'accès sudo, la compilation PDF peut être désactivée en exécutant le Laboratoire d'Agent avec le drapeau `--compile_latex` défini sur `false` : `--compile_latex=False` + +5. **Lancez maintenant le Laboratoire d'Agent !** + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "VOTRE IDÉE DE RECHERCHE" + ``` + + ou, si vous n'avez pas installé pdflatex + + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "VOTRE IDÉE DE RECHERCHE" --compile_latex=False + ``` + +----- +## Conseils pour de meilleurs résultats de recherche + +#### [Conseil n°1] 📝 Assurez-vous de prendre des notes détaillées ! 📝 + +**Prendre des notes détaillées est important** pour aider votre agent à comprendre ce que vous cherchez à accomplir dans votre projet, ainsi que toute préférence de style. Les notes peuvent inclure les expériences que vous souhaitez que les agents réalisent, la fourniture de clés API, certains graphiques ou figures que vous souhaitez inclure, ou tout ce que vous souhaitez que l'agent sache lors de la réalisation de recherches. 
+ +C'est également votre opportunité d'informer l'agent **quelles ressources informatiques il peut utiliser**, par exemple les GPU (combien, quel type de GPU, combien de Go), les CPU (combien de cœurs, quel type de CPU), les limitations de stockage et les spécifications matérielles. + +Pour ajouter des notes, vous devez modifier la structure `task_notes_LLM` à l'intérieur de `ai_lab_repo.py`. Ci-dessous, un exemple de jeu de notes utilisé pour certaines de nos expériences. + +```python +task_notes_LLM = [ + {"phases": ["plan formulation"], + "note": f"You should come up with a plan for TWO experiments."}, + + {"phases": ["plan formulation", "data preparation", "running experiments"], + "note": "Please use gpt-4o-mini for your experiments."}, + + {"phases": ["running experiments"], + "note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n"}, + + {"phases": ["running experiments"], + "note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."}, + + {"phases": ["running experiments"], + "note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."}, + + {"phases": ["data preparation", "running experiments"], + "note": "You are running on a MacBook laptop. 
You can use 'mps' with PyTorch"}, + + {"phases": ["data preparation", "running experiments"], + "note": "Generate figures with very colorful and artistic design."}, + ] +``` + +-------- + +#### [Conseil n°2] 🚀 Utiliser des modèles plus puissants conduit généralement à une meilleure recherche 🚀 + +Lors de la conduite de recherches, **le choix du modèle peut avoir un impact significatif sur la qualité des résultats**. Les modèles plus puissants ont tendance à avoir une précision plus élevée, de meilleures capacités de raisonnement et une meilleure génération de rapports. Si les ressources informatiques le permettent, privilégiez l'utilisation de modèles avancés tels que o1-(mini/preview) ou d'autres grands modèles de langage à la pointe de la technologie. + +Cependant, **il est important de trouver un équilibre entre performance et rentabilité**. Bien que les modèles puissants puissent donner de meilleurs résultats, ils sont souvent plus coûteux et plus longs à exécuter. Envisagez de les utiliser de manière sélective—par exemple, pour des expériences clés ou des analyses finales—tout en comptant sur des modèles plus petits et plus efficaces pour des tâches itératives ou du prototypage initial. + +Lorsque les ressources sont limitées, **optimisez en affinant des modèles plus petits** sur votre jeu de données spécifique ou en combinant des modèles pré-entraînés avec des invites spécifiques à la tâche afin d'atteindre l'équilibre souhaité entre performance et efficacité computationnelle. + +----- + +#### [Conseil n°3] ✅ Vous pouvez charger des sauvegardes précédentes depuis des points de contrôle ✅ + +**Si vous perdez des progrès, la connexion Internet ou si une sous-tâche échoue, vous pouvez toujours charger à partir d'un état précédent.** Tous vos progrès sont enregistrés par défaut dans la variable `state_saves`, qui stocke chaque point de contrôle individuel. 
Il vous suffit de passer les arguments suivants lors de l'exécution de `ai_lab_repo.py` + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH" +``` + +----- + +#### [Conseil n°4] 🈯 Si vous utilisez une langue autre que l'anglais 🈲 + +Si vous exécutez le Laboratoire d'Agent dans une langue autre que l'anglais, pas de problème, assurez-vous simplement de fournir un drapeau de langue aux agents pour effectuer des recherches dans votre langue préférée. Notez que nous n'avons pas étudié de manière approfondie l'exécution du Laboratoire d'Agent dans d'autres langues, alors assurez-vous de signaler tout problème que vous rencontrez. + +Par exemple, si vous utilisez le chinois : + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文" +``` + +---- + +#### [Conseil n°5] 🌟 Il y a beaucoup de place pour l'amélioration 🌟 + +Il y a beaucoup de possibilités d'améliorer cette base de code, donc si vous finissez par apporter des modifications et souhaitez aider la communauté, n'hésitez pas à partager les changements que vous avez effectués ! Nous espérons que cet outil vous sera utile ! + +## Référence / Bibtex + +```bibtex +@preprint{schmidgall2025AgentLaboratory, + title={Agent Laboratory: Using LLM Agents as Research Assistants}, + author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad}, + year={2025} +} +``` \ No newline at end of file diff --git a/readme/README-hindi.md b/readme/README-hindi.md new file mode 100755 index 0000000..8049b3e --- /dev/null +++ b/readme/README-hindi.md @@ -0,0 +1,153 @@ + +# एजेंट लैबोरेटरी: अनुसंधान सहायकों के रूप में LLM एजेंटों का उपयोग + +

+ AgentClinic के प्रवाह का प्रदर्शन +

+ + +

+ 【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】 +

+ +

+ 【🌐 Website | 💻 Software | 🎥 Video | 📚 Example Paper | 📰 Citation】 +

+ +## 📖 अवलोकन + +- **एजेंट लैबोरेटरी** एक अंत-से-अंत स्वायत्त अनुसंधान कार्यप्रवाह है जिसे **आप** को मानव शोधकर्ता के रूप में **अपने अनुसंधान विचारों को लागू करने** में सहायता करने के लिए डिज़ाइन किया गया है। एजेंट लैबोरेटरी में बड़े भाषा मॉडल द्वारा संचालित विशेषीकृत एजेंट शामिल हैं जो आपको संपूर्ण अनुसंधान कार्यप्रवाह के माध्यम से समर्थन करते हैं—साहित्य समीक्षा करने और योजनाएँ बनाने से लेकर प्रयोगों को निष्पादित करने और व्यापक रिपोर्ट लिखने तक। +- यह प्रणाली आपकी रचनात्मकता को बदलने के लिए नहीं बल्कि इसे पूरा करने के लिए डिज़ाइन की गई है, जिससे आप विचार-विमर्श और महत्वपूर्ण सोच पर ध्यान केंद्रित कर सकते हैं, जबकि कोडिंग और दस्तावेजीकरण जैसे दोहराए जाने वाले और समय-गहन कार्यों को स्वचालित किया जाता है। विभिन्न स्तर के संगणनात्मक संसाधनों और मानव भागीदारी को समायोजित करके, एजेंट लैबोरेटरी वैज्ञानिक खोज को तेज करने और आपके अनुसंधान उत्पादकता को अनुकूलित करने का लक्ष्य रखता है। + +

+ AgentClinic के प्रवाह का प्रदर्शन +

+ +### 🔬 एजेंट लैबोरेटरी कैसे काम करता है? + +- एजेंट लैबोरेटरी तीन मुख्य चरणों से मिलकर बनता है जो अनुसंधान प्रक्रिया का व्यवस्थित रूप से मार्गदर्शन करते हैं: (1) साहित्य समीक्षा, (2) प्रयोग, और (3) रिपोर्ट लेखन। प्रत्येक चरण के दौरान, LLM द्वारा संचालित विशेषीकृत एजेंट विशिष्ट उद्देश्यों को प्राप्त करने के लिए सहयोग करते हैं, परिणामों को अनुकूलित करने के लिए arXiv, Hugging Face, Python, और LaTeX जैसे बाहरी उपकरणों को एकीकृत करते हैं। यह संरचित कार्यप्रवाह संबंधित अनुसंधान पत्रों के स्वतंत्र संग्रह और विश्लेषण से शुरू होता है, सहयोगात्मक योजना और डेटा तैयारी के माध्यम से प्रगति करता है, और स्वचालित प्रयोग और व्यापक रिपोर्ट जनरेशन में समाप्त होता है। इन चरणों में विशिष्ट एजेंट भूमिकाओं और उनके योगदान के विवरण पेपर में चर्चा किए गए हैं। + +

+ AgentClinic के प्रवाह का प्रदर्शन +

+ +## 🖥️ स्थापना + +### Python venv विकल्प + +1. **GitHub रिपॉजिटरी क्लोन करें**: रिपॉजिटरी को क्लोन करना शुरू करें निम्न कमांड का उपयोग करके: + ```bash + git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git + ``` + +2. **पायथन पर्यावरण सेटअप और सक्रिय करें** + ```bash + python -m venv venv_agent_lab + ``` + - अब इस पर्यावरण को सक्रिय करें: + ```bash + source venv_agent_lab/bin/activate + ``` + +3. **आवश्यक पुस्तकालय स्थापित करें** + ```bash + pip install -r requirements.txt + ``` + +4. **pdflatex स्थापित करें [वैकल्पिक]** + ```bash + sudo apt install pdflatex + ``` + - यह एजेंटों द्वारा latex स्रोत को संकलित करने में सक्षम बनाता है। + - **[महत्वपूर्ण]** यदि इस चरण को sudo एक्सेस न होने के कारण नहीं चलाया जा सकता है, तो Agent Laboratory को --compile_latex फ्लैग को false सेट करके pdf संकलन बंद किया जा सकता है: `--compile_latex=False` + +5. **अब Agent Laboratory चलाएं!** + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" + ``` + या, यदि आपने pdflatex स्थापित नहीं किया है: + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False + ``` + +----- +## बेहतर अनुसंधान परिणामों के लिए सुझाव + +#### [सुझाव #1] 📝 विस्तृत नोट्स लिखना सुनिश्चित करें! 
📝 + +**विस्तृत नोट्स लिखना महत्वपूर्ण है** ताकि आपका एजेंट समझ सके कि आप अपने प्रोजेक्ट में क्या हासिल करना चाहते हैं, साथ ही किसी भी शैली की प्राथमिकताएँ। नोट्स में उन किसी भी प्रयोगों को शामिल किया जा सकता है जिन्हें आप एजेंटों से करने के लिए चाहते हैं, API कुंजी प्रदान करना, कुछ प्लॉट या आकृतियाँ शामिल करना, या कुछ भी जिसे आप अनुसंधान करते समय एजेंट को जानना चाहते हैं। + +यह आपका अवसर भी है कि एजेंट को बताएं **कौन से कंप्यूट संसाधनों तक उसकी पहुंच है**, जैसे GPUs (कितने, किस प्रकार के GPU, कितने GBs), CPUs (कितने कोर, किस प्रकार के CPUs), स्टोरेज सीमाएँ, और हार्डवेयर स्पेसिफिकेशन। + +नोट्स जोड़ने के लिए, आपको ai_lab_repo.py के अंदर task_notes_LLM संरचना को संशोधित करना होगा। नीचे हमारे कुछ प्रयोगों के लिए उपयोग किए गए नोट्स का एक उदाहरण दिया गया है। + +```python +task_notes_LLM = [ + {"phases": ["plan formulation"], + "note": f"You should come up with a plan for TWO experiments."}, + + {"phases": ["plan formulation", "data preparation", "running experiments"], + "note": "Please use gpt-4o-mini for your experiments."}, + + {"phases": ["running experiments"], + "note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n"}, + + {"phases": ["running experiments"], + "note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."}, + + {"phases": ["running experiments"], + "note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. 
Do not use much more than this unless you have to or are running the final tests."}, + + {"phases": ["data preparation", "running experiments"], + "note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"}, + + {"phases": ["data preparation", "running experiments"], + "note": "Generate figures with very colorful and artistic design."}, + ] +``` + +-------- + +#### [सुझाव #2] 🚀 अधिक शक्तिशाली मॉडल का उपयोग सामान्यतः बेहतर अनुसंधान की ओर ले जाता है 🚀 + +अनुसंधान करते समय, **मॉडल का चयन परिणामों की गुणवत्ता पर महत्वपूर्ण प्रभाव डाल सकता है**। अधिक शक्तिशाली मॉडल आमतौर पर उच्च सटीकता, बेहतर तर्क क्षमताओं, और बेहतर रिपोर्ट जनरेशन प्रदान करते हैं। यदि संगणनात्मक संसाधन अनुमति देते हैं, तो o1-(mini/preview) या इसी तरह के अत्याधुनिक बड़े भाषा मॉडल जैसे उन्नत मॉडलों के उपयोग को प्राथमिकता दें। + +हालांकि, **प्रदर्शन और लागत-प्रभावशीलता के बीच संतुलन बनाना महत्वपूर्ण है**। जबकि शक्तिशाली मॉडल बेहतर परिणाम दे सकते हैं, उन्हें चलाने में अक्सर अधिक खर्च और समय लगता है। उन्हें चयनात्मक रूप से उपयोग करने पर विचार करें—उदाहरण के लिए, मुख्य प्रयोगों या अंतिम विश्लेषणों के लिए—जबकि पुनरावृत्त कार्यों या प्रारंभिक प्रोटोटाइपिंग के लिए छोटे, अधिक कुशल मॉडलों पर निर्भर रहें। + +जब संसाधन सीमित हों, **अपने विशिष्ट डेटासेट पर छोटे मॉडलों को फाइन-ट्यून करके या कार्य-विशिष्ट प्रॉम्प्ट के साथ पूर्व-प्रशिक्षित मॉडलों को मिलाकर प्रदर्शन और संगणनात्मक दक्षता के बीच वांछित संतुलन प्राप्त करें**। + +----- + +#### [सुझाव #3] ✅ आप चेकपॉइंट से पिछले सहेजनों को लोड कर सकते हैं ✅ + +**यदि आप प्रगति खो देते हैं, इंटरनेट कनेक्शन खोते हैं, या कोई उपकार्य विफल हो जाता है, तो आप हमेशा पिछले स्थिति से लोड कर सकते हैं।** आपकी सभी प्रगति डिफ़ॉल्ट रूप से state_saves वेरिएबल में सहेजी जाती है, जो प्रत्येक व्यक्तिगत चेकपॉइंट को संग्रहीत करता है। बस ai_lab_repo.py चलाते समय निम्नलिखित तर्क पास करें: +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH" +``` + 
+----- + +#### [सुझाव #4] 🈯 यदि आप अंग्रेजी के अलावा किसी अन्य भाषा में चला रहे हैं 🈲 + +यदि आप एजेंट लैबोरेटरी को अंग्रेजी के अलावा किसी अन्य भाषा में चला रहे हैं, तो कोई समस्या नहीं है, बस सुनिश्चित करें कि एजेंटों को आपके पसंदीदा भाषा में अनुसंधान करने के लिए एक भाषा फ्लैग प्रदान करें। ध्यान दें कि हमने अन्य भाषाओं में एजेंट लैबोरेटरी चलाने का व्यापक अध्ययन नहीं किया है, इसलिए किसी भी समस्या का सामना करने पर रिपोर्ट करना सुनिश्चित करें। + +उदाहरण के लिए, यदि आप चीनी में चला रहे हैं: +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文" +``` + +---- + +#### [सुझाव #5] 🌟 सुधार के लिए बहुत गुंजाइश है 🌟 + +इस कोडबेस में सुधार की बहुत गुंजाइश है, इसलिए यदि आप अंततः परिवर्तन करते हैं और समुदाय की मदद करना चाहते हैं, तो कृपया आप जो परिवर्तन किए हैं उन्हें साझा करने में संकोच न करें! हमें उम्मीद है कि यह उपकरण आपकी मदद करेगा! + +## संदर्भ / Bibtex + +```bibtex +@preprint{schmidgall2025AgentLaboratory, + title={Agent Laboratory: Using LLM Agents as Research Assistants}, + author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad}, + year={2025} +} +``` \ No newline at end of file diff --git a/readme/README-italian.md b/readme/README-italian.md new file mode 100755 index 0000000..e1d2913 --- /dev/null +++ b/readme/README-italian.md @@ -0,0 +1,155 @@ +# Laboratorio Agenti: Utilizzo di Agenti LLM come Assistenti di Ricerca + +

+ Dimostrazione del flusso di AgentClinic +

+ + +

+ 【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】 +

+ +

+ 【🌐 Sito web | 💻 Software | 🎥 Video | 📚 Documento di esempio | 📰 Citazione】 +

+ +## 📖 Panoramica + +- **Agent Laboratory** è un flusso di lavoro di ricerca autonomo end-to-end progettato per assistere **te** come ricercatore umano nell'**implementazione delle tue idee di ricerca**. Agent Laboratory è composto da agenti specializzati guidati da grandi modelli linguistici per supportarti durante l'intero flusso di lavoro di ricerca—dalla conduzione di revisioni della letteratura e formulazione di piani all'esecuzione di esperimenti e alla scrittura di rapporti completi. +- Questo sistema non è progettato per sostituire la tua creatività ma per complementarla, permettendoti di concentrarti sull'ideazione e il pensiero critico mentre automatizza compiti ripetitivi e che richiedono tempo come la codifica e la documentazione. Accomodando diversi livelli di risorse computazionali e coinvolgimento umano, Agent Laboratory mira ad accelerare la scoperta scientifica e ottimizzare la tua produttività di ricerca. + +

+ Dimostrazione del flusso di AgentClinic +

+ +### 🔬 Come funziona Agent Laboratory? + +- Agent Laboratory è composto da tre fasi principali che guidano sistematicamente il processo di ricerca: (1) Revisione della letteratura, (2) Sperimentazione e (3) Scrittura del rapporto. Durante ogni fase, agenti specializzati guidati da LLM collaborano per raggiungere obiettivi distinti, integrando strumenti esterni come arXiv, Hugging Face, Python e LaTeX per ottimizzare i risultati. Questo flusso di lavoro strutturato inizia con la raccolta e analisi indipendente di documenti di ricerca pertinenti, prosegue attraverso la pianificazione collaborativa e la preparazione dei dati, e si conclude con la sperimentazione automatizzata e la generazione di rapporti completi. I dettagli sui ruoli specifici degli agenti e i loro contributi in queste fasi sono discussi nel documento. + +

+ Dimostrazione del flusso di AgentClinic +

+ +## 🖥️ Installazione + +### Opzione Python venv + +1. **Clona il Repository GitHub**: Inizia clonando il repository usando il comando: + ```bash + git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git + ``` + +2. **Configura e Attiva l'Ambiente Python** + ```bash + python -m venv venv_agent_lab + ``` + - Ora attiva questo ambiente: + ```bash + source venv_agent_lab/bin/activate + ``` + +3. **Installa le librerie richieste** + ```bash + pip install -r requirements.txt + ``` + +4. **Installa pdflatex [OPZIONALE]** + ```bash + sudo apt install pdflatex + ``` + - Questo permette agli agenti di compilare il codice sorgente LaTeX. + - **[IMPORTANTE]** Se questo passaggio non può essere eseguito a causa della mancanza di accesso sudo, la compilazione del pdf può essere disattivata eseguendo Agent Laboratory impostando il flag --compile_latex su false: --compile_latex=False + +5. **Ora esegui Agent Laboratory!** + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" + ``` + oppure, se non hai installato pdflatex + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False + ``` + +----- + +## Consigli per migliori risultati di ricerca + +#### [Consiglio #1] 📝 Assicurati di scrivere appunti dettagliati! 📝 + +**Scrivere appunti dettagliati è importante** per aiutare il tuo agente a comprendere cosa intendi realizzare nel tuo progetto, nonché eventuali preferenze di stile. Gli appunti possono includere qualsiasi esperimento che desideri che gli agenti eseguano, fornire chiavi API, determinati grafici o figure che desideri includere, o qualsiasi cosa tu voglia che l'agente sappia durante la ricerca. 
+ +Questa è anche la tua opportunità di far sapere all'agente **a quali risorse computazionali ha accesso**, ad esempio GPU (quante, che tipo di GPU, quanti GB), CPU (quanti core, che tipo di CPU), limitazioni di archiviazione e specifiche hardware. + +Per aggiungere appunti, devi modificare la struttura task_notes_LLM all'interno di ai_lab_repo.py. Di seguito è fornito un esempio di set di appunti utilizzati per alcuni dei nostri esperimenti. + +```python +task_notes_LLM = [ + {"phases": ["plan formulation"], + "note": f"You should come up with a plan for TWO experiments."}, + + {"phases": ["plan formulation", "data preparation", "running experiments"], + "note": "Please use gpt-4o-mini for your experiments."}, + + {"phases": ["running experiments"], + "note": f'Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n'}, + + {"phases": ["running experiments"], + "note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."}, + + {"phases": ["running experiments"], + "note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."}, + + {"phases": ["data preparation", "running experiments"], + "note": "You are running on a MacBook laptop. 
You can use 'mps' with PyTorch"}, + + {"phases": ["data preparation", "running experiments"], + "note": "Generate figures with very colorful and artistic design."}, + ] +``` + +-------- + +#### [Consiglio #2] 🚀 Utilizzare modelli più potenti generalmente porta a migliori ricerche 🚀 + +Quando si conduce una ricerca, **la scelta del modello può influenzare significativamente la qualità dei risultati**. I modelli più potenti tendono ad avere una maggiore accuratezza, migliori capacità di ragionamento e una migliore generazione dei rapporti. Se le risorse computazionali lo consentono, dai priorità all'uso di modelli avanzati come o1-(mini/preview) o simili modelli linguistici di grandi dimensioni all'avanguardia. + +Tuttavia, **è importante bilanciare le prestazioni e l'efficienza dei costi**. Sebbene i modelli potenti possano fornire risultati migliori, spesso sono più costosi e richiedono più tempo per essere eseguiti. Considera di usarli selettivamente—ad esempio, per esperimenti chiave o analisi finali—mentre ti affidi a modelli più piccoli ed efficienti per compiti iterativi o prototipazione iniziale. + +Quando le risorse sono limitate, **ottimizza effettuando il fine-tuning di modelli più piccoli** sul tuo dataset specifico o combinando modelli pre-addestrati con prompt specifici per il compito per raggiungere l'equilibrio desiderato tra prestazioni ed efficienza computazionale. + +----- + +#### [Consiglio #3] ✅ Puoi caricare salvataggi precedenti dai checkpoint ✅ + +**Se perdi i progressi, la connessione a internet o se un sotto-compito fallisce, puoi sempre caricare da uno stato precedente.** Tutti i tuoi progressi vengono salvati di default nella variabile state_saves, che memorizza ogni singolo checkpoint. 
Basta passare i seguenti argomenti quando esegui ai_lab_repo.py + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH" +``` + +----- + +#### [Consiglio #4] 🈯 Se stai utilizzando una lingua diversa dall'inglese 🈲 + +Se stai utilizzando Agent Laboratory in una lingua diversa dall'inglese, nessun problema, basta assicurarti di fornire un flag di lingua agli agenti per eseguire la ricerca nella tua lingua preferita. Nota che non abbiamo studiato approfonditamente l'utilizzo di Agent Laboratory in altre lingue, quindi assicurati di segnalare eventuali problemi che incontri. + +Ad esempio, se stai utilizzando in cinese: + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文" +``` + +---- + +#### [Consiglio #5] 🌟 C'è molto spazio per miglioramenti 🌟 + +C'è molto spazio per migliorare questo codice, quindi se alla fine apporti modifiche e vuoi aiutare la comunità, sentiti libero di condividere le modifiche che hai effettuato! Speriamo che questo strumento ti sia d'aiuto! + +## Riferimenti / Bibtex + +```bibtex +@preprint{schmidgall2025AgentLaboratory, + title={Agent Laboratory: Using LLM Agents as Research Assistants}, + author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad}, + year={2025} +} +``` \ No newline at end of file diff --git a/readme/README-japanese.md b/readme/README-japanese.md new file mode 100755 index 0000000..dd6f6e9 --- /dev/null +++ b/readme/README-japanese.md @@ -0,0 +1,163 @@ +# Agent Laboratory: Using LLM Agents as Research Assistants + +

+ Demonstration of the flow of Agent Laboratory +

+ + +

+ 【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】 +

+ +

+ 【🌐 Website | 💻 Software | 🎥 Video | 📚 Example Paper | 📰 Citation】 +

+ +## 📖 概要 + +- **Agent Laboratory**は、**あなた**が**研究アイデアを実現する**ために支援するエンドツーエンドの自律的な研究ワークフローです。Agent Laboratoryは、大規模言語モデルによって駆動される専門のエージェントで構成されており、文献レビューの実施や計画の策定から実験の実行、包括的な報告書の作成まで、研究の全過程をサポートします。 +- このシステムはあなたの創造性を置き換えるものではなく、補完するために設計されています。アイデアの創出や批判的思考に集中できるようにし、コーディングやドキュメント作成のような反復的で時間のかかる作業を自動化します。計算資源や人間の関与のレベルに応じて対応することで、Agent Laboratoryは科学的発見を加速し、研究の生産性を最適化することを目指しています。 + +

+ Demonstration of the flow of Agent Laboratory +

+ +### 🔬 Agent Laboratoryはどのように機能しますか? + +- Agent Laboratoryは、研究プロセスを体系的に導く3つの主要なフェーズから構成されています:(1)文献レビュー、(2)実験、(3)報告書作成。各フェーズでは、LLMによって駆動される専門のエージェントが協力してそれぞれの目標を達成し、arXiv、Hugging Face、Python、LaTeXなどの外部ツールを統合して成果を最適化します。この構造化されたワークフローは、関連する研究論文の独立した収集と分析から始まり、協力的な計画とデータ準備を経て、自動化された実験と包括的な報告書の生成に至ります。これらのフェーズ全体にわたる具体的なエージェントの役割と貢献の詳細は論文で説明されています。 + +

+ Demonstration of the flow of Agent Laboratory +

+ +## 🖥️ インストール + +### Python venv オプション + +1. **GitHubリポジトリをクローンする**: 以下のコマンドを使用してリポジトリをクローンします: +```bash +git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git +``` + +2. **Python環境を設定してアクティベートする** +```bash +python -m venv venv_agent_lab +``` + +- 次に、この環境をアクティベートします: +```bash +source venv_agent_lab/bin/activate +``` + +3. **必要なライブラリをインストールする** +```bash +pip install -r requirements.txt +``` + +4. **pdflatexをインストールする [オプション]** +```bash +sudo apt install pdflatex +``` + +- これにより、エージェントがLaTeXソースをコンパイルできるようになります。 +- **[重要]** sudo権限がないためにこのステップを実行できない場合、Agent Laboratoryを実行する際に --compile_latexフラグをfalseに設定してPDFのコンパイルをオフにすることができます: --compile_latex=False + +5. **Agent Laboratoryを実行します!** + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" +``` + +または、pdflatexがインストールされていない場合 + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False +``` + +----- +## より良い研究成果を得るためのヒント + + +#### [ヒント #1] 📝 詳細なノートを書くことを忘れずに! 
📝 + +**詳細なノートを書くことは重要です**。これにより、エージェントがプロジェクトで達成しようとしていることや、スタイルの好みを理解するのに役立ちます。ノートには、エージェントに実行してほしい実験、APIキーの提供、含めたい特定のプロットや図、研究を行う際にエージェントに知っておいてほしいことなどを含めることができます。 + +また、**エージェントがアクセスできる計算資源**を知らせる機会でもあります。例えば、GPU(数、種類、GB数)、CPU(コア数、種類)、ストレージの制限、ハードウェア仕様などです。 + +ノートを追加するには、ai_lab_repo.py内のtask_notes_LLM構造を変更する必要があります。以下に、いくつかの実験で使用されたノートの例を示します。 + +```python +task_notes_LLM = [ + {"phases": ["plan formulation"], + "note": f"You should come up with a plan for TWO experiments."}, + + {"phases": ["plan formulation", "data preparation", "running experiments"], + "note": "Please use gpt-4o-mini for your experiments."}, + + {"phases": ["running experiments"], + "note": f'Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n'}, + + {"phases": ["running experiments"], + "note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."}, + + {"phases": ["running experiments"], + "note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."}, + + {"phases": ["data preparation", "running experiments"], + "note": "You are running on a MacBook laptop. 
You can use 'mps' with PyTorch"}, + + {"phases": ["data preparation", "running experiments"], + "note": "Generate figures with very colorful and artistic design."}, + ] +``` + +-------- + +#### [ヒント #2] 🚀 より強力なモデルを使用することで、一般的により良い研究が可能になります 🚀 + +研究を行う際、**モデルの選択は結果の質に大きな影響を与える可能性があります**。より強力なモデルは、通常、精度が高く、推論能力が優れており、報告書の生成も優れています。計算資源が許す場合は、o1-(mini/preview)などの先進的な大規模言語モデルの使用を優先してください。 + +ただし、**性能と費用対効果のバランスを取ることが重要です**。強力なモデルはより良い結果をもたらす可能性がありますが、実行には時間と費用がかかることが多いです。重要な実験や最終分析には選択的に使用し、反復作業や初期のプロトタイピングには小さく効率的なモデルを使用することを検討してください。 + +資源が限られている場合は、**小さなモデルを特定のデータセットでファインチューニングするか、タスク固有のプロンプトと組み合わせて使用することで、性能と計算効率の間の望ましいバランスを達成します**。 + +----- + +#### [ヒント #3] ✅ チェックポイントから以前の保存をロードできます ✅ + +**進捗が失われた場合、インターネット接続が切れた場合、またはサブタスクが失敗した場合でも、以前の状態から常にロードできます。** すべての進捗はデフォルトでstate_saves変数に保存され、各チェックポイントが保存されます。ai_lab_repo.pyを実行する際に、以下の引数を渡すだけです + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH" +``` + +----- + + + + +#### [ヒント #4] 🈯 英語以外の言語で実行している場合 🈲 + +Agent Laboratoryを英語以外の言語で実行している場合でも問題ありません。エージェントが希望する言語で研究を行えるように、言語フラグを提供することを確認してください。Agent Laboratoryを他の言語で実行することについては十分に研究していないため、問題が発生した場合は必ず報告してください。 + +例えば、中国語で実行する場合: + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文" +``` + +---- + +#### [ヒント #5] 🌟 改善の余地がたくさんあります 🌟 + +このコードベースには改善の余地がたくさんありますので、変更を加えてコミュニティに貢献したい場合は、ぜひ変更内容を共有してください!このツールが皆さんのお役に立つことを願っています! 
+ +## 参考文献 / Bibtex + +```bibtex +@preprint{schmidgall2025AgentLaboratory, + title={Agent Laboratory: Using LLM Agents as Research Assistants}, + author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad}, + year={2025} +} +``` \ No newline at end of file diff --git a/readme/README-korean.md b/readme/README-korean.md new file mode 100755 index 0000000..fe9f5e2 --- /dev/null +++ b/readme/README-korean.md @@ -0,0 +1,166 @@ +# Agent Laboratory: Using LLM Agents as Research Assistants + +

+ Demonstration of the flow of Agent Laboratory +

+ + +

+ 【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】 +

+ +

+ 【🌐 Website | 💻 Software | 🎥 Video | 📚 Example Paper | 📰 Citation】 +

+ +## 📖 개요 + +- **Agent Laboratory**는 **당신**이 인간 연구자로서 **연구 아이디어를 구현**할 수 있도록 지원하는 엔드 투 엔드 자율 연구 워크플로우입니다. Agent Laboratory는 대규모 언어 모델에 의해 구동되는 전문화된 에이전트들로 구성되어 문헌 검토 수행, 계획 수립, 실험 실행, 종합 보고서 작성에 이르기까지 전체 연구 워크플로우를 지원합니다. +- 이 시스템은 당신의 창의성을 대체하기 위해 설계된 것이 아니라 보완하기 위해 설계되었습니다. 아이디어 발상과 비판적 사고에 집중할 수 있도록 하면서 코딩 및 문서화와 같은 반복적이고 시간이 많이 소요되는 작업을 자동화합니다. 다양한 수준의 컴퓨팅 자원과 인간의 참여를 수용함으로써 Agent Laboratory는 과학적 발견을 가속화하고 연구 생산성을 최적화하는 것을 목표로 합니다. + +

+ Demonstration of the flow of Agent Laboratory +

+ +### 🔬 Agent Laboratory는 어떻게 작동하나요? + +- Agent Laboratory는 연구 과정을 체계적으로 안내하는 세 가지 주요 단계로 구성됩니다: (1) 문헌 검토, (2) 실험, (3) 보고서 작성. 각 단계 동안 LLM에 의해 구동되는 전문화된 에이전트들이 협력하여 개별 목표를 달성하며, arXiv, Hugging Face, Python, LaTeX와 같은 외부 도구를 통합하여 결과를 최적화합니다. 이 구조화된 워크플로우는 관련 연구 논문의 독립적인 수집 및 분석으로 시작하여, 협력적인 계획 수립 및 데이터 준비를 거쳐, 자동화된 실험 실행 및 종합적인 보고서 생성으로 이어집니다. 이러한 단계 전반에 걸친 특정 에이전트 역할과 기여에 대한 자세한 내용은 논문에서 논의됩니다. + +

+ Demonstration of the flow of Agent Laboratory +

+ +## 🖥️ 설치 + +### Python venv 옵션 + + +1. **GitHub 저장소 복제**: 다음 명령어를 사용하여 저장소를 복제합니다: + ```bash + git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git + ``` + + +2. **Python 환경 설정 및 활성화** + ```bash + python -m venv venv_agent_lab + ``` + + - 이제 이 환경을 활성화합니다: + ```bash + source venv_agent_lab/bin/activate + ``` + + +3. **필수 라이브러리 설치** + ```bash + pip install -r requirements.txt + ``` + + +4. **pdflatex 설치 [옵션]** + ```bash + sudo apt install pdflatex + ``` + + - 이는 에이전트들이 LaTeX 소스를 컴파일할 수 있도록 합니다. + - **[중요]** sudo 접근 권한이 없어 이 단계를 실행할 수 없는 경우, --compile_latex 플래그를 false로 설정하여 Agent Laboratory 실행 시 PDF 컴파일을 비활성화할 수 있습니다: `--compile_latex=False` + + +5. **이제 Agent Laboratory를 실행하세요!** + + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" + ``` + + 또는, pdflatex가 설치되어 있지 않은 경우 + + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False + ``` + +----- +## 더 나은 연구 결과를 위한 팁 + +#### [팁 #1] 📝 광범위한 노트를 작성하세요! 📝 + +**광범위한 노트 작성은** 에이전트가 프로젝트에서 달성하려는 목표와 스타일 선호도를 이해하는 데 중요합니다. 노트에는 에이전트에게 수행하도록 원하는 실험, API 키 제공, 포함하고 싶은 특정 플롯이나 그림, 또는 연구를 수행할 때 에이전트가 알아야 할 모든 내용을 포함할 수 있습니다. + +또한, **에이전트가 접근할 수 있는 컴퓨팅 자원**을 알려줄 수 있는 기회이기도 합니다. 예를 들어 GPU (몇 개, 어떤 유형의 GPU, GB 수), CPU (코어 수, CPU 유형), 저장 한계 및 하드웨어 사양 등을 포함할 수 있습니다. + +노트를 추가하려면, ai_lab_repo.py 내부의 `task_notes_LLM` 구조를 수정해야 합니다. 아래는 일부 실험에 사용된 노트의 예시입니다. 
+ +```python +task_notes_LLM = [ + {"phases": ["plan formulation"], + "note": f"You should come up with a plan for TWO experiments."}, + + {"phases": ["plan formulation", "data preparation", "running experiments"], + "note": "Please use gpt-4o-mini for your experiments."}, + + {"phases": ["running experiments"], + "note": f'Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n'}, + + {"phases": ["running experiments"], + "note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."}, + + {"phases": ["running experiments"], + "note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."}, + + {"phases": ["data preparation", "running experiments"], + "note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"}, + + {"phases": ["data preparation", "running experiments"], + "note": "Generate figures with very colorful and artistic design."}, + ] +``` + +-------- + +#### [팁 #2] 🚀 더 강력한 모델을 사용하는 것이 일반적으로 더 나은 연구로 이어집니다 🚀 + +연구를 수행할 때, **모델의 선택은 결과의 질에 상당한 영향을 미칠 수 있습니다**. 더 강력한 모델은 일반적으로 더 높은 정확도, 더 나은 추론 능력, 더 우수한 보고서 생성을 제공합니다. 컴퓨팅 자원이 허용한다면, o1-(mini/preview)와 같은 최첨단 대규모 언어 모델과 같은 고급 모델의 사용을 우선시하세요. + +그러나, **성능과 비용 효율성의 균형을 맞추는 것이 중요합니다**. 강력한 모델은 더 나은 결과를 제공할 수 있지만, 실행하는 데 비용과 시간이 더 많이 소요되는 경우가 많습니다. 예를 들어, 핵심 실험이나 최종 분석에는 고급 모델을 선택적으로 사용하고, 반복 작업이나 초기 프로토타이핑에는 더 작고 효율적인 모델을 사용하는 것을 고려하세요. 
+ +자원이 제한된 경우, **작은 모델을 특정 데이터셋에 맞게 미세 조정하거나, 사전 훈련된 모델과 작업 특화 프롬프트를 결합하여 성능과 컴퓨팅 효율성 사이의 원하는 균형을 달성할 수 있습니다**. + +----- + +#### [팁 #3] ✅ 체크포인트에서 이전 저장 상태를 불러올 수 있습니다 ✅ + +**진행 상황을 잃었거나 인터넷 연결이 끊기거나 하위 작업이 실패한 경우, 이전 상태에서 항상 불러올 수 있습니다.** 모든 진행 상황은 기본적으로 `state_saves` 변수에 저장되며, 이는 각 개별 체크포인트를 저장합니다. ai_lab_repo.py를 실행할 때 다음 인수를 전달하면 됩니다. + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH" +``` + +----- + +#### [팁 #4] 🈯 영어가 아닌 다른 언어로 실행하는 경우 🈲 + +Agent Laboratory를 영어가 아닌 다른 언어로 실행하는 경우, 문제 없습니다. 단, 에이전트가 선호하는 언어로 연구를 수행할 수 있도록 언어 플래그를 제공해야 합니다. 다른 언어로 Agent Laboratory를 실행하는 것에 대해 광범위하게 연구하지 않았으므로, 발생하는 문제를 반드시 보고해 주세요. + +예를 들어, 중국어로 실행하는 경우: + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文" +``` + +---- + +#### [팁 #5] 🌟 개선의 여지가 많습니다 🌟 + +이 코드베이스를 개선할 여지가 많으므로, 변경을 가하고 커뮤니티에 기여하고 싶다면, 변경한 사항을 자유롭게 공유해 주세요! 이 도구가 여러분에게 도움이 되길 바랍니다! + +## 참고 문헌 / Bibtex + + + +```bibtex +@preprint{schmidgall2025AgentLaboratory, + title={Agent Laboratory: Using LLM Agents as Research Assistants}, + author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad}, + year={2025} +} +``` \ No newline at end of file diff --git a/readme/README-portugese.md b/readme/README-portugese.md new file mode 100755 index 0000000..201b768 --- /dev/null +++ b/readme/README-portugese.md @@ -0,0 +1,159 @@ +# Agent Laboratory: Usando Agentes LLM como Assistentes de Pesquisa + +

+ Demonstration of the flow of Agent Laboratory +

+ + +

+ 【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】 +

+ +

+ 【🌐 Website | 💻 Software | 🎥 Video | 📚 Example Paper | 📰 Citation】 +

+ +## 📖 Visão Geral + +- **Agent Laboratory** é um fluxo de trabalho de pesquisa autônomo de ponta a ponta, destinado a auxiliar **você** como pesquisador humano na **implementação das suas ideias de pesquisa**. O Agent Laboratory consiste em agentes especializados movidos por grandes modelos de linguagem para apoiá-lo durante todo o fluxo de trabalho de pesquisa — desde a condução de revisões de literatura e formulação de planos até a execução de experimentos e a redação de relatórios abrangentes. +- Este sistema não foi projetado para substituir a sua criatividade, mas para complementá-la, permitindo que você se concentre na ideação e no pensamento crítico enquanto automatiza tarefas repetitivas e que consomem muito tempo, como codificação e documentação. Ao acomodar diferentes níveis de recursos computacionais e envolvimento humano, o Agent Laboratory visa acelerar a descoberta científica e otimizar a sua produtividade em pesquisa. + +

+ Demonstration of the flow of Agent Laboratory +

+ +### 🔬 Como funciona o Agent Laboratory? + +- O Agent Laboratory consiste em três fases principais que orientam sistematicamente o processo de pesquisa: (1) Revisão de Literatura, (2) Experimentação e (3) Redação de Relatórios. Durante cada fase, agentes especializados movidos por LLMs colaboram para alcançar objetivos distintos, integrando ferramentas externas como arXiv, Hugging Face, Python e LaTeX para otimizar os resultados. Este fluxo de trabalho estruturado começa com a coleta e análise independentes de artigos de pesquisa relevantes, avança através do planejamento colaborativo e preparação de dados, e resulta em experimentação automatizada e geração de relatórios abrangentes. Detalhes sobre os papéis específicos dos agentes e suas contribuições ao longo dessas fases são discutidos no artigo. + +

+ Demonstration of the flow of Agent Laboratory +

+ +## 🖥️ Instalação + +### Opção de ambiente virtual Python (venv) + +1. **Clone o Repositório do GitHub**: Comece clonando o repositório usando o comando: + ```bash + git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git + ``` + +2. **Configure e Ative o Ambiente Python** + ```bash + python -m venv venv_agent_lab + ``` + + - Agora, ative este ambiente: + ```bash + source venv_agent_lab/bin/activate + ``` + +3. **Instale as bibliotecas necessárias** + ```bash + pip install -r requirements.txt + ``` + +4. **Instale o pdflatex [OPCIONAL]** + ```bash + sudo apt install pdflatex + ``` + + - Isso permite que o código LaTeX seja compilado pelos agentes. + - **[IMPORTANTE]** Se esta etapa não puder ser executada devido à falta de acesso sudo, a compilação de PDF pode ser desativada executando o Agent Laboratory com a flag --compile_latex definida como false: --compile_latex=False + +5. **Agora execute o Agent Laboratory!** + + ```bash + python ai_lab_repo.py --api-key "API_KEY_AQUI" --llm-backend "o1-mini" --research-topic "SUA IDEIA DE PESQUISA" + ``` + + ou, se você não tiver o pdflatex instalado + + ```bash + python ai_lab_repo.py --api-key "API_KEY_AQUI" --llm-backend "o1-mini" --research-topic "SUA IDEIA DE PESQUISA" --compile_latex=False + ``` + +----- +## Dicas para melhores resultados de pesquisa + +#### [Dica #1] 📝 Certifique-se de escrever notas extensas! 📝 + +**Escrever notas extensas é importante** para ajudar seu agente a entender o que você está tentando realizar em seu projeto, bem como quaisquer preferências de estilo. As notas podem incluir quaisquer experimentos que você deseja que os agentes realizem, fornecendo chaves de API, certos gráficos ou figuras que você deseja incluir, ou qualquer coisa que você queira que o agente saiba ao realizar a pesquisa. 
+ +Esta também é sua oportunidade de informar ao agente **a quais recursos de computação ele tem acesso**, por exemplo, GPUs (quantas, que tipo de GPU, quantos GBs), CPUs (quantos núcleos, que tipo de CPUs), limitações de armazenamento e especificações de hardware. + +Para adicionar notas, você deve modificar a estrutura task_notes_LLM dentro de ai_lab_repo.py. Abaixo está um exemplo de conjunto de notas usadas em alguns de nossos experimentos. + +```python +task_notes_LLM = [ + {"phases": ["plan formulation"], + "note": f"You should come up with a plan for TWO experiments."}, + + {"phases": ["plan formulation", "data preparation", "running experiments"], + "note": "Please use gpt-4o-mini for your experiments."}, + + {"phases": ["running experiments"], + "note": f'Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n'}, + + {"phases": ["running experiments"], + "note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."}, + + {"phases": ["running experiments"], + "note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."}, + + {"phases": ["data preparation", "running experiments"], + "note": "You are running on a MacBook laptop. 
You can use 'mps' with PyTorch"}, + + {"phases": ["data preparation", "running experiments"], + "note": "Generate figures with very colorful and artistic design."}, + ] +``` + +-------- + +#### [Dica #2] 🚀 Usar modelos mais poderosos geralmente leva a melhores pesquisas 🚀 + +Ao conduzir pesquisas, **a escolha do modelo pode impactar significativamente a qualidade dos resultados**. Modelos mais poderosos tendem a ter maior precisão, melhores capacidades de raciocínio e melhor geração de relatórios. Se os recursos computacionais permitirem, priorize o uso de modelos avançados como o1-(mini/preview) ou modelos de linguagem grandes de última geração similares. + +No entanto, **é importante equilibrar desempenho e custo-benefício**. Embora modelos poderosos possam gerar melhores resultados, eles geralmente são mais caros e consomem mais tempo para serem executados. Considere usá-los seletivamente — por exemplo, para experimentos chave ou análises finais — enquanto confia em modelos menores e mais eficientes para tarefas iterativas ou prototipagem inicial. + +Quando os recursos são limitados, **otimize ajustando modelos menores** no seu conjunto de dados específico ou combinando modelos pré-treinados com prompts específicos para a tarefa para alcançar o equilíbrio desejado entre desempenho e eficiência computacional. + +----- + +#### [Dica #3] ✅ Você pode carregar salvamentos anteriores a partir de checkpoints ✅ + +**Se você perder o progresso, conexão com a internet ou se uma subtarefa falhar, você sempre pode carregar a partir de um estado anterior.** Todo o seu progresso é salvo por padrão na variável state_saves, que armazena cada checkpoint individual. 
Basta passar os seguintes argumentos ao executar ai_lab_repo.py + +```bash +python ai_lab_repo.py --api-key "API_KEY_AQUI" --research-topic "SUA IDEIA DE PESQUISA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH" +``` + +----- + +#### [Dica #4] 🈯 Se você estiver executando em um idioma diferente do inglês 🈲 + +Se você estiver executando o Agent Laboratory em um idioma diferente do inglês, sem problema, apenas certifique-se de fornecer uma flag de idioma para que os agentes realizem a pesquisa no seu idioma preferido. Observe que não estudamos extensivamente a execução do Agent Laboratory em outros idiomas, portanto, certifique-se de relatar quaisquer problemas que encontrar. + +Por exemplo, se você estiver executando em chinês: + +```bash +python ai_lab_repo.py --api-key "API_KEY_AQUI" --research-topic "SUA IDEIA DE PESQUISA (no seu idioma)" --llm-backend "o1-mini" --language "中文" +``` + +---- + +#### [Dica #5] 🌟 Há muito espaço para melhorias 🌟 + +Há muito espaço para melhorar esta base de código, então se você acabar fazendo alterações e quiser ajudar a comunidade, sinta-se à vontade para compartilhar as mudanças que você fez! Esperamos que esta ferramenta lhe seja útil! + +## Referência / Bibtex + +```bibtex +@preprint{schmidgall2025AgentLaboratory, + title={Agent Laboratory: Using LLM Agents as Research Assistants}, + author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad}, + year={2025} +} +``` \ No newline at end of file diff --git a/readme/README-russian.md b/readme/README-russian.md new file mode 100755 index 0000000..6e23931 --- /dev/null +++ b/readme/README-russian.md @@ -0,0 +1,161 @@ +# Лаборатория Агентов: Использование агентов на основе больших языковых моделей в качестве научных ассистентов + +

+ Demonstration of the flow of Agent Laboratory +

+ + +

+ 【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】 +

+ +

+ 【🌐 Веб-сайт | 💻 Программное обеспечение | 🎥 Видео | 📚 Пример статьи | 📰 Цитирование】 +

+ +## 📖 Обзор + +- **Лаборатория Агентов** — это автономный исследовательский процесс от начала до конца, предназначенный для помощи **вам** как человеческому исследователю в **реализации ваших исследовательских идей**. Лаборатория Агентов состоит из специализированных агентов, управляемых большими языковыми моделями, которые поддерживают вас на протяжении всего исследовательского процесса — от проведения обзора литературы и формулирования планов до выполнения экспериментов и написания подробных отчетов. +- Эта система не предназначена для замены вашего творчества, а дополняет его, позволяя вам сосредоточиться на генерации идей и критическом мышлении, одновременно автоматизируя повторяющиеся и времязатратные задачи, такие как кодирование и документирование. Адаптируясь к различным уровням вычислительных ресурсов и вовлеченности человека, Лаборатория Агентов стремится ускорить научные открытия и оптимизировать вашу исследовательскую продуктивность. + +

+ Demonstration of the flow of Agent Laboratory +

+ +### 🔬 Как работает Лаборатория Агентов? + +- Лаборатория Агентов состоит из трех основных фаз, которые систематически направляют исследовательский процесс: (1) Обзор литературы, (2) Экспериментирование и (3) Написание отчета. В каждой фазе специализированные агенты, управляемые большими языковыми моделями, сотрудничают для достижения отдельных целей, интегрируя внешние инструменты, такие как arXiv, Hugging Face, Python и LaTeX, для оптимизации результатов. Эта структурированная рабочая схема начинается с независимого сбора и анализа соответствующих научных работ, проходит через совместное планирование и подготовку данных и заканчивается автоматизированным проведением экспериментов и созданием подробных отчетов. Детали конкретных ролей агентов и их вклад на каждом этапе обсуждаются в статье. + +

+ Demonstration of the flow of Agent Laboratory +

+ +## 🖥️ Установка + +### Вариант с использованием Python venv + +1. **Клонируйте репозиторий GitHub**: Начните с клонирования репозитория с помощью команды: + ```bash + git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git + ``` + +2. **Настройте и активируйте Python окружение** + ```bash + python -m venv venv_agent_lab + ``` + + - Теперь активируйте это окружение: + ```bash + source venv_agent_lab/bin/activate + ``` + +3. **Установите необходимые библиотеки** + ```bash + pip install -r requirements.txt + ``` + +4. **Установите pdflatex [ОПЦИОНАЛЬНО]** + ```bash + sudo apt install pdflatex + ``` + + - Это позволяет агентам компилировать исходный код LaTeX. + - **[ВАЖНО]** Если этот шаг невозможно выполнить из-за отсутствия прав sudo, можно отключить компиляцию pdf, запустив Лабораторию Агентов с флагом --compile_latex, установленным в false: --compile_latex=False + +5. **Теперь запустите Лабораторию Агентов!** + + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "ВАША ИССЛЕДОВАТЕЛЬСКАЯ ИДЕЯ" + ``` + + или, если у вас не установлен pdflatex + + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "ВАША ИССЛЕДОВАТЕЛЬСКАЯ ИДЕЯ" --compile_latex=False + ``` + +----- + +## Советы для лучших исследовательских результатов + +#### [Совет №1] 📝 Обязательно записывайте подробные заметки! 📝 + +**Ведение подробных заметок важно** для того, чтобы ваш агент понимал, что вы хотите достичь в вашем проекте, а также любые предпочтения в стиле. Заметки могут включать любые эксперименты, которые вы хотите, чтобы агенты выполняли, предоставление API-ключей, определенные графики или фигуры, которые вы хотите включить, или любую информацию, которую вы хотите, чтобы агент знал при проведении исследований. 
+ +Это также ваша возможность сообщить агенту, **какие вычислительные ресурсы у него есть**, например, GPU (сколько, какой тип GPU, сколько GB), CPU (сколько ядер, какой тип CPU), ограничения по памяти и спецификации оборудования. + +Чтобы добавить заметки, необходимо изменить структуру task_notes_LLM внутри файла ai_lab_repo.py. Ниже приведен пример набора заметок, использованных в некоторых наших экспериментах. + +```python +task_notes_LLM = [ + {"phases": ["plan formulation"], + "note": f"You should come up with a plan for TWO experiments."}, + + {"phases": ["plan formulation", "data preparation", "running experiments"], + "note": "Please use gpt-4o-mini for your experiments."}, + + {"phases": ["running experiments"], + "note": f'Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n'}, + + {"phases": ["running experiments"], + "note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."}, + + {"phases": ["running experiments"], + "note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."}, + + {"phases": ["data preparation", "running experiments"], + "note": "You are running on a MacBook laptop. 
You can use 'mps' with PyTorch"}, + + {"phases": ["data preparation", "running experiments"], + "note": "Generate figures with very colorful and artistic design."}, + ] +``` + +-------- + +#### [Совет №2] 🚀 Использование более мощных моделей обычно приводит к лучшим исследованиям 🚀 + +При проведении исследований, **выбор модели может значительно повлиять на качество результатов**. Более мощные модели, как правило, имеют более высокую точность, лучшие способности к рассуждению и более качественное генерирование отчетов. Если вычислительные ресурсы позволяют, отдавайте предпочтение использованию продвинутых моделей, таких как o1-(mini/preview) или подобных современных больших языковых моделей. + +Однако, **важно балансировать между производительностью и экономической эффективностью**. Хотя мощные модели могут давать лучшие результаты, они часто дороже и требуют больше времени для выполнения. Рассмотрите возможность использования их выборочно — например, для ключевых экспериментов или окончательных анализов — в то время как для итеративных задач или начального прототипирования полагайтесь на более маленькие и эффективные модели. + +Когда ресурсы ограничены, **оптимизируйте, дорабатывая более маленькие модели** на вашем конкретном наборе данных или комбинируя предобученные модели с специфическими для задачи подсказками, чтобы достичь желаемого баланса между производительностью и вычислительной эффективностью. + +----- + +#### [Совет №3] ✅ Вы можете загрузить предыдущие сохранения из контрольных точек ✅ + +**Если вы потеряете прогресс, потеряете интернет-соединение или если подзадача завершится неудачей, вы всегда можете загрузить предыдущую версию.** Весь ваш прогресс сохраняется по умолчанию в переменной state_saves, которая хранит каждую отдельную контрольную точку. 
Просто передайте следующие аргументы при запуске ai_lab_repo.py: + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "ВАША ИССЛЕДОВАТЕЛЬСКАЯ ИДЕЯ" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH" +``` + +----- + +#### [Совет №4] 🈯 Если вы работаете на другом языке, кроме английского 🈲 + +Если вы запускаете Лабораторию Агентов на другом языке, кроме английского, это не проблема, просто убедитесь, что вы предоставили языковой флаг агентам для проведения исследований на предпочитаемом вами языке. Обратите внимание, что мы не проводили обширных исследований по запуску Лаборатории Агентов на других языках, поэтому обязательно сообщайте о любых возникающих проблемах. + +Например, если вы работаете на китайском языке: + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "ВАША ИССЛЕДОВАТЕЛЬСКАЯ ИДЕЯ (на вашем языке)" --llm-backend "o1-mini" --language "中文" +``` + +---- + +#### [Совет №5] 🌟 Есть много возможностей для улучшения 🌟 + +Есть много возможностей для улучшения этой кодовой базы, поэтому если вы внесете изменения и захотите помочь сообществу, пожалуйста, не стесняйтесь поделиться внесенными изменениями! Мы надеемся, что этот инструмент вам поможет! + +## Ссылки / Bibtex + +bibtex +```bibtex +@preprint{schmidgall2025AgentLaboratory, + title={Agent Laboratory: Using LLM Agents as Research Assistants}, + author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad}, + year={2025} +} +``` \ No newline at end of file diff --git a/readme/README-slovak.md b/readme/README-slovak.md new file mode 100755 index 0000000..3685508 --- /dev/null +++ b/readme/README-slovak.md @@ -0,0 +1,157 @@ +# Agent Laboratory: Používanie LLM Agentov ako Výskumných Asistentov + +

+ Demonštrácia toku AgentClinic +

+ + +

+ 【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】 +

+ +

+ 【🌐 Webová stránka | 💻 Softvér | 🎥 Video | 📚 Príkladový článok | 📰 Citácia】 +

+ +## 📖 Prehľad + +- **Agent Laboratory** je autonómny výskumný pracovný postup od začiatku do konca, ktorý má za úlohu asistovať **vám** ako ľudskému výskumníkovi pri **realizácii vašich výskumných nápadov**. Agent Laboratory pozostáva zo špecializovaných agentov poháňaných veľkými jazykovými modelmi, ktorí vás podporujú počas celého výskumného procesu – od vykonávania literárnych prehľadov a formulovania plánov až po realizáciu experimentov a písanie komplexných správ. +- Tento systém nie je navrhnutý na nahradenie vašej kreativity, ale na jej doplnenie, čo vám umožňuje sústrediť sa na tvorivosť a kritické myslenie pri automatizácii opakujúcich sa a časovo náročných úloh, ako je kódovanie a dokumentácia. Tým, že zohľadňuje rôzne úrovne výpočtových zdrojov a ľudského zapojenia, Agent Laboratory má za cieľ urýchliť vedecké objavy a optimalizovať vašu výskumnú produktivitu. + +

+ Demonštrácia toku AgentClinic +

+ +### 🔬 Ako Agent Laboratory funguje? + +- Agent Laboratory sa skladá z troch hlavných fáz, ktoré systematicky usmerňujú výskumný proces: (1) Literárny prehľad, (2) Experimentovanie a (3) Písanie správ. Počas každej fázy špecializovaní agenti poháňaní LLM spolupracujú na dosiahnutí konkrétnych cieľov, integrujúc externé nástroje ako arXiv, Hugging Face, Python a LaTeX na optimalizáciu výsledkov. Táto štruktúrovaná pracovná postupnosť začína nezávislým zhromažďovaním a analýzou relevantných výskumných prác, pokračuje cez kolaboratívne plánovanie a prípravu dát a končí automatizovaným experimentovaním a komplexnou generáciou správ. Podrobnosti o konkrétnych rolách agentov a ich príspevkoch v rámci týchto fáz sú diskutované v článku. + +

+ Demonštrácia toku AgentClinic +

+ +## 🖥️ Inštalácia + +### Python venv možnosť + +1. **Naklonujte GitHub repozitár**: Začnite klonovaním repozitára pomocou príkazu: + ```bash + git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git + ``` + +2. **Nastavte a aktivujte Python prostredie** + ```bash + python -m venv venv_agent_lab + ``` + + - Teraz aktivujte toto prostredie: + ```bash + source venv_agent_lab/bin/activate + ``` + +3. **Nainštalujte požadované knižnice** + ```bash + pip install -r requirements.txt + ``` + +4. **Nainštalujte pdflatex [VOLITEĽNÉ]** + ```bash + sudo apt install pdflatex + ``` + + - Toto umožňuje agentom kompilovať latex zdroj. + - **[DÔLEŽITÉ]** Ak tento krok nemôžete vykonať kvôli absencii sudo prístupu, kompiláciu pdf môžete vypnúť spustením Agent Laboratory s nastavením vlajky --compile_latex na false: `--compile_latex=False` + +5. **Teraz spustite Agent Laboratory!** + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" + ``` + + alebo, ak nemáte nainštalovaný pdflatex + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False + ``` + +----- +## Tipy pre lepšie výskumné výsledky + +#### [Tip #1] 📝 Uistite sa, že píšete rozsiahle poznámky! 📝 + +**Písanie rozsiahlych poznámok je dôležité** pre pomoc vášmu agentovi pochopiť, čo sa snažíte dosiahnuť vo vašom projekte, ako aj akékoľvek preferencie štýlu. Poznámky môžu obsahovať akékoľvek experimenty, ktoré chcete, aby agenti vykonali, poskytovanie API kľúčov, určité grafy alebo figúry, ktoré chcete zahrnúť, alebo čokoľvek, čo chcete, aby agent vedel pri vykonávaní výskumu. + +Je to tiež vaša príležitosť informovať agenta, **aké výpočtové zdroje má k dispozícii**, napr. GPU (koľko, aký typ GPU, koľko GB), CPU (koľko jadier, aký typ CPU), obmedzenia úložiska a hardvérové špecifikácie. 
+ +Aby ste pridali poznámky, musíte upraviť štruktúru `task_notes_LLM` v súbore `ai_lab_repo.py`. Nižšie je uvedený príklad sady poznámok použitých pre niektoré naše experimenty. + +```python +task_notes_LLM = [ + {"phases": ["plan formulation"], + "note": f"You should come up with a plan for TWO experiments."}, + + {"phases": ["plan formulation", "data preparation", "running experiments"], + "note": "Please use gpt-4o-mini for your experiments."}, + + {"phases": ["running experiments"], + "note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n"}, + + {"phases": ["running experiments"], + "note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."}, + + {"phases": ["running experiments"], + "note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."}, + + {"phases": ["data preparation", "running experiments"], + "note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"}, + + {"phases": ["data preparation", "running experiments"], + "note": "Generate figures with very colorful and artistic design."}, + ] +``` + +-------- + +#### [Tip #2] 🚀 Používanie výkonnejších modelov zvyčajne vedie k lepšiemu výskumu 🚀 + +Pri vykonávaní výskumu môže **výber modelu významne ovplyvniť kvalitu výsledkov**. Výkonnejšie modely majú tendenciu mať vyššiu presnosť, lepšie schopnosti logického uvažovania a lepšiu generáciu správ. 
Ak výpočtové zdroje umožňujú, uprednostnite používanie pokročilých modelov, ako sú o1-(mini/preview) alebo podobné najmodernejšie veľké jazykové modely. + +Avšak, **je dôležité nájsť rovnováhu medzi výkonom a nákladovou efektívnosťou**. Zatiaľ čo výkonnejšie modely môžu priniesť lepšie výsledky, často sú drahšie a časovo náročnejšie na spustenie. Zvážte ich selektívne používanie – napríklad pre kľúčové experimenty alebo konečné analýzy – zatiaľ čo na iteratívne úlohy alebo počiatočné prototypovanie sa spoliehajte na menšie, efektívnejšie modely. + +Keď sú zdroje obmedzené, **optimalizujte jemným ladením menších modelov** na vašich špecifických dátach alebo kombinovaním predtrénovaných modelov s úlohovo špecifickými promptami, aby ste dosiahli požadovanú rovnováhu medzi výkonom a výpočtovou efektívnosťou. + +----- + +#### [Tip #3] ✅ Môžete načítať predchádzajúce uloženia z kontrolných bodov ✅ + +**Ak stratíte postup, internetové pripojenie alebo ak sa podúloha nepodarí, môžete vždy načítať z predchádzajúceho stavu.** Všetok váš postup je predvolene uložený v premennej `state_saves`, ktorá ukladá každý jednotlivý kontrolný bod. Stačí pri spúšťaní `ai_lab_repo.py` zadať nasledujúce argumenty: + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH" +``` + +----- + +#### [Tip #4] 🈯 Ak pracujete v inom jazyku než angličtine 🈲 + +Ak spúšťate Agent Laboratory v inom jazyku než v angličtine, nie je problém, stačí zabezpečiť, aby ste agentom poskytli jazykovú vlajku pre vykonávanie výskumu vo vašom preferovanom jazyku. Všimnite si, že sme neštudovali dôkladne spúšťanie Agent Laboratory v iných jazykoch, preto určite hláste akékoľvek problémy, na ktoré narazíte. 
+ +Napríklad, ak pracujete v čínštine: + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文" +``` + +---- + +#### [Tip #5] 🌟 Je tu veľa priestoru na zlepšenie 🌟 + +Je tu veľa priestoru na zlepšenie tohto kódu, takže ak urobíte zmeny a chcete pomôcť komunite, neváhajte zdieľať zmeny, ktoré ste vykonali! Dúfame, že vám tento nástroj pomôže! + +## Reference / Bibtex + +```bibtex +@preprint{schmidgall2025AgentLaboratory, + title={Agent Laboratory: Using LLM Agents as Research Assistants}, + author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad}, + year={2025} +} +``` \ No newline at end of file diff --git a/readme/README-spanish.md b/readme/README-spanish.md new file mode 100755 index 0000000..8ea6895 --- /dev/null +++ b/readme/README-spanish.md @@ -0,0 +1,168 @@ +# Agent Laboratory: Using LLM Agents as Research Assistants + + +

+ Demostración del flujo de AgentClinic +

+ + +

+ 【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】 +

+ +

+ 【🌐 Sitio web | 💻 Software | 🎥 Video | 📚 Artículo de ejemplo | 📰 Citación】 +

+ +## 📖 Overview + +- **Agent Laboratory** es un flujo de trabajo de investigación autónomo de extremo a extremo diseñado para asistir **a ti** como investigador humano en **implementar tus ideas de investigación**. Agent Laboratory consiste en agentes especializados impulsados por grandes modelos de lenguaje para apoyarte a lo largo de todo el flujo de trabajo de investigación, desde la realización de revisiones bibliográficas y la formulación de planes hasta la ejecución de experimentos y la redacción de informes comprensivos. +- Este sistema no está diseñado para reemplazar tu creatividad, sino para complementarla, permitiéndote enfocarte en la ideación y el pensamiento crítico mientras automatiza tareas repetitivas y que consumen mucho tiempo, como la programación y la documentación. Al acomodar diferentes niveles de recursos computacionales e implicación humana, Agent Laboratory tiene como objetivo acelerar el descubrimiento científico y optimizar tu productividad en la investigación. + +

+ Demostración del flujo de AgentClinic +

+ +### 🔬 How does Agent Laboratory work? + +- Agent Laboratory consta de tres fases principales que guían sistemáticamente el proceso de investigación: (1) Revisión de Literatura, (2) Experimentación y (3) Redacción de Informes. Durante cada fase, agentes especializados impulsados por LLM colaboran para lograr objetivos distintos, integrando herramientas externas como arXiv, Hugging Face, Python y LaTeX para optimizar los resultados. Este flujo de trabajo estructurado comienza con la recolección y análisis independiente de artículos de investigación relevantes, avanza a través de la planificación colaborativa y la preparación de datos, y culmina en la experimentación automatizada y la generación de informes comprensivos. Los detalles sobre roles específicos de los agentes y sus contribuciones a lo largo de estas fases se discuten en el documento. + +

+ Demostración del flujo de AgentClinic +

+ +## 🖥️ Installation + +### Python venv option + + +1. **Clonar el Repositorio de GitHub**: Comienza clonando el repositorio usando el comando: + ```bash + git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git + ``` + + +2. **Configurar y Activar el Entorno de Python** + ```bash + python -m venv venv_agent_lab + ``` + + - Ahora activa este entorno: + ```bash + source venv_agent_lab/bin/activate + ``` + + +3. **Instalar las librerías requeridas** + ```bash + pip install -r requirements.txt + ``` + + +4. **Instalar pdflatex [OPCIONAL]** + ```bash + sudo apt install pdflatex + ``` + + - Esto permite que las fuentes de LaTeX sean compiladas por los agentes. + - **[IMPORTANTE]** Si no puedes ejecutar este paso debido a la falta de acceso sudo, la compilación de PDF puede desactivarse ejecutando Agent Laboratory configurando la bandera `--compile_latex` a falso: `--compile_latex=False` + + +5. **¡Ahora ejecuta Agent Laboratory!** + + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" + ``` + + o, si no tienes pdflatex instalado + + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False + ``` + +----- +## Consejos para mejores resultados de investigación + + +#### [Consejo #1] 📝 ¡Asegúrate de escribir notas extensas! 📝 + +**Escribir notas extensas es importante** para ayudar a tu agente a comprender lo que buscas lograr en tu proyecto, así como cualquier preferencia de estilo. Las notas pueden incluir cualquier experimento que desees que los agentes realicen, proporcionar claves de API, ciertos gráficos o figuras que quieras incluir, o cualquier cosa que quieras que el agente sepa al realizar la investigación. 
+ +Esta también es tu oportunidad para informar al agente **a qué recursos computacionales tiene acceso**, por ejemplo, GPUs (cuántas, qué tipo de GPU, cuántos GB), CPUs (cuántos núcleos, qué tipo de CPUs), limitaciones de almacenamiento y especificaciones de hardware. + +Para agregar notas, debes modificar la estructura `task_notes_LLM` dentro de `ai_lab_repo.py`. A continuación se proporciona un ejemplo de conjunto de notas utilizadas en algunos de nuestros experimentos. + +```python +task_notes_LLM = [ + {"phases": ["plan formulation"], + "note": f"You should come up with a plan for TWO experiments."}, + + {"phases": ["plan formulation", "data preparation", "running experiments"], + "note": "Please use gpt-4o-mini for your experiments."}, + + {"phases": ["running experiments"], + "note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n"}, + + {"phases": ["running experiments"], + "note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."}, + + {"phases": ["running experiments"], + "note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."}, + + {"phases": ["data preparation", "running experiments"], + "note": "You are running on a MacBook laptop. 
You can use 'mps' with PyTorch"}, + + {"phases": ["data preparation", "running experiments"], + "note": "Generate figures with very colorful and artistic design."}, + ] +``` + +-------- + +#### [Consejo #2] 🚀 ¡Usar modelos más potentes generalmente conduce a una mejor investigación! 🚀 + +Al realizar investigaciones, **la elección del modelo puede impactar significativamente la calidad de los resultados**. Los modelos más potentes tienden a tener mayor precisión, mejores capacidades de razonamiento y mejor generación de informes. Si los recursos computacionales lo permiten, prioriza el uso de modelos avanzados como o1-(mini/preview) o modelos de lenguaje grandes similares de última generación. + +Sin embargo, **es importante equilibrar el rendimiento y la rentabilidad**. Aunque los modelos potentes pueden ofrecer mejores resultados, a menudo son más costosos y requieren más tiempo para ejecutarse. Considera usarlos de manera selectiva, por ejemplo, para experimentos clave o análisis finales, mientras confías en modelos más pequeños y eficientes para tareas iterativas o prototipos iniciales. + +Cuando los recursos son limitados, **optimiza ajustando finamente modelos más pequeños** en tu conjunto de datos específico o combinando modelos preentrenados con prompts específicos para tareas para lograr el equilibrio deseado entre rendimiento y eficiencia computacional. + +----- + +#### [Consejo #3] ✅ Puedes cargar guardados anteriores desde puntos de control ✅ + +**Si pierdes progreso, la conexión a internet o si una subtarea falla, siempre puedes cargar desde un estado anterior.** Todo tu progreso se guarda por defecto en la variable `state_saves`, que almacena cada punto de control individual. 
Simplemente pasa los siguientes argumentos al ejecutar `ai_lab_repo.py` + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH" +``` + +----- + +#### [Consejo #4] 🈯 Si estás ejecutando en un idioma que no sea inglés 🈲 + +Si estás ejecutando Agent Laboratory en un idioma que no sea inglés, no hay problema, solo asegúrate de proporcionar una bandera de idioma a los agentes para realizar la investigación en tu idioma preferido. Ten en cuenta que no hemos estudiado extensivamente la ejecución de Agent Laboratory en otros idiomas, así que asegúrate de reportar cualquier problema que encuentres. + +Por ejemplo, si estás ejecutando en chino: + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文" +``` + +---- + +#### [Consejo #5] 🌟 Hay mucho margen para mejorar 🌟 + +Hay mucho margen para mejorar esta base de código, así que si terminas haciendo cambios y quieres ayudar a la comunidad, ¡no dudes en compartir los cambios que has realizado! ¡Esperamos que esta herramienta te sea de ayuda! + +## Referencia / Bibtex + + + +```bibtex +@preprint{schmidgall2025AgentLaboratory, + title={Agent Laboratory: Using LLM Agents as Research Assistants}, + author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad}, + year={2025} +} +``` \ No newline at end of file diff --git a/readme/README-turkish.md b/readme/README-turkish.md new file mode 100755 index 0000000..0e9af63 --- /dev/null +++ b/readme/README-turkish.md @@ -0,0 +1,158 @@ +# Agent Laboratuvarı: LLM Ajanlarını Araştırma Asistanı Olarak Kullanma + +

+ Demonstration of the flow of AgentClinic +

+ + +

+ 【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】 +

+ +

+ 【🌐 Website | 💻 Software | 🎥 Video | 📚 Example Paper | 📰 Citation】 +

+ +## 📖 Genel Bakış + +- **Agent Laboratuvarı**, **araştırma fikirlerinizi uygulamanıza** yardımcı olmak amacıyla **siz** insan araştırmacıyı desteklemek için tasarlanmış uçtan uca otonom bir araştırma iş akışıdır. Agent Laboratuvarı, literatür taramaları yapmaktan planlar oluşturmaya, deneyler yürütmekten kapsamlı raporlar yazmaya kadar tüm araştırma süreci boyunca sizi desteklemek için büyük dil modelleriyle desteklenen uzman ajanlardan oluşur. +- Bu sistem, yaratıcılığınızı yerine koymak için değil, onu tamamlamak için tasarlanmıştır; böylece kodlama ve dokümantasyon gibi tekrarlayan ve zaman alıcı görevleri otomatikleştirirken, fikir üretimi ve eleştirel düşünmeye odaklanabilirsiniz. Farklı düzeylerde hesaplama kaynakları ve insan katılımını karşılayarak, Agent Laboratuvarı bilimsel keşfi hızlandırmayı ve araştırma verimliliğinizi optimize etmeyi amaçlamaktadır. + +

+ Demonstration of the flow of AgentClinic +

+ +### 🔬 Agent Laboratuvarı Nasıl Çalışır? + +- Agent Laboratuvarı, araştırma sürecini sistematik olarak yönlendiren üç ana aşamadan oluşur: (1) Literatür Taraması, (2) Deney Yapma ve (3) Rapor Yazımı. Her aşamada, LLM'ler tarafından yönlendirilen uzman ajanlar, arXiv, Hugging Face, Python ve LaTeX gibi dış araçları entegre ederek farklı hedeflere ulaşmak için iş birliği yapar ve sonuçları optimize eder. Bu yapılandırılmış iş akışı, ilgili araştırma makalelerinin bağımsız olarak toplanması ve analiz edilmesiyle başlar, ortak planlama ve veri hazırlama aşamalarından geçer ve otomatik deney yapma ile kapsamlı rapor oluşturma ile sona erer. Bu aşamalarda belirli ajan rollerinin ve katkılarının detayları makalede tartışılmaktadır. + +

+ Demonstration of the flow of AgentClinic +

+ +## 🖥️ Kurulum + +### Python venv seçeneği + +1. **GitHub Deposu Klonlayın**: Depoyu aşağıdaki komutu kullanarak klonlayarak başlayın: + ```bash + git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git + ``` + +2. **Python Ortamını Kurun ve Aktif Hale Getirin** + ```bash + python -m venv venv_agent_lab + ``` + + - Şimdi bu ortamı etkinleştirin: + ```bash + source venv_agent_lab/bin/activate + ``` + +3. **Gerekli Kütüphaneleri Yükleyin** + ```bash + pip install -r requirements.txt + ``` + +4. **pdflatex'i Yükleyin [SEÇENEKSEL]** + ```bash + sudo apt install pdflatex + ``` + + - Bu, ajanların LaTeX kaynaklarını derleyebilmesini sağlar. + - **[ÖNEMLİ]** Bu adımı sudo erişiminiz yoksa çalıştıramıyorsanız, Agent Laboratuvarı'nı çalıştırırken --compile_latex bayrağını false olarak ayarlayarak PDF derlemeyi kapatabilirsiniz: `--compile_latex=False` + +5. **Şimdi Agent Laboratuvarı'nı Çalıştırın!** + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" + ``` + + veya, pdflatex yüklü değilse + + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False + ``` + +----- +## Daha İyi Araştırma Sonuçları için İpuçları + +#### [İpucu #1] 📝 Kapsamlı Notlar Yazdığınızdan Emin Olun! 📝 + +**Kapsamlı notlar yazmak**, ajanın projenizde neyi başarmak istediğinizi ve herhangi bir stil tercihlerinizi anlamasına yardımcı olduğu için önemlidir. Notlar, ajanların gerçekleştirmesini istediğiniz deneyler, API anahtarları sağlamak, dahil edilmesini istediğiniz belirli grafikler veya figürler veya araştırma yaparken ajanın bilmesi gereken her şey gibi unsurları içerebilir. + +Ayrıca, ajana **erişebileceği hesaplama kaynaklarını** bildirmeniz için bir fırsattır, örneğin GPU'lar (kaç tane, hangi tür GPU, kaç GB), CPU'lar (kaç çekirdek, hangi tür CPU'lar), depolama sınırlamaları ve donanım özellikleri. 
+ +Not eklemek için, ai_lab_repo.py içindeki task_notes_LLM yapısını değiştirmeniz gerekir. Aşağıda, bazı deneylerimizde kullanılan örnek notlar verilmiştir. + +```python +task_notes_LLM = [ + {"phases": ["plan formulation"], + "note": f"You should come up with a plan for TWO experiments."}, + + {"phases": ["plan formulation", "data preparation", "running experiments"], + "note": "Please use gpt-4o-mini for your experiments."}, + + {"phases": ["running experiments"], + "note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n"}, + + {"phases": ["running experiments"], + "note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."}, + + {"phases": ["running experiments"], + "note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."}, + + {"phases": ["data preparation", "running experiments"], + "note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"}, + + {"phases": ["data preparation", "running experiments"], + "note": "Generate figures with very colorful and artistic design."}, + ] +``` + +-------- + +#### [İpucu #2] 🚀 Daha Güçlü Modeller Kullanmak Genellikle Daha İyi Araştırma Sonuçlarına Yol Açar 🚀 + +Araştırma yaparken, **model seçimi sonuçların kalitesi üzerinde önemli bir etkiye sahip olabilir**. Daha güçlü modeller genellikle daha yüksek doğruluk, daha iyi akıl yürütme yetenekleri ve daha iyi rapor oluşturma özelliklerine sahiptir. 
Hesaplama kaynaklarınız izin veriyorsa, o1-(mini/preview) gibi gelişmiş modellerin veya benzeri en son büyük dil modellerinin kullanımını önceliklendirin. + +Ancak, **performans ve maliyet etkinliği arasında denge kurmak önemlidir**. Güçlü modeller daha iyi sonuçlar verebilirken, genellikle çalıştırmaları daha pahalı ve zaman alıcıdır. Bunları seçici olarak kullanmayı düşünün—örneğin, ana deneyler veya son analizler için—iteratif görevler veya ilk prototipler için daha küçük, daha verimli modelleri kullanmaya devam edin. + +Kaynaklar sınırlı olduğunda, **daha küçük modelleri özel veri setinizde ince ayar yaparak veya görev odaklı istemlerle önceden eğitilmiş modelleri birleştirerek performans ve hesaplama verimliliği arasında istenen dengeyi sağlayın**. + +----- + +#### [İpucu #3] ✅ Önceki Kontrol Noktalarından Kaydedilenleri Yükleyebilirsiniz ✅ + +**İlerlemenizi kaybederseniz, internet bağlantınız kesilirse veya bir alt görev başarısız olursa, her zaman önceki bir durumdan yükleme yapabilirsiniz.** Tüm ilerlemeniz varsayılan olarak her bir kontrol noktasını saklayan state_saves değişkeninde kaydedilir. ai_lab_repo.py çalıştırılırken aşağıdaki argümanları geçmeniz yeterlidir: + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH" +``` + +----- + +#### [İpucu #4] 🈯 İngilizce Dışında Bir Dil Kullanıyorsanız 🈲 + +Agent Laboratuvarı'nı İngilizce dışında bir dilde çalıştırıyorsanız sorun yok, sadece ajanlara araştırmayı tercih ettiğiniz dilde gerçekleştirmeleri için bir dil bayrağı sağlamanız yeterlidir. Agent Laboratuvarı'nı diğer dillerde çalıştırmayı kapsamlı bir şekilde incelemediğimizi unutmayın, bu yüzden karşılaştığınız herhangi bir problemi bildirdiğinizden emin olun. 
+ +Örneğin, Çincede çalıştırıyorsanız: + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文" +``` + +---- + +#### [İpucu #5] 🌟 Geliştirme İçin Çok Fazla Alan Var 🌟 + +Bu kod tabanını geliştirmek için çok fazla alan var, bu yüzden değişiklik yaparsanız ve topluluğa yardımcı olmak isterseniz, yaptığınız değişiklikleri paylaşmaktan çekinmeyin! Umarız bu araç size yardımcı olur! + +## Referans / Bibtex + +```bibtex +@preprint{schmidgall2025AgentLaboratory, + title={Agent Laboratory: Using LLM Agents as Research Assistants}, + author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad}, + year={2025} +} +``` \ No newline at end of file diff --git a/readme/README-vietnamese.md b/readme/README-vietnamese.md new file mode 100755 index 0000000..1d03958 --- /dev/null +++ b/readme/README-vietnamese.md @@ -0,0 +1,163 @@ +# Agent Laboratory: Sử dụng Đại Diện LLM làm Trợ Lý Nghiên Cứu + +

+ Demonstration of the flow of AgentClinic +

+ + +

+ 【English | 中文 | 日本語 | 한국어 | Filipino | Français | Slovenčina | Português | Español | Türkçe | हिंदी | বাংলা | Tiếng Việt | Русский | العربية | فارسی | Italiano】 +

+ +

+ 【🌐 Website | 💻 Software | 🎥 Video | 📚 Example Paper | 📰 Citation】 +

+ +## 📖 Tổng Quan + +- **Agent Laboratory** là một quy trình nghiên cứu tự động từ đầu đến cuối, nhằm hỗ trợ **bạn** với tư cách là nhà nghiên cứu con người trong việc **triển khai các ý tưởng nghiên cứu của bạn**. Agent Laboratory bao gồm các đại diện chuyên biệt được điều khiển bởi các mô hình ngôn ngữ lớn để hỗ trợ bạn trong toàn bộ quy trình nghiên cứu—từ việc thực hiện đánh giá tài liệu và xây dựng kế hoạch đến thực hiện các thí nghiệm và viết các báo cáo toàn diện. +- Hệ thống này không được thiết kế để thay thế sự sáng tạo của bạn mà để bổ sung cho nó, cho phép bạn tập trung vào ý tưởng và tư duy phản biện trong khi tự động hóa các nhiệm vụ lặp đi lặp lại và tốn thời gian như mã hóa và tài liệu hóa. Bằng cách đáp ứng các mức độ tài nguyên tính toán và sự tham gia của con người khác nhau, Agent Laboratory nhằm mục tiêu tăng tốc khám phá khoa học và tối ưu hóa năng suất nghiên cứu của bạn. + +

+ Demonstration of the flow of AgentClinic +

+ +### 🔬 Agent Laboratory hoạt động như thế nào? + +- Agent Laboratory bao gồm ba giai đoạn chính hướng dẫn quy trình nghiên cứu một cách có hệ thống: (1) Đánh giá Tài liệu, (2) Thực nghiệm, và (3) Viết Báo cáo. Trong mỗi giai đoạn, các đại diện chuyên biệt được điều khiển bởi LLM hợp tác để đạt được các mục tiêu riêng biệt, tích hợp các công cụ bên ngoài như arXiv, Hugging Face, Python, và LaTeX để tối ưu hóa kết quả. Quy trình làm việc có cấu trúc này bắt đầu với việc thu thập và phân tích độc lập các bài báo nghiên cứu liên quan, tiến tới lập kế hoạch hợp tác và chuẩn bị dữ liệu, và kết thúc với việc thực hiện các thí nghiệm tự động và tạo báo cáo toàn diện. Chi tiết về các vai trò cụ thể của đại diện và đóng góp của họ trong các giai đoạn này được thảo luận trong bài báo. + +

+ Demonstration of the flow of Agent Laboratory +

+ +## 🖥️ Cài Đặt + +### Tùy chọn môi trường ảo Python + + +1. **Nhân bản kho lưu trữ GitHub**: Bắt đầu bằng cách nhân bản kho lưu trữ bằng lệnh: + ```bash + git clone git@github.com:SamuelSchmidgall/AgentLaboratory.git + ``` + +2. **Thiết lập và Kích hoạt Môi trường Python** + ```bash + python -m venv venv_agent_lab + ``` + + - Bây giờ kích hoạt môi trường này: + ```bash + source venv_agent_lab/bin/activate + ``` + +3. **Cài đặt các thư viện cần thiết** + ```bash + pip install -r requirements.txt + ``` + +4. **Cài đặt pdflatex [TUÝ CHỌN]** + ```bash + sudo apt install pdflatex + ``` + + - Điều này cho phép mã nguồn latex được biên dịch bởi các đại diện. + - **[QUAN TRỌNG]** Nếu bước này không thể chạy do không có quyền sudo, việc biên dịch pdf có thể được tắt bằng cách chạy Agent Laboratory với cờ --compile_latex đặt thành false: --compile_latex=False + +5. **Bây giờ chạy Agent Laboratory!** + + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" + ``` + + hoặc, nếu bạn không cài đặt pdflatex + + ```bash + python ai_lab_repo.py --api-key "API_KEY_HERE" --llm-backend "o1-mini" --research-topic "YOUR RESEARCH IDEA" --compile_latex=False + ``` + +----- + +## Mẹo để đạt được kết quả nghiên cứu tốt hơn + + +#### [Mẹo #1] 📝 Hãy chắc chắn ghi chép kỹ lưỡng! 📝 + +**Việc ghi chép kỹ lưỡng là quan trọng** để giúp đại diện của bạn hiểu bạn đang muốn đạt được điều gì trong dự án của mình, cũng như bất kỳ sở thích về phong cách nào. Ghi chú có thể bao gồm bất kỳ thí nghiệm nào bạn muốn các đại diện thực hiện, cung cấp các khóa API, các biểu đồ hoặc hình vẽ cụ thể bạn muốn bao gồm, hoặc bất cứ điều gì bạn muốn đại diện biết khi thực hiện nghiên cứu. + +Đây cũng là cơ hội của bạn để cho đại diện biết **các tài nguyên tính toán mà nó có quyền truy cập**, ví dụ: GPU (số lượng, loại GPU, số GB), CPU (số lượng lõi, loại CPU), hạn chế về lưu trữ, và các thông số phần cứng. 
+ +Để thêm ghi chú, bạn phải sửa cấu trúc task_notes_LLM bên trong ai_lab_repo.py. Dưới đây là một ví dụ về bộ ghi chú được sử dụng cho một số thí nghiệm của chúng tôi. + + +```python +task_notes_LLM = [ + {"phases": ["plan formulation"], + "note": f"You should come up with a plan for TWO experiments."}, + + {"phases": ["plan formulation", "data preparation", "running experiments"], + "note": "Please use gpt-4o-mini for your experiments."}, + + {"phases": ["running experiments"], + "note": f"Use the following code to inference gpt-4o-mini: \nfrom openai import OpenAI\nos.environ["OPENAI_API_KEY"] = "{api_key}"\nclient = OpenAI()\ncompletion = client.chat.completions.create(\nmodel="gpt-4o-mini-2024-07-18", messages=messages)\nanswer = completion.choices[0].message.content\n"}, + + {"phases": ["running experiments"], + "note": f"You have access to only gpt-4o-mini using the OpenAI API, please use the following key {api_key} but do not use too many inferences. Do not use openai.ChatCompletion.create or any openai==0.28 commands. Instead use the provided inference code."}, + + {"phases": ["running experiments"], + "note": "I would recommend using a small dataset (approximately only 100 data points) to run experiments in order to save time. Do not use much more than this unless you have to or are running the final tests."}, + + {"phases": ["data preparation", "running experiments"], + "note": "You are running on a MacBook laptop. You can use 'mps' with PyTorch"}, + + {"phases": ["data preparation", "running experiments"], + "note": "Generate figures with very colorful and artistic design."}, + ] +``` + +-------- + +#### [Mẹo #2] 🚀 Sử dụng các mô hình mạnh mẽ hơn thường dẫn đến nghiên cứu tốt hơn 🚀 + +Khi tiến hành nghiên cứu, **lựa chọn mô hình có thể ảnh hưởng đáng kể đến chất lượng kết quả**. Các mô hình mạnh mẽ hơn thường có độ chính xác cao hơn, khả năng lý luận tốt hơn và khả năng tạo báo cáo tốt hơn. 
Nếu tài nguyên tính toán cho phép, hãy ưu tiên sử dụng các mô hình tiên tiến như o1-(mini/preview) hoặc các mô hình ngôn ngữ lớn tiên tiến tương tự. + +Tuy nhiên, **quan trọng là phải cân bằng giữa hiệu suất và hiệu quả chi phí**. Trong khi các mô hình mạnh mẽ có thể mang lại kết quả tốt hơn, chúng thường đắt hơn và tốn thời gian chạy. Hãy cân nhắc sử dụng chúng một cách chọn lọc—ví dụ, cho các thí nghiệm chính hoặc phân tích cuối cùng—trong khi dựa vào các mô hình nhỏ hơn, hiệu quả hơn cho các nhiệm vụ lặp đi lặp lại hoặc tạo mẫu ban đầu. + +Khi tài nguyên hạn chế, **tối ưu hóa bằng cách tinh chỉnh các mô hình nhỏ hơn** trên bộ dữ liệu cụ thể của bạn hoặc kết hợp các mô hình đã được huấn luyện trước với các gợi ý cụ thể cho nhiệm vụ để đạt được sự cân bằng mong muốn giữa hiệu suất và hiệu quả tính toán. + +----- + +#### [Mẹo #3] ✅ Bạn có thể tải lại các lưu trạng thái trước từ các điểm kiểm tra ✅ + +**Nếu bạn mất tiến độ, kết nối internet, hoặc nếu một nhiệm vụ phụ thất bại, bạn luôn có thể tải lại từ trạng thái trước đó.** Tất cả tiến độ của bạn được lưu mặc định trong biến state_saves, lưu trữ từng điểm kiểm tra riêng lẻ. Chỉ cần truyền các tham số sau khi chạy ai_lab_repo.py + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA" --llm-backend "o1-mini" --load-existing True --load-existing-path "save_states/LOAD_PATH" +``` + +----- + +#### [Mẹo #4] 🈯 Nếu bạn đang chạy bằng ngôn ngữ khác tiếng Anh 🈲 + +Nếu bạn đang chạy Agent Laboratory bằng ngôn ngữ khác tiếng Anh, không vấn đề gì, chỉ cần đảm bảo cung cấp cờ ngôn ngữ cho các đại diện để thực hiện nghiên cứu bằng ngôn ngữ bạn mong muốn. Lưu ý rằng chúng tôi chưa nghiên cứu kỹ việc chạy Agent Laboratory bằng các ngôn ngữ khác, vì vậy hãy chắc chắn báo cáo bất kỳ vấn đề nào bạn gặp phải. 
+ +Ví dụ, nếu bạn đang chạy bằng tiếng Trung: + +```bash +python ai_lab_repo.py --api-key "API_KEY_HERE" --research-topic "YOUR RESEARCH IDEA (in your language)" --llm-backend "o1-mini" --language "中文" +``` + +---- + +#### [Mẹo #5] 🌟 Có rất nhiều cơ hội để cải thiện 🌟 + +Có rất nhiều cơ hội để cải thiện cơ sở mã này, vì vậy nếu bạn cuối cùng thay đổi và muốn giúp cộng đồng, hãy cảm thấy tự do chia sẻ các thay đổi mà bạn đã thực hiện! Chúng tôi hy vọng công cụ này sẽ giúp bạn! + +## Tài liệu Tham khảo / Bibtex + +```bibtex +@preprint{schmidgall2025AgentLaboratory, + title={Agent Laboratory: Using LLM Agents as Research Assistants}, + author={Schmidgall, Samuel and Su, Yusheng and Wang, Ze and Sun, Ximeng and Wu, Jialian and Yu, Xiadong and Liu, Jiang and Liu, Zicheng and Barsoum, Emad}, + year={2025} +} +``` diff --git a/requirements.txt b/requirements.txt new file mode 100755 index 0000000..55ae922 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,138 @@ +absl-py==2.1.0 +accelerate==1.1.1 +aiohappyeyeballs==2.4.3 +aiohttp==3.11.7 +aiosignal==1.3.1 +annotated-types==0.7.0 +anthropic==0.39.0 +anyio==4.6.2.post1 +arxiv==2.1.3 +astunparse==1.6.3 +async-timeout==5.0.1 +attrs==24.2.0 +blis==1.0.1 +catalogue==2.0.10 +certifi==2024.8.30 +charset-normalizer==3.4.0 +click==8.1.7 +cloudpathlib==0.20.0 +confection==0.1.5 +contourpy==1.3.0 +cycler==0.12.1 +cymem==2.0.10 +datasets==3.1.0 +diffusers==0.31.0 +dill==0.3.8 +distro==1.9.0 +exceptiongroup==1.2.2 +feedparser==6.0.11 +filelock==3.16.1 +flatbuffers==24.3.25 +fonttools==4.55.0 +frozenlist==1.5.0 +fsspec==2024.9.0 +gast==0.6.0 +google-pasta==0.2.0 +grpcio==1.68.0 +h11==0.14.0 +h5py==3.12.1 +httpcore==1.0.7 +httpx==0.27.2 +huggingface-hub==0.26.2 +idna==3.10 +imageio==2.36.0 +importlib_metadata==8.5.0 +importlib_resources==6.4.5 +Jinja2==3.1.4 +jiter==0.7.1 +joblib==1.4.2 +keras==3.7.0 +kiwisolver==1.4.7 +langcodes==3.5.0 +language_data==1.3.0 +lazy_loader==0.4 +libclang==18.1.1 +marisa-trie==1.2.1 +Markdown==3.7 
+markdown-it-py==3.0.0 +MarkupSafe==3.0.2 +matplotlib==3.9.2 +mdurl==0.1.2 +ml-dtypes==0.4.1 +mpmath==1.3.0 +multidict==6.1.0 +multiprocess==0.70.16 +murmurhash==1.0.11 +namex==0.0.8 +nest-asyncio==1.6.0 +networkx==3.2.1 +NEURON==8.2.0 +nltk==3.9.1 +numpy==2.0.2 +openai==1.55.1 +opt_einsum==3.4.0 +optree==0.13.1 +packaging==24.2 +pandas==2.2.3 +patsy==1.0.1 +pillow==11.0.0 +plotly==5.24.1 +preshed==3.0.9 +propcache==0.2.0 +protobuf==5.28.3 +psutil==6.1.0 +pyarrow==18.1.0 +pydantic==2.10.2 +pydantic_core==2.27.1 +Pygments==2.18.0 +pyparsing==3.2.0 +pypdf==5.1.0 +python-dateutil==2.9.0.post0 +pytz==2024.2 +PyYAML==6.0.2 +regex==2024.11.6 +requests==2.32.3 +rich==13.9.4 +sacremoses==0.1.1 +safetensors==0.4.5 +scikit-image==0.24.0 +scikit-learn==1.5.2 +scipy==1.13.1 +seaborn==0.13.2 +semanticscholar==0.8.4 +sgmllib3k==1.0.0 +shellingham==1.5.4 +six==1.16.0 +smart-open==7.0.5 +sniffio==1.3.1 +spacy==3.8.2 +spacy-legacy==3.0.12 +spacy-loggers==1.0.5 +srsly==2.4.8 +statsmodels==0.14.4 +sympy==1.13.1 +tenacity==9.0.0 +tensorboard==2.18.0 +tensorboard-data-server==0.7.2 +tensorflow==2.18.0 +tensorflow-io-gcs-filesystem==0.37.1 +termcolor==2.5.0 +thinc==8.3.2 +threadpoolctl==3.5.0 +tifffile==2024.8.30 +tiktoken==0.8.0 +tokenizers==0.20.4 +torch==2.5.1 +tqdm==4.67.1 +transformers==4.46.3 +typer==0.13.1 +typing_extensions==4.12.2 +tzdata==2024.2 +urllib3==2.2.3 +wasabi==1.1.3 +weasel==0.4.1 +Werkzeug==3.1.3 +wrapt==1.17.0 +xxhash==3.5.0 +yarl==1.18.0 +zipp==3.21.0 diff --git a/tools.py b/tools.py new file mode 100755 index 0000000..fe4413a --- /dev/null +++ b/tools.py @@ -0,0 +1,359 @@ +from utils import * + +import time +import arxiv +import os, re +import io, sys +import numpy as np +import concurrent.futures +from pypdf import PdfReader +from datasets import load_dataset +from psutil._common import bytes2human +from datasets import load_dataset_builder +from semanticscholar import SemanticScholar +from sklearn.metrics.pairwise import linear_kernel +from 
sklearn.feature_extraction.text import TfidfVectorizer + +import traceback +import concurrent.futures + + +class HFDataSearch: + def __init__(self, like_thr=3, dwn_thr=50) -> None: + """ + Class for finding relevant huggingface datasets + :param like_thr: minimum like count a dataset needs to be kept + :param dwn_thr: minimum download count a dataset needs to be kept + """ + self.dwn_thr = dwn_thr + self.like_thr = like_thr + self.ds = load_dataset("nkasmanoff/huggingface-datasets")["train"] + + # Initialize lists to collect filtered data + filtered_indices = [] + filtered_descriptions = [] + filtered_likes = [] + filtered_downloads = [] + + # Iterate over the dataset and filter based on criteria + for idx, item in enumerate(self.ds): + # Get likes and downloads, handling None values + likes = int(item['likes']) if item['likes'] is not None else 0 + downloads = int(item['downloads']) if item['downloads'] is not None else 0 + + # Check if likes and downloads meet the thresholds + if likes >= self.like_thr and downloads >= self.dwn_thr: + # Check if the description is a non-empty string + description = item['description'] + if isinstance(description, str) and description.strip(): + # Collect the data + filtered_indices.append(idx) + filtered_descriptions.append(description) + filtered_likes.append(likes) + filtered_downloads.append(downloads) + + # Check if any datasets meet all criteria + if not filtered_indices: + print("No datasets meet the specified criteria.") + self.ds = [] + self.descriptions = [] + self.likes_norm = [] + self.downloads_norm = [] + self.description_vectors = None + return # Exit the constructor + + # Filter the datasets using the collected indices + self.ds = self.ds.select(filtered_indices) + + # Update descriptions, likes, and downloads + self.descriptions = filtered_descriptions + self.likes = np.array(filtered_likes) + self.downloads = np.array(filtered_downloads) + + # Normalize likes and downloads + self.likes_norm = self._normalize(self.likes) + self.downloads_norm = self._normalize(self.downloads) + + # Vectorize the descriptions 
+ self.vectorizer = TfidfVectorizer() + self.description_vectors = self.vectorizer.fit_transform(self.descriptions) + + def _normalize(self, arr): + min_val = arr.min() + max_val = arr.max() + if max_val - min_val == 0: + return np.zeros_like(arr, dtype=float) + return (arr - min_val) / (max_val - min_val) + + def retrieve_ds(self, query, N=10, sim_w=1.0, like_w=0.0, dwn_w=0.0): + """ + Retrieves the top N datasets matching the query, weighted by likes and downloads. + :param query: The search query string. + :param N: The number of results to return. + :param sim_w: Weight for cosine similarity. + :param like_w: Weight for likes. + :param dwn_w: Weight for downloads. + :return: List of top N dataset items. + """ + if not self.ds or self.description_vectors is None: + print("No datasets available to search.") + return [] + + query_vector = self.vectorizer.transform([query]) + cosine_similarities = linear_kernel(query_vector, self.description_vectors).flatten() + # Normalize cosine similarities + cosine_similarities_norm = self._normalize(cosine_similarities) + # Compute final scores + final_scores = ( + sim_w * cosine_similarities_norm + + like_w * self.likes_norm + + dwn_w * self.downloads_norm + ) + # Get top N indices + top_indices = final_scores.argsort()[-N:][::-1] + # Convert indices to Python ints + top_indices = [int(i) for i in top_indices] + top_datasets = [self.ds[i] for i in top_indices] + # check if dataset has a test & train set + has_test_set = list() + has_train_set = list() + ds_size_info = list() + for i in top_indices: + try: + dbuilder = load_dataset_builder(self.ds[i]["id"], trust_remote_code=True).info + except Exception as e: + has_test_set.append(False) + has_train_set.append(False) + ds_size_info.append((None, None, None, None)) + continue + + if dbuilder.splits is None: + has_test_set.append(False) + has_train_set.append(False) + ds_size_info.append((None, None, None, None)) + continue + # Check which of the "test" and "train" splits are present + has_test, has_train
= "test" in dbuilder.splits, "train" in dbuilder.splits + has_test_set.append(has_test) + has_train_set.append(has_train) + test_dwn_size, test_elem_size = None, None + train_dwn_size, train_elem_size = None, None + if has_test: + test_dwn_size = bytes2human(dbuilder.splits["test"].num_bytes) + test_elem_size = dbuilder.splits["test"].num_examples + if has_train: + train_dwn_size = bytes2human(dbuilder.splits["train"].num_bytes) + train_elem_size = dbuilder.splits["train"].num_examples + ds_size_info.append((test_dwn_size, test_elem_size, train_dwn_size, train_elem_size)) + for _i in range(len(top_datasets)): + top_datasets[_i]["has_test_set"] = has_test_set[_i] + top_datasets[_i]["has_train_set"] = has_train_set[_i] + top_datasets[_i]["test_download_size"] = ds_size_info[_i][0] + top_datasets[_i]["test_element_size"] = ds_size_info[_i][1] + top_datasets[_i]["train_download_size"] = ds_size_info[_i][2] + top_datasets[_i]["train_element_size"] = ds_size_info[_i][3] + return top_datasets + + def results_str(self, results): + """ + Provide results as list of results in human-readable format. 
+ :param results: (list(dict)) list of results from search + :return: (list(str)) list of results in human-readable format + """ + result_strs = list() + for result in results: + res_str = f"Dataset ID: {result['id']}\n" + res_str += f"Description: {result['description']}\n" + res_str += f"Likes: {result['likes']}\n" + res_str += f"Downloads: {result['downloads']}\n" + res_str += f"Has Testing Set: {result['has_test_set']}\n" + res_str += f"Has Training Set: {result['has_train_set']}\n" + res_str += f"Test Download Size: {result['test_download_size']}\n" + res_str += f"Test Dataset Size: {result['test_element_size']}\n" + res_str += f"Train Download Size: {result['train_download_size']}\n" + res_str += f"Train Dataset Size: {result['train_element_size']}\n" + result_strs.append(res_str) + return result_strs + + +class SemanticScholarSearch: + def __init__(self): + self.sch_engine = SemanticScholar(retry=False) + + def find_papers_by_str(self, query, N=10): + paper_sums = list() + results = self.sch_engine.search_paper(query, limit=N, min_citation_count=3, open_access_pdf=True) + for _i in range(len(results)): + paper_sum = f'Title: {results[_i].title}\n' + paper_sum += f'Abstract: {results[_i].abstract}\n' + paper_sum += f'Citations: {results[_i].citationCount}\n' + paper_sum += f'Release Date: year {results[_i].publicationDate.year}, month {results[_i].publicationDate.month}, day {results[_i].publicationDate.day}\n' + paper_sum += f'Venue: {results[_i].venue}\n' + paper_sum += f'Paper ID: {results[_i].externalIds["DOI"]}\n' + paper_sums.append(paper_sum) + return paper_sums + + def retrieve_full_paper_text(self, query): + pass + + +class ArxivSearch: + def __init__(self): + # Construct the default API client. 
+ self.sch_engine = arxiv.Client() + + def find_papers_by_str(self, query, N=20): + search = arxiv.Search( + query="abs:" + query, + max_results=N, + sort_by=arxiv.SortCriterion.Relevance) + + paper_sums = list() + # `results` is a generator; you can iterate over its elements one by one... + for r in self.sch_engine.results(search): + paperid = r.pdf_url.split("/")[-1] + pubdate = str(r.published).split(" ")[0] + paper_sum = f"Title: {r.title}\n" + paper_sum += f"Summary: {r.summary}\n" + paper_sum += f"Publication Date: {pubdate}\n" + paper_sum += f"Categories: {' '.join(r.categories)}\n" + paper_sum += f"arXiv paper ID: {paperid}\n" + paper_sums.append(paper_sum) + time.sleep(2.0) + return "\n".join(paper_sums) + + def retrieve_full_paper_text(self, query): + pdf_text = str() + paper = next(arxiv.Client().results(arxiv.Search(id_list=[query]))) + # Download the PDF to the PWD with a custom filename. + paper.download_pdf(filename="downloaded-paper.pdf") + # creating a pdf reader object + reader = PdfReader('downloaded-paper.pdf') + # Iterate over all the pages + for page_number, page in enumerate(reader.pages, start=1): + # Extract text from the page + try: + text = page.extract_text() + except Exception as e: + os.remove("downloaded-paper.pdf") + time.sleep(2.0) + return "EXTRACTION FAILED" + + # Append the page text to the result under a page-number header + pdf_text += f"--- Page {page_number} ---" + pdf_text += text + pdf_text += "\n" + os.remove("downloaded-paper.pdf") + time.sleep(2.0) + return pdf_text + +""" +import multiprocessing +import sys +import io +import traceback + +def execute_code(code_str, timeout=180): + if "load_dataset('pubmed" in code_str: + return "pubmed Download took way too long. 
Program terminated" + + def run_code(queue): + # Redirect stdout to capture print outputs + output_capture = io.StringIO() + sys.stdout = output_capture + + try: + exec_globals = {} + exec(code_str, exec_globals) + except Exception as e: + output_capture.write(f"[CODE EXECUTION ERROR]: {str(e)}\n") + traceback.print_exc(file=output_capture) + finally: + # Put the output in the queue + queue.put(output_capture.getvalue()) + # Restore stdout + sys.stdout = sys.__stdout__ + + # Create a multiprocessing Queue to capture the output + queue = multiprocessing.Queue() + # Create a new Process + process = multiprocessing.Process(target=run_code, args=(queue,)) + process.start() + # Wait for the process to finish or timeout + process.join(timeout) + + if process.is_alive(): + process.terminate() + process.join() + return f"[CODE EXECUTION ERROR]: Code execution exceeded the timeout limit of {timeout} seconds. You must reduce the time complexity of your code." + else: + # Retrieve the output from the queue + output = queue.get() + return output + +""" + +import io +import sys +import traceback +import concurrent.futures + + + +import multiprocessing +import io +import sys +import traceback +import multiprocessing +import io +import sys +import traceback + + +def execute_code(code_str, timeout=60, MAX_LEN=1000): + #print(code_str) + + # prevent plotting errors + import matplotlib + matplotlib.use('Agg') # Use the non-interactive Agg backend + import matplotlib.pyplot as plt + + # Preventing execution of certain resource-intensive datasets + if "load_dataset('pubmed" in code_str: + return "[CODE EXECUTION ERROR] pubmed Download took way too long. Program terminated" + if "exit(" in code_str: + return "[CODE EXECUTION ERROR] The exit() command is not allowed you must remove this." 
+ #print(code_str) + # Capturing the output + output_capture = io.StringIO() + sys.stdout = output_capture + + # Reuse this module's globals as the exec namespace (not a fresh context) + exec_globals = globals() + + def run_code(): + try: + # Executing the code in the global namespace + exec(code_str, exec_globals) + except Exception as e: + output_capture.write(f"[CODE EXECUTION ERROR]: {str(e)}\n") + traceback.print_exc(file=output_capture) + + try: + # Running code in a separate thread with a timeout; NOTE(review): leaving the `with` block waits for the worker thread to finish, so the timeout does not stop the running code + with concurrent.futures.ThreadPoolExecutor() as executor: + future = executor.submit(run_code) + future.result(timeout=timeout) + except concurrent.futures.TimeoutError: + return f"[CODE EXECUTION ERROR]: Code execution exceeded the timeout limit of {timeout} seconds. You must reduce the time complexity of your code." + except Exception as e: + return f"[CODE EXECUTION ERROR]: {str(e)}" + finally: + # Restoring standard output + sys.stdout = sys.__stdout__ + + # Returning the captured output + return output_capture.getvalue()[:MAX_LEN] + + + diff --git a/utils.py b/utils.py new file mode 100755 index 0000000..a163273 --- /dev/null +++ b/utils.py @@ -0,0 +1,121 @@ +import os, re +import shutil +import tiktoken +import subprocess + + +def compile_latex(latex_code, compile=True, output_filename="output.pdf", timeout=30): + latex_code = latex_code.replace( + r"\documentclass{article}", + 
"\\documentclass{article}\n\\usepackage{amsmath}\n\\usepackage{amssymb}\n\\usepackage{array}\n\\usepackage{algorithm}\n\\usepackage{algorithmicx}\n\\usepackage{algpseudocode}\n\\usepackage{booktabs}\n\\usepackage{colortbl}\n\\usepackage{color}\n\\usepackage{enumitem}\n\\usepackage{fontawesome5}\n\\usepackage{float}\n\\usepackage{graphicx}\n\\usepackage{hyperref}\n\\usepackage{listings}\n\\usepackage{makecell}\n\\usepackage{multicol}\n\\usepackage{multirow}\n\\usepackage{pgffor}\n\\usepackage{pifont}\n\\usepackage{soul}\n\\usepackage{sidecap}\n\\usepackage{subcaption}\n\\usepackage{titletoc}\n\\usepackage[symbol]{footmisc}\n\\usepackage{url}\n\\usepackage{wrapfig}\n\\usepackage{xcolor}\n\\usepackage{xspace}") + #print(latex_code) + dir_path = "research_dir/tex" + tex_file_path = os.path.join(dir_path, "temp.tex") + # Write the LaTeX code to the .tex file in the specified directory + with open(tex_file_path, "w") as f: + f.write(latex_code) + + if not compile: + return f"Compilation successful" + + # Compiling the LaTeX code using pdflatex with non-interactive mode and timeout + try: + result = subprocess.run( + ["pdflatex", "-interaction=nonstopmode", "temp.tex"], + check=True, # Raises a CalledProcessError on non-zero exit codes + stdout=subprocess.PIPE, # Capture standard output + stderr=subprocess.PIPE, # Capture standard error + timeout=timeout, # Timeout for the process + cwd=dir_path + ) + + # If compilation is successful, return the success message + return f"Compilation successful: {result.stdout.decode('utf-8')}" + + except subprocess.TimeoutExpired: + # If the compilation takes too long, return a timeout message + return "[CODE EXECUTION ERROR]: Compilation timed out after {} seconds".format(timeout) + except subprocess.CalledProcessError as e: + # If there is an error during LaTeX compilation, return the error message + return f"[CODE EXECUTION ERROR]: Compilation failed: {e.stderr.decode('utf-8')} {e.output.decode('utf-8')}. 
There was an error in your latex." + + +def count_tokens(messages, model="gpt-4"): + enc = tiktoken.encoding_for_model(model) + num_tokens = sum([len(enc.encode(message["content"])) for message in messages]) + return num_tokens + +def remove_figures(): + """Delete files in the current directory whose names contain 'Figure_' and '.png'.""" + for _file in os.listdir("."): + if "Figure_" in _file and ".png" in _file: + os.remove(_file) + +def remove_directory(dir_path): + """Remove a directory if it exists.""" + if os.path.exists(dir_path) and os.path.isdir(dir_path): + try: + shutil.rmtree(dir_path) + print(f"Directory {dir_path} removed successfully.") + except Exception as e: + print(f"Error removing directory {dir_path}: {e}") + else: + print(f"Directory {dir_path} does not exist or is not a directory.") + + +def save_to_file(location, filename, data): + """Utility function to save data as plain text.""" + filepath = os.path.join(location, filename) + try: + with open(filepath, 'w') as f: + f.write(data) # Write the raw string instead of using json.dump + print(f"Data successfully saved to {filepath}") + except Exception as e: + print(f"Error saving file {filename}: {e}") + + +def clip_tokens(messages, model="gpt-4", max_tokens=100000): + enc = tiktoken.encoding_for_model(model) + total_tokens = sum([len(enc.encode(message["content"])) for message in messages]) + + if total_tokens <= max_tokens: + return messages # No need to clip if under the limit + + # Start removing tokens from the beginning + tokenized_messages = [] + for message in messages: + tokenized_content = enc.encode(message["content"]) + tokenized_messages.append({"role": message["role"], "content": tokenized_content}) + + # Flatten all tokens + all_tokens = [token for message in tokenized_messages for token in message["content"]] + + # Remove tokens from the beginning + clipped_tokens = all_tokens[total_tokens - max_tokens:] + + # Rebuild the clipped messages + clipped_messages = [] + current_idx = 0 + for message in tokenized_messages: + 
message_token_count = len(message["content"]) + if current_idx + message_token_count > len(clipped_tokens): + clipped_message_content = clipped_tokens[current_idx:] + clipped_message = enc.decode(clipped_message_content) + clipped_messages.append({"role": message["role"], "content": clipped_message}) + break + else: + clipped_message_content = clipped_tokens[current_idx:current_idx + message_token_count] + clipped_message = enc.decode(clipped_message_content) + clipped_messages.append({"role": message["role"], "content": clipped_message}) + current_idx += message_token_count + return clipped_messages + + + +def extract_prompt(text, word): + code_block_pattern = rf"```{word}(.*?)```" + code_blocks = re.findall(code_block_pattern, text, re.DOTALL) + extracted_code = "\n".join(code_blocks).strip() + return extracted_code + +