diff --git a/assets/rec.png b/assets/rec.png new file mode 100644 index 0000000..ae4308f Binary files /dev/null and b/assets/rec.png differ diff --git a/assets/technology-278512.mp3 b/assets/technology-278512.mp3 new file mode 100644 index 0000000..044a977 Binary files /dev/null and b/assets/technology-278512.mp3 differ diff --git a/assets/箭头 (1).png b/assets/箭头 (1).png new file mode 100644 index 0000000..1a3b976 Binary files /dev/null and b/assets/箭头 (1).png differ diff --git a/assets/箭头 (2).png b/assets/箭头 (2).png new file mode 100644 index 0000000..5b41a12 Binary files /dev/null and b/assets/箭头 (2).png differ diff --git a/assets/箭头.png b/assets/箭头.png new file mode 100644 index 0000000..86ecb07 Binary files /dev/null and b/assets/箭头.png differ diff --git a/autoagent/cli.py b/autoagent/cli.py index 8b1fe5b..eff1b08 100644 --- a/autoagent/cli.py +++ b/autoagent/cli.py @@ -239,6 +239,7 @@ def user_mode(model: str, context_variables: dict, debug: bool = True): style=style ) client = MetaChain(log_path=logger) + upload_infos = [] while True: # query = ask_text("Tell me what you want to do:") query = session.prompt( @@ -246,7 +247,7 @@ def user_mode(model: str, context_variables: dict, debug: bool = True): bottom_toolbar=HTML('Prompt: Enter @ to mention Agents') ) if query.strip().lower() == 'exit': - # logger.info('User mode completed. See you next time! :waving_hand:', color='green', title='EXIT') + # logger.info('User mode completed. See you next time! :waving_hand:', color='green', title='EXIT') logo_text = "User mode completed. See you next time! :waving_hand:" console.print(Panel(logo_text, style="bold salmon1", expand=True)) @@ -265,6 +266,8 @@ def user_mode(model: str, context_variables: dict, debug: bool = True): if hasattr(agent, "name"): agent_name = agent.name console.print(f"[bold green][bold magenta]@{agent_name}[/bold magenta] will help you, be patient...[/bold green]") + if len(upload_infos) > 0: + query = "{}\n\nUser uploaded files:\n{}".format(query, "\n".join(upload_infos)) messages.append({"role": "user", "content": query}) response = client.run(agent, messages, context_variables, debug=debug) messages.extend(response.messages) @@ -284,9 +287,12 @@ def user_mode(model: str, context_variables: dict, debug: bool = True): elif agent == "select": code_env: DockerEnv = context_variables["code_env"] local_workplace = code_env.local_workplace + docker_workplace = code_env.docker_workplace files_dir = os.path.join(local_workplace, "files") + docker_files_dir = os.path.join(docker_workplace, "files") os.makedirs(files_dir, exist_ok=True) - select_and_copy_files(files_dir, console) + upload_infos.extend(select_and_copy_files(files_dir, console, docker_files_dir)) + agent = agents["System_Triage_Agent"] else: console.print(f"[bold red]Unknown agent: {agent}[/bold red]") diff --git a/autoagent/cli_utils/file_select.py b/autoagent/cli_utils/file_select.py index eab817c..d363994 100644 --- a/autoagent/cli_utils/file_select.py +++ b/autoagent/cli_utils/file_select.py @@ -4,7 +4,7 @@ import shutil import os from rich.console import Console -def select_and_copy_files(dest_dir, console: Console): +def select_and_copy_files(dest_dir, console: Console, docker_files_dir: str): # 创建 tkinter 根窗口但隐藏它 root = tk.Tk() root.withdraw() @@ -37,17 +37,21 @@ def select_and_copy_files(dest_dir, console: Console): return # 复制文件 + upload_infos = [] for file_path in files: file_name = os.path.basename(file_path) dest_path = os.path.join(dest_dir, file_name) + docker_dest_path = os.path.join(docker_files_dir, file_name) try: shutil.copy2(file_path, dest_path) - console.print(f"[bold green]Uploaded: {file_name}[/bold green]") + msg = f"Uploaded: {file_name} to {docker_dest_path}" + upload_infos.append(msg) + console.print(f"[bold green]{msg}[/bold green]") except Exception as e: console.print(f"[bold red]Error uploading {file_name}: {e}[/bold red]") console.print(f"[bold green]Successfully uploaded {len(files)} files[/bold green]") - + return upload_infos if __name__ == "__main__": dest_dir = "/Users/tangjiabin/Documents/reasoning/metachain/workspace_meta_showcase/showcase_nl2agent_showcase/workplace" select_and_copy_files(dest_dir) \ No newline at end of file diff --git a/autoagent/environment/browser_env.py b/autoagent/environment/browser_env.py index 45cefb0..310a14e 100644 --- a/autoagent/environment/browser_env.py +++ b/autoagent/environment/browser_env.py @@ -465,7 +465,7 @@ def _local_to_docker(local_path: str): 'browsergym/openended', task_kwargs={'start_url': 'about:blank', 'goal': 'PLACEHOLDER_GOAL'}, wait_for_user_message=False, - headless=True, + headless=False, disable_env_checker=True, tags_to_mark='all', action_mapping = action_mapping diff --git a/autoagent/environment/mdconvert.py b/autoagent/environment/mdconvert.py index 69efc77..56c14e8 100644 --- a/autoagent/environment/mdconvert.py +++ b/autoagent/environment/mdconvert.py @@ -1051,8 +1051,10 @@ def _get_page_markdown(): # 将markdown内容转换为简单的HTML结构 # 将markdown内容按固定长度分块 - chunk_size = 10000 # 每块大约1000字符 + chunk_size = 5000 # 每块大约1000字符 content = res.text_content + + chunks = [content[i:i+chunk_size] for i in range(0, len(content), chunk_size)] formatted_content = '' diff --git a/autoagent/tools/md_obs.py b/autoagent/tools/md_obs.py index 0b4c585..58e0051 100644 --- a/autoagent/tools/md_obs.py +++ b/autoagent/tools/md_obs.py @@ -24,6 +24,7 @@ def flatten_md_axtree_to_str( node_id_to_idx = {} for idx, node in enumerate(AX_tree["nodes"]): node_id_to_idx[node["nodeId"]] = idx + # print("******************************* AX_tree:", AX_tree) def dfs(node_idx: int, depth: int, parent_node_filtered: bool, parent_node_name: str) -> str: tree_str = "" @@ -145,5 +146,63 @@ def flatten_md_axtree_to_str( return tree_str - tree_str = dfs(0, 0, False, "") + tree_str = dfs(0, 1, False, "") return tree_str + +# def flatten_md_axtree_to_str( +# AX_tree, +# extra_properties: dict = None, +# with_visible: bool = False, +# with_clickable: bool = False, +# with_center_coords: bool = False, +# with_bounding_box_coords: bool = False, +# with_som: bool = False, +# skip_generic: bool = True, +# filter_visible_only: bool = False, +# filter_with_bid_only: bool = False, +# filter_som_only: bool = False, +# coord_decimals: int = 0, +# ignored_roles=IGNORED_AXTREE_ROLES, +# ignored_properties=IGNORED_AXTREE_PROPERTIES, +# remove_redundant_static_text: bool = True, +# hide_bid_if_invisible: bool = False, +# hide_all_children: bool = False, +# hide_all_bids: bool = False, +# ) -> str: +# """Formats the accessibility tree into a markdown string""" +# if not AX_tree or "nodes" not in AX_tree or not AX_tree["nodes"]: +# return "No content available" + +# node_id_to_idx = {} +# for idx, node in enumerate(AX_tree["nodes"]): +# node_id_to_idx[node["nodeId"]] = idx + +# def dfs(node_idx: int, depth: int, parent_node_filtered: bool, parent_node_name: str) -> str: +# tree_str = "" +# node = AX_tree["nodes"][node_idx] + +# # 处理节点内容 +# if "name" in node and "value" in node["name"]: +# content = node["name"]["value"].strip() +# if content: +# # 移除多余的空白字符 +# content = ' '.join(content.split()) +# # 添加到输出 +# tree_str += content + "\n" + +# # 递归处理子节点 +# for child_node_id in node.get("childIds", []): +# if child_node_id in node_id_to_idx and child_node_id != node["nodeId"]: +# child_str = dfs( +# node_id_to_idx[child_node_id], +# depth + 1, +# parent_node_filtered=False, +# parent_node_name=node.get("name", {}).get("value", "") +# ) +# if child_str: +# tree_str += child_str + +# return tree_str + +# tree_str = dfs(0, 0, False, "") +# return tree_str.strip() \ No newline at end of file diff --git a/autoagent/tools/web_tools.py b/autoagent/tools/web_tools.py index 6e60323..625b8ff 100644 --- a/autoagent/tools/web_tools.py +++ b/autoagent/tools/web_tools.py @@ -342,14 +342,16 @@ def visit_url(context_variables, url: str): @register_tool("web_search") def web_search(context_variables, query: str): """ - Performs a web search on 'https://www.google.com.sg/?hl=en&gl=US' with the given query. + Performs a web search on 'https://www.bing.com/search' with the given query. Args: query: The query to search for. """ env: BrowserEnv = context_variables.get("web_env", None) assert env is not None, "web_env is not set" try: - action_str = f"_visit_page('https://www.google.com.sg/search?q={quote_plus(query)}&hl=en&gl=US')" + # action_str = f"_visit_page('https://www.google.com.sg/search?q={quote_plus(query)}&hl=en')" + action_str = f"_visit_page('https://www.bing.com/search?q={quote_plus(query)}&FORM=QBLH&hl=en')" + obs = env.step(action_str) web_obs = to_web_obs(obs) except Exception as e: @@ -443,15 +445,18 @@ if __name__ == "__main__": # print(res.value) # res = visit_url(env, 'https://arxiv.org/pdf/2310.13023') # print(res.value) + + context_variables = {"web_env": env} - res = visit_url(context_variables, 'https://en.wikipedia.org/wiki/History_of_the_United_States') + res = visit_url(context_variables, 'https://www.youtube.com/watch?v=pPStdjuYzSI') # res = visit_url(env, 'https://www.reddit.com/r/ChatGPT/comments/1h5ey4m/chatgpt_helped_me_not_blow_up_on_my_boss/') print("******visit_url", res.value) res = get_page_markdown(context_variables) print("******get_page_markdown", res.value) - res = page_down(context_variables) - print("******page_down", res.value) - res = history_back(context_variables) - print("******history_back", res.value) - \ No newline at end of file + # res = page_down(context_variables) + # print("******page_down", res.value) + # res = history_back(context_variables) + # print("******history_back", res.value) + + # print(function_to_json(page_down)) \ No newline at end of file diff --git a/autoagent/util.py b/autoagent/util.py index 6ca2a20..fc454ff 100644 --- a/autoagent/util.py +++ b/autoagent/util.py @@ -349,12 +349,12 @@ def function_to_json(func) -> dict: if param.default == inspect._empty ] - if not parameters: - parameters["dummy"] = { - "type": "string", - "description": "Dummy parameter (not used). Added to satisfy non-empty schema requirements." - } - required = [] + # if not parameters: + # parameters["dummy"] = { + # "type": "string", + # "description": "Dummy parameter (not used). Added to satisfy non-empty schema requirements." + # } + # required = [] return { "type": "function", diff --git a/constant.py b/constant.py index 16fe3d5..ba72a2b 100644 --- a/constant.py +++ b/constant.py @@ -45,7 +45,7 @@ def get_architecture(): if BASE_IMAGES is None: BASE_IMAGES = get_architecture() -COMPLETION_MODEL = os.getenv('COMPLETION_MODEL', "claude-3-5-haiku-20241022") +COMPLETION_MODEL = os.getenv('COMPLETION_MODEL', "claude-3-5-sonnet-20241022") EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL', "text-embedding-3-small") MC_MODE = str_to_bool(os.getenv('MC_MODE', True))