From 98081b9b1bdb782165adabd4237e7c7a07e20792 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Tue, 27 Aug 2024 16:09:31 -0500 Subject: [PATCH] (eval) EOF fixes for SWE-Bench evaluation (#3623) * add error handling for client eof * remove root check * remove set -e * echo USER to fix for swebench infer * fix entry timeout * add timeout; fix runtime close --- evaluation/swe_bench/run_infer.py | 9 +++++++++ evaluation/swe_bench/scripts/setup/instance_swe_entry.sh | 8 -------- openhands/runtime/client/client.py | 3 +++ 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py index d368533c3..2f308ba61 100644 --- a/evaluation/swe_bench/run_infer.py +++ b/evaluation/swe_bench/run_infer.py @@ -141,6 +141,12 @@ async def initialize_runtime( logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert obs.exit_code == 0 + action = CmdRunAction(command="""export USER=$(whoami); echo USER=${USER} """) + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert obs.exit_code == 0 + if USE_INSTANCE_IMAGE: # inject the init script script_dir = os.path.dirname(__file__) @@ -192,6 +198,7 @@ async def initialize_runtime( assert obs.exit_code == 0 else: action = CmdRunAction(command='source /swe_util/swe_entry.sh') + action.timeout = 1800 logger.info(action, extra={'msg_type': 'ACTION'}) obs = await runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) @@ -323,6 +330,8 @@ async def process_instance( logger.info( f'Got git diff for instance {instance.instance_id}:\n--------\n{git_patch}\n--------' ) + + await runtime.close() # ========================================== # ======= Attempt to evaluate the agent's edits ======= diff --git a/evaluation/swe_bench/scripts/setup/instance_swe_entry.sh b/evaluation/swe_bench/scripts/setup/instance_swe_entry.sh index 3532152dc..c446354aa 100755 --- a/evaluation/swe_bench/scripts/setup/instance_swe_entry.sh +++ b/evaluation/swe_bench/scripts/setup/instance_swe_entry.sh @@ -1,13 +1,5 @@ #!/bin/bash -# set -e - -# assert user name is `root` -if [ "$USER" != "root" ]; then - echo "Error: This script is intended to be run by the 'root' user only." >&2 - exit 1 -fi - source ~/.bashrc SWEUTIL_DIR=/swe_util diff --git a/openhands/runtime/client/client.py b/openhands/runtime/client/client.py index c21e72a82..8816186c4 100644 --- a/openhands/runtime/client/client.py +++ b/openhands/runtime/client/client.py @@ -209,6 +209,9 @@ class RuntimeClient: def _get_bash_prompt_and_update_pwd(self): ps1 = self.shell.after + if ps1 == pexpect.EOF: + logger.error(f'Bash shell EOF! {self.shell.after=}, {self.shell.before=}') + raise RuntimeError('Bash shell EOF') # begin at the last occurrence of '[PEXPECT_BEGIN]'. # In multi-line bash commands, the prompt will be repeated