(eval) EOF fixes for SWE-Bench evaluation (#3623)

* add error handling for client EOF

* remove root check

* remove set -e

* export and echo USER to fix SWE-Bench inference

* fix entry timeout

* add timeout; fix runtime close
This commit is contained in:
Xingyao Wang
2024-08-27 16:09:31 -05:00
committed by GitHub
parent 0b8779447a
commit 98081b9b1b
3 changed files with 12 additions and 8 deletions

View File

@@ -141,6 +141,12 @@ async def initialize_runtime(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0
action = CmdRunAction(command="""export USER=$(whoami); echo USER=${USER} """)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = await runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0
if USE_INSTANCE_IMAGE:
# inject the init script
script_dir = os.path.dirname(__file__)
@@ -192,6 +198,7 @@ async def initialize_runtime(
assert obs.exit_code == 0
else:
action = CmdRunAction(command='source /swe_util/swe_entry.sh')
action.timeout = 1800
logger.info(action, extra={'msg_type': 'ACTION'})
obs = await runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
@@ -323,6 +330,8 @@ async def process_instance(
logger.info(
f'Got git diff for instance {instance.instance_id}:\n--------\n{git_patch}\n--------'
)
await runtime.close()
# ==========================================
# ======= Attempt to evaluate the agent's edits =======

View File

@@ -1,13 +1,5 @@
#!/bin/bash
# set -e
# assert user name is `root`
if [ "$USER" != "root" ]; then
echo "Error: This script is intended to be run by the 'root' user only." >&2
exit 1
fi
source ~/.bashrc
SWEUTIL_DIR=/swe_util

View File

@@ -209,6 +209,9 @@ class RuntimeClient:
def _get_bash_prompt_and_update_pwd(self):
ps1 = self.shell.after
if ps1 == pexpect.EOF:
logger.error(f'Bash shell EOF! {self.shell.after=}, {self.shell.before=}')
raise RuntimeError('Bash shell EOF')
# begin at the last occurrence of '[PEXPECT_BEGIN]'.
# In multi-line bash commands, the prompt will be repeated