Mirror of https://github.com/All-Hands-AI/OpenHands.git, synced 2024-08-29 01:18:33 +03:00
Enabling unit tests in the aider benchmark should be optional. (#3620)
@@ -32,6 +32,11 @@ development environment and LLM.
 - `eval_ids`, e.g. `"1,3,10"`, limits the evaluation to instances with the
   given IDs (comma separated).
 
+There are also the following optional environment variables you can set:
+```
+export USE_UNIT_TESTS=true # if you want to allow the Agent to verify correctness using unit tests. Defaults to false.
+```
+
 Following is the basic command to start the evaluation.
 
 You can update the arguments in the script

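As a side note on the flag's semantics, here is a minimal sketch (not part of the commit) of how `run_infer.py` below interprets `USE_UNIT_TESTS`: only the case-insensitive string `true` enables unit tests; any other value, including `1` or `yes`, leaves them disabled.

```python
import os

# Not part of the commit: show which values of USE_UNIT_TESTS actually
# enable unit tests under the parsing used in run_infer.py below.
for value in ('true', 'TRUE', '1', 'yes', ''):
    os.environ['USE_UNIT_TESTS'] = value
    enabled = os.environ.get('USE_UNIT_TESTS', 'false').lower() == 'true'
    print(f'{value!r:>8} -> {enabled}')
```
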
@@ -6,7 +6,6 @@ INSTRUCTIONS_ADDENDUM = """
 Use the above instructions to modify the supplied files: {signature_file}
 Don't change the names of existing functions or classes, as they may be referenced from other code like unit tests, etc.
 
-Use the test_file: {test_file}, to verify the correctness of your solution. DO NOT EDIT the test file.
 Only use standard python libraries, don't suggest installing any packages.
 """
 

@@ -32,6 +32,9 @@ from openhands.events.action import CmdRunAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.runtime import Runtime
 
+# Configure visibility of unit tests to the Agent.
+USE_UNIT_TESTS = os.environ.get('USE_UNIT_TESTS', 'false').lower() == 'true'
+
 
 def get_config(
     metadata: EvalMetadata,

@@ -85,13 +88,14 @@ async def initialize_runtime(
             file_path,
             '/workspace',
         )
-        file_path = os.path.join(tmpdir, f'{instance.instance_name}_test.py')
-        with open(file_path, 'w') as f:
-            f.write(instance.test)
-        await runtime.copy_to(
-            file_path,
-            '/workspace',
-        )
+        if USE_UNIT_TESTS:
+            file_path = os.path.join(tmpdir, f'{instance.instance_name}_test.py')
+            with open(file_path, 'w') as f:
+                f.write(instance.test)
+            await runtime.copy_to(
+                file_path,
+                '/workspace',
+            )
     logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
 

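The hunk above makes staging of the per-instance test file conditional. Below is a self-contained sketch of the same pattern; `shutil.copy` and a local `workspace` directory are stand-ins (assumptions for illustration) for `runtime.copy_to` and the sandbox's `/workspace`.

```python
import os
import shutil
import tempfile

# Stand-in for the module-level flag added in the commit.
USE_UNIT_TESTS = os.environ.get('USE_UNIT_TESTS', 'false').lower() == 'true'


def stage_files(instance_name: str, signature: str, test: str, workspace: str) -> None:
    """Write instance files to a temp dir and copy them into the workspace."""
    with tempfile.TemporaryDirectory() as tmpdir:
        # The signature file is always staged for the agent.
        sig_path = os.path.join(tmpdir, f'{instance_name}.py')
        with open(sig_path, 'w') as f:
            f.write(signature)
        shutil.copy(sig_path, workspace)
        # The test file is staged only when unit tests are enabled.
        if USE_UNIT_TESTS:
            test_path = os.path.join(tmpdir, f'{instance_name}_test.py')
            with open(test_path, 'w') as f:
                f.write(test)
            shutil.copy(test_path, workspace)


# Hypothetical sample values; 'two_fer' is just an illustrative name.
os.makedirs('workspace', exist_ok=True)
stage_files('two_fer', 'def two_fer(name="you"):\n    ...\n', '# tests here\n', 'workspace')
print(sorted(os.listdir('workspace')))
```
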
@@ -163,8 +167,13 @@ async def process_instance(
     instruction = instance.instruction
     instruction += INSTRUCTIONS_ADDENDUM.format(
         signature_file=f'{instance.instance_name}.py',
-        test_file=f'{instance.instance_name}_test.py',
     )
+    if USE_UNIT_TESTS:
+        instruction += (
+            f'Use the test_file: {instance.instance_name}_test.py, to verify '
+            'the correctness of your solution. DO NOT EDIT the test file.\n\n'
+        )
+
     instruction += (
         'IMPORTANT: You should ONLY interact with the environment provided '
         'to you AND NEVER ASK FOR HUMAN HELP.\n'

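To make the prompt-level effect concrete, here is a sketch of both instruction variants; the strings are reused from the hunks above, while `base` and the instance name are made-up sample values.

```python
# Post-commit addendum, as reconstructed from the hunks above.
INSTRUCTIONS_ADDENDUM = """
Use the above instructions to modify the supplied files: {signature_file}
Don't change the names of existing functions or classes, as they may be referenced from other code like unit tests, etc.

Only use standard python libraries, don't suggest installing any packages.
"""


def build_instruction(base: str, instance_name: str, use_unit_tests: bool) -> str:
    instruction = base + INSTRUCTIONS_ADDENDUM.format(
        signature_file=f'{instance_name}.py',
    )
    # The test-file sentence is appended only when the flag is on.
    if use_unit_tests:
        instruction += (
            f'Use the test_file: {instance_name}_test.py, to verify '
            'the correctness of your solution. DO NOT EDIT the test file.\n\n'
        )
    return instruction


print(build_instruction('Solve the exercise.\n', 'two_fer', use_unit_tests=False))
print(build_instruction('Solve the exercise.\n', 'two_fer', use_unit_tests=True))
```
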
@@ -35,6 +35,12 @@ COMMAND="export PYTHONPATH=evaluation/aider_bench:\$PYTHONPATH && poetry run pyt
   --eval-num-workers $NUM_WORKERS \
   --eval-note $AGENT_VERSION"
 
+# Default to NOT using unit tests.
+if [ -z "$USE_UNIT_TESTS" ]; then
+  export USE_UNIT_TESTS=false
+fi
+echo "USE_UNIT_TESTS: $USE_UNIT_TESTS"
+
 if [ -n "$EVAL_LIMIT" ]; then
   echo "EVAL_LIMIT: $EVAL_LIMIT"
   COMMAND="$COMMAND --eval-n-limit $EVAL_LIMIT"

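For completeness, the shell default above has a direct Python analogue; a one-liner sketch using `os.environ.setdefault`:

```python
import os

# Sketch: keep any explicit USE_UNIT_TESTS value, otherwise fall back
# to "false", mirroring the `if [ -z ... ]` block in the shell script.
os.environ.setdefault('USE_UNIT_TESTS', 'false')
print(f"USE_UNIT_TESTS: {os.environ['USE_UNIT_TESTS']}")
```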