Enabling of unittests in aider benchmark should be optional. (#3620)

This commit is contained in:
Raj Maheshwari
2024-08-27 22:55:55 +05:30
committed by GitHub
parent 292148826e
commit 0cdeb83b17
4 changed files with 28 additions and 9 deletions

View File

@@ -32,6 +32,11 @@ development environment and LLM.
- `eval_ids`, e.g. `"1,3,10"`, limits the evaluation to instances with the
given IDs (comma separated).
There are also the following optional environment variables you can set:
```
export USE_UNIT_TESTS=true # if you want to allow the Agent to verify correctness using unit tests. Defaults to false.
```
Following is the basic command to start the evaluation.
You can update the arguments in the script

View File

@@ -6,7 +6,6 @@ INSTRUCTIONS_ADDENDUM = """
Use the above instructions to modify the supplied files: {signature_file}
Don't change the names of existing functions or classes, as they may be referenced from other code like unit tests, etc.
Use the test_file: {test_file}, to verify the correctness of your solution. DO NOT EDIT the test file.
Only use standard python libraries, don't suggest installing any packages.
"""

View File

@@ -32,6 +32,9 @@ from openhands.events.action import CmdRunAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.runtime import Runtime
# Configure whether unit tests are made visible to the Agent.
# Driven by the USE_UNIT_TESTS environment variable: the flag is True only
# when the variable is set to "true" (case-insensitive); unset or any other
# value disables it.
USE_UNIT_TESTS = os.getenv('USE_UNIT_TESTS', 'false').lower() == 'true'
def get_config(
metadata: EvalMetadata,
@@ -85,13 +88,14 @@ async def initialize_runtime(
file_path,
'/workspace',
)
file_path = os.path.join(tmpdir, f'{instance.instance_name}_test.py')
with open(file_path, 'w') as f:
f.write(instance.test)
await runtime.copy_to(
file_path,
'/workspace',
)
if USE_UNIT_TESTS:
file_path = os.path.join(tmpdir, f'{instance.instance_name}_test.py')
with open(file_path, 'w') as f:
f.write(instance.test)
await runtime.copy_to(
file_path,
'/workspace',
)
logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
@@ -163,8 +167,13 @@ async def process_instance(
instruction = instance.instruction
instruction += INSTRUCTIONS_ADDENDUM.format(
signature_file=f'{instance.instance_name}.py',
test_file=f'{instance.instance_name}_test.py',
)
if USE_UNIT_TESTS:
instruction += (
f'Use the test_file: {instance.instance_name}_test.py, to verify '
'the correctness of your solution. DO NOT EDIT the test file.\n\n'
)
instruction += (
'IMPORTANT: You should ONLY interact with the environment provided '
'to you AND NEVER ASK FOR HUMAN HELP.\n'

View File

@@ -35,6 +35,12 @@ COMMAND="export PYTHONPATH=evaluation/aider_bench:\$PYTHONPATH && poetry run pyt
--eval-num-workers $NUM_WORKERS \
--eval-note $AGENT_VERSION"
# Default to NOT using unit tests.
# ":-" also guarantees the variable is exported even when it was set as a
# non-exported shell variable earlier, so the Python process always sees it.
export USE_UNIT_TESTS="${USE_UNIT_TESTS:-false}"
echo "USE_UNIT_TESTS: $USE_UNIT_TESTS"
if [ -n "$EVAL_LIMIT" ]; then
echo "EVAL_LIMIT: $EVAL_LIMIT"
COMMAND="$COMMAND --eval-n-limit $EVAL_LIMIT"