Allow setting of runtime container image (#3573)

* Add runtime container image setting
* Fix typo in test
* Fix sandbox base container image
* Update variables
* Update to base_container_image
* Update tests/unit/test_config.py

Co-authored-by: Xingyao Wang <xingyao6@illinois.edu>

* Fixed eval
* Fixed container_image
* Fix typo

---------

Co-authored-by: Xingyao Wang <xingyao6@illinois.edu>
2024-08-29 01:18:33 +03:00 · 2024-08-25 19:05:41 -04:00
parent 356d9b34be
commit f9088766e8
33 changed files with 92 additions and 82 deletions
--- a/.github/workflows/ghcr_runtime.yml
+++ b/.github/workflows/ghcr_runtime.yml
@@ -113,7 +113,7 @@ jobs:

          TEST_RUNTIME=eventstream \
          SANDBOX_USER_ID=$(id -u) \
-          SANDBOX_CONTAINER_IMAGE=$image_name \
+          SANDBOX_BASE_CONTAINER_IMAGE=$image_name \
          TEST_IN_CI=true \
          poetry run pytest --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
      - name: Upload coverage to Codecov
@@ -149,7 +149,7 @@ jobs:

          TEST_RUNTIME=eventstream \
          SANDBOX_USER_ID=$(id -u) \
-          SANDBOX_CONTAINER_IMAGE=$image_name \
+          SANDBOX_BASE_CONTAINER_IMAGE=$image_name \
          TEST_IN_CI=true \
          TEST_ONLY=true \
          ./tests/integration/regenerate.sh
--- a/config.template.toml
+++ b/config.template.toml
@@ -174,7 +174,7 @@ llm_config = 'gpt3'
 #user_id = 1000

 # Container image to use for the sandbox
-#container_image = "nikolaik/python-nodejs:python3.11-nodejs22"
+#base_container_image = "nikolaik/python-nodejs:python3.11-nodejs22"

 # Use host network
 #use_host_network = false
--- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/custom_sandbox_guide.md
+++ b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/custom_sandbox_guide.md
@@ -42,10 +42,10 @@ Créez un fichier ```config.toml``` dans le répertoire OpenHands et entrez ces
 [core]
 workspace_base="./workspace"
 run_as_openhands=true
-sandbox_container_image="image_personnalisée"
+sandbox_base_container_image="image_personnalisée"
 ```

-> Assurez-vous que ```sandbox_container_image``` est défini sur le nom de votre image personnalisée précédente.
+> Assurez-vous que ```sandbox_base_container_image``` est défini sur le nom de votre image personnalisée précédente.

 ## Exécution

@@ -61,7 +61,7 @@ Félicitations !

 Le code pertinent est défini dans [ssh_box.py](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/ssh_box.py) et [image_agnostic_util.py](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/image_agnostic_util.py).

-En particulier, ssh_box.py vérifie l'objet config pour ```config.sandbox_container_image``` et ensuite tente de récupérer l'image à l'aide de [get_od_sandbox_image](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/image_agnostic_util.py#L72), qui est défini dans image_agnostic_util.py.
+En particulier, ssh_box.py vérifie l'objet config pour ```config.sandbox.base_container_image``` et ensuite tente de récupérer l'image à l'aide de [get_od_sandbox_image](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/image_agnostic_util.py#L72), qui est défini dans image_agnostic_util.py.

 Lorsqu'une image personnalisée est utilisée pour la première fois, elle ne sera pas trouvée et donc elle sera construite (à l'exécution ultérieure, l'image construite sera trouvée et renvoyée).

@@ -92,7 +92,7 @@ Si vous voyez cette erreur dans la sortie de la console, il s'agit du fait que O
 [core]
 workspace_base="./workspace"
 run_as_openhands=true
-sandbox_container_image="image_personnalisée"
+sandbox_base_container_image="image_personnalisée"
 sandbox_user_id="1001"
 ```

--- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/custom_sandbox_guide.md
+++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/custom_sandbox_guide.md
@@ -41,10 +41,10 @@ docker build -t custom_image .
 [core]
 workspace_base="./workspace"
 run_as_openhands=true
-sandbox_container_image="custom_image"
+sandbox_base_container_image="custom_image"
 ```

-> 确保 `sandbox_container_image` 设置为您前一步中自定义映像的名称。
+> 确保 `sandbox_base_container_image` 设置为您前一步中自定义映像的名称。

 ## 运行

@@ -60,7 +60,7 @@ sandbox_container_image="custom_image"

 相关代码定义在 [ssh_box.py](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/ssh_box.py) 和 [image_agnostic_util.py](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/image_agnostic_util.py) 中。

-特别是 ssh_box.py 检查配置对象中的 ```config.sandbox_container_image```，然后尝试使用 [get_od_sandbox_image](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/image_agnostic_util.py#L72)，在 image_agnostic_util.py 定义中进行检索。
+特别是 ssh_box.py 检查配置对象中的 ```config.sandbox.base_container_image```，然后尝试使用 [get_od_sandbox_image](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/image_agnostic_util.py#L72)，在 image_agnostic_util.py 定义中进行检索。

 初次使用自定义映像时，该映像将不会被找到，因此将被构建（在后续运行中已构建的映像将被查找并返回）。

@@ -92,7 +92,7 @@ dockerfile_content = (
 [core]
 workspace_base="./workspace"
 run_as_openhands=true
-sandbox_container_image="custom_image"
+sandbox_base_container_image="custom_image"
 sandbox_user_id="1001"
 ```

--- a/docs/modules/usage/how-to/custom-sandbox-guide.md
+++ b/docs/modules/usage/how-to/custom-sandbox-guide.md
@@ -67,10 +67,10 @@ Create a `config.toml` file in the OpenHands directory and enter these contents:
 [core]
 workspace_base="./workspace"
 run_as_openhands=true
-sandbox_container_image="custom_image"
+sandbox_base_container_image="custom_image"
 ```

-For `sandbox_container_image`, you can specify either:
+For `sandbox_base_container_image`, you can specify either:

 1. The name of your custom image that you built in the previous step (e.g., `"custom_image"`)
 2. A pre-existing image from Docker Hub (e.g., `"node:20"` if you want a sandbox with Node.js pre-installed)
@@ -98,7 +98,7 @@ If you see this error in the console output it is because OpenHands is trying to
 [core]
 workspace_base="./workspace"
 run_as_openhands=true
-sandbox_container_image="custom_image"
+sandbox_base_container_image="custom_image"
 sandbox_user_id="1001"
 ```

--- a/docs/modules/usage/how-to/evaluation-harness.md
+++ b/docs/modules/usage/how-to/evaluation-harness.md
@@ -116,7 +116,7 @@ To create an evaluation workflow for your benchmark, follow these steps:
           runtime='eventstream',
           max_iterations=metadata.max_iterations,
           sandbox=SandboxConfig(
-               container_image='your_container_image',
+               base_container_image='your_container_image',
               enable_auto_lint=True,
               timeout=300,
           ),
--- a/docs/static/img/backend_architecture.puml
+++ b/docs/static/img/backend_architecture.puml
@@ -135,7 +135,7 @@ class openhands.sandbox.sandbox.DockerInteractive {
  workspace_dir: None
  workspace_dir: None
  timeout: int
-  container_image: None
+  base_container_image: None
  container_name: None
 }
 class openhands.observation.UserMessageObservation {
--- a/docs/static/img/backend_architecture.svg
+++ b/docs/static/img/backend_architecture.svg
--- a/evaluation/EDA/run_infer.py
+++ b/evaluation/EDA/run_infer.py
@@ -62,7 +62,7 @@ def get_config(
        runtime='eventstream',
        max_iterations=metadata.max_iterations,
        sandbox=SandboxConfig(
-            container_image='python:3.11-bookworm',
+            base_container_image='python:3.11-bookworm',
            enable_auto_lint=False,
            use_host_network=False,
        ),
--- a/evaluation/agent_bench/run_infer.py
+++ b/evaluation/agent_bench/run_infer.py
@@ -44,7 +44,7 @@ def get_config(
        runtime='eventstream',
        max_iterations=metadata.max_iterations,
        sandbox=SandboxConfig(
-            container_image='python:3.11-bookworm',
+            base_container_image='python:3.11-bookworm',
            enable_auto_lint=True,
            use_host_network=False,
        ),
--- a/evaluation/aider_bench/run_infer.py
+++ b/evaluation/aider_bench/run_infer.py
@@ -42,7 +42,7 @@ def get_config(
        runtime='eventstream',
        max_iterations=metadata.max_iterations,
        sandbox=SandboxConfig(
-            container_image='python:3.11-bookworm',
+            base_container_image='python:3.11-bookworm',
            enable_auto_lint=True,
            use_host_network=False,
            timeout=100,
--- a/evaluation/biocoder/run_infer.py
+++ b/evaluation/biocoder/run_infer.py
@@ -62,7 +62,7 @@ def get_config(
        runtime='eventstream',
        max_iterations=metadata.max_iterations,
        sandbox=SandboxConfig(
-            container_image=BIOCODER_BENCH_CONTAINER_IMAGE,
+            base_container_image=BIOCODER_BENCH_CONTAINER_IMAGE,
            enable_auto_lint=True,
            use_host_network=False,
        ),
--- a/evaluation/bird/run_infer.py
+++ b/evaluation/bird/run_infer.py
@@ -75,7 +75,7 @@ def get_config(
        runtime='eventstream',
        max_iterations=metadata.max_iterations,
        sandbox=SandboxConfig(
-            container_image='python:3.11-bookworm',
+            base_container_image='python:3.11-bookworm',
            enable_auto_lint=True,
            use_host_network=False,
        ),
--- a/evaluation/browsing_delegation/run_infer.py
+++ b/evaluation/browsing_delegation/run_infer.py
@@ -40,7 +40,7 @@ def get_config(
        runtime='eventstream',
        max_iterations=metadata.max_iterations,
        sandbox=SandboxConfig(
-            container_image='python:3.11-bookworm',
+            base_container_image='python:3.11-bookworm',
            enable_auto_lint=False,
            use_host_network=False,
        ),
--- a/evaluation/gaia/run_infer.py
+++ b/evaluation/gaia/run_infer.py
@@ -51,7 +51,7 @@ def get_config(
        runtime='eventstream',
        max_iterations=metadata.max_iterations,
        sandbox=SandboxConfig(
-            container_image='python:3.11-bookworm',
+            base_container_image='python:3.11-bookworm',
            enable_auto_lint=True,
            use_host_network=False,
        ),
--- a/evaluation/gorilla/run_infer.py
+++ b/evaluation/gorilla/run_infer.py
@@ -43,7 +43,7 @@ def get_config(
        runtime='eventstream',
        max_iterations=metadata.max_iterations,
        sandbox=SandboxConfig(
-            container_image='python:3.11-bookworm',
+            base_container_image='python:3.11-bookworm',
            enable_auto_lint=True,
            use_host_network=False,
        ),
--- a/evaluation/gpqa/run_infer.py
+++ b/evaluation/gpqa/run_infer.py
@@ -65,7 +65,7 @@ def get_config(
        runtime='eventstream',
        max_iterations=metadata.max_iterations,
        sandbox=SandboxConfig(
-            container_image='python:3.11-bookworm',
+            base_container_image='python:3.11-bookworm',
            enable_auto_lint=True,
            use_host_network=False,
        ),
--- a/evaluation/humanevalfix/run_infer.py
+++ b/evaluation/humanevalfix/run_infer.py
@@ -86,7 +86,7 @@ def get_config(
        runtime='eventstream',
        max_iterations=metadata.max_iterations,
        sandbox=SandboxConfig(
-            container_image='python:3.11-bookworm',
+            base_container_image='python:3.11-bookworm',
            enable_auto_lint=True,
            use_host_network=False,
        ),
--- a/evaluation/logic_reasoning/run_infer.py
+++ b/evaluation/logic_reasoning/run_infer.py
@@ -49,7 +49,7 @@ def get_config(
        runtime='eventstream',
        max_iterations=metadata.max_iterations,
        sandbox=SandboxConfig(
-            container_image='xingyaoww/od-eval-logic-reasoning:v1.0',
+            base_container_image='xingyaoww/od-eval-logic-reasoning:v1.0',
            enable_auto_lint=True,
            use_host_network=False,
            runtime_extra_deps='$OD_INTERPRETER_PATH -m pip install scitools-pyke',
--- a/evaluation/miniwob/run_infer.py
+++ b/evaluation/miniwob/run_infer.py
@@ -49,7 +49,7 @@ def get_config(
        runtime='eventstream',
        max_iterations=metadata.max_iterations,
        sandbox=SandboxConfig(
-            container_image='xingyaoww/od-eval-miniwob:v1.0',
+            base_container_image='xingyaoww/od-eval-miniwob:v1.0',
            enable_auto_lint=True,
            use_host_network=False,
            browsergym_eval_env=env_id,
--- a/evaluation/mint/run_infer.py
+++ b/evaluation/mint/run_infer.py
@@ -101,7 +101,7 @@ def get_config(
        runtime='eventstream',
        max_iterations=metadata.max_iterations,
        sandbox=SandboxConfig(
-            container_image='xingyaoww/od-eval-mint:v1.0',
+            base_container_image='xingyaoww/od-eval-mint:v1.0',
            enable_auto_lint=True,
            use_host_network=False,
            runtime_extra_deps=f'$OD_INTERPRETER_PATH -m pip install {" ".join(MINT_DEPENDENCIES)}',
--- a/evaluation/ml_bench/run_infer.py
+++ b/evaluation/ml_bench/run_infer.py
@@ -80,7 +80,7 @@ def get_config(
        runtime='eventstream',
        max_iterations=metadata.max_iterations,
        sandbox=SandboxConfig(
-            container_image='public.ecr.aws/i5g0m1f6/ml-bench',
+            base_container_image='public.ecr.aws/i5g0m1f6/ml-bench',
            enable_auto_lint=True,
            use_host_network=False,
        ),
--- a/evaluation/swe_bench/run_infer.py
+++ b/evaluation/swe_bench/run_infer.py
@@ -93,9 +93,9 @@ def get_config(
    SWE_BENCH_CONTAINER_IMAGE = 'ghcr.io/opendevin/eval-swe-bench:full-v1.2.1'
    if USE_INSTANCE_IMAGE:
        # We use a different instance image for the each instance of swe-bench eval
-        container_image = 'sweb.eval.x86_64.' + instance['instance_id']
+        base_container_image = 'sweb.eval.x86_64.' + instance['instance_id']
    else:
-        container_image = SWE_BENCH_CONTAINER_IMAGE
+        base_container_image = SWE_BENCH_CONTAINER_IMAGE

    config = AppConfig(
        default_agent=metadata.agent_class,
@@ -104,7 +104,7 @@ def get_config(
        max_budget_per_task=4,
        max_iterations=metadata.max_iterations,
        sandbox=SandboxConfig(
-            container_image=container_image,
+            base_container_image=base_container_image,
            enable_auto_lint=True,
            use_host_network=False,
            # large enough timeout, since some testcases take very long to run
--- a/evaluation/toolqa/run_infer.py
+++ b/evaluation/toolqa/run_infer.py
@@ -45,7 +45,7 @@ def get_config(
        runtime='eventstream',
        max_iterations=metadata.max_iterations,
        sandbox=SandboxConfig(
-            container_image='python:3.11-bookworm',
+            base_container_image='python:3.11-bookworm',
            enable_auto_lint=True,
            use_host_network=False,
        ),
--- a/evaluation/webarena/run_infer.py
+++ b/evaluation/webarena/run_infer.py
@@ -54,7 +54,7 @@ def get_config(
        runtime='eventstream',
        max_iterations=metadata.max_iterations,
        sandbox=SandboxConfig(
-            container_image='python:3.11-bookworm',
+            base_container_image='python:3.11-bookworm',
            enable_auto_lint=True,
            use_host_network=False,
            browsergym_eval_env=env_id,
--- a/openhands/core/config.py
+++ b/openhands/core/config.py
@@ -179,7 +179,8 @@ class SandboxConfig(metaclass=Singleton):

    Attributes:
        api_hostname: The hostname for the EventStream Runtime API.
-        container_image: The container image to use for the sandbox.
+        base_container_image: The base container image from which to build the runtime image.
+        runtime_container_image: The runtime container image to use.
        user_id: The user ID for the sandbox.
        timeout: The timeout for the sandbox.
        enable_auto_lint: Whether to enable auto-lint.
@@ -199,7 +200,10 @@ class SandboxConfig(metaclass=Singleton):
    """

    api_hostname: str = 'localhost'
-    container_image: str = 'nikolaik/python-nodejs:python3.11-nodejs22'  # default to nikolaik/python-nodejs:python3.11-nodejs22 for eventstream runtime
+    base_container_image: str | None = (
+        'nikolaik/python-nodejs:python3.11-nodejs22'  # default to nikolaik/python-nodejs:python3.11-nodejs22 for eventstream runtime
+    )
+    runtime_container_image: str | None = None
    user_id: int = os.getuid() if hasattr(os, 'getuid') else 1000
    timeout: int = 120
    enable_auto_lint: bool = (
--- a/openhands/core/schema/config.py
+++ b/openhands/core/schema/config.py
@@ -22,7 +22,7 @@ class ConfigType(str, Enum):
    CACHE_DIR = 'CACHE_DIR'
    LLM_MODEL = 'LLM_MODEL'
    CONFIRMATION_MODE = 'CONFIRMATION_MODE'
-    SANDBOX_CONTAINER_IMAGE = 'SANDBOX_CONTAINER_IMAGE'
+    BASE_CONTAINER_IMAGE = 'BASE_CONTAINER_IMAGE'
    RUN_AS_OPENHANDS = 'RUN_AS_OPENHANDS'
    LLM_EMBEDDING_MODEL = 'LLM_EMBEDDING_MODEL'
    LLM_EMBEDDING_BASE_URL = 'LLM_EMBEDDING_BASE_URL'
--- a/openhands/runtime/client/runtime.py
+++ b/openhands/runtime/client/runtime.py
@@ -104,7 +104,6 @@ class EventStreamRuntime(Runtime):
        event_stream: EventStream,
        sid: str = 'default',
        plugins: list[PluginRequirement] | None = None,
-        container_image: str | None = None,
    ):
        super().__init__(
            config, event_stream, sid, plugins
@@ -118,11 +117,8 @@ class EventStreamRuntime(Runtime):
        )
        # TODO: We can switch to aiodocker when `get_od_sandbox_image` is updated to use aiodocker
        self.docker_client: docker.DockerClient = self._init_docker_client()
-        self.container_image = (
-            self.config.sandbox.container_image
-            if container_image is None
-            else container_image
-        )
+        self.base_container_image = self.config.sandbox.base_container_image
+        self.runtime_container_image = self.config.sandbox.runtime_container_image
        self.container_name = self.container_name_prefix + self.instance_id

        self.container = None
@@ -140,11 +136,16 @@ class EventStreamRuntime(Runtime):
                f'Installing extra user-provided dependencies in the runtime image: {self.config.sandbox.runtime_extra_deps}'
            )

-        self.container_image = build_runtime_image(
-            self.container_image,
-            self.runtime_builder,
-            extra_deps=self.config.sandbox.runtime_extra_deps,
-        )
+        if self.runtime_container_image is None:
+            if self.base_container_image is None:
+                raise ValueError(
+                    'Neither runtime container image nor base container image is set'
+                )
+            self.runtime_container_image = build_runtime_image(
+                self.base_container_image,
+                self.runtime_builder,
+                extra_deps=self.config.sandbox.runtime_extra_deps,
+            )
        self.container = await self._init_container(
            self.sandbox_workspace_dir,
            mount_dir=self.config.workspace_mount_path,
@@ -181,7 +182,7 @@ class EventStreamRuntime(Runtime):
    ):
        try:
            logger.info(
-                f'Starting container with image: {self.container_image} and name: {self.container_name}'
+                f'Starting container with image: {self.runtime_container_image} and name: {self.container_name}'
            )
            plugin_arg = ''
            if plugins is not None and len(plugins) > 0:
@@ -215,7 +216,7 @@ class EventStreamRuntime(Runtime):
            else:
                browsergym_arg = ''
            container = self.docker_client.containers.run(
-                self.container_image,
+                self.runtime_container_image,
                command=(
                    f'/openhands/miniforge3/bin/mamba run --no-capture-output -n base '
                    'PYTHONUNBUFFERED=1 poetry run '
--- a/tests/integration/regenerate.sh
+++ b/tests/integration/regenerate.sh
@@ -57,8 +57,8 @@ mkdir -p $WORKSPACE_BASE

 # use environmental variable if exists
 TEST_RUNTIME="${TEST_RUNTIME:-eventstream}"
-if [ -z "$SANDBOX_CONTAINER_IMAGE" ]; then
-  SANDBOX_CONTAINER_IMAGE="nikolaik/python-nodejs:python3.11-nodejs22"
+if [ -z "$SANDBOX_BASE_CONTAINER_IMAGE" ]; then
+  SANDBOX_BASE_CONTAINER_IMAGE="nikolaik/python-nodejs:python3.11-nodejs22"
 fi

 MAX_ITERATIONS=15
@@ -114,7 +114,7 @@ run_test() {
    MAX_ITERATIONS=$MAX_ITERATIONS \
    DEFAULT_AGENT=$agent \
    TEST_RUNTIME="$TEST_RUNTIME" \
-    SANDBOX_CONTAINER_IMAGE="$SANDBOX_CONTAINER_IMAGE" \
+    SANDBOX_BASE_CONTAINER_IMAGE="$SANDBOX_BASE_CONTAINER_IMAGE" \
    $pytest_cmd 2>&1 | tee $TMP_FILE

  # Capture the exit code of pytest
@@ -185,7 +185,7 @@ regenerate_without_llm() {
      FORCE_APPLY_PROMPTS=true \
      DEFAULT_AGENT=$agent \
      TEST_RUNTIME="$TEST_RUNTIME" \
-      SANDBOX_CONTAINER_IMAGE="$SANDBOX_CONTAINER_IMAGE" \
+      SANDBOX_BASE_CONTAINER_IMAGE="$SANDBOX_BASE_CONTAINER_IMAGE" \
      poetry run pytest -s $SCRIPT_DIR/test_agent.py::$test_name
  set +x
 }
@@ -212,7 +212,7 @@ regenerate_with_llm() {
      WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
      DEFAULT_AGENT=$agent \
      RUNTIME="$TEST_RUNTIME" \
-      SANDBOX_CONTAINER_IMAGE="$SANDBOX_CONTAINER_IMAGE" \
+      SANDBOX_BASE_CONTAINER_IMAGE="$SANDBOX_BASE_CONTAINER_IMAGE" \
      poetry run python "$PROJECT_ROOT/openhands/core/main.py" \
      -i $MAX_ITERATIONS \
      -t "$task Do not ask me for confirmation at any point." \
--- a/tests/runtime/conftest.py
+++ b/tests/runtime/conftest.py
@@ -61,9 +61,9 @@ def enable_auto_lint(request):


@pytest.fixture(scope='module', params=None)
-def container_image(request):
+def base_container_image(request):
    time.sleep(1)
-    env_image = os.environ.get('SANDBOX_CONTAINER_IMAGE')
+    env_image = os.environ.get('BASE_CONTAINER_IMAGE')
    if env_image:
        request.param = env_image
    else:
@@ -95,11 +95,12 @@ async def _load_runtime(
    box_class,
    run_as_openhands: bool = True,
    enable_auto_lint: bool = False,
-    container_image: str | None = None,
+    base_container_image: str | None = None,
    browsergym_eval_env: str | None = None,
 ) -> Runtime:
    sid = 'test'
    cli_session = 'main_test'
+
    # AgentSkills need to be initialized **before** Jupyter
    # otherwise Jupyter will not access the proper dependencies installed by AgentSkills
    plugins = [AgentSkillsRequirement(), JupyterRequirement()]
@@ -114,19 +115,17 @@ async def _load_runtime(
    load_from_env(config, os.environ)
    config.run_as_openhands = run_as_openhands
    config.sandbox.enable_auto_lint = enable_auto_lint
+    if base_container_image is not None:
+        config.sandbox.base_container_image = base_container_image

    file_store = get_file_store(config.file_store, config.file_store_path)
    event_stream = EventStream(cli_session, file_store)

-    if container_image is not None:
-        config.sandbox.container_image = container_image
-
    runtime = box_class(
        config=config,
        event_stream=event_stream,
        sid=sid,
        plugins=plugins,
-        container_image=container_image,
    )
    await runtime.ainit()
    await asyncio.sleep(1)
--- a/tests/runtime/test_browsing.py
+++ b/tests/runtime/test_browsing.py
@@ -80,7 +80,7 @@ async def test_browsergym_eval_env(temp_dir):
        # only supported in event stream runtime
        box_class=EventStreamRuntime,
        run_as_openhands=False,  # need root permission to access file
-        container_image='xingyaoww/od-eval-miniwob:v1.0',
+        base_container_image='xingyaoww/od-eval-miniwob:v1.0',
        browsergym_eval_env='browsergym/miniwob.choose-list',
    )
    from openhands.runtime.browser.browser_env import (
--- a/tests/runtime/test_images.py
+++ b/tests/runtime/test_images.py
@@ -14,15 +14,17 @@ from openhands.events.action import CmdRunAction


@pytest.mark.asyncio
-async def test_bash_python_version(temp_dir, box_class, container_image):
+async def test_bash_python_version(temp_dir, box_class, base_container_image):
    """Make sure Python is available in bash."""
-    if container_image not in [
+    if base_container_image not in [
        'python:3.11-bookworm',
        'nikolaik/python-nodejs:python3.11-nodejs22',
    ]:
        pytest.skip('This test is only for python-related images')

-    runtime = await _load_runtime(temp_dir, box_class, container_image=container_image)
+    runtime = await _load_runtime(
+        temp_dir, box_class, base_container_image=base_container_image
+    )

    action = CmdRunAction(command='which python')
    logger.info(action, extra={'msg_type': 'ACTION'})
@@ -49,15 +51,17 @@ async def test_bash_python_version(temp_dir, box_class, container_image):


@pytest.mark.asyncio
-async def test_nodejs_22_version(temp_dir, box_class, container_image):
+async def test_nodejs_22_version(temp_dir, box_class, base_container_image):
    """Make sure Node.js is available in bash."""
-    if container_image not in [
+    if base_container_image not in [
        'node:22-bookworm',
        'nikolaik/python-nodejs:python3.11-nodejs22',
    ]:
        pytest.skip('This test is only for nodejs-related images')

-    runtime = await _load_runtime(temp_dir, box_class, container_image=container_image)
+    runtime = await _load_runtime(
+        temp_dir, box_class, base_container_image=base_container_image
+    )

    action = CmdRunAction(command='node --version')
    logger.info(action, extra={'msg_type': 'ACTION'})
@@ -71,14 +75,16 @@ async def test_nodejs_22_version(temp_dir, box_class, container_image):


@pytest.mark.asyncio
-async def test_go_version(temp_dir, box_class, container_image):
+async def test_go_version(temp_dir, box_class, base_container_image):
    """Make sure Go is available in bash."""
-    if container_image not in [
+    if base_container_image not in [
        'golang:1.23-bookworm',
    ]:
        pytest.skip('This test is only for go-related images')

-    runtime = await _load_runtime(temp_dir, box_class, container_image=container_image)
+    runtime = await _load_runtime(
+        temp_dir, box_class, base_container_image=base_container_image
+    )

    action = CmdRunAction(command='go version')
    logger.info(action, extra={'msg_type': 'ACTION'})
--- a/tests/unit/test_config.py
+++ b/tests/unit/test_config.py
@@ -75,7 +75,7 @@ def test_load_from_old_style_env(monkeypatch, default_config):
    monkeypatch.setenv('AGENT_MEMORY_ENABLED', 'True')
    monkeypatch.setenv('DEFAULT_AGENT', 'PlannerAgent')
    monkeypatch.setenv('WORKSPACE_BASE', '/opt/files/workspace')
-    monkeypatch.setenv('SANDBOX_CONTAINER_IMAGE', 'custom_image')
+    monkeypatch.setenv('SANDBOX_BASE_CONTAINER_IMAGE', 'custom_image')

    load_from_env(default_config, os.environ)

@@ -89,7 +89,7 @@ def test_load_from_old_style_env(monkeypatch, default_config):
    assert (
        default_config.workspace_mount_path_in_sandbox is not UndefinedString.UNDEFINED
    )
-    assert default_config.sandbox.container_image == 'custom_image'
+    assert default_config.sandbox.base_container_image == 'custom_image'


 def test_load_from_new_style_toml(default_config, temp_toml_file):
@@ -178,7 +178,7 @@ memory_enabled = true
 [core]
 workspace_base = "/opt/files2/workspace"
 sandbox_timeout = 500
-sandbox_container_image = "node:14"
+sandbox_base_container_image = "node:14"
 sandbox_user_id = 1001
 default_agent = "TestAgent"
 """
@@ -192,7 +192,7 @@ default_agent = "TestAgent"
    assert default_config.get_agent_config().memory_enabled is True
    assert default_config.workspace_base == '/opt/files2/workspace'
    assert default_config.sandbox.timeout == 500
-    assert default_config.sandbox.container_image == 'node:14'
+    assert default_config.sandbox.base_container_image == 'node:14'
    assert default_config.sandbox.user_id == 1001
    assert default_config.workspace_mount_path_in_sandbox == '/workspace'

@@ -200,7 +200,7 @@ default_agent = "TestAgent"

    # app config doesn't have fields sandbox_*
    assert not hasattr(default_config, 'sandbox_timeout')
-    assert not hasattr(default_config, 'sandbox_container_image')
+    assert not hasattr(default_config, 'sandbox_base_container_image')
    assert not hasattr(default_config, 'sandbox_user_id')

    # after finalize_config, workspace_mount_path is set to the absolute path of workspace_base
@@ -319,7 +319,7 @@ model = "test-model"

 [sandbox]
 timeout = 1
-container_image = "custom_image"
+base_container_image = "custom_image"
 user_id = 1001
 """
        )
@@ -330,7 +330,7 @@ user_id = 1001

    assert default_config.get_llm_config().model == 'test-model'
    assert default_config.sandbox.timeout == 1
-    assert default_config.sandbox.container_image == 'custom_image'
+    assert default_config.sandbox.base_container_image == 'custom_image'
    assert default_config.sandbox.user_id == 1001


@@ -357,7 +357,7 @@ def test_defaults_dict_after_updates(default_config):
    )
    assert defaults_after_updates['sandbox']['timeout']['default'] == 120
    assert (
-        defaults_after_updates['sandbox']['container_image']['default']
+        defaults_after_updates['sandbox']['base_container_image']['default']
        == 'nikolaik/python-nodejs:python3.11-nodejs22'
    )
    assert defaults_after_updates == initial_defaults