Add one-click download and setup script for OSWorld virtual machine; Make up show_result.py

2024-04-29 12:26:03 +03:00 · 2024-04-10 21:25:28 +08:00
parent 62fd0094b9
commit cd1ebd44ec
3 changed files with 152 additions and 14 deletions
--- a/README.md
+++ b/README.md
@@ -39,21 +39,9 @@ vmrun -T ws list
 ```
 If the installation along with the environment variable set is successful, you will see the message showing the current running virtual machines.

-3. Obtain the virtual machine image. If you are using Linux or Windows with an x86_64 CPU, install the environment package and download the examples and the virtual machine image by executing the following commands:
-Remove the `nogui` parameter if you wish to view the activities within the virtual machine.
+3. Run our setup script to download the necessary virtual machines and set up the environment☕:
 ```bash
-wget https://huggingface.co/datasets/xlangai/ubuntu_x86/resolve/main/Ubuntu.zip
-unzip Ubuntu.zip
-vmrun -T ws start "Ubuntu/Ubuntu.vmx" nogui
-vmrun -T ws snapshot "Ubuntu/Ubuntu.vmx" "init_state"
-```
-
-For macOS with Apple chips, you should install the specially prepared virtual machine image by executing the following commands:
-```bash
-wget https://huggingface.co/datasets/xlangai/ubuntu_arm/resolve/main/Ubuntu.zip
-unzip Ubuntu.zip
-vmrun -T fusion start "Ubuntu/Ubuntu.vmx" nogui
-vmrun -T fusion snapshot "Ubuntu/Ubuntu.vmx" "init_state"
+python setup_vm.py
 ```

 ### On AWS or Azure (Virtualized platform)
--- a/setup_vm.py
+++ b/setup_vm.py
@@ -0,0 +1,79 @@
+import platform
+import subprocess
+
+import requests
+
+# Path to the virtual machine; forward slashes match the earlier README commands for every host OS
+VM_PATH = "Ubuntu/Ubuntu.vmx"  # change this to the path of your downloaded virtual machine
+
+
+def download_and_unzip_vm():
+    """Download the Ubuntu VM image matching this host and unzip it in the current directory.
+
+    Raises Exception on an unsupported host, CalledProcessError if wget or unzip fails.
+    """
+    system, machine = platform.system(), platform.machine()
+    if system == 'Darwin' and machine == 'arm64':  # macOS with Apple Silicon
+        url = "https://huggingface.co/datasets/xlangai/ubuntu_arm/resolve/main/Ubuntu.zip"
+    elif system in ('Linux', 'Windows') and machine in ('x86_64', 'AMD64'):
+        # 64-bit Windows reports 'AMD64' rather than 'x86_64', so accept both spellings
+        url = "https://huggingface.co/datasets/xlangai/ubuntu_x86/resolve/main/Ubuntu.zip"
+    else:
+        raise Exception("Unsupported platform or architecture")
+
+    # Tool output stays on the console (progress visible); check=True stops on a failed step
+    print("Downloading the virtual machine image...")
+    subprocess.run(['wget', url], check=True)
+    print("Unzipping the downloaded file...")
+    subprocess.run(['unzip', 'Ubuntu.zip'], check=True)
+
+
+# Runs as soon as the script executes: fetch and unpack the VM image before the vmrun steps below
+download_and_unzip_vm()
+
+
+# Pick the vmrun "-T" argument that corresponds to the host operating system
+def get_vmrun_type():
+    host_os = platform.system()
+    if host_os in ('Windows', 'Linux'):
+        return '-T ws'
+    if host_os == 'Darwin':  # Darwin is what platform.system() reports on macOS
+        return '-T fusion'
+    raise Exception("Unsupported operating system")
+
+
+# Boot the virtual machine
+subprocess.run(f'vmrun {get_vmrun_type()} start "{VM_PATH}"', shell=True)
+print("Virtual machine started...")
+
+# Query the guest IP address through vmrun (invoked with -wait) and keep the trimmed output
+get_vm_ip = subprocess.run(
+    f'vmrun {get_vmrun_type()} getGuestIPAddress "{VM_PATH}" -wait',
+    shell=True, capture_output=True, text=True,
+)
+vm_ip = get_vm_ip.stdout.strip()
+print("Virtual machine IP address:", vm_ip)
+
+
+# Function used to check whether the virtual machine is ready
+def download_screenshot(ip):
+    """Return True if http://<ip>:5000/screenshot answers with HTTP 200, otherwise False."""
+    url = f"http://{ip}:5000/screenshot"
+    try:
+        # timeout=(connect, read): one second each; a single attempt is made per call
+        response = requests.get(url, timeout=(1, 1))
+    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
+        # Refused connections and slow (timed-out) replies both mean "not ready yet"
+        return False
+    return response.status_code == 200
+
+
+# Poll the readiness probe until it succeeds, pausing between attempts instead of busy-looping
+import time
+while not download_screenshot(vm_ip):
+    print("Check whether the virtual machine is ready...")
+    time.sleep(1)
+print("Virtual machine is ready. Start to make a snapshot on the virtual machine. It would take a while...")
+# Create a snapshot of the virtual machine
+subprocess.run(f'vmrun {get_vmrun_type()} snapshot "{VM_PATH}" "init_state"', shell=True)
+print("Snapshot created.")
--- a/show_result.py
+++ b/show_result.py
@@ -0,0 +1,71 @@
+import os
+
+
+def get_result(action_space, use_model, observation_type, result_dir):
+    """Summarise the scores stored under result_dir/action_space/observation_type/use_model.
+
+    Every <domain>/<example_id>/result.txt is read as one score. Per-domain and per-category
+    success rates are printed, the per-example scores are written to all_result.json in the
+    target directory, and the flat list of scores is returned (None if nothing was found).
+    """
+    import json  # local import: only needed to serialise the per-example scores
+
+    target_dir = os.path.join(result_dir, action_space, observation_type, use_model)
+    if not os.path.exists(target_dir):
+        print("New experiment, no result yet.")
+        return None
+
+    all_result = []
+    domain_result = {}
+    all_result_for_analysis = {}
+
+    for domain in os.listdir(target_dir):
+        domain_path = os.path.join(target_dir, domain)
+        if not os.path.isdir(domain_path):
+            continue
+        for example_id in os.listdir(domain_path):
+            result_path = os.path.join(domain_path, example_id, "result.txt")
+            if not os.path.isfile(result_path):
+                continue
+            # Read the file once and close the handle straight away
+            with open(result_path, "r") as f:
+                result = f.read()
+            try:
+                score = float(result)
+            except ValueError:
+                # Non-numeric content: any non-empty text counts as 1.0, empty text as 0.0
+                score = float(bool(result))
+            domain_result.setdefault(domain, []).append(score)
+            all_result_for_analysis.setdefault(domain, {})[example_id] = score
+            all_result.append(score)
+
+    if not all_result:
+        print("New experiment, no result yet.")
+        return None
+
+    for domain, scores in domain_result.items():
+        print("Domain:", domain, "Runned:", len(scores), "Success Rate:",
+              sum(scores) / len(scores) * 100, "%")
+
+    print(">>>>>>>>>>>>>")
+    categories = {
+        "Office": ("libreoffice_calc", "libreoffice_impress", "libreoffice_writer"),
+        "Daily": ("vlc", "thunderbird", "chrome"),
+        "Professional": ("gimp", "vs_code"),
+    }
+    for category, domains in categories.items():
+        # .get() tolerates domains without any result yet instead of raising KeyError
+        scores = [s for d in domains for s in domain_result.get(d, [])]
+        if scores:  # skip empty categories rather than dividing by zero
+            print(category, "Success Rate:", sum(scores) / len(scores) * 100, "%")
+
+    # Persist the per-example scores as real JSON (str(dict) uses single quotes, which is not JSON)
+    with open(os.path.join(target_dir, "all_result.json"), "w") as f:
+        json.dump(all_result_for_analysis, f)
+
+    print("Runned:", len(all_result), "Current Success Rate:", sum(all_result) / len(all_result) * 100, "%")
+    return all_result
+
+
+if __name__ == '__main__':  # script entry point: summarise one fixed configuration under ./results
+    get_result("pyautogui", "gpt-4-vision-preview", "screenshot", "./results")