mirror of
https://github.com/xlang-ai/OSWorld.git
synced 2024-04-29 12:26:03 +03:00
Add one-click download and setup script for OSWorld virtual machine; Make up show_result.py
This commit is contained in:
16
README.md
16
README.md
@@ -39,21 +39,9 @@ vmrun -T ws list
|
||||
```
|
||||
If the installation and the environment variable setup were successful, you will see a message listing the currently running virtual machines.
|
||||
|
||||
3. Obtain the virtual machine image. If you are using Linux or Windows with an x86_64 CPU, install the environment package and download the examples and the virtual machine image by executing the following commands:
|
||||
Remove the `nogui` parameter if you wish to view the activities within the virtual machine.
|
||||
3. Run our setup script to download the necessary virtual machines and set up the environment☕:
|
||||
```bash
|
||||
wget https://huggingface.co/datasets/xlangai/ubuntu_x86/resolve/main/Ubuntu.zip
|
||||
unzip Ubuntu.zip
|
||||
vmrun -T ws start "Ubuntu/Ubuntu.vmx" nogui
|
||||
vmrun -T ws snapshot "Ubuntu/Ubuntu.vmx" "init_state"
|
||||
```
|
||||
|
||||
For macOS with Apple chips, you should install the specially prepared virtual machine image by executing the following commands:
|
||||
```bash
|
||||
wget https://huggingface.co/datasets/xlangai/ubuntu_arm/resolve/main/Ubuntu.zip
|
||||
unzip Ubuntu.zip
|
||||
vmrun -T fusion start "Ubuntu/Ubuntu.vmx" nogui
|
||||
vmrun -T fusion snapshot "Ubuntu/Ubuntu.vmx" "init_state"
|
||||
python setup_vm.py
|
||||
```
|
||||
|
||||
### On AWS or Azure (Virtualized platform)
|
||||
|
||||
79
setup_vm.py
Normal file
79
setup_vm.py
Normal file
@@ -0,0 +1,79 @@
|
||||
import platform
import subprocess
import time

import requests
|
||||
|
||||
# Define the path to the virtual machine (relative to the current working directory,
# which is where the downloaded archive is extracted).
# Windows keeps the original backslash form; on Linux and macOS a backslash is an
# ordinary file-name character, so a forward slash is required there.
# Change this to the path of your downloaded virtual machine if it lives elsewhere.
VM_PATH = r"Ubuntu\Ubuntu.vmx" if platform.system() == 'Windows' else "Ubuntu/Ubuntu.vmx"
|
||||
|
||||
|
||||
def download_and_unzip_vm():
    """Download the Ubuntu VM archive that matches this host and extract it.

    The archive is fetched with ``wget`` into the current working directory
    as ``Ubuntu.zip`` and then extracted in place with ``unzip``.

    Raises:
        Exception: if the host OS / CPU combination has no matching image.
        subprocess.CalledProcessError: if ``wget`` or ``unzip`` exits with a
            non-zero status.
    """
    system = platform.system()
    # Windows reports 64-bit Intel/AMD CPUs as 'AMD64', Linux as 'x86_64';
    # normalise the case so both spellings are recognised.
    machine = platform.machine().lower()

    # Determine the platform and CPU architecture to decide the correct VM image to download
    if system == 'Darwin' and machine == 'arm64':  # macOS with Apple Silicon
        url = "https://huggingface.co/datasets/xlangai/ubuntu_arm/resolve/main/Ubuntu.zip"
    elif system in ('Linux', 'Windows') and machine in ('x86_64', 'amd64'):  # Linux or Windows with x86_64 CPU
        url = "https://huggingface.co/datasets/xlangai/ubuntu_x86/resolve/main/Ubuntu.zip"
    else:
        raise Exception("Unsupported platform or architecture")

    # Download the virtual machine image. Output is left attached to the
    # terminal so progress and error messages stay visible, and check=True
    # stops the script instead of continuing with a missing/partial archive.
    print("Downloading the virtual machine image...")
    subprocess.run(['wget', url], check=True)

    # Unzip the downloaded file
    print("Unzipping the downloaded file...")
    subprocess.run(['unzip', 'Ubuntu.zip'], check=True)
|
||||
|
||||
|
||||
# Execute the function to download and unzip the VM.
# This is a module-level call, so it runs as soon as the script is executed.
download_and_unzip_vm()
|
||||
|
||||
|
||||
# Pick the vmrun host-type argument that corresponds to the host operating system
def get_vmrun_type():
    """Return the ``-T`` argument string to pass to ``vmrun`` on this host.

    Returns:
        ``'-T ws'`` on Windows and Linux, ``'-T fusion'`` on macOS.

    Raises:
        Exception: if the host operating system is none of the above.
    """
    host_os = platform.system()
    if host_os in ('Windows', 'Linux'):
        return '-T ws'
    if host_os == 'Darwin':  # Darwin is the system name for macOS
        return '-T fusion'
    raise Exception("Unsupported operating system")
|
||||
|
||||
|
||||
# Start the virtual machine
start_vm = subprocess.run(f'vmrun {get_vmrun_type()} start "{VM_PATH}"', shell=True)
if start_vm.returncode == 0:
    print("Virtual machine started...")
else:
    # Do not abort here: the IP lookup below is the step that decides whether
    # the machine is actually usable.
    print(f"Warning: 'vmrun start' exited with status {start_vm.returncode}; trying to continue...")

# Get the IP address of the virtual machine
get_vm_ip = subprocess.run(f'vmrun {get_vmrun_type()} getGuestIPAddress "{VM_PATH}" -wait', shell=True,
                           capture_output=True,
                           text=True)

vm_ip = get_vm_ip.stdout.strip()

# Without a usable address the readiness check further down could never
# succeed, so fail loudly instead of polling forever.
if get_vm_ip.returncode != 0 or not vm_ip:
    raise RuntimeError(
        "Could not obtain the virtual machine IP address: "
        f"{vm_ip or get_vm_ip.stderr.strip() or 'no output from vmrun'}"
    )

print("Virtual machine IP address:", vm_ip)
|
||||
|
||||
|
||||
# Function used to check whether the virtual machine is ready
def download_screenshot(ip):
    """Probe the screenshot endpoint on the virtual machine once.

    Args:
        ip: host name or IP address to query on port 5000.

    Returns:
        True if ``GET http://{ip}:5000/screenshot`` answered with HTTP 200;
        False if it answered with another status, the connection failed, or
        the request timed out.
    """
    url = f"http://{ip}:5000/screenshot"
    try:
        # timeout is (connect timeout, read timeout) in seconds; this is a
        # single attempt, retrying is left to the caller.
        response = requests.get(url, timeout=(1, 1))
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
        # A read timeout is not a ConnectionError, so it has to be listed
        # explicitly; either way the server is simply not ready yet.
        return False
    return response.status_code == 200
|
||||
|
||||
|
||||
# Try downloading the screenshot until successful. Pause between attempts:
# a refused connection returns immediately, and without the pause the loop
# would spin at full speed and flood the terminal.
while not download_screenshot(vm_ip):
    print("Check whether the virtual machine is ready...")
    time.sleep(1)

print("Virtual machine is ready. Start to make a snapshot on the virtual machine. It would take a while...")

# Create a snapshot of the virtual machine
snapshot = subprocess.run(f'vmrun {get_vmrun_type()} snapshot "{VM_PATH}" "init_state"', shell=True)
if snapshot.returncode != 0:
    # Report the failure instead of claiming the snapshot exists.
    raise RuntimeError(f"'vmrun snapshot' failed with exit status {snapshot.returncode}")
print("Snapshot created.")
|
||||
71
show_result.py
Normal file
71
show_result.py
Normal file
@@ -0,0 +1,71 @@
|
||||
import os
|
||||
|
||||
|
||||
def get_result(action_space, use_model, observation_type, result_dir):
    """Collect and print the scores recorded for one experiment configuration.

    Results are looked up under
    ``result_dir/action_space/observation_type/use_model/<domain>/<example_id>/result.txt``.
    Per-domain and per-group success rates are printed, the per-example scores
    are written to ``all_result.json`` inside that directory, and the flat
    list of scores is returned.

    Args:
        action_space: first path component below ``result_dir``.
        use_model: third path component below ``result_dir``.
        observation_type: second path component below ``result_dir``.
        result_dir: root directory holding all experiment results.

    Returns:
        A list with one float per example that has a ``result.txt``, or
        ``None`` when the directory does not exist or holds no results.
    """
    import json  # local import: the module's top-level imports only provide os

    target_dir = os.path.join(result_dir, action_space, observation_type, use_model)
    if not os.path.exists(target_dir):
        print("New experiment, no result yet.")
        return None

    all_result = []
    domain_result = {}
    all_result_for_analysis = {}

    for domain in os.listdir(target_dir):
        domain_path = os.path.join(target_dir, domain)
        if not os.path.isdir(domain_path):
            continue
        for example_id in os.listdir(domain_path):
            example_path = os.path.join(domain_path, example_id)
            result_file = os.path.join(example_path, "result.txt")
            if not (os.path.isdir(example_path) and os.path.isfile(result_file)):
                continue

            # Read the file once and close it deterministically.
            with open(result_file, "r") as f:
                raw = f.read()
            try:
                score = float(raw)
            except ValueError:
                # Non-numeric content: any non-empty text counts as 1.0 and
                # an empty file as 0.0.
                # NOTE(review): this also maps the text "False" to 1.0 -
                # confirm that is the intended scoring.
                score = float(bool(raw))

            domain_result.setdefault(domain, []).append(score)
            all_result_for_analysis.setdefault(domain, {})[example_id] = score
            all_result.append(score)

    for domain, scores in domain_result.items():
        print("Domain:", domain, "Runned:", len(scores), "Success Rate:",
              sum(scores) / len(scores) * 100, "%")

    print(">>>>>>>>>>>>>")
    # Domain groups reported together. A domain without results yet is treated
    # as empty so that a partially finished experiment can still be summarised.
    groups = (
        ("Office", ("libreoffice_calc", "libreoffice_impress", "libreoffice_writer")),
        ("Daily", ("vlc", "thunderbird", "chrome")),
        ("Professional", ("gimp", "vs_code")),
    )
    for label, domains in groups:
        scores = [s for d in domains for s in domain_result.get(d, [])]
        if scores:
            print(label, "Success Rate:", sum(scores) / len(scores) * 100, "%")
        else:
            print(label, "Success Rate:", "N/A")

    # Write real JSON (str(dict) would produce single-quoted Python repr).
    with open(os.path.join(target_dir, "all_result.json"), "w") as f:
        json.dump(all_result_for_analysis, f)

    if not all_result:
        print("New experiment, no result yet.")
        return None

    print("Runned:", len(all_result), "Current Success Rate:", sum(all_result) / len(all_result) * 100, "%")
    return all_result
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Summarise the results stored under ./results for the default configuration.
    get_result(
        action_space="pyautogui",
        use_model="gpt-4-vision-preview",
        observation_type="screenshot",
        result_dir="./results",
    )
|
||||
Reference in New Issue
Block a user