From 6777ea255a5145cafb967ba369caf938528a2034 Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Mon, 15 Apr 2024 18:47:54 +0800
Subject: [PATCH] Fix https://github.com/xlang-ai/OSWorld/issues/21 ; Update
 README for multimodal agents; Add badge in README; Add setup.py

---
 README.md           | 20 ++++++++++
 mm_agents/README.md |  5 ++-
 setup.py            | 92 +++++++++++++++++++++++++++++++++++++++++++++
 setup_vm.py         |  2 +-
 4 files changed, 116 insertions(+), 3 deletions(-)
 create mode 100644 setup.py
diff --git a/README.md b/README.md
index 8fede83..efc57fa 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,26 @@
   <a href="https://discord.gg/4Gnw7eTEZR">Discord</a>
 </p>
 
+<p align="left">
+    <a href="https://img.shields.io/badge/PRs-Welcome-red">
+        <img src="https://img.shields.io/badge/PRs-Welcome-red">
+    </a>
+    <a href="https://img.shields.io/github/last-commit/xlang-ai/OSWorld?color=green">
+        <img src="https://img.shields.io/github/last-commit/xlang-ai/OSWorld?color=green">
+    </a>
+    <a href="https://opensource.org/licenses/Apache-2.0">
+        <img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg">
+    </a>
+    <a href="https://badge.fury.io/py/desktop-env">
+        <img src="https://badge.fury.io/py/desktop-env.svg">
+    </a>
+    <a href="https://pepy.tech/project/desktop-env">
+        <img src="https://static.pepy.tech/badge/desktop-env">
+    </a>
+    <br/>
+</p>
+
+
 ## 📢 Updates
 - 2024-04-11: We released our [paper](https://arxiv.org/abs/2404.07972), [environment and benchmark](https://github.com/xlang-ai/OSWorld), and [project page](https://os-world.github.io/). Check it out!
 
diff --git a/mm_agents/README.md b/mm_agents/README.md
index ccf95d4..be4c476 100644
--- a/mm_agents/README.md
+++ b/mm_agents/README.md
@@ -31,7 +31,7 @@ agent = PromptAgent(
 agent.reset()
 # say we have an instruction and observation
 instruction = "Please help me to find the nearest restaurant."
-obs = {"screenshot": "path/to/observation.jpg"}
+obs = {"screenshot": open("path/to/observation.jpg", 'rb').read()}
 response, actions = agent.predict(
     instruction,
     obs
@@ -51,8 +51,9 @@ And the following action spaces:
 
 To feed an observation into the agent, you have to maintain the `obs` variable as a dict with the corresponding information:
 ```python
+# Continuing from the previous code snippet (reuses the same `agent` and `instruction`)
 obs = {
-    "screenshot": "path/to/observation.jpg",
+    "screenshot": open("path/to/observation.jpg", 'rb').read(),
     "a11y_tree": ""  # [a11y_tree data]
 }
 response, actions = agent.predict(
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..c4a5c06
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,92 @@
+import subprocess
+import sys
+
+from setuptools import setup, find_packages
+from setuptools.command.install import install
+
+
+class InstallPlaywrightCommand(install):
+    """Customized setuptools install command that runs 'playwright install'."""
+
+    def run(self):
+        # Call the original install command to handle the regular installation process
+        install.run(self)
+
+        # Best effort: run 'playwright install' with the current interpreter; a non-zero exit is reported, not fatal
+        try:
+            subprocess.check_call([sys.executable, "-m", "playwright", "install"])
+            print("Successfully ran 'playwright install'.")
+        except subprocess.CalledProcessError as e:
+            print("Failed to run 'playwright install'. Please run 'playwright install' manually.")
+            print(e)
+
+
+setup(
+    name="desktop_env",
+    version="0.1.5",
+    author="Tianbao Xie, Danyang Zhang,  Jixuan Chen, Xiaochuan Li, Siheng Zhao, Ruisheng Cao, Toh Jing Hua, etc.",
+    author_email="tianbaoxiexxx@gmail.com",
+    description="The package provides a desktop environment for setting and evaluating desktop automation tasks.",
+    long_description=open('README.md', encoding="utf-8").read(),
+    long_description_content_type="text/markdown",
+    url="https://github.com/xlang-ai/desktop_env",
+    packages=find_packages(),
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: Apache Software License",
+        "Operating System :: OS Independent",
+    ],
+    python_requires='>=3.9',
+    install_requires=[
+        "numpy~=1.24.3",
+        "Pillow~=10.1.0",
+        "fabric",
+        "gymnasium~=0.28.1",
+        "requests~=2.31.0",
+        "transformers~=4.35.2",
+        "torch~=2.1.1",
+        "accelerate",
+        "opencv-python~=4.8.1.78",
+        "matplotlib~=3.7.4",
+        "pynput~=1.7.6",
+        "pyautogui~=0.9.54",
+        "psutil~=5.9.6",
+        "tqdm~=4.65.0",
+        "pandas~=2.0.3",
+        "flask~=3.0.0",
+        "requests-toolbelt~=1.0.0",
+        "lxml",
+        "cssselect",
+        "xmltodict",
+        "openpyxl",
+        "python-docx",
+        "python-pptx",
+        "pypdf",
+        "PyGetWindow",
+        "rapidfuzz",
+        "pyacoustid",
+        "opencv-python",
+        "ImageHash",
+        "scikit-image",
+        "librosa",
+        "pymupdf",
+        "chardet",
+        "playwright",
+        "formulas",
+        "pydrive",
+        "fastdtw",
+        "odfpy",
+        "func-timeout",
+        "beautifulsoup4",
+        "PyYaml",
+        "mutagen",
+        "easyocr",
+        "borb",
+        "pypdf2",
+        "pdfplumber",
+        "wrapt_timeout_decorator"
+    ],
+    cmdclass={
+        'install': InstallPlaywrightCommand,  # Use the custom install command
+    },
+)
\ No newline at end of file
diff --git a/setup_vm.py b/setup_vm.py
index fafdd15..9f6e000 100644
--- a/setup_vm.py
+++ b/setup_vm.py
@@ -17,7 +17,7 @@ def download_and_unzip_vm():
     # Determine the platform and CPU architecture to decide the correct VM image to download
     if platform.machine() == 'arm64':  # macOS with Apple Silicon
         url = "https://huggingface.co/datasets/xlangai/ubuntu_arm/resolve/main/Ubuntu.zip"
-    elif platform.machine().lower() == 'amd64':
+    elif platform.machine().lower() in ['amd64', "x86_64"]:
         url = "https://huggingface.co/datasets/xlangai/ubuntu_x86/resolve/main/Ubuntu.zip"
     else:
         raise Exception("Unsupported platform or architecture")