Fix https://github.com/xlang-ai/OSWorld/issues/21 ; Update README for multimodal agents; Add badge in README; Add setup.py

2024-04-29 12:26:03 +03:00 · 2024-04-15 18:47:54 +08:00
parent 9c75df5dce
commit 6777ea255a
4 changed files with 116 additions and 3 deletions
--- a/README.md
+++ b/README.md
@@ -10,6 +10,26 @@
  <a href="https://discord.gg/4Gnw7eTEZR">Discord</a>
 </p>
 <p align="left">
    <a href="https://img.shields.io/badge/PRs-Welcome-red">
        <img src="https://img.shields.io/badge/PRs-Welcome-red">
    </a>
    <a href="https://img.shields.io/github/last-commit/xlang-ai/OSWorld?color=green">
        <img src="https://img.shields.io/github/last-commit/xlang-ai/OSWorld?color=green">
    </a>
    <a href="https://opensource.org/licenses/Apache-2.0">
        <img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg">
    </a>
    <a href="https://badge.fury.io/py/desktop-env">
        <img src="https://badge.fury.io/py/desktop-env.svg">
    </a>
    <a href="https://pepy.tech/project/desktop-env">
        <img src="https://static.pepy.tech/badge/desktop-env">
    </a>
    <br/>
 </p>
 ## 📢 Updates
 - 2024-04-11: We released our [paper](https://arxiv.org/abs/2404.07972), [environment and benchmark](https://github.com/xlang-ai/OSWorld), and [project page](https://os-world.github.io/). Check it out!
--- a/mm_agents/README.md
+++ b/mm_agents/README.md
@@ -31,7 +31,7 @@ agent = PromptAgent(
 agent.reset()
 # say we have an instruction and observation
 instruction = "Please help me to find the nearest restaurant."
-obs = {"screenshot": "path/to/observation.jpg"}
+obs = {"screenshot": open("path/to/observation.jpg", 'rb').read()}
 response, actions = agent.predict(
    instruction,
    obs
@@ -51,8 +51,9 @@ And the following action spaces:
 To feed an observation into the agent, you have to maintain the `obs` variable as a dict with the corresponding information:
 ```python
 # continue from the previous code snippet
 obs = {
-    "screenshot": "path/to/observation.jpg",
+    "screenshot": open("path/to/observation.jpg", 'rb').read(),
    "a11y_tree": ""  # [a11y_tree data]
 }
 response, actions = agent.predict(
--- a/setup.py
+++ b/setup.py
@@ -0,0 +1,92 @@
 import subprocess
 import sys
 from setuptools import setup, find_packages
 from setuptools.command.install import install
 class InstallPlaywrightCommand(install):
     """Setuptools ``install`` command that additionally runs 'playwright install'.
     The regular install step is executed first; afterwards
     ``<current interpreter> -m playwright install`` is launched as a subprocess.
     A non-zero exit status from that subprocess is only reported via ``print``
     and does not abort the installation.
     """
     # NOTE(review): custom install commands are presumably skipped when the
     # package is installed from a pre-built wheel -- confirm if that matters.
     def run(self):
         # Let setuptools perform the standard installation before anything else.
         install.run(self)
         # Use the interpreter that is running this setup script.
         playwright_cmd = [sys.executable, "-m", "playwright", "install"]
         try:
             subprocess.check_call(playwright_cmd)
         except subprocess.CalledProcessError as err:
             # Best effort: tell the user how to finish the setup by hand.
             print("Failed to run 'playwright install'. Please run 'playwright install' manually.")
             print(err)
         else:
             print("Successfully ran 'playwright install'.")
 # Read the long description inside a context manager so the README file
 # handle is closed deterministically instead of being left open.
 with open('README.md', encoding="utf-8") as readme_file:
     long_description = readme_file.read()
 # Distribution metadata and runtime dependencies for the `desktop_env` package.
 setup(
     name="desktop_env",
     version="0.1.5",
     author="Tianbao Xie, Danyang Zhang,  Jixuan Chen, Xiaochuan Li, Siheng Zhao, Ruisheng Cao, Toh Jing Hua, etc.",
     author_email="tianbaoxiexxx@gmail.com",
     description="The package provides a desktop environment for setting and evaluating desktop automation tasks.",
     long_description=long_description,
     long_description_content_type="text/markdown",
     url="https://github.com/xlang-ai/desktop_env",
     packages=find_packages(),
     classifiers=[
         "Programming Language :: Python :: 3",
         "License :: OSI Approved :: Apache Software License",
         "Operating System :: OS Independent",
     ],
     python_requires='>=3.9',
     install_requires=[
         # Pinned (compatible-release) requirements.
         "numpy~=1.24.3",
         "Pillow~=10.1.0",
         "fabric",
         "gymnasium~=0.28.1",
         "requests~=2.31.0",
         "transformers~=4.35.2",
         "torch~=2.1.1",
         "accelerate",
         "opencv-python~=4.8.1.78",
         "matplotlib~=3.7.4",
         "pynput~=1.7.6",
         "pyautogui~=0.9.54",
         "psutil~=5.9.6",
         "tqdm~=4.65.0",
         "pandas~=2.0.3",
         "flask~=3.0.0",
         "requests-toolbelt~=1.0.0",
         # Unpinned requirements. "opencv-python" is intentionally not repeated
         # here: it is already declared above with its version pin.
         "lxml",
         "cssselect",
         "xmltodict",
         "openpyxl",
         "python-docx",
         "python-pptx",
         "pypdf",
         "PyGetWindow",
         "rapidfuzz",
         "pyacoustid",
         "ImageHash",
         "scikit-image",
         "librosa",
         "pymupdf",
         "chardet",
         "playwright",
         "formulas",
         "pydrive",
         "fastdtw",
         "odfpy",
         "func-timeout",
         "beautifulsoup4",
         "PyYaml",
         "mutagen",
         "easyocr",
         "borb",
         "pypdf2",
         "pdfplumber",
         "wrapt_timeout_decorator",
     ],
     cmdclass={
         'install': InstallPlaywrightCommand,  # Use the custom install command
     },
 )
--- a/setup_vm.py
+++ b/setup_vm.py
@@ -17,7 +17,7 @@ def download_and_unzip_vm():
    # Determine the platform and CPU architecture to decide the correct VM image to download
    if platform.machine() == 'arm64':  # macOS with Apple Silicon
        url = "https://huggingface.co/datasets/xlangai/ubuntu_arm/resolve/main/Ubuntu.zip"
-    elif platform.machine().lower() == 'amd64':
+    elif platform.machine().lower() in ['amd64', "x86_64"]:
        url = "https://huggingface.co/datasets/xlangai/ubuntu_x86/resolve/main/Ubuntu.zip"
    else:
        raise Exception("Unsupported platform or architecture")