Fix https://github.com/xlang-ai/OSWorld/issues/21 ; Update README for multimodal agents; Add badge in README; Add setup.py

This commit is contained in:
Timothyxxx
2024-04-15 18:47:54 +08:00
parent 9c75df5dce
commit 6777ea255a
4 changed files with 116 additions and 3 deletions

View File

@@ -10,6 +10,26 @@
<a href="https://discord.gg/4Gnw7eTEZR">Discord</a>
</p>
<p align="left">
<a href="https://img.shields.io/badge/PRs-Welcome-red">
<img src="https://img.shields.io/badge/PRs-Welcome-red">
</a>
<a href="https://img.shields.io/github/last-commit/xlang-ai/OSWorld?color=green">
<img src="https://img.shields.io/github/last-commit/xlang-ai/OSWorld?color=green">
</a>
<a href="https://opensource.org/licenses/Apache-2.0">
<img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg">
</a>
<a href="https://badge.fury.io/py/desktop-env">
<img src="https://badge.fury.io/py/desktop-env.svg">
</a>
<a href="https://pepy.tech/project/desktop-env">
<img src="https://static.pepy.tech/badge/desktop-env">
</a>
<br/>
</p>
## 📢 Updates
- 2024-04-11: We released our [paper](https://arxiv.org/abs/2404.07972), [environment and benchmark](https://github.com/xlang-ai/OSWorld), and [project page](https://os-world.github.io/). Check it out!

View File

@@ -31,7 +31,7 @@ agent = PromptAgent(
agent.reset()
# say we have an instruction and observation
instruction = "Please help me to find the nearest restaurant."
obs = {"screenshot": "path/to/observation.jpg"}
obs = {"screenshot": open("path/to/observation.jpg", 'rb').read()}
response, actions = agent.predict(
instruction,
obs
@@ -51,8 +51,9 @@ And the following action spaces:
To feed an observation into the agent, you have to maintain the `obs` variable as a dict with the corresponding information:
```python
# continue from the previous code snippet
obs = {
"screenshot": "path/to/observation.jpg",
"screenshot": open("path/to/observation.jpg", 'rb').read(),
"a11y_tree": "" # [a11y_tree data]
}
response, actions = agent.predict(

92
setup.py Normal file
View File

@@ -0,0 +1,92 @@
import subprocess
import sys
from setuptools import setup, find_packages
from setuptools.command.install import install
class InstallPlaywrightCommand(install):
    """Customized setuptools install command that runs 'playwright install'.

    The regular installation is performed first. Afterwards
    ``python -m playwright install`` is attempted on a best-effort basis:
    a failure of that step is reported on stdout but does not abort the
    installation.
    """

    def run(self):
        """Run the standard install step, then try 'playwright install'."""
        # Call the original install command to handle regular installation process
        install.run(self)

        # Attempt to run 'playwright install' with the interpreter that is
        # executing this setup script.
        try:
            subprocess.check_call([sys.executable, "-m", "playwright", "install"])
        except (subprocess.CalledProcessError, OSError) as e:
            # CalledProcessError: the command ran but exited with a non-zero status.
            # OSError: the interpreter could not be launched at all. Both cases are
            # non-fatal here; the user is asked to run the command manually.
            print("Failed to run 'playwright install'. Please run 'playwright install' manually.")
            print(e)
        else:
            print("Successfully ran 'playwright install'.")
# Read the long description up front so the README file handle is closed
# deterministically instead of being left to the garbage collector.
with open('README.md', encoding="utf-8") as _readme_file:
    _long_description = _readme_file.read()

# Package metadata and dependency declaration.
# NOTE: every distribution is listed once in install_requires; a pinned entry
# must not be repeated unpinned further down the list.
setup(
    name="desktop_env",
    version="0.1.5",
    author="Tianbao Xie, Danyang Zhang, Jixuan Chen, Xiaochuan Li, Siheng Zhao, Ruisheng Cao, Toh Jing Hua, etc.",
    author_email="tianbaoxiexxx@gmail.com",
    description="The package provides a desktop environment for setting and evaluating desktop automation tasks.",
    long_description=_long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/xlang-ai/desktop_env",
    packages=find_packages(),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.9',
    install_requires=[
        "numpy~=1.24.3",
        "Pillow~=10.1.0",
        "fabric",
        "gymnasium~=0.28.1",
        "requests~=2.31.0",
        "transformers~=4.35.2",
        "torch~=2.1.1",
        "accelerate",
        "opencv-python~=4.8.1.78",
        "matplotlib~=3.7.4",
        "pynput~=1.7.6",
        "pyautogui~=0.9.54",
        "psutil~=5.9.6",
        "tqdm~=4.65.0",
        "pandas~=2.0.3",
        "flask~=3.0.0",
        "requests-toolbelt~=1.0.0",
        "lxml",
        "cssselect",
        "xmltodict",
        "openpyxl",
        "python-docx",
        "python-pptx",
        "pypdf",
        "PyGetWindow",
        "rapidfuzz",
        "pyacoustid",
        "ImageHash",
        "scikit-image",
        "librosa",
        "pymupdf",
        "chardet",
        "playwright",
        "formulas",
        "pydrive",
        "fastdtw",
        "odfpy",
        "func-timeout",
        "beautifulsoup4",
        "PyYaml",
        "mutagen",
        "easyocr",
        "borb",
        "pypdf2",
        "pdfplumber",
        "wrapt_timeout_decorator",
    ],
    cmdclass={
        'install': InstallPlaywrightCommand,  # Use the custom install command
    },
)

View File

@@ -17,7 +17,7 @@ def download_and_unzip_vm():
# Determine the platform and CPU architecture to decide the correct VM image to download
if platform.machine() == 'arm64': # macOS with Apple Silicon
url = "https://huggingface.co/datasets/xlangai/ubuntu_arm/resolve/main/Ubuntu.zip"
elif platform.machine().lower() == 'amd64':
elif platform.machine().lower() in ['amd64', "x86_64"]:
url = "https://huggingface.co/datasets/xlangai/ubuntu_x86/resolve/main/Ubuntu.zip"
else:
raise Exception("Unsupported platform or architecture")