mirror of
https://github.com/xlang-ai/OSWorld.git
synced 2024-04-29 12:26:03 +03:00
Fix https://github.com/xlang-ai/OSWorld/issues/21 ; Update README for multimodal agents; Add badge in README; Add setup.py
This commit is contained in:
20
README.md
20
README.md
@@ -10,6 +10,26 @@
|
||||
<a href="https://discord.gg/4Gnw7eTEZR">Discord</a>
|
||||
</p>
|
||||
|
||||
<p align="left">
|
||||
<a href="https://img.shields.io/badge/PRs-Welcome-red">
|
||||
<img src="https://img.shields.io/badge/PRs-Welcome-red">
|
||||
</a>
|
||||
<a href="https://img.shields.io/github/last-commit/xlang-ai/OSWorld?color=green">
|
||||
<img src="https://img.shields.io/github/last-commit/xlang-ai/OSWorld?color=green">
|
||||
</a>
|
||||
<a href="https://opensource.org/licenses/Apache-2.0">
|
||||
<img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg">
|
||||
</a>
|
||||
<a href="https://badge.fury.io/py/desktop-env">
|
||||
<img src="https://badge.fury.io/py/desktop-env.svg">
|
||||
</a>
|
||||
<a href="https://pepy.tech/project/desktop-env">
|
||||
<img src="https://static.pepy.tech/badge/desktop-env">
|
||||
</a>
|
||||
<br/>
|
||||
</p>
|
||||
|
||||
|
||||
## 📢 Updates
|
||||
- 2024-04-11: We released our [paper](https://arxiv.org/abs/2404.07972), [environment and benchmark](https://github.com/xlang-ai/OSWorld), and [project page](https://os-world.github.io/). Check it out!
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ agent = PromptAgent(
|
||||
agent.reset()
|
||||
# say we have an instruction and observation
|
||||
instruction = "Please help me to find the nearest restaurant."
|
||||
obs = {"screenshot": "path/to/observation.jpg"}
|
||||
obs = {"screenshot": open("path/to/observation.jpg", 'rb').read()}
|
||||
response, actions = agent.predict(
|
||||
instruction,
|
||||
obs
|
||||
@@ -51,8 +51,9 @@ And the following action spaces:
|
||||
|
||||
To feed an observation into the agent, you have to maintain the `obs` variable as a dict with the corresponding information:
|
||||
```python
|
||||
# continue from the previous code snippet
|
||||
obs = {
|
||||
"screenshot": "path/to/observation.jpg",
|
||||
"screenshot": open("path/to/observation.jpg", 'rb').read(),
|
||||
"a11y_tree": "" # [a11y_tree data]
|
||||
}
|
||||
response, actions = agent.predict(
|
||||
|
||||
92
setup.py
Normal file
92
setup.py
Normal file
@@ -0,0 +1,92 @@
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from setuptools import setup, find_packages
|
||||
from setuptools.command.install import install
|
||||
|
||||
|
||||
class InstallPlaywrightCommand(install):
    """Customized setuptools install command that runs 'playwright install'.

    The regular setuptools installation is performed first; afterwards
    ``<python> -m playwright install`` is invoked with the interpreter that
    is running this setup script. The extra step is best-effort: if it
    fails, a message asking the user to run it manually is printed and the
    installation itself is left untouched.
    """

    def run(self):
        """Run the standard install, then attempt 'playwright install'."""
        # Call the original install command to handle the regular
        # installation process.
        install.run(self)

        # Use the current interpreter so the playwright module that belongs
        # to this environment is the one being executed.
        command = [sys.executable, "-m", "playwright", "install"]
        try:
            subprocess.check_call(command)
        except (subprocess.CalledProcessError, OSError) as e:
            # CalledProcessError: the command exited with a non-zero status.
            # OSError: the subprocess could not be started at all.
            # Neither should abort an install that has already completed.
            print("Failed to run 'playwright install'. Please run 'playwright install' manually.")
            print(e)
        else:
            print("Successfully ran 'playwright install'.")
|
||||
|
||||
|
||||
# Read the long description up front so the README file handle is closed
# deterministically instead of being left to the garbage collector.
with open('README.md', encoding="utf-8") as _readme_file:
    _long_description = _readme_file.read()

setup(
    name="desktop_env",
    version="0.1.5",
    author="Tianbao Xie, Danyang Zhang, Jixuan Chen, Xiaochuan Li, Siheng Zhao, Ruisheng Cao, Toh Jing Hua, etc.",
    author_email="tianbaoxiexxx@gmail.com",
    description="The package provides a desktop environment for setting and evaluating desktop automation tasks.",
    long_description=_long_description,
    long_description_content_type="text/markdown",
    # NOTE(review): other links in this project point at xlang-ai/OSWorld;
    # confirm this URL is still the intended project home page.
    url="https://github.com/xlang-ai/desktop_env",
    packages=find_packages(),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.9',
    install_requires=[
        "numpy~=1.24.3",
        "Pillow~=10.1.0",
        "fabric",
        "gymnasium~=0.28.1",
        "requests~=2.31.0",
        "transformers~=4.35.2",
        "torch~=2.1.1",
        "accelerate",
        # Listed once, pinned; a second unpinned "opencv-python" entry
        # was redundant and has been removed.
        "opencv-python~=4.8.1.78",
        "matplotlib~=3.7.4",
        "pynput~=1.7.6",
        "pyautogui~=0.9.54",
        "psutil~=5.9.6",
        "tqdm~=4.65.0",
        "pandas~=2.0.3",
        "flask~=3.0.0",
        "requests-toolbelt~=1.0.0",
        "lxml",
        "cssselect",
        "xmltodict",
        "openpyxl",
        "python-docx",
        "python-pptx",
        "pypdf",
        "PyGetWindow",
        "rapidfuzz",
        "pyacoustid",
        "ImageHash",
        "scikit-image",
        "librosa",
        "pymupdf",
        "chardet",
        "playwright",
        "formulas",
        "pydrive",
        "fastdtw",
        "odfpy",
        "func-timeout",
        "beautifulsoup4",
        "PyYaml",
        "mutagen",
        "easyocr",
        "borb",
        "pypdf2",
        "pdfplumber",
        "wrapt_timeout_decorator",
    ],
    cmdclass={
        'install': InstallPlaywrightCommand,  # Use the custom install command
    },
)
|
||||
@@ -17,7 +17,7 @@ def download_and_unzip_vm():
|
||||
# Determine the platform and CPU architecture to decide the correct VM image to download
|
||||
if platform.machine() == 'arm64': # macOS with Apple Silicon
|
||||
url = "https://huggingface.co/datasets/xlangai/ubuntu_arm/resolve/main/Ubuntu.zip"
|
||||
elif platform.machine().lower() == 'amd64':
|
||||
elif platform.machine().lower() in ['amd64', "x86_64"]:
|
||||
url = "https://huggingface.co/datasets/xlangai/ubuntu_x86/resolve/main/Ubuntu.zip"
|
||||
else:
|
||||
raise Exception("Unsupported platform or architecture")
|
||||
|
||||
Reference in New Issue
Block a user