mirror of
https://github.com/xlang-ai/OSWorld.git
synced 2024-04-29 12:26:03 +03:00
Setup initial examples
This commit is contained in:
24
evaluation_examples/README.md
Normal file
24
evaluation_examples/README.md
Normal file
@@ -0,0 +1,24 @@
|
||||
# Evaluation examples
|
||||
|
||||
Here we put the data examples used to benchmark the ability of agents to interact with a GUI.
|
||||
The examples are stored in `./examples`, where each data item is formatted as:
|
||||
|
||||
```
|
||||
{
|
||||
"id": "uid", # unique id
|
||||
"snapshot": "snapshot_id", # the snapshot id of the environment, with some data already there and apps already opened, or just desktop
|
||||
"instruction": "natural_language_instruction", # the natural language instruction of the task, what we want the agent to do
|
||||
"source": "website_url", # where we found this example: a forum, a website, or a paper
|
||||
"config": {xxx}, # the scripts that set up the download and open-file actions, as the initial state of a task
|
||||
"trajectory": "trajectory_directory", # the trajectory directory, which contains the action sequence file, the screenshots and the recording video
|
||||
"related_apps": ["app1", "app2", ...], # the related apps, which are opened during the task
|
||||
"evaluator": "evaluation_dir", # the directory of the evaluator, which contains the evaluation script for this example
|
||||
…
|
||||
}
|
||||
```
|
||||
|
||||
The `./trajectories` directory contains the annotated trajectories for finishing the task of each data item in `./examples`.
|
||||
|
||||
For now, it is under construction, and only tested on Windows 10. Please:
|
||||
- Modify the path accordingly to run the evaluation;
|
||||
- Let us know if some parts are overfitted to our environment.
|
||||
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"id": "0bf05a7d-b28b-44d2-955a-50b41e24012a",
|
||||
"snapshot": "libreoffice_calc",
|
||||
"instruction": "I would like to pad all the numbers in the 'Old ID' column with zeros in front, to fill them up to seven digits in the 'New 7 Digit ID' column.",
|
||||
"source": "https://www.youtube.com/shorts/FPAQaDTS8VY",
|
||||
"config": {
|
||||
"download": [
|
||||
[
|
||||
"",
|
||||
"C:\\Users\\tianbaox\\Desktop\\Customers_New_7digit_Id.xlsx"
|
||||
]
|
||||
],
|
||||
"open": [
|
||||
"C:\\Users\\tianbaox\\Desktop\\Customers_New_7digit_Id.xlsx"
|
||||
]
|
||||
},
|
||||
"trajectory": "trajectories/0bf05a7d-b28b-44d2-955a-50b41e24012a",
|
||||
"related_apps": [
|
||||
"libreoffice calc"
|
||||
],
|
||||
"evaluator": "evaluation_dir"
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"id": "2bd59342-0664-4ccb-ba87-79379096cc08",
|
||||
"snapshot": "libreoffice_calc",
|
||||
"instruction": "Make sparkline chart line by line",
|
||||
"source": "https://www.youtube.com/shorts/L3Z-F1QTQFY",
|
||||
"config": {
|
||||
"download": [
|
||||
[
|
||||
"",
|
||||
"C:\\Users\\tianbaox\\Desktop\\OrderId_Month_Chart.xlsx"
|
||||
]
|
||||
],
|
||||
"open": [
|
||||
"C:\\Users\\tianbaox\\Desktop\\OrderId_Month_Chart.xlsx"
|
||||
]
|
||||
},
|
||||
"trajectory": "trajectories/2bd59342-0664-4ccb-ba87-79379096cc08",
|
||||
"related_apps": [
|
||||
"libreoffice calc"
|
||||
],
|
||||
"evaluator": "evaluation_dir"
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"id": "37608790-6147-45d0-9f20-1137bb35703d",
|
||||
"snapshot": "libreoffice_calc",
|
||||
"instruction": "Help me fill the columns of First Name, Last Name and Rank",
|
||||
"source": "https://www.youtube.com/shorts/uzPo_CPCHH8",
|
||||
"config": {
|
||||
"download": [
|
||||
[
|
||||
"https://drive.usercontent.google.com/download?id=1wDqap5cBfxnlqTNrZG61k_wDWTujl6AU&export=download&authuser=0&confirm=t&uuid=fd183b89-76b7-4dc5-880e-1045ed769562&at=APZUnTWp9RMafMg0xohhBWazN3YD:1701785710674",
|
||||
"C:\\Users\\tianbaox\\Desktop\\Employee_Roles_and_Ranks.xlsx"
|
||||
]
|
||||
],
|
||||
"open": [
|
||||
"C:\\Users\\tianbaox\\Desktop\\Employee_Roles_and_Ranks.xlsx"
|
||||
]
|
||||
},
|
||||
"trajectory": "trajectories/37608790-6147-45d0-9f20-1137bb35703d",
|
||||
"related_apps": [
|
||||
"libreoffice calc"
|
||||
],
|
||||
"evaluator": "evaluation_dir"
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"id": "7a4e4bc8-922c-4c84-865c-25ba34136be1",
|
||||
"snapshot": "libreoffice_calc",
|
||||
"instruction": "Reorder the columns to be \"Data\", \"First Name\", \"Last Name\", \"Order ID\", \"Sales\"",
|
||||
"source": "https://www.youtube.com/shorts/bvUhr1AHs44",
|
||||
"config": {
|
||||
"download": [
|
||||
[
|
||||
"",
|
||||
"C:\\Users\\tianbaox\\Desktop\\Name_Order_Id_move_column.xlsx"
|
||||
]
|
||||
],
|
||||
"open": [
|
||||
"C:\\Users\\tianbaox\\Desktop\\Name_Order_Id_move_column.xlsx"
|
||||
]
|
||||
},
|
||||
"trajectory": "trajectories/7a4e4bc8-922c-4c84-865c-25ba34136be1",
|
||||
"related_apps": [
|
||||
"libreoffice calc"
|
||||
],
|
||||
"evaluator": "evaluation_dir"
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"id": "7b802dad-6e0f-4204-9815-d4e3f57627d8",
|
||||
"snapshot": "libreoffice_calc",
|
||||
"instruction": "I would like to sort this table based on cell color, placing all the rows marked with pink at the beginning, while keeping their order among themselves unchanged.",
|
||||
"source": "https://www.youtube.com/shorts/Of-lzeP1usE",
|
||||
"config": {
|
||||
"download": [
|
||||
[
|
||||
"",
|
||||
"C:\\Users\\tianbaox\\Desktop\\Customer_Sort_by_cell_color.xlsx"
|
||||
]
|
||||
],
|
||||
"open": [
|
||||
"C:\\Users\\tianbaox\\Desktop\\Customer_Sort_by_cell_color.xlsx"
|
||||
]
|
||||
},
|
||||
"trajectory": "trajectories/7b802dad-6e0f-4204-9815-d4e3f57627d8",
|
||||
"related_apps": [
|
||||
"libreoffice calc"
|
||||
],
|
||||
"evaluator": "evaluation_dir"
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"id": "7efeb4b1-3d19-4762-b163-63328d66303b",
|
||||
"snapshot": "libreoffice_calc",
|
||||
"instruction": "Fill in the Serial Numbers in \"Serial #\" column",
|
||||
"source": "https://www.youtube.com/shorts/4jzXfZNhfmk",
|
||||
"config": {
|
||||
"download": [
|
||||
[
|
||||
"",
|
||||
"C:\\Users\\tianbaox\\Desktop\\Order_Sales_Serial#.xlsx"
|
||||
]
|
||||
],
|
||||
"open": [
|
||||
"C:\\Users\\tianbaox\\Desktop\\Order_Sales_Serial#.xlsx"
|
||||
]
|
||||
},
|
||||
"trajectory": "trajectories/",
|
||||
"related_apps": [
|
||||
"libreoffice calc"
|
||||
],
|
||||
"evaluator": "evaluation_dir"
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"id": "a9f325aa-8c05-4e4f-8341-9e4358565f4f",
|
||||
"snapshot": "libreoffice_calc",
|
||||
"instruction": "Clean the messy movie titles and put them in the cleaned column",
|
||||
"source": "https://www.youtube.com/shorts/A0gmEBRKXWs",
|
||||
"config": {
|
||||
"download": [
|
||||
[
|
||||
"",
|
||||
"C:\\Users\\tianbaox\\Desktop\\"
|
||||
]
|
||||
],
|
||||
"open": [
|
||||
"C:\\Users\\tianbaox\\Desktop\\"
|
||||
]
|
||||
},
|
||||
"trajectory": "trajectories/a9f325aa-8c05-4e4f-8341-9e4358565f4f",
|
||||
"related_apps": [
|
||||
"libreoffice calc"
|
||||
],
|
||||
"evaluator": "evaluation_dir"
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"id": "d681960f-7bc3-4286-9913-a8812ba3261a",
|
||||
"snapshot": "libreoffice_calc",
|
||||
"instruction": "According to the green table shown above, calculate and give each student a grade",
|
||||
"source": "https://www.youtube.com/shorts/d7U1S_IsTVM",
|
||||
"config": {
|
||||
"download": [
|
||||
[
|
||||
"https://drive.usercontent.google.com/download?id=1wodZjx1KjThUsrtF6ZJaCTy1fQX4E9vA&export=download&authuser=0&confirm=t&uuid=d07ca312-1abc-40f2-81cd-d06e27119854&at=APZUnTWwjnxsHQYapSvpLR8NmlfV:1701785087048",
|
||||
"C:\\Users\\tianbaox\\Desktop\\Student_Grades_and_Remarks.xlsx"
|
||||
]
|
||||
],
|
||||
"open": [
|
||||
"C:\\Users\\tianbaox\\Desktop\\Student_Grades_and_Remarks.xlsx"
|
||||
]
|
||||
},
|
||||
"trajectory": "trajectories/d681960f-7bc3-4286-9913-a8812ba3261a",
|
||||
"related_apps": [
|
||||
"libreoffice calc"
|
||||
],
|
||||
"evaluator": "evaluation_dir"
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"id": "eb03d19a-b88d-4de4-8a64-ca0ac66f426b",
|
||||
"snapshot": "libreoffice_calc",
|
||||
"instruction": "Traverse the table and paste it below",
|
||||
"source": "https://www.youtube.com/shorts/t9JLUaT55UQ",
|
||||
"config": {
|
||||
"download": [
|
||||
[
|
||||
"",
|
||||
"C:\\Users\\tianbaox\\Desktop\\"
|
||||
]
|
||||
],
|
||||
"open": [
|
||||
"C:\\Users\\tianbaox\\Desktop\\"
|
||||
]
|
||||
},
|
||||
"trajectory": "trajectories/eb03d19a-b88d-4de4-8a64-ca0ac66f426b",
|
||||
"related_apps": [
|
||||
"libreoffice calc"
|
||||
],
|
||||
"evaluator": "evaluation_dir"
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"id": "ecb0df7a-4e8d-4a03-b162-053391d3afaf",
|
||||
"snapshot": "libreoffice_calc",
|
||||
"instruction": "Make each cell in the column \"Pass/Fail/Held\" a drop-down list",
|
||||
"source": "https://www.youtube.com/shorts/tXOovKn0H68",
|
||||
"config": {
|
||||
"download": [
|
||||
[
|
||||
"",
|
||||
"C:\\Users\\tianbaox\\Desktop\\"
|
||||
]
|
||||
],
|
||||
"open": [
|
||||
"C:\\Users\\tianbaox\\Desktop\\"
|
||||
]
|
||||
},
|
||||
"trajectory": "trajectories/ecb0df7a-4e8d-4a03-b162-053391d3afaf",
|
||||
"related_apps": [
|
||||
"libreoffice calc"
|
||||
],
|
||||
"evaluator": "evaluation_dir"
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"id": "f9584479-3d0d-4c79-affa-9ad7afdd8850",
|
||||
"snapshot": "libreoffice_calc",
|
||||
"instruction": "Fill the missing row and column which show the total value",
|
||||
"source": "https://youtube.com/shorts/feldd-Pn48c?si=9xJiem2uAHm6Jshb",
|
||||
"config": {
|
||||
"download": [
|
||||
[
|
||||
"https://drive.usercontent.google.com/download?id=1rwhniaClEkF8XFzdfaNUA6GmAiy4syMZ&export=download&authuser=0&confirm=t&uuid=6fdd5b04-85f4-45e1-ad74-368f8f2a82ab&at=APZUnTUP-JxPxLfNls6jXWghblQ5:1701766091851",
|
||||
"C:\\Users\\tianbaox\\Desktop\\Quarterly_Product_Sales_by_Zone.xlsx"
|
||||
]
|
||||
],
|
||||
"open": [
|
||||
"C:\\Users\\tianbaox\\Desktop\\Quarterly_Product_Sales_by_Zone.xlsx"
|
||||
]
|
||||
},
|
||||
"trajectory": "trajectories/f9584479-3d0d-4c79-affa-9ad7afdd8850",
|
||||
"related_apps": [
|
||||
"libreoffice calc"
|
||||
],
|
||||
"evaluator": "evaluation_dir"
|
||||
}
|
||||
13
evaluation_examples/examples/template.json
Normal file
13
evaluation_examples/examples/template.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"id": "",
|
||||
"snapshot": "libreoffice_calc",
|
||||
"instruction": "",
|
||||
"source": "",
|
||||
"config": {
|
||||
},
|
||||
"trajectory": "trajectories/",
|
||||
"related_apps": [
|
||||
"libreoffice calc"
|
||||
],
|
||||
"evaluator": "evaluation_dir"
|
||||
}
|
||||
12
main.py
12
main.py
@@ -1,3 +1,4 @@
|
||||
import json
|
||||
from desktop_env.envs.desktop_env import DesktopEnv
|
||||
|
||||
|
||||
@@ -5,17 +6,16 @@ def human_agent():
|
||||
"""
|
||||
Runs the Gym environment with human input.
|
||||
"""
|
||||
|
||||
with open("evaluation_examples/examples/37608790-6147-45d0-9f20-1137bb35703d.json", "r") as f:
|
||||
example = json.load(f)
|
||||
|
||||
env = DesktopEnv(
|
||||
# path_to_vm=r"""C:\Users\tianbaox\Downloads\Windows 10 x64\Windows 10 x64.vmx""",
|
||||
path_to_vm=r"""C:\Users\tianbaox\Documents\Virtual Machines\Win10\Win10.vmx""",
|
||||
# path_to_vm="/home/yuri/vmware/Ubuntu 64-bit/Ubuntu 64-bit.vmx",
|
||||
snapshot_path="base_setup",
|
||||
config={
|
||||
"download": [(
|
||||
"https://drive.usercontent.google.com/download?id=1rwhniaClEkF8XFzdfaNUA6GmAiy4syMZ&export=download&authuser=0&confirm=t&uuid=6fdd5b04-85f4-45e1-ad74-368f8f2a82ab&at=APZUnTUP-JxPxLfNls6jXWghblQ5:1701766091851",
|
||||
r"C:\Users\tianbaox\Desktop\Quarterly_Product_Sales_by_Zone.xlsx")],
|
||||
"open": [r"C:\Users\tianbaox\Desktop\Quarterly_Product_Sales_by_Zone.xlsx"],
|
||||
}
|
||||
config=example["config"],
|
||||
)
|
||||
|
||||
# reset the environment to certain snapshot
|
||||
|
||||
Reference in New Issue
Block a user