Setup initial examples

This commit is contained in:
Timothyxxx
2023-12-06 17:04:33 +08:00
parent 4ba053998d
commit b9c317f0f5
14 changed files with 285 additions and 6 deletions

View File

@@ -0,0 +1,24 @@
# Evaluation examples
Here we collect the data examples used to benchmark the ability of agents to interact with GUIs.
The examples are stored in `./examples`, where each data item is formatted as:
```
{
"id": "uid", # unique id
"snapshot": "snapshot_id", # the snapshot id of the environment, with some data already there and apps already opened, or just desktop
"instruction": "natural_language_instruction", # the natural language instruction of the task, what we want the agent to do
"source": "website_url", # where we found this example: a forum, a website, or a paper
"config": {xxx}, # the scripts that set up the download and open-file actions, which form the initial state of a task
"trajectory": "trajectory_directory", # the trajectory directory, which contains the action sequence file, the screenshots and the recording video
"related_apps": ["app1", "app2", ...], # the related apps, which are opened during the task
"evaluator": "evaluation_dir", # the directory of the evaluator, which contains the evaluation script for this example
}
```
The `./trajectories` directory contains the annotated trajectories for finishing the task of each data item in `./examples`.
For now, this is under construction and has only been tested on Windows 10. Please:
- Modify the paths accordingly to run the evaluation;
- Let us know if some parts are overfitted to our environment.

View File

@@ -0,0 +1,22 @@
{
"id": "0bf05a7d-b28b-44d2-955a-50b41e24012a",
"snapshot": "libreoffice_calc",
"instruction": "I would like to pad all the numbers in the 'Old ID' column with zeros in front, to fill them up to seven digits in the 'New 7 Digit ID' column.",
"source": "https://www.youtube.com/shorts/FPAQaDTS8VY",
"config": {
"download": [
[
"",
"C:\\Users\\tianbaox\\Desktop\\Customers_New_7digit_Id.xlsx"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\Customers_New_7digit_Id.xlsx"
]
},
"trajectory": "trajectories/0bf05a7d-b28b-44d2-955a-50b41e24012a",
"related_apps": [
"libreoffice calc"
],
"evaluator": "evaluation_dir"
}

View File

@@ -0,0 +1,22 @@
{
"id": "2bd59342-0664-4ccb-ba87-79379096cc08",
"snapshot": "libreoffice_calc",
"instruction": "Make sparkline chart line by line",
"source": "https://www.youtube.com/shorts/L3Z-F1QTQFY",
"config": {
"download": [
[
"",
"C:\\Users\\tianbaox\\Desktop\\OrderId_Month_Chart.xlsx"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\OrderId_Month_Chart.xlsx"
]
},
"trajectory": "trajectories/2bd59342-0664-4ccb-ba87-79379096cc08",
"related_apps": [
"libreoffice calc"
],
"evaluator": "evaluation_dir"
}

View File

@@ -0,0 +1,22 @@
{
"id": "37608790-6147-45d0-9f20-1137bb35703d",
"snapshot": "libreoffice_calc",
"instruction": "Help me fill the columns of First Name, Last Name and Rank",
"source": "https://www.youtube.com/shorts/uzPo_CPCHH8",
"config": {
"download": [
[
"https://drive.usercontent.google.com/download?id=1wDqap5cBfxnlqTNrZG61k_wDWTujl6AU&export=download&authuser=0&confirm=t&uuid=fd183b89-76b7-4dc5-880e-1045ed769562&at=APZUnTWp9RMafMg0xohhBWazN3YD:1701785710674",
"C:\\Users\\tianbaox\\Desktop\\Employee_Roles_and_Ranks.xlsx"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\Employee_Roles_and_Ranks.xlsx"
]
},
"trajectory": "trajectories/37608790-6147-45d0-9f20-1137bb35703d",
"related_apps": [
"libreoffice calc"
],
"evaluator": "evaluation_dir"
}

View File

@@ -0,0 +1,22 @@
{
"id": "7a4e4bc8-922c-4c84-865c-25ba34136be1",
"snapshot": "libreoffice_calc",
"instruction": "Reorder the columns to be \"Data\", \"First Name\", \"Last Name\", \"Order ID\", \"Sales\"",
"source": "https://www.youtube.com/shorts/bvUhr1AHs44",
"config": {
"download": [
[
"",
"C:\\Users\\tianbaox\\Desktop\\Name_Order_Id_move_column.xlsx"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\Name_Order_Id_move_column.xlsx"
]
},
"trajectory": "trajectories/7a4e4bc8-922c-4c84-865c-25ba34136be1",
"related_apps": [
"libreoffice calc"
],
"evaluator": "evaluation_dir"
}

View File

@@ -0,0 +1,22 @@
{
"id": "7b802dad-6e0f-4204-9815-d4e3f57627d8",
"snapshot": "libreoffice_calc",
"instruction": "I would like to sort this table based on cell color, placing all the rows marked with pink at the beginning, while keeping their order among themselves unchanged.",
"source": "https://www.youtube.com/shorts/Of-lzeP1usE",
"config": {
"download": [
[
"",
"C:\\Users\\tianbaox\\Desktop\\Customer_Sort_by_cell_color.xlsx"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\Customer_Sort_by_cell_color.xlsx"
]
},
"trajectory": "trajectories/7b802dad-6e0f-4204-9815-d4e3f57627d8",
"related_apps": [
"libreoffice calc"
],
"evaluator": "evaluation_dir"
}

View File

@@ -0,0 +1,22 @@
{
"id": "7efeb4b1-3d19-4762-b163-63328d66303b",
"snapshot": "libreoffice_calc",
"instruction": "Fill in the Serial Numbers in \"Serial #\" column",
"source": "https://www.youtube.com/shorts/4jzXfZNhfmk",
"config": {
"download": [
[
"",
"C:\\Users\\tianbaox\\Desktop\\Order_Sales_Serial#.xlsx"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\Order_Sales_Serial#.xlsx"
]
},
"trajectory": "trajectories/7efeb4b1-3d19-4762-b163-63328d66303b",
"related_apps": [
"libreoffice calc"
],
"evaluator": "evaluation_dir"
}

View File

@@ -0,0 +1,22 @@
{
"id": "a9f325aa-8c05-4e4f-8341-9e4358565f4f",
"snapshot": "libreoffice_calc",
"instruction": "Clean the messy movie titles and put them in the cleaned column",
"source": "https://www.youtube.com/shorts/A0gmEBRKXWs",
"config": {
"download": [
[
"",
"C:\\Users\\tianbaox\\Desktop\\"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\"
]
},
"trajectory": "trajectories/a9f325aa-8c05-4e4f-8341-9e4358565f4f",
"related_apps": [
"libreoffice calc"
],
"evaluator": "evaluation_dir"
}

View File

@@ -0,0 +1,22 @@
{
"id": "d681960f-7bc3-4286-9913-a8812ba3261a",
"snapshot": "libreoffice_calc",
"instruction": "According to the green table shown above, calculate and give each student a grade",
"source": "https://www.youtube.com/shorts/d7U1S_IsTVM",
"config": {
"download": [
[
"https://drive.usercontent.google.com/download?id=1wodZjx1KjThUsrtF6ZJaCTy1fQX4E9vA&export=download&authuser=0&confirm=t&uuid=d07ca312-1abc-40f2-81cd-d06e27119854&at=APZUnTWwjnxsHQYapSvpLR8NmlfV:1701785087048",
"C:\\Users\\tianbaox\\Desktop\\Student_Grades_and_Remarks.xlsx"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\Student_Grades_and_Remarks.xlsx"
]
},
"trajectory": "trajectories/d681960f-7bc3-4286-9913-a8812ba3261a",
"related_apps": [
"libreoffice calc"
],
"evaluator": "evaluation_dir"
}

View File

@@ -0,0 +1,22 @@
{
"id": "eb03d19a-b88d-4de4-8a64-ca0ac66f426b",
"snapshot": "libreoffice_calc",
"instruction": "Traverse the table and paste it below",
"source": "https://www.youtube.com/shorts/t9JLUaT55UQ",
"config": {
"download": [
[
"",
"C:\\Users\\tianbaox\\Desktop\\"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\"
]
},
"trajectory": "trajectories/eb03d19a-b88d-4de4-8a64-ca0ac66f426b",
"related_apps": [
"libreoffice calc"
],
"evaluator": "evaluation_dir"
}

View File

@@ -0,0 +1,22 @@
{
"id": "ecb0df7a-4e8d-4a03-b162-053391d3afaf",
"snapshot": "libreoffice_calc",
"instruction": "Make each cell in the column \"Pass/Fail/Held\" a drop-down list",
"source": "https://www.youtube.com/shorts/tXOovKn0H68",
"config": {
"download": [
[
"",
"C:\\Users\\tianbaox\\Desktop\\"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\"
]
},
"trajectory": "trajectories/ecb0df7a-4e8d-4a03-b162-053391d3afaf",
"related_apps": [
"libreoffice calc"
],
"evaluator": "evaluation_dir"
}

View File

@@ -0,0 +1,22 @@
{
"id": "f9584479-3d0d-4c79-affa-9ad7afdd8850",
"snapshot": "libreoffice_calc",
"instruction": "Fill the missing row and column which show the total value",
"source": "https://youtube.com/shorts/feldd-Pn48c?si=9xJiem2uAHm6Jshb",
"config": {
"download": [
[
"https://drive.usercontent.google.com/download?id=1rwhniaClEkF8XFzdfaNUA6GmAiy4syMZ&export=download&authuser=0&confirm=t&uuid=6fdd5b04-85f4-45e1-ad74-368f8f2a82ab&at=APZUnTUP-JxPxLfNls6jXWghblQ5:1701766091851",
"C:\\Users\\tianbaox\\Desktop\\Quarterly_Product_Sales_by_Zone.xlsx"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\Quarterly_Product_Sales_by_Zone.xlsx"
]
},
"trajectory": "trajectories/f9584479-3d0d-4c79-affa-9ad7afdd8850",
"related_apps": [
"libreoffice calc"
],
"evaluator": "evaluation_dir"
}

View File

@@ -0,0 +1,13 @@
{
"id": "",
"snapshot": "libreoffice_calc",
"instruction": "",
"source": "",
"config": {
},
"trajectory": "trajectories/",
"related_apps": [
"libreoffice calc"
],
"evaluator": "evaluation_dir"
}

12
main.py
View File

@@ -1,3 +1,4 @@
import json
from desktop_env.envs.desktop_env import DesktopEnv
@@ -5,17 +6,16 @@ def human_agent():
"""
Runs the Gym environment with human input.
"""
with open("evaluation_examples/examples/37608790-6147-45d0-9f20-1137bb35703d.json", "r") as f:
example = json.load(f)
env = DesktopEnv(
# path_to_vm=r"""C:\Users\tianbaox\Downloads\Windows 10 x64\Windows 10 x64.vmx""",
path_to_vm=r"""C:\Users\tianbaox\Documents\Virtual Machines\Win10\Win10.vmx""",
# path_to_vm="/home/yuri/vmware/Ubuntu 64-bit/Ubuntu 64-bit.vmx",
snapshot_path="base_setup",
config={
"download": [(
"https://drive.usercontent.google.com/download?id=1rwhniaClEkF8XFzdfaNUA6GmAiy4syMZ&export=download&authuser=0&confirm=t&uuid=6fdd5b04-85f4-45e1-ad74-368f8f2a82ab&at=APZUnTUP-JxPxLfNls6jXWghblQ5:1701766091851",
r"C:\Users\tianbaox\Desktop\Quarterly_Product_Sales_by_Zone.xlsx")],
"open": [r"C:\Users\tianbaox\Desktop\Quarterly_Product_Sales_by_Zone.xlsx"],
}
config=example["config"],
)
# reset the environment to certain snapshot