Merge branch 'main' into dependabot/pip/boto3-1.35.7
@@ -37,7 +37,7 @@
<a href="https://docs.all-hands.dev/modules/usage/intro"><img src="https://img.shields.io/badge/Documentation-OpenHands-blue?logo=googledocs&logoColor=white&style=for-the-badge" alt="Check out the documentation"></a>
<a href="https://arxiv.org/abs/2407.16741"><img src="https://img.shields.io/badge/Paper-%20on%20Arxiv-red?logo=arxiv&style=for-the-badge" alt="Paper on Arxiv"></a>
<br/>
<a href="https://huggingface.co/spaces/OpenHands/evaluation"><img src="https://img.shields.io/badge/Evaluation-Benchmark%20on%20HF%20Space-green?logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark"></a>
<a href="https://huggingface.co/spaces/OpenDevin/evaluation"><img src="https://img.shields.io/badge/Evaluation-Benchmark%20on%20HF%20Space-green?logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark"></a>
</div>
<hr>

@@ -27,6 +27,7 @@ from openhands.runtime.plugins import (
    JupyterRequirement,
    PluginRequirement,
)
from openhands.utils.microagent import MicroAgent
from openhands.utils.prompt import PromptManager

@@ -73,10 +74,21 @@ class CodeActAgent(Agent):
        """
        super().__init__(llm, config)
        self.reset()

        self.micro_agent = (
            MicroAgent(
                os.path.join(
                    os.path.dirname(__file__), 'micro', f'{config.micro_agent_name}.md'
                )
            )
            if config.micro_agent_name
            else None
        )

        self.prompt_manager = PromptManager(
            prompt_dir=os.path.join(os.path.dirname(__file__)),
            agent_skills_docs=AgentSkillsRequirement.documentation,
            micro_agent_name=None,  # TODO: implement micro-agent
            micro_agent=self.micro_agent,
        )

    def action_to_str(self, action: Action) -> str:

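For orientation, here is a minimal sketch of what the new constructor wiring does, assuming the OpenHands checkout from this commit is importable and the script runs from the repository root; the token value is a dummy that only satisfies the github micro-agent's frontmatter check, and `micro_agent_name` stands in for `config.micro_agent_name`.

```python
# A minimal sketch of the new micro-agent wiring (not the agent's actual code path).
import os

import agenthub  # noqa: F401  (registers CodeActAgent so MicroAgent validation can find it)
from openhands.utils.microagent import MicroAgent
from openhands.utils.prompt import PromptManager

os.environ.setdefault('SANDBOX_ENV_GITHUB_TOKEN', 'dummy-token')  # dummy value for validation only

micro_agent_name = 'github'  # stands in for config.micro_agent_name
agent_dir = 'agenthub/codeact_agent'  # stands in for os.path.dirname(__file__)

micro_agent = (
    MicroAgent(os.path.join(agent_dir, 'micro', f'{micro_agent_name}.md'))
    if micro_agent_name
    else None
)
prompt_manager = PromptManager(
    prompt_dir=agent_dir,
    agent_skills_docs='(agent skills documentation goes here)',  # normally AgentSkillsRequirement.documentation
    micro_agent=micro_agent,
)
```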
agenthub/codeact_agent/micro/github.md (new file, 59 lines)
@@ -0,0 +1,59 @@
---
name: github
agent: CodeActAgent
require_env_var:
  SANDBOX_ENV_GITHUB_TOKEN: "Create a GitHub Personal Access Token (https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens) and set it as SANDBOX_GITHUB_TOKEN in your environment variables."
---

# How to Interact with Github

## Environment Variable Available

1. `GITHUB_TOKEN`: A read-only token for Github.

## Using GitHub's RESTful API

Use `curl` with the `GITHUB_TOKEN` to interact with GitHub's API. Here are some common operations:

1. View an issue:
```
curl -H "Authorization: token $GITHUB_TOKEN" \
  https://api.github.com/repos/{owner}/{repo}/issues/{issue_number}
```

2. List repository issues:
```
curl -H "Authorization: token $GITHUB_TOKEN" \
  https://api.github.com/repos/{owner}/{repo}/issues
```

3. Get repository details:
```
curl -H "Authorization: token $GITHUB_TOKEN" \
  https://api.github.com/repos/{owner}/{repo}
```

4. List pull requests:
```
curl -H "Authorization: token $GITHUB_TOKEN" \
  https://api.github.com/repos/{owner}/{repo}/pulls
```

5. Get user information:
```
curl -H "Authorization: token $GITHUB_TOKEN" \
  https://api.github.com/user
```

Replace `{owner}`, `{repo}`, and `{issue_number}` with appropriate values.

## Important Notes

1. Always use the GitHub API for operations instead of a web browser.
2. The `GITHUB_TOKEN` is read-only. Avoid operations that require write access.
3. Git config (username and email) is pre-set. Do not modify.
4. Edit and test code locally. Never push directly to remote.
5. Verify correct branch before committing.
6. Commit changes frequently.
7. If the issue or task is ambiguous or lacks sufficient detail, always request clarification from the user before proceeding.
8. You should avoid using command line tools like `sed` for file editing.

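For readers who prefer Python over `curl`, a rough standard-library equivalent of the calls above; it assumes `GITHUB_TOKEN` is exported exactly as the notes describe, and uses the token-only `/user` endpoint so no placeholders are needed.

```python
# Rough Python equivalent of the curl examples, using only the standard library.
import json
import os
import urllib.request

req = urllib.request.Request(
    'https://api.github.com/user',
    headers={'Authorization': f"token {os.environ['GITHUB_TOKEN']}"},
)
with urllib.request.urlopen(req) as resp:
    user = json.loads(resp.read())
print(user['login'])  # the login name associated with the token
```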
@@ -64,6 +64,15 @@ workspace_base = "./workspace"
# Name of the default agent
#default_agent = "CodeActAgent"

# JWT secret for authentication
#jwt_secret = ""

# Restrict file types for file uploads
#file_uploads_restrict_file_types = false

# List of allowed file extensions for uploads
#file_uploads_allowed_extensions = [".*"]

#################################### LLM #####################################
# Configuration for LLM models (group name starts with 'llm')
# use 'llm' for the default LLM config

@@ -126,6 +135,15 @@ model = "gpt-4o"
# Retry minimum wait time
#retry_min_wait = 3

# Retry multiplier for exponential backoff
#retry_multiplier = 2.0

# Drop any unmapped (unsupported) params without causing an exception
#drop_params = false

# Base URL for the OLLAMA API
#ollama_base_url = ""

# Temperature for the API
#temperature = 0.0

@@ -149,6 +167,9 @@ model = "gpt-3.5"
# agent.CodeActAgent
##############################################################################
[agent]
# Name of the micro agent to use for this agent
#micro_agent_name = ""

# Memory enabled
#memory_enabled = false

@@ -182,6 +203,18 @@ llm_config = 'gpt3'
# Enable auto linting after editing
#enable_auto_lint = false

# Whether to initialize plugins
#initialize_plugins = true

# Extra dependencies to install in the runtime image
#runtime_extra_deps = ""

# Environment variables to set at the launch of the runtime
#runtime_startup_env_vars = {}

# BrowserGym environment to use for evaluation
#browsergym_eval_env = ""

#################################### Security ###################################
# Configuration for security features
##############################################################################

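The new `[agent]` keys above surface in code as fields of the `AgentConfig` dataclass changed further down in this diff. A small hedged sketch, assuming `AgentConfig` is importable from `openhands.core.config` as those hunks suggest:

```python
# Hedged sketch: the [agent] table maps onto AgentConfig; the import path is an
# assumption based on the config.py hunks later in this diff.
from openhands.core.config import AgentConfig

agent_config = AgentConfig(micro_agent_name='github', memory_enabled=False)
assert agent_config.micro_agent_name == 'github'
```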
@@ -31,7 +31,7 @@ export function HomepageHeader() {
        <a href="https://arxiv.org/abs/2407.16741">
          <img src="https://img.shields.io/badge/Paper-%20on%20Arxiv-red?logo=arxiv&style=for-the-badge" alt="Paper on Arxiv" />
        </a>
        <a href="https://huggingface.co/spaces/OpenHands/evaluation">
        <a href="https://huggingface.co/spaces/OpenDevin/evaluation">
          <img src="https://img.shields.io/badge/Evaluation-Benchmark%20on%20HF%20Space-green?logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark" />
        </a>
      </div>

@@ -3,12 +3,14 @@
This folder contains code and resources to run experiments and evaluations.

## Logistics

To better organize the evaluation folder, we should follow the rules below:
- Each subfolder contains a specific benchmark or experiment. For example, `evaluation/swe_bench` should contain
- Each subfolder contains a specific benchmark or experiment. For example, `evaluation/swe_bench` should contain
all the preprocessing/evaluation/analysis scripts.
- Raw data and experimental records should not be stored within this repo.
- For model outputs, they should be stored at [this huggingface space](https://huggingface.co/spaces/OpenHands/evaluation) for visualization.
- Important data files of manageable size and analysis scripts (e.g., jupyter notebooks) can be directly uploaded to this repo.
- Raw data and experimental records should not be stored within this repo.
- For model outputs, they should be stored at [this huggingface space](https://huggingface.co/spaces/OpenDevin/evaluation) for visualization.
- Important data files of manageable size and analysis scripts (e.g., jupyter notebooks) can be directly uploaded to this repo.

## Supported Benchmarks

@@ -23,6 +25,7 @@ To learn more about how to integrate your benchmark into OpenHands, check out [t
- ML-Bench: [`evaluation/ml_bench`](./ml_bench)
- APIBench: [`evaluation/gorilla`](./gorilla/)
- ToolQA: [`evaluation/toolqa`](./toolqa/)
- AiderBench: [`evaluation/aider_bench`](./aider_bench/)

### Web Browsing

@@ -38,7 +41,6 @@ To learn more about how to integrate your benchmark into OpenHands, check out [t
- Entity deduction Arena (EDA): [`evaluation/EDA`](./EDA)
- ProofWriter: [`evaluation/logic_reasoning`](./logic_reasoning)


## Before everything begins: Setup Environment and LLM Configuration

Please follow instruction [here](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md) to setup your local development environment and LLM.

@@ -65,12 +67,10 @@ api_key = "XXX"
temperature = 0.0
```


### Result Visualization

Check [this huggingface space](https://huggingface.co/spaces/OpenHands/evaluation) for visualization of existing experimental results.
Check [this huggingface space](https://huggingface.co/spaces/OpenDevin/evaluation) for visualization of existing experimental results.

### Upload your results

You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenHands/evaluation) and submit a PR of your evaluation results to our hosted huggingface repo via PR following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).
You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenDevin/evaluation) and submit a PR of your evaluation results to our hosted huggingface repo via PR following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).

@@ -33,8 +33,10 @@ development environment and LLM.
  given IDs (comma separated).

There are also following optional environment variables you can set:
```
```bash
export USE_UNIT_TESTS=true # if you want to allow the Agent to verify correctness using unittests. Default to false.
export SKIP_NUM=12 # skip the first 12 instances from the dataset
```

Following is the basic command to start the evaluation.

@@ -58,6 +60,8 @@ You can update the arguments in the script

```bash
poetry run python ./evaluation/aider_bench/scripts/summarize_results.py [path_to_output_jsonl_file]
# with optional SKIP_NUM
poetry run python SKIP_NUM=12 ./evaluation/aider_bench/scripts/summarize_results.py [path_to_output_jsonl_file]
```

Full example:

@@ -34,6 +34,10 @@ from openhands.runtime.runtime import Runtime

# Configure visibility of unit tests to the Agent.
USE_UNIT_TESTS = os.environ.get('USE_UNIT_TESTS', 'false').lower() == 'true'
SKIP_NUM = os.environ.get('SKIP_NUM')
SKIP_NUM = (
    int(SKIP_NUM) if SKIP_NUM and SKIP_NUM.isdigit() and int(SKIP_NUM) >= 0 else None
)


def get_config(

@@ -66,7 +70,7 @@ async def initialize_runtime(

    This function is called before the runtime is used to run the agent.
    """
    logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
    logger.info(f"\n{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}\n")
    obs: CmdOutputObservation

    # Set instance id

@@ -96,7 +100,7 @@ async def initialize_runtime(
        file_path,
        '/workspace',
    )
    logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
    logger.info(f"\n{'-' * 50} END Runtime Initialization Fn {'-' * 50}\n")


async def complete_runtime(

@@ -109,7 +113,7 @@ async def complete_runtime(
    If you need to do something in the sandbox to get the correctness metric after
    the agent has run, modify this function.
    """
    logger.info(f"{'-' * 50} BEGIN Runtime Completion Fn {'-' * 50}")
    logger.info(f"\n{'-' * 50} BEGIN Runtime Completion Fn {'-' * 50}\n")
    obs: CmdOutputObservation

    # Rewriting the test file to ignore any changes Agent may have made.

@@ -136,7 +140,9 @@ async def complete_runtime(
    if isinstance(obs, CmdOutputObservation):
        exit_code = obs.exit_code

    logger.info(f"{'-' * 50} END Runtime Completion Fn {'-' * 50}")
    logger.info(f"\n{'-' * 50} END Runtime Completion Fn {'-' * 50}\n")

    await runtime.close()

    return {
        'test_output': obs.content,

@@ -156,7 +162,9 @@ async def process_instance(
        log_dir = os.path.join(metadata.eval_output_dir, 'infer_logs')
        reset_logger_for_multiprocessing(logger, str(instance.instance_id), log_dir)
    else:
        logger.info(f'Starting evaluation for instance {str(instance.instance_id)}.')
        logger.info(
            f'\nStarting evaluation for instance {str(instance.instance_id)}.\n'
        )

    # =============================================
    # build instruction

@@ -268,10 +276,14 @@ if __name__ == '__main__':
    eval_ids = None
    if args.eval_ids:
        eval_ids = str(args.eval_ids).split(',')
        logger.info(f'Using specific dataset IDs: {eval_ids}')
        logger.info(f'\nUsing specific dataset IDs: {eval_ids}\n')

    instances = prepare_dataset(
        aider_bench_tests, output_file, args.eval_n_limit, eval_ids=eval_ids
        aider_bench_tests,
        output_file,
        args.eval_n_limit,
        eval_ids=eval_ids,
        skip_num=SKIP_NUM,
    )

    asyncio.run(

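The SKIP_NUM parsing above is defensive: anything that is not a plain non-negative integer string collapses to `None`, which simply disables skipping instead of raising. A quick standalone check of that expression:

```python
# Standalone check of the SKIP_NUM parsing logic from the hunk above.
import os


def parse_skip_num() -> int | None:
    skip_num = os.environ.get('SKIP_NUM')
    return (
        int(skip_num) if skip_num and skip_num.isdigit() and int(skip_num) >= 0 else None
    )


os.environ['SKIP_NUM'] = '12'
assert parse_skip_num() == 12

os.environ['SKIP_NUM'] = '-3'  # '-3'.isdigit() is False, so skipping is disabled
assert parse_skip_num() is None
```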
@@ -22,7 +22,7 @@ def extract_test_results(res_file_path: str) -> tuple[list[str], list[str]]:
if __name__ == '__main__':
    if len(sys.argv) != 2:
        print(
            'Usage: poetry run python summarise_results.py <path_to_output_jsonl_file>'
            'Usage: poetry run python summarize_results.py <path_to_output_jsonl_file>'
        )
        sys.exit(1)
    json_file_path = sys.argv[1]

@@ -26,7 +26,7 @@ poetry run python evaluation/miniwob/get_success_rate.py evaluation/evaluation_o

## Submit your evaluation results

You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenHands/evaluation) and submit a PR of your evaluation results following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).
You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenDevin/evaluation) and submit a PR of your evaluation results following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).


## BrowsingAgent V1.0 result

@@ -95,7 +95,7 @@ With `output.jsonl` file, you can run `eval_infer.sh` to evaluate generated patc

> If you want to evaluate existing results, you should first run this to clone existing outputs
>```bash
>git clone https://huggingface.co/spaces/OpenHands/evaluation evaluation/evaluation_outputs
>git clone https://huggingface.co/spaces/OpenDevin/evaluation evaluation/evaluation_outputs
>```

NOTE, you should have already pulled the instance-level OR env-level docker images following [this section](#openhands-swe-bench-instance-level-docker-support).

@@ -129,10 +129,10 @@ The final results will be saved to `evaluation/evaluation_outputs/outputs/swe_be

## Visualize Results

First you need to clone `https://huggingface.co/spaces/OpenHands/evaluation` and add your own running results from openhands into the `outputs` of the cloned repo.
First you need to clone `https://huggingface.co/spaces/OpenDevin/evaluation` and add your own running results from openhands into the `outputs` of the cloned repo.

```bash
git clone https://huggingface.co/spaces/OpenHands/evaluation
git clone https://huggingface.co/spaces/OpenDevin/evaluation
```

**(optional) setup streamlit environment with conda**:

@@ -156,4 +156,4 @@ Then you can access the SWE-Bench trajectory visualizer at `localhost:8501`.

## Submit your evaluation results

You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenHands/evaluation) and submit a PR of your evaluation results following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).
You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenDevin/evaluation) and submit a PR of your evaluation results following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).

@@ -181,34 +181,44 @@ def prepare_dataset(
    output_file: str,
    eval_n_limit: int,
    eval_ids: list[str] | None = None,
    skip_num: int | None = None,
):
    assert (
        'instance_id' in dataset.columns
    ), "Expected 'instance_id' column in the dataset. You should define your own unique identifier for each instance and use it as the 'instance_id' column."
    id_column = 'instance_id'
    logger.info(f'Writing evaluation output to {output_file}')
    finished_ids = set()
    finished_ids: set[str] = set()
    if os.path.exists(output_file):
        with open(output_file, 'r') as f:
            for line in f:
                data = json.loads(line)
                finished_ids.add(data[id_column])
                finished_ids.add(str(data[id_column]))
        logger.warning(
            f'Output file {output_file} already exists. Loaded {len(finished_ids)} finished instances.'
            f'\nOutput file {output_file} already exists. Loaded {len(finished_ids)} finished instances.'
        )

    if eval_ids:
        eval_ids_converted = [dataset[id_column].dtype.type(id) for id in eval_ids]
        dataset = dataset[dataset[id_column].isin(eval_ids_converted)]
        logger.info(f'Limiting evaluation to {len(eval_ids)} specific instances.')
    elif eval_n_limit:
    elif skip_num and skip_num >= 0:
        skip_num = min(skip_num, len(dataset))
        dataset = dataset.iloc[skip_num:]
        logger.info(
            f'Starting evaluation with skipping first {skip_num} instances ({len(dataset)} instances to run).'
        )
        if eval_n_limit and eval_n_limit > 0:
            dataset = dataset.head(eval_n_limit)
            logger.info(f'Limiting evaluation to {eval_n_limit} instances.')
    elif eval_n_limit and eval_n_limit > 0:
        dataset = dataset.head(eval_n_limit)
        logger.info(f'Limiting evaluation to first {eval_n_limit} instances.')

    new_dataset = [
        instance
        for _, instance in dataset.iterrows()
        if instance[id_column] not in finished_ids
        if str(instance[id_column]) not in finished_ids
    ]
    logger.info(
        f'Finished instances: {len(finished_ids)}, Remaining instances: {len(new_dataset)}'

@@ -228,8 +238,8 @@ async def run_evaluation(
):
    use_multiprocessing = num_workers > 1
    logger.info(
        f'Evaluation started with Agent {metadata.agent_class}, '
        f'model {metadata.llm_config.model}, max iterations {metadata.max_iterations}.'
        f'Evaluation started with Agent {metadata.agent_class}:\n'
        f'model {metadata.llm_config.model}, max iterations {metadata.max_iterations}.\n'
    )
    pbar = tqdm(total=len(dataset))
    output_fp = open(output_file, 'a')

@@ -241,7 +251,7 @@ async def run_evaluation(
        pbar.set_description(f'Instance {output.instance_id}')
        pbar.set_postfix_str(f'Test Result: {output.test_result}')
        logger.info(
            f'Finished evaluation for instance {output.instance_id}: {output.test_result}'
            f'Finished evaluation for instance {output.instance_id}: {output.test_result}\n'
        )
        output_fp.write(json.dumps(output.model_dump()) + '\n')
        output_fp.flush()

@@ -270,11 +280,11 @@ async def run_evaluation(
                await update_progress(output)

    except KeyboardInterrupt:
        print('KeyboardInterrupt received. Cleaning up...')
        print('\nKeyboardInterrupt received. Cleaning up...\n')
        cleanup()

    output_fp.close()
    logger.info('Evaluation finished.')
    logger.info('\nEvaluation finished.\n')


def reset_logger_for_multiprocessing(

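The skip/limit logic added to `prepare_dataset` above composes as: drop the first `skip_num` rows, then keep at most `eval_n_limit` of what remains. A toy illustration with a small pandas frame, reproducing only the slicing from the hunk:

```python
# Toy illustration of the new skip/limit slicing in prepare_dataset.
import pandas as pd

dataset = pd.DataFrame({'instance_id': [str(i) for i in range(10)]})
skip_num, eval_n_limit = 3, 4

if skip_num and skip_num >= 0:
    dataset = dataset.iloc[min(skip_num, len(dataset)):]
    if eval_n_limit and eval_n_limit > 0:
        dataset = dataset.head(eval_n_limit)

print(list(dataset['instance_id']))  # ['3', '4', '5', '6']
```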
@@ -7,6 +7,7 @@ This folder contains evaluation for [WebArena](https://github.com/web-arena-x/we
Please follow instruction [here](../README.md#setup) to setup your local development environment and LLM.

## Setup WebArena Environment

WebArena requires you to set up websites containing pre-populated content that is accessible via URL to the machine running the OpenHands agents.
Follow [this document](https://github.com/web-arena-x/webarena/blob/main/environment_docker/README.md) to set up your own WebArena environment through local servers or AWS EC2 instances.
Take note of the base URL (`$WEBARENA_BASE_URL`) of the machine where the environment is installed.

@@ -36,8 +37,7 @@ poetry run python evaluation/webarena/get_success_rate.py evaluation/evaluation_

## Submit your evaluation results

You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenHands/evaluation) and submit a PR of your evaluation results following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).

You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenDevin/evaluation) and submit a PR of your evaluation results following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).

## BrowsingAgent V1.0 result

@@ -12,7 +12,6 @@ import toml
from dotenv import load_dotenv

from openhands.core import logger
from openhands.core.utils import Singleton

load_dotenv()

@@ -123,11 +122,13 @@ class AgentConfig:
    """Configuration for the agent.

    Attributes:
        micro_agent_name: The name of the micro agent to use for this agent.
        memory_enabled: Whether long-term memory (embeddings) is enabled.
        memory_max_threads: The maximum number of threads indexing at the same time for embeddings.
        llm_config: The name of the llm config to use. If specified, this will override global llm config.
    """

    micro_agent_name: str | None = None
    memory_enabled: bool = False
    memory_max_threads: int = 2
    llm_config: str | None = None

@@ -141,7 +142,7 @@ class AgentConfig:


@dataclass
class SecurityConfig(metaclass=Singleton):
class SecurityConfig:
    """Configuration for security related functionalities.

    Attributes:

@@ -174,7 +175,7 @@ class SecurityConfig(metaclass=Singleton):


@dataclass
class SandboxConfig(metaclass=Singleton):
class SandboxConfig:
    """Configuration for the sandbox.

    Attributes:

@@ -241,7 +242,7 @@ class UndefinedString(str, Enum):


@dataclass
class AppConfig(metaclass=Singleton):
class AppConfig:
    """Configuration for the app.

    Attributes:

@@ -565,7 +566,12 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
            sandbox_config = SandboxConfig(**toml_config['sandbox'])

        # update the config object with the new values
        AppConfig(sandbox=sandbox_config, **core_config)
        cfg.sandbox = sandbox_config
        for key, value in core_config.items():
            if hasattr(cfg, key):
                setattr(cfg, key, value)
            else:
                logger.openhands_logger.warning(f'Unknown core config key: {key}')
    except (TypeError, KeyError) as e:
        logger.openhands_logger.warning(
            f'Cannot parse config from toml, toml values have not been applied.\nError: {e}',

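With the `Singleton` metaclass removed (its deletion appears a few hunks below), `load_from_toml` now mutates the `AppConfig` instance it is handed instead of re-invoking the class to merge values into a shared instance. A stripped-down, self-contained version of that update loop, where `_Cfg` stands in for `AppConfig` and `print()` for the logger:

```python
# Stripped-down version of the new update loop in load_from_toml.
from dataclasses import dataclass


@dataclass
class _Cfg:  # stands in for AppConfig
    max_iterations: int = 100
    max_budget_per_task: float | None = None


cfg = _Cfg()
core_config = {'max_iterations': 42, 'max_budget_per_task': 4.7, 'bogus_key': 1}

for key, value in core_config.items():
    if hasattr(cfg, key):
        setattr(cfg, key, value)
    else:
        print(f'Unknown core config key: {key}')  # the real code logs a warning

assert cfg.max_iterations == 42 and cfg.max_budget_per_task == 4.7
```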
@@ -72,3 +72,8 @@ class LLMResponseError(Exception):
class UserCancelledError(Exception):
    def __init__(self, message='User cancelled the request'):
        super().__init__(message)


class MicroAgentValidationError(Exception):
    def __init__(self, message='Micro agent validation failed'):
        super().__init__(message)

@@ -1,3 +0,0 @@
from openhands.core.utils.singleton import Singleton

__all__ = ['Singleton']

@@ -1,37 +0,0 @@
import dataclasses

from openhands.core import logger


class Singleton(type):
    _instances: dict = {}

    def __call__(cls, *args, **kwargs):
        if cls not in cls._instances:
            cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
        else:
            # allow updates, just update existing instance
            # perhaps not the most orthodox way to do it, though it simplifies client code
            # useful for pre-defined groups of settings
            instance = cls._instances[cls]
            for key, value in kwargs.items():
                if hasattr(instance, key):
                    setattr(instance, key, value)
                else:
                    logger.openhands_logger.warning(
                        f'Unknown key for {cls.__name__}: "{key}"'
                    )
        return cls._instances[cls]

    @classmethod
    def reset(cls):
        # used by pytest to reset the state of the singleton instances
        for instance_type, instance in cls._instances.items():
            print('resetting... ', instance_type)
            for field_info in dataclasses.fields(instance_type):
                if dataclasses.is_dataclass(field_info.type):
                    setattr(instance, field_info.name, field_info.type())
                elif field_info.default_factory is not dataclasses.MISSING:
                    setattr(instance, field_info.name, field_info.default_factory())
                else:
                    setattr(instance, field_info.name, field_info.default)

@@ -12,3 +12,33 @@ def find_available_tcp_port() -> int:
        return -1
    finally:
        sock.close()


def display_number_matrix(number: int) -> str | None:
    if not 0 <= number <= 999:
        return None

    # Define the matrix representation for each digit
    digits = {
        '0': ['###', '# #', '# #', '# #', '###'],
        '1': ['  #', '  #', '  #', '  #', '  #'],
        '2': ['###', '  #', '###', '#  ', '###'],
        '3': ['###', '  #', '###', '  #', '###'],
        '4': ['# #', '# #', '###', '  #', '  #'],
        '5': ['###', '#  ', '###', '  #', '###'],
        '6': ['###', '#  ', '###', '# #', '###'],
        '7': ['###', '  #', '  #', '  #', '  #'],
        '8': ['###', '# #', '###', '# #', '###'],
        '9': ['###', '# #', '###', '  #', '###'],
    }

    # alternatively, with leading zeros: num_str = f"{number:03d}"
    num_str = str(number)  # Convert to string without padding

    result = []
    for row in range(5):
        line = ' '.join(digits[digit][row] for digit in num_str)
        result.append(line)

    matrix_display = '\n'.join(result)
    return f'\n{matrix_display}\n'

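A quick usage check, assuming `display_number_matrix` from the hunk above is in scope: each digit renders as a 3-column block of `#` characters, and blocks are joined with a single space.

```python
# Assuming display_number_matrix (defined above) is in scope; leading/trailing
# newlines of the returned string are omitted from the expected output below.
print(display_number_matrix(42))
# # # ###
# # #   #
# ### ###
#   # #
#   # ###
```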
openhands/utils/microagent.py (new file, 44 lines)
@@ -0,0 +1,44 @@
import os

import frontmatter
import pydantic

from openhands.controller.agent import Agent
from openhands.core.exceptions import MicroAgentValidationError
from openhands.core.logger import openhands_logger as logger


class MicroAgentMetadata(pydantic.BaseModel):
    name: str
    agent: str
    require_env_var: dict[str, str]


class MicroAgent:
    def __init__(self, path: str):
        self.path = path
        if not os.path.exists(path):
            raise FileNotFoundError(f'Micro agent file {path} is not found')
        with open(path, 'r') as file:
            self._loaded = frontmatter.load(file)
            self._content = self._loaded.content
            self._metadata = MicroAgentMetadata(**self._loaded.metadata)
        self._validate_micro_agent()

    @property
    def content(self) -> str:
        return self._content

    def _validate_micro_agent(self):
        logger.info(
            f'Loading and validating micro agent [{self._metadata.name}] based on [{self._metadata.agent}]'
        )
        # Make sure the agent is registered
        agent_cls = Agent.get_cls(self._metadata.agent)
        assert agent_cls is not None
        # Make sure the environment variables are set
        for env_var, instruction in self._metadata.require_env_var.items():
            if env_var not in os.environ:
                raise MicroAgentValidationError(
                    f'Environment variable [{env_var}] is required by micro agent [{self._metadata.name}] but not set. {instruction}'
                )

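A hedged usage sketch for the class above, assuming the OpenHands package from this commit is importable; the markdown and environment variable here are purely illustrative, and `import agenthub` is needed so validation can look up `CodeActAgent`.

```python
# Illustrative only: load a micro-agent definition from a temporary file.
import os
import tempfile

import agenthub  # noqa: F401  (registers the agents MicroAgent validates against)
from openhands.utils.microagent import MicroAgent

md = (
    '---\n'
    'name: demo\n'
    'agent: CodeActAgent\n'
    'require_env_var:\n'
    '  DEMO_VAR: "Export DEMO_VAR before loading this micro-agent."\n'
    '---\n'
    '# Demo instructions\n'
)
os.environ['DEMO_VAR'] = '1'  # satisfies the require_env_var check
with tempfile.NamedTemporaryFile('w', suffix='.md', delete=False) as f:
    f.write(md)

agent = MicroAgent(f.name)
print(agent.content)  # '# Demo instructions'
```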
@@ -2,6 +2,8 @@ import os

from jinja2 import Template

from openhands.utils.microagent import MicroAgent


class PromptManager:
    """

@@ -14,23 +16,21 @@ class PromptManager:
    Attributes:
        prompt_dir (str): Directory containing prompt templates.
        agent_skills_docs (str): Documentation of agent skills.
        micro_agent (str | None): Content of the micro-agent definition file, if specified.
        micro_agent (MicroAgent | None): Micro-agent, if specified.
    """

    def __init__(
        self,
        prompt_dir: str,
        agent_skills_docs: str,
        micro_agent_name: str | None = None,
        micro_agent: MicroAgent | None = None,
    ):
        self.prompt_dir: str = prompt_dir
        self.agent_skills_docs: str = agent_skills_docs

        self.system_template: Template = self._load_template('system_prompt')
        self.user_template: Template = self._load_template('user_prompt')
        self.micro_agent: str | None = (
            self._load_micro_agent(micro_agent_name) if micro_agent_name else None
        )
        self.micro_agent: MicroAgent | None = micro_agent

    def _load_template(self, template_name: str) -> Template:
        template_path = os.path.join(self.prompt_dir, f'{template_name}.j2')

@@ -39,15 +39,6 @@ class PromptManager:
        with open(template_path, 'r') as file:
            return Template(file.read())

    def _load_micro_agent(self, micro_agent_name: str) -> str:
        micro_agent_path = os.path.join(self.prompt_dir, f'micro/{micro_agent_name}.md')
        if not os.path.exists(micro_agent_path):
            raise FileNotFoundError(
                f'Micro agent file {micro_agent_path} for {micro_agent_name} is not found'
            )
        with open(micro_agent_path, 'r') as file:
            return file.read()

    @property
    def system_message(self) -> str:
        rendered = self.system_template.render(

@@ -66,5 +57,7 @@ class PromptManager:
        These additional context will convert the current generic agent
        into a more specialized agent that is tailored to the user's task.
        """
        rendered = self.user_template.render(micro_agent=self.micro_agent)
        rendered = self.user_template.render(
            micro_agent=self.micro_agent.content if self.micro_agent else None
        )
        return rendered.strip()

poetry.lock (generated, 22 lines changed)
@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.

[[package]]
name = "aenum"

@@ -6585,6 +6585,24 @@ files = [
[package.extras]
cli = ["click (>=5.0)"]

[[package]]
name = "python-frontmatter"
version = "1.1.0"
description = "Parse and manage posts with YAML (or other) frontmatter"
optional = false
python-versions = "*"
files = [
    {file = "python-frontmatter-1.1.0.tar.gz", hash = "sha256:7118d2bd56af9149625745c58c9b51fb67e8d1294a0c76796dafdc72c36e5f6d"},
    {file = "python_frontmatter-1.1.0-py3-none-any.whl", hash = "sha256:335465556358d9d0e6c98bbeb69b1c969f2a4a21360587b9873bfc3b213407c1"},
]

[package.dependencies]
PyYAML = "*"

[package.extras]
docs = ["sphinx"]
test = ["mypy", "pyaml", "pytest", "toml", "types-PyYAML", "types-toml"]

[[package]]
name = "python-json-logger"
version = "2.0.7"

@@ -9459,4 +9477,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "0e95b8afa4826171ad0b57a46690c8dc4317e1d5a642388e9be9352eac7b3cdc"
content-hash = "ca8ef3dbc1eed207bca42c98c3cbf1fc085548977994ebc28283bc5ddbfa0101"

@@ -47,6 +47,7 @@ tree-sitter = "0.21.3"
bashlex = "^0.18"
pyjwt = "^2.9.0"
dirhash = "*"
python-frontmatter = "^1.1.0"
python-docx = "*"
PyPDF2 = "*"
python-pptx = "*"

@@ -83,6 +84,7 @@ reportlab = "*"
[tool.coverage.run]
concurrency = ["gevent"]


[tool.poetry.group.runtime.dependencies]
jupyterlab = "*"
notebook = "*"

@@ -113,6 +115,7 @@ ignore = ["D1"]
[tool.ruff.lint.pydocstyle]
convention = "google"


[tool.poetry.group.evaluation.dependencies]
streamlit = "*"
whatthepatch = "*"

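The new `python-frontmatter` dependency is what `MicroAgent` uses to split a micro-agent `.md` file into YAML metadata and a markdown body. A quick self-contained check of what the library returns:

```python
# Minimal check of python-frontmatter, the dependency added above.
import frontmatter

post = frontmatter.loads('---\nname: github\nagent: CodeActAgent\n---\n# How to Interact with Github\n')
print(post.metadata['agent'])  # CodeActAgent
print(post.content)            # roughly '# How to Interact with Github'
```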
@@ -49,6 +49,7 @@ async def test_set_agent_state(mock_agent, mock_event_stream):

    await controller.set_agent_state_to(AgentState.PAUSED)
    assert controller.get_agent_state() == AgentState.PAUSED
    await controller.close()


@pytest.mark.asyncio
@@ -65,6 +66,7 @@ async def test_on_event_message_action(mock_agent, mock_event_stream):
    message_action = MessageAction(content='Test message')
    await controller.on_event(message_action)
    assert controller.get_agent_state() == AgentState.RUNNING
    await controller.close()


@pytest.mark.asyncio
@@ -81,6 +83,7 @@ async def test_on_event_change_agent_state_action(mock_agent, mock_event_stream)
    change_state_action = ChangeAgentStateAction(agent_state=AgentState.PAUSED)
    await controller.on_event(change_state_action)
    assert controller.get_agent_state() == AgentState.PAUSED
    await controller.close()


@pytest.mark.asyncio
@@ -97,6 +100,7 @@ async def test_report_error(mock_agent, mock_event_stream):
    await controller.report_error(error_message)
    assert controller.state.last_error == error_message
    controller.event_stream.add_event.assert_called_once()
    await controller.close()


@pytest.mark.asyncio
@@ -116,6 +120,7 @@ async def test_step_with_exception(mock_agent, mock_event_stream):

    # Verify that report_error was called with the correct error message
    controller.report_error.assert_called_once_with('Malformed action')
    await controller.close()


@pytest.mark.asyncio
@@ -134,6 +139,7 @@ async def test_step_max_iterations(mock_agent, mock_event_stream):
    await controller._step()
    assert controller.state.traffic_control_state == TrafficControlState.THROTTLING
    assert controller.state.agent_state == AgentState.PAUSED
    await controller.close()


@pytest.mark.asyncio
@@ -153,6 +159,7 @@ async def test_step_max_iterations_headless(mock_agent, mock_event_stream):
    assert controller.state.traffic_control_state == TrafficControlState.THROTTLING
    # In headless mode, throttling results in an error
    assert controller.state.agent_state == AgentState.ERROR
    await controller.close()


@pytest.mark.asyncio
@@ -172,6 +179,7 @@ async def test_step_max_budget(mock_agent, mock_event_stream):
    await controller._step()
    assert controller.state.traffic_control_state == TrafficControlState.THROTTLING
    assert controller.state.agent_state == AgentState.PAUSED
    await controller.close()


@pytest.mark.asyncio
@@ -192,3 +200,4 @@ async def test_step_max_budget_headless(mock_agent, mock_event_stream):
    assert controller.state.traffic_control_state == TrafficControlState.THROTTLING
    # In headless mode, throttling results in an error
    assert controller.state.agent_state == AgentState.ERROR
    await controller.close()

@@ -40,7 +40,6 @@ def temp_toml_file(tmp_path):
@pytest.fixture
def default_config(monkeypatch):
    # Fixture to provide a default AppConfig instance
    AppConfig.reset()
    yield AppConfig()


@@ -501,8 +500,8 @@ def test_api_keys_repr_str():
def test_max_iterations_and_max_budget_per_task_from_toml(temp_toml_file):
    temp_toml = """
[core]
max_iterations = 100
max_budget_per_task = 4.0
max_iterations = 42
max_budget_per_task = 4.7
"""

    config = AppConfig()

@@ -511,8 +510,8 @@ max_budget_per_task = 4.0

    load_from_toml(config, temp_toml_file)

    assert config.max_iterations == 100
    assert config.max_budget_per_task == 4.0
    assert config.max_iterations == 42
    assert config.max_budget_per_task == 4.7


def test_get_llm_config_arg(temp_toml_file):

@@ -87,9 +87,6 @@ def test_app_config_attributes_masking(test_handler):
    assert 'e2b-xyz789' not in log_output
    assert 'ghp_abcdefghijklmnopqrstuvwxyz' not in log_output

    # reset the AppConfig
    AppConfig.reset()


def test_sensitive_env_vars_masking(test_handler):
    logger, stream = test_handler

tests/unit/test_microagent_utils.py (new file, 73 lines)
@@ -0,0 +1,73 @@
import os

import pytest
from pytest import MonkeyPatch

import agenthub  # noqa: F401
from openhands.core.exceptions import (
    AgentNotRegisteredError,
    MicroAgentValidationError,
)
from openhands.utils.microagent import MicroAgent

CONTENT = (
    '# dummy header\n' 'dummy content\n' '## dummy subheader\n' 'dummy subcontent\n'
)


def test_micro_agent_load(tmp_path, monkeypatch: MonkeyPatch):
    with open(os.path.join(tmp_path, 'dummy.md'), 'w') as f:
        f.write(
            (
                '---\n'
                'name: dummy\n'
                'agent: CodeActAgent\n'
                'require_env_var:\n'
                '  SANDBOX_OPENHANDS_TEST_ENV_VAR: "Set this environment variable for testing purposes"\n'
                '---\n' + CONTENT
            )
        )

    # Patch the required environment variable
    monkeypatch.setenv('SANDBOX_OPENHANDS_TEST_ENV_VAR', 'dummy_value')

    micro_agent = MicroAgent(os.path.join(tmp_path, 'dummy.md'))
    assert micro_agent is not None
    assert micro_agent.content == CONTENT.strip()


def test_not_existing_agent(tmp_path, monkeypatch: MonkeyPatch):
    with open(os.path.join(tmp_path, 'dummy.md'), 'w') as f:
        f.write(
            (
                '---\n'
                'name: dummy\n'
                'agent: NotExistingAgent\n'
                'require_env_var:\n'
                '  SANDBOX_OPENHANDS_TEST_ENV_VAR: "Set this environment variable for testing purposes"\n'
                '---\n' + CONTENT
            )
        )
    monkeypatch.setenv('SANDBOX_OPENHANDS_TEST_ENV_VAR', 'dummy_value')

    with pytest.raises(AgentNotRegisteredError):
        MicroAgent(os.path.join(tmp_path, 'dummy.md'))


def test_not_existing_env_var(tmp_path):
    with open(os.path.join(tmp_path, 'dummy.md'), 'w') as f:
        f.write(
            (
                '---\n'
                'name: dummy\n'
                'agent: CodeActAgent\n'
                'require_env_var:\n'
                '  SANDBOX_OPENHANDS_TEST_ENV_VAR: "Set this environment variable for testing purposes"\n'
                '---\n' + CONTENT
            )
        )

    with pytest.raises(MicroAgentValidationError) as excinfo:
        MicroAgent(os.path.join(tmp_path, 'dummy.md'))

    assert 'Set this environment variable for testing purposes' in str(excinfo.value)

@@ -1,8 +1,10 @@
import os
import shutil
from unittest.mock import Mock

import pytest

from openhands.utils.microagent import MicroAgent
from openhands.utils.prompt import PromptManager


@@ -56,11 +58,19 @@ def test_prompt_manager_with_micro_agent(prompt_dir, agent_skills_docs):
    with open(os.path.join(prompt_dir, 'micro', f'{micro_agent_name}.md'), 'w') as f:
        f.write(micro_agent_content)

    manager = PromptManager(prompt_dir, agent_skills_docs, micro_agent_name)
    # Mock MicroAgent
    mock_micro_agent = Mock(spec=MicroAgent)
    mock_micro_agent.content = micro_agent_content

    manager = PromptManager(
        prompt_dir=prompt_dir,
        agent_skills_docs=agent_skills_docs,
        micro_agent=mock_micro_agent,
    )

    assert manager.prompt_dir == prompt_dir
    assert manager.agent_skills_docs == agent_skills_docs
    assert manager.micro_agent == micro_agent_content
    assert manager.micro_agent == mock_micro_agent

    assert isinstance(manager.system_message, str)
    assert (

@@ -86,7 +96,7 @@ def test_prompt_manager_with_micro_agent(prompt_dir, agent_skills_docs):

def test_prompt_manager_file_not_found(prompt_dir, agent_skills_docs):
    with pytest.raises(FileNotFoundError):
        PromptManager(prompt_dir, agent_skills_docs, 'non_existent_micro_agent')
        MicroAgent(os.path.join(prompt_dir, 'micro', 'non_existent_micro_agent.md'))


def test_prompt_manager_template_rendering(prompt_dir, agent_skills_docs):