Merge pull request #42 from humanlayer/5-17-and-contributors

5 17 and contributors
This commit is contained in:
Dex
2025-05-25 15:17:18 -07:00
committed by GitHub
71 changed files with 6501 additions and 27 deletions

View File

@@ -245,6 +245,10 @@ After digging through hundreds of AI libraries and working with dozens of found
- [The AI Agent Index (MIT)](https://aiagentindex.mit.edu/)
- [NotebookLM on Finding Model Capability Boundaries](https://open.substack.com/pub/swyx/p/notebooklm?selection=08e1187c-cfee-4c63-93c9-71216640a5f8)
## Contributors
Thanks to everyone who has contributed to 12-factor agents!
[<img src="https://avatars.githubusercontent.com/u/3730605?v=4&s=80" width="80px" alt="dexhorthy" />](https://github.com/dexhorthy) [<img src="https://avatars.githubusercontent.com/u/50557586?v=4&s=80" width="80px" alt="Sypherd" />](https://github.com/Sypherd) [<img src="https://avatars.githubusercontent.com/u/66259401?v=4&s=80" width="80px" alt="tofaramususa" />](https://github.com/tofaramususa) [<img src="https://avatars.githubusercontent.com/u/18105223?v=4&s=80" width="80px" alt="a-churchill" />](https://github.com/a-churchill) [<img src="https://avatars.githubusercontent.com/u/4084885?v=4&s=80" width="80px" alt="Elijas" />](https://github.com/Elijas) [<img src="https://avatars.githubusercontent.com/u/39267118?v=4&s=80" width="80px" alt="hugolmn" />](https://github.com/hugolmn) [<img src="https://avatars.githubusercontent.com/u/1882972?v=4&s=80" width="80px" alt="jeremypeters" />](https://github.com/jeremypeters)
[<img src="https://avatars.githubusercontent.com/u/380402?v=4&s=80" width="80px" alt="kndl" />](https://github.com/kndl) [<img src="https://avatars.githubusercontent.com/u/16674643?v=4&s=80" width="80px" alt="maciejkos" />](https://github.com/maciejkos) [<img src="https://avatars.githubusercontent.com/u/85041180?v=4&s=80" width="80px" alt="pfbyjy" />](https://github.com/pfbyjy) [<img src="https://avatars.githubusercontent.com/u/36044389?v=4&s=80" width="80px" alt="0xRaduan" />](https://github.com/0xRaduan) [<img src="https://avatars.githubusercontent.com/u/7169731?v=4&s=80" width="80px" alt="zyuanlim" />](https://github.com/zyuanlim) [<img src="https://avatars.githubusercontent.com/u/15862501?v=4&s=80" width="80px" alt="lombardo-chcg" />](https://github.com/lombardo-chcg) [<img src="https://avatars.githubusercontent.com/u/160066852?v=4&s=80" width="80px" alt="sahanatvessel" />](https://github.com/sahanatvessel)

View File

@@ -0,0 +1 @@
3.13

View File

View File

@@ -0,0 +1,120 @@
#!/usr/bin/env python3
"""
Generate a Markdown grid of contributor avatars for a GitHub repository.
Usage:
python generate_contributors_grid.py --repo owner/name --token GH_TOKEN [--cols 7] [--image_size 80] [--output FILE]
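Example (illustrative invocation, assuming GITHUB_TOKEN is exported):
python generate_contributors_grid.py --repo humanlayer/12-factor-agents --cols 7 --output contributors.md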
Arguments:
--repo GitHub repository in "owner/name" form (e.g. "octocat/Hello-World")
--token Personal access token with `public_repo` scope (or `repo` for private).
Can also be provided via the GITHUB_TOKEN environment variable.
--cols Number of avatars per row in the generated grid (default 7).
--image_size Pixel width for avatars (GitHub automatically resizes; default 80).
--output File to write the Markdown grid into (default '-', i.e. stdout).
The generated file contains a table-less Markdown grid of linked avatars that can
be embedded in README.md or any other Markdown document.
"""
from __future__ import annotations
import argparse
import os
import sys
import textwrap
from typing import List, Dict
import requests
API_URL_TEMPLATE = "https://api.github.com/repos/{owner}/{repo}/contributors"
def fetch_contributors(owner: str, repo: str, token: str | None, per_page: int = 100) -> List[Dict]:
"""Return a list of contributor objects from the GitHub REST API."""
headers = {"Accept": "application/vnd.github+json"}
if token:
headers["Authorization"] = f"Bearer {token}"
contributors: List[Dict] = []
page = 1
while True:
url = f"{API_URL_TEMPLATE.format(owner=owner, repo=repo)}?per_page={per_page}&page={page}"
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
batch = response.json()
if not batch:
break
contributors.extend(batch)
if len(batch) < per_page:
break
page += 1
return contributors
def build_markdown(contributors: List[Dict], cols: int = 7, image_size: int = 80) -> str:
"""Return a Markdown fragment containing a grid of linked avatar images."""
lines: List[str] = []
row: List[str] = []
for contributor in contributors:
login = contributor["login"]
avatar = f"{contributor['avatar_url']}&s={image_size}"
profile = contributor["html_url"]
cell = f'[<img src="{avatar}" width="{image_size}px" alt="{login}" />]({profile})'
row.append(cell)
if len(row) == cols:
lines.append(" ".join(row))
row = []
if row:
lines.append(" ".join(row))
return "\n\n".join(lines)
def main() -> None:
parser = argparse.ArgumentParser(description="Generate a Markdown grid of contributor avatars")
parser.add_argument("--repo", required=True, help="GitHub repo in owner/name form")
parser.add_argument("--token", help="GitHub Personal Access Token (or set GITHUB_TOKEN env)")
parser.add_argument("--cols", type=int, default=7, help="Number of avatars per row (default 7)")
parser.add_argument("--image_size", type=int, default=80, help="Avatar size in px (default 80)")
parser.add_argument("--output", "-o", default="-", help="Output file (default: stdout, use '-' for stdout)")
args = parser.parse_args()
token = args.token or os.getenv("GITHUB_TOKEN")
if not token:
parser.error("A GitHub token must be supplied via --token or GITHUB_TOKEN env var.")
if "/" not in args.repo:
parser.error("--repo must be in 'owner/name' form")
owner, repo = args.repo.split("/", 1)
contributors = fetch_contributors(owner, repo, token)
if not contributors:
sys.exit("No contributors found. Is the repository correct and does the token have access?")
markdown = build_markdown(contributors, cols=args.cols, image_size=args.image_size)
header = textwrap.dedent(
f"""
<!-- AUTO-GENERATED BY generate_contributors_grid.py -->
## Contributors
Thanks to these wonderful people:\n
"""
)
if args.output == "-":
sys.stdout.write(header)
sys.stdout.write(markdown)
sys.stdout.write("\n")
else:
with open(args.output, "w", encoding="utf-8") as fh:
fh.write(header)
fh.write(markdown)
fh.write("\n")
print(f"Wrote {len(contributors)} contributors to {args.output}", file=sys.stderr)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,9 @@
[project]
name = "contributors-markdown"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"requests>=2.32.3",
]

hack/contributors_markdown/uv.lock generated Normal file
View File

@@ -0,0 +1,77 @@
version = 1
requires-python = ">=3.13"
[[package]]
name = "certifi"
version = "2025.4.26"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/e8/9e/c05b3920a3b7d20d3d3310465f50348e5b3694f4f88c6daf736eef3024c4/certifi-2025.4.26.tar.gz", hash = "sha256:0a816057ea3cdefcef70270d2c515e4506bbc954f417fa5ade2021213bb8f0c6", size = 160705 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/4a/7e/3db2bd1b1f9e95f7cddca6d6e75e2f2bd9f51b1246e546d88addca0106bd/certifi-2025.4.26-py3-none-any.whl", hash = "sha256:30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3", size = 159618 },
]
[[package]]
name = "charset-normalizer"
version = "3.4.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/e4/33/89c2ced2b67d1c2a61c19c6751aa8902d46ce3dacb23600a283619f5a12d/charset_normalizer-3.4.2.tar.gz", hash = "sha256:5baececa9ecba31eff645232d59845c07aa030f0c81ee70184a90d35099a0e63", size = 126367 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ea/12/a93df3366ed32db1d907d7593a94f1fe6293903e3e92967bebd6950ed12c/charset_normalizer-3.4.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:926ca93accd5d36ccdabd803392ddc3e03e6d4cd1cf17deff3b989ab8e9dbcf0", size = 199622 },
{ url = "https://files.pythonhosted.org/packages/04/93/bf204e6f344c39d9937d3c13c8cd5bbfc266472e51fc8c07cb7f64fcd2de/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eba9904b0f38a143592d9fc0e19e2df0fa2e41c3c3745554761c5f6447eedabf", size = 143435 },
{ url = "https://files.pythonhosted.org/packages/22/2a/ea8a2095b0bafa6c5b5a55ffdc2f924455233ee7b91c69b7edfcc9e02284/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3fddb7e2c84ac87ac3a947cb4e66d143ca5863ef48e4a5ecb83bd48619e4634e", size = 153653 },
{ url = "https://files.pythonhosted.org/packages/b6/57/1b090ff183d13cef485dfbe272e2fe57622a76694061353c59da52c9a659/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98f862da73774290f251b9df8d11161b6cf25b599a66baf087c1ffe340e9bfd1", size = 146231 },
{ url = "https://files.pythonhosted.org/packages/e2/28/ffc026b26f441fc67bd21ab7f03b313ab3fe46714a14b516f931abe1a2d8/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c9379d65defcab82d07b2a9dfbfc2e95bc8fe0ebb1b176a3190230a3ef0e07c", size = 148243 },
{ url = "https://files.pythonhosted.org/packages/c0/0f/9abe9bd191629c33e69e47c6ef45ef99773320e9ad8e9cb08b8ab4a8d4cb/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e635b87f01ebc977342e2697d05b56632f5f879a4f15955dfe8cef2448b51691", size = 150442 },
{ url = "https://files.pythonhosted.org/packages/67/7c/a123bbcedca91d5916c056407f89a7f5e8fdfce12ba825d7d6b9954a1a3c/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1c95a1e2902a8b722868587c0e1184ad5c55631de5afc0eb96bc4b0d738092c0", size = 145147 },
{ url = "https://files.pythonhosted.org/packages/ec/fe/1ac556fa4899d967b83e9893788e86b6af4d83e4726511eaaad035e36595/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ef8de666d6179b009dce7bcb2ad4c4a779f113f12caf8dc77f0162c29d20490b", size = 153057 },
{ url = "https://files.pythonhosted.org/packages/2b/ff/acfc0b0a70b19e3e54febdd5301a98b72fa07635e56f24f60502e954c461/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:32fc0341d72e0f73f80acb0a2c94216bd704f4f0bce10aedea38f30502b271ff", size = 156454 },
{ url = "https://files.pythonhosted.org/packages/92/08/95b458ce9c740d0645feb0e96cea1f5ec946ea9c580a94adfe0b617f3573/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:289200a18fa698949d2b39c671c2cc7a24d44096784e76614899a7ccf2574b7b", size = 154174 },
{ url = "https://files.pythonhosted.org/packages/78/be/8392efc43487ac051eee6c36d5fbd63032d78f7728cb37aebcc98191f1ff/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4a476b06fbcf359ad25d34a057b7219281286ae2477cc5ff5e3f70a246971148", size = 149166 },
{ url = "https://files.pythonhosted.org/packages/44/96/392abd49b094d30b91d9fbda6a69519e95802250b777841cf3bda8fe136c/charset_normalizer-3.4.2-cp313-cp313-win32.whl", hash = "sha256:aaeeb6a479c7667fbe1099af9617c83aaca22182d6cf8c53966491a0f1b7ffb7", size = 98064 },
{ url = "https://files.pythonhosted.org/packages/e9/b0/0200da600134e001d91851ddc797809e2fe0ea72de90e09bec5a2fbdaccb/charset_normalizer-3.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:aa6af9e7d59f9c12b33ae4e9450619cf2488e2bbe9b44030905877f0b2324980", size = 105641 },
{ url = "https://files.pythonhosted.org/packages/20/94/c5790835a017658cbfabd07f3bfb549140c3ac458cfc196323996b10095a/charset_normalizer-3.4.2-py3-none-any.whl", hash = "sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0", size = 52626 },
]
[[package]]
name = "contributors-markdown"
version = "0.1.0"
source = { virtual = "." }
dependencies = [
{ name = "requests" },
]
[package.metadata]
requires-dist = [{ name = "requests", specifier = ">=2.32.3" }]
[[package]]
name = "idna"
version = "3.10"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 },
]
[[package]]
name = "requests"
version = "2.32.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "certifi" },
{ name = "charset-normalizer" },
{ name = "idna" },
{ name = "urllib3" },
]
sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 },
]
[[package]]
name = "urllib3"
version = "2.4.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/8a/78/16493d9c386d8e60e442a35feac5e00f0913c0f4b7c217c11e8ec2ff53e0/urllib3-2.4.0.tar.gz", hash = "sha256:414bc6535b787febd7567804cc015fee39daab8ad86268f1310a9250697de466", size = 390672 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6b/11/cc635220681e93a0183390e26485430ca2c7b5f9d33b15c74c2861cb8091/urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813", size = 128680 },
]

View File

@@ -0,0 +1,142 @@
# Chapter 0 - Hello World
Let's start with a basic TypeScript setup and a hello world program.
This guide is written in TypeScript (yes, a Python version is coming soon)
There are many checkpoints between every file edit in the workshop steps,
so even if you aren't super familiar with TypeScript,
you should be able to keep up and run each example.
To run this guide, you'll need a relatively recent version of Node.js and npm installed.
You can use whatever Node.js version manager you want; [homebrew](https://formulae.brew.sh/formula/node) is fine
brew install node@20
You should see the node version
node --version
Copy initial package.json
cp ./walkthrough/00-package.json package.json
<details>
<summary>show file</summary>
```json
// ./walkthrough/00-package.json
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0"
}
}
```
</details>
Install dependencies
npm install
Copy tsconfig.json
cp ./walkthrough/00-tsconfig.json tsconfig.json
<details>
<summary>show file</summary>
```json
// ./walkthrough/00-tsconfig.json
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}
```
</details>
add .gitignore
cp ./walkthrough/00-.gitignore .gitignore
<details>
<summary>show file</summary>
```gitignore
// ./walkthrough/00-.gitignore
baml_client/
node_modules/
```
</details>
Create src folder
mkdir -p src
Add a simple hello world index.ts
cp ./walkthrough/00-index.ts src/index.ts
<details>
<summary>show file</summary>
```ts
// ./walkthrough/00-index.ts
async function hello(): Promise<void> {
console.log('hello, world!')
}
async function main() {
await hello()
}
main().catch(console.error)
```
</details>
Run it to verify
npx tsx src/index.ts
You should see:
hello, world!
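As an aside, the package.json you copied wires this same command up as the `dev` script, so this should print the same greeting:
npm run dev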

View File

@@ -0,0 +1,2 @@
baml_client/
node_modules/

View File

@@ -0,0 +1,9 @@
async function hello(): Promise<void> {
console.log('hello, world!')
}
async function main() {
await hello()
}
main().catch(console.error)

View File

@@ -0,0 +1,20 @@
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0"
}
}

View File

@@ -0,0 +1,24 @@
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}

View File

@@ -0,0 +1,2 @@
baml_client/
node_modules/

View File

@@ -0,0 +1,231 @@
# Chapter 1 - CLI and Agent Loop
Now let's add BAML and create our first agent with a CLI interface.
First, we'll need to install [BAML](https://github.com/boundaryml/baml)
which is a tool for prompting and structured outputs.
npm install @boundaryml/baml
Initialize BAML
npx baml-cli init
Remove default resume.baml
rm baml_src/resume.baml
Add our starter agent, a single baml prompt that we'll build on
cp ./walkthrough/01-agent.baml baml_src/agent.baml
<details>
<summary>show file</summary>
```rust
// ./walkthrough/01-agent.baml
class DoneForNow {
intent "done_for_now"
message string
}
client<llm> Qwen3 {
provider "openai-generic"
options {
base_url env.BASETEN_BASE_URL
api_key env.BASETEN_API_KEY
}
}
function DetermineNextStep(
thread: string
) -> DoneForNow {
client Qwen3
// use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
prompt #"
{{ _.role("system") }}
/nothink
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}
```
</details>
Generate BAML client code
npx baml-cli generate
Enable BAML logging for this section
export BAML_LOG=debug
Add the CLI interface
cp ./walkthrough/01-cli.ts src/cli.ts
<details>
<summary>show file</summary>
```ts
// ./walkthrough/01-cli.ts
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "./agent";
export async function cli() {
// Get command line arguments, skipping the first two (node and script name)
const args = process.argv.slice(2);
if (args.length === 0) {
console.error("Error: Please provide a message as a command line argument");
process.exit(1);
}
// Join all arguments into a single message
const message = args.join(" ");
// Create a new thread with the user's message as the initial event
const thread = new Thread([{ type: "user_input", data: message }]);
// Run the agent loop with the thread
const result = await agentLoop(thread);
console.log(result);
}
```
</details>
Update index.ts to use the CLI
```diff
src/index.ts
+import { cli } from "./cli"
+
async function hello(): Promise<void> {
console.log('hello, world!')
}
async function main() {
- await hello()
+ await cli()
}
```
<details>
<summary>skip this step</summary>
cp ./walkthrough/01-index.ts src/index.ts
</details>
Add the agent implementation
cp ./walkthrough/01-agent.ts src/agent.ts
<details>
<summary>show file</summary>
```ts
// ./walkthrough/01-agent.ts
import { b } from "../baml_client";
// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
export interface Event {
type: string
data: any;
}
export class Thread {
events: Event[] = [];
constructor(events: Event[]) {
this.events = events;
}
serializeForLLM() {
// can change this to whatever custom serialization you want to do, XML, etc
// e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
return JSON.stringify(this.events);
}
}
// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
return nextStep;
}
```
</details>
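The `serializeForLLM` comment above notes that you can swap in any custom serialization you like. As a rough sketch of what that might look like (a hypothetical helper, not one of the workshop files; Chapter 7 switches the agent to XML formatting for real), here is an XML-ish variant:

```ts
// hypothetical alternative serializer, for illustration only:
// render each event as an XML-ish block instead of one JSON array
import type { Event } from "./agent";

export function serializeAsXml(events: Event[]): string {
  return events
    .map(e => `<${e.type}>\n${JSON.stringify(e.data, null, 2)}\n</${e.type}>`)
    .join("\n\n");
}
```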
The BAML code is configured to use BASETEN_BASE_URL and BASETEN_API_KEY by default
To get a Baseten API key and URL, create an account at [baseten.co](https://baseten.co),
and then deploy [Qwen3 32B from the model library](https://www.baseten.co/library/qwen-3-32b/).
```rust
function DetermineNextStep(thread: string) -> DoneForNow {
client Qwen3
// ...
```
If you want to run the example with no changes, you can set the BASETEN_API_KEY env var to any valid Baseten API key.
If you want to try swapping out the model, you can change the `client` line.
[Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms)
For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini)
or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider.
For example, to use openai with an OPENAI_API_KEY, you can do:
client "openai/gpt-4o"
Set your env vars
export BASETEN_API_KEY=...
export BASETEN_BASE_URL=...
Try it out
npx tsx src/index.ts hello
you should see a familiar response from the model
{
intent: 'done_for_now',
message: 'Hello! How can I assist you today?'
}

View File

@@ -0,0 +1,9 @@
async function hello(): Promise<void> {
console.log('hello, world!')
}
async function main() {
await hello()
}
main().catch(console.error)

View File

@@ -0,0 +1,49 @@
class DoneForNow {
intent "done_for_now"
message string
}
client<llm> Qwen3 {
provider "openai-generic"
options {
base_url env.BASETEN_BASE_URL
api_key env.BASETEN_API_KEY
}
}
function DetermineNextStep(
thread: string
) -> DoneForNow {
client Qwen3
// use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
prompt #"
{{ _.role("system") }}
/nothink
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}

View File

@@ -0,0 +1,32 @@
import { b } from "../baml_client";
// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
export interface Event {
type: string
data: any;
}
export class Thread {
events: Event[] = [];
constructor(events: Event[]) {
this.events = events;
}
serializeForLLM() {
// can change this to whatever custom serialization you want to do, XML, etc
// e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
return JSON.stringify(this.events);
}
}
// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
return nextStep;
}

View File

@@ -0,0 +1,23 @@
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "./agent";
export async function cli() {
// Get command line arguments, skipping the first two (node and script name)
const args = process.argv.slice(2);
if (args.length === 0) {
console.error("Error: Please provide a message as a command line argument");
process.exit(1);
}
// Join all arguments into a single message
const message = args.join(" ");
// Create a new thread with the user's message as the initial event
const thread = new Thread([{ type: "user_input", data: message }]);
// Run the agent loop with the thread
const result = await agentLoop(thread);
console.log(result);
}

View File

@@ -0,0 +1,11 @@
import { cli } from "./cli"
async function hello(): Promise<void> {
console.log('hello, world!')
}
async function main() {
await cli()
}
main().catch(console.error)

View File

@@ -0,0 +1,2 @@
baml_client/
node_modules/

View File

@@ -0,0 +1,84 @@
# Chapter 2 - Add Calculator Tools
Let's add some calculator tools to our agent.
Let's start by adding a tool definition for the calculator
These are simple structured outputs that we'll ask the model to
return as a "next step" in the agentic loop.
cp ./walkthrough/02-tool_calculator.baml baml_src/tool_calculator.baml
<details>
<summary>show file</summary>
```rust
// ./walkthrough/02-tool_calculator.baml
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
class AddTool {
intent "add"
a int | float
b int | float
}
class SubtractTool {
intent "subtract"
a int | float
b int | float
}
class MultiplyTool {
intent "multiply"
a int | float
b int | float
}
class DivideTool {
intent "divide"
a int | float
b int | float
}
```
</details>
Now, let's update the agent's DetermineNextStep method to
expose the calculator tools as potential next steps
```diff
baml_src/agent.baml
function DetermineNextStep(
thread: string
-) -> DoneForNow {
+) -> CalculatorTools | DoneForNow {
client Qwen3
```
<details>
<summary>skip this step</summary>
cp ./walkthrough/02-agent.baml baml_src/agent.baml
</details>
Generate updated BAML client
npx baml-cli generate
Try out the calculator
npx tsx src/index.ts 'can you add 3 and 4'
You should see a tool call to the calculator
{
intent: 'add',
a: 3,
b: 4
}
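Under the hood, `DetermineNextStep` now returns the union `CalculatorTools | DoneForNow`, and every variant carries a distinct `intent` string, so TypeScript can narrow the result for you. A minimal sketch of narrowing by hand (illustrative only; Chapter 3 turns this into a real loop):

```ts
// sketch only: the generated union is discriminated by `intent`
import { b } from "../baml_client";

async function demo(): Promise<void> {
  const step = await b.DetermineNextStep(
    JSON.stringify([{ type: "user_input", data: "can you add 3 and 4" }])
  );
  if (step.intent === "add") {
    // in this branch the type has narrowed to AddTool, so `a` and `b` exist
    console.log("sum:", step.a + step.b);
  }
}

demo().catch(console.error);
```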

View File

@@ -0,0 +1,49 @@
class DoneForNow {
intent "done_for_now"
message string
}
client<llm> Qwen3 {
provider "openai-generic"
options {
base_url env.BASETEN_BASE_URL
api_key env.BASETEN_API_KEY
}
}
function DetermineNextStep(
thread: string
) -> DoneForNow {
client Qwen3
// use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
prompt #"
{{ _.role("system") }}
/nothink
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}

View File

@@ -0,0 +1,75 @@
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview
client<llm> CustomGPT4o {
provider openai
options {
model "gpt-4o"
api_key env.OPENAI_API_KEY
}
}
client<llm> CustomGPT4oMini {
provider openai
retry_policy Exponential
options {
model "gpt-4o-mini"
api_key env.OPENAI_API_KEY
}
}
client<llm> CustomSonnet {
provider anthropic
options {
model "claude-3-5-sonnet-20241022"
api_key env.ANTHROPIC_API_KEY
}
}
client<llm> CustomHaiku {
provider anthropic
retry_policy Constant
options {
model "claude-3-haiku-20240307"
api_key env.ANTHROPIC_API_KEY
}
}
// https://docs.boundaryml.com/docs/snippets/clients/round-robin
client<llm> CustomFast {
provider round-robin
options {
// This will alternate between the two clients
strategy [CustomGPT4oMini, CustomHaiku]
}
}
// https://docs.boundaryml.com/docs/snippets/clients/fallback
client<llm> OpenaiFallback {
provider fallback
options {
// This will try the clients in order until one succeeds
strategy [CustomGPT4oMini, CustomGPT4oMini]
}
}
// https://docs.boundaryml.com/docs/snippets/clients/retry
retry_policy Constant {
max_retries 3
// Strategy is optional
strategy {
type constant_delay
delay_ms 200
}
}
retry_policy Exponential {
max_retries 2
// Strategy is optional
strategy {
type exponential_backoff
delay_ms 300
multiplier 1.5
max_delay_ms 10000
}
}

View File

@@ -0,0 +1,18 @@
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
// Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
output_type "typescript"
// Where the generated code will be saved (relative to baml_src/)
output_dir "../"
// The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
// The BAML VSCode extension version should also match this version.
version "0.88.0"
// Valid values: "sync", "async"
// This controls what `b.FunctionName()` will be (sync or async).
default_client_mode async
}

View File

@@ -0,0 +1,32 @@
import { b } from "../baml_client";
// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
export interface Event {
type: string
data: any;
}
export class Thread {
events: Event[] = [];
constructor(events: Event[]) {
this.events = events;
}
serializeForLLM() {
// can change this to whatever custom serialization you want to do, XML, etc
// e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
return JSON.stringify(this.events);
}
}
// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
return nextStep;
}

View File

@@ -0,0 +1,23 @@
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "./agent";
export async function cli() {
// Get command line arguments, skipping the first two (node and script name)
const args = process.argv.slice(2);
if (args.length === 0) {
console.error("Error: Please provide a message as a command line argument");
process.exit(1);
}
// Join all arguments into a single message
const message = args.join(" ");
// Create a new thread with the user's message as the initial event
const thread = new Thread([{ type: "user_input", data: message }]);
// Run the agent loop with the thread
const result = await agentLoop(thread);
console.log(result);
}

View File

@@ -0,0 +1,11 @@
import { cli } from "./cli"
async function hello(): Promise<void> {
console.log('hello, world!')
}
async function main() {
await cli()
}
main().catch(console.error)

View File

@@ -0,0 +1,49 @@
class DoneForNow {
intent "done_for_now"
message string
}
client<llm> Qwen3 {
provider "openai-generic"
options {
base_url env.BASETEN_BASE_URL
api_key env.BASETEN_API_KEY
}
}
function DetermineNextStep(
thread: string
) -> CalculatorTools | DoneForNow {
client Qwen3
// use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
prompt #"
{{ _.role("system") }}
/nothink
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}

View File

@@ -0,0 +1,27 @@
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
class AddTool {
intent "add"
a int | float
b int | float
}
class SubtractTool {
intent "subtract"
a int | float
b int | float
}
class MultiplyTool {
intent "multiply"
a int | float
b int | float
}
class DivideTool {
intent "divide"
a int | float
b int | float
}

View File

@@ -0,0 +1,2 @@
baml_client/
node_modules/

View File

@@ -0,0 +1,194 @@
# Chapter 3 - Process Tool Calls in a Loop
Now let's add a real agentic loop that can run the tools and get a final answer from the LLM.
First, let's update the agent to handle the tool call
```diff
src/agent.ts
}
-// right now this just runs one turn with the LLM, but
-// we'll update this function to handle all the agent logic
-export async function agentLoop(thread: Thread): Promise<AgentResponse> {
- const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
- return nextStep;
+
+
+export async function agentLoop(thread: Thread): Promise<string> {
+
+ while (true) {
+ const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
+ console.log("nextStep", nextStep);
+
+ switch (nextStep.intent) {
+ case "done_for_now":
+ // response to human, return the next step object
+ return nextStep.message;
+ case "add":
+ thread.events.push({
+ "type": "tool_call",
+ "data": nextStep
+ });
+ const result = nextStep.a + nextStep.b;
+ console.log("tool_response", result);
+ thread.events.push({
+ "type": "tool_response",
+ "data": result
+ });
+ continue;
+ default:
+ throw new Error(`Unknown intent: ${nextStep.intent}`);
+ }
+ }
}
```
<details>
<summary>skip this step</summary>
cp ./walkthrough/03-agent.ts src/agent.ts
</details>
Now, let's try it out
npx tsx src/index.ts 'can you add 3 and 4'
you should see the agent call the tool and then return the result
{
intent: 'done_for_now',
message: 'The sum of 3 and 4 is 7.'
}
For the next step, we'll do a more complex calculation. Let's turn off the baml logs for more concise output
export BAML_LOG=off
Try a multi-step calculation
npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result'
you'll notice that tools like multiply and divide are not handled yet; the loop throws an `Unknown intent` error when the model picks them
npx tsx src/index.ts 'can you multiply 3 and 4'
next, let's add handlers for the rest of the calculator tools
```diff
src/agent.ts
-import { b } from "../baml_client";
+import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
-// tool call or a respond to human tool
-type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
-
export interface Event {
type: string
}
+export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
+export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
+ let result: number;
+ switch (nextStep.intent) {
+ case "add":
+ result = nextStep.a + nextStep.b;
+ console.log("tool_response", result);
+ thread.events.push({
+ "type": "tool_response",
+ "data": result
+ });
+ return thread;
+ case "subtract":
+ result = nextStep.a - nextStep.b;
+ console.log("tool_response", result);
+ thread.events.push({
+ "type": "tool_response",
+ "data": result
+ });
+ return thread;
+ case "multiply":
+ result = nextStep.a * nextStep.b;
+ console.log("tool_response", result);
+ thread.events.push({
+ "type": "tool_response",
+ "data": result
+ });
+ return thread;
+ case "divide":
+ result = nextStep.a / nextStep.b;
+ console.log("tool_response", result);
+ thread.events.push({
+ "type": "tool_response",
+ "data": result
+ });
+ return thread;
+ }
+}
export async function agentLoop(thread: Thread): Promise<string> {
console.log("nextStep", nextStep);
+ thread.events.push({
+ "type": "tool_call",
+ "data": nextStep
+ });
+
switch (nextStep.intent) {
case "done_for_now":
return nextStep.message;
case "add":
- thread.events.push({
- "type": "tool_call",
- "data": nextStep
- });
- const result = nextStep.a + nextStep.b;
- console.log("tool_response", result);
- thread.events.push({
- "type": "tool_response",
- "data": result
- });
- continue;
- default:
- throw new Error(`Unknown intent: ${nextStep.intent}`);
+ case "subtract":
+ case "multiply":
+ case "divide":
+ thread = await handleNextStep(nextStep, thread);
}
}
```
<details>
<summary>skip this step</summary>
cp ./walkthrough/03b-agent.ts src/agent.ts
</details>
Test subtraction
npx tsx src/index.ts 'can you subtract 3 from 4'
now, let's test the multiplication tool
npx tsx src/index.ts 'can you multiply 3 and 4'
finally, let's test a more complex calculation with multiple operations
npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
congratulations, you've taken your first step toward hand-rolling an agent loop.
from here, we're going to start incorporating some more intermediate and advanced
concepts for 12-factor agents.
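one optional hardening idea before you go (a suggestion, not a workshop step): since the loop's switch now names every intent, a `default` arm with an exhaustiveness guard makes the compiler complain if a tool is ever added to the BAML union but forgotten in the loop:

```ts
// sketch: exhaustiveness guard for the agent loop's switch
function assertNever(x: never): never {
  throw new Error(`unhandled intent: ${JSON.stringify(x)}`);
}

// inside agentLoop's switch, after the calculator cases:
// default:
//   return assertNever(nextStep);
```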

View File

@@ -0,0 +1,49 @@
class DoneForNow {
intent "done_for_now"
message string
}
client<llm> Qwen3 {
provider "openai-generic"
options {
base_url env.BASETEN_BASE_URL
api_key env.BASETEN_API_KEY
}
}
function DetermineNextStep(
thread: string
) -> CalculatorTools | DoneForNow {
client Qwen3
// use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
prompt #"
{{ _.role("system") }}
/nothink
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}

View File

@@ -0,0 +1,75 @@
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview
client<llm> CustomGPT4o {
provider openai
options {
model "gpt-4o"
api_key env.OPENAI_API_KEY
}
}
client<llm> CustomGPT4oMini {
provider openai
retry_policy Exponential
options {
model "gpt-4o-mini"
api_key env.OPENAI_API_KEY
}
}
client<llm> CustomSonnet {
provider anthropic
options {
model "claude-3-5-sonnet-20241022"
api_key env.ANTHROPIC_API_KEY
}
}
client<llm> CustomHaiku {
provider anthropic
retry_policy Constant
options {
model "claude-3-haiku-20240307"
api_key env.ANTHROPIC_API_KEY
}
}
// https://docs.boundaryml.com/docs/snippets/clients/round-robin
client<llm> CustomFast {
provider round-robin
options {
// This will alternate between the two clients
strategy [CustomGPT4oMini, CustomHaiku]
}
}
// https://docs.boundaryml.com/docs/snippets/clients/fallback
client<llm> OpenaiFallback {
provider fallback
options {
// This will try the clients in order until one succeeds
strategy [CustomGPT4oMini, CustomGPT4oMini]
}
}
// https://docs.boundaryml.com/docs/snippets/clients/retry
retry_policy Constant {
max_retries 3
// Strategy is optional
strategy {
type constant_delay
delay_ms 200
}
}
retry_policy Exponential {
max_retries 2
// Strategy is optional
strategy {
type exponential_backoff
delay_ms 300
multiplier 1.5
max_delay_ms 10000
}
}

View File

@@ -0,0 +1,18 @@
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
// Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
output_type "typescript"
// Where the generated code will be saved (relative to baml_src/)
output_dir "../"
// The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
// The BAML VSCode extension version should also match this version.
version "0.88.0"
// Valid values: "sync", "async"
// This controls what `b.FunctionName()` will be (sync or async).
default_client_mode async
}

View File

@@ -0,0 +1,27 @@
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
class AddTool {
intent "add"
a int | float
b int | float
}
class SubtractTool {
intent "subtract"
a int | float
b int | float
}
class MultiplyTool {
intent "multiply"
a int | float
b int | float
}
class DivideTool {
intent "divide"
a int | float
b int | float
}

View File

@@ -0,0 +1,32 @@
import { b } from "../baml_client";
// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
export interface Event {
type: string
data: any;
}
export class Thread {
events: Event[] = [];
constructor(events: Event[]) {
this.events = events;
}
serializeForLLM() {
// can change this to whatever custom serialization you want to do, XML, etc
// e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
return JSON.stringify(this.events);
}
}
// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
return nextStep;
}

View File

@@ -0,0 +1,23 @@
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "./agent";
export async function cli() {
// Get command line arguments, skipping the first two (node and script name)
const args = process.argv.slice(2);
if (args.length === 0) {
console.error("Error: Please provide a message as a command line argument");
process.exit(1);
}
// Join all arguments into a single message
const message = args.join(" ");
// Create a new thread with the user's message as the initial event
const thread = new Thread([{ type: "user_input", data: message }]);
// Run the agent loop with the thread
const result = await agentLoop(thread);
console.log(result);
}

View File

@@ -0,0 +1,11 @@
import { cli } from "./cli"
async function hello(): Promise<void> {
console.log('hello, world!')
}
async function main() {
await cli()
}
main().catch(console.error)

View File

@@ -0,0 +1,55 @@
import { b } from "../baml_client";
// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
export interface Event {
type: string
data: any;
}
export class Thread {
events: Event[] = [];
constructor(events: Event[]) {
this.events = events;
}
serializeForLLM() {
// can change this to whatever custom serialization you want to do, XML, etc
// e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
return JSON.stringify(this.events);
}
}
export async function agentLoop(thread: Thread): Promise<string> {
while (true) {
const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
console.log("nextStep", nextStep);
switch (nextStep.intent) {
case "done_for_now":
// response to human, return the next step object
return nextStep.message;
case "add":
thread.events.push({
"type": "tool_call",
"data": nextStep
});
const result = nextStep.a + nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
continue;
default:
throw new Error(`Unknown intent: ${nextStep.intent}`);
}
}
}

View File

@@ -0,0 +1,86 @@
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
export interface Event {
type: string
data: any;
}
export class Thread {
events: Event[] = [];
constructor(events: Event[]) {
this.events = events;
}
serializeForLLM() {
// can change this to whatever custom serialization you want to do, XML, etc
// e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
return JSON.stringify(this.events);
}
}
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
let result: number;
switch (nextStep.intent) {
case "add":
result = nextStep.a + nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
case "subtract":
result = nextStep.a - nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
case "multiply":
result = nextStep.a * nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
case "divide":
result = nextStep.a / nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
}
}
export async function agentLoop(thread: Thread): Promise<string> {
while (true) {
const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
console.log("nextStep", nextStep);
thread.events.push({
"type": "tool_call",
"data": nextStep
});
switch (nextStep.intent) {
case "done_for_now":
// response to human, return the next step object
return nextStep.message;
case "add":
case "subtract":
case "multiply":
case "divide":
thread = await handleNextStep(nextStep, thread);
}
}
}

File diff suppressed because it is too large

View File

@@ -9,12 +9,12 @@ targets:
newFiles:
cat: false
cp: true
# - folders:
# path: "./build/sections"
# skip:
# - "cleanup"
# final:
# dirName: "final"
- folders:
path: "./build/sections"
skip:
- "cleanup"
final:
dirName: "final"
sections:
- name: cleanup
@@ -220,3 +220,465 @@ sections:
from here, we're going to start incorporating some more intermediate and advanced
concepts for 12-factor agents.
- name: baml-tests
title: "Chapter 4 - Add Tests to agent.baml"
text: "Let's add some tests to our BAML agent."
steps:
- text: to start, leave the baml logs enabled
command: |
export BAML_LOG=debug
- text: |
next, let's add some tests to the agent
We'll start with a simple test that checks the agent's ability to handle
a basic calculation.
file: {src: ./walkthrough/04-agent.baml, dest: baml_src/agent.baml}
- text: "Run the tests"
command: |
npx baml-cli test
- text: |
now, let's improve the test with assertions!
Assertions are a great way to make sure the agent is working as expected,
and can easily be extended to check for more complex behavior.
file: {src: ./walkthrough/04b-agent.baml, dest: baml_src/agent.baml}
- text: "Run the tests"
command: |
npx baml-cli test
- text: |
as you add more tests, you can disable the logs to keep the output clean.
You may want to turn them on as you iterate on specific tests.
command: |
export BAML_LOG=off
- text: |
now, let's add some more complex test cases,
where we resume from the middle of an in-progress
agentic context window
file: {src: ./walkthrough/04c-agent.baml, dest: baml_src/agent.baml}
- text: |
let's try to run it
command: |
npx baml-cli test
- name: human-tools
title: "Chapter 5 - Multiple Human Tools"
text: |
In this section, we'll add support for multiple tools that serve to
contact humans.
steps:
- text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details."
command: |
export BAML_LOG=off
- text: |
first, let's add a tool that can request clarification from a human
this will be different from the "done_for_now" tool,
and can be used to more flexibly handle different types of human interactions
in your agent.
file: {src: ./walkthrough/05-agent.baml, dest: baml_src/agent.baml}
- text: |
next, let's re-generate the client code
NOTE - if you're using the VSCode extension for BAML,
the client will be regenerated automatically when you save the file
in your editor.
command: |
npx baml-cli generate
incremental: true
- text: |
now, let's update the agent to use the new tool
file: {src: ./walkthrough/05-agent.ts, dest: src/agent.ts}
- text: |
next, let's update the CLI to handle clarification requests
by requesting input from the user on the CLI
file: {src: ./walkthrough/05-cli.ts, dest: src/cli.ts}
- text: |
let's try it out
command: |
npx tsx src/index.ts 'can you multiply 3 and FD*(#F&& '
- text: |
next, let's add a test that checks the agent's ability to handle
a clarification request
file: {src: ./walkthrough/05b-agent.baml, dest: baml_src/agent.baml}
- text: |
and now we can run the tests again
command: |
npx baml-cli test
- text: |
you'll notice the new test passes, but the hello world test fails
This is because the agent's default behavior is to return "done_for_now"
file: {src: ./walkthrough/05c-agent.baml, dest: baml_src/agent.baml}
- text: "Verify tests pass"
command: |
npx baml-cli test
- name: customize-prompt
title: "Chapter 6 - Customize Your Prompt with Reasoning"
text: |
In this section, we'll explore how to customize the prompt of the agent
with reasoning steps.
this is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md)
there's a deep dive on reasoning on AI That Works [reasoning models versus reasoning steps](https://github.com/hellovai/ai-that-works/tree/main/2025-04-07-reasoning-models-vs-prompts)
steps:
- text: "for this section, it will be helpful to leave the baml logs enabled"
command: |
export BAML_LOG=debug
- text: |
update the agent prompt to include a reasoning step
file: {src: ./walkthrough/06-agent.baml, dest: baml_src/agent.baml}
- text: generate the updated client
command: |
npx baml-cli generate
incremental: true
- text: |
now, you can try it out with a simple prompt
command: |
npx tsx src/index.ts 'can you multiply 3 and 4'
results:
- text: you should see output from the baml logs showing the reasoning steps
- text: |
#### optional challenge
add a field to your tool output format that includes the reasoning steps in the output!
- name: context-window
title: "Chapter 7 - Customize Your Context Window"
text: |
In this section, we'll explore how to customize the context window
of the agent.
this is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md)
steps:
- text: |
update the agent to pretty-print the Context window for the model
file: {src: ./walkthrough/07-agent.ts, dest: src/agent.ts}
- text: "Test the formatting"
command: |
BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
- text: |
next, let's update the agent to use XML formatting instead
this is a very popular format for passing data to a model,
among other things, because of the token efficiency of XML.
file: {src: ./walkthrough/07b-agent.ts, dest: src/agent.ts}
- text: |
let's try it out
command: |
BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
- text: |
lets update our tests to match the new output format
file: {src: ./walkthrough/07c-agent.baml, dest: baml_src/agent.baml}
- text: |
check out the updated tests
command: |
npx baml-cli test
- name: api-endpoints
title: "Chapter 8 - Adding API Endpoints"
text: "Add an Express server to expose the agent via HTTP."
steps:
- text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details."
command: |
export BAML_LOG=off
- text: "Install Express and types"
command: |
npm install express && npm install --save-dev @types/express supertest
incremental: true
- text: "Add the server implementation"
file: {src: ./walkthrough/08-server.ts, dest: src/server.ts}
- text: "Start the server"
command: |
npx tsx src/server.ts
- text: "Test with curl (in another terminal)"
command: |
curl -X POST http://localhost:3000/thread \
-H "Content-Type: application/json" \
-d '{"message":"can you add 3 and 4"}'
results:
- text: |
You should get an answer from the agent which includes the
agentic trace, ending in a message like:
code: |
{"intent":"done_for_now","message":"The sum of 3 and 4 is 7."}
- name: state-management
title: "Chapter 9 - In-Memory State and Async Clarification"
text: "Add state management and async clarification support."
steps:
- text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details."
command: |
export BAML_LOG=off
- text: "Add some simple in-memory state management for threads"
file: {src: ./walkthrough/09-state.ts, dest: src/state.ts}
- text: |
update the server to use the state management
* Add thread state management using `ThreadStore`
* return thread IDs and response URLs from the /thread endpoint
* implement GET /thread/:id
* implement POST /thread/:id/response
file: {src: ./walkthrough/09-server.ts, dest: src/server.ts}
- text: "Start the server"
command: |
npx tsx src/server.ts
- text: "Test clarification flow"
command: |
curl -X POST http://localhost:3000/thread \
-H "Content-Type: application/json" \
-d '{"message":"can you multiply 3 and xyz"}'
- name: human-approval
title: "Chapter 10 - Adding Human Approval"
text: "Add support for human approval of operations."
steps:
- text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details."
command: |
export BAML_LOG=off
- text: |
update the server to handle human approvals
* Import `handleNextStep` to execute approved actions
* Add two payload types to distinguish approvals from responses
* Handle responses and approvals differently in the endpoint
* Show better error messages when things go wrong
file: {src: ./walkthrough/10-server.ts, dest: src/server.ts}
- text: "Add a few methods to the agent to handle approvals and responses"
file: {src: ./walkthrough/10-agent.ts, dest: src/agent.ts}
- text: "Start the server"
command: |
npx tsx src/server.ts
- text: "Test division with approval"
command: |
curl -X POST http://localhost:3000/thread \
-H "Content-Type: application/json" \
-d '{"message":"can you divide 3 by 4"}'
results:
- text: "You should see:"
code: |
{
"thread_id": "2b243b66-215a-4f37-8bc6-9ace3849043b",
"events": [
{
"type": "user_input",
"data": "can you divide 3 by 4"
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 3,
"b": 4,
"response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
}
}
]
}
- text: "reject the request with another curl call, changing the thread ID"
command: |
curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
-H "Content-Type: application/json" \
-d '{"type": "approval", "approved": false, "comment": "I dont think thats right, use 5 instead of 4"}'
results:
- text: 'You should see: the last tool call is now `"intent":"divide","a":3,"b":5`'
code: |
{
"events": [
{
"type": "user_input",
"data": "can you divide 3 by 4"
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 3,
"b": 4,
"response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
}
},
{
"type": "tool_response",
"data": "user denied the operation with feedback: \"I dont think thats right, use 5 instead of 4\""
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 3,
"b": 5,
"response_url": "/thread/1f1f5ff5-20d7-4114-97b4-3fc52d5e0816/response"
}
}
]
}
- text: "now you can approve the operation"
command: |
curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
-H "Content-Type: application/json" \
-d '{"type": "approval", "approved": true}'
results:
- text: "you should see the final message includes the tool response and final result!"
code: |
...
{
"type": "tool_response",
"data": 0.5
},
{
"type": "done_for_now",
"message": "I divided 3 by 6 and the result is 0.5. If you have any more operations or queries, feel free to ask!",
"response_url": "/thread/2b469403-c497-4797-b253-043aae830209/response"
}
- name: humanlayer-approval
title: "Chapter 11 - Human Approvals over email"
text: |
in this section, we'll add support for human approvals over email.
This will start a little bit contrived, just to get the concepts down -
We'll start by invoking the workflow from the CLI but approvals for `divide`
and `request_more_information` will be handled over email,
then the final `done_for_now` answer will be printed back to the CLI
While contrived, this is a great example of the flexibility you get from
[factor 7 - contact humans with tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-7-contact-humans-with-tools.md)
steps:
- text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details."
command: |
export BAML_LOG=off
- text: "Install HumanLayer"
command: |
npm install humanlayer
incremental: true
- text: "Update CLI to send `divide` and `request_more_information` to a human via email"
file: {src: ./walkthrough/11-cli.ts, dest: src/cli.ts}
- text: "Run the CLI"
command: |
npx tsx src/index.ts 'can you divide 4 by 5'
results:
- text: "The last line of your program should mention human review step"
code: |
nextStep { intent: 'divide', a: 4, b: 5 }
HumanLayer: Requested human approval from HumanLayer cloud
- text: |
go ahead and respond to the email with some feedback:
![reject-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-reject.png?raw=true)
- text: |
you should get another email with an updated attempt based on your feedback!
You can go ahead and approve this one:
![approve-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-approve.png?raw=true)
results:
- text: and your final output will look like
code: |
nextStep {
intent: 'done_for_now',
message: 'The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!'
}
The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!
- text: |
let's implement the `request_more_information` flow as well
file: {src: ./walkthrough/11b-cli.ts, dest: src/cli.ts}
- text: |
let's test the `request_more_information` flow by asking for a calculation
with garbled input:
command: |
npx tsx src/index.ts 'can you multiply 4 and xyz'
- text: "You should get an email with a request for clarification"
results:
- text: "the email body should look something like:"
code: |
Can you clarify what 'xyz' represents in this context? Is it a specific number, variable, or something else?
- text: you can respond with something like
command: |
use 8 instead of xyz
results:
- text: you should see a final result on the CLI like
code: |
I have multiplied 4 and xyz, using the value 8 for xyz, resulting in 32.
- text: |
as a final step, let's explore using a custom HTML template for the email
file: {src: ./walkthrough/11c-cli.ts, dest: src/cli.ts}
- text: |
first try with divide:
command: |
npx tsx src/index.ts 'can you divide 4 by 5'
results:
- text: |
you should see a slightly different email with the custom template
![custom-template-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-custom.png?raw=true)
feel free to run with the flow, then try updating the template to your liking
(if you're using cursor, something as simple as highlighting the template and asking it to "make it better"
should do the trick)
try triggering `request_more_information` as well!
- text: |
that's it - in the next chapter, we'll build a fully email-driven
workflow agent that uses webhooks for human approval
- name: humanlayer-webhook
title: "Chapter XX - HumanLayer Webhook Integration"
text: |
the previous sections used the humanlayer SDK in "synchronous mode" - that
means every time we wait for human approval, we sit in a loop
polling until the human response is received.
That's obviously not ideal, especially for production workloads,
so in this section we'll implement [factor 6 - launch / pause / resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-6-launch-pause-resume.md)
by updating the server to end processing after contacting a human, and using webhooks to receive the results
(a rough sketch of the finished shape follows the steps below).
steps:
- text: |
add code to initialize humanlayer in the server
file: {src: ./walkthrough/12-1-server-init.ts, dest: src/server.ts}
- text: |
next, let's update the /thread endpoint to
1. handle requests asynchronously, returning immediately
2. create a human contact on request_more_information and done_for_now calls
# file: {src: }
- text: |
Update the server to handle `request_more_information` responses
- remove the old /response endpoint and types
- update the /thread endpoint to run processing asynchronously, return immediately
- send a state.threadId when requesting human responses
- add a handleHumanResponse function to process the human response
- add a /webhook endpoint to handle the webhook response
file: {src: ./walkthrough/12a-server.ts, dest: src/server.ts}
- text: "Start the server in another terminal"
command: |
npx tsx src/server.ts
- text: |
now that the server is running, send a payload to the '/thread' endpoint
- text: __ do the response step
- text: __ now handle approvals for divide
- text: __ now also handle done_for_now
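to make the target shape concrete before diving into the diffs, here's a rough sketch of the launch / pause / resume flow - illustrative only, reusing the `store`, `getHumanlayer`, `Thread`, and `agentLoop` helpers from the files below, and assuming humanlayer delivers a `V1Beta2HumanContactCompleted` payload to the webhook:
// launch: kick off processing and return immediately
app.post('/thread', async (req: Request, res: Response) => {
  const thread = new Thread([{ type: "user_input", data: req.body.message }]);
  const threadId = store.create(thread);
  Promise.resolve().then(async () => {
    const newThread = await agentLoop(thread);
    store.update(threadId, newThread);
    // pause: hand the last message to a human and stop processing
    if (newThread.awaitingHumanResponse()) {
      const hl = getHumanlayer();
      const lastEvent = newThread.events[newThread.events.length - 1];
      hl.createHumanContact({
        spec: { msg: lastEvent.data.message, state: { thread_id: threadId } }
      });
    }
  });
  res.json({ status: "processing" });
});
// resume: the webhook carries the human response plus the thread_id we stashed in state
app.post('/webhook', async (req: Request, res: Response) => {
  const { event } = req.body as V1Beta2HumanContactCompleted;
  const threadId = event.spec.state?.thread_id;
  const thread = threadId ? store.get(threadId) : undefined;
  if (!threadId || !thread) {
    return res.status(404).json({ error: "Thread not found" });
  }
  thread.events.push({ type: "human_response", data: event.status?.response });
  store.update(threadId, await agentLoop(thread));
  res.json({ status: "ok" });
});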

View File

@@ -15,6 +15,7 @@ function DetermineNextStep(
thread: string
) -> DoneForNow {
client Qwen3
// client "openai/gpt-4o"
// use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
prompt #"

View File

@@ -3,14 +3,27 @@ class DoneForNow {
message string
}
client<llm> Qwen3 {
provider "openai-generic"
options {
base_url env.BASETEN_BASE_URL
api_key env.BASETEN_API_KEY
}
}
function DetermineNextStep(
thread: string
) -> CalculatorTools | DoneForNow {
client "openai/gpt-4o"
client Qwen3
// client "openai/gpt-4o"
// use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
prompt #"
{{ _.role("system") }}
/nothink
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}

View File

@@ -0,0 +1,62 @@
class DoneForNow {
intent "done_for_now"
message string
}
client<llm> Qwen3 {
provider "openai-generic"
options {
base_url env.BASETEN_BASE_URL
api_key env.BASETEN_API_KEY
}
}
function DetermineNextStep(
thread: string
) -> CalculatorTools | DoneForNow {
client Qwen3
// client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
/nothink
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
}

View File

@@ -0,0 +1,64 @@
class DoneForNow {
intent "done_for_now"
message string
}
client<llm> Qwen3 {
provider "openai-generic"
options {
base_url env.BASETEN_BASE_URL
api_key env.BASETEN_API_KEY
}
}
function DetermineNextStep(
thread: string
) -> CalculatorTools | DoneForNow {
client Qwen3
// client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
/nothink
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
@@assert(hello, {{this.intent == "done_for_now"}})
}
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
@@assert(math_operation, {{this.intent == "multiply"}})
}

View File

@@ -0,0 +1,116 @@
class DoneForNow {
intent "done_for_now"
message string
}
client<llm> Qwen3 {
provider "openai-generic"
options {
base_url env.BASETEN_BASE_URL
api_key env.BASETEN_API_KEY
}
}
function DetermineNextStep(
thread: string
) -> CalculatorTools | DoneForNow {
client Qwen3
// client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
/nothink
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
}
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
test LongMath {
functions [DetermineNextStep]
args {
thread #"
[
{
"type": "user_input",
"data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
},
{
"type": "tool_call",
"data": {
"intent": "multiply",
"a": 3,
"b": 4
}
},
{
"type": "tool_response",
"data": 12
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 12,
"b": 2
}
},
{
"type": "tool_response",
"data": 6
},
{
"type": "tool_call",
"data": {
"intent": "add",
"a": 6,
"b": 12
}
},
{
"type": "tool_response",
"data": 18
}
]
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}

View File

@@ -0,0 +1,129 @@
// human tools are async requests to a human
type HumanTools = ClarificationRequest | DoneForNow
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
client<llm> Qwen3 {
provider "openai-generic"
options {
base_url env.BASETEN_BASE_URL
api_key env.BASETEN_API_KEY
}
}
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client Qwen3
// client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
/nothink
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
}
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
test LongMath {
functions [DetermineNextStep]
args {
thread #"
[
{
"type": "user_input",
"data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
},
{
"type": "tool_call",
"data": {
"intent": "multiply",
"a": 3,
"b": 4
}
},
{
"type": "tool_response",
"data": 12
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 12,
"b": 2
}
},
{
"type": "tool_response",
"data": 6
},
{
"type": "tool_call",
"data": {
"intent": "add",
"a": 6,
"b": 12
}
},
{
"type": "tool_response",
"data": 18
}
]
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}

View File

@@ -0,0 +1,87 @@
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
export interface Event {
type: string
data: any;
}
export class Thread {
events: Event[] = [];
constructor(events: Event[]) {
this.events = events;
}
serializeForLLM() {
// can change this to whatever custom serialization you want to do, XML, etc
// e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
return JSON.stringify(this.events);
}
}
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
let result: number;
switch (nextStep.intent) {
case "add":
result = nextStep.a + nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
case "subtract":
result = nextStep.a - nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
case "multiply":
result = nextStep.a * nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
case "divide":
result = nextStep.a / nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
}
}
export async function agentLoop(thread: Thread): Promise<Thread> {
while (true) {
const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
console.log("nextStep", nextStep);
thread.events.push({
"type": "tool_call",
"data": nextStep
});
switch (nextStep.intent) {
case "done_for_now":
case "request_more_information":
// response to human, return the thread
return thread;
case "add":
case "subtract":
case "multiply":
case "divide":
thread = await handleNextStep(nextStep, thread);
}
}
}

View File

@@ -0,0 +1,50 @@
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "../src/agent";
export async function cli() {
// Get command line arguments, skipping the first two (node and script name)
const args = process.argv.slice(2);
if (args.length === 0) {
console.error("Error: Please provide a message as a command line argument");
process.exit(1);
}
// Join all arguments into a single message
const message = args.join(" ");
// Create a new thread with the user's message as the initial event
const thread = new Thread([{ type: "user_input", data: message }]);
// Run the agent loop with the thread
const result = await agentLoop(thread);
let lastEvent = result.events.slice(-1)[0];
while (lastEvent.data.intent === "request_more_information") {
const message = await askHuman(lastEvent.data.message);
thread.events.push({ type: "human_response", data: message });
const result = await agentLoop(thread);
lastEvent = result.events.slice(-1)[0];
}
// print the final result
// optional - you could loop here too
console.log(lastEvent.data.message);
process.exit(0);
}
async function askHuman(message: string) {
const readline = require('readline').createInterface({
input: process.stdin,
output: process.stdout
});
return new Promise((resolve) => {
readline.question(`${message}\n> `, (answer: string) => {
resolve(answer);
});
});
}

View File

@@ -0,0 +1,157 @@
// human tools are async requests to a human
type HumanTools = ClarificationRequest | DoneForNow
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
client<llm> Qwen3 {
provider "openai-generic"
options {
base_url env.BASETEN_BASE_URL
api_key env.BASETEN_API_KEY
}
}
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client Qwen3
// client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
/nothink
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
}
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
test LongMath {
functions [DetermineNextStep]
args {
thread #"
[
{
"type": "user_input",
"data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
},
{
"type": "tool_call",
"data": {
"intent": "multiply",
"a": 3,
"b": 4
}
},
{
"type": "tool_response",
"data": 12
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 12,
"b": 2
}
},
{
"type": "tool_response",
"data": 6
},
{
"type": "tool_call",
"data": {
"intent": "add",
"a": 6,
"b": 12
}
},
{
"type": "tool_response",
"data": 18
}
]
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
test MathOperationWithClarification {
functions [DetermineNextStep]
args {
thread #"
[{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
test MathOperationPostClarification {
functions [DetermineNextStep]
args {
thread #"
[
{"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
{"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
{"type":"human_response","data":"lets try 12 instead"},
]
"#
}
@@assert(intent, {{this.intent == "multiply"}})
@@assert(b, {{this.b == 12}})
@@assert(a, {{this.a == 3}})
}

View File

@@ -0,0 +1,158 @@
// human tools are async requests to a human
type HumanTools = ClarificationRequest | DoneForNow
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
client<llm> Qwen3 {
provider "openai-generic"
options {
base_url env.BASETEN_BASE_URL
api_key env.BASETEN_API_KEY
}
}
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client Qwen3
// client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
/nothink
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
test LongMath {
functions [DetermineNextStep]
args {
thread #"
[
{
"type": "user_input",
"data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
},
{
"type": "tool_call",
"data": {
"intent": "multiply",
"a": 3,
"b": 4
}
},
{
"type": "tool_response",
"data": 12
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 12,
"b": 2
}
},
{
"type": "tool_response",
"data": 6
},
{
"type": "tool_call",
"data": {
"intent": "add",
"a": 6,
"b": 12
}
},
{
"type": "tool_response",
"data": 18
}
]
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
test MathOperationWithClarification {
functions [DetermineNextStep]
args {
thread #"
[{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
test MathOperationPostClarification {
functions [DetermineNextStep]
args {
thread #"
[
{"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
{"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
{"type":"human_response","data":"lets try 12 instead"},
]
"#
}
@@assert(intent, {{this.intent == "multiply"}})
@@assert(b, {{this.b == 12}})
@@assert(a, {{this.a == 3}})
}

View File

@@ -0,0 +1,164 @@
// human tools are async requests to a human
type HumanTools = ClarificationRequest | DoneForNow
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
client<llm> Qwen3 {
provider "openai-generic"
options {
base_url env.BASETEN_BASE_URL
api_key env.BASETEN_API_KEY
}
}
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client Qwen3
// client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
/nothink
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
First, always plan out what to do next, for example:
- ...
- ...
- ...
{...} // schema
"#
}
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
test LongMath {
functions [DetermineNextStep]
args {
thread #"
[
{
"type": "user_input",
"data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
},
{
"type": "tool_call",
"data": {
"intent": "multiply",
"a": 3,
"b": 4
}
},
{
"type": "tool_response",
"data": 12
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 12,
"b": 2
}
},
{
"type": "tool_response",
"data": 6
},
{
"type": "tool_call",
"data": {
"intent": "add",
"a": 6,
"b": 12
}
},
{
"type": "tool_response",
"data": 18
}
]
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
test MathOperationWithClarification {
functions [DetermineNextStep]
args {
thread #"
[{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
test MathOperationPostClarification {
functions [DetermineNextStep]
args {
thread #"
[
{"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
{"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
{"type":"human_response","data":"lets try 12 instead"},
]
"#
}
@@assert(intent, {{this.intent == "multiply"}})
@@assert(b, {{this.b == 12}})
@@assert(a, {{this.a == 3}})
}
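to make the planning instruction concrete, a well-behaved response under this prompt would look something like this (illustrative, not captured from a real run):
- the user asked to multiply 3 and 4
- no tools have been called yet
- so the next step should be a multiply call
{"intent": "multiply", "a": 3, "b": 4}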

View File

@@ -0,0 +1,87 @@
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
export interface Event {
type: string
data: any;
}
export class Thread {
events: Event[] = [];
constructor(events: Event[]) {
this.events = events;
}
serializeForLLM() {
// can change this to whatever custom serialization you want to do, XML, etc
// e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
return JSON.stringify(this.events, null, 2);
}
}
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
let result: number;
switch (nextStep.intent) {
case "add":
result = nextStep.a + nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
case "subtract":
result = nextStep.a - nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
case "multiply":
result = nextStep.a * nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
case "divide":
result = nextStep.a / nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
}
}
export async function agentLoop(thread: Thread): Promise<Thread> {
while (true) {
const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
console.log("nextStep", nextStep);
thread.events.push({
"type": "tool_call",
"data": nextStep
});
switch (nextStep.intent) {
case "done_for_now":
case "request_more_information":
// response to human, return the thread
return thread;
case "add":
case "subtract":
case "multiply":
case "divide":
thread = await handleNextStep(nextStep, thread);
}
}
}

View File

@@ -0,0 +1,99 @@
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
export interface Event {
type: string
data: any;
}
export class Thread {
events: Event[] = [];
constructor(events: Event[]) {
this.events = events;
}
serializeForLLM() {
return this.events.map(e => this.serializeOneEvent(e)).join("\n");
}
trimLeadingWhitespace(s: string) {
return s.replace(/^[ \t]+/gm, '');
}
serializeOneEvent(e: Event) {
return this.trimLeadingWhitespace(`
<${e.data?.intent || e.type}>
${
typeof e.data !== 'object' ? e.data :
Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")}
</${e.data?.intent || e.type}>
`)
}
}
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
let result: number;
switch (nextStep.intent) {
case "add":
result = nextStep.a + nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
case "subtract":
result = nextStep.a - nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
case "multiply":
result = nextStep.a * nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
case "divide":
result = nextStep.a / nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
}
}
export async function agentLoop(thread: Thread): Promise<Thread> {
while (true) {
const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
console.log("nextStep", nextStep);
thread.events.push({
"type": "tool_call",
"data": nextStep
});
switch (nextStep.intent) {
case "done_for_now":
case "request_more_information":
// response to human, return the thread
return thread;
case "add":
case "subtract":
case "multiply":
case "divide":
thread = await handleNextStep(nextStep, thread);
}
}
}
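with this serializer, a thread renders as flat XML-ish blocks rather than JSON - for example, a user_input followed by a multiply call and its result comes out roughly as:
<user_input>
can you multiply 3 and 4?
</user_input>
<multiply>
a: 3
b: 4
</multiply>
<tool_response>
12
</tool_response>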

View File

@@ -0,0 +1,165 @@
// human tools are async requests to a human
type HumanTools = ClarificationRequest | DoneForNow
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
client<llm> Qwen3 {
provider "openai-generic"
options {
base_url env.BASETEN_BASE_URL
api_key env.BASETEN_API_KEY
}
}
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client Qwen3
// client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
/nothink
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
Always think about what to do next first, like:
- ...
- ...
- ...
{...} // schema
"#
}
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
<user_input>
hello!
</user_input>
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
<user_input>
can you multiply 3 and 4?
</user_input>
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
test LongMath {
functions [DetermineNextStep]
args {
thread #"
<user_input>
can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
</user_input>
<multiply>
a: 3
b: 4
</multiply>
<tool_response>
12
</tool_response>
<divide>
a: 12
b: 2
</divide>
<tool_response>
6
</tool_response>
<add>
a: 6
b: 12
</add>
<tool_response>
18
</tool_response>
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
test MathOperationWithClarification {
functions [DetermineNextStep]
args {
thread #"
<user_input>
can you multiply 3 and fe1iiaff10
</user_input>
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
test MathOperationPostClarification {
functions [DetermineNextStep]
args {
thread #"
<user_input>
can you multiply 3 and FD*(#F&& ?
</user_input>
<request_more_information>
message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
</request_more_information>
<human_response>
lets try 12 instead
</human_response>
"#
}
@@assert(intent, {{this.intent == "multiply"}})
@@assert(a, {{this.a == 3}})
@@assert(b, {{this.b == 12}})
}

View File

@@ -0,0 +1,29 @@
import express from 'express';
import { Thread, agentLoop } from '../src/agent';
const app = express();
app.use(express.json());
app.set('json spaces', 2);
// POST /thread - Start new thread
app.post('/thread', async (req, res) => {
const thread = new Thread([{
type: "user_input",
data: req.body.message
}]);
const result = await agentLoop(thread);
res.json(result);
});
// GET /thread/:id - Get thread status
app.get('/thread/:id', (req, res) => {
// optional - add state
res.status(404).json({ error: "Not implemented yet" });
});
const port = process.env.PORT || 3000;
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
export { app };
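once the server is up, you can smoke-test it with a quick curl (assuming the default port 3000):
curl -X POST http://localhost:3000/thread \
  -H "Content-Type: application/json" \
  -d '{"message":"can you add 3 and 4"}'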

View File

@@ -0,0 +1,75 @@
import express from 'express';
import { Thread, agentLoop } from '../src/agent';
import { ThreadStore } from '../src/state';
const app = express();
app.use(express.json());
app.set('json spaces', 2);
const store = new ThreadStore();
// POST /thread - Start new thread
app.post('/thread', async (req, res) => {
const thread = new Thread([{
type: "user_input",
data: req.body.message
}]);
const threadId = store.create(thread);
const newThread = await agentLoop(thread);
store.update(threadId, newThread);
const lastEvent = newThread.events[newThread.events.length - 1];
// If we exited the loop, include the response URL so the client can
// push a new message onto the thread
lastEvent.data.response_url = `/thread/${threadId}/response`;
console.log("returning last event from endpoint", lastEvent);
res.json({
thread_id: threadId,
...newThread
});
});
// GET /thread/:id - Get thread status
app.get('/thread/:id', (req, res) => {
const thread = store.get(req.params.id);
if (!thread) {
return res.status(404).json({ error: "Thread not found" });
}
res.json(thread);
});
// POST /thread/:id/response - Handle clarification response
app.post('/thread/:id/response', async (req, res) => {
let thread = store.get(req.params.id);
if (!thread) {
return res.status(404).json({ error: "Thread not found" });
}
thread.events.push({
type: "human_response",
data: req.body.message
});
// loop until stop event
const newThread = await agentLoop(thread);
store.update(req.params.id, newThread);
const lastEvent = newThread.events[newThread.events.length - 1];
lastEvent.data.response_url = `/thread/${req.params.id}/response`;
console.log("returning last event from endpoint", lastEvent);
res.json(newThread);
});
const port = process.env.PORT || 3000;
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
export { app };

View File

@@ -0,0 +1,23 @@
import crypto from 'crypto';
import { Thread } from '../src/agent';
// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
export class ThreadStore {
private threads: Map<string, Thread> = new Map();
create(thread: Thread): string {
const id = crypto.randomUUID();
this.threads.set(id, thread);
return id;
}
get(id: string): Thread | undefined {
return this.threads.get(id);
}
update(id: string, thread: Thread): void {
this.threads.set(id, thread);
}
}
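as the comment says, this can be swapped for real storage - here's a minimal sketch of what a redis-backed version could look like (assuming the `ioredis` package; the interface becomes async, so callers would need an `await`):
import Redis from 'ioredis';
import crypto from 'crypto';
import { Thread } from '../src/agent';
export class RedisThreadStore {
  private redis = new Redis(); // assumes a local redis on the default port
  async create(thread: Thread): Promise<string> {
    const id = crypto.randomUUID();
    await this.redis.set(`thread:${id}`, JSON.stringify(thread.events));
    return id;
  }
  async get(id: string): Promise<Thread | undefined> {
    const events = await this.redis.get(`thread:${id}`);
    return events ? new Thread(JSON.parse(events)) : undefined;
  }
  async update(id: string, thread: Thread): Promise<void> {
    await this.redis.set(`thread:${id}`, JSON.stringify(thread.events));
  }
}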

View File

@@ -0,0 +1,111 @@
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
export interface Event {
type: string
data: any;
}
export class Thread {
events: Event[] = [];
constructor(events: Event[]) {
this.events = events;
}
serializeForLLM() {
return this.events.map(e => this.serializeOneEvent(e)).join("\n");
}
trimLeadingWhitespace(s: string) {
return s.replace(/^[ \t]+/gm, '');
}
serializeOneEvent(e: Event) {
return this.trimLeadingWhitespace(`
<${e.data?.intent || e.type}>
${
typeof e.data !== 'object' ? e.data :
Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")}
</${e.data?.intent || e.type}>
`)
}
awaitingHumanResponse(): boolean {
const lastEvent = this.events[this.events.length - 1];
return ['request_more_information', 'done_for_now'].includes(lastEvent.data.intent);
}
awaitingHumanApproval(): boolean {
const lastEvent = this.events[this.events.length - 1];
return lastEvent.data.intent === 'divide';
}
}
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
let result: number;
switch (nextStep.intent) {
case "add":
result = nextStep.a + nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
case "subtract":
result = nextStep.a - nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
case "multiply":
result = nextStep.a * nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
case "divide":
result = nextStep.a / nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
}
}
export async function agentLoop(thread: Thread): Promise<Thread> {
while (true) {
const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
console.log("nextStep", nextStep);
thread.events.push({
"type": "tool_call",
"data": nextStep
});
switch (nextStep.intent) {
case "done_for_now":
case "request_more_information":
// response to human, return the thread
return thread;
case "divide":
// divide is scary, return it for human approval
return thread;
case "add":
case "subtract":
case "multiply":
thread = await handleNextStep(nextStep, thread);
}
}
}
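one thing to note: `agentLoop` now returns early for `divide` as well as for the human-facing intents, so whoever calls it has to check what kind of human input is needed before continuing - roughly:
const result = await agentLoop(thread);
if (result.awaitingHumanApproval()) {
  // a divide is pending - get sign-off, then run handleNextStep(lastEvent.data, result)
} else if (result.awaitingHumanResponse()) {
  // surface the message or clarification request to the human
}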

View File

@@ -0,0 +1,112 @@
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
const app = express();
app.use(express.json());
app.set('json spaces', 2);
const store = new ThreadStore();
// POST /thread - Start new thread
app.post('/thread', async (req, res) => {
const thread = new Thread([{
type: "user_input",
data: req.body.message
}]);
const threadId = store.create(thread);
const newThread = await agentLoop(thread);
store.update(threadId, newThread);
const lastEvent = newThread.events[newThread.events.length - 1];
// If we exited the loop, include the response URL so the client can
// push a new message onto the thread
lastEvent.data.response_url = `/thread/${threadId}/response`;
console.log("returning last event from endpoint", lastEvent);
res.json({
thread_id: threadId,
...newThread
});
});
// GET /thread/:id - Get thread status
app.get('/thread/:id', (req, res) => {
const thread = store.get(req.params.id);
if (!thread) {
return res.status(404).json({ error: "Thread not found" });
}
res.json(thread);
});
type ApprovalPayload = {
type: "approval";
approved: boolean;
comment?: string;
}
type ResponsePayload = {
type: "response";
response: string;
}
type Payload = ApprovalPayload | ResponsePayload;
// POST /thread/:id/response - Handle clarification response
app.post('/thread/:id/response', async (req, res) => {
let thread = store.get(req.params.id);
if (!thread) {
return res.status(404).json({ error: "Thread not found" });
}
const body: Payload = req.body;
let lastEvent = thread.events[thread.events.length - 1];
if (thread.awaitingHumanResponse() && body.type === 'response') {
thread.events.push({
type: "human_response",
data: body.response
});
} else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
// push feedback onto the thread
thread.events.push({
type: "tool_response",
data: `user denied the operation with feedback: "${body.comment}"`
});
} else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
// approved, run the tool, pushing results onto the thread
await handleNextStep(lastEvent.data, thread);
} else {
res.status(400).json({
error: "Invalid request: " + body.type,
awaitingHumanResponse: thread.awaitingHumanResponse(),
awaitingHumanApproval: thread.awaitingHumanApproval()
});
return;
}
// loop until stop event
const newThread = await agentLoop(thread);
store.update(req.params.id, newThread);
lastEvent = newThread.events[newThread.events.length - 1];
lastEvent.data.response_url = `/thread/${req.params.id}/response`;
console.log("returning last event from endpoint", lastEvent);
res.json(newThread);
});
const port = process.env.PORT || 3000;
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
export { app };
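for reference, the two request bodies the `/thread/:id/response` endpoint accepts look like this (matching the `ResponsePayload` and `ApprovalPayload` types above):
{"type": "response", "response": "lets try 12 instead"}
{"type": "approval", "approved": false, "comment": "use 5 instead of 4"}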

View File

@@ -0,0 +1,103 @@
// cli.ts lets you invoke the agent loop from the command line
import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";
export async function cli() {
// Get command line arguments, skipping the first two (node and script name)
const args = process.argv.slice(2);
if (args.length === 0) {
console.error("Error: Please provide a message as a command line argument");
process.exit(1);
}
// Join all arguments into a single message
const message = args.join(" ");
// Create a new thread with the user's message as the initial event
const thread = new Thread([{ type: "user_input", data: message }]);
// Run the agent loop with the thread
let newThread = await agentLoop(thread);
let lastEvent = newThread.events.slice(-1)[0];
while (lastEvent.data.intent !== "done_for_now") {
const responseEvent = await askHuman(lastEvent);
thread.events.push(responseEvent);
newThread = await agentLoop(thread);
lastEvent = newThread.events.slice(-1)[0];
}
// print the final result
// optional - you could loop here too
console.log(lastEvent.data.message);
process.exit(0);
}
async function askHuman(lastEvent: Event): Promise<Event> {
if (process.env.HUMANLAYER_API_KEY) {
return await askHumanEmail(lastEvent);
} else {
return await askHumanCLI(lastEvent.data.message);
}
}
async function askHumanCLI(message: string): Promise<Event> {
const readline = require('readline').createInterface({
input: process.stdin,
output: process.stdout
});
return new Promise((resolve) => {
readline.question(`${message}\n> `, (answer: string) => {
resolve({ type: "human_response", data: answer });
});
});
}
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
if (!process.env.HUMANLAYER_EMAIL) {
throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
}
const hl = humanlayer({ //reads apiKey from env
// name of this agent
runId: "12fa-cli-agent",
verbose: true,
contactChannel: {
// agent should request permission via email
email: {
address: process.env.HUMANLAYER_EMAIL,
}
}
})
if (lastEvent.data.intent === "divide") {
// fetch approval synchronously - this will block until reply
const response = await hl.fetchHumanApproval({
spec: {
fn: "divide",
kwargs: {
a: lastEvent.data.a,
b: lastEvent.data.b
}
}
})
if (response.approved) {
const result = lastEvent.data.a / lastEvent.data.b;
console.log("tool_response", result);
return {
"type": "tool_response",
"data": result
};
} else {
return {
"type": "tool_response",
"data": `user denied operation ${lastEvent.data.intent}
with feedback: ${response.comment}`
};
}
}
throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}
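to exercise the email path, the code above expects two environment variables (the api key is read implicitly by `humanlayer()`):
export HUMANLAYER_API_KEY=...        # from the humanlayer dashboard
export HUMANLAYER_EMAIL=you@example.com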

Binary file not shown.

After

Width:  |  Height:  |  Size: 178 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 66 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 182 KiB

View File

@@ -0,0 +1,116 @@
// cli.ts lets you invoke the agent loop from the command line
import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";
export async function cli() {
// Get command line arguments, skipping the first two (node and script name)
const args = process.argv.slice(2);
if (args.length === 0) {
console.error("Error: Please provide a message as a command line argument");
process.exit(1);
}
// Join all arguments into a single message
const message = args.join(" ");
// Create a new thread with the user's message as the initial event
const thread = new Thread([{ type: "user_input", data: message }]);
// Run the agent loop with the thread
let newThread = await agentLoop(thread);
let lastEvent = newThread.events.slice(-1)[0];
while (lastEvent.data.intent !== "done_for_now") {
const responseEvent = await askHuman(lastEvent);
thread.events.push(responseEvent);
newThread = await agentLoop(thread);
lastEvent = newThread.events.slice(-1)[0];
}
// print the final result
// optional - you could loop here too
console.log(lastEvent.data.message);
process.exit(0);
}
async function askHuman(lastEvent: Event): Promise<Event> {
if (process.env.HUMANLAYER_API_KEY) {
return await askHumanEmail(lastEvent);
} else {
return await askHumanCLI(lastEvent.data.message);
}
}
async function askHumanCLI(message: string): Promise<Event> {
const readline = require('readline').createInterface({
input: process.stdin,
output: process.stdout
});
return new Promise((resolve) => {
readline.question(`${message}\n> `, (answer: string) => {
resolve({ type: "human_response", data: answer });
});
});
}
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
if (!process.env.HUMANLAYER_EMAIL) {
throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
}
const hl = humanlayer({ //reads apiKey from env
// name of this agent
runId: "12fa-cli-agent",
verbose: true,
contactChannel: {
// agent should request permission via email
email: {
address: process.env.HUMANLAYER_EMAIL,
}
}
})
if (lastEvent.data.intent === "request_more_information") {
// fetch response synchronously - this will block until reply
const response = await hl.fetchHumanResponse({
spec: {
msg: lastEvent.data.message
}
})
return {
"type": "tool_response",
"data": response
}
}
if (lastEvent.data.intent === "divide") {
// fetch approval synchronously - this will block until reply
const response = await hl.fetchHumanApproval({
spec: {
fn: "divide",
kwargs: {
a: lastEvent.data.a,
b: lastEvent.data.b
}
}
})
if (response.approved) {
const result = lastEvent.data.a / lastEvent.data.b;
console.log("tool_response", result);
return {
"type": "tool_response",
"data": result
};
} else {
return {
"type": "tool_response",
"data": `user denied operation ${lastEvent.data.intent}
with feedback: ${response.comment}`
};
}
}
throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}

View File

@@ -0,0 +1,125 @@
// cli.ts lets you invoke the agent loop from the command line
import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";
export async function cli() {
// Get command line arguments, skipping the first two (node and script name)
const args = process.argv.slice(2);
if (args.length === 0) {
console.error("Error: Please provide a message as a command line argument");
process.exit(1);
}
// Join all arguments into a single message
const message = args.join(" ");
// Create a new thread with the user's message as the initial event
const thread = new Thread([{ type: "user_input", data: message }]);
// Run the agent loop with the thread
let newThread = await agentLoop(thread);
let lastEvent = newThread.events.slice(-1)[0];
while (lastEvent.data.intent !== "done_for_now") {
const responseEvent = await askHuman(lastEvent);
thread.events.push(responseEvent);
newThread = await agentLoop(thread);
lastEvent = newThread.events.slice(-1)[0];
}
// print the final result
// optional - you could loop here too
console.log(lastEvent.data.message);
process.exit(0);
}
async function askHuman(lastEvent: Event): Promise<Event> {
if (process.env.HUMANLAYER_API_KEY) {
return await askHumanEmail(lastEvent);
} else {
return await askHumanCLI(lastEvent.data.message);
}
}
async function askHumanCLI(message: string): Promise<Event> {
const readline = require('readline').createInterface({
input: process.stdin,
output: process.stdout
});
return new Promise((resolve) => {
readline.question(`${message}\n> `, (answer: string) => {
resolve({ type: "human_response", data: answer });
});
});
}
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
if (!process.env.HUMANLAYER_EMAIL) {
throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
}
const hl = humanlayer({ //reads apiKey from env
// name of this agent
runId: "12fa-cli-agent",
verbose: true,
contactChannel: {
// agent should request permission via email
email: {
address: process.env.HUMANLAYER_EMAIL,
// custom email body - jinja
template: `{% if type == 'request_more_information' %}
{{ event.spec.msg }}
{% else %}
agent {{ event.run_id }} is requesting approval for {{event.spec.fn}}
with args: {{event.spec.kwargs}}
<br><br>
reply to this email to approve
{% endif %}`
}
}
})
if (lastEvent.data.intent === "request_more_information") {
// fetch response synchronously - this will block until reply
const response = await hl.fetchHumanResponse({
spec: {
msg: lastEvent.data.message
}
})
return {
"type": "tool_response",
"data": response
}
}
if (lastEvent.data.intent === "divide") {
// fetch approval synchronously - this will block until reply
const response = await hl.fetchHumanApproval({
spec: {
fn: "divide",
kwargs: {
a: lastEvent.data.a,
b: lastEvent.data.b
}
}
})
if (response.approved) {
const result = lastEvent.data.a / lastEvent.data.b;
console.log("tool_response", result);
return {
"type": "tool_response",
"data": result
};
} else {
return {
"type": "tool_response",
"data": `user denied operation ${lastEvent.data.intent}
with feedback: ${response.comment}`
};
}
}
throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}

View File

@@ -0,0 +1,131 @@
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer } from 'humanlayer';
const app = express();
app.use(express.json());
app.set('json spaces', 2);
const store = new ThreadStore();
const getHumanlayer = () => {
const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL;
if (!HUMANLAYER_EMAIL) {
throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
}
const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY;
if (!HUMANLAYER_API_KEY) {
throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
}
return humanlayer({
runId: `12fa-agent`,
contactChannel: {
email: { address: HUMANLAYER_EMAIL }
}
});
}
// POST /thread - Start new thread
app.post('/thread', async (req, res) => {
const thread = new Thread([{
type: "user_input",
data: req.body.message
}]);
const threadId = store.create(thread);
const newThread = await agentLoop(thread);
store.update(threadId, newThread);
const lastEvent = newThread.events[newThread.events.length - 1];
// If we exited the loop, include the response URL so the client can
// push a new message onto the thread
lastEvent.data.response_url = `/thread/${threadId}/response`;
console.log("returning last event from endpoint", lastEvent);
res.json({
thread_id: threadId,
...newThread
});
});
// GET /thread/:id - Get thread status
app.get('/thread/:id', (req, res) => {
const thread = store.get(req.params.id);
if (!thread) {
return res.status(404).json({ error: "Thread not found" });
}
res.json(thread);
});
type ApprovalPayload = {
type: "approval";
approved: boolean;
comment?: string;
}
type ResponsePayload = {
type: "response";
response: string;
}
type Payload = ApprovalPayload | ResponsePayload;
// POST /thread/:id/response - Handle clarification response
app.post('/thread/:id/response', async (req, res) => {
let thread = store.get(req.params.id);
if (!thread) {
return res.status(404).json({ error: "Thread not found" });
}
const body: Payload = req.body;
let lastEvent = thread.events[thread.events.length - 1];
if (thread.awaitingHumanResponse() && body.type === 'response') {
thread.events.push({
type: "human_response",
data: body.response
});
} else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
// push feedback onto the thread
thread.events.push({
type: "tool_response",
data: `user denied the operation with feedback: "${body.comment}"`
});
} else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
// approved, run the tool, pushing results onto the thread
await handleNextStep(lastEvent.data, thread);
} else {
res.status(400).json({
error: "Invalid request: " + body.type,
awaitingHumanResponse: thread.awaitingHumanResponse(),
awaitingHumanApproval: thread.awaitingHumanApproval()
});
return;
}
// loop until stop event
const result = await agentLoop(thread);
store.update(req.params.id, result);
lastEvent = result.events[result.events.length - 1];
lastEvent.data.response_url = `/thread/${req.params.id}/response`;
console.log("returning last event from endpoint", lastEvent);
res.json(result);
});
const port = process.env.PORT || 3000;
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
export { app };

View File

@@ -0,0 +1,106 @@
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';
const app = express();
app.use(express.json());
app.set('json spaces', 2);
const store = new ThreadStore();
const getHumanlayer = () => {
const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL;
if (!HUMANLAYER_EMAIL) {
throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
}
const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY;
if (!HUMANLAYER_API_KEY) {
throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
}
return humanlayer({
runId: `12fa-agent`,
contactChannel: {
email: { address: HUMANLAYER_EMAIL }
}
});
}
// POST /thread - Start new thread
app.post('/thread', async (req: Request, res: Response) => {
const thread = new Thread([{
type: "user_input",
data: req.body.message
}]);
// run agent loop asynchronously, return immediately
Promise.resolve().then(async () => {
const threadId = store.create(thread);
const newThread = await agentLoop(thread);
store.update(threadId, newThread);
const lastEvent = newThread.events[newThread.events.length - 1];
if (thread.awaitingHumanResponse()) {
const hl = getHumanlayer();
// create a human contact - returns immediately
hl.createHumanContact({
spec: {
msg: lastEvent.data.message,
state: {
thread_id: threadId,
}
}
});
}
});
res.json({ status: "processing" });
});
// GET /thread/:id - Get thread status
app.get('/thread/:id', (req: Request, res: Response) => {
const thread = store.get(req.params.id);
if (!thread) {
return res.status(404).json({ error: "Thread not found" });
}
res.json(thread);
});
type WebhookResponse = V1Beta2HumanContactCompleted;
const handleHumanResponse = async (req: Request, res: Response) => {
}
app.post('/webhook', async (req: Request, res: Response) => {
console.log("webhook response", req.body);
const response = req.body as WebhookResponse;
// response is guaranteed to be set on a webhook
const humanResponse: string = response.event.status?.response as string;
const threadId = response.event.spec.state?.thread_id;
if (!threadId) {
return res.status(400).json({ error: "Thread ID not found" });
}
const thread = store.get(threadId);
if (!thread) {
return res.status(404).json({ error: "Thread not found" });
}
if (!thread.awaitingHumanResponse()) {
return res.status(400).json({ error: "Thread is not awaiting human response" });
}
});
const port = process.env.PORT || 3000;
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
export { app };

View File

@@ -0,0 +1,105 @@
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';
const app = express();
app.use(express.json());
app.set('json spaces', 2);
const store = new ThreadStore();
const getHumanlayer = () => {
const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL;
if (!HUMANLAYER_EMAIL) {
throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
}
const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY;
if (!HUMANLAYER_API_KEY) {
throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
}
return humanlayer({
runId: `12fa-agent`,
contactChannel: {
email: { address: HUMANLAYER_EMAIL }
}
});
}
// POST /thread - Start new thread
app.post('/thread', async (req: Request, res: Response) => {
const thread = new Thread([{
type: "user_input",
data: req.body.message
}]);
// run agent loop asynchronously, return immediately
Promise.resolve().then(async () => {
const threadId = store.create(thread);
const newThread = await agentLoop(thread);
store.update(threadId, newThread);
const lastEvent = newThread.events[newThread.events.length - 1];
if (thread.awaitingHumanResponse()) {
const hl = getHumanlayer();
// create a human contact - returns immediately
hl.createHumanContact({
spec: {
msg: lastEvent.data.message,
state: {
thread_id: threadId,
}
}
});
}
});
res.json({ status: "processing" });
});
// GET /thread/:id - Get thread status
app.get('/thread/:id', (req: Request, res: Response) => {
const thread = store.get(req.params.id);
if (!thread) {
return res.status(404).json({ error: "Thread not found" });
}
res.json(thread);
});
type WebhookResponse = V1Beta2HumanContactCompleted;
const handleHumanResponse = async (req: Request, res: Response) => {
}
app.post('/webhook', async (req: Request, res: Response) => {
console.log("webhook response", req.body);
const response = req.body as WebhookResponse;
// response is guaranteed to be set on a webhook
const humanResponse: string = response.event.status?.response as string;
const threadId = response.event.spec.state?.thread_id;
if (!threadId) {
return res.status(400).json({ error: "Thread ID not found" });
}
const thread = store.get(threadId);
if (!thread) {
return res.status(404).json({ error: "Thread not found" });
}
if (!thread.awaitingHumanResponse()) {
return res.status(400).json({ error: "Thread is not awaiting human response" });
}
});
const port = process.env.PORT || 3000;
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
export { app };

View File

@@ -0,0 +1,106 @@
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';
const app = express();
app.use(express.json());
app.set('json spaces', 2);
const store = new ThreadStore();
const getHumanlayer = () => {
const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL;
if (!HUMANLAYER_EMAIL) {
throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
}
const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY;
if (!HUMANLAYER_API_KEY) {
throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
}
return humanlayer({
runId: `12fa-agent`,
contactChannel: {
email: { address: HUMANLAYER_EMAIL }
}
});
}
// POST /thread - Start new thread
app.post('/thread', async (req: Request, res: Response) => {
const thread = new Thread([{
type: "user_input",
data: req.body.message
}]);
// run agent loop asynchronously, return immediately
Promise.resolve().then(async () => {
const threadId = store.create(thread);
const newThread = await agentLoop(thread);
store.update(threadId, newThread);
const lastEvent = newThread.events[newThread.events.length - 1];
if (thread.awaitingHumanResponse()) {
const hl = getHumanlayer();
// create a human contact - returns immediately
hl.createHumanContact({
spec: {
msg: lastEvent.data.message,
state: {
thread_id: threadId,
}
}
});
}
});
res.json({ status: "processing" });
});
// GET /thread/:id - Get thread status
app.get('/thread/:id', (req: Request, res: Response) => {
const thread = store.get(req.params.id);
if (!thread) {
return res.status(404).json({ error: "Thread not found" });
}
res.json(thread);
});
type WebhookResponse = V1Beta2HumanContactCompleted;
const handleHumanResponse = async (req: Request, res: Response) => {
}
app.post('/webhook', async (req: Request, res: Response) => {
console.log("webhook response", req.body);
const response = req.body as WebhookResponse;
// response is guaranteed to be set on a webhook
const humanResponse: string = response.event.status?.response as string;
const threadId = response.event.spec.state?.thread_id;
if (!threadId) {
return res.status(400).json({ error: "Thread ID not found" });
}
const thread = store.get(threadId);
if (!thread) {
return res.status(404).json({ error: "Thread not found" });
}
if (!thread.awaitingHumanResponse()) {
return res.status(400).json({ error: "Thread is not awaiting human response" });
}
});
const port = process.env.PORT || 3000;
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
export { app };

View File

@@ -0,0 +1,143 @@
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { V1Beta2EmailEventReceived, V1Beta2FunctionCallCompleted, V1Beta2HumanContactCompleted } from 'humanlayer';
const app = express();
app.use(express.json());
app.set('json spaces', 2);
const store = new ThreadStore();
// POST /thread - Start new thread
app.post('/thread', async (req, res) => {
const thread = new Thread([{
type: "user_input",
data: req.body.message
}]);
const threadId = store.create(thread);
const newThread = await agentLoop(thread);
store.update(threadId, newThread);
const lastEvent = newThread.events[newThread.events.length - 1];
// If we exited the loop, include the response URL so the client can
// push a new message onto the thread
lastEvent.data.response_url = `/thread/${threadId}/response`;
console.log("returning last event from endpoint", lastEvent);
res.json({
thread_id: threadId,
...newThread
});
});
// GET /thread/:id - Get thread status
app.get('/thread/:id', (req, res) => {
const thread = store.get(req.params.id);
if (!thread) {
return res.status(404).json({ error: "Thread not found" });
}
res.json(thread);
});
type ApprovalPayload = {
type: "approval";
approved: boolean;
comment?: string;
}
type ResponsePayload = {
type: "response";
response: string;
}
type Payload = ApprovalPayload | ResponsePayload;
// POST /thread/:id/response - Handle clarification response
app.post('/thread/:id/response', async (req, res) => {
let thread = store.get(req.params.id);
if (!thread) {
return res.status(404).json({ error: "Thread not found" });
}
const body: Payload = req.body;
let lastEvent = thread.events[thread.events.length - 1];
if (thread.awaitingHumanResponse() && body.type === 'response') {
thread.events.push({
type: "human_response",
data: body.response
});
} else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
// push feedback onto the thread
thread.events.push({
type: "tool_response",
data: `user denied the operation with feedback: "${body.comment}"`
});
} else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
// approved, run the tool, pushing results onto the thread
await handleNextStep(lastEvent.data, thread);
} else {
res.status(400).json({
error: "Invalid request: " + body.type,
awaitingHumanResponse: thread.awaitingHumanResponse(),
awaitingHumanApproval: thread.awaitingHumanApproval()
});
return;
}
// loop until stop event
const result = await agentLoop(thread);
store.update(req.params.id, result);
lastEvent = result.events[result.events.length - 1];
lastEvent.data.response_url = `/thread/${req.params.id}/response`;
console.log("returning last event from endpoint", lastEvent);
res.json(result);
});
type WebhookResponse = V1Beta2HumanContactCompleted;
app.post('/webhook/response', async (req, res) => {
console.log("webhook response", req.body);
const response = req.body as WebhookResponse;
// response is guaranteed to be set on a webhook
const humanResponse: string = response.event.status?.response as string;
const threadId = response.event.spec.state?.thread_id;
if (!threadId) {
return res.status(400).json({ error: "Thread ID not found" });
}
const thread = store.get(threadId);
if (!thread) {
return res.status(404).json({ error: "Thread not found" });
}
if (!thread.awaitingHumanResponse()) {
return res.status(400).json({ error: "Thread is not awaiting human response" });
}
thread.events.push({
type: "human_response",
data: response.event.status?.response
});
// resume the agent loop with the new event, then acknowledge the webhook
const result = await agentLoop(thread);
store.update(threadId, result);
res.json({ status: "ok" });
});
const port = process.env.PORT || 3000;
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
export { app };