dexhorthy
2025-07-16 14:40:54 -07:00
parent c0150f240f
commit 44667e7231
39 changed files with 606 additions and 4215 deletions

View File

@@ -0,0 +1,244 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "28d943f3",
"metadata": {},
"source": [
"# Test Tree Command After Fetch"
]
},
{
"cell_type": "markdown",
"id": "2ceb6af5",
"metadata": {},
"source": [
"Simple test to verify tree command works after fetch_file"
]
},
{
"cell_type": "markdown",
"id": "c29aaef8",
"metadata": {},
"source": [
"## Test Tree Command"
]
},
{
"cell_type": "markdown",
"id": "6004361d",
"metadata": {},
"source": [
"Testing tree command after fetch_file"
]
},
{
"cell_type": "markdown",
"id": "f608feb7",
"metadata": {},
"source": [
"Setting up BAML"
]
},
{
"cell_type": "markdown",
"id": "949493c7",
"metadata": {},
"source": [
"### BAML Setup\n",
"\n",
"Don't worry too much about this setup code - it will make sense later! For now, just know that:\n",
"- BAML is a tool for working with language models\n",
"- We need some special setup code to make it work nicely in Google Colab\n",
"- The `get_baml_client()` function will be used to interact with AI models"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d2a355e",
"metadata": {},
"outputs": [],
"source": [
"!pip install baml-py==0.202.0 pydantic"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "50e508d6",
"metadata": {},
"outputs": [],
"source": [
"import subprocess\n",
"import os\n",
"\n",
"# Try to import Google Colab userdata, but don't fail if not in Colab\n",
"try:\n",
" from google.colab import userdata\n",
" IN_COLAB = True\n",
"except ImportError:\n",
" IN_COLAB = False\n",
"\n",
"def baml_generate():\n",
" try:\n",
" result = subprocess.run(\n",
" [\"baml-cli\", \"generate\"],\n",
" check=True,\n",
" capture_output=True,\n",
" text=True\n",
" )\n",
" if result.stdout:\n",
" print(\"[baml-cli generate]\\n\", result.stdout)\n",
" if result.stderr:\n",
" print(\"[baml-cli generate]\\n\", result.stderr)\n",
" except subprocess.CalledProcessError as e:\n",
" msg = (\n",
" f\"`baml-cli generate` failed with exit code {e.returncode}\\n\"\n",
" f\"--- STDOUT ---\\n{e.stdout}\\n\"\n",
" f\"--- STDERR ---\\n{e.stderr}\"\n",
" )\n",
" raise RuntimeError(msg) from None\n",
"\n",
"def get_baml_client():\n",
" \"\"\"\n",
" a bunch of fun jank to work around the google colab import cache\n",
" \"\"\"\n",
" # Set API key from Colab secrets or environment\n",
" if IN_COLAB:\n",
" os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')\n",
" elif 'OPENAI_API_KEY' not in os.environ:\n",
" print(\"Warning: OPENAI_API_KEY not set. Please set it in your environment.\")\n",
" \n",
" baml_generate()\n",
" \n",
" # Force delete all baml_client modules from sys.modules\n",
" import sys\n",
" modules_to_delete = [key for key in sys.modules.keys() if key.startswith('baml_client')]\n",
" for module in modules_to_delete:\n",
" del sys.modules[module]\n",
" \n",
" # Now import fresh\n",
" import baml_client\n",
" return baml_client.sync_client.b\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e29841b6",
"metadata": {},
"outputs": [],
"source": [
"!baml-cli init"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a923e734",
"metadata": {},
"outputs": [],
"source": [
"# Helper function to capture BAML logs in notebook output\n",
"import os\n",
"from IPython.utils.capture import capture_output\n",
"\n",
"def run_with_baml_logs(func, *args, **kwargs):\n",
" \"\"\"Run a function and capture BAML logs in the notebook output.\"\"\"\n",
" # Capture both stdout and stderr\n",
" with capture_output() as captured:\n",
" result = func(*args, **kwargs)\n",
" \n",
" # Display the captured output\n",
" if captured.stdout:\n",
" print(captured.stdout)\n",
" if captured.stderr:\n",
" # BAML logs go to stderr - format them nicely\n",
" print(\"\\n=== BAML Logs ===\")\n",
" print(captured.stderr)\n",
" print(\"=================\\n\")\n",
" \n",
" return result\n",
"\n",
"# Set BAML log level (options: error, warn, info, debug, trace)\n",
"os.environ['BAML_LOG'] = 'info'\n"
]
},
{
"cell_type": "markdown",
"id": "10ef9f0e",
"metadata": {},
"source": [
"Fetching a BAML file"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "20691263",
"metadata": {},
"outputs": [],
"source": [
"!curl -fsSL -o baml_src/agent.baml https://raw.githubusercontent.com/humanlayer/12-factor-agents/refs/heads/main/workshops/2025-07-16/./walkthrough/01-agent.baml && cat baml_src/agent.baml"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "149932f4",
"metadata": {},
"outputs": [],
"source": [
"!tree -I baml_client"
]
},
{
"cell_type": "markdown",
"id": "38cb3a47",
"metadata": {},
"source": [
"The tree command above should show our file structure"
]
},
{
"cell_type": "markdown",
"id": "62eadcdf",
"metadata": {},
"source": [
"Let's fetch another file"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4e404be1",
"metadata": {},
"outputs": [],
"source": [
"!curl -fsSL -o baml_src/tool_calculator.baml https://raw.githubusercontent.com/humanlayer/12-factor-agents/refs/heads/main/workshops/2025-07-16/./walkthrough/02-tool_calculator.baml && cat baml_src/tool_calculator.baml"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8e132a07",
"metadata": {},
"outputs": [],
"source": [
"!tree -I baml_client"
]
},
{
"cell_type": "markdown",
"id": "5209bcf6",
"metadata": {},
"source": [
"Now we should see both BAML files in the tree"
]
}
],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,21 @@
title: "Test Tree Command After Fetch"
text: "Simple test to verify tree command works after fetch_file"
targets:
- ipynb: "./tmp_test.ipynb"
sections:
- name: test-tree
title: "Test Tree Command"
text: "Testing tree command after fetch_file"
steps:
- text: "Setting up BAML"
- baml_setup: true
- text: "Fetching a BAML file"
- fetch_file: {src: ./walkthrough/01-agent.baml, dest: baml_src/agent.baml}
- command: "!tree -I baml_client"
- text: "The tree command above should show our file structure"
- text: "Let's fetch another file"
- fetch_file: {src: ./walkthrough/02-tool_calculator.baml, dest: baml_src/tool_calculator.baml}
- command: "!tree -I baml_client"
- text: "Now we should see both BAML files in the tree"

View File

@@ -74,6 +74,7 @@ sections:
           This BAML file defines what our agent can do:
       - fetch_file: {src: ./walkthrough/01-agent.baml, dest: baml_src/agent.baml}
+      - command: "!tree -I baml_client"
       - text: |
           Now let's create our main function that accepts a message parameter:
       - file: {src: ./walkthrough/01-main.py}
@@ -279,9 +280,11 @@ sections:
       - fetch_file: {src: ./walkthrough/06-agent.baml, dest: baml_src/agent.baml}
       - text: |
           Now let's test it with a simple calculation to see the reasoning in action:
+
+          **Note:** The BAML logs below will show the model's reasoning steps. Look for the `<reasoning>` tags in the logs to see how the model thinks through the problem before deciding what to do.
       - run_main: {regenerate_baml: true, args: "can you multiply 3 and 4"}
       - text: |
-          You should notice in the BAML logs (if enabled) that the model now includes reasoning steps before deciding what to do.
+          You should see the reasoning steps in the BAML logs above. The model explicitly thinks through what it needs to do before making a decision.
 
           ## Advanced Prompt Engineering

View File

@@ -83,110 +83,38 @@ def get_baml_client():
     init_code = "!baml-cli init"
     nb.cells.append(new_code_cell(init_code))
 
-    # Fourth cell: BAML logging helper for Jupyter
-    logging_setup = '''# Enable BAML logging capture in Jupyter
+    # Fourth cell: Add BAML logging helper
+    logging_helper = '''# Helper function to capture BAML logs in notebook output
 import os
-import sys
 from IPython.utils.capture import capture_output
 
-# Set BAML logging level
-os.environ['BAML_LOG'] = 'info'
-
-# Helper function to run code with BAML log capture
 def run_with_baml_logs(func, *args, **kwargs):
-    """Run a function and display BAML logs in the notebook."""
-    print(f"Running with BAML_LOG={os.environ.get('BAML_LOG')}...")
-    # Capture all output
+    """Run a function and capture BAML logs in the notebook output."""
+    # Capture both stdout and stderr
     with capture_output() as captured:
         result = func(*args, **kwargs)
 
-    # Display the result first
-    if result is not None:
-        print("=== Result ===")
-        print(result)
-
-    # Display captured stdout
+    # Display the captured output
     if captured.stdout:
-        print("\\n=== Output ===")
         print(captured.stdout)
-
-    # Display BAML logs from stderr
     if captured.stderr:
+        # BAML logs go to stderr - format them nicely
         print("\\n=== BAML Logs ===")
-        # Format the logs for better readability
-        log_lines = captured.stderr.strip().split('\\n')
-        for line in log_lines:
-            if 'reasoning' in line.lower() or '<reasoning>' in line:
-                print(f"🤔 {line}")
-            elif 'error' in line.lower():
-                print(f"{line}")
-            elif 'warn' in line.lower():
-                print(f"⚠️ {line}")
-            else:
-                print(f" {line}")
+        print(captured.stderr)
         print("=================\\n")
 
     return result
-
-print("BAML logging helper loaded! Use run_with_baml_logs(main, 'your message') to see logs.")
+
+# Set BAML log level (options: error, warn, info, debug, trace)
+os.environ['BAML_LOG'] = 'info'
 '''
-    nb.cells.append(new_code_cell(logging_setup))
+    nb.cells.append(new_code_cell(logging_helper))
 
 def process_step(nb, step, base_path, current_functions, section_name=None):
     """Process different step types."""
     if 'text' in step:
         # Add markdown cell
         nb.cells.append(new_markdown_cell(step['text']))
-
-        # Special handling for reasoning section
-        if section_name == 'customize-prompt' and 'reasoning in action' in step['text']:
-            # Add enhanced reasoning visualization after the text
-            reasoning_viz = '''# Enhanced logging for reasoning visualization
-import re
-from IPython.display import display, HTML
-
-def run_and_show_reasoning(func, *args, **kwargs):
-    """Run a function and highlight the reasoning steps from BAML logs."""
-    from IPython.utils.capture import capture_output
-    with capture_output() as captured:
-        result = func(*args, **kwargs)
-
-    # Extract and format reasoning from logs
-    if captured.stderr:
-        # Look for reasoning sections in the logs
-        log_text = captured.stderr
-        # Find reasoning blocks
-        reasoning_pattern = r'<reasoning>(.*?)</reasoning>'
-        reasoning_matches = re.findall(reasoning_pattern, log_text, re.DOTALL)
-
-        if reasoning_matches:
-            display(HTML("<h3>🧠 Model Reasoning:</h3>"))
-            for reasoning in reasoning_matches:
-                display(HTML(f"""
-                <div style='background-color: #f0f8ff; border-left: 4px solid #4169e1;
-                            padding: 10px; margin: 10px 0; font-family: monospace;'>
-                {reasoning.strip().replace(chr(10), '<br>')}
-                </div>
-                """))
-
-        # Show the result
-        display(HTML("<h3>📤 Response:</h3>"))
-        display(HTML(f"<pre>{str(result)}</pre>"))
-
-        # Optionally show full logs
-        display(HTML("""<details><summary>View Full BAML Logs</summary>
-<pre style='font-size: 0.8em; background-color: #f5f5f5; padding: 10px;'>""" +
-                     log_text.replace('<', '&lt;').replace('>', '&gt;') +
-                     "</pre></details>"))
-
-    return result
-
-print("Enhanced reasoning visualization loaded! Use:")
-print("run_and_show_reasoning(main, 'can you multiply 3 and 4')")
-'''
-            nb.cells.append(new_code_cell(reasoning_viz))
 
     if 'baml_setup' in step:
         # Add BAML setup cells
@@ -256,18 +184,18 @@ print("run_and_show_reasoning(main, 'can you multiply 3 and 4')")
             else:
                 call_parts.append(f'{key}={value}')
 
-        # Generate the function call
-        main_call = f'main({", ".join(call_parts)})' if call_parts else "main()"
+        # Generate the function call - default to using logging for BAML sections
+        if call_parts:
+            main_call = f'main({", ".join(call_parts)})'
+        else:
+            main_call = "main()"
 
-        # Use different wrappers based on section
-        if section_name == 'customize-prompt':
-            # Use enhanced reasoning visualization for this section
-            nb.cells.append(new_code_cell(f'run_and_show_reasoning({main_call})'))
-        elif section_name in ['cli-and-agent', 'calculator-tools', 'tool-loop']:
-            # Use basic logging wrapper for these sections
+        # For sections with BAML, use logging wrapper
+        if section_name in ['cli-and-agent', 'calculator-tools', 'tool-loop', 'baml-tests',
+                            'human-tools', 'customize-prompt', 'context-window']:
             nb.cells.append(new_code_cell(f'run_with_baml_logs({main_call})'))
         else:
-            # Default to plain call
+            # Only hello-world section runs without logging
             nb.cells.append(new_code_cell(main_call))
 
 def convert_walkthrough_to_notebook(yaml_path, output_path):

File diff suppressed because it is too large

View File

@@ -1,142 +0,0 @@
# Chapter 0 - Hello World
Let's start with a basic TypeScript setup and a hello world program.
This guide is written in TypeScript (yes, a Python version is coming soon)
There are checkpoints between every file edit in the workshop steps,
so even if you aren't super familiar with TypeScript,
you should be able to keep up and run each example.
To run this guide, you'll need a relatively recent version of nodejs and npm installed
You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine
brew install node@20
You should see the node version
node --version
Copy initial package.json
cp ./walkthrough/00-package.json package.json
<details>
<summary>show file</summary>
```json
// ./walkthrough/00-package.json
{
  "name": "my-agent",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "dev": "tsx src/index.ts",
    "build": "tsc"
  },
  "dependencies": {
    "tsx": "^4.15.0",
    "typescript": "^5.0.0"
  },
  "devDependencies": {
    "@types/node": "^20.0.0",
    "@typescript-eslint/eslint-plugin": "^6.0.0",
    "@typescript-eslint/parser": "^6.0.0",
    "eslint": "^8.0.0"
  }
}
```
</details>
Install dependencies
npm install
Copy tsconfig.json
cp ./walkthrough/00-tsconfig.json tsconfig.json
<details>
<summary>show file</summary>
```json
// ./walkthrough/00-tsconfig.json
{
  "compilerOptions": {
    "target": "ES2017",
    "lib": ["esnext"],
    "allowJs": true,
    "skipLibCheck": true,
    "strict": true,
    "noEmit": true,
    "esModuleInterop": true,
    "module": "esnext",
    "moduleResolution": "bundler",
    "resolveJsonModule": true,
    "isolatedModules": true,
    "jsx": "preserve",
    "incremental": true,
    "plugins": [],
    "paths": {
      "@/*": ["./*"]
    }
  },
  "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
  "exclude": ["node_modules", "walkthrough"]
}
```
</details>
add .gitignore
cp ./walkthrough/00-.gitignore .gitignore
<details>
<summary>show file</summary>
```gitignore
// ./walkthrough/00-.gitignore
baml_client/
node_modules/
```
</details>
Create src folder
mkdir -p src
Add a simple hello world index.ts
cp ./walkthrough/00-index.ts src/index.ts
<details>
<summary>show file</summary>
```ts
// ./walkthrough/00-index.ts
async function hello(): Promise<void> {
  console.log('hello, world!')
}

async function main() {
  await hello()
}

main().catch(console.error)
```
</details>
Run it to verify
npx tsx src/index.ts
You should see:
hello, world!

View File

@@ -1,2 +0,0 @@
baml_client/
node_modules/

View File

@@ -1,9 +0,0 @@
async function hello(): Promise<void> {
console.log('hello, world!')
}
async function main() {
await hello()
}
main().catch(console.error)

View File

@@ -1,20 +0,0 @@
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0"
}
}

View File

@@ -1,24 +0,0 @@
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}

View File

@@ -1,2 +0,0 @@
baml_client/
node_modules/

View File

@@ -1,231 +0,0 @@
# Chapter 1 - CLI and Agent Loop
Now let's add BAML and create our first agent with a CLI interface.
First, we'll need to install [BAML](https://github.com/boundaryml/baml),
a tool for prompting and structured outputs.
npm install @boundaryml/baml
Initialize BAML
npx baml-cli init
Remove default resume.baml
rm baml_src/resume.baml
Add our starter agent, a single baml prompt that we'll build on
cp ./walkthrough/01-agent.baml baml_src/agent.baml
<details>
<summary>show file</summary>
```rust
// ./walkthrough/01-agent.baml
class DoneForNow {
  intent "done_for_now"
  message string
}

client<llm> Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY
  }
}

function DetermineNextStep(
  thread: string
) -> DoneForNow {
  client Qwen3
  // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
  prompt #"
    {{ _.role("system") }}
    /nothink
    You are a helpful assistant that can help with tasks.

    {{ _.role("user") }}
    You are working on the following thread:
    {{ thread }}

    What should the next step be?
    {{ ctx.output_format }}
  "#
}

test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}
```
</details>
Generate BAML client code
npx baml-cli generate
Enable BAML logging for this section
export BAML_LOG=debug
Add the CLI interface
cp ./walkthrough/01-cli.ts src/cli.ts
<details>
<summary>show file</summary>
```ts
// ./walkthrough/01-cli.ts
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "./agent";

export async function cli() {
  // Get command line arguments, skipping the first two (node and script name)
  const args = process.argv.slice(2);

  if (args.length === 0) {
    console.error("Error: Please provide a message as a command line argument");
    process.exit(1);
  }

  // Join all arguments into a single message
  const message = args.join(" ");

  // Create a new thread with the user's message as the initial event
  const thread = new Thread([{ type: "user_input", data: message }]);

  // Run the agent loop with the thread
  const result = await agentLoop(thread);
  console.log(result);
}
```
</details>
Update index.ts to use the CLI
```diff
src/index.ts
+import { cli } from "./cli"
+
 async function hello(): Promise<void> {
   console.log('hello, world!')
 }
 
 async function main() {
-  await hello()
+  await cli()
 }
```
<details>
<summary>skip this step</summary>
cp ./walkthrough/01-index.ts src/index.ts
</details>
Add the agent implementation
cp ./walkthrough/01-agent.ts src/agent.ts
<details>
<summary>show file</summary>
```ts
// ./walkthrough/01-agent.ts
import { b } from "../baml_client";

// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

export interface Event {
  type: string
  data: any;
}

export class Thread {
  events: Event[] = [];

  constructor(events: Event[]) {
    this.events = events;
  }

  serializeForLLM() {
    // can change this to whatever custom serialization you want to do, XML, etc
    // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
    return JSON.stringify(this.events);
  }
}

// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
  const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
  return nextStep;
}
```
</details>
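The comment in `serializeForLLM` hints that JSON is just one option; you can serialize the thread however you like. A purely illustrative sketch (not part of the walkthrough) of an XML-ish serializer:

```ts
// hypothetical alternative to serializeForLLM - illustrative only
function serializeAsXml(events: Event[]): string {
  // render each event as an XML-style block the model can anchor on
  return events
    .map(e => `<${e.type}>\n${JSON.stringify(e.data, null, 2)}\n</${e.type}>`)
    .join("\n")
}
```

How you serialize the context window can meaningfully affect how well the model follows the thread, so this is a good knob to experiment with later.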
The BAML code is configured to use BASETEN_API_KEY by default
To get a Baseten API key and URL, create an account at [baseten.co](https://baseten.co),
and then deploy [Qwen3 32B from the model library](https://www.baseten.co/library/qwen-3-32b/).
```rust
function DetermineNextStep(thread: string) -> DoneForNow {
client Qwen3
// ...
```
If you want to run the example with no changes, you can set the BASETEN_API_KEY env var to any valid baseten key.
If you want to try swapping out the model, you can change the `client` line.
[Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms)
For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini)
or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider.
For example, to use openai with an OPENAI_API_KEY, you can do:
client "openai/gpt-4o"
Set your env vars
export BASETEN_API_KEY=...
export BASETEN_BASE_URL=...
Try it out
npx tsx src/index.ts hello
you should see a familiar response from the model
{
intent: 'done_for_now',
message: 'Hello! How can I assist you today?'
}

View File

@@ -1,9 +0,0 @@
async function hello(): Promise<void> {
console.log('hello, world!')
}
async function main() {
await hello()
}
main().catch(console.error)

View File

@@ -1,49 +0,0 @@
class DoneForNow {
intent "done_for_now"
message string
}
client<llm> Qwen3 {
provider "openai-generic"
options {
base_url env.BASETEN_BASE_URL
api_key env.BASETEN_API_KEY
}
}
function DetermineNextStep(
thread: string
) -> DoneForNow {
client Qwen3
// use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
prompt #"
{{ _.role("system") }}
/nothink
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}

View File

@@ -1,32 +0,0 @@
import { b } from "../baml_client";
// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
export interface Event {
type: string
data: any;
}
export class Thread {
events: Event[] = [];
constructor(events: Event[]) {
this.events = events;
}
serializeForLLM() {
// can change this to whatever custom serialization you want to do, XML, etc
// e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
return JSON.stringify(this.events);
}
}
// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
return nextStep;
}

View File

@@ -1,23 +0,0 @@
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "./agent";
export async function cli() {
// Get command line arguments, skipping the first two (node and script name)
const args = process.argv.slice(2);
if (args.length === 0) {
console.error("Error: Please provide a message as a command line argument");
process.exit(1);
}
// Join all arguments into a single message
const message = args.join(" ");
// Create a new thread with the user's message as the initial event
const thread = new Thread([{ type: "user_input", data: message }]);
// Run the agent loop with the thread
const result = await agentLoop(thread);
console.log(result);
}

View File

@@ -1,11 +0,0 @@
import { cli } from "./cli"
async function hello(): Promise<void> {
console.log('hello, world!')
}
async function main() {
await cli()
}
main().catch(console.error)

View File

@@ -1,2 +0,0 @@
baml_client/
node_modules/

View File

@@ -1,84 +0,0 @@
# Chapter 2 - Add Calculator Tools
Let's add some calculator tools to our agent.
Let's start by adding a tool definition for the calculator
These are simple structured outputs that we'll ask the model to
return as a "next step" in the agentic loop.
cp ./walkthrough/02-tool_calculator.baml baml_src/tool_calculator.baml
<details>
<summary>show file</summary>
```rust
// ./walkthrough/02-tool_calculator.baml
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool

class AddTool {
  intent "add"
  a int | float
  b int | float
}

class SubtractTool {
  intent "subtract"
  a int | float
  b int | float
}

class MultiplyTool {
  intent "multiply"
  a int | float
  b int | float
}

class DivideTool {
  intent "divide"
  a int | float
  b int | float
}
```
</details>
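Because each class pins `intent` to a literal string, the generated TypeScript types form a discriminated union that the compiler can narrow. Roughly, and with the generated shapes assumed here (BAML's `int | float` maps to `number` in TypeScript):

```ts
// assumed shapes of the baml_client codegen output - for illustration only
type AddTool      = { intent: "add";      a: number; b: number }
type SubtractTool = { intent: "subtract"; a: number; b: number }
type MultiplyTool = { intent: "multiply"; a: number; b: number }
type DivideTool   = { intent: "divide";   a: number; b: number }
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool

// TypeScript narrows on the `intent` discriminant, so each case
// knows exactly which tool shape it is working with
function execute(tool: CalculatorTools): number {
  switch (tool.intent) {
    case "add":      return tool.a + tool.b
    case "subtract": return tool.a - tool.b
    case "multiply": return tool.a * tool.b
    case "divide":   return tool.a / tool.b
  }
}
```

This is exactly the property the agent loop will lean on in the next chapter.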
Now, let's update the agent's DetermineNextStep method to
expose the calculator tools as potential next steps
```diff
baml_src/agent.baml
 function DetermineNextStep(
   thread: string
-) -> DoneForNow {
+) -> CalculatorTools | DoneForNow {
   client Qwen3
```
<details>
<summary>skip this step</summary>
cp ./walkthrough/02-agent.baml baml_src/agent.baml
</details>
Generate updated BAML client
npx baml-cli generate
Try out the calculator
npx tsx src/index.ts 'can you add 3 and 4'
You should see a tool call to the calculator
{
intent: 'add',
a: 3,
b: 4
}
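Note that nothing executes the tool yet: the agent just returns the structured call. Running the tool and feeding the result back to the model is the subject of the next chapter.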

View File

@@ -1,49 +0,0 @@
class DoneForNow {
intent "done_for_now"
message string
}
client<llm> Qwen3 {
provider "openai-generic"
options {
base_url env.BASETEN_BASE_URL
api_key env.BASETEN_API_KEY
}
}
function DetermineNextStep(
thread: string
) -> DoneForNow {
client Qwen3
// use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
prompt #"
{{ _.role("system") }}
/nothink
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}

View File

@@ -1,75 +0,0 @@
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview
client<llm> CustomGPT4o {
provider openai
options {
model "gpt-4o"
api_key env.OPENAI_API_KEY
}
}
client<llm> CustomGPT4oMini {
provider openai
retry_policy Exponential
options {
model "gpt-4o-mini"
api_key env.OPENAI_API_KEY
}
}
client<llm> CustomSonnet {
provider anthropic
options {
model "claude-3-5-sonnet-20241022"
api_key env.ANTHROPIC_API_KEY
}
}
client<llm> CustomHaiku {
provider anthropic
retry_policy Constant
options {
model "claude-3-haiku-20240307"
api_key env.ANTHROPIC_API_KEY
}
}
// https://docs.boundaryml.com/docs/snippets/clients/round-robin
client<llm> CustomFast {
provider round-robin
options {
// This will alternate between the two clients
strategy [CustomGPT4oMini, CustomHaiku]
}
}
// https://docs.boundaryml.com/docs/snippets/clients/fallback
client<llm> OpenaiFallback {
provider fallback
options {
// This will try the clients in order until one succeeds
strategy [CustomGPT4oMini, CustomGPT4oMini]
}
}
// https://docs.boundaryml.com/docs/snippets/clients/retry
retry_policy Constant {
max_retries 3
// Strategy is optional
strategy {
type constant_delay
delay_ms 200
}
}
retry_policy Exponential {
max_retries 2
// Strategy is optional
strategy {
type exponential_backoff
delay_ms 300
multiplier 1.5
max_delay_ms 10000
}
}

View File

@@ -1,18 +0,0 @@
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
// Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
output_type "typescript"
// Where the generated code will be saved (relative to baml_src/)
output_dir "../"
// The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
// The BAML VSCode extension version should also match this version.
version "0.88.0"
// Valid values: "sync", "async"
// This controls what `b.FunctionName()` will be (sync or async).
default_client_mode async
}

View File

@@ -1,32 +0,0 @@
import { b } from "../baml_client";
// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
export interface Event {
type: string
data: any;
}
export class Thread {
events: Event[] = [];
constructor(events: Event[]) {
this.events = events;
}
serializeForLLM() {
// can change this to whatever custom serialization you want to do, XML, etc
// e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
return JSON.stringify(this.events);
}
}
// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
return nextStep;
}

View File

@@ -1,23 +0,0 @@
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "./agent";
export async function cli() {
// Get command line arguments, skipping the first two (node and script name)
const args = process.argv.slice(2);
if (args.length === 0) {
console.error("Error: Please provide a message as a command line argument");
process.exit(1);
}
// Join all arguments into a single message
const message = args.join(" ");
// Create a new thread with the user's message as the initial event
const thread = new Thread([{ type: "user_input", data: message }]);
// Run the agent loop with the thread
const result = await agentLoop(thread);
console.log(result);
}

View File

@@ -1,11 +0,0 @@
import { cli } from "./cli"
async function hello(): Promise<void> {
console.log('hello, world!')
}
async function main() {
await cli()
}
main().catch(console.error)

View File

@@ -1,49 +0,0 @@
class DoneForNow {
intent "done_for_now"
message string
}
client<llm> Qwen3 {
provider "openai-generic"
options {
base_url env.BASETEN_BASE_URL
api_key env.BASETEN_API_KEY
}
}
function DetermineNextStep(
thread: string
) -> CalculatorTools | DoneForNow {
client Qwen3
// use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
prompt #"
{{ _.role("system") }}
/nothink
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}

View File

@@ -1,27 +0,0 @@
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
class AddTool {
intent "add"
a int | float
b int | float
}
class SubtractTool {
intent "subtract"
a int | float
b int | float
}
class MultiplyTool {
intent "multiply"
a int | float
b int | float
}
class DivideTool {
intent "divide"
a int | float
b int | float
}

View File

@@ -1,2 +0,0 @@
baml_client/
node_modules/

View File

@@ -1,194 +0,0 @@
# Chapter 3 - Process Tool Calls in a Loop
Now let's add a real agentic loop that can run the tools and get a final answer from the LLM.
First, let's update the agent to handle the tool call
```diff
src/agent.ts
}
-// right now this just runs one turn with the LLM, but
-// we'll update this function to handle all the agent logic
-export async function agentLoop(thread: Thread): Promise<AgentResponse> {
- const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
- return nextStep;
+
+
+export async function agentLoop(thread: Thread): Promise<string> {
+
+ while (true) {
+ const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
+ console.log("nextStep", nextStep);
+
+ switch (nextStep.intent) {
+ case "done_for_now":
+ // response to human, return the next step object
+ return nextStep.message;
+ case "add":
+ thread.events.push({
+ "type": "tool_call",
+ "data": nextStep
+ });
+ const result = nextStep.a + nextStep.b;
+ console.log("tool_response", result);
+ thread.events.push({
+ "type": "tool_response",
+ "data": result
+ });
+ continue;
+ default:
+ throw new Error(`Unknown intent: ${nextStep.intent}`);
+ }
+ }
}
```
<details>
<summary>skip this step</summary>
cp ./walkthrough/03-agent.ts src/agent.ts
</details>
Now, let's try it out
npx tsx src/index.ts 'can you add 3 and 4'
you should see the agent call the tool and then return the result
{
intent: 'done_for_now',
message: 'The sum of 3 and 4 is 7.'
}
For the next step, we'll do a more complex calculation. Let's turn off the BAML logs for more concise output
export BAML_LOG=off
Try a multi-step calculation
npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result'
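if the loop is working, you should see two chained add calls: 3 + 4 = 7, then 7 + 6 = 13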
you'll notice that tools like multiply and divide are not available
npx tsx src/index.ts 'can you multiply 3 and 4'
next, let's add handlers for the rest of the calculator tools
```diff
src/agent.ts
-import { b } from "../baml_client";
+import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
-// tool call or a respond to human tool
-type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
-
export interface Event {
type: string
}
+export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
+export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
+ let result: number;
+ switch (nextStep.intent) {
+ case "add":
+ result = nextStep.a + nextStep.b;
+ console.log("tool_response", result);
+ thread.events.push({
+ "type": "tool_response",
+ "data": result
+ });
+ return thread;
+ case "subtract":
+ result = nextStep.a - nextStep.b;
+ console.log("tool_response", result);
+ thread.events.push({
+ "type": "tool_response",
+ "data": result
+ });
+ return thread;
+ case "multiply":
+ result = nextStep.a * nextStep.b;
+ console.log("tool_response", result);
+ thread.events.push({
+ "type": "tool_response",
+ "data": result
+ });
+ return thread;
+ case "divide":
+ result = nextStep.a / nextStep.b;
+ console.log("tool_response", result);
+ thread.events.push({
+ "type": "tool_response",
+ "data": result
+ });
+ return thread;
+ }
+}
export async function agentLoop(thread: Thread): Promise<string> {
console.log("nextStep", nextStep);
+ thread.events.push({
+ "type": "tool_call",
+ "data": nextStep
+ });
+
switch (nextStep.intent) {
case "done_for_now":
return nextStep.message;
case "add":
- thread.events.push({
- "type": "tool_call",
- "data": nextStep
- });
- const result = nextStep.a + nextStep.b;
- console.log("tool_response", result);
- thread.events.push({
- "type": "tool_response",
- "data": result
- });
- continue;
- default:
- throw new Error(`Unknown intent: ${nextStep.intent}`);
+ case "subtract":
+ case "multiply":
+ case "divide":
+ thread = await handleNextStep(nextStep, thread);
}
}
```
<details>
<summary>skip this step</summary>
cp ./walkthrough/03b-agent.ts src/agent.ts
</details>
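If you want to sanity-check `handleNextStep` without a round-trip to the LLM, you can hand it a fabricated tool call. A hypothetical snippet (assuming the generated tool types line up), e.g. in a scratch file:

```ts
import { Thread, handleNextStep } from "./agent"

async function demo() {
  // simulate the model choosing the multiply tool
  const thread = new Thread([{ type: "user_input", data: "multiply 3 and 4" }])
  await handleNextStep({ intent: "multiply", a: 3, b: 4 }, thread)
  console.log(thread.events) // last event: { type: 'tool_response', data: 12 }
}

demo().catch(console.error)
```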
Test subtraction
npx tsx src/index.ts 'can you subtract 3 from 4'
now, let's test the multiplication tool
npx tsx src/index.ts 'can you multiply 3 and 4'
finally, let's test a more complex calculation with multiple operations
npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
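the agent should chain all three tools: 3 × 4 = 12, then 12 ÷ 2 = 6, then 6 + 12 = 18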
congratulations, you've taken your first step into hand-rolling an agent loop.
from here, we're going to start incorporating some more intermediate and advanced
concepts for 12-factor agents.

View File

@@ -1,49 +0,0 @@
class DoneForNow {
intent "done_for_now"
message string
}
client<llm> Qwen3 {
provider "openai-generic"
options {
base_url env.BASETEN_BASE_URL
api_key env.BASETEN_API_KEY
}
}
function DetermineNextStep(
thread: string
) -> CalculatorTools | DoneForNow {
client Qwen3
// use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
prompt #"
{{ _.role("system") }}
/nothink
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}

View File

@@ -1,75 +0,0 @@
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview
client<llm> CustomGPT4o {
provider openai
options {
model "gpt-4o"
api_key env.OPENAI_API_KEY
}
}
client<llm> CustomGPT4oMini {
provider openai
retry_policy Exponential
options {
model "gpt-4o-mini"
api_key env.OPENAI_API_KEY
}
}
client<llm> CustomSonnet {
provider anthropic
options {
model "claude-3-5-sonnet-20241022"
api_key env.ANTHROPIC_API_KEY
}
}
client<llm> CustomHaiku {
provider anthropic
retry_policy Constant
options {
model "claude-3-haiku-20240307"
api_key env.ANTHROPIC_API_KEY
}
}
// https://docs.boundaryml.com/docs/snippets/clients/round-robin
client<llm> CustomFast {
provider round-robin
options {
// This will alternate between the two clients
strategy [CustomGPT4oMini, CustomHaiku]
}
}
// https://docs.boundaryml.com/docs/snippets/clients/fallback
client<llm> OpenaiFallback {
provider fallback
options {
// This will try the clients in order until one succeeds
strategy [CustomGPT4oMini, CustomGPT4oMini]
}
}
// https://docs.boundaryml.com/docs/snippets/clients/retry
retry_policy Constant {
max_retries 3
// Strategy is optional
strategy {
type constant_delay
delay_ms 200
}
}
retry_policy Exponential {
max_retries 2
// Strategy is optional
strategy {
type exponential_backoff
delay_ms 300
multiplier 1.5
max_delay_ms 10000
}
}

View File

@@ -1,18 +0,0 @@
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
// Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
output_type "typescript"
// Where the generated code will be saved (relative to baml_src/)
output_dir "../"
// The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
// The BAML VSCode extension version should also match this version.
version "0.88.0"
// Valid values: "sync", "async"
// This controls what `b.FunctionName()` will be (sync or async).
default_client_mode async
}

View File

@@ -1,27 +0,0 @@
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
class AddTool {
intent "add"
a int | float
b int | float
}
class SubtractTool {
intent "subtract"
a int | float
b int | float
}
class MultiplyTool {
intent "multiply"
a int | float
b int | float
}
class DivideTool {
intent "divide"
a int | float
b int | float
}

View File

@@ -1,32 +0,0 @@
import { b } from "../baml_client";
// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
export interface Event {
type: string
data: any;
}
export class Thread {
events: Event[] = [];
constructor(events: Event[]) {
this.events = events;
}
serializeForLLM() {
// can change this to whatever custom serialization you want to do, XML, etc
// e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
return JSON.stringify(this.events);
}
}
// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
return nextStep;
}

View File

@@ -1,23 +0,0 @@
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "./agent";
export async function cli() {
// Get command line arguments, skipping the first two (node and script name)
const args = process.argv.slice(2);
if (args.length === 0) {
console.error("Error: Please provide a message as a command line argument");
process.exit(1);
}
// Join all arguments into a single message
const message = args.join(" ");
// Create a new thread with the user's message as the initial event
const thread = new Thread([{ type: "user_input", data: message }]);
// Run the agent loop with the thread
const result = await agentLoop(thread);
console.log(result);
}

View File

@@ -1,11 +0,0 @@
import { cli } from "./cli"
async function hello(): Promise<void> {
console.log('hello, world!')
}
async function main() {
await cli()
}
main().catch(console.error)

View File

@@ -1,55 +0,0 @@
import { b } from "../baml_client";
// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
export interface Event {
type: string
data: any;
}
export class Thread {
events: Event[] = [];
constructor(events: Event[]) {
this.events = events;
}
serializeForLLM() {
// can change this to whatever custom serialization you want to do, XML, etc
// e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
return JSON.stringify(this.events);
}
}
export async function agentLoop(thread: Thread): Promise<string> {
while (true) {
const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
console.log("nextStep", nextStep);
switch (nextStep.intent) {
case "done_for_now":
// response to human, return the next step object
return nextStep.message;
case "add":
thread.events.push({
"type": "tool_call",
"data": nextStep
});
const result = nextStep.a + nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
continue;
default:
throw new Error(`Unknown intent: ${nextStep.intent}`);
}
}
}

View File

@@ -1,86 +0,0 @@
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
export interface Event {
type: string
data: any;
}
export class Thread {
events: Event[] = [];
constructor(events: Event[]) {
this.events = events;
}
serializeForLLM() {
// can change this to whatever custom serialization you want to do, XML, etc
// e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
return JSON.stringify(this.events);
}
}
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
let result: number;
switch (nextStep.intent) {
case "add":
result = nextStep.a + nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
case "subtract":
result = nextStep.a - nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
case "multiply":
result = nextStep.a * nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
case "divide":
result = nextStep.a / nextStep.b;
console.log("tool_response", result);
thread.events.push({
"type": "tool_response",
"data": result
});
return thread;
}
}
export async function agentLoop(thread: Thread): Promise<string> {
while (true) {
const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
console.log("nextStep", nextStep);
thread.events.push({
"type": "tool_call",
"data": nextStep
});
switch (nextStep.intent) {
case "done_for_now":
// response to human, return the next step object
return nextStep.message;
case "add":
case "subtract":
case "multiply":
case "divide":
thread = await handleNextStep(nextStep, thread);
}
}
}

File diff suppressed because it is too large