mirror of
https://github.com/humanlayer/12-factor-agents.git
synced 2025-08-20 18:59:53 +03:00
cleanup
244
workshops/2025-07-16/hack/tmp_test.ipynb
Normal file
@@ -0,0 +1,244 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "28d943f3",
   "metadata": {},
   "source": [
    "# Test Tree Command After Fetch"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2ceb6af5",
   "metadata": {},
   "source": [
    "Simple test to verify tree command works after fetch_file"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c29aaef8",
   "metadata": {},
   "source": [
    "## Test Tree Command"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6004361d",
   "metadata": {},
   "source": [
    "Testing tree command after fetch_file"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f608feb7",
   "metadata": {},
   "source": [
    "Setting up BAML"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "949493c7",
   "metadata": {},
   "source": [
    "### BAML Setup\n",
    "\n",
    "Don't worry too much about this setup code - it will make sense later! For now, just know that:\n",
    "- BAML is a tool for working with language models\n",
    "- We need some special setup code to make it work nicely in Google Colab\n",
    "- The `get_baml_client()` function will be used to interact with AI models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3d2a355e",
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install baml-py==0.202.0 pydantic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "50e508d6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import subprocess\n",
    "import os\n",
    "\n",
    "# Try to import Google Colab userdata, but don't fail if not in Colab\n",
    "try:\n",
    "    from google.colab import userdata\n",
    "    IN_COLAB = True\n",
    "except ImportError:\n",
    "    IN_COLAB = False\n",
    "\n",
    "def baml_generate():\n",
    "    try:\n",
    "        result = subprocess.run(\n",
    "            [\"baml-cli\", \"generate\"],\n",
    "            check=True,\n",
    "            capture_output=True,\n",
    "            text=True\n",
    "        )\n",
    "        if result.stdout:\n",
    "            print(\"[baml-cli generate]\\n\", result.stdout)\n",
    "        if result.stderr:\n",
    "            print(\"[baml-cli generate]\\n\", result.stderr)\n",
    "    except subprocess.CalledProcessError as e:\n",
    "        msg = (\n",
    "            f\"`baml-cli generate` failed with exit code {e.returncode}\\n\"\n",
    "            f\"--- STDOUT ---\\n{e.stdout}\\n\"\n",
    "            f\"--- STDERR ---\\n{e.stderr}\"\n",
    "        )\n",
    "        raise RuntimeError(msg) from None\n",
    "\n",
    "def get_baml_client():\n",
    "    \"\"\"\n",
    "    a bunch of fun jank to work around the google colab import cache\n",
    "    \"\"\"\n",
    "    # Set API key from Colab secrets or environment\n",
    "    if IN_COLAB:\n",
    "        os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')\n",
    "    elif 'OPENAI_API_KEY' not in os.environ:\n",
    "        print(\"Warning: OPENAI_API_KEY not set. Please set it in your environment.\")\n",
    "\n",
    "    baml_generate()\n",
    "\n",
    "    # Force delete all baml_client modules from sys.modules\n",
    "    import sys\n",
    "    modules_to_delete = [key for key in sys.modules.keys() if key.startswith('baml_client')]\n",
    "    for module in modules_to_delete:\n",
    "        del sys.modules[module]\n",
    "\n",
    "    # Now import fresh\n",
    "    import baml_client\n",
    "    return baml_client.sync_client.b\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e29841b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "!baml-cli init"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a923e734",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Helper function to capture BAML logs in notebook output\n",
    "import os\n",
    "from IPython.utils.capture import capture_output\n",
    "\n",
    "def run_with_baml_logs(func, *args, **kwargs):\n",
    "    \"\"\"Run a function and capture BAML logs in the notebook output.\"\"\"\n",
    "    # Capture both stdout and stderr\n",
    "    with capture_output() as captured:\n",
    "        result = func(*args, **kwargs)\n",
    "\n",
    "    # Display the captured output\n",
    "    if captured.stdout:\n",
    "        print(captured.stdout)\n",
    "    if captured.stderr:\n",
    "        # BAML logs go to stderr - format them nicely\n",
    "        print(\"\\n=== BAML Logs ===\")\n",
    "        print(captured.stderr)\n",
    "        print(\"=================\\n\")\n",
    "\n",
    "    return result\n",
    "\n",
    "# Set BAML log level (options: error, warn, info, debug, trace)\n",
    "os.environ['BAML_LOG'] = 'info'\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "10ef9f0e",
   "metadata": {},
   "source": [
    "Fetching a BAML file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "20691263",
   "metadata": {},
   "outputs": [],
   "source": [
    "!curl -fsSL -o baml_src/agent.baml https://raw.githubusercontent.com/humanlayer/12-factor-agents/refs/heads/main/workshops/2025-07-16/./walkthrough/01-agent.baml && cat baml_src/agent.baml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "149932f4",
   "metadata": {},
   "outputs": [],
   "source": [
    "!tree -I baml_client"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "38cb3a47",
   "metadata": {},
   "source": [
    "The tree command above should show our file structure"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "62eadcdf",
   "metadata": {},
   "source": [
    "Let's fetch another file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4e404be1",
   "metadata": {},
   "outputs": [],
   "source": [
    "!curl -fsSL -o baml_src/tool_calculator.baml https://raw.githubusercontent.com/humanlayer/12-factor-agents/refs/heads/main/workshops/2025-07-16/./walkthrough/02-tool_calculator.baml && cat baml_src/tool_calculator.baml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8e132a07",
   "metadata": {},
   "outputs": [],
   "source": [
    "!tree -I baml_client"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5209bcf6",
   "metadata": {},
   "source": [
    "Now we should see both BAML files in the tree"
   ]
  }
 ],
 "metadata": {},
 "nbformat": 4,
 "nbformat_minor": 5
}
21
workshops/2025-07-16/hack/tmp_walkthrough.yaml
Normal file
@@ -0,0 +1,21 @@
title: "Test Tree Command After Fetch"
text: "Simple test to verify tree command works after fetch_file"

targets:
  - ipynb: "./tmp_test.ipynb"

sections:
  - name: test-tree
    title: "Test Tree Command"
    text: "Testing tree command after fetch_file"
    steps:
      - text: "Setting up BAML"
      - baml_setup: true
      - text: "Fetching a BAML file"
      - fetch_file: {src: ./walkthrough/01-agent.baml, dest: baml_src/agent.baml}
      - command: "!tree -I baml_client"
      - text: "The tree command above should show our file structure"
      - text: "Let's fetch another file"
      - fetch_file: {src: ./walkthrough/02-tool_calculator.baml, dest: baml_src/tool_calculator.baml}
      - command: "!tree -I baml_client"
      - text: "Now we should see both BAML files in the tree"
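For orientation, the step vocabulary this YAML uses (and that the converter script changed below consumes) can be summarized as a type. A hedged TypeScript sketch, with field names taken from the files in this commit; the real converter is Python, and the optionality of `run_main`'s fields is an assumption:

```ts
// Hypothetical typing of the walkthrough-step DSL seen in the YAML above.
type Step =
  | { text: string }                                        // markdown cell
  | { baml_setup: true }                                    // inject BAML setup cells
  | { fetch_file: { src: string; dest: string } }           // curl a file into place
  | { command: string }                                     // shell command cell
  | { file: { src: string } }                               // copy a source file
  | { run_main: { regenerate_baml?: boolean; args?: string } }; // run main()

interface Walkthrough {
  title: string;
  text: string;
  targets: { ipynb: string }[];
  sections: { name: string; title: string; text: string; steps: Step[] }[];
}
```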
@@ -74,6 +74,7 @@ sections:

          This BAML file defines what our agent can do:
      - fetch_file: {src: ./walkthrough/01-agent.baml, dest: baml_src/agent.baml}
      - command: "!tree -I baml_client"
      - text: |
          Now let's create our main function that accepts a message parameter:
      - file: {src: ./walkthrough/01-main.py}
@@ -279,9 +280,11 @@ sections:
      - fetch_file: {src: ./walkthrough/06-agent.baml, dest: baml_src/agent.baml}
      - text: |
          Now let's test it with a simple calculation to see the reasoning in action:

          **Note:** The BAML logs below will show the model's reasoning steps. Look for the `<reasoning>` tags in the logs to see how the model thinks through the problem before deciding what to do.
      - run_main: {regenerate_baml: true, args: "can you multiply 3 and 4"}
      - text: |
          You should notice in the BAML logs (if enabled) that the model now includes reasoning steps before deciding what to do.
          You should see the reasoning steps in the BAML logs above. The model explicitly thinks through what it needs to do before making a decision.

          ## Advanced Prompt Engineering
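The `<reasoning>` tags called out in this hunk are plain text in the captured logs. As a rough illustration (a hypothetical TypeScript helper, not part of this commit; it assumes the logs are already in hand as a string and use the `<reasoning>` tag format from the prompt), extracting them is a one-regex job:

```ts
// Hypothetical sketch: pull <reasoning>...</reasoning> blocks out of captured log text.
function extractReasoning(logText: string): string[] {
  const blocks = logText.match(/<reasoning>([\s\S]*?)<\/reasoning>/g) ?? [];
  return blocks.map((block) => block.replace(/<\/?reasoning>/g, "").trim());
}

// usage: extractReasoning(capturedLogs).forEach((r) => console.log("reasoning:", r));
```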
@@ -83,110 +83,38 @@ def get_baml_client():
    init_code = "!baml-cli init"
    nb.cells.append(new_code_cell(init_code))

    # Fourth cell: BAML logging helper for Jupyter
    logging_setup = '''# Enable BAML logging capture in Jupyter
    # Fourth cell: Add BAML logging helper
    logging_helper = '''# Helper function to capture BAML logs in notebook output
import os
import sys
from IPython.utils.capture import capture_output

# Set BAML logging level
os.environ['BAML_LOG'] = 'info'

# Helper function to run code with BAML log capture
def run_with_baml_logs(func, *args, **kwargs):
    """Run a function and display BAML logs in the notebook."""
    print(f"Running with BAML_LOG={os.environ.get('BAML_LOG')}...")

    # Capture all output
    """Run a function and capture BAML logs in the notebook output."""
    # Capture both stdout and stderr
    with capture_output() as captured:
        result = func(*args, **kwargs)

    # Display the result first
    if result is not None:
        print("=== Result ===")
        print(result)

    # Display captured stdout
    # Display the captured output
    if captured.stdout:
        print("\\n=== Output ===")
        print(captured.stdout)

    # Display BAML logs from stderr
    if captured.stderr:
        # BAML logs go to stderr - format them nicely
        print("\\n=== BAML Logs ===")
        # Format the logs for better readability
        log_lines = captured.stderr.strip().split('\\n')
        for line in log_lines:
            if 'reasoning' in line.lower() or '<reasoning>' in line:
                print(f"🤔 {line}")
            elif 'error' in line.lower():
                print(f"❌ {line}")
            elif 'warn' in line.lower():
                print(f"⚠️ {line}")
            else:
                print(f"  {line}")
        print(captured.stderr)
        print("=================\\n")

    return result

print("BAML logging helper loaded! Use run_with_baml_logs(main, 'your message') to see logs.")
# Set BAML log level (options: error, warn, info, debug, trace)
os.environ['BAML_LOG'] = 'info'
'''
    nb.cells.append(new_code_cell(logging_setup))
    nb.cells.append(new_code_cell(logging_helper))

def process_step(nb, step, base_path, current_functions, section_name=None):
    """Process different step types."""
    if 'text' in step:
        # Add markdown cell
        nb.cells.append(new_markdown_cell(step['text']))

        # Special handling for reasoning section
        if section_name == 'customize-prompt' and 'reasoning in action' in step['text']:
            # Add enhanced reasoning visualization after the text
            reasoning_viz = '''# Enhanced logging for reasoning visualization
import re
from IPython.display import display, HTML

def run_and_show_reasoning(func, *args, **kwargs):
    """Run a function and highlight the reasoning steps from BAML logs."""
    from IPython.utils.capture import capture_output

    with capture_output() as captured:
        result = func(*args, **kwargs)

    # Extract and format reasoning from logs
    if captured.stderr:
        # Look for reasoning sections in the logs
        log_text = captured.stderr

        # Find reasoning blocks
        reasoning_pattern = r'<reasoning>(.*?)</reasoning>'
        reasoning_matches = re.findall(reasoning_pattern, log_text, re.DOTALL)

        if reasoning_matches:
            display(HTML("<h3>🧠 Model Reasoning:</h3>"))
            for reasoning in reasoning_matches:
                display(HTML(f"""
                <div style='background-color: #f0f8ff; border-left: 4px solid #4169e1;
                            padding: 10px; margin: 10px 0; font-family: monospace;'>
                    {reasoning.strip().replace(chr(10), '<br>')}
                </div>
                """))

    # Show the result
    display(HTML("<h3>📤 Response:</h3>"))
    display(HTML(f"<pre>{str(result)}</pre>"))

    # Optionally show full logs
    display(HTML("""<details><summary>View Full BAML Logs</summary>
    <pre style='font-size: 0.8em; background-color: #f5f5f5; padding: 10px;'>""" +
    log_text.replace('<', '&lt;').replace('>', '&gt;') +
    "</pre></details>"))

    return result

print("Enhanced reasoning visualization loaded! Use:")
print("run_and_show_reasoning(main, 'can you multiply 3 and 4')")
'''
            nb.cells.append(new_code_cell(reasoning_viz))

    if 'baml_setup' in step:
        # Add BAML setup cells
@@ -256,18 +184,18 @@ print("run_and_show_reasoning(main, 'can you multiply 3 and 4')")
        else:
            call_parts.append(f'{key}={value}')

    # Generate the function call
    main_call = f'main({", ".join(call_parts)})' if call_parts else "main()"
    # Generate the function call - default to using logging for BAML sections
    if call_parts:
        main_call = f'main({", ".join(call_parts)})'
    else:
        main_call = "main()"

    # Use different wrappers based on section
    if section_name == 'customize-prompt':
        # Use enhanced reasoning visualization for this section
        nb.cells.append(new_code_cell(f'run_and_show_reasoning({main_call})'))
    elif section_name in ['cli-and-agent', 'calculator-tools', 'tool-loop']:
        # Use basic logging wrapper for these sections
    # For sections with BAML, use logging wrapper
    if section_name in ['cli-and-agent', 'calculator-tools', 'tool-loop', 'baml-tests',
                        'human-tools', 'customize-prompt', 'context-window']:
        nb.cells.append(new_code_cell(f'run_with_baml_logs({main_call})'))
    else:
        # Default to plain call
        # Only hello-world section runs without logging
        nb.cells.append(new_code_cell(main_call))

def convert_walkthrough_to_notebook(yaml_path, output_path):
File diff suppressed because it is too large
@@ -1,142 +0,0 @@
# Chapter 0 - Hello World

Let's start with a basic TypeScript setup and a hello world program.

This guide is written in TypeScript (yes, a python version is coming soon)

There are many checkpoints between every file edit in the workshop steps,
so even if you aren't super familiar with TypeScript,
you should be able to keep up and run each example.

To run this guide, you'll need a relatively recent version of nodejs and npm installed

You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine

    brew install node@20

You should see the node version

    node --version

Copy initial package.json

    cp ./walkthrough/00-package.json package.json

<details>
<summary>show file</summary>

```json
// ./walkthrough/00-package.json
{
  "name": "my-agent",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "dev": "tsx src/index.ts",
    "build": "tsc"
  },
  "dependencies": {
    "tsx": "^4.15.0",
    "typescript": "^5.0.0"
  },
  "devDependencies": {
    "@types/node": "^20.0.0",
    "@typescript-eslint/eslint-plugin": "^6.0.0",
    "@typescript-eslint/parser": "^6.0.0",
    "eslint": "^8.0.0"
  }
}
```

</details>

Install dependencies

    npm install

Copy tsconfig.json

    cp ./walkthrough/00-tsconfig.json tsconfig.json

<details>
<summary>show file</summary>

```json
// ./walkthrough/00-tsconfig.json
{
  "compilerOptions": {
    "target": "ES2017",
    "lib": ["esnext"],
    "allowJs": true,
    "skipLibCheck": true,
    "strict": true,
    "noEmit": true,
    "esModuleInterop": true,
    "module": "esnext",
    "moduleResolution": "bundler",
    "resolveJsonModule": true,
    "isolatedModules": true,
    "jsx": "preserve",
    "incremental": true,
    "plugins": [],
    "paths": {
      "@/*": ["./*"]
    }
  },
  "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
  "exclude": ["node_modules", "walkthrough"]
}
```

</details>

add .gitignore

    cp ./walkthrough/00-.gitignore .gitignore

<details>
<summary>show file</summary>

```gitignore
// ./walkthrough/00-.gitignore
baml_client/
node_modules/
```

</details>

Create src folder

    mkdir -p src

Add a simple hello world index.ts

    cp ./walkthrough/00-index.ts src/index.ts

<details>
<summary>show file</summary>

```ts
// ./walkthrough/00-index.ts
async function hello(): Promise<void> {
  console.log('hello, world!')
}

async function main() {
  await hello()
}

main().catch(console.error)
```

</details>

Run it to verify

    npx tsx src/index.ts

You should see:

    hello, world!
@@ -1,2 +0,0 @@
baml_client/
node_modules/
@@ -1,9 +0,0 @@
async function hello(): Promise<void> {
  console.log('hello, world!')
}

async function main() {
  await hello()
}

main().catch(console.error)
@@ -1,20 +0,0 @@
{
  "name": "my-agent",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "dev": "tsx src/index.ts",
    "build": "tsc"
  },
  "dependencies": {
    "tsx": "^4.15.0",
    "typescript": "^5.0.0"
  },
  "devDependencies": {
    "@types/node": "^20.0.0",
    "@typescript-eslint/eslint-plugin": "^6.0.0",
    "@typescript-eslint/parser": "^6.0.0",
    "eslint": "^8.0.0"
  }
}
@@ -1,24 +0,0 @@
{
  "compilerOptions": {
    "target": "ES2017",
    "lib": ["esnext"],
    "allowJs": true,
    "skipLibCheck": true,
    "strict": true,
    "noEmit": true,
    "esModuleInterop": true,
    "module": "esnext",
    "moduleResolution": "bundler",
    "resolveJsonModule": true,
    "isolatedModules": true,
    "jsx": "preserve",
    "incremental": true,
    "plugins": [],
    "paths": {
      "@/*": ["./*"]
    }
  },
  "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
  "exclude": ["node_modules", "walkthrough"]
}
@@ -1,2 +0,0 @@
baml_client/
node_modules/
@@ -1,231 +0,0 @@
# Chapter 1 - CLI and Agent Loop

Now let's add BAML and create our first agent with a CLI interface.

First, we'll need to install [BAML](https://github.com/boundaryml/baml)
which is a tool for prompting and structured outputs.

    npm install @boundaryml/baml

Initialize BAML

    npx baml-cli init

Remove default resume.baml

    rm baml_src/resume.baml

Add our starter agent, a single baml prompt that we'll build on

    cp ./walkthrough/01-agent.baml baml_src/agent.baml

<details>
<summary>show file</summary>

```rust
// ./walkthrough/01-agent.baml
class DoneForNow {
  intent "done_for_now"
  message string
}

client<llm> Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY
  }
}

function DetermineNextStep(
    thread: string
) -> DoneForNow {
    client Qwen3

    // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
    prompt #"
        {{ _.role("system") }}

        /nothink

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}
```

</details>

Generate BAML client code

    npx baml-cli generate

Enable BAML logging for this section

    export BAML_LOG=debug

Add the CLI interface

    cp ./walkthrough/01-cli.ts src/cli.ts

<details>
<summary>show file</summary>

```ts
// ./walkthrough/01-cli.ts
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "./agent";

export async function cli() {
    // Get command line arguments, skipping the first two (node and script name)
    const args = process.argv.slice(2);

    if (args.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Join all arguments into a single message
    const message = args.join(" ");

    // Create a new thread with the user's message as the initial event
    const thread = new Thread([{ type: "user_input", data: message }]);

    // Run the agent loop with the thread
    const result = await agentLoop(thread);
    console.log(result);
}
```

</details>

Update index.ts to use the CLI

```diff
src/index.ts
+import { cli } from "./cli"
+
 async function hello(): Promise<void> {
   console.log('hello, world!')

 async function main() {
-  await hello()
+  await cli()
 }
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/01-index.ts src/index.ts

</details>

Add the agent implementation

    cp ./walkthrough/01-agent.ts src/agent.ts

<details>
<summary>show file</summary>

```ts
// ./walkthrough/01-agent.ts
import { b } from "../baml_client";

// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

export interface Event {
  type: string
  data: any;
}

export class Thread {
  events: Event[] = [];

  constructor(events: Event[]) {
    this.events = events;
  }

  serializeForLLM() {
    // can change this to whatever custom serialization you want to do, XML, etc
    // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
    return JSON.stringify(this.events);
  }
}

// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
  const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
  return nextStep;
}
```

</details>
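As the comment in `serializeForLLM` notes, `JSON.stringify` is only the simplest choice - the serialization format is yours to own. A hedged sketch of an XML-style alternative (illustrative only, not used in the walkthrough; some models track tagged event blocks more reliably than raw JSON):

```ts
// Hypothetical alternative serializer for the same Event shape as above.
interface Event {
  type: string;
  data: any;
}

function serializeAsXml(events: Event[]): string {
  return events
    .map((e) => `<${e.type}>\n${JSON.stringify(e.data, null, 2)}\n</${e.type}>`)
    .join("\n\n");
}

// serializeAsXml([{ type: "user_input", data: "hello!" }])
// => '<user_input>\n"hello!"\n</user_input>'
```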
The BAML code is configured to use BASETEN_API_KEY by default

To get a Baseten API key and URL, create an account at [baseten.co](https://baseten.co),
and then deploy [Qwen3 32B from the model library](https://www.baseten.co/library/qwen-3-32b/).

```rust
function DetermineNextStep(thread: string) -> DoneForNow {
    client Qwen3
    // ...
```

If you want to run the example with no changes, you can set the BASETEN_API_KEY env var to any valid baseten key.

If you want to try swapping out the model, you can change the `client` line.

[Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms)

For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini)
or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider.

For example, to use openai with an OPENAI_API_KEY, you can do:

    client "openai/gpt-4o"

Set your env vars

    export BASETEN_API_KEY=...
    export BASETEN_BASE_URL=...

Try it out

    npx tsx src/index.ts hello

you should see a familiar response from the model

    {
      intent: 'done_for_now',
      message: 'Hello! How can I assist you today?'
    }
@@ -1,9 +0,0 @@
async function hello(): Promise<void> {
  console.log('hello, world!')
}

async function main() {
  await hello()
}

main().catch(console.error)
@@ -1,49 +0,0 @@
class DoneForNow {
  intent "done_for_now"
  message string
}

client<llm> Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY
  }
}

function DetermineNextStep(
    thread: string
) -> DoneForNow {
    client Qwen3

    // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
    prompt #"
        {{ _.role("system") }}

        /nothink

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}
@@ -1,32 +0,0 @@
import { b } from "../baml_client";

// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

export interface Event {
  type: string
  data: any;
}

export class Thread {
  events: Event[] = [];

  constructor(events: Event[]) {
    this.events = events;
  }

  serializeForLLM() {
    // can change this to whatever custom serialization you want to do, XML, etc
    // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
    return JSON.stringify(this.events);
  }
}

// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
  const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
  return nextStep;
}
@@ -1,23 +0,0 @@
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "./agent";

export async function cli() {
    // Get command line arguments, skipping the first two (node and script name)
    const args = process.argv.slice(2);

    if (args.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Join all arguments into a single message
    const message = args.join(" ");

    // Create a new thread with the user's message as the initial event
    const thread = new Thread([{ type: "user_input", data: message }]);

    // Run the agent loop with the thread
    const result = await agentLoop(thread);
    console.log(result);
}
@@ -1,11 +0,0 @@
import { cli } from "./cli"

async function hello(): Promise<void> {
  console.log('hello, world!')
}

async function main() {
  await cli()
}

main().catch(console.error)
@@ -1,2 +0,0 @@
baml_client/
node_modules/
@@ -1,84 +0,0 @@
# Chapter 2 - Add Calculator Tools

Let's add some calculator tools to our agent.

Let's start by adding a tool definition for the calculator

These are simple structured outputs that we'll ask the model to
return as a "next step" in the agentic loop.

    cp ./walkthrough/02-tool_calculator.baml baml_src/tool_calculator.baml

<details>
<summary>show file</summary>

```rust
// ./walkthrough/02-tool_calculator.baml
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


class AddTool {
  intent "add"
  a int | float
  b int | float
}

class SubtractTool {
  intent "subtract"
  a int | float
  b int | float
}

class MultiplyTool {
  intent "multiply"
  a int | float
  b int | float
}

class DivideTool {
  intent "divide"
  a int | float
  b int | float
}
```

</details>
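Worth noting what this buys you on the TypeScript side: each `intent` literal becomes the discriminant of a union, so the compiler narrows tool calls for you. A rough sketch of consuming such a union (types written out by hand here as stand-ins; in the project they come from the generated `baml_client`):

```ts
// Hand-written stand-ins for the generated types (illustrative only).
type AddTool = { intent: "add"; a: number; b: number };
type DoneForNow = { intent: "done_for_now"; message: string };
type NextStep = AddTool | DoneForNow;

function describe(step: NextStep): string {
  if (step.intent === "add") {
    // narrowed to AddTool: .a and .b are available here
    return `model wants to add ${step.a} + ${step.b}`;
  }
  // narrowed to DoneForNow
  return step.message;
}
```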
Now, let's update the agent's DetermineNextStep method to
expose the calculator tools as potential next steps

```diff
baml_src/agent.baml
 function DetermineNextStep(
     thread: string
-) -> DoneForNow {
+) -> CalculatorTools | DoneForNow {
     client Qwen3
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/02-agent.baml baml_src/agent.baml

</details>

Generate updated BAML client

    npx baml-cli generate

Try out the calculator

    npx tsx src/index.ts 'can you add 3 and 4'

You should see a tool call to the calculator

    {
      intent: 'add',
      a: 3,
      b: 4
    }
@@ -1,49 +0,0 @@
class DoneForNow {
  intent "done_for_now"
  message string
}

client<llm> Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY
  }
}

function DetermineNextStep(
    thread: string
) -> DoneForNow {
    client Qwen3

    // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
    prompt #"
        {{ _.role("system") }}

        /nothink

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}
@@ -1,75 +0,0 @@
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview

client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}


client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/round-robin
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/fallback
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    strategy [CustomGPT4oMini, CustomGPT4oMini]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/retry
retry_policy Constant {
  max_retries 3
  // Strategy is optional
  strategy {
    type constant_delay
    delay_ms 200
  }
}

retry_policy Exponential {
  max_retries 2
  // Strategy is optional
  strategy {
    type exponential_backoff
    delay_ms 300
    multiplier 1.5
    max_delay_ms 10000
  }
}
@@ -1,18 +0,0 @@
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
  // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
  output_type "typescript"

  // Where the generated code will be saved (relative to baml_src/)
  output_dir "../"

  // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
  // The BAML VSCode extension version should also match this version.
  version "0.88.0"

  // Valid values: "sync", "async"
  // This controls what `b.FunctionName()` will be (sync or async).
  default_client_mode async
}
@@ -1,32 +0,0 @@
import { b } from "../baml_client";

// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

export interface Event {
  type: string
  data: any;
}

export class Thread {
  events: Event[] = [];

  constructor(events: Event[]) {
    this.events = events;
  }

  serializeForLLM() {
    // can change this to whatever custom serialization you want to do, XML, etc
    // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
    return JSON.stringify(this.events);
  }
}

// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
  const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
  return nextStep;
}
@@ -1,23 +0,0 @@
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "./agent";

export async function cli() {
    // Get command line arguments, skipping the first two (node and script name)
    const args = process.argv.slice(2);

    if (args.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Join all arguments into a single message
    const message = args.join(" ");

    // Create a new thread with the user's message as the initial event
    const thread = new Thread([{ type: "user_input", data: message }]);

    // Run the agent loop with the thread
    const result = await agentLoop(thread);
    console.log(result);
}
@@ -1,11 +0,0 @@
import { cli } from "./cli"

async function hello(): Promise<void> {
  console.log('hello, world!')
}

async function main() {
  await cli()
}

main().catch(console.error)
@@ -1,49 +0,0 @@
class DoneForNow {
  intent "done_for_now"
  message string
}

client<llm> Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY
  }
}

function DetermineNextStep(
    thread: string
) -> CalculatorTools | DoneForNow {
    client Qwen3

    // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
    prompt #"
        {{ _.role("system") }}

        /nothink

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}
@@ -1,27 +0,0 @@
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


class AddTool {
  intent "add"
  a int | float
  b int | float
}

class SubtractTool {
  intent "subtract"
  a int | float
  b int | float
}

class MultiplyTool {
  intent "multiply"
  a int | float
  b int | float
}

class DivideTool {
  intent "divide"
  a int | float
  b int | float
}
@@ -1,2 +0,0 @@
baml_client/
node_modules/
@@ -1,194 +0,0 @@
# Chapter 3 - Process Tool Calls in a Loop

Now let's add a real agentic loop that can run the tools and get a final answer from the LLM.

First, let's update the agent to handle the tool call

```diff
src/agent.ts
 }

-// right now this just runs one turn with the LLM, but
-// we'll update this function to handle all the agent logic
-export async function agentLoop(thread: Thread): Promise<AgentResponse> {
-  const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
-  return nextStep;
+
+
+export async function agentLoop(thread: Thread): Promise<string> {
+
+  while (true) {
+    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
+    console.log("nextStep", nextStep);
+
+    switch (nextStep.intent) {
+      case "done_for_now":
+        // response to human, return the next step object
+        return nextStep.message;
+      case "add":
+        thread.events.push({
+          "type": "tool_call",
+          "data": nextStep
+        });
+        const result = nextStep.a + nextStep.b;
+        console.log("tool_response", result);
+        thread.events.push({
+          "type": "tool_response",
+          "data": result
+        });
+        continue;
+      default:
+        throw new Error(`Unknown intent: ${nextStep.intent}`);
+    }
+  }
 }
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/03-agent.ts src/agent.ts

</details>

Now, let's try it out

    npx tsx src/index.ts 'can you add 3 and 4'

you should see the agent call the tool and then return the result

    {
      intent: 'done_for_now',
      message: 'The sum of 3 and 4 is 7.'
    }

For the next step, we'll do a more complex calculation, let's turn off the baml logs for more concise output

    export BAML_LOG=off

Try a multi-step calculation

    npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result'

you'll notice that tools like multiply and divide are not available

    npx tsx src/index.ts 'can you multiply 3 and 4'

next, let's add handlers for the rest of the calculator tools

```diff
src/agent.ts
-import { b } from "../baml_client";
+import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

-// tool call or a respond to human tool
-type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
-
 export interface Event {
   type: string
 }

+export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

+export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
+  let result: number;
+  switch (nextStep.intent) {
+    case "add":
+      result = nextStep.a + nextStep.b;
+      console.log("tool_response", result);
+      thread.events.push({
+        "type": "tool_response",
+        "data": result
+      });
+      return thread;
+    case "subtract":
+      result = nextStep.a - nextStep.b;
+      console.log("tool_response", result);
+      thread.events.push({
+        "type": "tool_response",
+        "data": result
+      });
+      return thread;
+    case "multiply":
+      result = nextStep.a * nextStep.b;
+      console.log("tool_response", result);
+      thread.events.push({
+        "type": "tool_response",
+        "data": result
+      });
+      return thread;
+    case "divide":
+      result = nextStep.a / nextStep.b;
+      console.log("tool_response", result);
+      thread.events.push({
+        "type": "tool_response",
+        "data": result
+      });
+      return thread;
+  }
+}

 export async function agentLoop(thread: Thread): Promise<string> {
     console.log("nextStep", nextStep);

+    thread.events.push({
+      "type": "tool_call",
+      "data": nextStep
+    });
+
     switch (nextStep.intent) {
       case "done_for_now":
         return nextStep.message;
       case "add":
-        thread.events.push({
-          "type": "tool_call",
-          "data": nextStep
-        });
-        const result = nextStep.a + nextStep.b;
-        console.log("tool_response", result);
-        thread.events.push({
-          "type": "tool_response",
-          "data": result
-        });
-        continue;
-      default:
-        throw new Error(`Unknown intent: ${nextStep.intent}`);
+      case "subtract":
+      case "multiply":
+      case "divide":
+        thread = await handleNextStep(nextStep, thread);
     }
 }
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/03b-agent.ts src/agent.ts

</details>
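Side note: the four calculator cases in `handleNextStep` differ only in the operator, so a table-driven dispatch is a natural refactor. A sketch of that variant (hypothetical; the walkthrough keeps the explicit switch, which is easier to read step by step):

```ts
// Hypothetical refactor: one lookup table instead of four near-identical cases.
const ops: Record<string, (a: number, b: number) => number> = {
  add: (a, b) => a + b,
  subtract: (a, b) => a - b,
  multiply: (a, b) => a * b,
  divide: (a, b) => a / b,
};

function applyTool(intent: string, a: number, b: number): number {
  const op = ops[intent];
  if (!op) throw new Error(`Unknown intent: ${intent}`);
  return op(a, b);
}
```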
Test subtraction

    npx tsx src/index.ts 'can you subtract 3 from 4'

now, let's test the multiplication tool

    npx tsx src/index.ts 'can you multiply 3 and 4'

finally, let's test a more complex calculation with multiple operations

    npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'

congratulations, you've taken your first step into hand-rolling an agent loop.

from here, we're going to start incorporating some more intermediate and advanced
concepts for 12-factor agents.
@@ -1,49 +0,0 @@
class DoneForNow {
  intent "done_for_now"
  message string
}

client<llm> Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY
  }
}

function DetermineNextStep(
    thread: string
) -> CalculatorTools | DoneForNow {
    client Qwen3

    // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
    prompt #"
        {{ _.role("system") }}

        /nothink

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}
@@ -1,75 +0,0 @@
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview

client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}


client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/round-robin
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/fallback
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    strategy [CustomGPT4oMini, CustomGPT4oMini]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/retry
retry_policy Constant {
  max_retries 3
  // Strategy is optional
  strategy {
    type constant_delay
    delay_ms 200
  }
}

retry_policy Exponential {
  max_retries 2
  // Strategy is optional
  strategy {
    type exponential_backoff
    delay_ms 300
    multiplier 1.5
    max_delay_ms 10000
  }
}
@@ -1,18 +0,0 @@
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
  // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
  output_type "typescript"

  // Where the generated code will be saved (relative to baml_src/)
  output_dir "../"

  // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
  // The BAML VSCode extension version should also match this version.
  version "0.88.0"

  // Valid values: "sync", "async"
  // This controls what `b.FunctionName()` will be (sync or async).
  default_client_mode async
}
@@ -1,27 +0,0 @@
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


class AddTool {
  intent "add"
  a int | float
  b int | float
}

class SubtractTool {
  intent "subtract"
  a int | float
  b int | float
}

class MultiplyTool {
  intent "multiply"
  a int | float
  b int | float
}

class DivideTool {
  intent "divide"
  a int | float
  b int | float
}
@@ -1,32 +0,0 @@
import { b } from "../baml_client";

// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

export interface Event {
  type: string
  data: any;
}

export class Thread {
  events: Event[] = [];

  constructor(events: Event[]) {
    this.events = events;
  }

  serializeForLLM() {
    // can change this to whatever custom serialization you want to do, XML, etc
    // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
    return JSON.stringify(this.events);
  }
}

// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
  const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
  return nextStep;
}
@@ -1,23 +0,0 @@
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "./agent";

export async function cli() {
    // Get command line arguments, skipping the first two (node and script name)
    const args = process.argv.slice(2);

    if (args.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Join all arguments into a single message
    const message = args.join(" ");

    // Create a new thread with the user's message as the initial event
    const thread = new Thread([{ type: "user_input", data: message }]);

    // Run the agent loop with the thread
    const result = await agentLoop(thread);
    console.log(result);
}
@@ -1,11 +0,0 @@
import { cli } from "./cli"

async function hello(): Promise<void> {
  console.log('hello, world!')
}

async function main() {
  await cli()
}

main().catch(console.error)
@@ -1,55 +0,0 @@
import { b } from "../baml_client";

// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

export interface Event {
  type: string
  data: any;
}

export class Thread {
  events: Event[] = [];

  constructor(events: Event[]) {
    this.events = events;
  }

  serializeForLLM() {
    // can change this to whatever custom serialization you want to do, XML, etc
    // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
    return JSON.stringify(this.events);
  }
}



export async function agentLoop(thread: Thread): Promise<string> {

  while (true) {
    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
    console.log("nextStep", nextStep);

    switch (nextStep.intent) {
      case "done_for_now":
        // response to human, return the next step object
        return nextStep.message;
      case "add":
        thread.events.push({
          "type": "tool_call",
          "data": nextStep
        });
        const result = nextStep.a + nextStep.b;
        console.log("tool_response", result);
        thread.events.push({
          "type": "tool_response",
          "data": result
        });
        continue;
      default:
        throw new Error(`Unknown intent: ${nextStep.intent}`);
    }
  }
}
@@ -1,86 +0,0 @@
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

export interface Event {
  type: string
  data: any;
}

export class Thread {
  events: Event[] = [];

  constructor(events: Event[]) {
    this.events = events;
  }

  serializeForLLM() {
    // can change this to whatever custom serialization you want to do, XML, etc
    // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
    return JSON.stringify(this.events);
  }
}

export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
  let result: number;
  switch (nextStep.intent) {
    case "add":
      result = nextStep.a + nextStep.b;
      console.log("tool_response", result);
      thread.events.push({
        "type": "tool_response",
        "data": result
      });
      return thread;
    case "subtract":
      result = nextStep.a - nextStep.b;
      console.log("tool_response", result);
      thread.events.push({
        "type": "tool_response",
        "data": result
      });
      return thread;
    case "multiply":
      result = nextStep.a * nextStep.b;
      console.log("tool_response", result);
      thread.events.push({
        "type": "tool_response",
        "data": result
      });
      return thread;
    case "divide":
      result = nextStep.a / nextStep.b;
      console.log("tool_response", result);
      thread.events.push({
        "type": "tool_response",
        "data": result
      });
      return thread;
  }
}

export async function agentLoop(thread: Thread): Promise<string> {

  while (true) {
    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
    console.log("nextStep", nextStep);

    thread.events.push({
      "type": "tool_call",
      "data": nextStep
    });

    switch (nextStep.intent) {
      case "done_for_now":
        // response to human, return the next step object
        return nextStep.message;
      case "add":
      case "subtract":
      case "multiply":
      case "divide":
        thread = await handleNextStep(nextStep, thread);
    }
  }
}
File diff suppressed because it is too large