workshop stuff

dexhorthy
2025-07-17 09:46:03 -07:00
parent 2900ce9b50
commit c700b222a1
14 changed files with 1051 additions and 393 deletions

View File

@@ -10,7 +10,7 @@ generator target {
// The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
// The BAML VSCode extension version should also match this version.
- version "0.85.0"
+ version "0.202.0"
// Valid values: "sync", "async"
// This controls what `b.FunctionName()` will be (sync or async).

View File

@@ -8,7 +8,7 @@
"name": "my-agent",
"version": "0.1.0",
"dependencies": {
- "baml": "^0.0.0",
+ "@boundaryml/baml": "latest",
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
@@ -19,6 +19,142 @@
"eslint": "^8.0.0"
}
},
"node_modules/@boundaryml/baml": {
"version": "0.202.0",
"resolved": "https://registry.npmjs.org/@boundaryml/baml/-/baml-0.202.0.tgz",
"integrity": "sha512-0RNgCBp2egdWJfsNqNaWe/qUg6ea9OLzkcUTE8+wHmlpB2SgK5QRYTaOnt9WX4KHnUvIiMJijIOjy35RGYk45g==",
"license": "MIT",
"dependencies": {
"@scarf/scarf": "^1.3.0"
},
"bin": {
"baml-cli": "cli.js"
},
"engines": {
"node": ">= 10"
},
"optionalDependencies": {
"@boundaryml/baml-darwin-arm64": "0.202.0",
"@boundaryml/baml-darwin-x64": "0.202.0",
"@boundaryml/baml-linux-arm64-gnu": "0.202.0",
"@boundaryml/baml-linux-arm64-musl": "0.202.0",
"@boundaryml/baml-linux-x64-gnu": "0.202.0",
"@boundaryml/baml-linux-x64-musl": "0.202.0",
"@boundaryml/baml-win32-x64-msvc": "0.202.0"
}
},
"node_modules/@boundaryml/baml-darwin-arm64": {
"version": "0.202.0",
"resolved": "https://registry.npmjs.org/@boundaryml/baml-darwin-arm64/-/baml-darwin-arm64-0.202.0.tgz",
"integrity": "sha512-i0Y9tCkaWcERJL4yL1/lWSvAYzKiGMsuO1MMDFO3R3cBvbGpRlGY13hKsDtpQy7YePoGzy68MMAqQFm1Y6ucLw==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@boundaryml/baml-darwin-x64": {
"version": "0.202.0",
"resolved": "https://registry.npmjs.org/@boundaryml/baml-darwin-x64/-/baml-darwin-x64-0.202.0.tgz",
"integrity": "sha512-e9q/igONW33ltNUAxW6Jimv/1bucN1LgD0TqaF6gSjhyelZr4bZ68f3n5rwK0UF+4VBkNkvC+UXoWgYky5dBOg==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@boundaryml/baml-linux-arm64-gnu": {
"version": "0.202.0",
"resolved": "https://registry.npmjs.org/@boundaryml/baml-linux-arm64-gnu/-/baml-linux-arm64-gnu-0.202.0.tgz",
"integrity": "sha512-3DWTK9gMUHv+BlsZ1BAprMXQsRzPFKhlzmG71y+G3s0ZJIFzrQ9rmdv93lejyslPPTw0M2TD2CjBDrNsnmSX3A==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@boundaryml/baml-linux-arm64-musl": {
"version": "0.202.0",
"resolved": "https://registry.npmjs.org/@boundaryml/baml-linux-arm64-musl/-/baml-linux-arm64-musl-0.202.0.tgz",
"integrity": "sha512-fTFK+w7ku61dKzIeIaNsMLpiT793MKmj1La6oznhwpuoOdLm861GXzJUut4Bri8n4UFULfnPiCCp4nU5nwpwcQ==",
"cpu": [
"arm64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@boundaryml/baml-linux-x64-gnu": {
"version": "0.202.0",
"resolved": "https://registry.npmjs.org/@boundaryml/baml-linux-x64-gnu/-/baml-linux-x64-gnu-0.202.0.tgz",
"integrity": "sha512-gKainskhyex0c8AmzrfYSbyRXwK4OCSjpO6oKni8+EFcaH/OZD6rDqmS1ggcNoTKw2MqC/H1hfyMCw3BdEDxVA==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@boundaryml/baml-linux-x64-musl": {
"version": "0.202.0",
"resolved": "https://registry.npmjs.org/@boundaryml/baml-linux-x64-musl/-/baml-linux-x64-musl-0.202.0.tgz",
"integrity": "sha512-KHrG8iut5vc58L41eKtNF8W1OgDzYMmXRtcuevHuy22cRb4TbhYP2bTOo+r9iZOc/zBN1Yl1Cv3U+u+pX3ypPw==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@boundaryml/baml-win32-x64-msvc": {
"version": "0.202.0",
"resolved": "https://registry.npmjs.org/@boundaryml/baml-win32-x64-msvc/-/baml-win32-x64-msvc-0.202.0.tgz",
"integrity": "sha512-DcZiQ/eRKf11FgKFnVN8H1Tsnc6M9UgC6tLKIwr0YUYe2buKPXNkS2tPk0n4gHSnPX/bdWqyeUchk+4E6yqiDQ==",
"cpu": [
"x64"
],
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">= 10"
}
},
"node_modules/@esbuild/aix-ppc64": {
"version": "0.25.4",
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.4.tgz",
@@ -606,6 +742,13 @@
"node": ">= 8"
}
},
"node_modules/@scarf/scarf": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/@scarf/scarf/-/scarf-1.4.0.tgz",
"integrity": "sha512-xxeapPiUXdZAE3che6f3xogoJPeZgig6omHEy1rIY5WVsB3H2BHNnZH+gHG6x91SCWyQCzWGsuL2Hh3ClO5/qQ==",
"hasInstallScript": true,
"license": "Apache-2.0"
},
"node_modules/@types/json-schema": {
"version": "7.0.15",
"resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz",
@@ -925,11 +1068,6 @@
"dev": true,
"license": "MIT"
},
"node_modules/baml": {
"version": "0.0.0",
"resolved": "https://registry.npmjs.org/baml/-/baml-0.0.0.tgz",
"integrity": "sha512-wlrNMVNrHKoB65HXhjTD8mFLWQZVaapWl35gHB+wrp4Sx1+zm5U32LJ2cgYV+1/UPBVC198E5PXJdwYNf2JFKg=="
},
"node_modules/brace-expansion": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",

View File

@@ -7,7 +7,7 @@
"build": "tsc"
},
"dependencies": {
- "baml": "^0.0.0",
+ "@boundaryml/baml": "latest",
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},

View File

@@ -1,9 +1,9 @@
# Workshop 2025-07-16: Python/Jupyter Notebook Implementation
- **Main Tool**: `hack/walkthroughgen_py.py` - Converts TypeScript walkthrough to Jupyter notebooks
+ **Main Tool**: `walkthroughgen_py.py` - Converts TypeScript walkthrough to Jupyter notebooks
- **Config**: `hack/walkthrough_python.yaml` - Defines notebook structure and content
+ **Config**: `walkthrough.yaml` - Defines notebook structure and content
- **Output**: `hack/workshop_final.ipynb` - Generated notebook with Chapters 0-7
+ **Output**: `workshop_final.ipynb` - Generated notebook with Chapters 0-7
- **Testing**: `hack/test_notebook_colab_sim.sh` - Simulates Google Colab environment
+ **Testing**: `test_notebook_colab_sim.sh` - Simulates Google Colab environment
## Key Implementation Learnings
@@ -53,15 +53,15 @@
## Testing Commands
- • Generate notebook: `uv run python hack/walkthroughgen_py.py hack/walkthrough_python.yaml -o hack/test.ipynb`
+ • Generate notebook: `uv run python walkthroughgen_py.py walkthrough.yaml -o test.ipynb`
- • Full Colab sim: `cd hack && ./test_notebook_colab_sim.sh`
+ • Full Colab sim: `./test_notebook_colab_sim.sh`
• Run BAML tests: `baml-cli test` (from directory with baml_src)
## File Structure
`walkthrough/*.py` - Python implementations of each chapter's code
`walkthrough/*.baml` - BAML files fetched from GitHub during notebook execution
- `hack/walkthroughgen_py.py` - Main conversion tool
+ `walkthroughgen_py.py` - Main conversion tool
- `hack/walkthrough_python.yaml` - Notebook definition with all chapters
+ `walkthrough.yaml` - Notebook definition with all chapters
- `hack/test_notebook_colab_sim.sh` - Full Colab environment simulation
+ `test_notebook_colab_sim.sh` - Full Colab environment simulation
- `hack/workshop_final.ipynb` - Final generated notebook ready for workshop
+ `workshop_final.ipynb` - Final generated notebook ready for workshop

View File

@@ -0,0 +1,71 @@
#!/usr/bin/env python3
"""
Analyze notebook for BAML log capture success/failure
"""
import json
import sys
import os
def check_logs(notebook_path):
"""Check if BAML logs were captured in the notebook"""
if not os.path.exists(notebook_path):
print(f"❌ Notebook not found: {notebook_path}")
return False, False
with open(notebook_path) as f:
nb = json.load(f)
found_log_pattern = False
found_capture_test = False
for i, cell in enumerate(nb['cells']):
if cell['cell_type'] == 'code' and 'outputs' in cell:
# Check if this is a log capture test cell
source = ''.join(cell.get('source', []))
if 'run_with_baml_logs' in source:
found_capture_test = True
print(f'Found log capture test in cell {i}')
# Check outputs for BAML logs
for output in cell['outputs']:
if output.get('output_type') == 'stream' and 'text' in output:
text = ''.join(output['text'])
# Look for the specific BAML log pattern
if '---Parsed Response (class DoneForNow)---' in text:
found_log_pattern = True
print(f'✅ FOUND BAML LOG PATTERN in cell {i} output!')
log_lines = [line for line in text.split('\n') if 'Parsed Response' in line]
if log_lines:
print(f'Log excerpt: {log_lines[0]}')
# Also check for our test markers
if 'Captured BAML Logs' in text:
print(f'Found "Captured BAML Logs" section in cell {i}')
if 'No BAML Logs Captured' in text:
print(f'Found "No BAML Logs Captured" section in cell {i}')
return found_capture_test, found_log_pattern
def main():
if len(sys.argv) != 2:
print("Usage: python analyze_log_capture.py <notebook_path>")
sys.exit(1)
notebook_path = sys.argv[1]
capture_test_found, log_pattern_found = check_logs(notebook_path)
if not capture_test_found:
print('❌ FAIL: No log capture test found in notebook')
sys.exit(1)
if log_pattern_found:
print('✅ PASS: BAML logs successfully captured in notebook output!')
sys.exit(0)
else:
print('❌ FAIL: BAML log pattern not found in captured output')
print('This means the log capture method is NOT working')
sys.exit(1)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,87 @@
#!/usr/bin/env python3
"""
Utility to inspect notebook cell outputs for debugging
"""
import json
import sys
import os
def inspect_notebook(notebook_path, filter_keyword=None):
"""Inspect notebook cells and outputs"""
if not os.path.exists(notebook_path):
print(f"❌ Notebook not found: {notebook_path}")
return
with open(notebook_path) as f:
nb = json.load(f)
print(f"📓 Inspecting notebook: {notebook_path}")
print(f"📊 Total cells: {len(nb['cells'])}")
print("=" * 60)
for i, cell in enumerate(nb['cells']):
if cell['cell_type'] == 'code':
source = ''.join(cell.get('source', []))
# Filter by keyword if provided
if filter_keyword and filter_keyword.lower() not in source.lower():
continue
print(f"\n🔍 CELL {i} ({'code'})")
print("📝 SOURCE:")
print(source[:300] + "..." if len(source) > 300 else source)
if 'outputs' in cell and cell['outputs']:
print(f"\n📤 OUTPUTS ({len(cell['outputs'])} outputs):")
for j, output in enumerate(cell['outputs']):
output_type = output.get('output_type', 'unknown')
print(f" Output {j}: type={output_type}")
if 'text' in output:
text = ''.join(output['text'])
print(f" Text length: {len(text)} chars")
# Show first few lines for context
lines = text.split('\n')[:5]
for line in lines:
if line.strip():
print(f" > {line[:80]}...")
# Check for interesting patterns
patterns = ['BAML', 'Parsed', 'Response', 'Error', 'Exception']
found_patterns = [p for p in patterns if p in text]
if found_patterns:
print(f" 🎯 Found patterns: {found_patterns}")
elif 'data' in output:
data_keys = list(output['data'].keys())
print(f" Data keys: {data_keys}")
# Check for execution errors
if output_type == 'error':
print(f" ❌ ERROR: {output.get('ename', 'Unknown')}")
print(f" 💬 Message: {output.get('evalue', 'No message')}")
if 'traceback' in output:
print(f" 📍 Traceback: {len(output['traceback'])} lines")
# Show last few lines of traceback
for line in output['traceback'][-3:]:
print(f" 🔍 {line.strip()}")
else:
print("\n📤 No outputs")
print("-" * 40)
def main():
if len(sys.argv) < 2:
print("Usage: python inspect_notebook.py <notebook_path> [filter_keyword]")
sys.exit(1)
notebook_path = sys.argv[1]
filter_keyword = sys.argv[2] if len(sys.argv) > 2 else None
inspect_notebook(notebook_path, filter_keyword)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,31 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"print(\"Hello stdout!\")\n",
"print(\"Hello stderr!\", file=sys.stderr)\n",
"with open(\"test_output.txt\", \"w\") as f:\n",
" f.write(\"Notebook executed successfully!\\n\")\n",
"print(\"✅ Test complete\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.8.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@@ -0,0 +1,35 @@
#!/bin/bash
set -e
echo "🧪 Testing BAML Log Capture..."
# Clean up any previous test
rm -f test_capture.ipynb
rm -rf tmp/test_capture_*
# Generate test notebook
echo "📝 Generating test notebook..."
uv run python walkthroughgen_py.py simple_log_test.yaml -o test_capture.ipynb
# Run in sim
echo "🚀 Running test in sim..."
./test_notebook_colab_sim.sh test_capture.ipynb > /dev/null 2>&1
# Find the executed notebook in the timestamped directory
NOTEBOOK_DIR=$(ls -1dt tmp/test_* | head -1)
NOTEBOOK_PATH="$NOTEBOOK_DIR/test_notebook.ipynb"
echo "📋 Analyzing results from $NOTEBOOK_PATH..."
# First dump debug info
echo "🔍 Dumping debug info..."
python3 inspect_notebook.py "$NOTEBOOK_PATH" "run_with_baml_logs"
echo ""
echo "📊 Running log capture analysis..."
# Check for BAML log patterns in the executed notebook
python3 analyze_log_capture.py "$NOTEBOOK_PATH"
echo "🧹 Cleaning up..."
rm -f test_capture.ipynb

View File

@@ -0,0 +1,426 @@
# Jupyter Notebook Testing Framework
This document describes the general testing framework for validating any functionality in Jupyter notebooks, with a specific example of testing BAML log capture.
## General Framework
### Overview
The testing framework provides a complete iteration loop for testing notebook implementations:
1. **Generate** test notebooks with specific functionality
2. **Execute** notebooks in a simulated Google Colab environment
3. **Analyze** executed notebooks for expected outputs and behaviors
4. **Report** clear pass/fail results
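A minimal sketch of what one pass through this loop looks like when driven from Python (the script names are the ones used throughout this document; `test_log_capture.sh` later in this doc does the same thing as a shell script):
```python
# Sketch: one generate → execute → analyze → report iteration.
import subprocess
import sys

def run_iteration(config_yaml: str, notebook: str) -> int:
    # 1. Generate the test notebook from a YAML config
    subprocess.run(
        ["uv", "run", "python", "walkthroughgen_py.py", config_yaml, "-o", notebook],
        check=True,
    )
    # 2. Execute it in the simulated Colab environment
    subprocess.run(["./test_notebook_colab_sim.sh", notebook], check=True)
    # 3. Analyze the most recently executed notebook
    latest_dir = subprocess.run(
        "ls -1dt tmp/test_* | head -1",
        shell=True, capture_output=True, text=True, check=True,
    ).stdout.strip()
    # 4. Report: analyze_log_capture.py exits non-zero on failure
    return subprocess.run(
        ["python3", "analyze_log_capture.py", f"{latest_dir}/test_notebook.ipynb"]
    ).returncode

if __name__ == "__main__":
    sys.exit(run_iteration("simple_log_test.yaml", "test_capture.ipynb"))
```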
### Core Components
#### Notebook Simulator (`test_notebook_colab_sim.sh`)
The simulation script creates a realistic Google Colab environment for any notebook:
**Environment Setup:**
- Creates timestamped test directory: `./tmp/test_YYYYMMDD_HHMMSS/`
- Sets up fresh Python virtual environment
- Installs Jupyter dependencies (`notebook`, `nbconvert`, `ipykernel`)
**Notebook Execution:**
- Copies test notebook to clean environment
- Uses `ExecutePreprocessor` to run all cells (simulates Colab execution)
- **Critical:** Activates virtual environment before execution
- **Critical:** Saves executed notebook with cell outputs back to disk
**Usage:**
```bash
./test_notebook_colab_sim.sh your_notebook.ipynb
```
The simulator will:
- Execute all cells in the notebook
- Preserve the test directory for inspection
- Show final directory structure
- Report success/failure
#### Output Inspector (`inspect_notebook.py`)
Debug utility for examining notebook cell outputs in detail:
**Features:**
- Shows cell source code and execution counts
- Displays all output types (stream, execute_result, error)
- Highlights patterns in output text
- Shows execution errors with tracebacks
- Filters cells by keywords for focused debugging
**Usage:**
```bash
# Inspect all cells
python3 inspect_notebook.py path/to/notebook.ipynb
# Filter for specific content
python3 inspect_notebook.py path/to/notebook.ipynb "keyword"
# Look for errors
python3 inspect_notebook.py path/to/notebook.ipynb "error"
```
**Sample Output:**
```
🔍 CELL 0 (code)
📝 SOURCE:
import sys
print("Hello!")
print("Error!", file=sys.stderr)
📤 OUTPUTS (2 outputs):
Output 0: type=stream
Text length: 7 chars
> Hello!...
Output 1: type=stream
Text length: 7 chars
> Error!...
🎯 Found patterns: ['Error']
```
### Key Insights for Notebook Testing
#### Execution Environment
1. **Virtual environment activation is critical** - Without it, execution fails silently
2. **Output persistence must be explicit** - `ExecutePreprocessor` only modifies notebook in memory
3. **Check execution counts** - `execution_count=None` means cell never executed
4. **Handle different output types** - stream, execute_result, error, display_data
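A minimal sketch of insights 2 and 3 in code, using the same `nbformat`/`ExecutePreprocessor` APIs the simulator relies on (the filename is illustrative):
```python
# Execute a notebook and persist its outputs explicitly.
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

nb = nbformat.read("test_notebook.ipynb", as_version=4)

ep = ExecutePreprocessor(timeout=600, kernel_name="python3")
ep.preprocess(nb, {"metadata": {"path": "."}})  # runs every cell, like Colab would

# ExecutePreprocessor only mutates the in-memory object; write it back
# or the file on disk will still have empty outputs.
with open("test_notebook.ipynb", "w") as f:
    nbformat.write(nb, f)

# execution_count stays None for any code cell that never actually ran.
counts = [c.get("execution_count") for c in nb.cells if c.cell_type == "code"]
print("Execution counts:", counts)
```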
#### Common Debugging Steps
1. **Verify basic execution:**
```bash
python3 -c "
import json
nb = json.load(open('path/to/notebook.ipynb'))
print('Execution counts:', [cell.get('execution_count') for cell in nb['cells'] if cell['cell_type']=='code'])
"
```
2. **Check for execution errors:**
```bash
python3 inspect_notebook.py path/to/notebook.ipynb "error"
```
3. **Look for specific output patterns:**
```bash
python3 inspect_notebook.py path/to/notebook.ipynb "your_pattern"
```
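Those three checks can also be rolled into one small helper (a sketch; the notebook path is the example directory from the sample run shown later):
```python
# Report cells that never ran, cells that errored, and cells whose
# output contains a given pattern.
import json

def debug_notebook(path: str, pattern: str):
    with open(path) as f:
        nb = json.load(f)
    never_ran, errored, matched = [], [], []
    for i, cell in enumerate(nb["cells"]):
        if cell["cell_type"] != "code":
            continue
        if cell.get("execution_count") is None:
            never_ran.append(i)
        for out in cell.get("outputs", []):
            if out.get("output_type") == "error":
                errored.append(i)
            if pattern in "".join(out.get("text", [])):
                matched.append(i)
    return never_ran, errored, matched

print(debug_notebook("tmp/test_20250716_191106/test_notebook.ipynb", "Parsed Response"))
```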
### Creating Custom Tests
#### 1. Minimal Test Template
Create a simple notebook that tests basic functionality:
```json
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Test basic execution\n",
"print('Hello from notebook!')\n",
"\n",
"# Test file creation\n",
"with open('test.txt', 'w') as f:\n",
" f.write('Test successful\\n')\n",
"\n",
"# Test error handling\n",
"try:\n",
" result = your_function_to_test()\n",
" print(f'Result: {result}')\n",
"except Exception as e:\n",
" print(f'Error: {e}')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
```
#### 2. Test Script Template
```bash
#!/bin/bash
set -e
echo "🧪 Testing [Your Feature]..."
# Clean up any previous test
rm -f test_notebook.ipynb
# Generate or copy your test notebook
cp your_test_notebook.ipynb test_notebook.ipynb
# Run in simulator
echo "🚀 Running test in sim..."
./test_notebook_colab_sim.sh test_notebook.ipynb
# Find the executed notebook
NOTEBOOK_DIR=$(ls -1dt tmp/test_* | head -1)
NOTEBOOK_PATH="$NOTEBOOK_DIR/test_notebook.ipynb"
# Analyze results
echo "📋 Analyzing results..."
python3 inspect_notebook.py "$NOTEBOOK_PATH" "your_search_term"
# Add your custom analysis
python3 -c "
import json
with open('$NOTEBOOK_PATH') as f:
nb = json.load(f)
# Your custom analysis logic here
success = check_for_expected_outputs(nb)
if success:
print('✅ PASS: Test succeeded!')
else:
print('❌ FAIL: Test failed!')
exit(1)
"
echo "🧹 Cleaning up..."
rm -f test_notebook.ipynb
```
---
## Use Case: BAML Log Capture Testing
This section demonstrates how to use the general framework for a specific use case: testing BAML log capture in notebooks.
### Problem Statement
BAML (a language model framework) uses FFI bindings to a Rust binary and outputs logs to stderr. We need to test whether different log capture methods can successfully capture these logs in Jupyter notebook cells.
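Part of what makes this tricky is that logs written by native code may go straight to file descriptor 2 rather than through Python's `sys.stderr`, so purely stream-level redirection is not guaranteed to see them. One candidate method is capturing at the file-descriptor level; a sketch for comparison only (the implementation actually used below relies on IPython's `capture_output`):
```python
# Sketch: capture everything written to fd 2, including output from
# native/FFI code, by temporarily pointing fd 2 at a temp file.
import os
import tempfile

def capture_fd2(func, *args, **kwargs):
    saved = os.dup(2)                      # remember the real stderr fd
    with tempfile.TemporaryFile(mode="w+b") as tmp:
        os.dup2(tmp.fileno(), 2)           # point fd 2 at the temp file
        try:
            result = func(*args, **kwargs)
        finally:
            os.dup2(saved, 2)              # always restore stderr
            os.close(saved)
        tmp.seek(0)
        logs = tmp.read().decode(errors="replace")
    return result, logs
```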
### Test Implementation
#### Test Configuration (`simple_log_test.yaml`)
```yaml
title: "BAML Log Capture Test"
text: "Simple test for log capture"
sections:
- title: "Log Capture Test"
steps:
- baml_setup: true
- fetch_file:
src: "walkthrough/01-agent.baml"
dest: "baml_src/agent.baml"
- file:
src: "./simple_main.py"
- text: "Testing log capture with show_logs=true:"
- run_main:
args: "What is 2+2?"
show_logs: true
```
#### Test Function (`simple_main.py`)
```python
def main(message="What is 2+2?"):
"""Simple main function that calls BAML directly"""
client = get_baml_client()
# Call the BAML function - this should generate logs
result = client.DetermineNextStep(f"User asked: {message}")
print(f"Input: {message}")
print(f"Result: {result}")
return result
```
#### Log Capture Implementation
The current working implementation in `walkthroughgen_py.py`:
```python
def run_with_baml_logs(func, *args, **kwargs):
"""Test log capture using IPython capture_output"""
# Ensure BAML_LOG is set
if 'BAML_LOG' not in os.environ:
os.environ['BAML_LOG'] = 'info'
print(f"[LOG CAPTURE TEST] Running with BAML_LOG={os.environ.get('BAML_LOG')}...")
# Capture both stdout and stderr
with capture_output() as captured:
result = func(*args, **kwargs)
# Display captured outputs
if captured.stdout:
print("=== Captured Stdout ===")
print(captured.stdout)
if captured.stderr:
print("=== Captured BAML Logs ===")
print(captured.stderr)
else:
print("=== No BAML Logs Captured ===")
print("=== Function Result ===")
print(result)
return result
```
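In a generated notebook cell the helper simply wraps the normal `main` call, for example:
```python
# Wrap main() so the BAML stderr logs are captured and re-printed
# into the cell output.
run_with_baml_logs(main, "What is 2+2?")
```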
### Test Execution
#### Main Test Script (`test_log_capture.sh`)
```bash
#!/bin/bash
set -e
echo "🧪 Testing BAML Log Capture..."
# Generate test notebook from YAML config
echo "📝 Generating test notebook..."
uv run python walkthroughgen_py.py simple_log_test.yaml -o test_capture.ipynb
# Run in simulator
echo "🚀 Running test in sim..."
./test_notebook_colab_sim.sh test_capture.ipynb
# Find the executed notebook
NOTEBOOK_DIR=$(ls -1dt tmp/test_* | head -1)
NOTEBOOK_PATH="$NOTEBOOK_DIR/test_notebook.ipynb"
echo "📋 Analyzing results from $NOTEBOOK_PATH..."
# Debug output
echo "🔍 Dumping debug info..."
python3 inspect_notebook.py "$NOTEBOOK_PATH" "run_with_baml_logs"
# Analyze for BAML log patterns
echo "📊 Running log capture analysis..."
python3 analyze_log_capture.py "$NOTEBOOK_PATH"
echo "🧹 Cleaning up..."
rm -f test_capture.ipynb
```
#### Analysis Script (`analyze_log_capture.py`)
```python
#!/usr/bin/env python3
import json
import sys
import os
def check_logs(notebook_path):
"""Check if BAML logs were captured in the notebook"""
with open(notebook_path) as f:
nb = json.load(f)
found_log_pattern = False
found_capture_test = False
for i, cell in enumerate(nb['cells']):
if cell['cell_type'] == 'code' and 'outputs' in cell:
source = ''.join(cell.get('source', []))
if 'run_with_baml_logs' in source:
found_capture_test = True
print(f'Found log capture test in cell {i}')
# Check outputs for BAML logs
for output in cell['outputs']:
if output.get('output_type') == 'stream' and 'text' in output:
text = ''.join(output['text'])
# Look for the specific BAML log pattern
if '---Parsed Response (class DoneForNow)---' in text:
found_log_pattern = True
print(f'✅ FOUND BAML LOG PATTERN in cell {i} output!')
return found_capture_test, found_log_pattern
# Run analysis and return pass/fail
capture_test_found, log_pattern_found = check_logs(sys.argv[1])
if not capture_test_found:
print('❌ FAIL: No log capture test found in notebook')
sys.exit(1)
if log_pattern_found:
print('✅ PASS: BAML logs successfully captured in notebook output!')
sys.exit(0)
else:
print('❌ FAIL: BAML log pattern not found in captured output')
sys.exit(1)
```
### Expected Output Flow
#### Successful Test Run:
```bash
$ ./test_log_capture.sh
🧪 Testing BAML Log Capture...
📝 Generating test notebook...
Generated notebook: test_capture.ipynb
🚀 Running test in sim...
🧪 Creating clean test environment in: ./tmp/test_20250716_191106
📁 Test directory will be preserved for inspection
🐍 Creating fresh Python virtual environment...
📦 Installing Jupyter dependencies...
🏃 Running notebook in clean environment...
✅ Notebook executed successfully!
💾 Executed notebook saved with outputs
📋 Analyzing results from tmp/test_20250716_191106/test_notebook.ipynb...
🔍 Dumping debug info...
Found log capture test in cell 11
📤 OUTPUTS (3 outputs):
Output 0: type=stream
Text length: 49 chars
> [LOG CAPTURE TEST] Running with BAML_LOG=info......
Output 1: type=stream
Text length: 1272 chars
> 2025-07-16T19:11:22.445 [BAML [92mINFO[0m] [35mFunction DetermineNextStep[0m...
🎯 Found patterns: ['BAML', 'Parsed', 'Response']
📊 Running log capture analysis...
Found log capture test in cell 11
✅ FOUND BAML LOG PATTERN in cell 11 output!
✅ PASS: BAML logs successfully captured in notebook output!
🧹 Cleaning up...
```
### Key BAML-Specific Insights
1. **BAML logs go to stderr** - Due to FFI bindings to Rust binary
2. **Requires `BAML_LOG=info`** - Environment variable controls verbosity
3. **Logs include ANSI color codes** - Need to handle terminal formatting
4. **Pattern matching** - Look for `---Parsed Response (class DoneForNow)---` to confirm successful execution
5. **IPython capture_output() works** - Successfully captures stderr in notebook context
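Points 3 and 4 matter when post-processing the captured text; a small sketch of stripping ANSI color codes before matching the success marker (the regex is the standard ANSI color-escape pattern, not anything BAML-specific):
```python
# Normalize captured BAML logs before searching for the success marker.
import re

ANSI_COLOR = re.compile(r"\x1b\[[0-9;]*m")

def log_shows_parsed_response(captured_stderr: str) -> bool:
    plain = ANSI_COLOR.sub("", captured_stderr)
    return "---Parsed Response (class DoneForNow)---" in plain
```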
### Iteration Loop Benefits
This framework enables rapid testing of different log capture approaches:
1. **Modify** the `run_with_baml_logs` function in `walkthroughgen_py.py`
2. **Run** `./test_log_capture.sh`
3. **Get** immediate pass/fail feedback
4. **Debug** with `inspect_notebook.py` if needed
5. **Repeat** until working implementation found
This same pattern can be applied to test any notebook functionality: library integrations, environment setup, output formatting, error handling, etc.

View File

@@ -64,6 +64,11 @@ try:
ep.preprocess(nb, {'metadata': {'path': '.'}})
print("\n✅ Notebook executed successfully!")
# Save the executed notebook back to disk
with open('test_notebook.ipynb', 'w') as f:
nbformat.write(nb, f)
print("💾 Executed notebook saved with outputs")
# Show final directory structure
print("\n📁 Final directory structure:")
for root, dirs, files in os.walk('.'):
@@ -85,7 +90,7 @@ EOF
# Run the notebook
echo "🏃 Running notebook in clean environment..."
- python run_notebook.py
+ source venv/bin/activate && python run_notebook.py
# Check what BAML files were created
echo -e "\n📄 BAML files created:"

View File

@@ -11,68 +11,84 @@ sections:
steps:
- text: |
This guide will walk you through building agents in Python with BAML.
We'll start simple with a hello world program and gradually build up to a full agent.
For this notebook, you'll need to have your OpenAI API key saved in Google Colab secrets.
## Where We're Headed
Before we dive in, let's understand the journey ahead. We're building toward **micro-agents in deterministic DAGs** - a powerful pattern that combines the flexibility of AI with the reliability of traditional software.
📖 **Learn more**: [A Brief History of Software](https://github.com/humanlayer/12-factor-agents/blob/main/content/brief-history-of-software.md)
![Software DAG Evolution](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/010-software-dag.png)
- text: "Here's our simple hello world program:"
- file: {src: ./walkthrough/00-main.py}
- text: "Let's run it to verify it works:"
- run_main: {regenerate_baml: false}
- name: cli-and-agent
title: "Chapter 1 - CLI and Agent Loop"
text: "Now let's add BAML and create our first agent with a CLI interface."
steps:
- text: |
In this chapter, we'll integrate BAML to create an AI agent that can respond to user input.
## What is BAML?
BAML (Boundary Markup Language) is a domain-specific language designed to help developers build reliable AI workflows and agents. Created by [BoundaryML](https://www.boundaryml.com/) (a Y Combinator W23 company), BAML adds the engineering to prompt engineering.
### Why BAML?
- **Type-safe outputs**: Get fully type-safe outputs from LLMs, even when streaming
- **Language agnostic**: Works with Python, TypeScript, Ruby, Go, and more
- **LLM agnostic**: Works with any LLM provider (OpenAI, Anthropic, etc.)
- **Better performance**: State-of-the-art structured outputs that outperform even OpenAI's native function calling
- **Developer-friendly**: Native VSCode extension with syntax highlighting, autocomplete, and interactive playground
### Learn More
- 📚 [Official Documentation](https://docs.boundaryml.com/home)
- 💻 [GitHub Repository](https://github.com/BoundaryML/baml)
- 🎯 [What is BAML?](https://docs.boundaryml.com/guide/introduction/what-is-baml)
- 📖 [BAML Examples](https://github.com/BoundaryML/baml-examples)
- 🏢 [Company Website](https://www.boundaryml.com/)
- 📰 [Blog: AI Agents Need a New Syntax](https://www.boundaryml.com/blog/ai-agents-need-new-syntax)
BAML turns prompt engineering into schema engineering, where you focus on defining the structure of your data rather than wrestling with prompts. This approach leads to more reliable and maintainable AI applications.
### Note on Developer Experience
BAML works much better in VS Code with their official extension, which provides syntax highlighting, autocomplete, inline testing, and an interactive playground. However, for this notebook tutorial, we'll work with BAML files directly without the enhanced IDE features.
## Factor 1: Natural Language to Tool Calls
What we're building implements the first factor of 12-factor agents - converting natural language into structured tool calls.
📖 **Learn more**: [Factor 1: Natural Language to Tool Calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md)
![Natural Language to Tool Calls](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/110-natural-language-tool-calls.png)
First, let's set up BAML support in our notebook.
- baml_setup: true
- command: "!ls baml_src"
- text: |
Now let's create our agent that will use BAML to process user input.
First, we'll define the core agent logic:
- file: {src: ./walkthrough/01-agent.py}
- text: |
Next, we need to define the BAML function that our agent will use.
### Understanding BAML Syntax
BAML files define:
- **Classes**: Structured output schemas (like `DoneForNow` below)
- **Functions**: AI-powered functions that take inputs and return structured outputs
- **Tests**: Example inputs/outputs to validate your prompts
This BAML file defines what our agent can do:
- fetch_file: {src: ./walkthrough/01-agent.baml, dest: baml_src/agent.baml}
- command: "!ls baml_src"
@@ -85,35 +101,37 @@ sections:
- `main("Tell me a joke")`
- `main("How are you doing today?")`
in this case, we'll use the baml_generate function to
generate the pydantic and python bindings from our
baml source, but in the future we'll skip this step as it
is done automatically by the get_baml_client() function
- run_main: {regenerate_baml: true, args: "Hello from the Python notebook!"}
- text: |
In a few cases, we'll enable the baml debug logs to see the inputs/outputs to and from the model.
- run_main: {regenerate_baml: false, args: "Hello from the Python notebook!", show_logs: true}
- text: |
what's most important there is that you can see the prompt and how the output_format is injected
to tell the model what kind of json we want to return.
- name: calculator-tools
title: "Chapter 2 - Add Calculator Tools"
text: "Let's add some calculator tools to our agent."
steps:
- text: |
Let's start by adding a tool definition for the calculator.
These are simple structured outputs that we'll ask the model to
return as a "next step" in the agentic loop.
## Factor 4: Tools Are Structured Outputs
This chapter demonstrates that tools are just structured JSON outputs from the LLM - nothing more complex!
📖 **Learn more**: [Factor 4: Tools Are Structured Outputs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md)
![Tools Are Structured Outputs](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/140-tools-are-just-structured-outputs.png)
- fetch_file: {src: ./walkthrough/02-tool_calculator.baml, dest: baml_src/tool_calculator.baml}
- command: "!ls baml_src"
- text: |
Now, let's update the agent's DetermineNextStep method to
expose the calculator tools as potential next steps.
- fetch_file: {src: ./walkthrough/02-agent.baml, dest: baml_src/agent.baml}
- text: |
Now let's update our main function to show the tool call:
@@ -122,7 +140,7 @@ sections:
Let's try out the calculator! The agent should recognize that you want to perform a calculation
and return the appropriate tool call instead of just a message.
- run_main: {regenerate_baml: false, args: "can you add 3 and 4"}
- name: tool-loop
title: "Chapter 3 - Process Tool Calls in a Loop"
text: "Now let's add a real agentic loop that can run the tools and get a final answer from the LLM."
@@ -132,7 +150,21 @@ sections:
- The agent can call multiple tools in sequence
- Each tool result is fed back to the agent
- The agent continues until it has a final answer
## The Agent Loop Pattern
We're implementing the core agent loop - where the AI makes decisions, executes tools, and continues until done.
![Agent Loop Animation](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/027-agent-loop-animation.gif)
## Factor 5: Unify Execution State
Notice how we're storing everything as events in our Thread - this is Factor 5 in action!
📖 **Learn more**: [Factor 5: Unify Execution State](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md)
![Unify State Animation](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/155-unify-state-animation.gif)
Let's update our agent to handle tool calls properly:
- file: {src: ./walkthrough/03-agent.py}
- text: |
@@ -141,17 +173,13 @@ sections:
- text: |
Let's try it out! The agent should now call the tool and return the calculated result:
- run_main: {regenerate_baml: false, args: "can you add 3 and 4"}
- text: |
you can run with baml_logs enabled to see how the prompt changed when we added the New
tool types to our union of response types.
- run_main: {regenerate_baml: false, args: "can you add 3 and 4", show_logs: true}
- text: |
You should see the agent:
1. Recognize it needs to use the add tool
2. Call the tool with the correct parameters
3. Get the result (7)
4. Generate a final response incorporating the result
For more complex calculations, we need to handle all calculator operations. Let's add support for subtract, multiply, and divide:
- file: {src: ./walkthrough/03b-agent.py}
- text: |
@@ -165,28 +193,28 @@ sections:
- run_main: {regenerate_baml: false, args: "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result"}
- text: |
Congratulations! You've taken your first step into hand-rolling an agent loop.
Key concepts you've learned:
- **Thread Management**: Tracking conversation history and tool calls
- **Tool Execution**: Processing different tool types and returning results
- **Agent Loop**: Continuing until the agent has a final answer
From here, we'll start incorporating more intermediate and advanced concepts for 12-factor agents.
- name: baml-tests
title: "Chapter 4 - Add Tests to agent.baml"
text: "Let's add some tests to our BAML agent."
steps:
- text: |
In this chapter, we'll learn about BAML testing - a powerful feature that helps ensure your agents behave correctly.
## Why Test BAML Functions?
- **Catch regressions**: Ensure changes don't break existing behavior
- **Document behavior**: Tests serve as living documentation
- **Validate edge cases**: Test complex scenarios and conversation flows
- **CI/CD integration**: Run tests automatically in your pipeline
Let's start with a simple test that checks the agent's ability to handle basic interactions:
- fetch_file: {src: ./walkthrough/04-agent.baml, dest: baml_src/agent.baml}
- text: |
@@ -194,14 +222,14 @@ sections:
- command: "!baml-cli test"
- text: |
Now let's improve the tests with assertions! Assertions let you verify specific properties of the agent's output.
## BAML Assertion Syntax
Assertions use the `@@assert` directive:
```
@@assert(name, {{condition}})
```
- `name`: A descriptive name for the assertion
- `condition`: A boolean expression using `this` to access the output
- fetch_file: {src: ./walkthrough/04b-agent.baml, dest: baml_src/agent.baml}
@@ -210,7 +238,7 @@ sections:
- command: "!baml-cli test"
- text: |
Finally, let's add more complex test cases that test multi-step conversations.
These tests simulate an entire conversation flow, including:
- User input
- Tool calls made by the agent
@@ -222,14 +250,14 @@ sections:
- command: "!baml-cli test"
- text: |
## Key Testing Concepts
1. **Test Structure**: Each test specifies functions, arguments, and assertions
2. **Progressive Testing**: Start simple, then test complex scenarios
3. **Conversation History**: Test how the agent handles multi-turn conversations
4. **Tool Integration**: Verify the agent correctly uses tools in sequence
With these tests in place, you can confidently modify your agent knowing that core functionality is protected by automated tests!
- name: human-tools
title: "Chapter 5 - Multiple Human Tools"
text: |
@@ -237,16 +265,16 @@ sections:
steps:
- text: |
So far, our agent only returns a final answer with "done_for_now". But what if the agent needs clarification?
Let's add a new tool that allows the agent to request more information from the user.
## Why Human-in-the-Loop?
- **Handle ambiguous inputs**: When user input is unclear or contains typos
- **Request missing information**: When the agent needs more context
- **Confirm sensitive operations**: Before performing important actions
- **Interactive workflows**: Build conversational agents that engage users
First, let's update our BAML file to include a ClarificationRequest tool:
- fetch_file: {src: ./walkthrough/05-agent.baml, dest: baml_src/agent.baml}
- text: |
@@ -265,76 +293,69 @@ sections:
3. In Colab, you'll be prompted to type a response
4. In local testing, an auto-response is provided
5. The agent continues with the clarified input
## Interactive Testing in Colab
When running in Google Colab, the `input()` function will create an interactive text box where you can type your response. Try different clarifications to see how the agent adapts!
## Key Concepts
- **Human Tools**: Special tool types that return control to the human
- **Conversation Flow**: The agent can pause execution to get human input
- **Context Preservation**: The full conversation history is maintained
- **Flexible Handling**: Different behaviors for different environments
- name: customize-prompt
title: "Chapter 6 - Customize Your Prompt with Reasoning"
text: |
In this section, we'll explore how to customize the prompt of the agent with reasoning steps.
This is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md)
steps:
- text: |
## Why Add Reasoning to Prompts?
Adding explicit reasoning steps to your prompts can significantly improve agent performance:
- **Better decisions**: The model thinks through problems step-by-step
- **Transparency**: You can see the model's thought process
- **Fewer errors**: Structured thinking reduces mistakes
- **Debugging**: Easier to identify where reasoning went wrong
Let's update our agent prompt to include a reasoning step:
- fetch_file: {src: ./walkthrough/06-agent.baml, dest: baml_src/agent.baml}
- text: |
Now let's test it with a simple calculation to see the reasoning in action:
- **Note:** The BAML logs below will show the model's reasoning steps. Look for the `<reasoning>` tags in the logs to see how the model thinks through the problem before deciding what to do.
+ - run_main: {args: "can you multiply 3 and 4"}
- run_main: {args: "can you multiply 3 and 4", show_logs: true}
- text: |
- You should see the reasoning steps in the BAML logs above. The model explicitly thinks through what it needs to do before making a decision.
+ The model uses explicit reasoning steps to think through the problem before making a decision.
💡 **Tip:** If you want to see BAML logs for any other calls in this notebook, you can use the `run_with_baml_logs` helper function:
```python
# Instead of: main("your message")
# Use: run_with_baml_logs(main, "your message")
```
## Advanced Prompt Engineering
You can enhance your prompts further by:
- Adding specific reasoning templates for different tasks
- Including examples of good reasoning
- Structuring the reasoning with numbered steps
- Adding checks for common mistakes
The key is to guide the model's thinking process while still allowing flexibility.
- name: context-window
title: "Chapter 7 - Customize Your Context Window"
text: |
In this section, we'll explore how to customize the context window of the agent.
This is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md)
steps:
- text: |
## Context Window Serialization
How you format your conversation history can significantly impact:
- **Token usage**: Some formats are more efficient
- **Model understanding**: Clear structure helps the model
- **Debugging**: Readable formats help development
Let's implement two serialization formats: pretty-printed JSON and XML.
- file: {src: ./walkthrough/07-agent.py}
- text: |
@@ -348,15 +369,15 @@ sections:
- run_main: {regenerate_baml: false, args: "can you multiply 3 and 4, then divide the result by 2", kwargs: {use_xml: true}}
- text: |
## XML vs JSON Trade-offs
**XML Benefits**:
- More token-efficient for nested data
- Clear hierarchy with opening/closing tags
- Better for long conversations
**JSON Benefits**:
- Familiar to most developers
- Easy to parse and debug
- Native to JavaScript/Python
Choose based on your specific needs and token constraints!

View File

@@ -1,9 +1,13 @@
# Agent implementation with clarification support
import json
- def agent_loop(thread, clarification_handler):
+ def agent_loop(thread, clarification_handler, max_iterations=3):
- """Run the agent loop until we get a final answer."""
+ """Run the agent loop until we get a final answer (max 3 iterations)."""
- while True:
+ iteration_count = 0
while iteration_count < max_iterations:
iteration_count += 1
print(f"🔄 Agent loop iteration {iteration_count}/{max_iterations}")
# Get the client
baml_client = get_baml_client()
@@ -63,6 +67,9 @@ def agent_loop(thread, clarification_handler):
})
else:
return "Error: Unexpected result type"
# If we've reached max iterations without a final answer
return f"Agent reached maximum iterations ({max_iterations}) without completing the task."
class Thread:
"""Simple thread to track conversation history."""

View File

@@ -83,86 +83,6 @@ def get_baml_client():
init_code = "!baml-cli init"
nb.cells.append(new_code_cell(init_code))
# Fourth cell: Add BAML logging helper
logging_helper = '''# Helper function to capture BAML logs in notebook output
import os
import sys
from IPython.utils.capture import capture_output
import contextlib
def run_with_baml_logs(func, *args, **kwargs):
"""Run a function and capture BAML logs in the notebook output."""
# Ensure BAML_LOG is set
if 'BAML_LOG' not in os.environ:
os.environ['BAML_LOG'] = 'info'
print(f"Running with BAML_LOG={os.environ.get('BAML_LOG')}...")
# Capture both stdout and stderr
with capture_output() as captured:
result = func(*args, **kwargs)
# Display the result first
if result is not None:
print("=== Result ===")
print(result)
# Display captured stdout if any
if captured.stdout:
print("\\n=== Output ===")
print(captured.stdout)
# Display BAML logs from stderr
if captured.stderr:
print("\\n=== BAML Logs ===")
# Format the logs for better readability
log_lines = captured.stderr.strip().split('\\n')
for line in log_lines:
if 'reasoning' in line.lower() or '<reasoning>' in line:
print(f"🤔 {line}")
elif 'error' in line.lower():
print(f"{line}")
elif 'warn' in line.lower():
print(f"⚠️ {line}")
else:
print(f" {line}")
return result
# Alternative: Force stderr to stdout redirection
@contextlib.contextmanager
def redirect_stderr_to_stdout():
"""Context manager to redirect stderr to stdout."""
old_stderr = sys.stderr
sys.stderr = sys.stdout
try:
yield
finally:
sys.stderr = old_stderr
def run_with_baml_logs_redirect(func, *args, **kwargs):
"""Run a function with stderr redirected to stdout for immediate display."""
if 'BAML_LOG' not in os.environ:
os.environ['BAML_LOG'] = 'info'
print(f"Running with BAML_LOG={os.environ.get('BAML_LOG')} (stderr→stdout)...")
with redirect_stderr_to_stdout():
result = func(*args, **kwargs)
if result is not None:
print("\\n=== Result ===")
print(result)
return result
# Set BAML log level (options: error, warn, info, debug, trace)
os.environ['BAML_LOG'] = 'info'
print("BAML logging helpers loaded!")
print("- Use run_with_baml_logs() to capture and display logs after execution")
print("- Use run_with_baml_logs_redirect() to see logs in real-time as they're generated")
'''
nb.cells.append(new_code_cell(logging_helper))
def process_step(nb, step, base_path, current_functions, section_name=None):
    """Process different step types."""
@@ -244,18 +164,8 @@ def process_step(nb, step, base_path, current_functions, section_name=None):
        else:
            main_call = "main()"

        # Check if we should use logging wrapper
        use_logging = step['run_main'].get('show_logs', False)
        if use_logging:
            # Use logging wrapper
            if call_parts:
                nb.cells.append(new_code_cell(f'run_with_baml_logs(main, {", ".join(call_parts)})'))
            else:
                nb.cells.append(new_code_cell('run_with_baml_logs(main)'))
        else:
            # Normal execution without logging
            nb.cells.append(new_code_cell(main_call))

        # Execute the main function call
        nb.cells.append(new_code_cell(main_call))
def convert_walkthrough_to_notebook(yaml_path, output_path):
    """Convert walkthrough.yaml to Jupyter notebook."""

View File

@@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "7c856804", "id": "a55820ee",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# Building the 12-factor agent template from scratch in Python" "# Building the 12-factor agent template from scratch in Python"
@@ -10,7 +10,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "6c96065f", "id": "ba52e30a",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Steps to start from a bare Python repo and build up a 12-factor agent. This walkthrough will guide you through creating a Python agent that follows the 12-factor methodology with BAML." "Steps to start from a bare Python repo and build up a 12-factor agent. This walkthrough will guide you through creating a Python agent that follows the 12-factor methodology with BAML."
@@ -18,7 +18,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "d8a45720", "id": "75b26c9b",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Chapter 0 - Hello World" "## Chapter 0 - Hello World"
@@ -26,7 +26,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "a7a5467e", "id": "fa4b9e07",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Let's start with a basic Python setup and a hello world program." "Let's start with a basic Python setup and a hello world program."
@@ -34,7 +34,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "563ef643", "id": "4e464227",
"metadata": {}, "metadata": {},
"source": [ "source": [
"This guide will walk you through building agents in Python with BAML.\n", "This guide will walk you through building agents in Python with BAML.\n",
@@ -46,7 +46,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "7db47ab2", "id": "99dac1bb",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Here's our simple hello world program:" "Here's our simple hello world program:"
@@ -55,7 +55,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "c9cc0758", "id": "9c6946fd",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -69,7 +69,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "5b920391", "id": "5523efac",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Let's run it to verify it works:" "Let's run it to verify it works:"
@@ -78,7 +78,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "29ba0259", "id": "6a437eb2",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -87,7 +87,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "26398377", "id": "d9aa0df6",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Chapter 1 - CLI and Agent Loop" "## Chapter 1 - CLI and Agent Loop"
@@ -95,7 +95,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "0b666a9e", "id": "970c65da",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Now let's add BAML and create our first agent with a CLI interface." "Now let's add BAML and create our first agent with a CLI interface."
@@ -103,7 +103,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "a6191d3c", "id": "976a0fca",
"metadata": {}, "metadata": {},
"source": [ "source": [
"In this chapter, we'll integrate BAML to create an AI agent that can respond to user input.\n", "In this chapter, we'll integrate BAML to create an AI agent that can respond to user input.\n",
@@ -140,7 +140,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "e44cf54f", "id": "ba1f7191",
"metadata": {}, "metadata": {},
"source": [ "source": [
"### BAML Setup\n", "### BAML Setup\n",
@@ -154,7 +154,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "f323b5b9", "id": "9910f8a3",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -164,7 +164,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "e9424fab", "id": "a4ad6e77",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -224,7 +224,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "b34a99bc", "id": "b99ba982",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -234,39 +234,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "8a2812f6", "id": "ee716f3a",
"metadata": {},
"outputs": [],
"source": [
"# Helper function to capture BAML logs in notebook output\n",
"import os\n",
"from IPython.utils.capture import capture_output\n",
"\n",
"def run_with_baml_logs(func, *args, **kwargs):\n",
" \"\"\"Run a function and capture BAML logs in the notebook output.\"\"\"\n",
" # Capture both stdout and stderr\n",
" with capture_output() as captured:\n",
" result = func(*args, **kwargs)\n",
" \n",
" # Display the captured output\n",
" if captured.stdout:\n",
" print(captured.stdout)\n",
" if captured.stderr:\n",
" # BAML logs go to stderr - format them nicely\n",
" print(\"\\n=== BAML Logs ===\")\n",
" print(captured.stderr)\n",
" print(\"=================\\n\")\n",
" \n",
" return result\n",
"\n",
"# Set BAML log level (options: error, warn, info, debug, trace)\n",
"os.environ['BAML_LOG'] = 'info'\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d7efec52",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -275,7 +243,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "eaa41eda", "id": "894474da",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Now let's create our agent that will use BAML to process user input.\n", "Now let's create our agent that will use BAML to process user input.\n",
@@ -286,7 +254,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "6048a2f5", "id": "dbf9d929",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -321,7 +289,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "88143079", "id": "b9421cd4",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Next, we need to define the BAML function that our agent will use.\n", "Next, we need to define the BAML function that our agent will use.\n",
@@ -339,7 +307,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "ee4a5f17", "id": "58d8bda5",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -349,7 +317,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "47435e42", "id": "1edc5279",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -358,7 +326,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "83a9feee", "id": "ee489cc1",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Now let's create our main function that accepts a message parameter:\n" "Now let's create our main function that accepts a message parameter:\n"
@@ -367,7 +335,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "1231c8fc", "id": "f4fea69e",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -383,7 +351,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "2ddea81d", "id": "fe3fd9c7",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Let's test our agent! Try calling main() with different messages:\n", "Let's test our agent! Try calling main() with different messages:\n",
@@ -391,16 +359,16 @@
"- `main(\"Tell me a joke\")`\n", "- `main(\"Tell me a joke\")`\n",
"- `main(\"How are you doing today?\")`\n", "- `main(\"How are you doing today?\")`\n",
"\n", "\n",
"in this case, we'll use the baml_generate function to \n", "in this case, we'll use the baml_generate function to\n",
"generate the pydantic and python bindings from our \n", "generate the pydantic and python bindings from our\n",
"baml source, but in the future we'll skip this step as it \n", "baml source, but in the future we'll skip this step as it\n",
"is done automatically by the get_baml_client() function \n" "is done automatically by the get_baml_client() function\n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "c3523c76", "id": "7fc1ee38",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -410,7 +378,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "02f16835", "id": "8756df71",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -419,34 +387,13 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "e0e5c359", "id": "9b5ca88c",
"metadata": {}, "metadata": {},
"source": [ "source": []
"In a few cases, we'll enable the baml debug logs to see the inputs/outputs to and from the model.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e7f1d260",
"metadata": {},
"outputs": [],
"source": [
"run_with_baml_logs(main, \"Hello from the Python notebook!\")"
]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "c1323d34", "id": "e79f4d84",
"metadata": {},
"source": [
"what's most important there is that you can see the prompt and how the output_format is injected\n",
"to tell the model what kind of json we want to return.\n"
]
},
{
"cell_type": "markdown",
"id": "dba3ff7f",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Chapter 2 - Add Calculator Tools" "## Chapter 2 - Add Calculator Tools"
@@ -454,7 +401,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "83fd4e9e", "id": "4659d5ef",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Let's add some calculator tools to our agent." "Let's add some calculator tools to our agent."
@@ -462,7 +409,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "fd85b772", "id": "73df701a",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Let's start by adding a tool definition for the calculator.\n", "Let's start by adding a tool definition for the calculator.\n",
@@ -474,7 +421,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "92e26be8", "id": "c538cd53",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -484,7 +431,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "02702fa2", "id": "1df07ff3",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -493,7 +440,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "e2579b34", "id": "1ffe3854",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Now, let's update the agent's DetermineNextStep method to\n", "Now, let's update the agent's DetermineNextStep method to\n",
@@ -503,7 +450,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "3cace82a", "id": "d6f9ee99",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -512,7 +459,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "e3910c3d", "id": "147bd22c",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Now let's update our main function to show the tool call:\n" "Now let's update our main function to show the tool call:\n"
@@ -521,7 +468,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "aeeb0546", "id": "f8f99089",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -542,7 +489,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "c456f5c5", "id": "ffb6c213",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Let's try out the calculator! The agent should recognize that you want to perform a calculation\n", "Let's try out the calculator! The agent should recognize that you want to perform a calculation\n",
@@ -552,7 +499,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "ebede785", "id": "7afaa326",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -561,7 +508,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "1790412b", "id": "599d21dd",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Chapter 3 - Process Tool Calls in a Loop" "## Chapter 3 - Process Tool Calls in a Loop"
@@ -569,7 +516,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "15999167", "id": "d80e3f9f",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Now let's add a real agentic loop that can run the tools and get a final answer from the LLM." "Now let's add a real agentic loop that can run the tools and get a final answer from the LLM."
@@ -577,7 +524,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "20b3b45e", "id": "427fbc77",
"metadata": {}, "metadata": {},
"source": [ "source": [
"In this chapter, we'll enhance our agent to process tool calls in a loop. This means:\n", "In this chapter, we'll enhance our agent to process tool calls in a loop. This means:\n",
@@ -591,7 +538,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "2860b705", "id": "ac8ae567",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -637,7 +584,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "8525277b", "id": "e875f4c2",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Now let's update our main function to use the new agent loop:\n" "Now let's update our main function to use the new agent loop:\n"
@@ -646,7 +593,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "c9d55067", "id": "2aead128",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -664,7 +611,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "3945d097", "id": "a29bf07d",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Let's try it out! The agent should now call the tool and return the calculated result:\n" "Let's try it out! The agent should now call the tool and return the calculated result:\n"
@@ -673,7 +620,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "a5897a69", "id": "c6c6a0ca",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -682,26 +629,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "754dac11", "id": "4c20a7d5",
"metadata": {},
"source": [
"you can run with baml_logs enabled to see how the prompt changed when we added the New\n",
"tool types to our union of response types.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "95dfb524",
"metadata": {},
"outputs": [],
"source": [
"run_with_baml_logs(main, \"can you add 3 and 4\")"
]
},
{
"cell_type": "markdown",
"id": "ad00ab9f",
"metadata": {}, "metadata": {},
"source": [ "source": [
"You should see the agent:\n", "You should see the agent:\n",
@@ -716,7 +644,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "920308ba", "id": "561c0b54",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -790,7 +718,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "1241ac9e", "id": "7c612b06",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Now let's test subtraction:\n" "Now let's test subtraction:\n"
@@ -799,7 +727,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "27dad2e2", "id": "4be4af22",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -808,7 +736,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "2911b810", "id": "1da0ad58",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Test multiplication:\n" "Test multiplication:\n"
@@ -817,7 +745,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "bd38e06a", "id": "49d5e040",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -826,7 +754,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "ed3a2c02", "id": "d5a27929",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Finally, let's test a complex multi-step calculation:\n" "Finally, let's test a complex multi-step calculation:\n"
@@ -835,7 +763,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "3dea94b1", "id": "431414aa",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -844,7 +772,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "618bea98", "id": "99ab35d5",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Congratulations! You've taken your first step into hand-rolling an agent loop.\n", "Congratulations! You've taken your first step into hand-rolling an agent loop.\n",
@@ -859,7 +787,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "12b04d87", "id": "9ba4e319",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Chapter 4 - Add Tests to agent.baml" "## Chapter 4 - Add Tests to agent.baml"
@@ -867,7 +795,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "7208e80e", "id": "6bf77db0",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Let's add some tests to our BAML agent." "Let's add some tests to our BAML agent."
@@ -875,7 +803,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "6bbdec63", "id": "c6f0d38a",
"metadata": {}, "metadata": {},
"source": [ "source": [
"In this chapter, we'll learn about BAML testing - a powerful feature that helps ensure your agents behave correctly.\n", "In this chapter, we'll learn about BAML testing - a powerful feature that helps ensure your agents behave correctly.\n",
@@ -893,7 +821,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "8d3602d1", "id": "cd0ae03f",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -902,7 +830,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "6069cbc3", "id": "5bf05182",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Run the tests to see them in action:\n" "Run the tests to see them in action:\n"
@@ -911,7 +839,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "e464d7b5", "id": "30bbcac5",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -920,7 +848,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "a9be4502", "id": "2cbbf5db",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Now let's improve the tests with assertions! Assertions let you verify specific properties of the agent's output.\n", "Now let's improve the tests with assertions! Assertions let you verify specific properties of the agent's output.\n",
@@ -939,7 +867,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "b0984190", "id": "dbbc5283",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -948,7 +876,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "7be0ca40", "id": "ecf9cb68",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Run the tests again to see assertions in action:\n" "Run the tests again to see assertions in action:\n"
@@ -957,7 +885,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "52fcc668", "id": "8d0611f3",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -966,7 +894,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "8f035d7a", "id": "8789e20e",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Finally, let's add more complex test cases that test multi-step conversations.\n", "Finally, let's add more complex test cases that test multi-step conversations.\n",
@@ -981,7 +909,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "587f5e67", "id": "abf5be5b",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -990,7 +918,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "702c4652", "id": "8ce0f9de",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Run the comprehensive test suite:\n" "Run the comprehensive test suite:\n"
@@ -999,7 +927,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "b2e5c012", "id": "4afe82b8",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -1008,7 +936,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "2c49a891", "id": "5d0ba42b",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Key Testing Concepts\n", "## Key Testing Concepts\n",
@@ -1023,7 +951,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "44eb50a1", "id": "bf15b77e",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Chapter 5 - Multiple Human Tools" "## Chapter 5 - Multiple Human Tools"
@@ -1031,7 +959,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "47027888", "id": "e69dbeca",
"metadata": {}, "metadata": {},
"source": [ "source": [
"In this section, we'll add support for multiple tools that serve to contact humans.\n" "In this section, we'll add support for multiple tools that serve to contact humans.\n"
@@ -1039,7 +967,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "c2327093", "id": "f3e29142",
"metadata": {}, "metadata": {},
"source": [ "source": [
"So far, our agent only returns a final answer with \"done_for_now\". But what if the agent needs clarification?\n", "So far, our agent only returns a final answer with \"done_for_now\". But what if the agent needs clarification?\n",
@@ -1059,7 +987,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "1b0d9077", "id": "9b42b75e",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -1068,7 +996,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "dd0b17a5", "id": "7be2af7d",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Now let's update our agent to handle clarification requests:\n" "Now let's update our agent to handle clarification requests:\n"
@@ -1077,7 +1005,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "1a957b14", "id": "21a3f526",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -1085,9 +1013,13 @@
"# Agent implementation with clarification support\n", "# Agent implementation with clarification support\n",
"import json\n", "import json\n",
"\n", "\n",
"def agent_loop(thread, clarification_handler):\n", "def agent_loop(thread, clarification_handler, max_iterations=3):\n",
" \"\"\"Run the agent loop until we get a final answer.\"\"\"\n", " \"\"\"Run the agent loop until we get a final answer (max 3 iterations).\"\"\"\n",
" while True:\n", " iteration_count = 0\n",
" while iteration_count < max_iterations:\n",
" iteration_count += 1\n",
" print(f\"🔄 Agent loop iteration {iteration_count}/{max_iterations}\")\n",
" \n",
" # Get the client\n", " # Get the client\n",
" baml_client = get_baml_client()\n", " baml_client = get_baml_client()\n",
" \n", " \n",
@@ -1147,6 +1079,9 @@
" })\n", " })\n",
" else:\n", " else:\n",
" return \"Error: Unexpected result type\"\n", " return \"Error: Unexpected result type\"\n",
" \n",
" # If we've reached max iterations without a final answer\n",
" return f\"Agent reached maximum iterations ({max_iterations}) without completing the task.\"\n",
"\n", "\n",
"class Thread:\n", "class Thread:\n",
" \"\"\"Simple thread to track conversation history.\"\"\"\n", " \"\"\"Simple thread to track conversation history.\"\"\"\n",
@@ -1156,7 +1091,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "495441f6", "id": "5f017c77",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Finally, let's create a main function that handles human interaction:\n" "Finally, let's create a main function that handles human interaction:\n"
@@ -1165,7 +1100,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "e50ec1ec", "id": "e648be92",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -1203,7 +1138,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "36ea5481", "id": "2f4b962e",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Let's test with an ambiguous input that should trigger a clarification request:\n" "Let's test with an ambiguous input that should trigger a clarification request:\n"
@@ -1212,7 +1147,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "05f7aeff", "id": "948684f2",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -1221,7 +1156,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "404b23ed", "id": "54b7d0d4",
"metadata": {}, "metadata": {},
"source": [ "source": [
"You should see:\n", "You should see:\n",
@@ -1245,7 +1180,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "292c8ae5", "id": "253d3f6f",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Chapter 6 - Customize Your Prompt with Reasoning" "## Chapter 6 - Customize Your Prompt with Reasoning"
@@ -1253,7 +1188,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "40e400d2", "id": "87dc996a",
"metadata": {}, "metadata": {},
"source": [ "source": [
"In this section, we'll explore how to customize the prompt of the agent with reasoning steps.\n", "In this section, we'll explore how to customize the prompt of the agent with reasoning steps.\n",
@@ -1263,7 +1198,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "3ab476f7", "id": "7694a842",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Why Add Reasoning to Prompts?\n", "## Why Add Reasoning to Prompts?\n",
@@ -1281,7 +1216,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "877c73a9", "id": "2b38033a",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -1290,36 +1225,28 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "09657b2f", "id": "30aff7de",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Now let's test it with a simple calculation to see the reasoning in action:\n", "Now let's test it with a simple calculation to see the reasoning in action:\n"
"\n",
"**Note:** The BAML logs below will show the model's reasoning steps. Look for the `<reasoning>` tags in the logs to see how the model thinks through the problem before deciding what to do.\n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "c4cfccff", "id": "515f9755",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"run_with_baml_logs(main, \"can you multiply 3 and 4\")" "main(\"can you multiply 3 and 4\")"
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "6c4cd43c", "id": "2f69536c",
"metadata": {}, "metadata": {},
"source": [ "source": [
"You should see the reasoning steps in the BAML logs above. The model explicitly thinks through what it needs to do before making a decision.\n", "The model uses explicit reasoning steps to think through the problem before making a decision.\n",
"\n",
"💡 **Tip:** If you want to see BAML logs for any other calls in this notebook, you can use the `run_with_baml_logs` helper function:\n",
"```python\n",
"# Instead of: main(\"your message\")\n",
"# Use: run_with_baml_logs(main, \"your message\")\n",
"```\n",
"\n", "\n",
"## Advanced Prompt Engineering\n", "## Advanced Prompt Engineering\n",
"\n", "\n",
@@ -1334,7 +1261,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "0c23951d", "id": "8274aff0",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Chapter 7 - Customize Your Context Window" "## Chapter 7 - Customize Your Context Window"
@@ -1342,7 +1269,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "6fb08c76", "id": "f930c899",
"metadata": {}, "metadata": {},
"source": [ "source": [
"In this section, we'll explore how to customize the context window of the agent.\n", "In this section, we'll explore how to customize the context window of the agent.\n",
@@ -1352,7 +1279,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "5e25342a", "id": "1d4235ed",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Context Window Serialization\n", "## Context Window Serialization\n",
@@ -1368,7 +1295,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "eb204207", "id": "dccf9a9f",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -1481,7 +1408,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "3a789d22", "id": "e02d1361",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Now let's create a main function that can switch between formats:\n" "Now let's create a main function that can switch between formats:\n"
@@ -1490,7 +1417,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "3ca0bab4", "id": "03c71da7",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -1515,7 +1442,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "3865f8a4", "id": "1d1718ab",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Let's test with JSON format first:\n" "Let's test with JSON format first:\n"
@@ -1524,7 +1451,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "ebadc358", "id": "41b41a22",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -1533,7 +1460,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "ef60144f", "id": "d1bb4844",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Now let's try the same with XML format:\n" "Now let's try the same with XML format:\n"
@@ -1542,7 +1469,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "981012bd", "id": "2ab2a144",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -1551,7 +1478,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "68bf94d2", "id": "8883acac",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## XML vs JSON Trade-offs\n", "## XML vs JSON Trade-offs\n",
@@ -1566,7 +1493,7 @@
"- Easy to parse and debug\n", "- Easy to parse and debug\n",
"- Native to JavaScript/Python\n", "- Native to JavaScript/Python\n",
"\n", "\n",
"Choose based on your specific needs and token constraints!" "Choose based on your specific needs and token constraints!\n"
] ]
} }
], ],