mirror of
https://github.com/humanlayer/12-factor-agents.git
synced 2025-08-20 18:59:53 +03:00
workshop stuff
This commit is contained in:
@@ -10,7 +10,7 @@ generator target {
|
||||
|
||||
// The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
|
||||
// The BAML VSCode extension version should also match this version.
|
||||
version "0.85.0"
|
||||
version "0.202.0"
|
||||
|
||||
// Valid values: "sync", "async"
|
||||
// This controls what `b.FunctionName()` will be (sync or async).
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
"name": "my-agent",
|
||||
"version": "0.1.0",
|
||||
"dependencies": {
|
||||
"baml": "^0.0.0",
|
||||
"@boundaryml/baml": "latest",
|
||||
"tsx": "^4.15.0",
|
||||
"typescript": "^5.0.0"
|
||||
},
|
||||
@@ -19,6 +19,142 @@
|
||||
"eslint": "^8.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@boundaryml/baml": {
|
||||
"version": "0.202.0",
|
||||
"resolved": "https://registry.npmjs.org/@boundaryml/baml/-/baml-0.202.0.tgz",
|
||||
"integrity": "sha512-0RNgCBp2egdWJfsNqNaWe/qUg6ea9OLzkcUTE8+wHmlpB2SgK5QRYTaOnt9WX4KHnUvIiMJijIOjy35RGYk45g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@scarf/scarf": "^1.3.0"
|
||||
},
|
||||
"bin": {
|
||||
"baml-cli": "cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@boundaryml/baml-darwin-arm64": "0.202.0",
|
||||
"@boundaryml/baml-darwin-x64": "0.202.0",
|
||||
"@boundaryml/baml-linux-arm64-gnu": "0.202.0",
|
||||
"@boundaryml/baml-linux-arm64-musl": "0.202.0",
|
||||
"@boundaryml/baml-linux-x64-gnu": "0.202.0",
|
||||
"@boundaryml/baml-linux-x64-musl": "0.202.0",
|
||||
"@boundaryml/baml-win32-x64-msvc": "0.202.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@boundaryml/baml-darwin-arm64": {
|
||||
"version": "0.202.0",
|
||||
"resolved": "https://registry.npmjs.org/@boundaryml/baml-darwin-arm64/-/baml-darwin-arm64-0.202.0.tgz",
|
||||
"integrity": "sha512-i0Y9tCkaWcERJL4yL1/lWSvAYzKiGMsuO1MMDFO3R3cBvbGpRlGY13hKsDtpQy7YePoGzy68MMAqQFm1Y6ucLw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@boundaryml/baml-darwin-x64": {
|
||||
"version": "0.202.0",
|
||||
"resolved": "https://registry.npmjs.org/@boundaryml/baml-darwin-x64/-/baml-darwin-x64-0.202.0.tgz",
|
||||
"integrity": "sha512-e9q/igONW33ltNUAxW6Jimv/1bucN1LgD0TqaF6gSjhyelZr4bZ68f3n5rwK0UF+4VBkNkvC+UXoWgYky5dBOg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@boundaryml/baml-linux-arm64-gnu": {
|
||||
"version": "0.202.0",
|
||||
"resolved": "https://registry.npmjs.org/@boundaryml/baml-linux-arm64-gnu/-/baml-linux-arm64-gnu-0.202.0.tgz",
|
||||
"integrity": "sha512-3DWTK9gMUHv+BlsZ1BAprMXQsRzPFKhlzmG71y+G3s0ZJIFzrQ9rmdv93lejyslPPTw0M2TD2CjBDrNsnmSX3A==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@boundaryml/baml-linux-arm64-musl": {
|
||||
"version": "0.202.0",
|
||||
"resolved": "https://registry.npmjs.org/@boundaryml/baml-linux-arm64-musl/-/baml-linux-arm64-musl-0.202.0.tgz",
|
||||
"integrity": "sha512-fTFK+w7ku61dKzIeIaNsMLpiT793MKmj1La6oznhwpuoOdLm861GXzJUut4Bri8n4UFULfnPiCCp4nU5nwpwcQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@boundaryml/baml-linux-x64-gnu": {
|
||||
"version": "0.202.0",
|
||||
"resolved": "https://registry.npmjs.org/@boundaryml/baml-linux-x64-gnu/-/baml-linux-x64-gnu-0.202.0.tgz",
|
||||
"integrity": "sha512-gKainskhyex0c8AmzrfYSbyRXwK4OCSjpO6oKni8+EFcaH/OZD6rDqmS1ggcNoTKw2MqC/H1hfyMCw3BdEDxVA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@boundaryml/baml-linux-x64-musl": {
|
||||
"version": "0.202.0",
|
||||
"resolved": "https://registry.npmjs.org/@boundaryml/baml-linux-x64-musl/-/baml-linux-x64-musl-0.202.0.tgz",
|
||||
"integrity": "sha512-KHrG8iut5vc58L41eKtNF8W1OgDzYMmXRtcuevHuy22cRb4TbhYP2bTOo+r9iZOc/zBN1Yl1Cv3U+u+pX3ypPw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@boundaryml/baml-win32-x64-msvc": {
|
||||
"version": "0.202.0",
|
||||
"resolved": "https://registry.npmjs.org/@boundaryml/baml-win32-x64-msvc/-/baml-win32-x64-msvc-0.202.0.tgz",
|
||||
"integrity": "sha512-DcZiQ/eRKf11FgKFnVN8H1Tsnc6M9UgC6tLKIwr0YUYe2buKPXNkS2tPk0n4gHSnPX/bdWqyeUchk+4E6yqiDQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">= 10"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/aix-ppc64": {
|
||||
"version": "0.25.4",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.4.tgz",
|
||||
@@ -606,6 +742,13 @@
|
||||
"node": ">= 8"
|
||||
}
|
||||
},
|
||||
"node_modules/@scarf/scarf": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/@scarf/scarf/-/scarf-1.4.0.tgz",
|
||||
"integrity": "sha512-xxeapPiUXdZAE3che6f3xogoJPeZgig6omHEy1rIY5WVsB3H2BHNnZH+gHG6x91SCWyQCzWGsuL2Hh3ClO5/qQ==",
|
||||
"hasInstallScript": true,
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/@types/json-schema": {
|
||||
"version": "7.0.15",
|
||||
"resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz",
|
||||
@@ -925,11 +1068,6 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/baml": {
|
||||
"version": "0.0.0",
|
||||
"resolved": "https://registry.npmjs.org/baml/-/baml-0.0.0.tgz",
|
||||
"integrity": "sha512-wlrNMVNrHKoB65HXhjTD8mFLWQZVaapWl35gHB+wrp4Sx1+zm5U32LJ2cgYV+1/UPBVC198E5PXJdwYNf2JFKg=="
|
||||
},
|
||||
"node_modules/brace-expansion": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
"build": "tsc"
|
||||
},
|
||||
"dependencies": {
|
||||
"baml": "^0.0.0",
|
||||
"@boundaryml/baml": "latest",
|
||||
"tsx": "^4.15.0",
|
||||
"typescript": "^5.0.0"
|
||||
},
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
# Workshop 2025-07-16: Python/Jupyter Notebook Implementation
|
||||
|
||||
• **Main Tool**: `hack/walkthroughgen_py.py` - Converts TypeScript walkthrough to Jupyter notebooks
|
||||
• **Config**: `hack/walkthrough_python.yaml` - Defines notebook structure and content
|
||||
• **Output**: `hack/workshop_final.ipynb` - Generated notebook with Chapters 0-7
|
||||
• **Testing**: `hack/test_notebook_colab_sim.sh` - Simulates Google Colab environment
|
||||
• **Main Tool**: `walkthroughgen_py.py` - Converts TypeScript walkthrough to Jupyter notebooks
|
||||
• **Config**: `walkthrough.yaml` - Defines notebook structure and content
|
||||
• **Output**: `workshop_final.ipynb` - Generated notebook with Chapters 0-7
|
||||
• **Testing**: `test_notebook_colab_sim.sh` - Simulates Google Colab environment
|
||||
|
||||
## Key Implementation Learnings
|
||||
|
||||
@@ -53,15 +53,15 @@
|
||||
|
||||
## Testing Commands
|
||||
|
||||
• Generate notebook: `uv run python hack/walkthroughgen_py.py hack/walkthrough_python.yaml -o hack/test.ipynb`
|
||||
• Full Colab sim: `cd hack && ./test_notebook_colab_sim.sh`
|
||||
• Generate notebook: `uv run python walkthroughgen_py.py walkthrough.yaml -o test.ipynb`
|
||||
• Full Colab sim: `./test_notebook_colab_sim.sh`
|
||||
• Run BAML tests: `baml-cli test` (from directory with baml_src)
|
||||
|
||||
## File Structure
|
||||
|
||||
• `walkthrough/*.py` - Python implementations of each chapter's code
|
||||
• `walkthrough/*.baml` - BAML files fetched from GitHub during notebook execution
|
||||
• `hack/walkthroughgen_py.py` - Main conversion tool
|
||||
• `hack/walkthrough_python.yaml` - Notebook definition with all chapters
|
||||
• `hack/test_notebook_colab_sim.sh` - Full Colab environment simulation
|
||||
• `hack/workshop_final.ipynb` - Final generated notebook ready for workshop
|
||||
• `walkthroughgen_py.py` - Main conversion tool
|
||||
• `walkthrough.yaml` - Notebook definition with all chapters
|
||||
• `test_notebook_colab_sim.sh` - Full Colab environment simulation
|
||||
• `workshop_final.ipynb` - Final generated notebook ready for workshop
|
||||
|
||||
71
workshops/2025-07-16/hack/analyze_log_capture.py
Normal file
71
workshops/2025-07-16/hack/analyze_log_capture.py
Normal file
@@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Analyze notebook for BAML log capture success/failure
|
||||
"""
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
|
||||
def check_logs(notebook_path):
    """Check whether BAML logs were captured in an executed notebook.

    Code cells whose source mentions ``run_with_baml_logs`` are treated as
    log-capture test cells. The stream outputs of those cells are searched
    for the ``---Parsed Response (class DoneForNow)---`` marker.

    Args:
        notebook_path: Path to the executed ``.ipynb`` file.

    Returns:
        A ``(found_capture_test, found_log_pattern)`` tuple of booleans.
        Both are ``False`` when the notebook file does not exist.
    """
    if not os.path.exists(notebook_path):
        print(f"❌ Notebook not found: {notebook_path}")
        return False, False

    # Notebook files are UTF-8 JSON; do not depend on the platform default
    # encoding, which can fail on the emoji/ANSI text stored in outputs.
    with open(notebook_path, encoding='utf-8') as f:
        nb = json.load(f)

    found_log_pattern = False
    found_capture_test = False

    # .get() keeps a notebook without 'cells'/'cell_type' from raising KeyError.
    for i, cell in enumerate(nb.get('cells', [])):
        if cell.get('cell_type') != 'code' or 'outputs' not in cell:
            continue

        # Check if this is a log capture test cell
        source = ''.join(cell.get('source', []))
        if 'run_with_baml_logs' not in source:
            continue

        found_capture_test = True
        print(f'Found log capture test in cell {i}')

        # Check outputs for BAML logs
        for output in cell['outputs']:
            if output.get('output_type') != 'stream' or 'text' not in output:
                continue

            # 'text' may be a list of lines or a single string; join handles both.
            text = ''.join(output['text'])

            # Look for the specific BAML log pattern
            if '---Parsed Response (class DoneForNow)---' in text:
                found_log_pattern = True
                print(f'✅ FOUND BAML LOG PATTERN in cell {i} output!')
                log_lines = [line for line in text.split('\n') if 'Parsed Response' in line]
                if log_lines:
                    print(f'Log excerpt: {log_lines[0]}')

            # Also check for our test markers
            if 'Captured BAML Logs' in text:
                print(f'Found "Captured BAML Logs" section in cell {i}')
            if 'No BAML Logs Captured' in text:
                print(f'Found "No BAML Logs Captured" section in cell {i}')

    return found_capture_test, found_log_pattern
|
||||
|
||||
def main():
    """CLI entry point: ``python analyze_log_capture.py <notebook_path>``.

    Runs ``check_logs`` on the given notebook and exits with status 0 when
    the BAML log pattern was found in a log-capture test cell, 1 otherwise
    (including wrong usage).
    """
    if len(sys.argv) != 2:
        # Usage errors go to stderr so they are not mistaken for test output.
        print("Usage: python analyze_log_capture.py <notebook_path>", file=sys.stderr)
        sys.exit(1)

    notebook_path = sys.argv[1]
    capture_test_found, log_pattern_found = check_logs(notebook_path)

    if not capture_test_found:
        print('❌ FAIL: No log capture test found in notebook')
        sys.exit(1)

    if log_pattern_found:
        print('✅ PASS: BAML logs successfully captured in notebook output!')
        sys.exit(0)
    else:
        print('❌ FAIL: BAML log pattern not found in captured output')
        print('This means the log capture method is NOT working')
        sys.exit(1)


if __name__ == '__main__':
    main()
|
||||
87
workshops/2025-07-16/hack/inspect_notebook.py
Normal file
87
workshops/2025-07-16/hack/inspect_notebook.py
Normal file
@@ -0,0 +1,87 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Utility to inspect notebook cell outputs for debugging
|
||||
"""
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
|
||||
def _print_output(index, output):
    """Print a summary of one cell output (type, text preview, data keys, error)."""
    output_type = output.get('output_type', 'unknown')
    print(f" Output {index}: type={output_type}")

    if 'text' in output:
        # 'text' may be a list of lines or a single string; join handles both.
        text = ''.join(output['text'])
        print(f" Text length: {len(text)} chars")

        # Show first few lines for context
        for line in text.split('\n')[:5]:
            if line.strip():
                print(f" > {line[:80]}...")

        # Check for interesting patterns
        patterns = ['BAML', 'Parsed', 'Response', 'Error', 'Exception']
        found_patterns = [p for p in patterns if p in text]
        if found_patterns:
            print(f" 🎯 Found patterns: {found_patterns}")

    elif 'data' in output:
        data_keys = list(output['data'].keys())
        print(f" Data keys: {data_keys}")

    # Check for execution errors
    if output_type == 'error':
        print(f" ❌ ERROR: {output.get('ename', 'Unknown')}")
        print(f" 💬 Message: {output.get('evalue', 'No message')}")
        if 'traceback' in output:
            print(f" 📍 Traceback: {len(output['traceback'])} lines")
            # Show last few lines of traceback
            for line in output['traceback'][-3:]:
                print(f" 🔍 {line.strip()}")


def inspect_notebook(notebook_path, filter_keyword=None):
    """Print the code cells of a notebook together with their outputs.

    Args:
        notebook_path: Path to the ``.ipynb`` file to inspect.
        filter_keyword: Optional keyword; when given, only code cells whose
            *source* contains it (case-insensitive) are shown.

    Returns:
        None. Prints a "not found" message and returns early when the file
        does not exist.
    """
    if not os.path.exists(notebook_path):
        print(f"❌ Notebook not found: {notebook_path}")
        return

    # Notebook files are UTF-8 JSON; do not depend on the platform default
    # encoding, which can fail on the emoji/ANSI text stored in outputs.
    with open(notebook_path, encoding='utf-8') as f:
        nb = json.load(f)

    # .get() keeps a notebook without 'cells'/'cell_type' from raising KeyError.
    cells = nb.get('cells', [])

    print(f"📓 Inspecting notebook: {notebook_path}")
    print(f"📊 Total cells: {len(cells)}")
    print("=" * 60)

    for i, cell in enumerate(cells):
        if cell.get('cell_type') != 'code':
            continue

        source = ''.join(cell.get('source', []))

        # Filter by keyword if provided
        if filter_keyword and filter_keyword.lower() not in source.lower():
            continue

        print(f"\n🔍 CELL {i} ({'code'})")
        print("📝 SOURCE:")
        # Only the first 300 characters of long sources are shown.
        print(source[:300] + "..." if len(source) > 300 else source)

        outputs = cell.get('outputs')
        if outputs:
            print(f"\n📤 OUTPUTS ({len(outputs)} outputs):")
            for j, output in enumerate(outputs):
                _print_output(j, output)
        else:
            print("\n📤 No outputs")

        print("-" * 40)
|
||||
|
||||
def main():
    """CLI entry point: ``python inspect_notebook.py <notebook_path> [filter_keyword]``.

    Passes the notebook path and the optional keyword filter to
    ``inspect_notebook``; exits with status 1 when no path is given.
    """
    if len(sys.argv) < 2:
        # Usage errors go to stderr so they are not mixed into the inspection dump.
        print("Usage: python inspect_notebook.py <notebook_path> [filter_keyword]", file=sys.stderr)
        sys.exit(1)

    notebook_path = sys.argv[1]
    filter_keyword = sys.argv[2] if len(sys.argv) > 2 else None

    inspect_notebook(notebook_path, filter_keyword)


if __name__ == '__main__':
    main()
|
||||
31
workshops/2025-07-16/hack/minimal_test.ipynb
Normal file
31
workshops/2025-07-16/hack/minimal_test.ipynb
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"print(\"Hello stdout!\")\n",
|
||||
"print(\"Hello stderr!\", file=sys.stderr)\n",
|
||||
"with open(\"test_output.txt\", \"w\") as f:\n",
|
||||
" f.write(\"Notebook executed successfully!\\n\")\n",
|
||||
"print(\"✅ Test complete\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"version": "3.8.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
35
workshops/2025-07-16/hack/test_log_capture.sh
Executable file
35
workshops/2025-07-16/hack/test_log_capture.sh
Executable file
@@ -0,0 +1,35 @@
|
||||
#!/bin/bash
# End-to-end check that BAML logs show up in executed notebook output:
# generate a test notebook, run it in the Colab simulator, then inspect and
# analyze the executed copy. Run from the directory containing these scripts.
set -e

# Remove the generated notebook on every exit path; under `set -e` a failing
# step would otherwise skip the cleanup at the end of the script.
cleanup() {
    echo "🧹 Cleaning up..."
    rm -f test_capture.ipynb
}
trap cleanup EXIT

echo "🧪 Testing BAML Log Capture..."

# Clean up any previous test
rm -f test_capture.ipynb
rm -rf tmp/test_capture_*

# Generate test notebook
echo "📝 Generating test notebook..."
uv run python walkthroughgen_py.py simple_log_test.yaml -o test_capture.ipynb

# Run in sim. Its output is discarded, so report a failure explicitly rather
# than letting `set -e` end the script without any message.
echo "🚀 Running test in sim..."
if ! ./test_notebook_colab_sim.sh test_capture.ipynb > /dev/null 2>&1; then
    echo "❌ FAIL: notebook simulator exited with an error" >&2
    exit 1
fi

# Find the executed notebook in the timestamped directory (newest tmp/test_* first)
NOTEBOOK_DIR=$(ls -1dt tmp/test_* 2>/dev/null | head -1)
if [ -z "$NOTEBOOK_DIR" ]; then
    echo "❌ FAIL: no tmp/test_* directory found after simulator run" >&2
    exit 1
fi
NOTEBOOK_PATH="$NOTEBOOK_DIR/test_notebook.ipynb"

echo "📋 Analyzing results from $NOTEBOOK_PATH..."

# First dump debug info
echo "🔍 Dumping debug info..."
python3 inspect_notebook.py "$NOTEBOOK_PATH" "run_with_baml_logs"

echo ""
echo "📊 Running log capture analysis..."

# Check for BAML log patterns in the executed notebook
python3 analyze_log_capture.py "$NOTEBOOK_PATH"
|
||||
426
workshops/2025-07-16/hack/testing.md
Normal file
426
workshops/2025-07-16/hack/testing.md
Normal file
@@ -0,0 +1,426 @@
|
||||
# Jupyter Notebook Testing Framework
|
||||
|
||||
This document describes the general testing framework for validating any functionality in Jupyter notebooks, with a specific example of testing BAML log capture.
|
||||
|
||||
## General Framework
|
||||
|
||||
### Overview
|
||||
|
||||
The testing framework provides a complete iteration loop for testing notebook implementations:
|
||||
|
||||
1. **Generate** test notebooks with specific functionality
|
||||
2. **Execute** notebooks in a simulated Google Colab environment
|
||||
3. **Analyze** executed notebooks for expected outputs and behaviors
|
||||
4. **Report** clear pass/fail results
|
||||
|
||||
### Core Components
|
||||
|
||||
#### Notebook Simulator (`test_notebook_colab_sim.sh`)
|
||||
|
||||
The simulation script creates a realistic Google Colab environment for any notebook:
|
||||
|
||||
**Environment Setup:**
|
||||
- Creates timestamped test directory: `./tmp/test_YYYYMMDD_HHMMSS/`
|
||||
- Sets up fresh Python virtual environment
|
||||
- Installs Jupyter dependencies (`notebook`, `nbconvert`, `ipykernel`)
|
||||
|
||||
**Notebook Execution:**
|
||||
- Copies test notebook to clean environment
|
||||
- Uses `ExecutePreprocessor` to run all cells (simulates Colab execution)
|
||||
- **Critical:** Activates virtual environment before execution
|
||||
- **Critical:** Saves executed notebook with cell outputs back to disk
|
||||
|
||||
**Usage:**
|
||||
```bash
|
||||
./test_notebook_colab_sim.sh your_notebook.ipynb
|
||||
```
|
||||
|
||||
The simulator will:
|
||||
- Execute all cells in the notebook
|
||||
- Preserve the test directory for inspection
|
||||
- Show final directory structure
|
||||
- Report success/failure
|
||||
|
||||
#### Output Inspector (`inspect_notebook.py`)
|
||||
|
||||
Debug utility for examining notebook cell outputs in detail:
|
||||
|
||||
**Features:**
|
||||
- Shows each code cell's source (truncated to the first 300 characters)
|
||||
- Displays all output types (stream, execute_result, error)
|
||||
- Highlights patterns in output text
|
||||
- Shows execution errors with tracebacks
|
||||
- Filters cells by keywords for focused debugging
|
||||
|
||||
**Usage:**
|
||||
```bash
|
||||
# Inspect all cells
|
||||
python3 inspect_notebook.py path/to/notebook.ipynb
|
||||
|
||||
# Filter for specific content
|
||||
python3 inspect_notebook.py path/to/notebook.ipynb "keyword"
|
||||
|
||||
# Show cells whose source mentions "error" (the keyword filter matches cell source, not outputs)
|
||||
python3 inspect_notebook.py path/to/notebook.ipynb "error"
|
||||
```
|
||||
|
||||
**Sample Output:**
|
||||
```
|
||||
🔍 CELL 0 (code)
|
||||
📝 SOURCE:
|
||||
import sys
|
||||
print("Hello!")
|
||||
print("Error!", file=sys.stderr)
|
||||
|
||||
📤 OUTPUTS (2 outputs):
|
||||
Output 0: type=stream
|
||||
Text length: 7 chars
|
||||
> Hello!...
|
||||
Output 1: type=stream
|
||||
Text length: 7 chars
|
||||
> Error!...
|
||||
🎯 Found patterns: ['Error']
|
||||
```
|
||||
|
||||
### Key Insights for Notebook Testing
|
||||
|
||||
#### Execution Environment
|
||||
1. **Virtual environment activation is critical** - Without it, execution fails silently
|
||||
2. **Output persistence must be explicit** - `ExecutePreprocessor` only modifies notebook in memory
|
||||
3. **Check execution counts** - `execution_count=None` means cell never executed
|
||||
4. **Handle different output types** - stream, execute_result, error, display_data
|
||||
|
||||
#### Common Debugging Steps
|
||||
1. **Verify basic execution:**
|
||||
```bash
|
||||
python3 -c "
|
||||
import json
|
||||
nb = json.load(open('path/to/notebook.ipynb'))
|
||||
print('Execution counts:', [cell.get('execution_count') for cell in nb['cells'] if cell['cell_type']=='code'])
|
||||
"
|
||||
```
|
||||
|
||||
2. **Check for execution errors:**
|
||||
```bash
|
||||
python3 inspect_notebook.py path/to/notebook.ipynb "error"
|
||||
```
|
||||
|
||||
3. **Look for specific output patterns:**
|
||||
```bash
|
||||
python3 inspect_notebook.py path/to/notebook.ipynb "your_pattern"
|
||||
```
|
||||
|
||||
### Creating Custom Tests
|
||||
|
||||
#### 1. Minimal Test Template
|
||||
|
||||
Create a simple notebook that tests basic functionality:
|
||||
|
||||
```json
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Test basic execution\n",
|
||||
"print('Hello from notebook!')\n",
|
||||
"\n",
|
||||
"# Test file creation\n",
|
||||
"with open('test.txt', 'w') as f:\n",
|
||||
" f.write('Test successful\\n')\n",
|
||||
"\n",
|
||||
"# Test error handling\n",
|
||||
"try:\n",
|
||||
" result = your_function_to_test()\n",
|
||||
" print(f'Result: {result}')\n",
|
||||
"except Exception as e:\n",
|
||||
" print(f'Error: {e}')"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
```
|
||||
|
||||
#### 2. Test Script Template
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
echo "🧪 Testing [Your Feature]..."
|
||||
|
||||
# Clean up any previous test
|
||||
rm -f test_notebook.ipynb
|
||||
|
||||
# Generate or copy your test notebook
|
||||
cp your_test_notebook.ipynb test_notebook.ipynb
|
||||
|
||||
# Run in simulator
|
||||
echo "🚀 Running test in sim..."
|
||||
./test_notebook_colab_sim.sh test_notebook.ipynb
|
||||
|
||||
# Find the executed notebook
|
||||
NOTEBOOK_DIR=$(ls -1dt tmp/test_* | head -1)
|
||||
NOTEBOOK_PATH="$NOTEBOOK_DIR/test_notebook.ipynb"
|
||||
|
||||
# Analyze results
|
||||
echo "📋 Analyzing results..."
|
||||
python3 inspect_notebook.py "$NOTEBOOK_PATH" "your_search_term"
|
||||
|
||||
# Add your custom analysis
|
||||
python3 -c "
|
||||
import json
|
||||
with open('$NOTEBOOK_PATH') as f:
|
||||
nb = json.load(f)
|
||||
|
||||
# Your custom analysis logic here
|
||||
success = check_for_expected_outputs(nb)
|
||||
|
||||
if success:
|
||||
print('✅ PASS: Test succeeded!')
|
||||
else:
|
||||
print('❌ FAIL: Test failed!')
|
||||
exit(1)
|
||||
"
|
||||
|
||||
echo "🧹 Cleaning up..."
|
||||
rm -f test_notebook.ipynb
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Use Case: BAML Log Capture Testing
|
||||
|
||||
This section demonstrates how to use the general framework for a specific use case: testing BAML log capture in notebooks.
|
||||
|
||||
### Problem Statement
|
||||
|
||||
BAML (a language model framework) uses FFI bindings to a Rust binary and outputs logs to stderr. We need to test whether different log capture methods can successfully capture these logs in Jupyter notebook cells.
|
||||
|
||||
### Test Implementation
|
||||
|
||||
#### Test Configuration (`simple_log_test.yaml`)
|
||||
|
||||
```yaml
|
||||
title: "BAML Log Capture Test"
|
||||
text: "Simple test for log capture"
|
||||
|
||||
sections:
|
||||
- title: "Log Capture Test"
|
||||
steps:
|
||||
- baml_setup: true
|
||||
- fetch_file:
|
||||
src: "walkthrough/01-agent.baml"
|
||||
dest: "baml_src/agent.baml"
|
||||
- file:
|
||||
src: "./simple_main.py"
|
||||
- text: "Testing log capture with show_logs=true:"
|
||||
- run_main:
|
||||
args: "What is 2+2?"
|
||||
show_logs: true
|
||||
```
|
||||
|
||||
#### Test Function (`simple_main.py`)
|
||||
|
||||
```python
|
||||
def main(message="What is 2+2?"):
|
||||
"""Simple main function that calls BAML directly"""
|
||||
client = get_baml_client()
|
||||
|
||||
# Call the BAML function - this should generate logs
|
||||
result = client.DetermineNextStep(f"User asked: {message}")
|
||||
|
||||
print(f"Input: {message}")
|
||||
print(f"Result: {result}")
|
||||
return result
|
||||
```
|
||||
|
||||
#### Log Capture Implementation
|
||||
|
||||
The current working implementation in `walkthroughgen_py.py`:
|
||||
|
||||
```python
|
||||
def run_with_baml_logs(func, *args, **kwargs):
|
||||
"""Test log capture using IPython capture_output"""
|
||||
# Ensure BAML_LOG is set
|
||||
if 'BAML_LOG' not in os.environ:
|
||||
os.environ['BAML_LOG'] = 'info'
|
||||
|
||||
print(f"[LOG CAPTURE TEST] Running with BAML_LOG={os.environ.get('BAML_LOG')}...")
|
||||
|
||||
# Capture both stdout and stderr
|
||||
with capture_output() as captured:
|
||||
result = func(*args, **kwargs)
|
||||
|
||||
# Display captured outputs
|
||||
if captured.stdout:
|
||||
print("=== Captured Stdout ===")
|
||||
print(captured.stdout)
|
||||
|
||||
if captured.stderr:
|
||||
print("=== Captured BAML Logs ===")
|
||||
print(captured.stderr)
|
||||
else:
|
||||
print("=== No BAML Logs Captured ===")
|
||||
|
||||
print("=== Function Result ===")
|
||||
print(result)
|
||||
|
||||
return result
|
||||
```
|
||||
|
||||
### Test Execution
|
||||
|
||||
#### Main Test Script (`test_log_capture.sh`)
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
echo "🧪 Testing BAML Log Capture..."
|
||||
|
||||
# Generate test notebook from YAML config
|
||||
echo "📝 Generating test notebook..."
|
||||
uv run python walkthroughgen_py.py simple_log_test.yaml -o test_capture.ipynb
|
||||
|
||||
# Run in simulator
|
||||
echo "🚀 Running test in sim..."
|
||||
./test_notebook_colab_sim.sh test_capture.ipynb
|
||||
|
||||
# Find the executed notebook
|
||||
NOTEBOOK_DIR=$(ls -1dt tmp/test_* | head -1)
|
||||
NOTEBOOK_PATH="$NOTEBOOK_DIR/test_notebook.ipynb"
|
||||
|
||||
echo "📋 Analyzing results from $NOTEBOOK_PATH..."
|
||||
|
||||
# Debug output
|
||||
echo "🔍 Dumping debug info..."
|
||||
python3 inspect_notebook.py "$NOTEBOOK_PATH" "run_with_baml_logs"
|
||||
|
||||
# Analyze for BAML log patterns
|
||||
echo "📊 Running log capture analysis..."
|
||||
python3 analyze_log_capture.py "$NOTEBOOK_PATH"
|
||||
|
||||
echo "🧹 Cleaning up..."
|
||||
rm -f test_capture.ipynb
|
||||
```
|
||||
|
||||
#### Analysis Script (`analyze_log_capture.py`)
|
||||
|
||||
```python
|
||||
#!/usr/bin/env python3
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
|
||||
def check_logs(notebook_path):
|
||||
"""Check if BAML logs were captured in the notebook"""
|
||||
|
||||
with open(notebook_path) as f:
|
||||
nb = json.load(f)
|
||||
|
||||
found_log_pattern = False
|
||||
found_capture_test = False
|
||||
|
||||
for i, cell in enumerate(nb['cells']):
|
||||
if cell['cell_type'] == 'code' and 'outputs' in cell:
|
||||
source = ''.join(cell.get('source', []))
|
||||
if 'run_with_baml_logs' in source:
|
||||
found_capture_test = True
|
||||
print(f'Found log capture test in cell {i}')
|
||||
|
||||
# Check outputs for BAML logs
|
||||
for output in cell['outputs']:
|
||||
if output.get('output_type') == 'stream' and 'text' in output:
|
||||
text = ''.join(output['text'])
|
||||
# Look for the specific BAML log pattern
|
||||
if '---Parsed Response (class DoneForNow)---' in text:
|
||||
found_log_pattern = True
|
||||
print(f'✅ FOUND BAML LOG PATTERN in cell {i} output!')
|
||||
|
||||
return found_capture_test, found_log_pattern
|
||||
|
||||
# Run analysis and return pass/fail
|
||||
capture_test_found, log_pattern_found = check_logs(sys.argv[1])
|
||||
|
||||
if not capture_test_found:
|
||||
print('❌ FAIL: No log capture test found in notebook')
|
||||
sys.exit(1)
|
||||
|
||||
if log_pattern_found:
|
||||
print('✅ PASS: BAML logs successfully captured in notebook output!')
|
||||
sys.exit(0)
|
||||
else:
|
||||
print('❌ FAIL: BAML log pattern not found in captured output')
|
||||
sys.exit(1)
|
||||
```
|
||||
|
||||
### Expected Output Flow
|
||||
|
||||
#### Successful Test Run:
|
||||
```bash
|
||||
$ ./test_log_capture.sh
|
||||
|
||||
🧪 Testing BAML Log Capture...
|
||||
📝 Generating test notebook...
|
||||
Generated notebook: test_capture.ipynb
|
||||
🚀 Running test in sim...
|
||||
🧪 Creating clean test environment in: ./tmp/test_20250716_191106
|
||||
📁 Test directory will be preserved for inspection
|
||||
🐍 Creating fresh Python virtual environment...
|
||||
📦 Installing Jupyter dependencies...
|
||||
🏃 Running notebook in clean environment...
|
||||
✅ Notebook executed successfully!
|
||||
💾 Executed notebook saved with outputs
|
||||
|
||||
📋 Analyzing results from tmp/test_20250716_191106/test_notebook.ipynb...
|
||||
🔍 Dumping debug info...
|
||||
🔍 CELL 11 (code)
|
||||
|
||||
📤 OUTPUTS (3 outputs):
|
||||
Output 0: type=stream
|
||||
Text length: 49 chars
|
||||
> [LOG CAPTURE TEST] Running with BAML_LOG=info......
|
||||
Output 1: type=stream
|
||||
Text length: 1272 chars
|
||||
> 2025-07-16T19:11:22.445 [BAML [92mINFO[0m] [35mFunction DetermineNextStep[0m...
|
||||
🎯 Found patterns: ['BAML', 'Parsed', 'Response']
|
||||
|
||||
📊 Running log capture analysis...
|
||||
Found log capture test in cell 11
|
||||
✅ FOUND BAML LOG PATTERN in cell 11 output!
|
||||
✅ PASS: BAML logs successfully captured in notebook output!
|
||||
🧹 Cleaning up...
|
||||
```
|
||||
|
||||
### Key BAML-Specific Insights
|
||||
|
||||
1. **BAML logs go to stderr** - Due to FFI bindings to Rust binary
|
||||
2. **Requires `BAML_LOG=info`** - Environment variable controls verbosity
|
||||
3. **Logs include ANSI color codes** - Need to handle terminal formatting
|
||||
4. **Pattern matching** - Look for `---Parsed Response (class DoneForNow)---` to confirm successful execution
|
||||
5. **IPython capture_output() works** - Successfully captures stderr in notebook context
|
||||
|
||||
### Iteration Loop Benefits
|
||||
|
||||
This framework enables rapid testing of different log capture approaches:
|
||||
|
||||
1. **Modify** the `run_with_baml_logs` function in `walkthroughgen_py.py`
|
||||
2. **Run** `./test_log_capture.sh`
|
||||
3. **Get** immediate pass/fail feedback
|
||||
4. **Debug** with `inspect_notebook.py` if needed
|
||||
5. **Repeat** until working implementation found
|
||||
|
||||
This same pattern can be applied to test any notebook functionality: library integrations, environment setup, output formatting, error handling, etc.
|
||||
@@ -64,6 +64,11 @@ try:
|
||||
ep.preprocess(nb, {'metadata': {'path': '.'}})
|
||||
print("\n✅ Notebook executed successfully!")
|
||||
|
||||
# Save the executed notebook back to disk
|
||||
with open('test_notebook.ipynb', 'w') as f:
|
||||
nbformat.write(nb, f)
|
||||
print("💾 Executed notebook saved with outputs")
|
||||
|
||||
# Show final directory structure
|
||||
print("\n📁 Final directory structure:")
|
||||
for root, dirs, files in os.walk('.'):
|
||||
@@ -85,7 +90,7 @@ EOF
|
||||
|
||||
# Run the notebook
|
||||
echo "🏃 Running notebook in clean environment..."
|
||||
python run_notebook.py
|
||||
source venv/bin/activate && python run_notebook.py
|
||||
|
||||
# Check what BAML files were created
|
||||
echo -e "\n📄 BAML files created:"
|
||||
|
||||
@@ -11,68 +11,84 @@ sections:
|
||||
steps:
|
||||
- text: |
|
||||
This guide will walk you through building agents in Python with BAML.
|
||||
|
||||
|
||||
We'll start simple with a hello world program and gradually build up to a full agent.
|
||||
|
||||
|
||||
For this notebook, you'll need to have your OpenAI API key saved in Google Colab secrets.
|
||||
|
||||
|
||||
## Where We're Headed
|
||||
|
||||
Before we dive in, let's understand the journey ahead. We're building toward **micro-agents in deterministic DAGs** - a powerful pattern that combines the flexibility of AI with the reliability of traditional software.
|
||||
|
||||
📖 **Learn more**: [A Brief History of Software](https://github.com/humanlayer/12-factor-agents/blob/main/content/brief-history-of-software.md)
|
||||
|
||||

|
||||
|
||||
- text: "Here's our simple hello world program:"
|
||||
- file: {src: ./walkthrough/00-main.py}
|
||||
- text: "Let's run it to verify it works:"
|
||||
- run_main: {regenerate_baml: false}
|
||||
|
||||
|
||||
- name: cli-and-agent
|
||||
title: "Chapter 1 - CLI and Agent Loop"
|
||||
text: "Now let's add BAML and create our first agent with a CLI interface."
|
||||
steps:
|
||||
- text: |
|
||||
In this chapter, we'll integrate BAML to create an AI agent that can respond to user input.
|
||||
|
||||
|
||||
## What is BAML?
|
||||
|
||||
|
||||
BAML (Boundary Markup Language) is a domain-specific language designed to help developers build reliable AI workflows and agents. Created by [BoundaryML](https://www.boundaryml.com/) (a Y Combinator W23 company), BAML adds the engineering to prompt engineering.
|
||||
|
||||
|
||||
### Why BAML?
|
||||
|
||||
|
||||
- **Type-safe outputs**: Get fully type-safe outputs from LLMs, even when streaming
|
||||
- **Language agnostic**: Works with Python, TypeScript, Ruby, Go, and more
|
||||
- **LLM agnostic**: Works with any LLM provider (OpenAI, Anthropic, etc.)
|
||||
- **Better performance**: State-of-the-art structured outputs that outperform even OpenAI's native function calling
|
||||
- **Developer-friendly**: Native VSCode extension with syntax highlighting, autocomplete, and interactive playground
|
||||
|
||||
|
||||
### Learn More
|
||||
|
||||
|
||||
- 📚 [Official Documentation](https://docs.boundaryml.com/home)
|
||||
- 💻 [GitHub Repository](https://github.com/BoundaryML/baml)
|
||||
- 🎯 [What is BAML?](https://docs.boundaryml.com/guide/introduction/what-is-baml)
|
||||
- 📖 [BAML Examples](https://github.com/BoundaryML/baml-examples)
|
||||
- 🏢 [Company Website](https://www.boundaryml.com/)
|
||||
- 📰 [Blog: AI Agents Need a New Syntax](https://www.boundaryml.com/blog/ai-agents-need-new-syntax)
|
||||
|
||||
|
||||
BAML turns prompt engineering into schema engineering, where you focus on defining the structure of your data rather than wrestling with prompts. This approach leads to more reliable and maintainable AI applications.
|
||||
|
||||
|
||||
### Note on Developer Experience
|
||||
|
||||
|
||||
BAML works much better in VS Code with their official extension, which provides syntax highlighting, autocomplete, inline testing, and an interactive playground. However, for this notebook tutorial, we'll work with BAML files directly without the enhanced IDE features.
|
||||
|
||||
|
||||
## Factor 1: Natural Language to Tool Calls
|
||||
|
||||
What we're building implements the first factor of 12-factor agents - converting natural language into structured tool calls.
|
||||
|
||||
📖 **Learn more**: [Factor 1: Natural Language to Tool Calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md)
|
||||
|
||||

|
||||
|
||||
First, let's set up BAML support in our notebook.
|
||||
- baml_setup: true
|
||||
- command: "!ls baml_src"
|
||||
- text: |
|
||||
Now let's create our agent that will use BAML to process user input.
|
||||
|
||||
|
||||
First, we'll define the core agent logic:
|
||||
- file: {src: ./walkthrough/01-agent.py}
|
||||
- text: |
|
||||
Next, we need to define the BAML function that our agent will use.
|
||||
|
||||
|
||||
### Understanding BAML Syntax
|
||||
|
||||
|
||||
BAML files define:
|
||||
- **Classes**: Structured output schemas (like `DoneForNow` below)
|
||||
- **Functions**: AI-powered functions that take inputs and return structured outputs
|
||||
- **Tests**: Example inputs/outputs to validate your prompts
|
||||
|
||||
|
||||
This BAML file defines what our agent can do:
|
||||
- fetch_file: {src: ./walkthrough/01-agent.baml, dest: baml_src/agent.baml}
|
||||
- command: "!ls baml_src"
|
||||
@@ -85,35 +101,37 @@ sections:
|
||||
- `main("Tell me a joke")`
|
||||
- `main("How are you doing today?")`
|
||||
|
||||
in this case, we'll use the baml_generate function to
|
||||
generate the pydantic and python bindings from our
|
||||
baml source, but in the future we'll skip this step as it
|
||||
is done automatically by the get_baml_client() function
|
||||
|
||||
in this case, we'll use the baml_generate function to
|
||||
generate the pydantic and python bindings from our
|
||||
baml source, but in the future we'll skip this step as it
|
||||
is done automatically by the get_baml_client() function
|
||||
|
||||
- run_main: {regenerate_baml: true, args: "Hello from the Python notebook!"}
|
||||
- text: |
|
||||
In a few cases, we'll enable the baml debug logs to see the inputs/outputs to and from the model.
|
||||
- run_main: {regenerate_baml: false, args: "Hello from the Python notebook!", show_logs: true}
|
||||
- text: |
|
||||
What's most important there is that you can see the prompt and how the output_format is injected
|
||||
to tell the model what kind of json we want to return.
|
||||
|
||||
|
||||
- name: calculator-tools
|
||||
title: "Chapter 2 - Add Calculator Tools"
|
||||
text: "Let's add some calculator tools to our agent."
|
||||
steps:
|
||||
- text: |
|
||||
Let's start by adding a tool definition for the calculator.
|
||||
|
||||
|
||||
These are simple structured outputs that we'll ask the model to
|
||||
return as a "next step" in the agentic loop.
|
||||
|
||||
|
||||
## Factor 4: Tools Are Structured Outputs
|
||||
|
||||
This chapter demonstrates that tools are just structured JSON outputs from the LLM - nothing more complex!
|
||||
|
||||
📖 **Learn more**: [Factor 4: Tools Are Structured Outputs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md)
|
||||
|
||||

|
||||
|
||||
- fetch_file: {src: ./walkthrough/02-tool_calculator.baml, dest: baml_src/tool_calculator.baml}
|
||||
- command: "!ls baml_src"
|
||||
- text: |
|
||||
Now, let's update the agent's DetermineNextStep method to
|
||||
expose the calculator tools as potential next steps.
|
||||
|
||||
|
||||
- fetch_file: {src: ./walkthrough/02-agent.baml, dest: baml_src/agent.baml}
|
||||
- text: |
|
||||
Now let's update our main function to show the tool call:
|
||||
@@ -122,7 +140,7 @@ sections:
|
||||
Let's try out the calculator! The agent should recognize that you want to perform a calculation
|
||||
and return the appropriate tool call instead of just a message.
|
||||
- run_main: {regenerate_baml: false, args: "can you add 3 and 4"}
|
||||
|
||||
|
||||
- name: tool-loop
|
||||
title: "Chapter 3 - Process Tool Calls in a Loop"
|
||||
text: "Now let's add a real agentic loop that can run the tools and get a final answer from the LLM."
|
||||
@@ -132,7 +150,21 @@ sections:
|
||||
- The agent can call multiple tools in sequence
|
||||
- Each tool result is fed back to the agent
|
||||
- The agent continues until it has a final answer
|
||||
|
||||
|
||||
## The Agent Loop Pattern
|
||||
|
||||
We're implementing the core agent loop - where the AI makes decisions, executes tools, and continues until done.
|
||||
|
||||

|
||||
|
||||
## Factor 5: Unify Execution State
|
||||
|
||||
Notice how we're storing everything as events in our Thread - this is Factor 5 in action!
|
||||
|
||||
📖 **Learn more**: [Factor 5: Unify Execution State](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md)
|
||||
|
||||

|
||||
|
||||
Let's update our agent to handle tool calls properly:
|
||||
- file: {src: ./walkthrough/03-agent.py}
|
||||
- text: |
|
||||
@@ -141,17 +173,13 @@ sections:
|
||||
- text: |
|
||||
Let's try it out! The agent should now call the tool and return the calculated result:
|
||||
- run_main: {regenerate_baml: false, args: "can you add 3 and 4"}
|
||||
- text: |
|
||||
You can run with BAML logs enabled to see how the prompt changed when we added the new
|
||||
tool types to our union of response types.
|
||||
- run_main: {regenerate_baml: false, args: "can you add 3 and 4", show_logs: true}
|
||||
- text: |
|
||||
You should see the agent:
|
||||
1. Recognize it needs to use the add tool
|
||||
2. Call the tool with the correct parameters
|
||||
3. Get the result (7)
|
||||
4. Generate a final response incorporating the result
|
||||
|
||||
|
||||
For more complex calculations, we need to handle all calculator operations. Let's add support for subtract, multiply, and divide:
|
||||
- file: {src: ./walkthrough/03b-agent.py}
|
||||
- text: |
|
||||
@@ -165,28 +193,28 @@ sections:
|
||||
- run_main: {regenerate_baml: false, args: "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result"}
|
||||
- text: |
|
||||
Congratulations! You've taken your first step into hand-rolling an agent loop.
|
||||
|
||||
|
||||
Key concepts you've learned:
|
||||
- **Thread Management**: Tracking conversation history and tool calls
|
||||
- **Tool Execution**: Processing different tool types and returning results
|
||||
- **Agent Loop**: Continuing until the agent has a final answer
|
||||
|
||||
|
||||
From here, we'll start incorporating more intermediate and advanced concepts for 12-factor agents.
|
||||
|
||||
|
||||
- name: baml-tests
|
||||
title: "Chapter 4 - Add Tests to agent.baml"
|
||||
text: "Let's add some tests to our BAML agent."
|
||||
steps:
|
||||
- text: |
|
||||
In this chapter, we'll learn about BAML testing - a powerful feature that helps ensure your agents behave correctly.
|
||||
|
||||
|
||||
## Why Test BAML Functions?
|
||||
|
||||
|
||||
- **Catch regressions**: Ensure changes don't break existing behavior
|
||||
- **Document behavior**: Tests serve as living documentation
|
||||
- **Validate edge cases**: Test complex scenarios and conversation flows
|
||||
- **CI/CD integration**: Run tests automatically in your pipeline
|
||||
|
||||
|
||||
Let's start with a simple test that checks the agent's ability to handle basic interactions:
|
||||
- fetch_file: {src: ./walkthrough/04-agent.baml, dest: baml_src/agent.baml}
|
||||
- text: |
|
||||
@@ -194,14 +222,14 @@ sections:
|
||||
- command: "!baml-cli test"
|
||||
- text: |
|
||||
Now let's improve the tests with assertions! Assertions let you verify specific properties of the agent's output.
|
||||
|
||||
|
||||
## BAML Assertion Syntax
|
||||
|
||||
|
||||
Assertions use the `@@assert` directive:
|
||||
```
|
||||
@@assert(name, {{condition}})
|
||||
```
|
||||
|
||||
|
||||
- `name`: A descriptive name for the assertion
|
||||
- `condition`: A boolean expression using `this` to access the output
|
||||
- fetch_file: {src: ./walkthrough/04b-agent.baml, dest: baml_src/agent.baml}
|
||||
@@ -210,7 +238,7 @@ sections:
|
||||
- command: "!baml-cli test"
|
||||
- text: |
|
||||
Finally, let's add more complex test cases that test multi-step conversations.
|
||||
|
||||
|
||||
These tests simulate an entire conversation flow, including:
|
||||
- User input
|
||||
- Tool calls made by the agent
|
||||
@@ -222,14 +250,14 @@ sections:
|
||||
- command: "!baml-cli test"
|
||||
- text: |
|
||||
## Key Testing Concepts
|
||||
|
||||
|
||||
1. **Test Structure**: Each test specifies functions, arguments, and assertions
|
||||
2. **Progressive Testing**: Start simple, then test complex scenarios
|
||||
3. **Conversation History**: Test how the agent handles multi-turn conversations
|
||||
4. **Tool Integration**: Verify the agent correctly uses tools in sequence
|
||||
|
||||
|
||||
With these tests in place, you can confidently modify your agent knowing that core functionality is protected by automated tests!
|
||||
|
||||
|
||||
- name: human-tools
|
||||
title: "Chapter 5 - Multiple Human Tools"
|
||||
text: |
|
||||
@@ -237,16 +265,16 @@ sections:
|
||||
steps:
|
||||
- text: |
|
||||
So far, our agent only returns a final answer with "done_for_now". But what if the agent needs clarification?
|
||||
|
||||
|
||||
Let's add a new tool that allows the agent to request more information from the user.
|
||||
|
||||
|
||||
## Why Human-in-the-Loop?
|
||||
|
||||
|
||||
- **Handle ambiguous inputs**: When user input is unclear or contains typos
|
||||
- **Request missing information**: When the agent needs more context
|
||||
- **Confirm sensitive operations**: Before performing important actions
|
||||
- **Interactive workflows**: Build conversational agents that engage users
|
||||
|
||||
|
||||
First, let's update our BAML file to include a ClarificationRequest tool:
|
||||
- fetch_file: {src: ./walkthrough/05-agent.baml, dest: baml_src/agent.baml}
|
||||
- text: |
|
||||
@@ -265,76 +293,69 @@ sections:
|
||||
3. In Colab, you'll be prompted to type a response
|
||||
4. In local testing, an auto-response is provided
|
||||
5. The agent continues with the clarified input
|
||||
|
||||
|
||||
## Interactive Testing in Colab
|
||||
|
||||
|
||||
When running in Google Colab, the `input()` function will create an interactive text box where you can type your response. Try different clarifications to see how the agent adapts!
|
||||
|
||||
|
||||
## Key Concepts
|
||||
|
||||
|
||||
- **Human Tools**: Special tool types that return control to the human
|
||||
- **Conversation Flow**: The agent can pause execution to get human input
|
||||
- **Context Preservation**: The full conversation history is maintained
|
||||
- **Flexible Handling**: Different behaviors for different environments
|
||||
|
||||
|
||||
- name: customize-prompt
|
||||
title: "Chapter 6 - Customize Your Prompt with Reasoning"
|
||||
text: |
|
||||
In this section, we'll explore how to customize the prompt of the agent with reasoning steps.
|
||||
|
||||
|
||||
This is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md)
|
||||
steps:
|
||||
- text: |
|
||||
## Why Add Reasoning to Prompts?
|
||||
|
||||
|
||||
Adding explicit reasoning steps to your prompts can significantly improve agent performance:
|
||||
|
||||
|
||||
- **Better decisions**: The model thinks through problems step-by-step
|
||||
- **Transparency**: You can see the model's thought process
|
||||
- **Fewer errors**: Structured thinking reduces mistakes
|
||||
- **Debugging**: Easier to identify where reasoning went wrong
|
||||
|
||||
|
||||
Let's update our agent prompt to include a reasoning step:
|
||||
- fetch_file: {src: ./walkthrough/06-agent.baml, dest: baml_src/agent.baml}
|
||||
- text: |
|
||||
Now let's test it with a simple calculation to see the reasoning in action:
|
||||
|
||||
**Note:** The BAML logs below will show the model's reasoning steps. Look for the `<reasoning>` tags in the logs to see how the model thinks through the problem before deciding what to do.
|
||||
- run_main: {args: "can you multiply 3 and 4", show_logs: true}
|
||||
|
||||
- run_main: {args: "can you multiply 3 and 4"}
|
||||
- text: |
|
||||
You should see the reasoning steps in the BAML logs above. The model explicitly thinks through what it needs to do before making a decision.
|
||||
|
||||
💡 **Tip:** If you want to see BAML logs for any other calls in this notebook, you can use the `run_with_baml_logs` helper function:
|
||||
```python
|
||||
# Instead of: main("your message")
|
||||
# Use: run_with_baml_logs(main, "your message")
|
||||
```
|
||||
|
||||
The model uses explicit reasoning steps to think through the problem before making a decision.
|
||||
|
||||
## Advanced Prompt Engineering
|
||||
|
||||
|
||||
You can enhance your prompts further by:
|
||||
- Adding specific reasoning templates for different tasks
|
||||
- Including examples of good reasoning
|
||||
- Structuring the reasoning with numbered steps
|
||||
- Adding checks for common mistakes
|
||||
|
||||
|
||||
The key is to guide the model's thinking process while still allowing flexibility.
|
||||
|
||||
|
||||
- name: context-window
|
||||
title: "Chapter 7 - Customize Your Context Window"
|
||||
text: |
|
||||
In this section, we'll explore how to customize the context window of the agent.
|
||||
|
||||
|
||||
This is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md)
|
||||
steps:
|
||||
- text: |
|
||||
## Context Window Serialization
|
||||
|
||||
|
||||
How you format your conversation history can significantly impact:
|
||||
- **Token usage**: Some formats are more efficient
|
||||
- **Model understanding**: Clear structure helps the model
|
||||
- **Debugging**: Readable formats help development
|
||||
|
||||
|
||||
Let's implement two serialization formats: pretty-printed JSON and XML.
|
||||
- file: {src: ./walkthrough/07-agent.py}
|
||||
- text: |
|
||||
@@ -348,15 +369,15 @@ sections:
|
||||
- run_main: {regenerate_baml: false, args: "can you multiply 3 and 4, then divide the result by 2", kwargs: {use_xml: true}}
|
||||
- text: |
|
||||
## XML vs JSON Trade-offs
|
||||
|
||||
|
||||
**XML Benefits**:
|
||||
- More token-efficient for nested data
|
||||
- Clear hierarchy with opening/closing tags
|
||||
- Better for long conversations
|
||||
|
||||
|
||||
**JSON Benefits**:
|
||||
- Familiar to most developers
|
||||
- Easy to parse and debug
|
||||
- Native to JavaScript/Python
|
||||
|
||||
Choose based on your specific needs and token constraints!
|
||||
|
||||
Choose based on your specific needs and token constraints!
|
||||
|
||||
@@ -1,9 +1,13 @@
|
||||
# Agent implementation with clarification support
|
||||
import json
|
||||
|
||||
def agent_loop(thread, clarification_handler):
|
||||
"""Run the agent loop until we get a final answer."""
|
||||
while True:
|
||||
def agent_loop(thread, clarification_handler, max_iterations=3):
|
||||
"""Run the agent loop until we get a final answer (max 3 iterations)."""
|
||||
iteration_count = 0
|
||||
while iteration_count < max_iterations:
|
||||
iteration_count += 1
|
||||
print(f"🔄 Agent loop iteration {iteration_count}/{max_iterations}")
|
||||
|
||||
# Get the client
|
||||
baml_client = get_baml_client()
|
||||
|
||||
@@ -63,6 +67,9 @@ def agent_loop(thread, clarification_handler):
|
||||
})
|
||||
else:
|
||||
return "Error: Unexpected result type"
|
||||
|
||||
# If we've reached max iterations without a final answer
|
||||
return f"Agent reached maximum iterations ({max_iterations}) without completing the task."
|
||||
|
||||
class Thread:
|
||||
"""Simple thread to track conversation history."""
|
||||
|
||||
@@ -83,86 +83,6 @@ def get_baml_client():
|
||||
init_code = "!baml-cli init"
|
||||
nb.cells.append(new_code_cell(init_code))
|
||||
|
||||
# Fourth cell: Add BAML logging helper
|
||||
logging_helper = '''# Helper function to capture BAML logs in notebook output
|
||||
import os
|
||||
import sys
|
||||
from IPython.utils.capture import capture_output
|
||||
import contextlib
|
||||
|
||||
def run_with_baml_logs(func, *args, **kwargs):
|
||||
"""Run a function and capture BAML logs in the notebook output."""
|
||||
# Ensure BAML_LOG is set
|
||||
if 'BAML_LOG' not in os.environ:
|
||||
os.environ['BAML_LOG'] = 'info'
|
||||
|
||||
print(f"Running with BAML_LOG={os.environ.get('BAML_LOG')}...")
|
||||
|
||||
# Capture both stdout and stderr
|
||||
with capture_output() as captured:
|
||||
result = func(*args, **kwargs)
|
||||
|
||||
# Display the result first
|
||||
if result is not None:
|
||||
print("=== Result ===")
|
||||
print(result)
|
||||
|
||||
# Display captured stdout if any
|
||||
if captured.stdout:
|
||||
print("\\n=== Output ===")
|
||||
print(captured.stdout)
|
||||
|
||||
# Display BAML logs from stderr
|
||||
if captured.stderr:
|
||||
print("\\n=== BAML Logs ===")
|
||||
# Format the logs for better readability
|
||||
log_lines = captured.stderr.strip().split('\\n')
|
||||
for line in log_lines:
|
||||
if 'reasoning' in line.lower() or '<reasoning>' in line:
|
||||
print(f"🤔 {line}")
|
||||
elif 'error' in line.lower():
|
||||
print(f"❌ {line}")
|
||||
elif 'warn' in line.lower():
|
||||
print(f"⚠️ {line}")
|
||||
else:
|
||||
print(f" {line}")
|
||||
|
||||
return result
|
||||
|
||||
# Alternative: Force stderr to stdout redirection
|
||||
@contextlib.contextmanager
|
||||
def redirect_stderr_to_stdout():
|
||||
"""Context manager to redirect stderr to stdout."""
|
||||
old_stderr = sys.stderr
|
||||
sys.stderr = sys.stdout
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
sys.stderr = old_stderr
|
||||
|
||||
def run_with_baml_logs_redirect(func, *args, **kwargs):
|
||||
"""Run a function with stderr redirected to stdout for immediate display."""
|
||||
if 'BAML_LOG' not in os.environ:
|
||||
os.environ['BAML_LOG'] = 'info'
|
||||
|
||||
print(f"Running with BAML_LOG={os.environ.get('BAML_LOG')} (stderr→stdout)...")
|
||||
|
||||
with redirect_stderr_to_stdout():
|
||||
result = func(*args, **kwargs)
|
||||
|
||||
if result is not None:
|
||||
print("\\n=== Result ===")
|
||||
print(result)
|
||||
|
||||
return result
|
||||
|
||||
# Set BAML log level (options: error, warn, info, debug, trace)
|
||||
os.environ['BAML_LOG'] = 'info'
|
||||
print("BAML logging helpers loaded!")
|
||||
print("- Use run_with_baml_logs() to capture and display logs after execution")
|
||||
print("- Use run_with_baml_logs_redirect() to see logs in real-time as they're generated")
|
||||
'''
|
||||
nb.cells.append(new_code_cell(logging_helper))
|
||||
|
||||
def process_step(nb, step, base_path, current_functions, section_name=None):
|
||||
"""Process different step types."""
|
||||
@@ -244,18 +164,8 @@ def process_step(nb, step, base_path, current_functions, section_name=None):
|
||||
else:
|
||||
main_call = "main()"
|
||||
|
||||
# Check if we should use logging wrapper
|
||||
use_logging = step['run_main'].get('show_logs', False)
|
||||
|
||||
if use_logging:
|
||||
# Use logging wrapper
|
||||
if call_parts:
|
||||
nb.cells.append(new_code_cell(f'run_with_baml_logs(main, {", ".join(call_parts)})'))
|
||||
else:
|
||||
nb.cells.append(new_code_cell('run_with_baml_logs(main)'))
|
||||
else:
|
||||
# Normal execution without logging
|
||||
nb.cells.append(new_code_cell(main_call))
|
||||
# Execute the main function call
|
||||
nb.cells.append(new_code_cell(main_call))
|
||||
|
||||
def convert_walkthrough_to_notebook(yaml_path, output_path):
|
||||
"""Convert walkthrough.yaml to Jupyter notebook."""
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7c856804",
|
||||
"id": "a55820ee",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Building the 12-factor agent template from scratch in Python"
|
||||
@@ -10,7 +10,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6c96065f",
|
||||
"id": "ba52e30a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Steps to start from a bare Python repo and build up a 12-factor agent. This walkthrough will guide you through creating a Python agent that follows the 12-factor methodology with BAML."
|
||||
@@ -18,7 +18,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d8a45720",
|
||||
"id": "75b26c9b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chapter 0 - Hello World"
|
||||
@@ -26,7 +26,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a7a5467e",
|
||||
"id": "fa4b9e07",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's start with a basic Python setup and a hello world program."
|
||||
@@ -34,7 +34,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "563ef643",
|
||||
"id": "4e464227",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This guide will walk you through building agents in Python with BAML.\n",
|
||||
@@ -46,7 +46,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7db47ab2",
|
||||
"id": "99dac1bb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Here's our simple hello world program:"
|
||||
@@ -55,7 +55,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c9cc0758",
|
||||
"id": "9c6946fd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -69,7 +69,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5b920391",
|
||||
"id": "5523efac",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's run it to verify it works:"
|
||||
@@ -78,7 +78,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "29ba0259",
|
||||
"id": "6a437eb2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -87,7 +87,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "26398377",
|
||||
"id": "d9aa0df6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chapter 1 - CLI and Agent Loop"
|
||||
@@ -95,7 +95,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0b666a9e",
|
||||
"id": "970c65da",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's add BAML and create our first agent with a CLI interface."
|
||||
@@ -103,7 +103,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a6191d3c",
|
||||
"id": "976a0fca",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this chapter, we'll integrate BAML to create an AI agent that can respond to user input.\n",
|
||||
@@ -140,7 +140,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e44cf54f",
|
||||
"id": "ba1f7191",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### BAML Setup\n",
|
||||
@@ -154,7 +154,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f323b5b9",
|
||||
"id": "9910f8a3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -164,7 +164,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e9424fab",
|
||||
"id": "a4ad6e77",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -224,7 +224,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b34a99bc",
|
||||
"id": "b99ba982",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -234,39 +234,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8a2812f6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Helper function to capture BAML logs in notebook output\n",
|
||||
"import os\n",
|
||||
"from IPython.utils.capture import capture_output\n",
|
||||
"\n",
|
||||
"def run_with_baml_logs(func, *args, **kwargs):\n",
|
||||
" \"\"\"Run a function and capture BAML logs in the notebook output.\"\"\"\n",
|
||||
" # Capture both stdout and stderr\n",
|
||||
" with capture_output() as captured:\n",
|
||||
" result = func(*args, **kwargs)\n",
|
||||
" \n",
|
||||
" # Display the captured output\n",
|
||||
" if captured.stdout:\n",
|
||||
" print(captured.stdout)\n",
|
||||
" if captured.stderr:\n",
|
||||
" # BAML logs go to stderr - format them nicely\n",
|
||||
" print(\"\\n=== BAML Logs ===\")\n",
|
||||
" print(captured.stderr)\n",
|
||||
" print(\"=================\\n\")\n",
|
||||
" \n",
|
||||
" return result\n",
|
||||
"\n",
|
||||
"# Set BAML log level (options: error, warn, info, debug, trace)\n",
|
||||
"os.environ['BAML_LOG'] = 'info'\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d7efec52",
|
||||
"id": "ee716f3a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -275,7 +243,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eaa41eda",
|
||||
"id": "894474da",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's create our agent that will use BAML to process user input.\n",
|
||||
@@ -286,7 +254,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6048a2f5",
|
||||
"id": "dbf9d929",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -321,7 +289,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "88143079",
|
||||
"id": "b9421cd4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, we need to define the BAML function that our agent will use.\n",
|
||||
@@ -339,7 +307,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ee4a5f17",
|
||||
"id": "58d8bda5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -349,7 +317,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "47435e42",
|
||||
"id": "1edc5279",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -358,7 +326,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "83a9feee",
|
||||
"id": "ee489cc1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's create our main function that accepts a message parameter:\n"
|
||||
@@ -367,7 +335,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1231c8fc",
|
||||
"id": "f4fea69e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -383,7 +351,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2ddea81d",
|
||||
"id": "fe3fd9c7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's test our agent! Try calling main() with different messages:\n",
|
||||
@@ -391,16 +359,16 @@
|
||||
"- `main(\"Tell me a joke\")`\n",
|
||||
"- `main(\"How are you doing today?\")`\n",
|
||||
"\n",
|
||||
"in this case, we'll use the baml_generate function to \n",
|
||||
"generate the pydantic and python bindings from our \n",
|
||||
"baml source, but in the future we'll skip this step as it \n",
|
||||
"is done automatically by the get_baml_client() function \n"
|
||||
"in this case, we'll use the baml_generate function to\n",
|
||||
"generate the pydantic and python bindings from our\n",
|
||||
"baml source, but in the future we'll skip this step as it\n",
|
||||
"is done automatically by the get_baml_client() function\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c3523c76",
|
||||
"id": "7fc1ee38",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -410,7 +378,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "02f16835",
|
||||
"id": "8756df71",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -419,34 +387,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e0e5c359",
|
||||
"id": "9b5ca88c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In a few cases, we'll enable the baml debug logs to see the inputs/outputs to and from the model.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e7f1d260",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_with_baml_logs(main, \"Hello from the Python notebook!\")"
|
||||
]
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c1323d34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"what's most important there is that you can see the prompt and how the output_format is injected\n",
|
||||
"to tell the model what kind of json we want to return.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dba3ff7f",
|
||||
"id": "e79f4d84",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chapter 2 - Add Calculator Tools"
|
||||
@@ -454,7 +401,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "83fd4e9e",
|
||||
"id": "4659d5ef",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's add some calculator tools to our agent."
|
||||
@@ -462,7 +409,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fd85b772",
|
||||
"id": "73df701a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's start by adding a tool definition for the calculator.\n",
|
||||
@@ -474,7 +421,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "92e26be8",
|
||||
"id": "c538cd53",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -484,7 +431,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "02702fa2",
|
||||
"id": "1df07ff3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -493,7 +440,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e2579b34",
|
||||
"id": "1ffe3854",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, let's update the agent's DetermineNextStep method to\n",
|
||||
@@ -503,7 +450,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3cace82a",
|
||||
"id": "d6f9ee99",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -512,7 +459,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e3910c3d",
|
||||
"id": "147bd22c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's update our main function to show the tool call:\n"
|
||||
@@ -521,7 +468,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "aeeb0546",
|
||||
"id": "f8f99089",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -542,7 +489,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c456f5c5",
|
||||
"id": "ffb6c213",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's try out the calculator! The agent should recognize that you want to perform a calculation\n",
|
||||
@@ -552,7 +499,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ebede785",
|
||||
"id": "7afaa326",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -561,7 +508,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1790412b",
|
||||
"id": "599d21dd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chapter 3 - Process Tool Calls in a Loop"
|
||||
@@ -569,7 +516,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "15999167",
|
||||
"id": "d80e3f9f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's add a real agentic loop that can run the tools and get a final answer from the LLM."
|
||||
@@ -577,7 +524,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "20b3b45e",
|
||||
"id": "427fbc77",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this chapter, we'll enhance our agent to process tool calls in a loop. This means:\n",
|
||||
@@ -591,7 +538,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2860b705",
|
||||
"id": "ac8ae567",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -637,7 +584,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8525277b",
|
||||
"id": "e875f4c2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's update our main function to use the new agent loop:\n"
|
||||
@@ -646,7 +593,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c9d55067",
|
||||
"id": "2aead128",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -664,7 +611,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3945d097",
|
||||
"id": "a29bf07d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's try it out! The agent should now call the tool and return the calculated result:\n"
|
||||
@@ -673,7 +620,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a5897a69",
|
||||
"id": "c6c6a0ca",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -682,26 +629,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "754dac11",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"you can run with baml_logs enabled to see how the prompt changed when we added the New\n",
|
||||
"tool types to our union of response types.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "95dfb524",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_with_baml_logs(main, \"can you add 3 and 4\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ad00ab9f",
|
||||
"id": "4c20a7d5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You should see the agent:\n",
|
||||
@@ -716,7 +644,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "920308ba",
|
||||
"id": "561c0b54",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -790,7 +718,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1241ac9e",
|
||||
"id": "7c612b06",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's test subtraction:\n"
|
||||
@@ -799,7 +727,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "27dad2e2",
|
||||
"id": "4be4af22",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -808,7 +736,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2911b810",
|
||||
"id": "1da0ad58",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Test multiplication:\n"
|
||||
@@ -817,7 +745,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bd38e06a",
|
||||
"id": "49d5e040",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -826,7 +754,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ed3a2c02",
|
||||
"id": "d5a27929",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Finally, let's test a complex multi-step calculation:\n"
|
||||
@@ -835,7 +763,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3dea94b1",
|
||||
"id": "431414aa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -844,7 +772,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "618bea98",
|
||||
"id": "99ab35d5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Congratulations! You've taken your first step into hand-rolling an agent loop.\n",
|
||||
@@ -859,7 +787,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "12b04d87",
|
||||
"id": "9ba4e319",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chapter 4 - Add Tests to agent.baml"
|
||||
@@ -867,7 +795,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7208e80e",
|
||||
"id": "6bf77db0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's add some tests to our BAML agent."
|
||||
@@ -875,7 +803,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6bbdec63",
|
||||
"id": "c6f0d38a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this chapter, we'll learn about BAML testing - a powerful feature that helps ensure your agents behave correctly.\n",
|
||||
@@ -893,7 +821,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8d3602d1",
|
||||
"id": "cd0ae03f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -902,7 +830,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6069cbc3",
|
||||
"id": "5bf05182",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Run the tests to see them in action:\n"
|
||||
@@ -911,7 +839,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e464d7b5",
|
||||
"id": "30bbcac5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -920,7 +848,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a9be4502",
|
||||
"id": "2cbbf5db",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's improve the tests with assertions! Assertions let you verify specific properties of the agent's output.\n",
|
||||
@@ -939,7 +867,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b0984190",
|
||||
"id": "dbbc5283",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -948,7 +876,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7be0ca40",
|
||||
"id": "ecf9cb68",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Run the tests again to see assertions in action:\n"
|
||||
@@ -957,7 +885,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "52fcc668",
|
||||
"id": "8d0611f3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -966,7 +894,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8f035d7a",
|
||||
"id": "8789e20e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Finally, let's add more complex test cases that test multi-step conversations.\n",
|
||||
@@ -981,7 +909,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "587f5e67",
|
||||
"id": "abf5be5b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -990,7 +918,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "702c4652",
|
||||
"id": "8ce0f9de",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Run the comprehensive test suite:\n"
|
||||
@@ -999,7 +927,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b2e5c012",
|
||||
"id": "4afe82b8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1008,7 +936,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2c49a891",
|
||||
"id": "5d0ba42b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Key Testing Concepts\n",
|
||||
@@ -1023,7 +951,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "44eb50a1",
|
||||
"id": "bf15b77e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chapter 5 - Multiple Human Tools"
|
||||
@@ -1031,7 +959,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "47027888",
|
||||
"id": "e69dbeca",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this section, we'll add support for multiple tools that serve to contact humans.\n"
|
||||
@@ -1039,7 +967,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c2327093",
|
||||
"id": "f3e29142",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"So far, our agent only returns a final answer with \"done_for_now\". But what if the agent needs clarification?\n",
|
||||
@@ -1059,7 +987,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1b0d9077",
|
||||
"id": "9b42b75e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1068,7 +996,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dd0b17a5",
|
||||
"id": "7be2af7d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's update our agent to handle clarification requests:\n"
|
||||
@@ -1077,7 +1005,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1a957b14",
|
||||
"id": "21a3f526",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1085,9 +1013,13 @@
|
||||
"# Agent implementation with clarification support\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
"def agent_loop(thread, clarification_handler):\n",
|
||||
" \"\"\"Run the agent loop until we get a final answer.\"\"\"\n",
|
||||
" while True:\n",
|
||||
"def agent_loop(thread, clarification_handler, max_iterations=3):\n",
|
||||
" \"\"\"Run the agent loop until we get a final answer (max 3 iterations).\"\"\"\n",
|
||||
" iteration_count = 0\n",
|
||||
" while iteration_count < max_iterations:\n",
|
||||
" iteration_count += 1\n",
|
||||
" print(f\"🔄 Agent loop iteration {iteration_count}/{max_iterations}\")\n",
|
||||
" \n",
|
||||
" # Get the client\n",
|
||||
" baml_client = get_baml_client()\n",
|
||||
" \n",
|
||||
@@ -1147,6 +1079,9 @@
|
||||
" })\n",
|
||||
" else:\n",
|
||||
" return \"Error: Unexpected result type\"\n",
|
||||
" \n",
|
||||
" # If we've reached max iterations without a final answer\n",
|
||||
" return f\"Agent reached maximum iterations ({max_iterations}) without completing the task.\"\n",
|
||||
"\n",
|
||||
"class Thread:\n",
|
||||
" \"\"\"Simple thread to track conversation history.\"\"\"\n",
|
||||
@@ -1156,7 +1091,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "495441f6",
|
||||
"id": "5f017c77",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Finally, let's create a main function that handles human interaction:\n"
|
||||
@@ -1165,7 +1100,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e50ec1ec",
|
||||
"id": "e648be92",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1203,7 +1138,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "36ea5481",
|
||||
"id": "2f4b962e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's test with an ambiguous input that should trigger a clarification request:\n"
|
||||
@@ -1212,7 +1147,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "05f7aeff",
|
||||
"id": "948684f2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1221,7 +1156,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "404b23ed",
|
||||
"id": "54b7d0d4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You should see:\n",
|
||||
@@ -1245,7 +1180,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "292c8ae5",
|
||||
"id": "253d3f6f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chapter 6 - Customize Your Prompt with Reasoning"
|
||||
@@ -1253,7 +1188,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "40e400d2",
|
||||
"id": "87dc996a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this section, we'll explore how to customize the prompt of the agent with reasoning steps.\n",
|
||||
@@ -1263,7 +1198,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3ab476f7",
|
||||
"id": "7694a842",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Why Add Reasoning to Prompts?\n",
|
||||
@@ -1281,7 +1216,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "877c73a9",
|
||||
"id": "2b38033a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1290,36 +1225,28 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "09657b2f",
|
||||
"id": "30aff7de",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's test it with a simple calculation to see the reasoning in action:\n",
|
||||
"\n",
|
||||
"**Note:** The BAML logs below will show the model's reasoning steps. Look for the `<reasoning>` tags in the logs to see how the model thinks through the problem before deciding what to do.\n"
|
||||
"Now let's test it with a simple calculation to see the reasoning in action:\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c4cfccff",
|
||||
"id": "515f9755",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_with_baml_logs(main, \"can you multiply 3 and 4\")"
|
||||
"main(\"can you multiply 3 and 4\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6c4cd43c",
|
||||
"id": "2f69536c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You should see the reasoning steps in the BAML logs above. The model explicitly thinks through what it needs to do before making a decision.\n",
|
||||
"\n",
|
||||
"💡 **Tip:** If you want to see BAML logs for any other calls in this notebook, you can use the `run_with_baml_logs` helper function:\n",
|
||||
"```python\n",
|
||||
"# Instead of: main(\"your message\")\n",
|
||||
"# Use: run_with_baml_logs(main, \"your message\")\n",
|
||||
"```\n",
|
||||
"The model uses explicit reasoning steps to think through the problem before making a decision.\n",
|
||||
"\n",
|
||||
"## Advanced Prompt Engineering\n",
|
||||
"\n",
|
||||
@@ -1334,7 +1261,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0c23951d",
|
||||
"id": "8274aff0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chapter 7 - Customize Your Context Window"
|
||||
@@ -1342,7 +1269,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6fb08c76",
|
||||
"id": "f930c899",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this section, we'll explore how to customize the context window of the agent.\n",
|
||||
@@ -1352,7 +1279,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5e25342a",
|
||||
"id": "1d4235ed",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Context Window Serialization\n",
|
||||
@@ -1368,7 +1295,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "eb204207",
|
||||
"id": "dccf9a9f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1481,7 +1408,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a789d22",
|
||||
"id": "e02d1361",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's create a main function that can switch between formats:\n"
|
||||
@@ -1490,7 +1417,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3ca0bab4",
|
||||
"id": "03c71da7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1515,7 +1442,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3865f8a4",
|
||||
"id": "1d1718ab",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's test with JSON format first:\n"
|
||||
@@ -1524,7 +1451,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ebadc358",
|
||||
"id": "41b41a22",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1533,7 +1460,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ef60144f",
|
||||
"id": "d1bb4844",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's try the same with XML format:\n"
|
||||
@@ -1542,7 +1469,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "981012bd",
|
||||
"id": "2ab2a144",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1551,7 +1478,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "68bf94d2",
|
||||
"id": "8883acac",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## XML vs JSON Trade-offs\n",
|
||||
@@ -1566,7 +1493,7 @@
|
||||
"- Easy to parse and debug\n",
|
||||
"- Native to JavaScript/Python\n",
|
||||
"\n",
|
||||
"Choose based on your specific needs and token constraints!"
|
||||
"Choose based on your specific needs and token constraints!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
Reference in New Issue
Block a user