Async models

* Tip about pytest --record-mode once

Plus mechanism for setting API key during tests with PYTEST_ANTHROPIC_API_KEY

* Async support for Claude models

Closes #25
Refs https://github.com/simonw/llm/issues/507
Refs https://github.com/simonw/llm/pull/613

* Depend on llm>=0.18a0, refs #25
This commit is contained in:
Simon Willison
2024-11-13 19:36:11 -08:00
committed by GitHub
parent 40b7dca88d
commit 8dc778592f
6 changed files with 770 additions and 22 deletions

View File

@@ -54,3 +54,15 @@ To run the tests:
```bash
pytest
```
This project uses [pytest-recording](https://github.com/kiwicom/pytest-recording) to record Anthropic API responses for the tests.
If you add a new test that calls the API you can capture the API response like this:
```bash
PYTEST_ANTHROPIC_API_KEY="$(llm keys get claude)" pytest --record-mode once
```
You will need to have stored a valid Anthropic API key using this command first:
```bash
llm keys set claude
# Paste key here
```

View File

@@ -1,4 +1,4 @@
from anthropic import Anthropic
from anthropic import Anthropic, AsyncAnthropic
import llm
from pydantic import Field, field_validator, model_validator
from typing import Optional, List
@@ -7,19 +7,42 @@ from typing import Optional, List
@llm.hookimpl
def register_models(register):
# https://docs.anthropic.com/claude/docs/models-overview
register(ClaudeMessages("claude-3-opus-20240229"))
register(ClaudeMessages("claude-3-opus-latest"), aliases=("claude-3-opus",))
register(ClaudeMessages("claude-3-sonnet-20240229"), aliases=("claude-3-sonnet",))
register(ClaudeMessages("claude-3-haiku-20240307"), aliases=("claude-3-haiku",))
register(
ClaudeMessages("claude-3-opus-20240229"),
AsyncClaudeMessages("claude-3-opus-20240229"),
),
register(
ClaudeMessages("claude-3-opus-latest"),
AsyncClaudeMessages("claude-3-opus-latest"),
aliases=("claude-3-opus",),
)
register(
ClaudeMessages("claude-3-sonnet-20240229"),
AsyncClaudeMessages("claude-3-sonnet-20240229"),
aliases=("claude-3-sonnet",),
)
register(
ClaudeMessages("claude-3-haiku-20240307"),
AsyncClaudeMessages("claude-3-haiku-20240307"),
aliases=("claude-3-haiku",),
)
# 3.5 models
register(ClaudeMessagesLong("claude-3-5-sonnet-20240620"))
register(ClaudeMessagesLong("claude-3-5-sonnet-20241022", supports_pdf=True)),
register(
ClaudeMessagesLong("claude-3-5-sonnet-20240620"),
AsyncClaudeMessagesLong("claude-3-5-sonnet-20240620"),
)
register(
ClaudeMessagesLong("claude-3-5-sonnet-20241022", supports_pdf=True),
AsyncClaudeMessagesLong("claude-3-5-sonnet-20241022", supports_pdf=True),
)
register(
ClaudeMessagesLong("claude-3-5-sonnet-latest", supports_pdf=True),
AsyncClaudeMessagesLong("claude-3-5-sonnet-latest", supports_pdf=True),
aliases=("claude-3.5-sonnet", "claude-3.5-sonnet-latest"),
)
register(
ClaudeMessagesLong("claude-3-5-haiku-latest", supports_images=False),
AsyncClaudeMessagesLong("claude-3-5-haiku-latest", supports_images=False),
aliases=("claude-3.5-haiku",),
)
@@ -86,7 +109,13 @@ class ClaudeOptions(llm.Options):
return self
class ClaudeMessages(llm.Model):
long_field = Field(
description="The maximum number of tokens to generate before stopping",
default=4_096 * 2,
)
class _Shared:
needs_key = "claude"
key_env_var = "ANTHROPIC_API_KEY"
can_stream = True
@@ -178,9 +207,7 @@ class ClaudeMessages(llm.Model):
messages.append({"role": "user", "content": prompt.prompt})
return messages
def execute(self, prompt, stream, response, conversation):
client = Anthropic(api_key=self.get_key())
def build_kwargs(self, prompt, conversation):
kwargs = {
"model": self.claude_model_id,
"messages": self.build_messages(prompt, conversation),
@@ -202,7 +229,17 @@ class ClaudeMessages(llm.Model):
if self.extra_headers:
kwargs["extra_headers"] = self.extra_headers
return kwargs
def __str__(self):
return "Anthropic Messages: {}".format(self.model_id)
class ClaudeMessages(_Shared, llm.Model):
def execute(self, prompt, stream, response, conversation):
client = Anthropic(api_key=self.get_key())
kwargs = self.build_kwargs(prompt, conversation)
if stream:
with client.messages.stream(**kwargs) as stream:
for text in stream.text_stream:
@@ -214,13 +251,27 @@ class ClaudeMessages(llm.Model):
yield completion.content[0].text
response.response_json = completion.model_dump()
def __str__(self):
return "Anthropic Messages: {}".format(self.model_id)
class ClaudeMessagesLong(ClaudeMessages):
class Options(ClaudeOptions):
max_tokens: Optional[int] = Field(
description="The maximum number of tokens to generate before stopping",
default=4_096 * 2,
)
max_tokens: Optional[int] = long_field
class AsyncClaudeMessages(_Shared, llm.AsyncModel):
    """Async counterpart of ClaudeMessages, backed by AsyncAnthropic."""

    async def execute(self, prompt, stream, response, conversation):
        """Send the prompt to the Anthropic API and yield response text.

        With ``stream`` true, text chunks are yielded as they arrive and the
        full message is recorded on ``response`` once streaming finishes;
        otherwise a single request is made and its text yielded whole.
        """
        client = AsyncAnthropic(api_key=self.get_key())
        kwargs = self.build_kwargs(prompt, conversation)
        if stream:
            async with client.messages.stream(**kwargs) as stream_obj:
                async for chunk in stream_obj.text_stream:
                    yield chunk
            # Record the accumulated final message for logging/inspection.
            final = await stream_obj.get_final_message()
            response.response_json = final.model_dump()
        else:
            completion = await client.messages.create(**kwargs)
            yield completion.content[0].text
            response.response_json = completion.model_dump()
class AsyncClaudeMessagesLong(AsyncClaudeMessages):
    """Async Claude model whose default max_tokens is the larger long_field value."""

    class Options(ClaudeOptions):
        # Shared long-output default (see long_field) instead of the base 4096.
        max_tokens: Optional[int] = long_field

View File

@@ -9,7 +9,7 @@ classifiers = [
"License :: OSI Approved :: Apache Software License"
]
dependencies = [
"llm>=0.17",
"llm>=0.18a0",
"anthropic>=0.39.0",
]
@@ -23,4 +23,4 @@ CI = "https://github.com/simonw/llm-claude-3/actions"
claude_3 = "llm_claude_3"
[project.optional-dependencies]
test = ["pytest", "pytest-recording"]
test = ["pytest", "pytest-recording", "pytest-asyncio"]

View File

@@ -0,0 +1,656 @@
interactions:
- request:
body: '{"max_tokens": 4096, "messages": [{"role": "user", "content": "Two names
for a pet pelican, be brief"}], "model": "claude-3-opus-latest", "temperature":
1.0, "stream": true}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '173'
content-type:
- application/json
host:
- api.anthropic.com
user-agent:
- AsyncAnthropic/Python 0.39.0
x-stainless-arch:
- arm64
x-stainless-async:
- async:asyncio
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 0.39.0
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.10.4
x-stainless-stream-helper:
- messages
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: 'event: message_start
data: {"type":"message_start","message":{"id":"msg_013NHgcGHHSfdsAVk5BRAXis","type":"message","role":"assistant","model":"claude-3-opus-20240229","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":17,"output_tokens":1}} }
event: content_block_start
data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} }
event: ping
data: {"type": "ping"}
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"1.
P"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"elly\n2."} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
Beaky"} }
event: content_block_stop
data: {"type":"content_block_stop","index":0 }
event: message_delta
data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":15} }
event: message_stop
data: {"type":"message_stop" }
'
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8e234e7de95df98b-SJC
Cache-Control:
- no-cache
Connection:
- keep-alive
Content-Type:
- text/event-stream; charset=utf-8
Date:
- Thu, 14 Nov 2024 01:42:44 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
anthropic-ratelimit-requests-limit:
- '4000'
anthropic-ratelimit-requests-remaining:
- '3999'
anthropic-ratelimit-requests-reset:
- '2024-11-14T01:42:44Z'
anthropic-ratelimit-tokens-limit:
- '400000'
anthropic-ratelimit-tokens-remaining:
- '396000'
anthropic-ratelimit-tokens-reset:
- '2024-11-14T01:42:44Z'
request-id:
- req_01NyMtBMFJ5aGGLGtRrMkSET
via:
- 1.1 google
status:
code: 200
message: OK
- request:
body: '{"max_tokens": 4096, "messages": [{"role": "user", "content": "Two names
for a pet pelican, be brief"}], "model": "claude-3-opus-latest", "temperature":
1.0, "stream": true}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '173'
content-type:
- application/json
host:
- api.anthropic.com
user-agent:
- AsyncAnthropic/Python 0.39.0
x-stainless-arch:
- arm64
x-stainless-async:
- async:asyncio
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 0.39.0
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.10.4
x-stainless-stream-helper:
- messages
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: 'event: message_start
data: {"type":"message_start","message":{"id":"msg_019hK7A4iGQ75ASSjq2uT9mS","type":"message","role":"assistant","model":"claude-3-opus-20240229","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":17,"output_tokens":1}}
}
event: content_block_start
data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} }
event: ping
data: {"type": "ping"}
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"1.
P"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"elly\n2."} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
Beaky"} }
event: content_block_stop
data: {"type":"content_block_stop","index":0 }
event: message_delta
data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":15} }
event: message_stop
data: {"type":"message_stop" }
'
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8e235190794915e1-SJC
Cache-Control:
- no-cache
Connection:
- keep-alive
Content-Type:
- text/event-stream; charset=utf-8
Date:
- Thu, 14 Nov 2024 01:44:50 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
anthropic-ratelimit-requests-limit:
- '4000'
anthropic-ratelimit-requests-remaining:
- '3999'
anthropic-ratelimit-requests-reset:
- '2024-11-14T01:44:50Z'
anthropic-ratelimit-tokens-limit:
- '400000'
anthropic-ratelimit-tokens-remaining:
- '396000'
anthropic-ratelimit-tokens-reset:
- '2024-11-14T01:44:50Z'
request-id:
- req_0136bnnqjvRQWJq6GEQNTu2G
via:
- 1.1 google
status:
code: 200
message: OK
- request:
body: '{"max_tokens": 4096, "messages": [{"role": "user", "content": "Two names
for a pet pelican, be brief"}], "model": "claude-3-opus-latest", "temperature":
1.0, "stream": true}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '173'
content-type:
- application/json
host:
- api.anthropic.com
user-agent:
- AsyncAnthropic/Python 0.39.0
x-stainless-arch:
- arm64
x-stainless-async:
- async:asyncio
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 0.39.0
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.10.4
x-stainless-stream-helper:
- messages
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: 'event: message_start
data: {"type":"message_start","message":{"id":"msg_01E9Jp45kkWnNiVyPDunpmzG","type":"message","role":"assistant","model":"claude-3-opus-20240229","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":17,"output_tokens":1}} }
event: content_block_start
data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} }
event: ping
data: {"type": "ping"}
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"1.
P"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"elly\n2."} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
Scoop"} }
event: content_block_stop
data: {"type":"content_block_stop","index":0 }
event: message_delta
data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":15} }
event: message_stop
data: {"type":"message_stop" }
'
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8e235453192ef993-SJC
Cache-Control:
- no-cache
Connection:
- keep-alive
Content-Type:
- text/event-stream; charset=utf-8
Date:
- Thu, 14 Nov 2024 01:46:43 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
anthropic-ratelimit-requests-limit:
- '4000'
anthropic-ratelimit-requests-remaining:
- '3999'
anthropic-ratelimit-requests-reset:
- '2024-11-14T01:46:43Z'
anthropic-ratelimit-tokens-limit:
- '400000'
anthropic-ratelimit-tokens-remaining:
- '396000'
anthropic-ratelimit-tokens-reset:
- '2024-11-14T01:46:43Z'
request-id:
- req_01URpbvobEqLDEfreUV8omBd
via:
- 1.1 google
status:
code: 200
message: OK
- request:
body: '{"max_tokens": 4096, "messages": [{"role": "user", "content": "Two names
for a pet pelican, be brief"}], "model": "claude-3-opus-latest", "temperature":
1.0, "stream": true}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '173'
content-type:
- application/json
host:
- api.anthropic.com
user-agent:
- AsyncAnthropic/Python 0.39.0
x-stainless-arch:
- arm64
x-stainless-async:
- async:asyncio
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 0.39.0
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.10.4
x-stainless-stream-helper:
- messages
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: 'event: message_start
data: {"type":"message_start","message":{"id":"msg_012Law29zMzzFDgYCEKqB7eq","type":"message","role":"assistant","model":"claude-3-opus-20240229","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":17,"output_tokens":1}} }
event: content_block_start
data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}
}
event: ping
data: {"type": "ping"}
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"1.
P"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"elly\n2."} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
Beaky"} }
event: content_block_stop
data: {"type":"content_block_stop","index":0 }
event: message_delta
data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":15} }
event: message_stop
data: {"type":"message_stop" }
'
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8e2354b18fe867b3-SJC
Cache-Control:
- no-cache
Connection:
- keep-alive
Content-Type:
- text/event-stream; charset=utf-8
Date:
- Thu, 14 Nov 2024 01:46:59 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
anthropic-ratelimit-requests-limit:
- '4000'
anthropic-ratelimit-requests-remaining:
- '3999'
anthropic-ratelimit-requests-reset:
- '2024-11-14T01:46:58Z'
anthropic-ratelimit-tokens-limit:
- '400000'
anthropic-ratelimit-tokens-remaining:
- '396000'
anthropic-ratelimit-tokens-reset:
- '2024-11-14T01:46:58Z'
request-id:
- req_01TPrDRBvvJ1kzFbLUx3Lrs9
via:
- 1.1 google
status:
code: 200
message: OK
- request:
body: '{"max_tokens": 4096, "messages": [{"role": "user", "content": "Two names
for a pet pelican, be brief"}], "model": "claude-3-opus-latest", "temperature":
1.0, "stream": true}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '173'
content-type:
- application/json
host:
- api.anthropic.com
user-agent:
- AsyncAnthropic/Python 0.39.0
x-stainless-arch:
- arm64
x-stainless-async:
- async:asyncio
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 0.39.0
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.10.4
x-stainless-stream-helper:
- messages
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: 'event: message_start
data: {"type":"message_start","message":{"id":"msg_01CFHNpT4EP6DBS5Mjurxx8j","type":"message","role":"assistant","model":"claude-3-opus-20240229","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":17,"output_tokens":1}} }
event: content_block_start
data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}
}
event: ping
data: {"type": "ping"}
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"1.
P"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"elly\n2."} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
G"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"ully"}
}
event: content_block_stop
data: {"type":"content_block_stop","index":0 }
event: message_delta
data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":15} }
event: message_stop
data: {"type":"message_stop" }
'
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8e235667ccc0cea0-SJC
Cache-Control:
- no-cache
Connection:
- keep-alive
Content-Type:
- text/event-stream; charset=utf-8
Date:
- Thu, 14 Nov 2024 01:48:08 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
anthropic-ratelimit-requests-limit:
- '4000'
anthropic-ratelimit-requests-remaining:
- '3999'
anthropic-ratelimit-requests-reset:
- '2024-11-14T01:48:08Z'
anthropic-ratelimit-tokens-limit:
- '400000'
anthropic-ratelimit-tokens-remaining:
- '396000'
anthropic-ratelimit-tokens-reset:
- '2024-11-14T01:48:08Z'
request-id:
- req_01BdZqjn7kWSfAc8dZHNwvvk
via:
- 1.1 google
status:
code: 200
message: OK
version: 1

6
tests/conftest.py Normal file
View File

@@ -0,0 +1,6 @@
import pytest
@pytest.fixture(scope="module")
def vcr_config():
    """pytest-recording (VCR) settings shared across each test module.

    Filtering the X-API-KEY header keeps real Anthropic API keys out of
    the recorded cassettes committed to the repository.
    """
    config = {"filter_headers": ["X-API-KEY"]}
    return config

View File

@@ -1,4 +1,5 @@
import llm
import os
import pytest
TINY_PNG = (
@@ -11,11 +12,13 @@ TINY_PNG = (
b"\x82"
)
ANTHROPIC_API_KEY = os.environ.get("PYTEST_ANTHROPIC_API_KEY", None) or "sk-..."
@pytest.mark.vcr
def test_prompt():
model = llm.get_model("claude-3-opus")
model.key = model.key or "sk-..." # don't override existing key
model.key = model.key or ANTHROPIC_API_KEY
response = model.prompt("Two names for a pet pelican, be brief")
assert str(response) == "1. Pelly\n2. Beaky"
response_dict = dict(response.response_json)
@@ -31,6 +34,26 @@ def test_prompt():
}
@pytest.mark.vcr
@pytest.mark.asyncio
async def test_async_prompt():
    """The async model should return the same recorded completion as sync."""
    model = llm.get_async_model("claude-3-opus")
    # Fall back to the env-provided key only; never clobber an existing key.
    model.key = model.key or ANTHROPIC_API_KEY
    response = await model.prompt("Two names for a pet pelican, be brief")
    assert await response.text() == "1. Pelly\n2. Beaky"
    response_dict = dict(response.response_json)
    response_dict.pop("id")  # differs between requests
    expected = {
        "content": [{"text": "1. Pelly\n2. Beaky", "type": "text"}],
        "model": "claude-3-opus-20240229",
        "role": "assistant",
        "stop_reason": "end_turn",
        "stop_sequence": None,
        "type": "message",
        "usage": {"input_tokens": 17, "output_tokens": 15},
    }
    assert response_dict == expected
EXPECTED_IMAGE_TEXT = (
"This image shows two simple rectangular blocks of solid colors stacked "
"vertically. The top rectangle is a bright, vibrant red color, while the "
@@ -43,7 +66,7 @@ EXPECTED_IMAGE_TEXT = (
@pytest.mark.vcr
def test_image_prompt():
model = llm.get_model("claude-3.5-sonnet")
model.key = model.key or "sk-..."
model.key = model.key or ANTHROPIC_API_KEY
response = model.prompt(
"Describe image in three words",
attachments=[llm.Attachment(content=TINY_PNG)],