Image attachment support

Refs #19

Refs https://github.com/simonw/llm/issues/587
Simon Willison
2024-10-28 18:28:01 -07:00
parent 1aa490cf9a
commit 97f2aeeec6
4 changed files with 441 additions and 6 deletions
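For orientation: the new attachment handling targets the attachment API introduced in the LLM 0.17 alphas. A rough usage sketch once this plugin build is installed, assuming an illustrative local file photo.png (the CLI equivalent would be llm -m claude-3.5-sonnet 'Describe this image' -a photo.png):

import llm

model = llm.get_model("claude-3.5-sonnet")
response = model.prompt(
    "Describe this image",
    attachments=[llm.Attachment(path="photo.png")],  # illustrative path
)
print(response.text())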

View File

@@ -15,7 +15,8 @@ def register_models(register):
     register(ClaudeMessagesLong("claude-3-5-sonnet-20240620"))
     register(ClaudeMessagesLong("claude-3-5-sonnet-20241022"))
     register(
-        ClaudeMessagesLong("claude-3-5-sonnet-latest"), aliases=("claude-3.5-sonnet", "claude-3.5-sonnet-latest")
+        ClaudeMessagesLong("claude-3-5-sonnet-latest"),
+        aliases=("claude-3.5-sonnet", "claude-3.5-sonnet-latest"),
     )
     # register(
     #     ClaudeMessagesLong("claude-3-5-haiku-latest"), aliases=("claude-3.5-haiku",)
@@ -88,6 +89,12 @@ class ClaudeMessages(llm.Model):
     needs_key = "claude"
     key_env_var = "ANTHROPIC_API_KEY"
     can_stream = True
+    attachment_types = {
+        "image/png",
+        "image/jpeg",
+        "image/webp",
+        "image/gif",
+    }
 
     class Options(ClaudeOptions): ...
@@ -100,16 +107,48 @@ class ClaudeMessages(llm.Model):
         messages = []
         if conversation:
             for response in conversation.responses:
+                if response.attachments:
+                    content = [
+                        {
+                            "type": "image",
+                            "source": {
+                                "data": attachment.base64_content(),
+                                "media_type": attachment.resolve_type(),
+                                "type": "base64",
+                            },
+                        }
+                        for attachment in response.attachments
+                    ]
+                else:
+                    content = response.prompt.prompt
                 messages.extend(
                     [
                         {
                             "role": "user",
-                            "content": response.prompt.prompt,
+                            "content": content,
                         },
                         {"role": "assistant", "content": response.text()},
                     ]
                 )
-        messages.append({"role": "user", "content": prompt.prompt})
+        if prompt.attachments:
+            messages.append(
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "image",
+                            "source": {
+                                "data": attachment.base64_content(),
+                                "media_type": attachment.resolve_type(),
+                                "type": "base64",
+                            },
+                        }
+                        for attachment in prompt.attachments
+                    ],
+                }
+            )
+        else:
+            messages.append({"role": "user", "content": prompt.prompt})
         return messages
 
     def execute(self, prompt, stream, response, conversation):
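For reference, with one PNG attachment and no prior conversation, the build_messages() changes above produce a messages list shaped like the sketch below (base64 data truncated; it mirrors the request body captured in the cassette that follows). Note that when attachments are present, the user turn carries only image content blocks:

messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "image",
                "source": {
                    "data": "iVBORw0KGgo...",  # truncated base64 of the attachment
                    "media_type": "image/png",
                    "type": "base64",
                },
            }
        ],
    }
]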

View File

@@ -9,8 +9,8 @@ classifiers = [
"License :: OSI Approved :: Apache Software License"
]
dependencies = [
"llm",
"anthropic>=0.17.0",
"llm>=0.17a0",
"anthropic>=0.37.1",
]
[project.urls]
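The raised floors are load-bearing: prompt.attachments, llm.Attachment and the attachment_types declaration used above only exist from the LLM 0.17 alpha series onward, and anthropic 0.37.1 is the client version recorded in the cassette below. Assuming the package is published as llm-claude-3, an existing install can pick both up with llm install -U llm-claude-3.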

View File

@@ -0,0 +1,347 @@
interactions:
- request:
body: '{"max_tokens": 8192, "messages": [{"role": "user", "content": [{"type":
"image", "source": {"data": "iVBORw0KGgoAAAANSUhEUgAAAKYAAAEaAgMAAADmmcReAAAACVBMVEX///8A/wD+AQASdAFKAAAAR0lEQVR42u3YMREAMAjAwC5d6q8mUYkEVuA+8yvIkVr0oghFURRFURRFURRFUdRCkSRJM7u/CEVRFEVRFEVRFEXRpdQXkcaVBRUPn8UJn6QAAAAASUVORK5CYII=",
"media_type": "image/png", "type": "base64"}}]}], "model": "claude-3-5-sonnet-latest",
"temperature": 1.0, "stream": true}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '426'
content-type:
- application/json
host:
- api.anthropic.com
user-agent:
- Anthropic/Python 0.37.1
x-api-key:
- sk-...
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 0.37.1
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.10.4
x-stainless-stream-helper:
- messages
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: 'event: message_start
data: {"type":"message_start","message":{"id":"msg_0131ugsBHJJ73SvVobBS4Rh3","type":"message","role":"assistant","model":"claude-3-5-sonnet-20241022","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":76,"output_tokens":1}} }
event: content_block_start
data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} }
event: ping
data: {"type": "ping"}
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"This"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
image shows"}}
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
two"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
simple"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
rectangular"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
blocks"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
of"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
soli"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"d
colors st"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"acked
vert"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"ically."} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
The top"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
rectangle"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
is a"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
bright"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":","} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
vib"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"rant
red color"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":","} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
while"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
the bottom rectangle is a"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
bright"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":","}
}
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
n"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"eon
green color. The"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
rectang"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"les
appear"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
to be of"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
similar width"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
but"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
may"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
be slightly different in height"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":".
The"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
colors"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
are very"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
saturated and create"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
a striking contrast against"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
each"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
other."} }
event: content_block_stop
data: {"type":"content_block_stop","index":0 }
event: message_delta
data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":75} }
event: message_stop
data: {"type":"message_stop" }
'
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8d9f5fed49c616a8-SJC
Cache-Control:
- no-cache
Connection:
- keep-alive
Content-Type:
- text/event-stream; charset=utf-8
Date:
- Tue, 29 Oct 2024 01:25:58 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
anthropic-ratelimit-requests-limit:
- '4000'
anthropic-ratelimit-requests-remaining:
- '3999'
anthropic-ratelimit-requests-reset:
- '2024-10-29T01:26:06Z'
anthropic-ratelimit-tokens-limit:
- '400000'
anthropic-ratelimit-tokens-remaining:
- '390000'
anthropic-ratelimit-tokens-reset:
- '2024-10-29T01:25:59Z'
request-id:
- req_01BuSg5FeTMAThMmX9z9LSuu
via:
- 1.1 google
status:
code: 200
message: OK
version: 1
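This cassette is the exchange that test_image_prompt replays. Assuming the suite uses pytest-recording (the usual provider of the @pytest.mark.vcr marker seen in the test file), it can be regenerated against the live API with a real Anthropic key configured (ANTHROPIC_API_KEY is the key_env_var declared above) and a command along the lines of pytest --record-mode=once; the sk-... value in the request headers indicates the real API key is redacted or filtered before the cassette is written.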

View File

@@ -1,6 +1,16 @@
 import llm
 import pytest
 
+TINY_PNG = (
+    b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\xa6\x00\x00\x01\x1a"
+    b"\x02\x03\x00\x00\x00\xe6\x99\xc4^\x00\x00\x00\tPLTE\xff\xff\xff"
+    b"\x00\xff\x00\xfe\x01\x00\x12t\x01J\x00\x00\x00GIDATx\xda\xed\xd81\x11"
+    b"\x000\x08\xc0\xc0.]\xea\xaf&Q\x89\x04V\xe0>\xf3+\xc8\x91Z\xf4\xa2\x08EQ\x14E"
+    b"Q\x14EQ\x14EQ\xd4B\x91$I3\xbb\xbf\x08EQ\x14EQ\x14EQ\x14E\xd1\xa5"
+    b"\xd4\x17\x91\xc6\x95\x05\x15\x0f\x9f\xc5\t\x9f\xa4\x00\x00\x00\x00IEND\xaeB`"
+    b"\x82"
+)
+
 
 @pytest.mark.vcr
 def test_prompt():
@@ -8,7 +18,7 @@ def test_prompt():
     model.key = model.key or "sk-..."  # don't override existing key
     response = model.prompt("Two names for a pet pelican, be brief")
     assert str(response) == "1. Pelly\n2. Beaky"
-    response_dict = response.response_json
+    response_dict = dict(response.response_json)
     response_dict.pop("id")  # differs between requests
     assert response_dict == {
         "content": [{"text": "1. Pelly\n2. Beaky", "type": "text"}],
@@ -19,3 +29,42 @@ def test_prompt():
"type": "message",
"usage": {"input_tokens": 17, "output_tokens": 15},
}
EXPECTED_IMAGE_TEXT = (
"This image shows two simple rectangular blocks of solid colors stacked "
"vertically. The top rectangle is a bright, vibrant red color, while the "
"bottom rectangle is a bright, neon green color. The rectangles appear to "
"be of similar width but may be slightly different in height. The colors "
"are very saturated and create a striking contrast against each other."
)
@pytest.mark.vcr
def test_image_prompt():
model = llm.get_model("claude-3.5-sonnet")
model.key = (
model.key
or "sk-..."
)
response = model.prompt(
"Describe image in three words",
attachments=[llm.Attachment(content=TINY_PNG)],
)
assert str(response) == EXPECTED_IMAGE_TEXT
response_dict = response.response_json
response_dict.pop("id") # differs between requests
assert response_dict == {
"content": [
{
"text": EXPECTED_IMAGE_TEXT,
"type": "text",
}
],
"model": "claude-3-5-sonnet-20241022",
"role": "assistant",
"stop_reason": "end_turn",
"stop_sequence": None,
"type": "message",
"usage": {"input_tokens": 76, "output_tokens": 75},
}
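The conversation branch of build_messages() (re-sending image blocks for earlier responses) is not exercised by these tests; a rough sketch of the scenario it handles, reusing the TINY_PNG fixture above and the stock LLM 0.17 conversation API (nothing here is added by this commit):

import llm

model = llm.get_model("claude-3.5-sonnet")
conversation = model.conversation()
first = conversation.prompt(
    "Describe image in three words",
    attachments=[llm.Attachment(content=TINY_PNG)],
)
print(first.text())
# On the follow-up turn, build_messages() replays the first turn's image as a
# base64 content block so Claude still has it in context.
second = conversation.prompt("Now give it a one word title")
print(second.text())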