Merge pull request #148 from MadcowD/wguss/fix_serialization_blob

fix blob serialization and np array serialization
This commit is contained in:
William Guss
2024-09-10 19:37:17 -07:00
committed by GitHub
16 changed files with 162 additions and 29 deletions

6
.gitignore vendored
View File

@@ -3,8 +3,12 @@ __pycache__/
*.py[cod]
*$py.class
# Sqlite
blob/
blob*/
blob/**/*
blob/*
*.db

View File

@@ -0,0 +1,51 @@
🚀 I'm excited to announce the future of prompt engineering: 𝚎𝚕𝚕.
developed from ideas during my time at OpenAI, 𝚎𝚕𝚕 is a light, functional lm programming library:
- automatic versioning & tracing
- rich local oss visualization tools
- multimodality native
Read on ⬇️
𝚎𝚕𝚕 was built out of frustration with frameworks like @LangChainAI on three principles
- prompts are programs not strings
- prompts are parameters of machine learning models
- every call to a language model is worth its weight in credits
prompting should be readable, scientific, and optimizable
prompt engineering is an optimization process
because you write your prompts as normal python functions, 𝚎𝚕𝚕 automatically versions and serializes them via dynamic analysis of "lexical closures" - no custom IDE or editor required
𝚎𝚕𝚕.𝚒𝚗𝚒𝚝(𝚜𝚝𝚘𝚛𝚎='./𝚕𝚘𝚐𝚍𝚒𝚛')
local tools for monitoring & visualization
prompt engineering goes from a dark art to a science with the right tools. Ell Studio is a local, open source tool for prompt version control, monitoring, visualization.
𝚎𝚕𝚕-𝚜𝚝𝚞𝚍𝚒𝚘 --𝚜𝚝𝚘𝚛𝚊𝚐𝚎 ./𝚕𝚘𝚐𝚍𝚒𝚛
Multimodality should be first class
in anticipation of the upcoming gpt-4o + 🍓 api, 𝚎𝚕𝚕 is built with multimodality first.
with a rich numpy style message api with multimodal type coercion, using images, video, and audio is intuitive
🎉 𝚎𝚕𝚕 is available on PyPI today w/
𝚙𝚒𝚙 𝚒𝚗𝚜𝚝𝚊𝚕𝚕 𝚎𝚕𝚕-𝚊𝚒
check out the source https://github.com/MadcowD/ell
and read the docs https://docs.ell.so/
⏰ new features soon, including SGD & RL on prompts and so much more!
🙏 huge shout out to everyone who's helped with this project
@jakeottiger @a_dixon @shelwin_ zraig, frank hu, & my discord
so many other good convos w @goodside @aidan_mclau and others

View File

@@ -19,6 +19,7 @@
"@testing-library/react": "^13.4.0",
"@testing-library/user-event": "^13.5.0",
"axios": "^1.6.0",
"base64-js": "^1.5.1",
"class-variance-authority": "^0.7.0",
"clsx": "^2.1.1",
"d3-force": "^3.0.0",
@@ -7262,6 +7263,26 @@
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
"license": "MIT"
},
"node_modules/base64-js": {
"version": "1.5.1",
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
"integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
],
"license": "MIT"
},
"node_modules/batch": {
"version": "0.6.1",
"resolved": "https://registry.npmjs.org/batch/-/batch-0.6.1.tgz",

View File

@@ -14,6 +14,7 @@
"@testing-library/react": "^13.4.0",
"@testing-library/user-event": "^13.5.0",
"axios": "^1.6.0",
"base64-js": "^1.5.1",
"class-variance-authority": "^0.7.0",
"clsx": "^2.1.1",
"d3-force": "^3.0.0",

View File

@@ -1,4 +1,5 @@
import React from 'react';
import * as base64 from 'base64-js';
const typeMatchers = {
ToolResult: (data) => data && typeof data === 'object' && 'tool_call_id' in data && 'result' in data,
@@ -103,13 +104,15 @@ const renderInline = (data, customRenderers) => {
}
if (typeof data === 'object' && data !== null) {
const isImage = data.__limage;
const isNdarray = data.__lndarray;
if (isImage) {
return (
<img src={data.content} alt="PIL.Image" style={{display: 'inline-block', verticalAlign: 'middle', maxHeight: '1.5em'}} />
);
} else if (isNdarray) {
return renderNdarray(data);
}
return (
@@ -133,6 +136,41 @@ const renderInline = (data, customRenderers) => {
return <span className="text-yellow-300">{JSON.stringify(data)}</span>;
};
const renderNdarray = (data) => {
const { content, dtype, shape } = data;
const decodedData = base64.toByteArray(content);
const numElements = shape.reduce((a, b) => a * b, 1);
let arrayData;
if (dtype === 'float32') {
arrayData = new Float32Array(decodedData.buffer);
} else if (dtype === 'int32') {
arrayData = new Int32Array(decodedData.buffer);
} else {
// Add more types as needed
arrayData = Array.from(decodedData);
}
console.log(arrayData)
let displayData;
if (numElements > 3) {
displayData = arrayData.slice(0, 3);
displayData = [...displayData, '...']
} else {
displayData = arrayData;
}
console.log(displayData[0])
return (
<span className="text-indigo-400">
np.array(
<span className="text-yellow-300">[{displayData.join(', ')}]</span>,{' '}
<span className="text-green-300">shape=[{shape.join(', ')}]</span>,{' '}
<span className="text-pink-300">dtype={dtype}</span>
)
</span>
);
};
const renderNonInline = (data, customRenderers, level = 0, isArrayItem = false, postfix = '') => {
if (data.__lstr) {
data = data.content;
@@ -193,6 +231,7 @@ const renderNonInline = (data, customRenderers, level = 0, isArrayItem = false,
if (typeof data === 'object' && data !== null) {
const isImage = data.__limage;
const isNdarray = data.__lndarray;
if (isImage)
return (
@@ -200,7 +239,14 @@ const renderNonInline = (data, customRenderers, level = 0, isArrayItem = false,
<img src={data.content} alt="Embedded Image" />
</Indent>
);
else if (isNdarray) {
return (
<Indent level={level}>
{renderNdarray(data)}
{postfix}
</Indent>
);
}
else
return (
<>

View File

@@ -106,7 +106,7 @@ const InvocationDetailsPopover = ({ invocation, onClose, onResize }) => {
<InvocationDataPane invocation={invocation} />
</div>
)}
{(activeTab === "Info" || isNarrowForInfo) && (
{(activeTab === "Info" || !isNarrowForInfo) && (
<div className="h-full">
<InvocationInfoPane invocation={invocation} isFullWidth={true} />
</div>

View File

@@ -17,6 +17,7 @@ def create_test(text: str):
return "do it!"
ell.init(verbose=True, store='./logdir')
import json
if __name__ == "__main__":
result = create_test("ads")

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "ell-ai"
version = "0.0.2"
version = "0.0.3"
description = "ell - the language model programming library"
authors = ["William Guss <will@lrsys.xyz>"]
license = "MIT"

View File

@@ -13,5 +13,6 @@ from ell.__version__ import __version__
# Import all models
import ell.models
# Import everything from configurator
from ell.configurator import *

View File

View File

@@ -1,3 +1,4 @@
import json
import logging
import threading
from ell.types import SerializedLMP, Invocation, InvocationTrace, InvocationContents
@@ -213,10 +214,11 @@ def _write_invocation(func, invocation_id, latency_ms, prompt_tokens, completion
if invocation_contents.should_externalize and config.store.has_blob_storage:
invocation_contents.is_external = True
# Write to the blob store
# Write to the blob store
blob_id = config.store.blob_store.store_blob(
invocation_contents.model_dump_json().encode('utf-8'),
metadata={'invocation_id': invocation_id}
json.dumps(invocation_contents.model_dump(
), default=str).encode('utf-8'),
invocation_id
)
invocation_contents = InvocationContents(
invocation_id=invocation_id,

View File

@@ -8,7 +8,7 @@ from ell.types.message import InvocableLM
class BlobStore(ABC):
@abstractmethod
def store_blob(self, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str:
def store_blob(self, blob: bytes, blob_id : str) -> str:
"""Store a blob and return its identifier."""
pass

View File

@@ -218,20 +218,11 @@ class SQLiteStore(SQLStore):
blob_store = SQLBlobStore(db_dir)
super().__init__(f'sqlite:///{db_path}', blob_store=blob_store)
def write_external_blob(self, id: str, json_dump: str, depth: int = 2):
assert self.blob_store is not None, "Blob store is not initialized"
self.blob_store.store_blob(json_dump.encode('utf-8'), metadata={'id': id, 'depth': depth})
def read_external_blob(self, id: str, depth: int = 2) -> str:
assert self.blob_store is not None, "Blob store is not initialized"
return self.blob_store.retrieve_blob(id).decode('utf-8')
class SQLBlobStore(ell.store.BlobStore):
def __init__(self, db_dir: str):
self.db_dir = db_dir
def store_blob(self, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str:
blob_id = f"blob-{utc_now().isoformat()}"
def store_blob(self, blob: bytes, blob_id : str) -> str:
file_path = self._get_blob_path(blob_id)
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with gzip.open(file_path, "wb") as f:
@@ -249,7 +240,7 @@ class SQLBlobStore(ell.store.BlobStore):
increment = 2
dirs = [_type] + [_id[i:i+increment] for i in range(0, depth*increment, increment)]
file_name = _id[depth*increment:]
return os.path.join(self.db_dir, "blob", *dirs, file_name)
return os.path.join(self.db_dir, *dirs, file_name)
class PostgresStore(SQLStore):
def __init__(self, db_uri: str):

View File

@@ -28,11 +28,15 @@ def main():
if not args.dev:
# In production mode, serve the built React app
static_dir = os.path.join(os.path.dirname(__file__), "static")
app.mount("/", StaticFiles(directory=static_dir, html=True), name="static")
# app.mount("/", StaticFiles(directory=static_dir, html=True), name="static")
@app.get("/{full_path:path}")
async def serve_react_app(full_path: str):
return FileResponse(os.path.join(static_dir, "index.html"))
file_path = os.path.join(static_dir, full_path)
if os.path.exists(file_path) and os.path.isfile(file_path):
return FileResponse(file_path)
else:
return FileResponse(os.path.join(static_dir, "index.html"))
db_path = os.path.join(args.storage_dir)

View File

@@ -13,10 +13,26 @@ from ell.types._lstr import _lstr
pydantic_ltype_aware_cattr = cattrs.Converter()
def serialize_image(img):
buffer = BytesIO()
img.save(buffer, format="PNG")
return "data:image/png;base64," + base64.b64encode(buffer.getvalue()).decode()
# Register hooks for complex types
pydantic_ltype_aware_cattr.register_unstructure_hook(
np.ndarray,
lambda arr: arr.tolist()
lambda arr: {
"content": serialize_image(PIL.Image.fromarray(arr)),
"__limage": True
} if arr.ndim == 3 else (
{
"content": base64.b64encode(arr.tobytes()).decode(),
"dtype": str(arr.dtype),
"shape": arr.shape,
"__lndarray": True
}
)
)
pydantic_ltype_aware_cattr.register_unstructure_hook(
set,
@@ -28,11 +44,6 @@ pydantic_ltype_aware_cattr.register_unstructure_hook(
)
def serialize_image(img):
buffer = BytesIO()
img.save(buffer, format="PNG")
return "data:image/png;base64," + base64.b64encode(buffer.getvalue()).decode()
pydantic_ltype_aware_cattr.register_unstructure_hook(
PIL.Image.Image,
lambda obj: {

View File

@@ -71,7 +71,7 @@ def print_wrapped_messages(messages: List[Message], max_role_length: int, color:
for i, message in enumerate(messages):
role = message.role
text = message.content[0].text or "" # TODO: message repr
text = message.text or "" # TODO: message repr
role_color = SYSTEM_COLOR if role == "system" else USER_COLOR if role == "user" else ASSISTANT_COLOR
role_line = f"{prefix}{role_color}{role.rjust(max_role_length)}: {RESET}"