Merge pull request #148 from MadcowD/wguss/fix_serialization_blob

fix blob serialization and np array serialization
This commit is contained in:
William Guss
2024-09-10 19:37:17 -07:00
committed by GitHub
16 changed files with 162 additions and 29 deletions

6
.gitignore vendored
View File

@@ -3,8 +3,12 @@ __pycache__/
*.py[cod]
*$py.class
# Sqlite
blob/
blob*/
blob/**/*
blob/*
*.db

View File

@@ -0,0 +1,51 @@
🚀 I'm excited to announce the future of prompt engineering: 𝚎𝚕𝚕.
developed from ideas during my time at OpenAI, 𝚎𝚕𝚕 is a light, functional lm programming library:
- automatic versioning & tracing
- rich local oss visualization tools
- multimodality native
Read on ⬇️
𝚎𝚕𝚕 was built out of frustration with frameworks like @LangChainAI on three principles
- prompts are programs not strings
- prompts are parameters of machine learning models
- every call to a language model is worth its weight in credits
prompting should be readable, scientific, and optimizable
prompt engineering is an optimization process
because you write your prompts as normal python functions, 𝚎𝚕𝚕 automatically versions and serializes them via dynamic analysis of "lexical closures" - no custom IDE or editor required
𝚎𝚕𝚕.𝚒𝚗𝚒𝚝(𝚜𝚝𝚘𝚛𝚎='./𝚕𝚘𝚐𝚍𝚒𝚛')
local tools for monitoring & visualization
prompt engineering goes from a dark art to a science with the right tools. Ell Studio is a local, open source tool for prompt version control, monitoring, visualization.
𝚎𝚕𝚕-𝚜𝚝𝚞𝚍𝚒𝚘 --𝚜𝚝𝚘𝚛𝚊𝚐𝚎 ./𝚕𝚘𝚐𝚍𝚒𝚛
Multimodality should be first class
in anticipation of the upcoming gpt-4o + 🍓 api, 𝚎𝚕𝚕 is built with multimodality first.
with a rich numpy style message api with multimodal type coercion, using images, video, and audio is intuitive
🎉 𝚎𝚕𝚕 is available on PyPI today w/
𝚙𝚒𝚙 𝚒𝚗𝚜𝚝𝚊𝚕𝚕 𝚎𝚕𝚕-𝚊𝚒
check out the source https://github.com/MadcowD/ell
and read the docs https://docs.ell.so/
⏰ new features soon, including SGD & RL on prompts and so much more!
🙏 huge shout out to everyone who's helped with this project
@jakeottiger @a_dixon @shelwin_ zraig, frank hu, & my discord
so many other good convos w @goodside @aidan_mclau and others

View File

@@ -19,6 +19,7 @@
"@testing-library/react": "^13.4.0",
"@testing-library/user-event": "^13.5.0",
"axios": "^1.6.0",
"base64-js": "^1.5.1",
"class-variance-authority": "^0.7.0",
"clsx": "^2.1.1",
"d3-force": "^3.0.0",
@@ -7262,6 +7263,26 @@
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
"license": "MIT"
},
"node_modules/base64-js": {
"version": "1.5.1",
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
"integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
],
"license": "MIT"
},
"node_modules/batch": {
"version": "0.6.1",
"resolved": "https://registry.npmjs.org/batch/-/batch-0.6.1.tgz",

View File

@@ -14,6 +14,7 @@
"@testing-library/react": "^13.4.0",
"@testing-library/user-event": "^13.5.0",
"axios": "^1.6.0",
"base64-js": "^1.5.1",
"class-variance-authority": "^0.7.0",
"clsx": "^2.1.1",
"d3-force": "^3.0.0",

View File

@@ -1,4 +1,5 @@
import React from 'react';
import * as base64 from 'base64-js';
const typeMatchers = {
ToolResult: (data) => data && typeof data === 'object' && 'tool_call_id' in data && 'result' in data,
@@ -103,13 +104,15 @@ const renderInline = (data, customRenderers) => {
}
if (typeof data === 'object' && data !== null) {
const isImage = data.__limage;
const isNdarray = data.__lndarray;
if (isImage) {
return (
<img src={data.content} alt="PIL.Image" style={{display: 'inline-block', verticalAlign: 'middle', maxHeight: '1.5em'}} />
);
} else if (isNdarray) {
return renderNdarray(data);
}
return (
@@ -133,6 +136,41 @@ const renderInline = (data, customRenderers) => {
return <span className="text-yellow-300">{JSON.stringify(data)}</span>;
};
const renderNdarray = (data) => {
const { content, dtype, shape } = data;
const decodedData = base64.toByteArray(content);
const numElements = shape.reduce((a, b) => a * b, 1);
let arrayData;
if (dtype === 'float32') {
arrayData = new Float32Array(decodedData.buffer);
} else if (dtype === 'int32') {
arrayData = new Int32Array(decodedData.buffer);
} else {
// Add more types as needed
arrayData = Array.from(decodedData);
}
console.log(arrayData)
let displayData;
if (numElements > 3) {
displayData = arrayData.slice(0, 3);
displayData = [...displayData, '...']
} else {
displayData = arrayData;
}
console.log(displayData[0])
return (
<span className="text-indigo-400">
np.array(
<span className="text-yellow-300">[{displayData.join(', ')}]</span>,{' '}
<span className="text-green-300">shape=[{shape.join(', ')}]</span>,{' '}
<span className="text-pink-300">dtype={dtype}</span>
)
</span>
);
};
const renderNonInline = (data, customRenderers, level = 0, isArrayItem = false, postfix = '') => {
if (data.__lstr) {
data = data.content;
@@ -193,6 +231,7 @@ const renderNonInline = (data, customRenderers, level = 0, isArrayItem = false,
if (typeof data === 'object' && data !== null) {
const isImage = data.__limage;
const isNdarray = data.__lndarray;
if (isImage)
return (
@@ -200,7 +239,14 @@ const renderNonInline = (data, customRenderers, level = 0, isArrayItem = false,
<img src={data.content} alt="Embedded Image" />
</Indent>
);
else if (isNdarray) {
return (
<Indent level={level}>
{renderNdarray(data)}
{postfix}
</Indent>
);
}
else
return (
<>

View File

@@ -106,7 +106,7 @@ const InvocationDetailsPopover = ({ invocation, onClose, onResize }) => {
<InvocationDataPane invocation={invocation} />
</div>
)}
{(activeTab === "Info" || isNarrowForInfo) && (
{(activeTab === "Info" || !isNarrowForInfo) && (
<div className="h-full">
<InvocationInfoPane invocation={invocation} isFullWidth={true} />
</div>

View File

@@ -17,6 +17,7 @@ def create_test(text: str):
return "do it!"
ell.init(verbose=True, store='./logdir')
import json
if __name__ == "__main__":
result = create_test("ads")

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "ell-ai"
version = "0.0.2"
version = "0.0.3"
description = "ell - the language model programming library"
authors = ["William Guss <will@lrsys.xyz>"]
license = "MIT"

View File

@@ -13,5 +13,6 @@ from ell.__version__ import __version__
# Import all models
import ell.models
# Import everything from configurator
from ell.configurator import *

View File

View File

@@ -1,3 +1,4 @@
import json
import logging
import threading
from ell.types import SerializedLMP, Invocation, InvocationTrace, InvocationContents
@@ -213,10 +214,11 @@ def _write_invocation(func, invocation_id, latency_ms, prompt_tokens, completion
if invocation_contents.should_externalize and config.store.has_blob_storage:
invocation_contents.is_external = True
# Write to the blob store
# Write to the blob store
blob_id = config.store.blob_store.store_blob(
invocation_contents.model_dump_json().encode('utf-8'),
metadata={'invocation_id': invocation_id}
json.dumps(invocation_contents.model_dump(
), default=str).encode('utf-8'),
invocation_id
)
invocation_contents = InvocationContents(
invocation_id=invocation_id,

View File

@@ -8,7 +8,7 @@ from ell.types.message import InvocableLM
class BlobStore(ABC):
@abstractmethod
def store_blob(self, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str:
def store_blob(self, blob: bytes, blob_id : str) -> str:
"""Store a blob and return its identifier."""
pass

View File

@@ -218,20 +218,11 @@ class SQLiteStore(SQLStore):
blob_store = SQLBlobStore(db_dir)
super().__init__(f'sqlite:///{db_path}', blob_store=blob_store)
def write_external_blob(self, id: str, json_dump: str, depth: int = 2):
assert self.blob_store is not None, "Blob store is not initialized"
self.blob_store.store_blob(json_dump.encode('utf-8'), metadata={'id': id, 'depth': depth})
def read_external_blob(self, id: str, depth: int = 2) -> str:
assert self.blob_store is not None, "Blob store is not initialized"
return self.blob_store.retrieve_blob(id).decode('utf-8')
class SQLBlobStore(ell.store.BlobStore):
def __init__(self, db_dir: str):
self.db_dir = db_dir
def store_blob(self, blob: bytes, metadata: Optional[Dict[str, Any]] = None) -> str:
blob_id = f"blob-{utc_now().isoformat()}"
def store_blob(self, blob: bytes, blob_id : str) -> str:
file_path = self._get_blob_path(blob_id)
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with gzip.open(file_path, "wb") as f:
@@ -249,7 +240,7 @@ class SQLBlobStore(ell.store.BlobStore):
increment = 2
dirs = [_type] + [_id[i:i+increment] for i in range(0, depth*increment, increment)]
file_name = _id[depth*increment:]
return os.path.join(self.db_dir, "blob", *dirs, file_name)
return os.path.join(self.db_dir, *dirs, file_name)
class PostgresStore(SQLStore):
def __init__(self, db_uri: str):

View File

@@ -28,11 +28,15 @@ def main():
if not args.dev:
# In production mode, serve the built React app
static_dir = os.path.join(os.path.dirname(__file__), "static")
app.mount("/", StaticFiles(directory=static_dir, html=True), name="static")
# app.mount("/", StaticFiles(directory=static_dir, html=True), name="static")
@app.get("/{full_path:path}")
async def serve_react_app(full_path: str):
return FileResponse(os.path.join(static_dir, "index.html"))
file_path = os.path.join(static_dir, full_path)
if os.path.exists(file_path) and os.path.isfile(file_path):
return FileResponse(file_path)
else:
return FileResponse(os.path.join(static_dir, "index.html"))
db_path = os.path.join(args.storage_dir)

View File

@@ -13,10 +13,26 @@ from ell.types._lstr import _lstr
pydantic_ltype_aware_cattr = cattrs.Converter()
def serialize_image(img):
buffer = BytesIO()
img.save(buffer, format="PNG")
return "data:image/png;base64," + base64.b64encode(buffer.getvalue()).decode()
# Register hooks for complex types
pydantic_ltype_aware_cattr.register_unstructure_hook(
np.ndarray,
lambda arr: arr.tolist()
lambda arr: {
"content": serialize_image(PIL.Image.fromarray(arr)),
"__limage": True
} if arr.ndim == 3 else (
{
"content": base64.b64encode(arr.tobytes()).decode(),
"dtype": str(arr.dtype),
"shape": arr.shape,
"__lndarray": True
}
)
)
pydantic_ltype_aware_cattr.register_unstructure_hook(
set,
@@ -28,11 +44,6 @@ pydantic_ltype_aware_cattr.register_unstructure_hook(
)
def serialize_image(img):
buffer = BytesIO()
img.save(buffer, format="PNG")
return "data:image/png;base64," + base64.b64encode(buffer.getvalue()).decode()
pydantic_ltype_aware_cattr.register_unstructure_hook(
PIL.Image.Image,
lambda obj: {

View File

@@ -71,7 +71,7 @@ def print_wrapped_messages(messages: List[Message], max_role_length: int, color:
for i, message in enumerate(messages):
role = message.role
text = message.content[0].text or "" # TODO: message repr
text = message.text or "" # TODO: message repr
role_color = SYSTEM_COLOR if role == "system" else USER_COLOR if role == "user" else ASSISTANT_COLOR
role_line = f"{prefix}{role_color}{role.rjust(max_role_length)}: {RESET}"