9 Commits
1.1.0 ... main

Author            SHA1        Message                                                                       Date
Ashley Kleynhans  1d71c18a84  Install correct version of Torch                                              2023-10-20 10:09:28 +02:00
Ashley Kleynhans  bfc8345434  Bump transformers to latest version so it can load the llava_mistral models  2023-10-20 10:04:27 +02:00
Ashley Kleynhans  cbb295ea1f  Fix typo                                                                      2023-10-20 09:14:53 +02:00
Ashley Kleynhans  a79384635d  Set SkunkworksAI/BakLLaVA-1 as the default model                              2023-10-20 09:13:15 +02:00
Ashley Kleynhans  a83e582b3f  Use my forked repo to get the API feature                                     2023-10-17 17:38:10 +02:00
Ashley Kleynhans  0fe07a3d47  Updated README                                                                2023-10-17 14:08:25 +02:00
Ashley Kleynhans  d95791b834  Use the correct model                                                         2023-10-17 13:50:13 +02:00
Ashley Kleynhans  4d02c7df35  Default to 7b version of the model if MODEL environment variable is not set and split out the startup scripts each into their own script  2023-10-17 13:39:37 +02:00
Ashley Kleynhans  4f32cbb62d  Bump LLaVA to version 1.1.1                                                   2023-10-12 09:45:52 +02:00
8 changed files with 79 additions and 33 deletions

Dockerfile

@@ -1,7 +1,7 @@
 # Stage 1: Base
 FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 as base
-ARG LLAVA_VERSION=v1.1.0
+ARG LLAVA_VERSION=v1.1.1
 SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 ENV DEBIAN_FRONTEND=noninteractive \
@@ -57,17 +57,23 @@ RUN apt update && \
 # Set Python
 RUN ln -s /usr/bin/python3.10 /usr/bin/python
-# Stage 2: Install FaceFusion and python modules
+# Stage 2: Install LLaVA and python modules
 FROM base as setup
 # Create and use the Python venv
 RUN python3 -m venv /venv
-# Clone the git repo of FaceFusion and set version
+# Install Torch
+RUN source /venv/bin/activate && \
+    pip3 install --no-cache-dir torch==2.0.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 && \
+    pip3 install --no-cache-dir xformers==0.0.22 && \
+    deactivate
+# Clone the git repo of LLaVA and set version
 WORKDIR /
-RUN git clone https://github.com/haotian-liu/LLaVA.git && \
-    cd /LLaVA && \
-    git checkout ${LLAVA_VERSION}
+RUN git clone https://github.com/ashleykleynhans/LLaVA.git
+# cd /LLaVA && \
+# git checkout ${LLAVA_VERSION}
 # Install the dependencies for LLaVA
 WORKDIR /LLaVA
@@ -77,6 +83,7 @@ RUN source /venv/bin/activate && \
     pip3 install -e . && \
     pip3 install ninja && \
     pip3 install flash-attn --no-build-isolation && \
+    pip3 install transformers==4.34.1 && \
     deactivate
 # Install Jupyter
@@ -95,9 +102,9 @@ RUN wget https://github.com/runpod/runpodctl/releases/download/v1.10.0/runpodctl
 COPY nginx/nginx.conf /etc/nginx/nginx.conf
 COPY nginx/502.html /usr/share/nginx/html/502.html
-# Set up the container startup script
+# Copy the scripts
 WORKDIR /
-COPY --chmod=755 pre_start.sh start.sh fix_venv.sh ./
+COPY --chmod=755 scripts/* ./
 # Start the container
 SHELL ["/bin/bash", "--login", "-c"]
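
As a quick sanity check on the pinned versions above, a minimal sketch of building the image and confirming the Torch and transformers installs inside the /venv virtualenv (the local tag `llava:dev` is illustrative, not from the repo):

```bash
# Build the image from the repo root (tag is illustrative)
docker build -t llava:dev .

# Print the installed Torch and transformers versions from inside the venv
docker run --rm llava:dev bash -c \
    'source /venv/bin/activate && python -c "import torch, transformers; print(torch.__version__, transformers.__version__)"'
```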

README.md

@@ -1,18 +1,14 @@
 # Docker image for LLaVA: Large Language and Vision Assistant
-> [!IMPORTANT]
-> Due to the large size of the model, CUDA will result in OOM errors
-> with a GPU that has less than 48GB of VRAM, so A6000 or higher is
-> recommended.
 ## Installs
 * Ubuntu 22.04 LTS
 * CUDA 11.8
 * Python 3.10.12
-* [LLaVA](https://github.com/haotian-liu/llava) v1.1.0
-* Torch 2.1.0
+* [LLaVA](https://github.com/haotian-liu/llava) v1.1.1
+* Torch 2.0.1
+* BakLLaVA-1 model
 ## Available on RunPod
@@ -37,11 +33,28 @@ docker run -d \
   -p 3000:3001 \
   -p 8888:8888 \
   -e JUPYTER_PASSWORD=Jup1t3R! \
-  ashleykza/llava:1.0.0
+  ashleykza/llava:latest
 ```
 You can obviously substitute the image name and tag with your own.
+#### Models
+> [!IMPORTANT]
+> If you select the 13b model, CUDA will result in OOM errors
+> with a GPU that has less than 48GB of VRAM, so A6000 or higher is
+> recommended.
+You can add an environment variable called `MODEL` to your Docker container to
+specify the model that should be downloaded. If the `MODEL` environment
+variable is not set, the model will default to `SkunkworksAI/BakLLaVA-1`.
+| Model                                                               | Environment Variable Value | Default |
+|---------------------------------------------------------------------|----------------------------|---------|
+| [llava-v1.5-13b](https://huggingface.co/liuhaotian/llava-v1.5-13b)  | liuhaotian/llava-v1.5-13b  | no      |
+| [llava-v1.5-7b](https://huggingface.co/liuhaotian/llava-v1.5-7b)    | liuhaotian/llava-v1.5-7b   | no      |
+| [BakLLaVA-1](https://huggingface.co/SkunkworksAI/BakLLaVA-1)        | SkunkworksAI/BakLLaVA-1    | yes     |
 ## Acknowledgements
 1. Matthew Berman for giving me a demo on LLaVA, as well as his amazing
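
Putting the new models table to use, a variant of the README's `docker run` example that selects the 7b model instead of the BakLLaVA-1 default (all flags other than `MODEL` are unchanged from the snippet above):

```bash
docker run -d \
  -p 3000:3001 \
  -p 8888:8888 \
  -e JUPYTER_PASSWORD=Jup1t3R! \
  -e MODEL=liuhaotian/llava-v1.5-7b \
  ashleykza/llava:latest
```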

scripts/pre_start.sh

@@ -19,29 +19,31 @@ if [[ ${DISABLE_AUTOLAUNCH} ]]
 then
     echo "Auto launching is disabled so the application will not be started automatically"
 else
+    # Configure environment variables
+    export LLAVA_HOST="0.0.0.0"
+    export LLAVA_CONTROLLER_PORT="10000"
+    export LLAVA_MODEL_WORKER_PORT="40000"
+    export GRADIO_SERVER_NAME=${LLAVA_HOST}
+    export GRADIO_SERVER_PORT="3001"
+    export HF_HOME="/workspace"
+    if [[ ${MODEL} ]]
+    then
+        export LLAVA_MODEL=${MODEL}
+    else
+        export LLAVA_MODEL="SkunkworksAI/BakLLaVA-1"
+    fi
     mkdir -p /workspace/logs
     echo "Starting LLaVA"
-    export HF_HOME="/workspace"
-    source /workspace/venv/bin/activate
-    cd /workspace/LLaVA
-    # Launch a controller
-    nohup python3 -m llava.serve.controller --host 0.0.0.0 --port 10000 > /workspace/logs/controller.log 2>&1 &
-    # Launch a gradio web server
-    export GRADIO_SERVER_NAME="0.0.0.0"
-    export GRADIO_SERVER_PORT="3001"
-    nohup python -m llava.serve.gradio_web_server --controller http://localhost:10000 --model-list-mode reload > /workspace/logs/webserver.log 2>&1 &
-    # Launch a model worker
-    nohup python3 -m llava.serve.model_worker --host 0.0.0.0 --controller http://localhost:10000 --port 40000 --worker http://localhost:40000 --model-path liuhaotian/llava-v1.5-13b > /workspace/logs/model-worker.log 2>&1 &
+    /start_controller.sh
+    /start_model_worker.sh
+    /start_webserver.sh
     echo "LLaVA started"
     echo "Log files: "
     echo " - Controller: /workspace/logs/controller.log"
-    echo " - Webserver: /workspace/logs/webserver.log"
     echo " - Model Worker: /workspace/logs/model-worker.log"
-    deactivate
+    echo " - Webserver: /workspace/logs/webserver.log"
 fi
 echo "All services have been started"

scripts/start_controller.sh Executable file

@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+source /workspace/venv/bin/activate
+cd /workspace/LLaVA
+nohup python3 -m llava.serve.controller \
+    --host ${LLAVA_HOST} \
+    --port ${LLAVA_CONTROLLER_PORT} > /workspace/logs/controller.log 2>&1 &
+deactivate

scripts/start_model_worker.sh Executable file

@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+source /workspace/venv/bin/activate
+cd /workspace/LLaVA
+nohup python3 -m llava.serve.model_worker \
+    --host ${LLAVA_HOST} \
+    --controller http://localhost:${LLAVA_CONTROLLER_PORT} \
+    --port ${LLAVA_MODEL_WORKER_PORT} \
+    --worker http://localhost:${LLAVA_MODEL_WORKER_PORT} \
+    --model-path ${LLAVA_MODEL} > /workspace/logs/model-worker.log 2>&1 &
+deactivate

scripts/start_webserver.sh Executable file

@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+source /workspace/venv/bin/activate
+cd /workspace/LLaVA
+nohup python -m llava.serve.gradio_web_server \
+    --controller http://localhost:${LLAVA_CONTROLLER_PORT} \
+    --model-list-mode reload > /workspace/logs/webserver.log 2>&1 &
+deactivate
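
The three new scripts rely on the variables exported by pre_start.sh, so a sketch of launching the stack by hand (all values taken from the pre_start.sh diff above) would first recreate that environment:

```bash
# Recreate the environment pre_start.sh normally provides
export LLAVA_HOST="0.0.0.0"
export LLAVA_CONTROLLER_PORT="10000"
export LLAVA_MODEL_WORKER_PORT="40000"
export LLAVA_MODEL="SkunkworksAI/BakLLaVA-1"
export GRADIO_SERVER_NAME=${LLAVA_HOST}
export GRADIO_SERVER_PORT="3001"
export HF_HOME="/workspace"
mkdir -p /workspace/logs

# Each script backgrounds its service with nohup and returns immediately
/start_controller.sh
/start_model_worker.sh
/start_webserver.sh
```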