Add support for using wheels when installing llama-cpp-python (#904)

* Initial changes to support wheels

* Format shell files

* Remove curl, move location of .ENV file

* Fix path to shfmt

* Add OPT for ShellCheck

* Fix for SC1091

* Disable SC1091

* Fix delete prompt call when prompt in progress

* Add null check

* Revert changes to Dockerfile

* Fix syntax issue

* Remove duplicated command
This commit is contained in:
Juan Calderon-Perez
2023-11-26 18:34:28 -05:00
committed by GitHub
parent 05224d2c2c
commit 2dfcde881a
12 changed files with 84 additions and 15 deletions

View File

@@ -1,4 +1,4 @@
name: CI/CD Process
name: CI Checks
on:
push:
@@ -68,6 +68,7 @@ jobs:
- uses: luizm/action-sh-checker@v0.8.0
env:
SHFMT_OPTS: "-s"
SHELLCHECK_OPTS: "-P scripts/ -e SC1091"
with:
sh_checker_only_diff: false
sh_checker_comment: false

View File

@@ -1,4 +1,4 @@
name: CI/CD Docker Build/Publish
name: Docker
on:
push:

View File

@@ -1,4 +1,4 @@
name: Lint and Test Helm Chart
name: Helm
on:
push:

View File

@@ -1,4 +1,4 @@
name: LLM Models Healthcheck
name: LLM Healthcheck
on:
push:

View File

@@ -29,6 +29,7 @@ COPY --from=redis /usr/local/bin/redis-cli /usr/local/bin/redis-cli
COPY --from=frontend /usr/src/app/web/build /usr/src/app/api/static/
COPY ./api /usr/src/app/api
COPY scripts/deploy.sh /usr/src/app/deploy.sh
COPY scripts/serge.env /usr/src/app/serge.env
# Install api dependencies
RUN apt-get update \

View File

@@ -17,13 +17,14 @@ ENV NODE_ENV='development'
# Install dependencies
RUN apt-get update \
&& apt-get install -y --no-install-recommends cmake build-essential dumb-init curl
&& apt-get install -y --no-install-recommends dumb-init
# Copy database, source code, and scripts
COPY --from=redis /usr/local/bin/redis-server /usr/local/bin/redis-server
COPY --from=redis /usr/local/bin/redis-cli /usr/local/bin/redis-cli
COPY --from=node_base /usr/local /usr/local
COPY scripts/dev.sh /usr/src/app/dev.sh
COPY scripts/serge.env /usr/src/app/serge.env
COPY ./web/package.json ./web/package-lock.json ./
RUN npm ci \

View File

@@ -1,6 +1,6 @@
MIT License
Copyright (c) 2023 Nathan Sarrazin and contributors
Copyright (c) 2023-present Nathan Sarrazin and Contributors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
SOFTWARE.

View File

@@ -1,5 +1,5 @@
from typing import Optional
from fastapi import APIRouter
from fastapi import APIRouter, HTTPException
from langchain.memory import RedisChatMessageHistory
from langchain.schema import SystemMessage, messages_to_dict, AIMessage, HumanMessage
from llama_cpp import Llama
@@ -137,7 +137,7 @@ async def delete_prompt(chat_id: str, idx: int):
if idx >= len(history.messages):
logger.error("Unable to delete message, chat in progress")
return False
raise HTTPException(status_code=202, detail="Unable to delete message, chat in progress")
messages = history.messages.copy()[:idx]
history.clear()

23
scripts/deploy.sh Normal file → Executable file
View File

@@ -1,6 +1,27 @@
#!/bin/bash
set -x
source serge.env
# Function to detect CPU features
# Probe lscpu output for the best available SIMD instruction set.
# Prints one of: AVX512, AVX2, AVX, basic.
detect_cpu_features() {
    local flags
    flags=$(lscpu)
    # Most specific pattern first: "avx" is a substring of the
    # avx2/avx512 flag names, so the order of these arms matters.
    case "$flags" in
        *avx512*) echo "AVX512" ;;
        *avx2*) echo "AVX2" ;;
        *avx*) echo "AVX" ;;
        *) echo "basic" ;;
    esac
}
# Detect CPU features and generate install command
cpu_feature=$(detect_cpu_features)
pip_command="python -m pip install llama-cpp-python==$LLAMA_PYTHON_VERSION --prefer-binary --extra-index-url=https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cpu"
echo "Recommended install command for llama-cpp-python:"
echo "$pip_command"
# Handle termination signals
_term() {
@@ -10,7 +31,7 @@ _term() {
}
# Install python bindings
UNAME_M=$(dpkg --print-architecture) pip install llama-cpp-python==0.2.19 || {
eval "$pip_command" || {
echo 'Failed to install llama-cpp-python'
exit 1
}

23
scripts/dev.sh Normal file → Executable file
View File

@@ -1,6 +1,27 @@
#!/bin/bash
set -x
source serge.env
# Function to detect CPU features
# Probe lscpu output for the best available SIMD instruction set.
# Prints one of: AVX512, AVX2, AVX, basic (consumed by the pip wheel URL).
detect_cpu_features() {
cpu_info=$(lscpu)
# Check the most specific flag first: "avx" is a substring of the
# avx2/avx512 flag names, so the branch order here is significant.
if echo "$cpu_info" | grep -q "avx512"; then
echo "AVX512"
elif echo "$cpu_info" | grep -q "avx2"; then
echo "AVX2"
elif echo "$cpu_info" | grep -q "avx"; then
echo "AVX"
else
# No AVX support detected; fall back to the generic wheel.
echo "basic"
fi
}
# Detect CPU features and generate install command
cpu_feature=$(detect_cpu_features)
pip_command="python -m pip install llama-cpp-python==$LLAMA_PYTHON_VERSION --prefer-binary --extra-index-url=https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cpu"
echo "Recommended install command for llama-cpp-python:"
echo "$pip_command"
# Install python dependencies
pip install -e ./api || {
@@ -9,7 +30,7 @@ pip install -e ./api || {
}
# Install python bindings
UNAME_M=$(dpkg --print-architecture) pip install llama-cpp-python==0.2.19 || {
eval "$pip_command" || {
echo 'Failed to install llama-cpp-python'
exit 1
}

1
scripts/serge.env Normal file
View File

@@ -0,0 +1 @@
LLAMA_PYTHON_VERSION=0.2.19

View File

@@ -102,10 +102,7 @@
});
eventSource.onerror = async (error) => {
console.log("error", error);
eventSource.close();
//history[history.length - 1].data.content = "A server error occurred.";
//await invalidate("/api/chat/" + $page.params.id);
};
}
@@ -143,11 +140,34 @@
if (response.status === 200) {
await invalidate("/api/chat/" + $page.params.id);
} else if (response.status === 202) {
showToast("Chat in progress!");
} else {
console.error("Error " + response.status + ": " + response.statusText);
showToast("An error occurred: " + response.statusText);
}
}
// Show a transient toast notification inside the page's
// #toast-container element; the toast removes itself after 3s.
function showToast(message: string) {
  const toastElement = document.createElement("div");
  toastElement.className = `alert alert-info`;
  toastElement.textContent = message;

  // Guard clause: without the container there is nowhere to attach
  // the toast, so bail out before scheduling the removal timer.
  const container = document.getElementById("toast-container");
  if (!container) {
    console.error("Toast container not found?");
    return;
  }
  container.appendChild(toastElement);

  // Automatically dismiss the toast after a short delay.
  setTimeout(() => toastElement.remove(), 3000);
}
const md: MarkdownIt = new MarkdownIt({
html: true,
linkify: true,
@@ -527,4 +547,7 @@
</button>
</div>
</div>
<div id="toast-container" class="toast">
<!-- Toast notifications will be added here -->
</div>
</div>