Add support for using wheels when installing llama-cpp-python (#904)
* Initial changes to support wheels
* Format shell files
* Remove curl, move location of .ENV file
* Fix path to shfmt
* Add OPT for ShellCheck
* Fix for SC1091
* Disable SC1091
* Fix delete prompt call when prompt in progress
* Add null check
* Revert changes to Dockerfile
* Fix syntax issue
* Remove duplicated command
Committed by GitHub
Parent: 05224d2c2c
Commit: 2dfcde881a
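The core of the change: deploy.sh and dev.sh now detect the host CPU's vector-instruction level and install llama-cpp-python as a prebuilt wheel from a matching extra index, instead of the fixed `UNAME_M=$(dpkg --print-architecture) pip install ...` call they used before (see the script diffs below). As a rough sketch of the command the scripts end up generating — assuming an AVX2-capable host and the version pinned in the new scripts/serge.env — it looks like:

    python -m pip install llama-cpp-python==0.2.19 --prefer-binary \
        --extra-index-url=https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/AVX2/cpu

--prefer-binary makes pip take a wheel when one matches the pinned version and fall back to a source build only when none does; hosts without AVX support get the "basic" index segment instead of "AVX2".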
.github/workflows/ci.yml (3 changes)
@@ -1,4 +1,4 @@
-name: CI/CD Process
+name: CI Checks
 
 on:
   push:
@@ -68,6 +68,7 @@ jobs:
       - uses: luizm/action-sh-checker@v0.8.0
         env:
           SHFMT_OPTS: "-s"
+          SHELLCHECK_OPTS: "-P scripts/ -e SC1091"
         with:
           sh_checker_only_diff: false
           sh_checker_comment: false
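The new SHELLCHECK_OPTS tells action-sh-checker where to look for sourced files (-P scripts/) and to skip SC1091, the "not following sourced file" message that the new `source serge.env` line in the scripts would otherwise trigger. A rough local equivalent of the CI shell checks, assuming shellcheck and shfmt are installed:

    shellcheck -P scripts/ -e SC1091 scripts/*.sh
    shfmt -s -d scripts/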
.github/workflows/docker.yml (2 changes)
@@ -1,4 +1,4 @@
-name: CI/CD Docker Build/Publish
+name: Docker
 
 on:
   push:
.github/workflows/helm-test.yml (2 changes)
@@ -1,4 +1,4 @@
-name: Lint and Test Helm Chart
+name: Helm
 
 on:
   push:
.github/workflows/model-check.yml (2 changes)
@@ -1,4 +1,4 @@
-name: LLM Models Healthcheck
+name: LLM Healthcheck
 
 on:
   push:
Dockerfile
@@ -29,6 +29,7 @@ COPY --from=redis /usr/local/bin/redis-cli /usr/local/bin/redis-cli
 COPY --from=frontend /usr/src/app/web/build /usr/src/app/api/static/
 COPY ./api /usr/src/app/api
 COPY scripts/deploy.sh /usr/src/app/deploy.sh
+COPY scripts/serge.env /usr/src/app/serge.env
 
 # Install api dependencies
 RUN apt-get update \
Dockerfile.dev
@@ -17,13 +17,14 @@ ENV NODE_ENV='development'
 
 # Install dependencies
 RUN apt-get update \
-    && apt-get install -y --no-install-recommends cmake build-essential dumb-init curl
+    && apt-get install -y --no-install-recommends dumb-init
 
 # Copy database, source code, and scripts
 COPY --from=redis /usr/local/bin/redis-server /usr/local/bin/redis-server
 COPY --from=redis /usr/local/bin/redis-cli /usr/local/bin/redis-cli
 COPY --from=node_base /usr/local /usr/local
 COPY scripts/dev.sh /usr/src/app/dev.sh
+COPY scripts/serge.env /usr/src/app/serge.env
 COPY ./web/package.json ./web/package-lock.json ./
 
 RUN npm ci \
LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2023 Nathan Sarrazin and contributors
+Copyright (c) 2023-present Nathan Sarrazin and Contributors
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+SOFTWARE.
@@ -1,5 +1,5 @@
 from typing import Optional
-from fastapi import APIRouter
+from fastapi import APIRouter, HTTPException
 from langchain.memory import RedisChatMessageHistory
 from langchain.schema import SystemMessage, messages_to_dict, AIMessage, HumanMessage
 from llama_cpp import Llama
@@ -137,7 +137,7 @@ async def delete_prompt(chat_id: str, idx: int):
 
     if idx >= len(history.messages):
         logger.error("Unable to delete message, chat in progress")
-        return False
+        raise HTTPException(status_code=202, detail="Unable to delete message, chat in progress")
 
     messages = history.messages.copy()[:idx]
     history.clear()
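Returning False from a FastAPI route body is ordinarily serialized as a plain 200 response, so the frontend could not tell a refused delete from a successful one; raising HTTPException with status 202 gives the client an explicit signal, which the `response.status === 202` branch in the Svelte diff further down picks up. A hedged curl sketch of the behaviour — the route path, HTTP method, and port are not visible in this hunk, so everything below is illustrative only:

    # hypothetical route and port; substitute the real chat id and prompt index
    curl -i -X DELETE "http://localhost:8008/api/chat/<chat_id>/prompt/<idx>"
    # while the chat is still generating, expect a 202 with
    # {"detail": "Unable to delete message, chat in progress"}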
scripts/deploy.sh (23 changes, Normal file → Executable file)
@@ -1,6 +1,27 @@
 #!/bin/bash
 
 set -x
+source serge.env
+
+# Function to detect CPU features
+detect_cpu_features() {
+    cpu_info=$(lscpu)
+    if echo "$cpu_info" | grep -q "avx512"; then
+        echo "AVX512"
+    elif echo "$cpu_info" | grep -q "avx2"; then
+        echo "AVX2"
+    elif echo "$cpu_info" | grep -q "avx"; then
+        echo "AVX"
+    else
+        echo "basic"
+    fi
+}
+
+# Detect CPU features and generate install command
+cpu_feature=$(detect_cpu_features)
+pip_command="python -m pip install llama-cpp-python==$LLAMA_PYTHON_VERSION --prefer-binary --extra-index-url=https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cpu"
+echo "Recommended install command for llama-cpp-python:"
+echo "$pip_command"
 
 # Handle termination signals
 _term() {
@@ -10,7 +31,7 @@ _term() {
 }
 
 # Install python bindings
-UNAME_M=$(dpkg --print-architecture) pip install llama-cpp-python==0.2.19 || {
+eval "$pip_command" || {
     echo 'Failed to install llama-cpp-python'
     exit 1
 }
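Both scripts embed the same detection helper, so you can check which wheel flavour a given machine will get before rebuilding anything. A quick sketch of the same checks, assuming lscpu is available (part of util-linux, typically present in the Debian-based images used here):

    # the script returns the first of these that matches, and "basic" if none do
    lscpu | grep -q avx512 && echo AVX512
    lscpu | grep -q avx2 && echo AVX2
    lscpu | grep -q avx && echo AVX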
scripts/dev.sh (23 changes, Normal file → Executable file)
@@ -1,6 +1,27 @@
 #!/bin/bash
 
 set -x
+source serge.env
+
+# Function to detect CPU features
+detect_cpu_features() {
+    cpu_info=$(lscpu)
+    if echo "$cpu_info" | grep -q "avx512"; then
+        echo "AVX512"
+    elif echo "$cpu_info" | grep -q "avx2"; then
+        echo "AVX2"
+    elif echo "$cpu_info" | grep -q "avx"; then
+        echo "AVX"
+    else
+        echo "basic"
+    fi
+}
+
+# Detect CPU features and generate install command
+cpu_feature=$(detect_cpu_features)
+pip_command="python -m pip install llama-cpp-python==$LLAMA_PYTHON_VERSION --prefer-binary --extra-index-url=https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/$cpu_feature/cpu"
+echo "Recommended install command for llama-cpp-python:"
+echo "$pip_command"
 
 # Install python dependencies
 pip install -e ./api || {
@@ -9,7 +30,7 @@ pip install -e ./api || {
 }
 
 # Install python bindings
-UNAME_M=$(dpkg --print-architecture) pip install llama-cpp-python==0.2.19 || {
+eval "$pip_command" || {
     echo 'Failed to install llama-cpp-python'
     exit 1
 }
scripts/serge.env (new file, 1 change)
@@ -0,0 +1 @@
+LLAMA_PYTHON_VERSION=0.2.19
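deploy.sh and dev.sh both `source serge.env` before building the install command, and the Dockerfiles now copy the file alongside the scripts, so bumping the pinned binding becomes a one-line edit here instead of a change in two scripts. For example, with a hypothetical future release (not a version this commit pins):

    LLAMA_PYTHON_VERSION=0.2.20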
@@ -102,10 +102,7 @@
     });
 
     eventSource.onerror = async (error) => {
       console.log("error", error);
       eventSource.close();
-      //history[history.length - 1].data.content = "A server error occurred.";
-      //await invalidate("/api/chat/" + $page.params.id);
     };
   }
@@ -143,11 +140,34 @@
 
     if (response.status === 200) {
       await invalidate("/api/chat/" + $page.params.id);
+    } else if (response.status === 202) {
+      showToast("Chat in progress!");
     } else {
       console.error("Error " + response.status + ": " + response.statusText);
+      showToast("An error occurred: " + response.statusText);
     }
   }
 
+  function showToast(message: string) {
+    // Create the toast element
+    const toast = document.createElement("div");
+    toast.className = `alert alert-info`;
+    toast.textContent = message;
+    const toastContainer = document.getElementById("toast-container");
+
+    // Append the toast to the toast container if it exists
+    if (toastContainer) {
+      toastContainer.appendChild(toast);
+    } else {
+      console.error("Toast container not found?");
+      return;
+    }
+
+    // Automatically remove the toast after a delay
+    setTimeout(() => {
+      toast.remove();
+    }, 3000);
+  }
+
   const md: MarkdownIt = new MarkdownIt({
     html: true,
     linkify: true,
@@ -527,4 +547,7 @@
       </button>
     </div>
   </div>
+  <div id="toast-container" class="toast">
+    <!-- Toast notifications will be added here -->
+  </div>
 </div>