From c14201dc0f7c544232e5f894726f8ca4d92424f7 Mon Sep 17 00:00:00 2001 From: Niek van der Maas Date: Wed, 12 Apr 2023 11:53:39 +0200 Subject: [PATCH 1/8] Add Dockerfile + build workflow --- .github/workflows/publish.yaml | 31 ++++++++++++++++++++++++++++++- Dockerfile | 10 ++++++++++ README.md | 8 ++++++++ 3 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 Dockerfile diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index 92b6e5b..16a6012 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -28,4 +28,33 @@ jobs: # if: startsWith(github.ref, 'refs/tags') uses: pypa/gh-action-pypi-publish@release/v1 with: - password: ${{ secrets.PYPI_API_TOKEN }} \ No newline at end of file + password: ${{ secrets.PYPI_API_TOKEN }} + + docker: + name: Build and push Docker image + runs-on: ubuntu-latest + needs: build-n-publish + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Login to GitHub Container Registry + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push + uses: docker/build-push-action@v4 + with: + push: true # push to registry + pull: true # always fetch the latest base images + platforms: linux/amd64,linux/arm64 # build for both amd64 and arm64 + tags: ghcr.io/abetlen/llama-cpp-python:latest \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..b500a0b --- /dev/null +++ b/Dockerfile @@ -0,0 +1,10 @@ +FROM python:3-buster + +# We need to set the host to 0.0.0.0 to allow outside access +ENV HOST 0.0.0.0 + +# Install the package +RUN pip install llama-cpp-python[server] + +# Run the server +CMD python3 -m llama_cpp.server \ No newline at end of file diff --git a/README.md b/README.md index 2c8c0a5..81ad723 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,14 @@ python3 -m llama_cpp.server Navigate to [http://localhost:8000/docs](http://localhost:8000/docs) to see the OpenAPI documentation. +## Docker image + +A Docker image is available on [GHCR](https://ghcr.io/abetlen/llama-cpp-python). To run the server: + +```bash +docker run --rm -it -p8000:8000 -v /path/to/models:/models -eMODEL=/models/ggml-vicuna-7b-4bit.bin ghcr.io/abetlen/llama-cpp-python:latest +``` + ## Low-level API The low-level API is a direct `ctypes` binding to the C API provided by `llama.cpp`. From 9ce8146231d77e9aceb8a0f2c0f2721755640eed Mon Sep 17 00:00:00 2001 From: Niek van der Maas Date: Wed, 12 Apr 2023 11:56:16 +0200 Subject: [PATCH 2/8] More generic model name --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 81ad723..bcb25e3 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ Navigate to [http://localhost:8000/docs](http://localhost:8000/docs) to see the A Docker image is available on [GHCR](https://ghcr.io/abetlen/llama-cpp-python). To run the server: ```bash -docker run --rm -it -p8000:8000 -v /path/to/models:/models -eMODEL=/models/ggml-vicuna-7b-4bit.bin ghcr.io/abetlen/llama-cpp-python:latest +docker run --rm -it -p8000:8000 -v /path/to/models:/models -eMODEL=/models/ggml-model-name.bin ghcr.io/abetlen/llama-cpp-python:latest ``` ## Low-level API From 59b37bbbd2fb2a69788c5de6bd103439befbc845 Mon Sep 17 00:00:00 2001 From: Niek van der Maas Date: Sat, 15 Apr 2023 20:24:46 +0200 Subject: [PATCH 3/8] Support openblas --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index b500a0b..5bd28b7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ FROM python:3-buster ENV HOST 0.0.0.0 # Install the package -RUN pip install llama-cpp-python[server] +RUN apt update && apt install -y libopenblas-dev && LLAMA_OPENBLAS=1 pip install llama-cpp-python[server] # Run the server CMD python3 -m llama_cpp.server \ No newline at end of file From 8476b325f127d66477424f6767c3330fa520728e Mon Sep 17 00:00:00 2001 From: Niek van der Maas Date: Mon, 24 Apr 2023 09:54:38 +0200 Subject: [PATCH 4/8] Change to bullseye --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 5bd28b7..ade4ac9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3-buster +FROM python:3-bullseye # We need to set the host to 0.0.0.0 to allow outside access ENV HOST 0.0.0.0 From d605408f9917943bc0c969b502335ab56b5b2d59 Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Tue, 2 May 2023 00:55:34 -0400 Subject: [PATCH 5/8] Add dockerignore --- .dockerignore | 166 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..fd64c09 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,166 @@ +_skbuild/ + +.envrc + +models/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +.idea/ From 81631afc48990135c20ece1d52872a7de3033715 Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Tue, 2 May 2023 00:55:51 -0400 Subject: [PATCH 6/8] Install from local directory --- Dockerfile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index ade4ac9..14fb3be 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,8 +3,13 @@ FROM python:3-bullseye # We need to set the host to 0.0.0.0 to allow outside access ENV HOST 0.0.0.0 +COPY . . + # Install the package -RUN apt update && apt install -y libopenblas-dev && LLAMA_OPENBLAS=1 pip install llama-cpp-python[server] +RUN apt update && apt install -y libopenblas-dev +RUN python -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette + +RUN LLAMA_OPENBLAS=1 python3 setup.py develop # Run the server CMD python3 -m llama_cpp.server \ No newline at end of file From 5d5421b29ddd45ea693d0ce36552c7ff40d83187 Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Tue, 2 May 2023 01:04:02 -0400 Subject: [PATCH 7/8] Add build docker --- .github/workflows/build-docker.yaml | 33 +++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 .github/workflows/build-docker.yaml diff --git a/.github/workflows/build-docker.yaml b/.github/workflows/build-docker.yaml new file mode 100644 index 0000000..9a06da5 --- /dev/null +++ b/.github/workflows/build-docker.yaml @@ -0,0 +1,33 @@ +name: Build Docker + +on: workflow_dispatch + +jobs: + docker: + name: Build and push Docker image + runs-on: ubuntu-latest + needs: build-n-publish + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Login to GitHub Container Registry + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push + uses: docker/build-push-action@v4 + with: + push: true # push to registry + pull: true # always fetch the latest base images + platforms: linux/amd64,linux/arm64 # build for both amd64 and arm64 + tags: ghcr.io/abetlen/llama-cpp-python:latest \ No newline at end of file From 36c81489e7dfb3af9c78b2f07da62f39775cd23e Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Tue, 2 May 2023 01:04:36 -0400 Subject: [PATCH 8/8] Remove docker section of publish --- .github/workflows/publish.yaml | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index 16a6012..ddefd68 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -29,32 +29,3 @@ jobs: uses: pypa/gh-action-pypi-publish@release/v1 with: password: ${{ secrets.PYPI_API_TOKEN }} - - docker: - name: Build and push Docker image - runs-on: ubuntu-latest - needs: build-n-publish - steps: - - name: Checkout - uses: actions/checkout@v3 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v2 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - - name: Login to GitHub Container Registry - uses: docker/login-action@v2 - with: - registry: ghcr.io - username: ${{ github.repository_owner }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Build and push - uses: docker/build-push-action@v4 - with: - push: true # push to registry - pull: true # always fetch the latest base images - platforms: linux/amd64,linux/arm64 # build for both amd64 and arm64 - tags: ghcr.io/abetlen/llama-cpp-python:latest \ No newline at end of file