Add the ability to build and publish Conda packages. (#285)

This commit is contained in:
Devin Robison
2024-12-16 11:23:34 -07:00
committed by GitHub
parent cb27f40d9c
commit 6689b65638
11 changed files with 280 additions and 11 deletions

View File

@@ -141,8 +141,8 @@ issues. Look for unassigned issues and follow the steps starting from **Claim an
- Dependencies are managed via 'Conda' and 'Pip'.
- Dependencies are stored in .yml files
1. **Service Dependencies** 'docker/environment/nv_ingest_environment.yml' file.
2. **Client Dependencies** 'docker/environment/nv_ingest_client_environment.yml' file.
1. **Service Dependencies** 'conda/environments/nv_ingest_environment.yml' file.
2. **Client Dependencies** 'conda/environments/nv_ingest_client_environment.yml' file.
- To update dependencies:
- Create a clean environment using the relevant .yml file.
@@ -150,8 +150,8 @@ issues. Look for unassigned issues and follow the steps starting from **Claim an
- Update the .yml file by exporting the updated environment.
- For example:
```bash
conda env export --name nv_ingest_runtime --no-builds > docker/environment/nv_ingest_environment.yml
conda env export --name nv_ingest_client --no-builds > docker/environment/nv_ingest_client_environment.yml
conda env export --name nv_ingest_runtime --no-builds > conda/environments/nv_ingest_environment.yml
conda env export --name nv_ingest_client --no-builds > conda/environments/nv_ingest_client_environment.yml
```
### Common Processing Patterns

View File

@@ -32,7 +32,7 @@ ENV PATH=/opt/conda/bin:$PATH
# Install Mamba, a faster alternative to conda, within the base environment
RUN conda install -y mamba -n base -c conda-forge
COPY ./docker/environments/nv_ingest_environment.yml /workspace/nv_ingest_environment.yml
COPY conda/environments/nv_ingest_environment.yml /workspace/nv_ingest_environment.yml
# Create nv_ingest base environment
RUN mamba env create -f /workspace/nv_ingest_environment.yml \
&& conda clean --all --yes

View File

@@ -169,7 +169,7 @@ To interact with the nv-ingest service, you can do so from the host, or by `dock
To interact from the host, you'll need a Python environment and install the client dependencies:
```bash
# conda not required, but makes it easy to create a fresh python environment
conda create --name nv-ingest-dev --file ./docker/environments/nv_ingest_environment.yml
conda create --name nv-ingest-dev --file ./conda/environments/nv_ingest_environment.yml
conda activate nv-ingest-dev
cd client

View File

@@ -0,0 +1,85 @@
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Shell options:
#   -e           exit as soon as a command fails
#   -u           treat expansion of an unset variable as an error
#   -x           print each command before it runs (verbose build logs)
#   -o pipefail  a pipeline fails when any of its stages fails
set -euxo pipefail

##############################
# Source Validation Script
##############################
# Directory containing this script; the helper script and the package recipes
# are resolved relative to it, so the script can be invoked from any directory.
BUILD_SCRIPT_BASE="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
source "${BUILD_SCRIPT_BASE}/scripts/helper_functions.sh"

# Validate environment
validate_conda_build_environment

##############################
# Determine Git Root
##############################
GIT_ROOT=$(determine_git_root)

##############################
# Input Arguments
##############################
# $1 - directory that receives the built packages
#      (default: output_conda_channel next to this script)
# $2 - optional channel to publish to (default: empty)
# BUILD_NV_INGEST / BUILD_NV_INGEST_CLIENT - environment toggles, see below
OUTPUT_DIR=${1:-"${BUILD_SCRIPT_BASE}/output_conda_channel"}
CONDA_CHANNEL=${2:-""}
BUILD_NV_INGEST=${BUILD_NV_INGEST:-1} # 1 = build by default, 0 = skip
BUILD_NV_INGEST_CLIENT=${BUILD_NV_INGEST_CLIENT:-1} # 1 = build by default, 0 = skip

##############################
# Package Directories
##############################
NV_INGEST_DIR="${BUILD_SCRIPT_BASE}/packages/nv_ingest"
NV_INGEST_CLIENT_DIR="${BUILD_SCRIPT_BASE}/packages/nv_ingest_client"

##############################
# Setup Output Dir
##############################
echo "Using OUTPUT_DIR: $OUTPUT_DIR"
mkdir -p "${OUTPUT_DIR}/linux-64"

##############################
# Build Packages
##############################
# GIT_ROOT is passed through the environment of each `conda build` call only;
# the value held by this script is not modified.
if [[ "${BUILD_NV_INGEST}" -eq 1 ]]; then
  echo "Building nv_ingest..."
  GIT_ROOT="${GIT_ROOT}" conda build "${NV_INGEST_DIR}" \
    -c nvidia/label/dev -c rapidsai -c nvidia -c conda-forge -c pytorch \
    --output-folder "${OUTPUT_DIR}"
else
  echo "Skipping nv_ingest build."
fi

if [[ "${BUILD_NV_INGEST_CLIENT}" -eq 1 ]]; then
  echo "Building nv_ingest_client..."
  # The client build is given the client/ subdirectory of the repository.
  GIT_ROOT="${GIT_ROOT}/client" conda build "${NV_INGEST_CLIENT_DIR}" \
    -c conda-forge \
    --output-folder "${OUTPUT_DIR}"
else
  echo "Skipping nv_ingest_client build."
fi

##############################
# Index the Conda Channel
##############################
echo "Indexing conda channel at ${OUTPUT_DIR}..."
conda index "${OUTPUT_DIR}"
##############################
# Publish to User-Specified Conda Channel
##############################
#######################################
# Placeholder for the publishing step; currently only reports the target.
# Arguments: $1 - channel to publish to
# Outputs:   one informational line on stdout
#######################################
publish_to_conda_channel() {
  local target_channel="$1"
  # TODO(Devin): Implement publishing logic (e.g., upload to Anaconda Cloud or other server)
  printf '%s\n' "Publishing to Conda channel at ${target_channel} (stubbed function)"
}
# Publish only when the caller supplied a target channel; otherwise say so.
if [[ -z "${CONDA_CHANNEL}" ]]; then
  echo "No Conda channel specified. Skipping publishing step."
else
  publish_to_conda_channel "${CONDA_CHANNEL}"
fi

# Final status line reporting where the packages were written.
echo "Artifacts successfully built and placed in ${OUTPUT_DIR}"

View File

@@ -48,4 +48,4 @@ dependencies:
- opencv-python # For some reason conda cant solve our req set with py-opencv so we need to use pip
- pymilvus>=2.5.0
- pymilvus[bulk_writer, model]
- pydantic<2.0.0 # Prevent llamas from installing pydantic>=2.0.0
- pydantic<2.0.0 # Prevent llama-index from installing pydantic>=2.0.0

View File

@@ -0,0 +1,86 @@
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Package metadata is read from setup.py. The name falls back to 'nv_ingest'
# and is lower-cased; the version has no fallback.
{% set data = load_setup_py_data() %}
{% set name = data.get('name', 'nv_ingest') | lower %}
{% set version = data.get('version') %}
# Source root is taken from the GIT_ROOT environment variable; when it is not set,
# the relative path ../../.. is used instead.
{% set git_root = environ.get('GIT_ROOT', '../../..') %}
package:
name: {{ name }}
version: {{ version }}
source:
path: {{ git_root }}
build:
number: 0
script:
# Install the source tree with pip; --no-deps leaves dependency resolution to
# the requirements declared below.
- {{ PYTHON }} -m pip install . --no-deps -vv
requirements:
# NOTE(review): build pins python==3.10 while run allows python>=3.10 — confirm this is intended.
build:
- pip
- python==3.10
- setuptools>=58.2.0
run:
- azure-core>=1.32.0
- click>=8.1.7
- fastapi>=0.115.6
- fastparquet>=2024.11.0
- fsspec>=2024.10.0
- httpx>=0.28.1
- isodate>=0.7.2
- langdetect>=1.0.9
- minio>=7.2.12
- morpheus-core=25.02.00a
- morpheus-llm=25.02.00a
- openai>=1.57.1
- opentelemetry-api>=1.27.0
- opentelemetry-exporter-otlp>=1.27.0
- opentelemetry-sdk>=1.27.0
- pydantic<2.0.0
- pypdfium2>=4.30.0
- pytest>=8.0.2
- pytest-mock>=3.14.0
- python>=3.10
- python-docx>=1.1.2
- python-dotenv>=1.0.1
- python-magic>=0.4.27
- python-pptx>=1.0.2
- pytorch
- redis-py>=5.2.1
- requests>=2.32.3
- setuptools>=58.2.0
- tabulate>=0.9.0
- torchaudio
- torchvision
- tqdm>=4.67.1
- transformers>=4.47.0
- unstructured-client>=0.25.9
- uvicorn
- wand>=0.6.10
test:
# Runs pytest against ./tests; pytest is listed among the run requirements above.
# NOTE(review): confirm the tests directory is available where the test command runs.
commands:
- pytest ./tests
about:
home: "https://github.com/NVIDIA/nv-ingest"
license: "Apache-2.0"
summary: "Python module supporting document ingestion."
description: "Python module supporting document ingestion."
extra:
recipe-maintainers:
- drobison@nvidia.com
channels:
- nvidia/label/dev
- rapidsai
- nvidia
- conda-forge
- pytorch

View File

@@ -0,0 +1,57 @@
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Package metadata is read from setup.py. The name falls back to 'nv_ingest_client'
# and is lower-cased; the version has no fallback.
{% set data = load_setup_py_data() %}
{% set name = data.get('name', 'nv_ingest_client') | lower %}
{% set version = data.get('version') %}
# Source root is taken from the GIT_ROOT environment variable; when it is not set,
# the relative path ../../../client is used instead.
{% set git_root = environ.get('GIT_ROOT', '../../../client') %}
package:
name: {{ name }}
version: {{ version }}
source:
path: {{ git_root }}
build:
number: 0
script:
# Install the source tree with pip; --no-deps leaves dependency resolution to
# the requirements declared below.
- {{ PYTHON }} -m pip install ./ --no-deps -vv
requirements:
# NOTE(review): build pins python==3.10 while run allows python>=3.10 — confirm this is intended.
build:
- pip
- python==3.10
- setuptools>=58.2.0
run:
- click>=8.1.7
- fsspec>=2024.10.0
- httpx>=0.28.1
- pydantic<2.0.0
- pypdfium2>=4.30.0
- python>=3.10
- python-docx>=1.1.2
- python-pptx>=1.0.2
- requests>=2.32.3
- setuptools>=58.2.0
- tqdm>=4.67.1
test:
# NOTE(review): the command below invokes pytest, but pytest does not appear in the
# requirements listed in this recipe — confirm it is present in the test environment.
commands:
- pytest ./tests
about:
home: "https://github.com/NVIDIA/nv-ingest"
license: "Apache-2.0"
summary: "Python module supporting document ingestion."
description: "Python module supporting document ingestion."
extra:
recipe-maintainers:
- drobison@nvidia.com
channels:
- conda-forge

View File

@@ -0,0 +1,44 @@
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Fail on errors, on undefined variables, and when any stage of a pipeline fails
set -euo pipefail
#######################################
# Verify that the tools needed for the conda build are available on PATH.
# Checks, in order: conda, conda-build, git.
# Arguments: none
# Outputs:   on the first missing tool, an error message on stderr
# Returns:   0 when every tool is found; otherwise exits the shell with status 1
#######################################
validate_conda_build_environment() {
  ##############################
  # Validate Dependencies
  ##############################
  # Ensure conda is installed
  if ! command -v conda &> /dev/null; then
    echo "Error: conda not found in PATH. Please ensure Conda is installed and available." >&2
    exit 1
  fi

  # Ensure conda-build is installed
  if ! command -v conda-build &> /dev/null; then
    echo "Error: conda-build not found in PATH. Install it via: conda install conda-build" >&2
    exit 1
  fi

  # Ensure git is installed
  if ! command -v git &> /dev/null; then
    echo "Error: git not found in PATH. Please ensure Git is installed and available." >&2
    exit 1
  fi
}
#######################################
# Print the top-level directory of the Git work tree containing the current
# working directory.
# Arguments: none
# Outputs:   the repository root on stdout; on failure, an error on stderr
# Returns:   0 on success; exits with status 1 when the current directory is
#            not inside a Git work tree
#######################################
determine_git_root() {
  ##############################
  # Determine Git Root
  ##############################
  if git rev-parse --is-inside-work-tree &> /dev/null; then
    # Run git directly so its exit status is not masked by a wrapping echo.
    git rev-parse --show-toplevel
  else
    # Callers capture stdout via $(determine_git_root); the diagnostic must go
    # to stderr or it ends up in the captured value instead of on the terminal.
    echo "Error: Not inside a Git repository. Unable to determine the Git root." >&2
    exit 1
  fi
}

View File

@@ -53,7 +53,7 @@ To run the nv-ingest service locally, we will require [Conda (Mamba) to be insta
From the root of the repository, run the following commands to create a new Conda environment and install the required dependencies:
```bash
mamba env create --file ./docker/environments/nv_ingest_environment.yml --name nv_ingest_runtime
mamba env create --file ./conda/environments/nv_ingest_environment.yml --name nv_ingest_runtime
conda activate nv_ingest_runtime

View File

@@ -2,9 +2,6 @@
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
import datetime
import os
import re