mirror of
https://github.com/NVIDIA/nv-ingest.git
synced 2025-01-05 18:58:13 +03:00
Add the ability to build and publish Conda packages. (#285)
This commit is contained in:
@@ -141,8 +141,8 @@ issues. Look for unassigned issues and follow the steps starting from **Claim an
|
||||
|
||||
- Dependencies are managed via 'Conda' and 'Pip'.
|
||||
- Dependencies are stored in .yml files
|
||||
1. **Service Dependencies** 'docker/environment/nv_ingest_environment.yml' file.
|
||||
2. **Client Dependencies** 'docker/environment/nv_ingest_client_environment.yml' file.
|
||||
1. **Service Dependencies** 'conda/environments/nv_ingest_environment.yml' file.
|
||||
2. **Client Dependencies** 'conda/environments/nv_ingest_client_environment.yml' file.
|
||||
|
||||
- To update dependencies:
|
||||
- Create a clean environment using the relevant .yml file.
|
||||
@@ -150,8 +150,8 @@ issues. Look for unassigned issues and follow the steps starting from **Claim an
|
||||
- Update the .yml file by exporting the updated environment.
|
||||
- For example:
|
||||
```bash
|
||||
conda env export --name nv_ingest_runtime --no-builds > docker/environment/nv_ingest_environment.yml
|
||||
conda env export --name nv_ingest_client --no-builds > docker/environment/nv_ingest_client_environment.yml
|
||||
conda env export --name nv_ingest_runtime --no-builds > conda/environments/nv_ingest_environment.yml
|
||||
conda env export --name nv_ingest_client --no-builds > conda/environments/nv_ingest_client_environment.yml
|
||||
```
|
||||
|
||||
### Common Processing Patterns
|
||||
|
||||
@@ -32,7 +32,7 @@ ENV PATH=/opt/conda/bin:$PATH
|
||||
# Install Mamba, a faster alternative to conda, within the base environment
|
||||
RUN conda install -y mamba -n base -c conda-forge
|
||||
|
||||
COPY ./docker/environments/nv_ingest_environment.yml /workspace/nv_ingest_environment.yml
|
||||
COPY conda/environments/nv_ingest_environment.yml /workspace/nv_ingest_environment.yml
|
||||
# Create nv_ingest base environment
|
||||
RUN mamba env create -f /workspace/nv_ingest_environment.yml \
|
||||
&& conda clean --all --yes
|
||||
|
||||
@@ -169,7 +169,7 @@ To interact with the nv-ingest service, you can do so from the host, or by `dock
|
||||
To interact from the host, you'll need a Python environment with the client dependencies installed:
|
||||
```bash
|
||||
# conda not required, but makes it easy to create a fresh python environment
|
||||
conda create --name nv-ingest-dev --file ./docker/environments/nv_ingest_environment.yml
|
||||
conda create --name nv-ingest-dev --file ./conda/environments/nv_ingest_environment.yml
|
||||
conda activate nv-ingest-dev
|
||||
|
||||
cd client
|
||||
|
||||
85
conda/build_conda_packages.sh
Normal file
85
conda/build_conda_packages.sh
Normal file
@@ -0,0 +1,85 @@
|
||||
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Builds the nv_ingest and nv_ingest_client Conda packages into a local
# channel directory and indexes that directory.
#
# Usage: build_conda_packages.sh [OUTPUT_DIR] [CONDA_CHANNEL]
#   OUTPUT_DIR     Directory that receives the built packages
#                  (default: <script dir>/output_conda_channel).
#   CONDA_CHANNEL  Optional channel handed to the publish step (default: empty).
#
# Environment:
#   BUILD_NV_INGEST         1 = build nv_ingest (default), 0 = skip
#   BUILD_NV_INGEST_CLIENT  1 = build nv_ingest_client (default), 0 = skip

# Fail on errors (-e), undefined variables (-u) and failed pipeline stages
# (pipefail); trace every command as it runs (-x).
set -euxo pipefail

##############################
# Source Validation Script
##############################
BUILD_SCRIPT_BASE="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
source "${BUILD_SCRIPT_BASE}/scripts/helper_functions.sh"

# Validate environment
validate_conda_build_environment

##############################
# Determine Git Root
##############################
# Resolve the repository that contains this script, not whichever directory the
# caller happens to be in; the lookup runs inside the command-substitution
# subshell, so the caller's working directory is left untouched.
GIT_ROOT=$(cd "${BUILD_SCRIPT_BASE}" && determine_git_root)

##############################
# Input Arguments
##############################
OUTPUT_DIR=${1:-"${BUILD_SCRIPT_BASE}/output_conda_channel"}
CONDA_CHANNEL=${2:-""}
BUILD_NV_INGEST=${BUILD_NV_INGEST:-1} # 1 = build by default, 0 = skip
BUILD_NV_INGEST_CLIENT=${BUILD_NV_INGEST_CLIENT:-1} # 1 = build by default, 0 = skip

##############################
# Package Directories
##############################
NV_INGEST_DIR="${BUILD_SCRIPT_BASE}/packages/nv_ingest"
NV_INGEST_CLIENT_DIR="${BUILD_SCRIPT_BASE}/packages/nv_ingest_client"

##############################
# Setup Output Dir
##############################
echo "Using OUTPUT_DIR: $OUTPUT_DIR"
mkdir -p "${OUTPUT_DIR}/linux-64"

##############################
# Build Packages
##############################
# GIT_ROOT is exported per invocation; each recipe reads it via environ.get()
# to locate its source tree.
if [[ "${BUILD_NV_INGEST}" -eq 1 ]]; then
  echo "Building nv_ingest..."
  GIT_ROOT="${GIT_ROOT}" conda build "${NV_INGEST_DIR}" \
    -c nvidia/label/dev -c rapidsai -c nvidia -c conda-forge -c pytorch \
    --output-folder "${OUTPUT_DIR}"
else
  echo "Skipping nv_ingest build."
fi

if [[ "${BUILD_NV_INGEST_CLIENT}" -eq 1 ]]; then
  echo "Building nv_ingest_client..."
  # The client package lives in the client/ subdirectory of the repository.
  GIT_ROOT="${GIT_ROOT}/client" conda build "${NV_INGEST_CLIENT_DIR}" \
    -c conda-forge \
    --output-folder "${OUTPUT_DIR}"
else
  echo "Skipping nv_ingest_client build."
fi

##############################
# Index the Conda Channel
##############################
echo "Indexing conda channel at ${OUTPUT_DIR}..."
conda index "${OUTPUT_DIR}"
|
||||
|
||||
##############################
|
||||
# Publish to User-Specified Conda Channel
|
||||
##############################
|
||||
publish_to_conda_channel() {
  # Placeholder for the publish step: it currently only reports the target.
  # Arguments: $1 - channel the artifacts would be published to
  # Outputs:   one status line on stdout
  local target_channel="$1"
  printf '%s\n' "Publishing to Conda channel at ${target_channel} (stubbed function)"
  # TODO(Devin): Implement publishing logic (e.g., upload to Anaconda Cloud or other server)
}
|
||||
|
||||
# Hand off to the publish step only when a channel was supplied; an empty
# CONDA_CHANNEL means there is nothing to publish to.
if [[ -z "${CONDA_CHANNEL}" ]]; then
  echo "No Conda channel specified. Skipping publishing step."
else
  publish_to_conda_channel "${CONDA_CHANNEL}"
fi

echo "Artifacts successfully built and placed in ${OUTPUT_DIR}"
|
||||
@@ -48,4 +48,4 @@ dependencies:
|
||||
- opencv-python # For some reason conda cant solve our req set with py-opencv so we need to use pip
|
||||
- pymilvus>=2.5.0
|
||||
- pymilvus[bulk_writer, model]
|
||||
- pydantic<2.0.0 # Prevent llamas from installing pydantic>=2.0.0
|
||||
- pydantic<2.0.0 # Prevent llama-index from installing pydantic>=2.0.0
|
||||
86
conda/packages/nv_ingest/meta.yaml
Normal file
86
conda/packages/nv_ingest/meta.yaml
Normal file
@@ -0,0 +1,86 @@
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
|
||||
# All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
{% set data = load_setup_py_data() %}
|
||||
{% set name = data.get('name', 'nv_ingest') | lower %}
|
||||
{% set version = data.get('version') %}
|
||||
|
||||
# Use the GIT_ROOT environment variable as the source path, falling back to ../../.. (the repository root relative to this recipe) when it is not set
|
||||
{% set git_root = environ.get('GIT_ROOT', '../../..') %}
|
||||
|
||||
package:
|
||||
name: {{ name }}
|
||||
version: {{ version }}
|
||||
|
||||
source:
|
||||
path: {{ git_root }}
|
||||
|
||||
build:
|
||||
number: 0
|
||||
script:
|
||||
- {{ PYTHON }} -m pip install . --no-deps -vv
|
||||
|
||||
requirements:
|
||||
build:
|
||||
- pip
|
||||
- python==3.10
|
||||
- setuptools>=58.2.0
|
||||
run:
|
||||
- azure-core>=1.32.0
|
||||
- click>=8.1.7
|
||||
- fastapi>=0.115.6
|
||||
- fastparquet>=2024.11.0
|
||||
- fsspec>=2024.10.0
|
||||
- httpx>=0.28.1
|
||||
- isodate>=0.7.2
|
||||
- langdetect>=1.0.9
|
||||
- minio>=7.2.12
|
||||
- morpheus-core=25.02.00a
|
||||
- morpheus-llm=25.02.00a
|
||||
- openai>=1.57.1
|
||||
- opentelemetry-api>=1.27.0
|
||||
- opentelemetry-exporter-otlp>=1.27.0
|
||||
- opentelemetry-sdk>=1.27.0
|
||||
- pydantic<2.0.0
|
||||
- pypdfium2>=4.30.0
|
||||
- pytest>=8.0.2
|
||||
- pytest-mock>=3.14.0
|
||||
- python>=3.10
|
||||
- python-docx>=1.1.2
|
||||
- python-dotenv>=1.0.1
|
||||
- python-magic>=0.4.27
|
||||
- python-pptx>=1.0.2
|
||||
- pytorch
|
||||
- redis-py>=5.2.1
|
||||
- requests>=2.32.3
|
||||
- setuptools>=58.2.0
|
||||
- tabulate>=0.9.0
|
||||
- torchaudio
|
||||
- torchvision
|
||||
- tqdm>=4.67.1
|
||||
- transformers>=4.47.0
|
||||
- unstructured-client>=0.25.9
|
||||
- uvicorn
|
||||
- wand>=0.6.10
|
||||
|
||||
test:
|
||||
commands:
|
||||
- pytest ./tests
|
||||
|
||||
about:
|
||||
home: "https://github.com/NVIDIA/nv-ingest"
|
||||
license: "Apache-2.0"
|
||||
summary: "Python module supporting document ingestion."
|
||||
description: "Python module supporting document ingestion."
|
||||
|
||||
extra:
|
||||
recipe-maintainers:
|
||||
- drobison@nvidia.com
|
||||
|
||||
channels:
|
||||
- nvidia/label/dev
|
||||
- rapidsai
|
||||
- nvidia
|
||||
- conda-forge
|
||||
- pytorch
|
||||
57
conda/packages/nv_ingest_client/meta.yaml
Normal file
57
conda/packages/nv_ingest_client/meta.yaml
Normal file
@@ -0,0 +1,57 @@
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
|
||||
# All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
{% set data = load_setup_py_data() %}
|
||||
{% set name = data.get('name', 'nv_ingest_client') | lower %}
|
||||
{% set version = data.get('version') %}
|
||||
|
||||
# Use the GIT_ROOT environment variable as the source path, falling back to ../../../client (the client directory relative to this recipe) when it is not set
|
||||
{% set git_root = environ.get('GIT_ROOT', '../../../client') %}
|
||||
|
||||
package:
|
||||
name: {{ name }}
|
||||
version: {{ version }}
|
||||
|
||||
source:
|
||||
path: {{ git_root }}
|
||||
|
||||
build:
|
||||
number: 0
|
||||
script:
|
||||
- {{ PYTHON }} -m pip install ./ --no-deps -vv
|
||||
|
||||
requirements:
|
||||
build:
|
||||
- pip
|
||||
- python==3.10
|
||||
- setuptools>=58.2.0
|
||||
run:
|
||||
- click>=8.1.7
|
||||
- fsspec>=2024.10.0
|
||||
- httpx>=0.28.1
|
||||
- pydantic<2.0.0
|
||||
- pypdfium2>=4.30.0
|
||||
- python>=3.10
|
||||
- python-docx>=1.1.2
|
||||
- python-pptx>=1.0.2
|
||||
- requests>=2.32.3
|
||||
- setuptools>=58.2.0
|
||||
- tqdm>=4.67.1
|
||||
|
||||
test:
|
||||
commands:
|
||||
- pytest ./tests
|
||||
|
||||
about:
|
||||
home: "https://github.com/NVIDIA/nv-ingest"
|
||||
license: "Apache-2.0"
|
||||
summary: "Python module supporting document ingestion."
|
||||
description: "Python module supporting document ingestion."
|
||||
|
||||
extra:
|
||||
recipe-maintainers:
|
||||
- drobison@nvidia.com
|
||||
|
||||
channels:
|
||||
- conda-forge
|
||||
44
conda/scripts/helper_functions.sh
Normal file
44
conda/scripts/helper_functions.sh
Normal file
@@ -0,0 +1,44 @@
|
||||
#!/usr/bin/env bash
|
||||
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
|
||||
# All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Fail on errors (-e), undefined variables (-u), and failures in any pipeline stage (pipefail)
|
||||
set -euo pipefail
|
||||
|
||||
#######################################
# Verify that the tools needed to build the Conda packages are on PATH.
# Checks, in order: conda, conda-build, git.
# Arguments: none
# Outputs:   an error message on stderr for the first tool that is missing
# Returns:   0 when every tool is found; otherwise exits the shell with 1
#######################################
validate_conda_build_environment() {
  ##############################
  # Validate Dependencies
  ##############################

  # Ensure conda is installed
  if ! command -v conda &> /dev/null; then
    echo "Error: conda not found in PATH. Please ensure Conda is installed and available." >&2
    exit 1
  fi

  # Ensure conda-build is installed
  if ! command -v conda-build &> /dev/null; then
    echo "Error: conda-build not found in PATH. Install it via: conda install conda-build" >&2
    exit 1
  fi

  # Ensure git is installed
  if ! command -v git &> /dev/null; then
    echo "Error: git not found in PATH. Please ensure Git is installed and available." >&2
    exit 1
  fi
}
|
||||
|
||||
#######################################
# Print the top-level directory of the Git work tree containing the current
# working directory.
# Arguments: none
# Outputs:   the work-tree root on stdout; on failure an error on stderr, so a
#            caller capturing stdout with $(...) never receives the message as
#            if it were a path
# Returns:   0 on success; otherwise exits the (sub)shell with status 1
#######################################
determine_git_root() {
  ##############################
  # Determine Git Root
  ##############################
  local toplevel

  # Assign separately from 'local' so git's exit status is not masked; this
  # also rejects locations where --show-toplevel itself fails.
  if ! toplevel=$(git rev-parse --show-toplevel 2> /dev/null) || [[ -z "${toplevel}" ]]; then
    echo "Error: Not inside a Git repository. Unable to determine the Git root." >&2
    exit 1
  fi

  printf '%s\n' "${toplevel}"
}
|
||||
@@ -53,7 +53,7 @@ To run the nv-ingest service locally, we will require [Conda (Mamba) to be insta
|
||||
|
||||
From the root of the repository, run the following commands to create a new Conda environment and install the required dependencies:
|
||||
```bash
|
||||
mamba env create --file ./docker/environments/nv_ingest_environment.yml --name nv_ingest_runtime
|
||||
mamba env create --file ./conda/environments/nv_ingest_environment.yml --name nv_ingest_runtime
|
||||
|
||||
conda activate nv_ingest_runtime
|
||||
|
||||
|
||||
Reference in New Issue
Block a user