# nv-ingest/docker-compose.yaml
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0
services:
  redis:
    image: "redis/redis-stack"
    ports:
      - "6379:6379"
  yolox:
    image: ${YOLOX_IMAGE:-nvcr.io/ohlfw0olaadg/ea-participants/nv-yolox-page-elements-v1}:${YOLOX_TAG:-1.0.0}
    ports:
      - "8000:8000"
      - "8001:8001"
      - "8002:8002"
    user: root
    environment:
      - NIM_HTTP_API_PORT=8000
      - NIM_TRITON_LOG_VERBOSE=1
      - NGC_API_KEY=${NIM_NGC_API_KEY:-${NGC_API_KEY:-ngcapikey}}
      - CUDA_VISIBLE_DEVICES=0
      # NIM OpenTelemetry Settings
      - NIM_OTEL_SERVICE_NAME=yolox
      - NIM_OTEL_TRACES_EXPORTER=otlp
      - NIM_OTEL_METRICS_EXPORTER=console
      - NIM_OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318
      - NIM_ENABLE_OTEL=true
      # Triton OpenTelemetry Settings
      - TRITON_OTEL_URL=http://otel-collector:4318/v1/traces
      - TRITON_OTEL_RATE=1
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["1"]
              capabilities: [gpu]
    runtime: nvidia
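  # Images and tags use Compose's `${VAR:-default}` interpolation, so each NIM
  # can be overridden without editing this file. For example (illustrative
  # values), a `.env` file next to this compose file could contain:
  #   YOLOX_IMAGE=nvcr.io/my-org/my-yolox-mirror
  #   YOLOX_TAG=1.0.0
  # The NGC_API_KEY entries fall back NIM_NGC_API_KEY -> NGC_API_KEY -> the
  # "ngcapikey" placeholder, which must be a real key for NIMs to fetch models.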
  deplot:
    image: ${DEPLOT_IMAGE:-nvcr.io/ohlfw0olaadg/ea-participants/deplot}:${DEPLOT_TAG:-1.0.0}
    ports:
      - "8003:8000"
      - "8004:8001"
      - "8005:8002"
    user: root
    environment:
      - NIM_HTTP_API_PORT=8000
      - NIM_TRITON_LOG_VERBOSE=1
      - NGC_API_KEY=${NIM_NGC_API_KEY:-${NGC_API_KEY:-ngcapikey}}
      - CUDA_VISIBLE_DEVICES=0
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]
    runtime: nvidia
  cached:
    image: ${CACHED_IMAGE:-nvcr.io/ohlfw0olaadg/ea-participants/cached}:${CACHED_TAG:-0.2.1}
    shm_size: 2gb
    ports:
      - "8006:8000"
      - "8007:8001"
      - "8008:8002"
    user: root
    environment:
      - NIM_HTTP_API_PORT=8000
      - NIM_TRITON_LOG_VERBOSE=1
      - NGC_API_KEY=${NIM_NGC_API_KEY:-${NGC_API_KEY:-ngcapikey}}
      - CUDA_VISIBLE_DEVICES=0
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["1"]
              capabilities: [gpu]
    runtime: nvidia
  paddle:
    image: ${PADDLE_IMAGE:-nvcr.io/ohlfw0olaadg/ea-participants/paddleocr}:${PADDLE_TAG:-1.0.0}
    shm_size: 2gb
    ports:
      - "8009:8000"
      - "8010:8001"
      - "8011:8002"
    user: root
    environment:
      - NIM_HTTP_API_PORT=8000
      - NIM_TRITON_LOG_VERBOSE=1
      - NGC_API_KEY=${NIM_NGC_API_KEY:-${NGC_API_KEY:-ngcapikey}}
      - CUDA_VISIBLE_DEVICES=0
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["1"]
              capabilities: [gpu]
    runtime: nvidia
  embedding:
    # NIM ON
    image: ${EMBEDDING_IMAGE:-nvcr.io/nim/nvidia/nv-embedqa-e5-v5}:${EMBEDDING_TAG:-1.1.0}
    shm_size: 16gb
    ports:
      - "8012:8000"
      - "8013:8001"
      - "8014:8002"
    environment:
      - NIM_HTTP_API_PORT=8000
      - NIM_TRITON_LOG_VERBOSE=1
      - NGC_API_KEY=${NIM_NGC_API_KEY:-${NGC_API_KEY:-ngcapikey}}
      - CUDA_VISIBLE_DEVICES=0
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["1"]
              capabilities: [gpu]
    runtime: nvidia
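  # GPU placement note: each `device_ids` reservation exposes only that host GPU
  # to its container, where it is renumbered from 0, so CUDA_VISIBLE_DEVICES=0
  # selects the single reserved GPU rather than host GPU 0.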
  nv-ingest-ms-runtime:
    image: nvcr.io/ohlfw0olaadg/ea-participants/nv-ingest:24.10.1
    build:
      context: ${NV_INGEST_ROOT:-.}
      dockerfile: "./Dockerfile"
      target: runtime
    volumes:
      - ${DATASET_ROOT:-./data}:/workspace/data
    ports:
      # HTTP API
      - "7670:7670"
      # Simple Broker
      - "7671:7671"
    cap_add:
      - sys_nice
    environment:
      # Self-hosted cached endpoints.
      - CACHED_GRPC_ENDPOINT=cached:8001
      - CACHED_HTTP_ENDPOINT=http://cached:8000/v1/infer
      - CACHED_INFER_PROTOCOL=grpc
      # build.nvidia.com hosted cached endpoints.
      #- CACHED_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/university-at-buffalo/cached
      #- CACHED_INFER_PROTOCOL=http
      - CUDA_VISIBLE_DEVICES=0
      #- DEPLOT_GRPC_ENDPOINT=""
      # Self-hosted deplot endpoints.
      - DEPLOT_HTTP_ENDPOINT=http://deplot:8000/v1/chat/completions
      # build.nvidia.com hosted deplot endpoints.
      #- DEPLOT_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/vlm/google/deplot
      - DEPLOT_INFER_PROTOCOL=http
      - DOUGHNUT_GRPC_TRITON=triton-doughnut:8001
      - EMBEDDING_NIM_MODEL_NAME=${EMBEDDING_NIM_MODEL_NAME:-nvidia/nv-embedqa-e5-v5}
      - INGEST_LOG_LEVEL=DEFAULT
      # Message client for development
      #- MESSAGE_CLIENT_HOST=0.0.0.0
      #- MESSAGE_CLIENT_PORT=7671
      #- MESSAGE_CLIENT_TYPE=simple # Configure the ingest service to use the simple broker
      # Message client for production
      - MESSAGE_CLIENT_HOST=redis
      - MESSAGE_CLIENT_PORT=6379
      - MESSAGE_CLIENT_TYPE=redis
      - MINIO_BUCKET=${MINIO_BUCKET:-nv-ingest}
      - MRC_IGNORE_NUMA_CHECK=1
      - NGC_API_KEY=${NGC_API_KEY:-ngcapikey}
      - NVIDIA_BUILD_API_KEY=${NVIDIA_BUILD_API_KEY:-${NGC_API_KEY:-ngcapikey}}
      - OTEL_EXPORTER_OTLP_ENDPOINT=otel-collector:4317
      # Self-hosted paddle endpoints.
      - PADDLE_GRPC_ENDPOINT=paddle:8001
      - PADDLE_HTTP_ENDPOINT=http://paddle:8000/v1/infer
      - PADDLE_INFER_PROTOCOL=grpc
      # build.nvidia.com hosted paddle endpoints.
      #- PADDLE_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/baidu/paddleocr
      #- PADDLE_INFER_PROTOCOL=http
      - READY_CHECK_ALL_COMPONENTS=True
      - REDIS_MORPHEUS_TASK_QUEUE=morpheus_task_queue
      # Self-hosted yolox endpoints.
      - YOLOX_GRPC_ENDPOINT=yolox:8001
      - YOLOX_HTTP_ENDPOINT=http://yolox:8000/v1/infer
      - YOLOX_INFER_PROTOCOL=grpc
      # build.nvidia.com hosted yolox endpoints.
      #- YOLOX_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nv-yolox-page-elements-v1
      #- YOLOX_INFER_PROTOCOL=http
      - VLM_CAPTION_ENDPOINT=https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-90b-vision-instruct/chat/completions
    healthcheck:
      test: curl --fail http://nv-ingest-ms-runtime:7670/v1/health/ready || exit 1
      interval: 10s
      timeout: 5s
      retries: 20
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["1"]
              capabilities: [gpu]
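  # To use a build.nvidia.com-hosted NIM instead of a local container, swap the
  # commented endpoint pair above. For paddle, for example: comment out
  # PADDLE_GRPC_ENDPOINT, uncomment the hosted PADDLE_HTTP_ENDPOINT, and set
  # PADDLE_INFER_PROTOCOL=http; hosted calls are expected to authenticate with
  # NVIDIA_BUILD_API_KEY.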
  otel-collector:
    image: otel/opentelemetry-collector-contrib:0.91.0
    hostname: otel-collector
    command: ["--config=/etc/otel-collector-config.yaml"]
    volumes:
      - ./config/otel-collector-config.yaml:/etc/otel-collector-config.yaml
    ports:
      - "9988:9988" # Prometheus metrics exposed by the collector
      - "8889:8889" # Prometheus exporter metrics
      - "13133:13133" # health_check extension
      - "9411" # Zipkin receiver
      - "4317:4317" # OTLP gRPC receiver
      - "4318:4318" # OTLP/HTTP receiver
      - "55680:55679" # zpages extension
    depends_on:
      - zipkin
  zipkin:
    image: openzipkin/zipkin
    environment:
      JAVA_OPTS: "-Xms4g -Xmx8g -XX:+ExitOnOutOfMemoryError"
    ports:
      - "9411:9411" # Zipkin UI and API
  prometheus:
    image: prom/prometheus:latest
    command:
      - --web.console.templates=/etc/prometheus/consoles
      - --web.console.libraries=/etc/prometheus/console_libraries
      - --storage.tsdb.retention.time=1h
      - --config.file=/etc/prometheus/prometheus-config.yaml
      - --storage.tsdb.path=/prometheus
      - --web.enable-lifecycle
      - --web.route-prefix=/
      - --enable-feature=exemplar-storage
      - --enable-feature=otlp-write-receiver
    volumes:
      - ./config/prometheus.yaml:/etc/prometheus/prometheus-config.yaml
    ports:
      - "9090:9090"
  grafana:
    container_name: grafana-service
    image: grafana/grafana
    ports:
      - "3000:3000"
  etcd:
    # Turn on to leverage the `vdb_upload` task
    restart: always
    container_name: milvus-etcd
    image: quay.io/coreos/etcd:v3.5.5
    environment:
      - ETCD_AUTO_COMPACTION_MODE=revision
      - ETCD_AUTO_COMPACTION_RETENTION=1000
      - ETCD_QUOTA_BACKEND_BYTES=4294967296
      - ETCD_SNAPSHOT_COUNT=50000
    volumes:
      - ./.volumes/etcd:/etcd
    command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
    healthcheck:
      test: [ "CMD", "etcdctl", "endpoint", "health" ]
      interval: 30s
      timeout: 20s
      retries: 3
    profiles:
      - retrieval
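  # The `retrieval` profile gates etcd, minio, milvus, and attu; these services
  # only start when the profile is requested explicitly, e.g.:
  #   docker compose --profile retrieval up -d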
  minio:
    # Turn on to leverage the `store` and `vdb_upload` tasks
    restart: always
    container_name: minio
    hostname: minio
    image: minio/minio:RELEASE.2023-03-20T20-16-18Z
    environment:
      MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY:-minioadmin}
      MINIO_SECRET_KEY: ${MINIO_SECRET_KEY:-minioadmin}
    ports:
      - "9001:9001"
      - "9000:9000"
    volumes:
      - ./.volumes/minio:/minio_data
    command: minio server /minio_data --console-address ":9001"
    healthcheck:
      test: [ "CMD", "curl", "-f", "http://localhost:9000/minio/health/live" ]
      interval: 30s
      timeout: 20s
      retries: 3
    profiles:
      - retrieval
  milvus:
    # Turn on to leverage the `vdb_upload` task
    restart: always
    container_name: milvus-standalone
    image: milvusdb/milvus:v2.4.17-gpu
    command: [ "milvus", "run", "standalone" ]
    hostname: milvus
    security_opt:
      - seccomp:unconfined
    environment:
      ETCD_ENDPOINTS: etcd:2379
      MINIO_ADDRESS: minio:9000
    volumes:
      - ./.volumes/milvus:/var/lib/milvus
    healthcheck:
      test: [ "CMD", "curl", "-f", "http://localhost:9091/healthz" ]
      interval: 30s
      start_period: 90s
      timeout: 20s
      retries: 3
    ports:
      - "19530:19530"
      - "9091:9091"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["1"]
              capabilities: [gpu]
    depends_on:
      - "etcd"
      - "minio"
    profiles:
      - retrieval
  attu:
    # Turn on to leverage the `vdb_upload` task
    restart: always
    container_name: milvus-attu
    image: zilliz/attu:v2.3.5
    hostname: attu
    environment:
      MILVUS_URL: http://milvus:19530
    ports:
      - "3001:3000"
    depends_on:
      - "milvus"
    profiles:
      - retrieval