# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

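# Every ${VAR:-default} reference below can be overridden from the shell environment or from a
# `.env` file placed next to this compose file (Docker Compose loads it automatically).
# Illustrative .env (placeholder values; the variable names are the ones used below):
#   NGC_API_KEY=nvapi-your-key-here
#   DATASET_ROOT=./data
#   YOLOX_TAG=1.0.0
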
services:
  redis:
    image: "redis/redis-stack"
    ports:
      - "6379:6379"

  yolox:
    image: ${YOLOX_IMAGE:-nvcr.io/ohlfw0olaadg/ea-participants/nv-yolox-page-elements-v1}:${YOLOX_TAG:-1.0.0}
    ports:
      - "8000:8000"
      - "8001:8001"
      - "8002:8002"
    user: root
    environment:
      - NIM_HTTP_API_PORT=8000
      - NIM_TRITON_LOG_VERBOSE=1
      - NGC_API_KEY=${NIM_NGC_API_KEY:-${NGC_API_KEY:-ngcapikey}}
      - CUDA_VISIBLE_DEVICES=0
      # NIM OpenTelemetry Settings
      - NIM_OTEL_SERVICE_NAME=yolox
      - NIM_OTEL_TRACES_EXPORTER=otlp
      - NIM_OTEL_METRICS_EXPORTER=console
      - NIM_OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318
      - NIM_ENABLE_OTEL=true
      # Triton OpenTelemetry Settings
      - TRITON_OTEL_URL=http://otel-collector:4318/v1/traces
      - TRITON_OTEL_RATE=1
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["1"]
              capabilities: [gpu]
    runtime: nvidia

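  # GPU placement: each GPU-backed NIM in this file reserves a specific device via `device_ids`.
  # On a single-GPU host, an illustrative adjustment is to set every `device_ids` entry to ["0"],
  # or to switch a service to its hosted build.nvidia.com endpoint (see the commented-out
  # endpoints in nv-ingest-ms-runtime's environment).
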
  deplot:
    image: ${DEPLOT_IMAGE:-nvcr.io/ohlfw0olaadg/ea-participants/deplot}:${DEPLOT_TAG:-1.0.0}
    ports:
      - "8003:8000"
      - "8004:8001"
      - "8005:8002"
    user: root
    environment:
      - NIM_HTTP_API_PORT=8000
      - NIM_TRITON_LOG_VERBOSE=1
      - NGC_API_KEY=${NIM_NGC_API_KEY:-${NGC_API_KEY:-ngcapikey}}
      - CUDA_VISIBLE_DEVICES=0
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]
    runtime: nvidia

  cached:
    image: ${CACHED_IMAGE:-nvcr.io/ohlfw0olaadg/ea-participants/cached}:${CACHED_TAG:-0.2.1}
    shm_size: 2gb
    ports:
      - "8006:8000"
      - "8007:8001"
      - "8008:8002"
    user: root
    environment:
      - NIM_HTTP_API_PORT=8000
      - NIM_TRITON_LOG_VERBOSE=1
      - NGC_API_KEY=${NIM_NGC_API_KEY:-${NGC_API_KEY:-ngcapikey}}
      - CUDA_VISIBLE_DEVICES=0
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["1"]
              capabilities: [gpu]
    runtime: nvidia

  paddle:
    image: ${PADDLE_IMAGE:-nvcr.io/ohlfw0olaadg/ea-participants/paddleocr}:${PADDLE_TAG:-1.0.0}
    shm_size: 2gb
    ports:
      - "8009:8000"
      - "8010:8001"
      - "8011:8002"
    user: root
    environment:
      - NIM_HTTP_API_PORT=8000
      - NIM_TRITON_LOG_VERBOSE=1
      - NGC_API_KEY=${NIM_NGC_API_KEY:-${NGC_API_KEY:-ngcapikey}}
      - CUDA_VISIBLE_DEVICES=0
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["1"]
              capabilities: [gpu]
    runtime: nvidia

  embedding:
    # NIM ON
    image: ${EMBEDDING_IMAGE:-nvcr.io/nim/nvidia/nv-embedqa-e5-v5}:${EMBEDDING_TAG:-1.1.0}
    shm_size: 16gb
    ports:
      - "8012:8000"
      - "8013:8001"
      - "8014:8002"
    environment:
      - NIM_HTTP_API_PORT=8000
      - NIM_TRITON_LOG_VERBOSE=1
      - NGC_API_KEY=${NIM_NGC_API_KEY:-${NGC_API_KEY:-ngcapikey}}
      - CUDA_VISIBLE_DEVICES=0
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["1"]
              capabilities: [gpu]
    runtime: nvidia

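  # Illustrative request against the embedding NIM once it is healthy (assumes the NIM's
  # OpenAI-compatible /v1/embeddings route with NVIDIA's input_type extension; check the
  # model card if the schema differs):
  #   curl http://localhost:8012/v1/embeddings \
  #     -H "Content-Type: application/json" \
  #     -d '{"model": "nvidia/nv-embedqa-e5-v5", "input": ["example passage"], "input_type": "passage"}'
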
  nv-ingest-ms-runtime:
    image: nvcr.io/ohlfw0olaadg/ea-participants/nv-ingest:24.10.1
    build:
      context: ${NV_INGEST_ROOT:-.}
      dockerfile: "./Dockerfile"
      target: runtime
    volumes:
      - ${DATASET_ROOT:-./data}:/workspace/data
    ports:
      # HTTP API
      - "7670:7670"
      # Simple Broker
      - "7671:7671"
    cap_add:
      - sys_nice
    environment:
      # Self-hosted cached endpoints.
      - CACHED_GRPC_ENDPOINT=cached:8001
      - CACHED_HTTP_ENDPOINT=http://cached:8000/v1/infer
      - CACHED_INFER_PROTOCOL=grpc
      # build.nvidia.com hosted cached endpoints.
      #- CACHED_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/university-at-buffalo/cached
      #- CACHED_INFER_PROTOCOL=http
      - CUDA_VISIBLE_DEVICES=0
      #- DEPLOT_GRPC_ENDPOINT=""
      # Self-hosted deplot endpoints.
      - DEPLOT_HTTP_ENDPOINT=http://deplot:8000/v1/chat/completions
      # build.nvidia.com hosted deplot endpoints.
      #- DEPLOT_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/vlm/google/deplot
      - DEPLOT_INFER_PROTOCOL=http
      - DOUGHNUT_GRPC_TRITON=triton-doughnut:8001
      - EMBEDDING_NIM_MODEL_NAME=${EMBEDDING_NIM_MODEL_NAME:-nvidia/nv-embedqa-e5-v5}
      - INGEST_LOG_LEVEL=DEFAULT
      # Message client for development (uncomment these and comment out the Redis settings
      # below to use the in-process simple broker instead of Redis).
      #- MESSAGE_CLIENT_HOST=0.0.0.0
      #- MESSAGE_CLIENT_PORT=7671
      #- MESSAGE_CLIENT_TYPE=simple
      # Message client for production.
      - MESSAGE_CLIENT_HOST=redis
      - MESSAGE_CLIENT_PORT=6379
      - MESSAGE_CLIENT_TYPE=redis
      - MINIO_BUCKET=${MINIO_BUCKET:-nv-ingest}
      - MRC_IGNORE_NUMA_CHECK=1
      - NGC_API_KEY=${NGC_API_KEY:-ngcapikey}
      - NVIDIA_BUILD_API_KEY=${NVIDIA_BUILD_API_KEY:-${NGC_API_KEY:-ngcapikey}}
      - OTEL_EXPORTER_OTLP_ENDPOINT=otel-collector:4317
      # Self-hosted paddle endpoints.
      - PADDLE_GRPC_ENDPOINT=paddle:8001
      - PADDLE_HTTP_ENDPOINT=http://paddle:8000/v1/infer
      - PADDLE_INFER_PROTOCOL=grpc
      # build.nvidia.com hosted paddle endpoints.
      #- PADDLE_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/baidu/paddleocr
      #- PADDLE_INFER_PROTOCOL=http
      - READY_CHECK_ALL_COMPONENTS=True
      - REDIS_MORPHEUS_TASK_QUEUE=morpheus_task_queue
      # Self-hosted yolox endpoints.
      - YOLOX_GRPC_ENDPOINT=yolox:8001
      - YOLOX_HTTP_ENDPOINT=http://yolox:8000/v1/infer
      - YOLOX_INFER_PROTOCOL=grpc
      # build.nvidia.com hosted yolox endpoints.
      #- YOLOX_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nv-yolox-page-elements-v1
      #- YOLOX_INFER_PROTOCOL=http
      - VLM_CAPTION_ENDPOINT=https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-90b-vision-instruct/chat/completions
    healthcheck:
      test: curl --fail http://nv-ingest-ms-runtime:7670/v1/health/ready || exit 1
      interval: 10s
      timeout: 5s
      retries: 20
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["1"]
              capabilities: [gpu]

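  # From the host, the same readiness probe used by the healthcheck above can be run as, e.g.:
  #   curl -sf http://localhost:7670/v1/health/ready
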
  otel-collector:
    image: otel/opentelemetry-collector-contrib:0.91.0
    hostname: otel-collector
    command: ["--config=/etc/otel-collector-config.yaml"]
    volumes:
      - ./config/otel-collector-config.yaml:/etc/otel-collector-config.yaml
    ports:
      - "9988:9988" # Prometheus metrics exposed by the collector
      - "8889:8889" # Prometheus exporter metrics
      - "13133:13133" # health_check extension
      - "9411" # Zipkin receiver
      - "4317:4317" # OTLP gRPC receiver
      - "4318:4318" # OTLP/HTTP receiver
      - "55680:55679" # zpages extension
    depends_on:
      - zipkin

  zipkin:
    image: openzipkin/zipkin
    environment:
      JAVA_OPTS: "-Xms4g -Xmx8g -XX:+ExitOnOutOfMemoryError"
    ports:
      - "9411:9411" # Zipkin UI and API

  prometheus:
    image: prom/prometheus:latest
    command:
      - --web.console.templates=/etc/prometheus/consoles
      - --web.console.libraries=/etc/prometheus/console_libraries
      - --storage.tsdb.retention.time=1h
      - --config.file=/etc/prometheus/prometheus-config.yaml
      - --storage.tsdb.path=/prometheus
      - --web.enable-lifecycle
      - --web.route-prefix=/
      - --enable-feature=exemplar-storage
      - --enable-feature=otlp-write-receiver
    volumes:
      - ./config/prometheus.yaml:/etc/prometheus/prometheus-config.yaml
    ports:
      - "9090:9090"

  grafana:
    container_name: grafana-service
    image: grafana/grafana
    ports:
      - "3000:3000"

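  # Once the stack is up, the observability UIs are reachable from the host at
  # http://localhost:9411 (Zipkin), http://localhost:9090 (Prometheus), and
  # http://localhost:3000 (Grafana; the stock image defaults to the admin/admin login).
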
  etcd:
    # Turn on to leverage the `vdb_upload` task
    restart: always
    container_name: milvus-etcd
    image: quay.io/coreos/etcd:v3.5.5
    environment:
      - ETCD_AUTO_COMPACTION_MODE=revision
      - ETCD_AUTO_COMPACTION_RETENTION=1000
      - ETCD_QUOTA_BACKEND_BYTES=4294967296
      - ETCD_SNAPSHOT_COUNT=50000
    volumes:
      - ./.volumes/etcd:/etcd
    command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
    healthcheck:
      test: [ "CMD", "etcdctl", "endpoint", "health" ]
      interval: 30s
      timeout: 20s
      retries: 3
    profiles:
      - retrieval

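  # The services tagged with the `retrieval` profile (etcd above, plus minio, milvus, and attu
  # below) are only started when that profile is enabled, e.g.:
  #   docker compose --profile retrieval up -d
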
  minio:
    # Turn on to leverage the `store` and `vdb_upload` tasks
    restart: always
    container_name: minio
    hostname: minio
    image: minio/minio:RELEASE.2023-03-20T20-16-18Z
    environment:
      MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY:-minioadmin}
      MINIO_SECRET_KEY: ${MINIO_SECRET_KEY:-minioadmin}
    ports:
      - "9001:9001"
      - "9000:9000"
    volumes:
      - ./.volumes/minio:/minio_data
    command: minio server /minio_data --console-address ":9001"
    healthcheck:
      test: [ "CMD", "curl", "-f", "http://localhost:9000/minio/health/live" ]
      interval: 30s
      timeout: 20s
      retries: 3
    profiles:
      - retrieval

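  # MinIO's web console is published on host port 9001 (the S3 API on 9000); unless overridden
  # via MINIO_ACCESS_KEY / MINIO_SECRET_KEY, the credentials default to minioadmin / minioadmin.
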
  milvus:
    # Turn on to leverage the `vdb_upload` task
    restart: always
    container_name: milvus-standalone
    image: milvusdb/milvus:v2.4.17-gpu
    command: [ "milvus", "run", "standalone" ]
    hostname: milvus
    security_opt:
      - seccomp:unconfined
    environment:
      ETCD_ENDPOINTS: etcd:2379
      MINIO_ADDRESS: minio:9000
    volumes:
      - ./.volumes/milvus:/var/lib/milvus
    healthcheck:
      test: [ "CMD", "curl", "-f", "http://localhost:9091/healthz" ]
      interval: 30s
      start_period: 90s
      timeout: 20s
      retries: 3
    ports:
      - "19530:19530"
      - "9091:9091"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["1"]
              capabilities: [gpu]
    depends_on:
      - "etcd"
      - "minio"
    profiles:
      - retrieval

  attu:
    # Turn on to leverage the `vdb_upload` task
    restart: always
    container_name: milvus-attu
    image: zilliz/attu:v2.3.5
    hostname: attu
    environment:
      MILVUS_URL: http://milvus:19530
    ports:
      - "3001:3000"
    depends_on:
      - "milvus"
    profiles:
      - retrieval
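  # Attu provides a web UI for Milvus at http://localhost:3001, pointed at the standalone
  # instance above via MILVUS_URL.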