- Upgrade PyTorch and torchaudio to 2.6.0 with CUDA 12.4 support
- Update GPU reset script to gracefully stop/start Ollama via supervisorctl (sketch below)
- Add Docker Compose configuration for both API and MCP server modes
- Implement comprehensive Docker entrypoint for multi-mode deployment (sketch below)
- Add GPU health check cleanup to prevent memory leaks (cleanup sketch below)
- Fix transcription memory management with proper resource cleanup (same sketch)
- Add filename security validation to prevent path traversal attacks (sketch below)
- Include .dockerignore for optimized Docker builds
- Remove deprecated supervisor configuration

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
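The GPU reset change implies a stop/reset/start sequence around Ollama under supervisord. A minimal Python sketch of that flow, assuming the supervised program is named `ollama` and leaving the actual reset step as a placeholder (the real script may be shell):

```python
import subprocess
import time

def reset_gpu_with_ollama_paused(cooldown_seconds: int = 5) -> None:
    """Stop Ollama under supervisord, reset the GPU, then restart it.

    Sketch only: the program name "ollama" and the reset step are assumptions.
    """
    subprocess.run(["supervisorctl", "stop", "ollama"], check=True)
    try:
        # The actual reset work lives in the repo's GPU reset script.
        time.sleep(cooldown_seconds)  # give the driver time to settle
    finally:
        # Always bring Ollama back, even if the reset step failed.
        subprocess.run(["supervisorctl", "start", "ollama"], check=True)
```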
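The multi-mode entrypoint dispatches on the SERVER_MODE variable that the compose file below sets to `api` or `mcp`. A sketch of that dispatch in Python; the module paths are assumptions, and the actual entrypoint may well be a shell script:

```python
#!/usr/bin/env python3
import os
import sys

def main() -> None:
    mode = os.environ.get("SERVER_MODE", "api").lower()
    if mode == "api":
        # exec replaces this process, so the server receives signals directly.
        os.execvp("python", ["python", "-m", "transcriptor.api_server"])  # module path assumed
    elif mode == "mcp":
        os.execvp("python", ["python", "-m", "transcriptor.mcp_server"])  # module path assumed
    else:
        sys.exit(f"Unknown SERVER_MODE: {mode!r} (expected 'api' or 'mcp')")

if __name__ == "__main__":
    main()
```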
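The memory-management and health-check-cleanup fixes both come down to releasing GPU resources deterministically after each run. A sketch of the usual pattern with PyTorch, which this stack uses; `load_model` and the return type stand in for whatever the repo actually calls:

```python
import gc
import torch

def transcribe_with_cleanup(load_model, audio_path: str):
    """Run one transcription and release GPU memory even if it fails.

    Sketch of the cleanup pattern; loader and return type are assumptions.
    """
    model = None
    try:
        model = load_model()
        return model.transcribe(audio_path)
    finally:
        # Drop the last reference, collect it, and release blocks held by
        # PyTorch's caching allocator so other processes can use the VRAM.
        del model
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
```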
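For the path-traversal fix, the usual shape is: strip directory components from the client-supplied name, then verify the resolved path still sits inside the output directory. A minimal sketch; the function name and base directory are assumptions (the service reads the real one from TRANSCRIPTION_OUTPUT_DIR):

```python
import os
from pathlib import Path

OUTPUT_DIR = Path("/outputs")  # illustrative; configured via TRANSCRIPTION_OUTPUT_DIR

def safe_output_path(filename: str) -> Path:
    """Reject path-traversal attempts (e.g. '../../etc/passwd') before writing."""
    name = os.path.basename(filename)  # drop any smuggled directory components
    if not name or name in (".", ".."):
        raise ValueError(f"invalid filename: {filename!r}")
    candidate = (OUTPUT_DIR / name).resolve()
    if not candidate.is_relative_to(OUTPUT_DIR.resolve()):  # Python 3.9+
        raise ValueError(f"path escapes output directory: {filename!r}")
    return candidate
```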
version: '3.8'

services:
  # API Server mode with nginx reverse proxy
  transcriptor-api:
    build:
      context: .
      dockerfile: Dockerfile
    image: transcriptor-apimcp:latest
    container_name: transcriptor-api
    runtime: nvidia
    environment:
      NVIDIA_VISIBLE_DEVICES: "0"
      NVIDIA_DRIVER_CAPABILITIES: compute,utility
      SERVER_MODE: api
      API_HOST: 127.0.0.1
      API_PORT: 33767
      WHISPER_MODEL_DIR: /models
      TRANSCRIPTION_OUTPUT_DIR: /outputs
      TRANSCRIPTION_BATCH_OUTPUT_DIR: /outputs/batch
      TRANSCRIPTION_MODEL: large-v3
      TRANSCRIPTION_DEVICE: auto
      TRANSCRIPTION_COMPUTE_TYPE: auto
      TRANSCRIPTION_OUTPUT_FORMAT: txt
      TRANSCRIPTION_BEAM_SIZE: 5
      TRANSCRIPTION_TEMPERATURE: 0.0
      JOB_QUEUE_MAX_SIZE: 5
      JOB_METADATA_DIR: /outputs/jobs
      JOB_RETENTION_DAYS: 7
      GPU_HEALTH_CHECK_ENABLED: "true"
      GPU_HEALTH_CHECK_INTERVAL_MINUTES: 10
      GPU_HEALTH_TEST_MODEL: tiny
      GPU_HEALTH_TEST_AUDIO: /test-audio/test.mp3
      GPU_RESET_COOLDOWN_MINUTES: 5
      # Optional proxy settings (uncomment if needed)
      # HTTP_PROXY: http://192.168.1.212:8080
      # HTTPS_PROXY: http://192.168.1.212:8080
    ports:
      - "33767:80"  # Map host:33767 to container nginx:80
    volumes:
      - /home/uad/agents/tools/mcp-transcriptor/models:/models
      - /home/uad/agents/tools/mcp-transcriptor/outputs:/outputs
      - /home/uad/agents/tools/mcp-transcriptor/logs:/logs
      - /home/uad/agents/tools/mcp-transcriptor/data/test.mp3:/test-audio/test.mp3:ro
      - /etc/localtime:/etc/localtime:ro  # Sync container time with host
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    restart: unless-stopped
    networks:
      - transcriptor-network

  # MCP Server mode (stdio based)
  transcriptor-mcp:
    build:
      context: .
      dockerfile: Dockerfile
    image: transcriptor-apimcp:latest
    container_name: transcriptor-mcp
    environment:
      SERVER_MODE: mcp
      WHISPER_MODEL_DIR: /models
      TRANSCRIPTION_OUTPUT_DIR: /outputs
      TRANSCRIPTION_BATCH_OUTPUT_DIR: /outputs/batch
      TRANSCRIPTION_MODEL: large-v3
      TRANSCRIPTION_DEVICE: auto
      TRANSCRIPTION_COMPUTE_TYPE: auto
      TRANSCRIPTION_OUTPUT_FORMAT: txt
      TRANSCRIPTION_BEAM_SIZE: 5
      TRANSCRIPTION_TEMPERATURE: 0.0
      JOB_QUEUE_MAX_SIZE: 100
      JOB_METADATA_DIR: /outputs/jobs
      JOB_RETENTION_DAYS: 7
      GPU_HEALTH_CHECK_ENABLED: "true"
      GPU_HEALTH_CHECK_INTERVAL_MINUTES: 10
      GPU_HEALTH_TEST_MODEL: tiny
      GPU_RESET_COOLDOWN_MINUTES: 5
      # Optional proxy settings (uncomment if needed)
      # HTTP_PROXY: http://192.168.1.212:8080
      # HTTPS_PROXY: http://192.168.1.212:8080
    volumes:
      - /home/uad/agents/tools/mcp-transcriptor/models:/models
      - /home/uad/agents/tools/mcp-transcriptor/outputs:/outputs
      - /home/uad/agents/tools/mcp-transcriptor/logs:/logs
      - /etc/localtime:/etc/localtime:ro
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    stdin_open: true  # Enable stdin for MCP stdio mode
    tty: true
    restart: unless-stopped
    networks:
      - transcriptor-network
    profiles:
      - mcp  # Only start when explicitly requested

networks:
  transcriptor-network:
    driver: bridge
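Startup note: because transcriptor-mcp carries a `profiles` entry, a plain `docker compose up -d` starts only transcriptor-api. The MCP service runs only when its profile is requested explicitly, e.g. `docker compose --profile mcp up -d`, or with `COMPOSE_PROFILES=mcp` set in the environment.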