# syntax=docker/dockerfile:1

# Multi-purpose Whisper Transcriptor Docker Image
# Supports both MCP Server and REST API Server modes
# Use SERVER_MODE environment variable to select: "mcp" or "api"
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04

# Prevent interactive prompts during installation.
# ARG (not ENV) so the setting applies at build time only and does not
# leak into the runtime environment of containers built from this image.
ARG DEBIAN_FRONTEND=noninteractive

# Install system dependencies.
# --no-install-recommends keeps the image lean; `update` and `install`
# share a layer so a stale apt index can never be paired with a newer
# package list. The deadsnakes PPA provides Python 3.12 on Ubuntu 22.04.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        software-properties-common \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update && apt-get install -y --no-install-recommends \
        ffmpeg \
        git \
        nginx \
        python3.12 \
        python3.12-dev \
        python3.12-venv \
        supervisor \
    && rm -rf /var/lib/apt/lists/*

# Make python3.12 the default `python` and `python3`
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 && \
    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1

# Install pip using ensurepip (Python 3.12+ doesn't have distutils)
RUN python -m ensurepip --upgrade && \
    python -m pip install --upgrade pip

# Set working directory
WORKDIR /app

# Copy requirements first for better caching
COPY requirements.txt .

# Install Python dependencies with CUDA 12.4 support.
# BUG FIX: the version specifiers were previously unquoted, so the shell
# interpreted `>` in e.g. `fastapi>=0.115.0` as output redirection — pip
# installed the packages UNPINNED and stray files named `=0.115.0` etc.
# were written into the layer. Quoting each requirement (also needed for
# the `[extra]` brackets, which the shell may glob) restores the intended
# version constraints.
RUN pip install --no-cache-dir \
        "torch==2.6.0" --index-url https://download.pytorch.org/whl/cu124 && \
    pip install --no-cache-dir \
        "torchaudio==2.6.0" --index-url https://download.pytorch.org/whl/cu124 && \
    pip install --no-cache-dir \
        "faster-whisper" \
        "fastapi>=0.115.0" \
        "uvicorn[standard]>=0.32.0" \
        "python-multipart>=0.0.9" \
        "aiofiles>=23.0.0" \
        "mcp[cli]>=1.2.0" \
        "gTTS>=2.3.0" \
        "pyttsx3>=2.90" \
        "scipy>=1.10.0" \
        "numpy>=1.24.0"

# Copy application code
COPY src/ ./src/
COPY pyproject.toml .

# Copy test audio file for GPU health checks
COPY test.mp3 .
# Copy nginx configuration (reverse proxy in front of the API server)
COPY nginx/transcriptor.conf /etc/nginx/sites-available/transcriptor.conf

# Copy entrypoint script and GPU reset script
COPY docker-entrypoint.sh /docker-entrypoint.sh
COPY reset_gpu.sh /app/reset_gpu.sh
RUN chmod +x /docker-entrypoint.sh /app/reset_gpu.sh

# Create directories for models, outputs, and logs
RUN mkdir -p /models /outputs /logs /app/outputs/uploads /app/outputs/batch /app/outputs/jobs

# Set Python path so modules under /app/src import without an install step
ENV PYTHONPATH=/app/src

# Default environment variables (can be overridden at `docker run`).
# API_HOST=127.0.0.1 binds the API to loopback — presumably nginx on port 80
# is the public entry point; verify against docker-entrypoint.sh.
# NOTE(review): GPU_HEALTH_TEST_AUDIO points at /test-audio/test.mp3, but the
# build copies test.mp3 to /app/test.mp3 — confirm a volume is mounted at
# /test-audio at runtime, or this default path is wrong.
ENV WHISPER_MODEL_DIR=/models \
    TRANSCRIPTION_OUTPUT_DIR=/outputs \
    TRANSCRIPTION_BATCH_OUTPUT_DIR=/outputs/batch \
    TRANSCRIPTION_MODEL=large-v3 \
    TRANSCRIPTION_DEVICE=auto \
    TRANSCRIPTION_COMPUTE_TYPE=auto \
    TRANSCRIPTION_OUTPUT_FORMAT=txt \
    TRANSCRIPTION_BEAM_SIZE=5 \
    TRANSCRIPTION_TEMPERATURE=0.0 \
    API_HOST=127.0.0.1 \
    API_PORT=33767 \
    JOB_QUEUE_MAX_SIZE=5 \
    JOB_METADATA_DIR=/outputs/jobs \
    JOB_RETENTION_DAYS=7 \
    GPU_HEALTH_CHECK_ENABLED=true \
    GPU_HEALTH_CHECK_INTERVAL_MINUTES=10 \
    GPU_HEALTH_TEST_MODEL=tiny \
    GPU_HEALTH_TEST_AUDIO=/test-audio/test.mp3 \
    GPU_RESET_COOLDOWN_MINUTES=5 \
    SERVER_MODE=api

# Expose port 80 for nginx (API mode only). EXPOSE is documentation only —
# the port must still be published with -p/--publish at run time.
EXPOSE 80

# Use entrypoint script to handle different server modes.
# NOTE(review): no USER directive, so the container runs as root — likely
# required for nginx on port 80, but consider dropping privileges inside
# the entrypoint after binding.
ENTRYPOINT ["/docker-entrypoint.sh"]