- Upgrade PyTorch and torchaudio to 2.6.0 with CUDA 12.4 support
- Update GPU reset script to gracefully stop/start Ollama via supervisorctl
- Add Docker Compose configuration for both API and MCP server modes
- Implement comprehensive Docker entrypoint for multi-mode deployment
- Add GPU health check cleanup to prevent memory leaks
- Fix transcription memory management with proper resource cleanup
- Add filename security validation to prevent path traversal attacks
- Include .dockerignore for optimized Docker builds
- Remove deprecated supervisor configuration

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
99 lines
3.2 KiB
Bash
Executable File
99 lines
3.2 KiB
Bash
Executable File
#!/bin/bash
#
# Reset the NVIDIA GPU driver stack without rebooting.
#
# Steps performed further down in this script:
#   1. stop Ollama (via supervisorctl) so it releases GPU resources
#   2. stop the nvidia-persistenced service
#   3. unload, then reload, the NVIDIA kernel modules
#   4. start nvidia-persistenced again and check the GPU with nvidia-smi
#   5. start Ollama again if this script stopped it
#
# Privileged commands are run through sudo.

# Opening banner: rule, title, rule, then one blank line.
printf '%s\n' \
  "============================================================" \
  "NVIDIA GPU Driver Reset Script" \
  "============================================================" \
  ""

# Stop Ollama via supervisorctl so it lets go of the GPU before the driver is
# touched. OLLAMA_WAS_RUNNING records whether this script stopped it, so the
# service can be started again later.

#######################################
# EXIT-trap handler: start Ollama again when the script terminates with an
# error after having stopped it. Without this, any failing step further down
# (each of which calls `exit 1`) would leave Ollama stopped.
# Globals:   OLLAMA_WAS_RUNNING (read)
# Arguments: $1 - exit status the script is terminating with
# Outputs:   progress messages on stderr
# Returns:   0 always; the script's own exit status is left untouched
#######################################
restore_ollama_after_failure() {
  local exit_status=${1:-0}

  # Nothing to do on a clean exit or when this script never stopped Ollama.
  if (( exit_status == 0 )) || [[ "${OLLAMA_WAS_RUNNING:-false}" != true ]]; then
    return 0
  fi

  echo "GPU reset aborted (exit status ${exit_status}); restarting Ollama service..." >&2
  if sudo supervisorctl start ollama; then
    echo "✓ Ollama restarted" >&2
  else
    echo "✗ Failed to restart Ollama" >&2
  fi
  return 0
}
trap 'restore_ollama_after_failure "$?"' EXIT

echo "Stopping Ollama service..."
# stderr is discarded on purpose: a missing supervisorctl or an unknown
# program is reported by the else-branch message below.
if sudo supervisorctl stop ollama 2>/dev/null; then
  echo "✓ Ollama stopped via supervisorctl"
  OLLAMA_WAS_RUNNING=true
else
  echo " Ollama not running or supervisorctl not available"
  OLLAMA_WAS_RUNNING=false
fi
echo ""

# Give Ollama time to release GPU resources. There is nothing to wait for
# when no process was stopped.
if [[ "$OLLAMA_WAS_RUNNING" == true ]]; then
  sleep 2
fi

# Stop nvidia-persistenced service before the kernel modules are unloaded.
# NOTE(review): presumably the daemon keeps the driver in use while running;
# confirm against the nvidia-persistenced documentation.
echo "Stopping nvidia-persistenced service..."
if ! sudo systemctl stop nvidia-persistenced; then
  # The script does not continue past a service that cannot be stopped.
  echo "✗ Failed to stop nvidia-persistenced"
  exit 1
fi
echo "✓ nvidia-persistenced stopped"
echo ""

# Unload NVIDIA kernel modules (in correct order: nvidia_uvm, nvidia_drm,
# nvidia_modeset, then nvidia itself).
#
# Unloading stays best-effort and never aborts the script, but a module that
# is simply absent is now reported separately from one that refused to
# unload, and rmmod's own diagnostic is shown instead of being discarded.

#######################################
# Report whether a kernel module is currently loaded.
# Arguments: $1 - module name as it appears in the listing
#            $2 - module listing to consult (default: /proc/modules)
# Returns:   0 if the module is listed, 1 if it is not listed or the
#            listing cannot be read
#######################################
nvidia_module_loaded() {
  local wanted=$1
  local listing=${2:-/proc/modules}
  local name _rest

  [[ -r "$listing" ]] || return 1
  # The first whitespace-separated field of each line is the module name.
  while read -r name _rest; do
    if [[ "$name" == "$wanted" ]]; then
      return 0
    fi
  done < "$listing"
  return 1
}

#######################################
# Unload one kernel module with `sudo rmmod` if it is loaded.
# Arguments: $1 - module name
#            $2 - module listing to consult (default: /proc/modules)
# Outputs:   status line on stdout; failure details on stderr
# Returns:   0 if the module was unloaded or was not loaded, 1 if rmmod failed
#######################################
unload_nvidia_module() {
  local module=$1
  local listing=${2:-/proc/modules}
  local rmmod_output

  if ! nvidia_module_loaded "$module" "$listing"; then
    echo " ${module} not loaded"
    return 0
  fi

  if rmmod_output=$(sudo rmmod "$module" 2>&1); then
    echo "✓ ${module} unloaded"
    return 0
  fi

  echo "✗ Failed to unload ${module}: ${rmmod_output:-no diagnostic from rmmod}" >&2
  return 1
}

echo "Unloading NVIDIA kernel modules..."
nvidia_unload_failures=0
for nvidia_module in nvidia_uvm nvidia_drm nvidia_modeset nvidia; do
  unload_nvidia_module "$nvidia_module" \
    || nvidia_unload_failures=$((nvidia_unload_failures + 1))
done
if (( nvidia_unload_failures > 0 )); then
  echo "✗ ${nvidia_unload_failures} NVIDIA module(s) could not be unloaded; the driver reset will be incomplete" >&2
fi
echo ""

# Small delay to ensure clean unload
sleep 1

# Reload NVIDIA kernel modules (in correct order: nvidia first, then
# nvidia_modeset, nvidia_uvm and nvidia_drm). The first module that fails to
# load aborts the script with exit status 1.
#
# An explicit if/else is used instead of `cmd && echo ok || { ...; exit 1; }`
# so that only a modprobe failure, never a failing status echo, can trigger
# the abort.
echo "Loading NVIDIA kernel modules..."
for nvidia_module in nvidia nvidia_modeset nvidia_uvm nvidia_drm; do
  if sudo modprobe "$nvidia_module"; then
    echo "✓ ${nvidia_module} loaded"
  else
    echo "✗ Failed to load ${nvidia_module}"
    exit 1
  fi
done
echo ""

# Bring nvidia-persistenced back up now that the kernel modules are reloaded.
echo "Starting nvidia-persistenced service..."
if ! sudo systemctl start nvidia-persistenced; then
  # A service that does not come back is treated as a failed reset.
  echo "✗ Failed to start nvidia-persistenced"
  exit 1
fi
echo "✓ nvidia-persistenced started"
echo ""

# Verify GPU is accessible: nvidia-smi must exist and must be able to query
# the GPU name and total memory. Either check failing ends the script with
# exit status 1.
echo "Verifying GPU accessibility..."
if ! command -v nvidia-smi &> /dev/null; then
  echo "✗ nvidia-smi not found"
  exit 1
fi
# The query output (one CSV line per GPU, no header) is shown to the operator.
if ! nvidia-smi --query-gpu=name,memory.total --format=csv,noheader; then
  echo "✗ GPU not accessible"
  exit 1
fi
echo "✓ GPU reset successful"
echo ""

# Start Ollama again, but only when OLLAMA_WAS_RUNNING says this script
# stopped it earlier. A failed restart is reported and the script carries on.
if [[ "$OLLAMA_WAS_RUNNING" == true ]]; then
  echo "Restarting Ollama service..."
  if sudo supervisorctl start ollama; then
    echo "✓ Ollama restarted"
  else
    echo "✗ Failed to restart Ollama"
  fi
  echo ""
fi

# Closing banner, reached only when no earlier step aborted the script.
printf '%s\n' \
  "============================================================" \
  "GPU driver reset completed successfully" \
  "============================================================"