Fast-Whisper-MCP-Server/reset_gpu.sh
Commit fb1e5dceba by Alihan: Upgrade to PyTorch 2.6.0 and enhance GPU reset script with Ollama management
- Upgrade PyTorch and torchaudio to 2.6.0 with CUDA 12.4 support
- Update GPU reset script to gracefully stop/start Ollama via supervisorctl
- Add Docker Compose configuration for both API and MCP server modes
- Implement comprehensive Docker entrypoint for multi-mode deployment
- Add GPU health check cleanup to prevent memory leaks
- Fix transcription memory management with proper resource cleanup
- Add filename security validation to prevent path traversal attacks (illustrative sketch below)
- Include .dockerignore for optimized Docker builds
- Remove deprecated supervisor configuration

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-27 23:01:22 +03:00
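
The commit message above mentions filename validation against path traversal. Purely as an illustration of that idea (hypothetical paths and variable names, not the repository's actual implementation), a minimal shell sketch could look like this:

# Hypothetical sketch: resolve the requested name against a fixed upload
# directory and reject anything that escapes it.
UPLOAD_DIR="/srv/uploads"      # assumed base directory
requested="$1"                 # untrusted filename supplied by the caller
resolved=$(realpath -m -- "$UPLOAD_DIR/$requested")
case "$resolved" in
    "$UPLOAD_DIR"/*) echo "OK: $resolved" ;;
    *) echo "Rejected: path traversal attempt" >&2; exit 1 ;;
esac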

#!/bin/bash
# Script to reset the NVIDIA GPU driver stack without rebooting.
# It reloads the kernel modules and restarts the nvidia-persistenced service,
# and stops/starts Ollama beforehand so that it releases its GPU resources.
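# Usage (assumption: invoked from the repository root by a user with sudo
# rights for supervisorctl, systemctl, rmmod and modprobe):
#   ./reset_gpu.sh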
echo "============================================================"
echo "NVIDIA GPU Driver Reset Script"
echo "============================================================"
echo ""
# Stop Ollama via supervisorctl
echo "Stopping Ollama service..."
sudo supervisorctl stop ollama 2>/dev/null
if [ $? -eq 0 ]; then
    echo "✓ Ollama stopped via supervisorctl"
    OLLAMA_WAS_RUNNING=true
else
    echo " Ollama not running or supervisorctl not available"
    OLLAMA_WAS_RUNNING=false
fi
echo ""
# Give Ollama time to release GPU resources
sleep 2
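# Optional stricter wait (illustrative sketch, not part of the original flow):
# poll until no compute process is still registered on the GPU.
# for _ in $(seq 1 10); do
#     [ -z "$(nvidia-smi --query-compute-apps=pid --format=csv,noheader 2>/dev/null)" ] && break
#     sleep 1
# done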
# Stop nvidia-persistenced service
echo "Stopping nvidia-persistenced service..."
sudo systemctl stop nvidia-persistenced
if [ $? -eq 0 ]; then
    echo "✓ nvidia-persistenced stopped"
else
    echo "✗ Failed to stop nvidia-persistenced"
    exit 1
fi
echo ""
# Unload NVIDIA kernel modules, dependents first
# (nvidia_uvm, nvidia_drm and nvidia_modeset all depend on the base nvidia module)
echo "Unloading NVIDIA kernel modules..."
sudo rmmod nvidia_uvm 2>/dev/null && echo "✓ nvidia_uvm unloaded" || echo " nvidia_uvm not loaded or failed to unload"
sudo rmmod nvidia_drm 2>/dev/null && echo "✓ nvidia_drm unloaded" || echo " nvidia_drm not loaded or failed to unload"
sudo rmmod nvidia_modeset 2>/dev/null && echo "✓ nvidia_modeset unloaded" || echo " nvidia_modeset not loaded or failed to unload"
sudo rmmod nvidia 2>/dev/null && echo "✓ nvidia unloaded" || echo " nvidia not loaded or failed to unload"
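# Optional sanity check (not in the original script): report any NVIDIA modules
# that are still loaded, e.g. because another process still holds the GPU.
# lsmod | awk '$1 ~ /^nvidia/ {print "  still loaded:", $1, $4}'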
echo ""
# Small delay to ensure clean unload
sleep 1
# Reload NVIDIA kernel modules, base module first, then its dependents
echo "Loading NVIDIA kernel modules..."
sudo modprobe nvidia && echo "✓ nvidia loaded" || { echo "✗ Failed to load nvidia"; exit 1; }
sudo modprobe nvidia_modeset && echo "✓ nvidia_modeset loaded" || { echo "✗ Failed to load nvidia_modeset"; exit 1; }
sudo modprobe nvidia_uvm && echo "✓ nvidia_uvm loaded" || { echo "✗ Failed to load nvidia_uvm"; exit 1; }
sudo modprobe nvidia_drm && echo "✓ nvidia_drm loaded" || { echo "✗ Failed to load nvidia_drm"; exit 1; }
echo ""
# Restart nvidia-persistenced service
echo "Starting nvidia-persistenced service..."
sudo systemctl start nvidia-persistenced
if [ $? -eq 0 ]; then
    echo "✓ nvidia-persistenced started"
else
    echo "✗ Failed to start nvidia-persistenced"
    exit 1
fi
echo ""
# Verify GPU is accessible
echo "Verifying GPU accessibility..."
if command -v nvidia-smi &> /dev/null; then
    nvidia-smi --query-gpu=name,memory.total --format=csv,noheader
    if [ $? -eq 0 ]; then
        echo "✓ GPU reset successful"
    else
        echo "✗ GPU not accessible"
        exit 1
    fi
else
    echo "✗ nvidia-smi not found"
    exit 1
fi
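# Troubleshooting hint (optional, not part of the original script): if the GPU
# is still not accessible, `sudo lsof /dev/nvidia*` can show which processes are
# holding the device files.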
echo ""
# Restart Ollama if it was running
if [ "$OLLAMA_WAS_RUNNING" = true ]; then
echo "Restarting Ollama service..."
sudo supervisorctl start ollama
if [ $? -eq 0 ]; then
echo "✓ Ollama restarted"
else
echo "✗ Failed to restart Ollama"
fi
echo ""
fi
echo "============================================================"
echo "GPU driver reset completed successfully"
echo "============================================================"