diff --git a/CLAUDE.md b/CLAUDE.md index c43c5cd..4347902 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -48,14 +48,12 @@ pip install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https # Using the startup script (recommended - sets all env vars) ./run_mcp_server.sh -# Direct Python execution -python whisper_server.py +# Direct Python execution (ensure PYTHONPATH includes src/) +export PYTHONPATH="$(pwd)/src:$PYTHONPATH" +python src/servers/whisper_server.py # Using MCP CLI for development testing -mcp dev whisper_server.py - -# Run server with MCP CLI -mcp run whisper_server.py +mcp dev src/servers/whisper_server.py ``` #### REST API Server (for HTTP clients) @@ -64,14 +62,12 @@ mcp run whisper_server.py # Using the startup script (recommended - sets all env vars) ./run_api_server.sh -# Direct Python execution with uvicorn -python api_server.py - -# Or using uvicorn directly -uvicorn api_server:app --host 0.0.0.0 --port 8000 +# Direct Python execution with uvicorn (ensure PYTHONPATH includes src/) +export PYTHONPATH="$(pwd)/src:$PYTHONPATH" +uvicorn src.servers.api_server:app --host 0.0.0.0 --port 8000 # Development mode with auto-reload -uvicorn api_server:app --reload --host 0.0.0.0 --port 8000 +uvicorn src.servers.api_server:app --reload --host 0.0.0.0 --port 8000 ``` #### Running Both Simultaneously @@ -84,6 +80,20 @@ uvicorn api_server:app --reload --host 0.0.0.0 --port 8000 ./run_api_server.sh ``` +### Running Tests + +```bash +# Run all tests (requires GPU; ensure PYTHONPATH includes src/) +python tests/test_core_components.py +python tests/test_e2e_integration.py +python tests/test_async_api_integration.py + +# Or run individual test components +cd tests && python test_core_components.py +``` + +**Important**: All tests require GPU to be available. Tests will fail if CUDA is not properly configured. 
+ ### Docker ```bash @@ -108,11 +118,19 @@ docker run --gpus all -v /path/to/models:/models -v /path/to/outputs:/outputs wh │ │ ├── transcriber.py # Transcription logic (single & batch) │ │ ├── model_manager.py # Model lifecycle & caching │ │ ├── job_queue.py # Async job queue manager -│ │ └── gpu_health.py # GPU health monitoring +│ │ ├── gpu_health.py # GPU health monitoring +│ │ └── gpu_reset.py # GPU driver reset with cooldown │ └── utils/ # Utility modules │ ├── audio_processor.py # Audio validation & preprocessing │ ├── formatters.py # Output format conversion -│ └── test_audio_generator.py # Test audio generation for GPU checks +│ ├── test_audio_generator.py # Test audio generation for GPU checks +│ ├── startup.py # Startup sequence orchestration +│ ├── circuit_breaker.py # Circuit breaker pattern implementation +│ └── input_validation.py # Input validation utilities +├── tests/ # Test suite (requires GPU) +│ ├── test_core_components.py # Core functionality tests +│ ├── test_e2e_integration.py # End-to-end integration tests +│ └── test_async_api_integration.py # Async API tests ├── run_mcp_server.sh # MCP server startup script ├── run_api_server.sh # API server startup script ├── reset_gpu.sh # GPU driver reset script @@ -125,8 +143,9 @@ docker run --gpus all -v /path/to/models:/models -v /path/to/outputs:/outputs wh 1. **src/servers/whisper_server.py** - MCP server entry point - Uses FastMCP framework to expose MCP tools - - Three main tools: `get_model_info_api()`, `transcribe()`, `batch_transcribe_audio()` - - Server initialization at line 19 + - Main tools: `get_model_info_api()`, `transcribe_async()`, `transcribe_upload()`, `check_job_status()`, `get_job_result()` + - Global job queue and health monitor instances + - Server initialization around line 31 2. 
**src/servers/api_server.py** - REST API server entry point - Uses FastAPI framework for HTTP endpoints @@ -174,6 +193,25 @@ docker run --gpus all -v /path/to/models:/models -v /path/to/outputs:/outputs wh - `generate_test_audio()` - Creates synthetic 1-second audio for GPU health checks - Uses numpy to generate sine wave, no external audio files needed +10. **src/core/gpu_reset.py** - GPU driver reset with cooldown protection + - `reset_gpu_driver()` - Executes reset_gpu.sh script to reload NVIDIA drivers + - `check_reset_cooldown()` - Validates if enough time has passed since last reset + - Cooldown timestamp persists in `/tmp/whisper-gpu-last-reset` + - Prevents reset loops while allowing recovery from sleep/wake issues + +11. **src/utils/startup.py** - Startup sequence orchestration + - `startup_sequence()` - Coordinates the GPU health check and queue initialization + - `cleanup_on_shutdown()` - Cleanup handler for graceful shutdown + - Centralizes startup logic shared by both servers + +12. **src/utils/circuit_breaker.py** - Circuit breaker pattern implementation + - Provides fault tolerance for external service calls + - Prevents cascading failures + +13. **src/utils/input_validation.py** - Input validation utilities + - Validates and sanitizes user inputs + - Security layer for API endpoints + ### Key Architecture Patterns - **Dual Server Architecture**: Both MCP and REST API servers import and use the same core modules (core.transcriber, core.model_manager, utils.audio_processor, utils.formatters), ensuring consistent behavior @@ -349,32 +387,6 @@ curl http://localhost:8000/health/gpu ### Overview This service features automatic GPU driver reset on CUDA errors, which is especially useful for recovering from sleep/wake cycles. The reset functionality is **enabled by default** and includes cooldown protection to prevent reset loops. -### Passwordless Sudo Setup (Required) - -For automatic GPU reset to work, you must configure passwordless sudo for NVIDIA commands. 
Create a sudoers configuration file: - -```bash -sudo visudo -f /etc/sudoers.d/whisper-gpu-reset -``` - -Add the following (replace `your_username` with your actual username): - -``` -# Whisper GPU Auto-Reset Permissions -your_username ALL=(ALL) NOPASSWD: /bin/systemctl stop nvidia-persistenced -your_username ALL=(ALL) NOPASSWD: /bin/systemctl start nvidia-persistenced -your_username ALL=(ALL) NOPASSWD: /sbin/rmmod nvidia_uvm -your_username ALL=(ALL) NOPASSWD: /sbin/rmmod nvidia_drm -your_username ALL=(ALL) NOPASSWD: /sbin/rmmod nvidia_modeset -your_username ALL=(ALL) NOPASSWD: /sbin/rmmod nvidia -your_username ALL=(ALL) NOPASSWD: /sbin/modprobe nvidia -your_username ALL=(ALL) NOPASSWD: /sbin/modprobe nvidia_modeset -your_username ALL=(ALL) NOPASSWD: /sbin/modprobe nvidia_uvm -your_username ALL=(ALL) NOPASSWD: /sbin/modprobe nvidia_drm -``` - -**Security Note:** These permissions are limited to specific NVIDIA driver commands only. The reset script (`reset_gpu.sh`) is executed with sudo but is part of the codebase and can be audited. - ### How It Works 1. **Startup Check**: When the service starts, it performs a GPU health check @@ -456,6 +468,31 @@ Reset only happens on actual CUDA failures ## Development Workflow +### Running Tests + +The test suite requires GPU access. Ensure CUDA is properly configured before running tests. 
+ +```bash +# Set PYTHONPATH to include src directory +export PYTHONPATH="$(pwd)/src:$PYTHONPATH" + +# Run core component tests (GPU health, job queue, audio validation) +python tests/test_core_components.py + +# Run end-to-end integration tests +python tests/test_e2e_integration.py + +# Run async API integration tests +python tests/test_async_api_integration.py +``` + +Tests will automatically: +- Check for GPU availability (exit if not available) +- Validate audio file processing +- Test GPU health monitoring +- Test job queue operations +- Test transcription pipeline + ### Testing GPU Health ```python # Test GPU health check manually diff --git a/api.logs b/api.logs deleted file mode 100644 index b00b111..0000000 --- a/api.logs +++ /dev/null @@ -1,85 +0,0 @@ -INFO:__main__:====================================================================== -INFO:__main__:PERFORMING STARTUP GPU HEALTH CHECK -INFO:__main__:====================================================================== -INFO:faster_whisper:Processing audio with duration 00:01.512 -INFO:faster_whisper:Detected language 'en' with probability 0.95 -INFO:core.gpu_health:GPU health check passed: NVIDIA GeForce RTX 3060, test duration: 1.04s -INFO:__main__:====================================================================== -INFO:__main__:STARTUP GPU CHECK SUCCESSFUL -INFO:__main__:GPU Device: NVIDIA GeForce RTX 3060 -INFO:__main__:Memory Available: 11.66 GB -INFO:__main__:Test Duration: 1.04s -INFO:__main__:====================================================================== -INFO:__main__:Starting Whisper REST API server on 0.0.0.0:8000 -INFO: Started server process [69821] -INFO: Waiting for application startup. -INFO:__main__:Starting job queue and health monitor... 
-INFO:core.job_queue:Starting job queue (max size: 100) -INFO:core.job_queue:Loading jobs from /media/raid/agents/tools/mcp-transcriptor/outputs/jobs -INFO:core.job_queue:Loaded 8 jobs from disk -INFO:core.job_queue:Job queue worker loop started -INFO:core.job_queue:Job queue worker started -INFO:__main__:Job queue started (max_size=100, metadata_dir=/media/raid/agents/tools/mcp-transcriptor/outputs/jobs) -INFO:core.gpu_health:Starting GPU health monitor (interval: 10.0 minutes) -INFO:faster_whisper:Processing audio with duration 00:01.512 -INFO:faster_whisper:Detected language 'en' with probability 0.95 -INFO:core.gpu_health:GPU health check passed: NVIDIA GeForce RTX 3060, test duration: 0.37s -INFO:__main__:GPU health monitor started (interval=10 minutes) -INFO: Application startup complete. -INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit) -INFO: 127.0.0.1:48092 - "GET /jobs HTTP/1.1" 200 OK -INFO: 127.0.0.1:60874 - "GET /jobs?status=completed&limit=3 HTTP/1.1" 200 OK -INFO: 127.0.0.1:60876 - "GET /jobs?status=failed&limit=10 HTTP/1.1" 200 OK -INFO:core.job_queue:Running GPU health check before job submission -INFO:faster_whisper:Processing audio with duration 00:01.512 -INFO:faster_whisper:Detected language 'en' with probability 0.95 -INFO:core.gpu_health:GPU health check passed: NVIDIA GeForce RTX 3060, test duration: 0.39s -INFO:core.job_queue:GPU health check passed -INFO:core.job_queue:Job 6be8e49a-bdc1-4508-af99-280bef033cb0 submitted: /tmp/whisper_test_voice_1s.mp3 (queue position: 1) -INFO: 127.0.0.1:58376 - "POST /jobs HTTP/1.1" 200 OK -INFO:core.job_queue:Job 6be8e49a-bdc1-4508-af99-280bef033cb0 started processing -INFO:core.model_manager:Running GPU health check with auto-reset before model loading -INFO:faster_whisper:Processing audio with duration 00:01.512 -INFO:faster_whisper:Detected language 'en' with probability 0.95 -INFO:core.gpu_health:GPU health check passed: NVIDIA GeForce RTX 3060, test duration: 0.54s 
-INFO:core.model_manager:Loading Whisper model: tiny device: cuda compute type: float16 -INFO:core.model_manager:Available GPU memory: 12.52 GB -INFO:core.model_manager:Enabling batch processing acceleration, batch size: 16 -INFO:core.transcriber:Starting transcription of file: whisper_test_voice_1s.mp3 -INFO:utils.audio_processor:Successfully preprocessed audio: whisper_test_voice_1s.mp3 -INFO:core.transcriber:Using batch acceleration for transcription... -INFO:faster_whisper:Processing audio with duration 00:01.512 -INFO:faster_whisper:VAD filter removed 00:00.000 of audio -INFO:faster_whisper:Detected language 'en' with probability 0.95 -INFO:core.transcriber:Transcription completed, time used: 0.16 seconds, detected language: en, audio length: 1.51 seconds -INFO:core.transcriber:Transcription results saved to: /media/raid/agents/tools/mcp-transcriptor/outputs/whisper_test_voice_1s.txt -INFO:core.job_queue:Job 6be8e49a-bdc1-4508-af99-280bef033cb0 completed successfully: /media/raid/agents/tools/mcp-transcriptor/outputs/whisper_test_voice_1s.txt -INFO:core.job_queue:Job 6be8e49a-bdc1-4508-af99-280bef033cb0 finished: status=completed, duration=1.1s -INFO: 127.0.0.1:41646 - "GET /jobs/6be8e49a-bdc1-4508-af99-280bef033cb0 HTTP/1.1" 200 OK -INFO: 127.0.0.1:34046 - "GET /jobs/6be8e49a-bdc1-4508-af99-280bef033cb0/result HTTP/1.1" 200 OK -INFO:core.job_queue:Running GPU health check before job submission -INFO:faster_whisper:Processing audio with duration 00:01.512 -INFO:faster_whisper:Detected language 'en' with probability 0.95 -INFO:core.gpu_health:GPU health check passed: NVIDIA GeForce RTX 3060, test duration: 0.39s -INFO:core.job_queue:GPU health check passed -INFO:core.job_queue:Job 41ce74c0-8929-457b-96b3-1b8e4a720a7a submitted: /home/uad/agents/tools/mcp-transcriptor/data/test.mp3 (queue position: 1) -INFO: 127.0.0.1:44576 - "POST /jobs HTTP/1.1" 200 OK -INFO:core.job_queue:Job 41ce74c0-8929-457b-96b3-1b8e4a720a7a started processing 
-INFO:core.model_manager:Running GPU health check with auto-reset before model loading -INFO:faster_whisper:Processing audio with duration 00:01.512 -INFO:faster_whisper:Detected language 'en' with probability 0.95 -INFO:core.gpu_health:GPU health check passed: NVIDIA GeForce RTX 3060, test duration: 0.39s -INFO:core.model_manager:Loading Whisper model: large-v3 device: cuda compute type: float16 -INFO:core.model_manager:Available GPU memory: 12.52 GB -INFO:core.model_manager:Enabling batch processing acceleration, batch size: 16 -INFO:core.transcriber:Starting transcription of file: test.mp3 -INFO:utils.audio_processor:Successfully preprocessed audio: test.mp3 -INFO:core.transcriber:Using batch acceleration for transcription... -INFO:faster_whisper:Processing audio with duration 00:06.955 -INFO:faster_whisper:VAD filter removed 00:00.299 of audio -INFO:core.transcriber:Transcription completed, time used: 0.52 seconds, detected language: en, audio length: 6.95 seconds -INFO:core.transcriber:Transcription results saved to: /media/raid/agents/tools/mcp-transcriptor/outputs/test.txt -INFO:core.job_queue:Job 41ce74c0-8929-457b-96b3-1b8e4a720a7a completed successfully: /media/raid/agents/tools/mcp-transcriptor/outputs/test.txt -INFO:core.job_queue:Job 41ce74c0-8929-457b-96b3-1b8e4a720a7a finished: status=completed, duration=23.3s -INFO: 127.0.0.1:59120 - "GET /jobs/41ce74c0-8929-457b-96b3-1b8e4a720a7a HTTP/1.1" 200 OK -INFO: 127.0.0.1:53806 - "GET /jobs/41ce74c0-8929-457b-96b3-1b8e4a720a7a/result HTTP/1.1" 200 OK diff --git a/mcp.logs b/mcp.logs deleted file mode 100644 index 7f4bb24..0000000 --- a/mcp.logs +++ /dev/null @@ -1,25 +0,0 @@ -starting mcp server for whisper stt transcriptor -INFO:__main__:====================================================================== -INFO:__main__:PERFORMING STARTUP GPU HEALTH CHECK -INFO:__main__:====================================================================== -INFO:faster_whisper:Processing audio with duration 00:01.512 
-INFO:faster_whisper:Detected language 'en' with probability 0.95 -INFO:core.gpu_health:GPU health check passed: NVIDIA GeForce RTX 3060, test duration: 0.93s -INFO:__main__:====================================================================== -INFO:__main__:STARTUP GPU CHECK SUCCESSFUL -INFO:__main__:GPU Device: NVIDIA GeForce RTX 3060 -INFO:__main__:Memory Available: 11.66 GB -INFO:__main__:Test Duration: 0.93s -INFO:__main__:====================================================================== -INFO:__main__:Initializing job queue... -INFO:core.job_queue:Starting job queue (max size: 100) -INFO:core.job_queue:Loading jobs from /media/raid/agents/tools/mcp-transcriptor/outputs/jobs -INFO:core.job_queue:Loaded 5 jobs from disk -INFO:core.job_queue:Job queue worker loop started -INFO:core.job_queue:Job queue worker started -INFO:__main__:Job queue started (max_size=100, metadata_dir=/media/raid/agents/tools/mcp-transcriptor/outputs/jobs) -INFO:core.gpu_health:Starting GPU health monitor (interval: 10.0 minutes) -INFO:faster_whisper:Processing audio with duration 00:01.512 -INFO:faster_whisper:Detected language 'en' with probability 0.95 -INFO:core.gpu_health:GPU health check passed: NVIDIA GeForce RTX 3060, test duration: 0.38s -INFO:__main__:GPU health monitor started (interval=10 minutes) diff --git a/run_api_server.sh b/run_api_server.sh index 0e52387..81e63c9 100755 --- a/run_api_server.sh +++ b/run_api_server.sh @@ -33,7 +33,7 @@ export API_PORT="8000" export GPU_RESET_COOLDOWN_MINUTES=5 # Minimum time between GPU reset attempts # Job Queue Configuration -export JOB_QUEUE_MAX_SIZE=100 +export JOB_QUEUE_MAX_SIZE=5 export JOB_METADATA_DIR="/media/raid/agents/tools/mcp-transcriptor/outputs/jobs" export JOB_RETENTION_DAYS=7 diff --git a/supervisor/transcriptor-api.conf b/supervisor/transcriptor-api.conf index 1cbe675..e2b81e5 100644 --- a/supervisor/transcriptor-api.conf +++ b/supervisor/transcriptor-api.conf @@ -1,4 +1,4 @@ -[program:whisper-api-server] 
+[program:transcriptor-api] command=/home/uad/agents/tools/mcp-transcriptor/venv/bin/python /home/uad/agents/tools/mcp-transcriptor/src/servers/api_server.py directory=/home/uad/agents/tools/mcp-transcriptor user=uad