diff --git a/CLAUDE.md b/CLAUDE.md
index c43c5cd..4347902 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -48,14 +48,12 @@ pip install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https
 # Using the startup script (recommended - sets all env vars)
 ./run_mcp_server.sh
 
-# Direct Python execution
-python whisper_server.py
+# Direct Python execution (ensure PYTHONPATH includes src/)
+export PYTHONPATH="$(pwd)/src:$PYTHONPATH"
+python src/servers/whisper_server.py
 
 # Using MCP CLI for development testing
-mcp dev whisper_server.py
-
-# Run server with MCP CLI
-mcp run whisper_server.py
+mcp dev src/servers/whisper_server.py
 ```
 
 #### REST API Server (for HTTP clients)
@@ -64,14 +62,12 @@ mcp run whisper_server.py
 # Using the startup script (recommended - sets all env vars)
 ./run_api_server.sh
 
-# Direct Python execution with uvicorn
-python api_server.py
-
-# Or using uvicorn directly
-uvicorn api_server:app --host 0.0.0.0 --port 8000
+# Run directly with uvicorn from the repository root (ensure PYTHONPATH includes src/)
+export PYTHONPATH="$(pwd)/src:$PYTHONPATH"
+uvicorn src.servers.api_server:app --host 0.0.0.0 --port 8000
 
 # Development mode with auto-reload
-uvicorn api_server:app --reload --host 0.0.0.0 --port 8000
+uvicorn src.servers.api_server:app --reload --host 0.0.0.0 --port 8000
 ```
 
 #### Running Both Simultaneously
@@ -84,6 +80,20 @@ uvicorn api_server:app --reload --host 0.0.0.0 --port 8000
 ./run_api_server.sh
 ```
 
+### Running Tests
+
+```bash
+# Run all tests (requires GPU)
+python tests/test_core_components.py
+python tests/test_e2e_integration.py
+python tests/test_async_api_integration.py
+
+# Or run a single test file from inside the tests/ directory
+cd tests && python test_core_components.py
+```
+
+**Important**: All tests require an available GPU and will fail if CUDA is not properly configured.
+
 ### Docker
 
 ```bash
@@ -108,11 +118,19 @@ docker run --gpus all -v /path/to/models:/models -v /path/to/outputs:/outputs wh
 │   │   ├── transcriber.py       # Transcription logic (single & batch)
 │   │   ├── model_manager.py     # Model lifecycle & caching
 │   │   ├── job_queue.py         # Async job queue manager
-│   │   └── gpu_health.py        # GPU health monitoring
+│   │   ├── gpu_health.py        # GPU health monitoring
+│   │   └── gpu_reset.py         # GPU driver reset with cooldown
 │   └── utils/                    # Utility modules
 │       ├── audio_processor.py   # Audio validation & preprocessing
 │       ├── formatters.py        # Output format conversion
-│       └── test_audio_generator.py # Test audio generation for GPU checks
+│       ├── test_audio_generator.py # Test audio generation for GPU checks
+│       ├── startup.py           # Startup sequence orchestration
+│       ├── circuit_breaker.py   # Circuit breaker pattern implementation
+│       └── input_validation.py  # Input validation utilities
+├── tests/                        # Test suite (requires GPU)
+│   ├── test_core_components.py  # Core functionality tests
+│   ├── test_e2e_integration.py  # End-to-end integration tests
+│   └── test_async_api_integration.py # Async API tests
 ├── run_mcp_server.sh            # MCP server startup script
 ├── run_api_server.sh            # API server startup script
 ├── reset_gpu.sh                 # GPU driver reset script
@@ -125,8 +143,9 @@ docker run --gpus all -v /path/to/models:/models -v /path/to/outputs:/outputs wh
 
 1. **src/servers/whisper_server.py** - MCP server entry point
    - Uses FastMCP framework to expose MCP tools
-   - Three main tools: `get_model_info_api()`, `transcribe()`, `batch_transcribe_audio()`
-   - Server initialization at line 19
+   - Main tools: `get_model_info_api()`, `transcribe_async()`, `transcribe_upload()`, `check_job_status()`, `get_job_result()`
+   - Global job queue and health monitor instances
+   - Server initialization around line 31
 
 2. **src/servers/api_server.py** - REST API server entry point
    - Uses FastAPI framework for HTTP endpoints
@@ -174,6 +193,25 @@ docker run --gpus all -v /path/to/models:/models -v /path/to/outputs:/outputs wh
    - `generate_test_audio()` - Creates synthetic 1-second audio for GPU health checks
    - Uses numpy to generate sine wave, no external audio files needed
 
+10. **src/core/gpu_reset.py** - GPU driver reset with cooldown protection
+    - `reset_gpu_driver()` - Executes reset_gpu.sh script to reload NVIDIA drivers
+    - `check_reset_cooldown()` - Checks whether enough time has passed since the last reset
+    - The cooldown timestamp is persisted in `/tmp/whisper-gpu-last-reset`
+    - Prevents reset loops while allowing recovery from sleep/wake issues
+
+11. **src/utils/startup.py** - Startup sequence orchestration
+    - `startup_sequence()` - Coordinates the GPU health check and queue initialization
+    - `cleanup_on_shutdown()` - Cleanup handler for graceful shutdown
+    - Centralizes startup logic shared by both servers
+
+12. **src/utils/circuit_breaker.py** - Circuit breaker pattern implementation
+    - Provides fault tolerance for external service calls
+    - Prevents cascading failures
+
+13. **src/utils/input_validation.py** - Input validation utilities
+    - Validates and sanitizes user inputs
+    - Security layer for API endpoints
+
 ### Key Architecture Patterns
 
 - **Dual Server Architecture**: Both MCP and REST API servers import and use the same core modules (core.transcriber, core.model_manager, utils.audio_processor, utils.formatters), ensuring consistent behavior
@@ -349,32 +387,6 @@ curl http://localhost:8000/health/gpu
 ### Overview
 This service features automatic GPU driver reset on CUDA errors, which is especially useful for recovering from sleep/wake cycles. The reset functionality is **enabled by default** and includes cooldown protection to prevent reset loops.
 
-### Passwordless Sudo Setup (Required)
-
-For automatic GPU reset to work, you must configure passwordless sudo for NVIDIA commands. Create a sudoers configuration file:
-
-```bash
-sudo visudo -f /etc/sudoers.d/whisper-gpu-reset
-```
-
-Add the following (replace `your_username` with your actual username):
-
-```
-# Whisper GPU Auto-Reset Permissions
-your_username ALL=(ALL) NOPASSWD: /bin/systemctl stop nvidia-persistenced
-your_username ALL=(ALL) NOPASSWD: /bin/systemctl start nvidia-persistenced
-your_username ALL=(ALL) NOPASSWD: /sbin/rmmod nvidia_uvm
-your_username ALL=(ALL) NOPASSWD: /sbin/rmmod nvidia_drm
-your_username ALL=(ALL) NOPASSWD: /sbin/rmmod nvidia_modeset
-your_username ALL=(ALL) NOPASSWD: /sbin/rmmod nvidia
-your_username ALL=(ALL) NOPASSWD: /sbin/modprobe nvidia
-your_username ALL=(ALL) NOPASSWD: /sbin/modprobe nvidia_modeset
-your_username ALL=(ALL) NOPASSWD: /sbin/modprobe nvidia_uvm
-your_username ALL=(ALL) NOPASSWD: /sbin/modprobe nvidia_drm
-```
-
-**Security Note:** These permissions are limited to specific NVIDIA driver commands only. The reset script (`reset_gpu.sh`) is executed with sudo but is part of the codebase and can be audited.
-
 ### How It Works
 
 1. **Startup Check**: When the service starts, it performs a GPU health check
@@ -456,6 +468,31 @@ Reset only happens on actual CUDA failures
 
 ## Development Workflow
 
+### Running Tests During Development
+
+The test suite requires GPU access. Ensure CUDA is properly configured before running tests.
+
+```bash
+# Set PYTHONPATH to include src directory
+export PYTHONPATH="$(pwd)/src:$PYTHONPATH"
+
+# Run core component tests (GPU health, job queue, audio validation)
+python tests/test_core_components.py
+
+# Run end-to-end integration tests
+python tests/test_e2e_integration.py
+
+# Run async API integration tests
+python tests/test_async_api_integration.py
+```
+
+Tests will automatically:
+- Check for GPU availability (and exit if no GPU is available)
+- Validate audio file processing
+- Test GPU health monitoring
+- Test job queue operations
+- Test transcription pipeline
+
 ### Testing GPU Health
 ```python
 # Test GPU health check manually
diff --git a/api.logs b/api.logs
deleted file mode 100644
index b00b111..0000000
--- a/api.logs
+++ /dev/null
@@ -1,85 +0,0 @@
-INFO:__main__:======================================================================
-INFO:__main__:PERFORMING STARTUP GPU HEALTH CHECK
-INFO:__main__:======================================================================
-INFO:faster_whisper:Processing audio with duration 00:01.512
-INFO:faster_whisper:Detected language 'en' with probability 0.95
-INFO:core.gpu_health:GPU health check passed: NVIDIA GeForce RTX 3060, test duration: 1.04s
-INFO:__main__:======================================================================
-INFO:__main__:STARTUP GPU CHECK SUCCESSFUL
-INFO:__main__:GPU Device: NVIDIA GeForce RTX 3060
-INFO:__main__:Memory Available: 11.66 GB
-INFO:__main__:Test Duration: 1.04s
-INFO:__main__:======================================================================
-INFO:__main__:Starting Whisper REST API server on 0.0.0.0:8000
-INFO:     Started server process [69821]
-INFO:     Waiting for application startup.
-INFO:__main__:Starting job queue and health monitor...
-INFO:core.job_queue:Starting job queue (max size: 100)
-INFO:core.job_queue:Loading jobs from /media/raid/agents/tools/mcp-transcriptor/outputs/jobs
-INFO:core.job_queue:Loaded 8 jobs from disk
-INFO:core.job_queue:Job queue worker loop started
-INFO:core.job_queue:Job queue worker started
-INFO:__main__:Job queue started (max_size=100, metadata_dir=/media/raid/agents/tools/mcp-transcriptor/outputs/jobs)
-INFO:core.gpu_health:Starting GPU health monitor (interval: 10.0 minutes)
-INFO:faster_whisper:Processing audio with duration 00:01.512
-INFO:faster_whisper:Detected language 'en' with probability 0.95
-INFO:core.gpu_health:GPU health check passed: NVIDIA GeForce RTX 3060, test duration: 0.37s
-INFO:__main__:GPU health monitor started (interval=10 minutes)
-INFO:     Application startup complete.
-INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
-INFO:     127.0.0.1:48092 - "GET /jobs HTTP/1.1" 200 OK
-INFO:     127.0.0.1:60874 - "GET /jobs?status=completed&limit=3 HTTP/1.1" 200 OK
-INFO:     127.0.0.1:60876 - "GET /jobs?status=failed&limit=10 HTTP/1.1" 200 OK
-INFO:core.job_queue:Running GPU health check before job submission
-INFO:faster_whisper:Processing audio with duration 00:01.512
-INFO:faster_whisper:Detected language 'en' with probability 0.95
-INFO:core.gpu_health:GPU health check passed: NVIDIA GeForce RTX 3060, test duration: 0.39s
-INFO:core.job_queue:GPU health check passed
-INFO:core.job_queue:Job 6be8e49a-bdc1-4508-af99-280bef033cb0 submitted: /tmp/whisper_test_voice_1s.mp3 (queue position: 1)
-INFO:     127.0.0.1:58376 - "POST /jobs HTTP/1.1" 200 OK
-INFO:core.job_queue:Job 6be8e49a-bdc1-4508-af99-280bef033cb0 started processing
-INFO:core.model_manager:Running GPU health check with auto-reset before model loading
-INFO:faster_whisper:Processing audio with duration 00:01.512
-INFO:faster_whisper:Detected language 'en' with probability 0.95
-INFO:core.gpu_health:GPU health check passed: NVIDIA GeForce RTX 3060, test duration: 0.54s
-INFO:core.model_manager:Loading Whisper model: tiny device: cuda compute type: float16
-INFO:core.model_manager:Available GPU memory: 12.52 GB
-INFO:core.model_manager:Enabling batch processing acceleration, batch size: 16
-INFO:core.transcriber:Starting transcription of file: whisper_test_voice_1s.mp3
-INFO:utils.audio_processor:Successfully preprocessed audio: whisper_test_voice_1s.mp3
-INFO:core.transcriber:Using batch acceleration for transcription...
-INFO:faster_whisper:Processing audio with duration 00:01.512
-INFO:faster_whisper:VAD filter removed 00:00.000 of audio
-INFO:faster_whisper:Detected language 'en' with probability 0.95
-INFO:core.transcriber:Transcription completed, time used: 0.16 seconds, detected language: en, audio length: 1.51 seconds
-INFO:core.transcriber:Transcription results saved to: /media/raid/agents/tools/mcp-transcriptor/outputs/whisper_test_voice_1s.txt
-INFO:core.job_queue:Job 6be8e49a-bdc1-4508-af99-280bef033cb0 completed successfully: /media/raid/agents/tools/mcp-transcriptor/outputs/whisper_test_voice_1s.txt
-INFO:core.job_queue:Job 6be8e49a-bdc1-4508-af99-280bef033cb0 finished: status=completed, duration=1.1s
-INFO:     127.0.0.1:41646 - "GET /jobs/6be8e49a-bdc1-4508-af99-280bef033cb0 HTTP/1.1" 200 OK
-INFO:     127.0.0.1:34046 - "GET /jobs/6be8e49a-bdc1-4508-af99-280bef033cb0/result HTTP/1.1" 200 OK
-INFO:core.job_queue:Running GPU health check before job submission
-INFO:faster_whisper:Processing audio with duration 00:01.512
-INFO:faster_whisper:Detected language 'en' with probability 0.95
-INFO:core.gpu_health:GPU health check passed: NVIDIA GeForce RTX 3060, test duration: 0.39s
-INFO:core.job_queue:GPU health check passed
-INFO:core.job_queue:Job 41ce74c0-8929-457b-96b3-1b8e4a720a7a submitted: /home/uad/agents/tools/mcp-transcriptor/data/test.mp3 (queue position: 1)
-INFO:     127.0.0.1:44576 - "POST /jobs HTTP/1.1" 200 OK
-INFO:core.job_queue:Job 41ce74c0-8929-457b-96b3-1b8e4a720a7a started processing
-INFO:core.model_manager:Running GPU health check with auto-reset before model loading
-INFO:faster_whisper:Processing audio with duration 00:01.512
-INFO:faster_whisper:Detected language 'en' with probability 0.95
-INFO:core.gpu_health:GPU health check passed: NVIDIA GeForce RTX 3060, test duration: 0.39s
-INFO:core.model_manager:Loading Whisper model: large-v3 device: cuda compute type: float16
-INFO:core.model_manager:Available GPU memory: 12.52 GB
-INFO:core.model_manager:Enabling batch processing acceleration, batch size: 16
-INFO:core.transcriber:Starting transcription of file: test.mp3
-INFO:utils.audio_processor:Successfully preprocessed audio: test.mp3
-INFO:core.transcriber:Using batch acceleration for transcription...
-INFO:faster_whisper:Processing audio with duration 00:06.955
-INFO:faster_whisper:VAD filter removed 00:00.299 of audio
-INFO:core.transcriber:Transcription completed, time used: 0.52 seconds, detected language: en, audio length: 6.95 seconds
-INFO:core.transcriber:Transcription results saved to: /media/raid/agents/tools/mcp-transcriptor/outputs/test.txt
-INFO:core.job_queue:Job 41ce74c0-8929-457b-96b3-1b8e4a720a7a completed successfully: /media/raid/agents/tools/mcp-transcriptor/outputs/test.txt
-INFO:core.job_queue:Job 41ce74c0-8929-457b-96b3-1b8e4a720a7a finished: status=completed, duration=23.3s
-INFO:     127.0.0.1:59120 - "GET /jobs/41ce74c0-8929-457b-96b3-1b8e4a720a7a HTTP/1.1" 200 OK
-INFO:     127.0.0.1:53806 - "GET /jobs/41ce74c0-8929-457b-96b3-1b8e4a720a7a/result HTTP/1.1" 200 OK
diff --git a/mcp.logs b/mcp.logs
deleted file mode 100644
index 7f4bb24..0000000
--- a/mcp.logs
+++ /dev/null
@@ -1,25 +0,0 @@
-starting mcp server for whisper stt transcriptor
-INFO:__main__:======================================================================
-INFO:__main__:PERFORMING STARTUP GPU HEALTH CHECK
-INFO:__main__:======================================================================
-INFO:faster_whisper:Processing audio with duration 00:01.512
-INFO:faster_whisper:Detected language 'en' with probability 0.95
-INFO:core.gpu_health:GPU health check passed: NVIDIA GeForce RTX 3060, test duration: 0.93s
-INFO:__main__:======================================================================
-INFO:__main__:STARTUP GPU CHECK SUCCESSFUL
-INFO:__main__:GPU Device: NVIDIA GeForce RTX 3060
-INFO:__main__:Memory Available: 11.66 GB
-INFO:__main__:Test Duration: 0.93s
-INFO:__main__:======================================================================
-INFO:__main__:Initializing job queue...
-INFO:core.job_queue:Starting job queue (max size: 100)
-INFO:core.job_queue:Loading jobs from /media/raid/agents/tools/mcp-transcriptor/outputs/jobs
-INFO:core.job_queue:Loaded 5 jobs from disk
-INFO:core.job_queue:Job queue worker loop started
-INFO:core.job_queue:Job queue worker started
-INFO:__main__:Job queue started (max_size=100, metadata_dir=/media/raid/agents/tools/mcp-transcriptor/outputs/jobs)
-INFO:core.gpu_health:Starting GPU health monitor (interval: 10.0 minutes)
-INFO:faster_whisper:Processing audio with duration 00:01.512
-INFO:faster_whisper:Detected language 'en' with probability 0.95
-INFO:core.gpu_health:GPU health check passed: NVIDIA GeForce RTX 3060, test duration: 0.38s
-INFO:__main__:GPU health monitor started (interval=10 minutes)
diff --git a/run_api_server.sh b/run_api_server.sh
index 0e52387..81e63c9 100755
--- a/run_api_server.sh
+++ b/run_api_server.sh
@@ -33,7 +33,7 @@ export API_PORT="8000"
 export GPU_RESET_COOLDOWN_MINUTES=5  # Minimum time between GPU reset attempts
 
 # Job Queue Configuration
-export JOB_QUEUE_MAX_SIZE=100
+export JOB_QUEUE_MAX_SIZE=5
 export JOB_METADATA_DIR="/media/raid/agents/tools/mcp-transcriptor/outputs/jobs"
 export JOB_RETENTION_DAYS=7
 
diff --git a/supervisor/transcriptor-api.conf b/supervisor/transcriptor-api.conf
index 1cbe675..e2b81e5 100644
--- a/supervisor/transcriptor-api.conf
+++ b/supervisor/transcriptor-api.conf
@@ -1,4 +1,4 @@
-[program:whisper-api-server]
+[program:transcriptor-api]
 command=/home/uad/agents/tools/mcp-transcriptor/venv/bin/python /home/uad/agents/tools/mcp-transcriptor/src/servers/api_server.py
 directory=/home/uad/agents/tools/mcp-transcriptor
 user=uad