- Implement circuit breaker pattern for GPU health checks (illustrative sketch below)
  - Prevents repeated failures with configurable thresholds
  - Three states: CLOSED, OPEN, HALF_OPEN
  - Integrated into GPU health monitoring
- Add comprehensive input validation and path sanitization (see the validation sketch below)
  - Path traversal attack prevention
  - Whitelist-based validation for models, devices, formats
  - Error message sanitization to prevent information leakage
  - File size limits and security checks
- Centralize startup logic across servers
  - Extract common startup procedures to utils/startup.py
  - Deduplicate GPU health checks and initialization code
  - Simplify both MCP and API server startup sequences
- Add proper Python package structure
  - Add __init__.py files to all modules
  - Improve package organization
- Add circuit breaker status API endpoints
  - GET /health/circuit-breaker - View circuit breaker stats
  - POST /health/circuit-breaker/reset - Reset circuit breaker
- Reorganize test files into tests/ directory
  - Rename and restructure test files for better organization
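The circuit breaker implementation referenced above is not included in this file. As a rough illustration of the described three-state behaviour (CLOSED, OPEN, HALF_OPEN) with configurable thresholds, a minimal sketch might look like the following; the class name `CircuitBreaker`, the `failure_threshold`/`reset_timeout_seconds` parameters, and the `RuntimeError` raised for rejected calls are assumptions for illustration, not the project's actual API.

```python
import time
from enum import Enum


class CircuitState(Enum):
    CLOSED = "closed"        # calls pass through; failures are counted
    OPEN = "open"            # calls are rejected until the cooldown expires
    HALF_OPEN = "half_open"  # one trial call decides whether to close or re-open


class CircuitBreaker:
    """Minimal sketch: open after `failure_threshold` consecutive failures,
    allow a single trial call after `reset_timeout_seconds`."""

    def __init__(self, failure_threshold: int = 3, reset_timeout_seconds: float = 60.0):
        self.failure_threshold = failure_threshold
        self.reset_timeout_seconds = reset_timeout_seconds
        self.state = CircuitState.CLOSED
        self.failure_count = 0
        self.opened_at = 0.0

    def call(self, func, *args, **kwargs):
        if self.state is CircuitState.OPEN:
            if time.monotonic() - self.opened_at >= self.reset_timeout_seconds:
                self.state = CircuitState.HALF_OPEN  # cooldown elapsed: allow one trial call
            else:
                raise RuntimeError("circuit breaker is OPEN; skipping health check")
        try:
            result = func(*args, **kwargs)
        except Exception:
            self._record_failure()
            raise
        self._record_success()
        return result

    def _record_failure(self):
        self.failure_count += 1
        # A failed trial call, or too many consecutive failures, opens the circuit.
        if self.state is CircuitState.HALF_OPEN or self.failure_count >= self.failure_threshold:
            self.state = CircuitState.OPEN
            self.opened_at = time.monotonic()

    def _record_success(self):
        self.failure_count = 0
        self.state = CircuitState.CLOSED

    def reset(self):
        """Manual reset, the kind of hook a reset endpoint would call."""
        self._record_success()
```

A manual `reset()` like the one above is the sort of hook the `POST /health/circuit-breaker/reset` endpoint would invoke; the actual wiring in the project may differ.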
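Similarly, the input validation and path sanitization code is not part of the test file below. A minimal sketch of whitelist-based validation with path traversal prevention, a file size limit, and sanitized error messages follows; the whitelists, the size cap, and the `validate_request` function name are illustrative assumptions, not the project's actual implementation.

```python
from pathlib import Path

# Illustrative whitelists and limit; the real service defines its own values.
ALLOWED_MODELS = {"tiny", "base", "small", "medium", "large"}
ALLOWED_DEVICES = {"cuda"}
ALLOWED_FORMATS = {"txt", "srt", "vtt", "json"}
MAX_FILE_SIZE_BYTES = 1 * 1024 * 1024 * 1024  # example 1 GiB cap


def validate_request(audio_path: str, model_name: str, device: str,
                     output_format: str, allowed_root: str) -> Path:
    """Reject values outside the whitelists and paths escaping allowed_root."""
    if model_name not in ALLOWED_MODELS:
        raise ValueError("unsupported model")          # generic message, no echo of input
    if device not in ALLOWED_DEVICES:
        raise ValueError("unsupported device")
    if output_format not in ALLOWED_FORMATS:
        raise ValueError("unsupported output format")

    root = Path(allowed_root).resolve()
    resolved = Path(audio_path).resolve()
    if not resolved.is_relative_to(root):  # Python 3.9+; blocks ../ traversal
        raise ValueError("path outside allowed directory")
    if not resolved.is_file():
        raise ValueError("audio file not found")
    if resolved.stat().st_size > MAX_FILE_SIZE_BYTES:
        raise ValueError("audio file too large")
    return resolved
```

Resolving the path before the containment check is what defeats `../` traversal, and keeping the error messages generic avoids echoing attacker-controlled input back to the client.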
#!/usr/bin/env python3
"""
Test script for Phase 1 components.

Tests:
1. Test audio file validation
2. GPU health check
3. Job queue operations

IMPORTANT: This service requires GPU. Tests will fail if GPU is not available.
"""

import sys
import os
import time
import torch
import logging

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%H:%M:%S'
)

# Add src to path (go up one level from tests/ to root)
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))

from core.gpu_health import check_gpu_health, HealthMonitor
from core.job_queue import JobQueue, JobStatus


def check_gpu_available():
    """
    Check if GPU is available. Exit if not.
    This service requires GPU and will not run on CPU.
    """
    print("\n" + "="*60)
    print("GPU REQUIREMENT CHECK")
    print("="*60)

    if not torch.cuda.is_available():
        print("✗ CUDA not available - GPU is required for this service")
        print(" This service is configured for GPU-only operation")
        print(" Please ensure CUDA is properly installed and GPU is accessible")
        print("="*60)
        sys.exit(1)

    gpu_name = torch.cuda.get_device_name(0)
    gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
    print(f"✓ GPU available: {gpu_name}")
    print(f"✓ GPU memory: {gpu_memory:.2f} GB")
    print("="*60)


def test_audio_file():
    """Test audio file existence and validity."""
    print("\n" + "="*60)
    print("TEST 1: Test Audio File")
    print("="*60)

    try:
        # Use the actual test audio file (relative to project root)
        project_root = os.path.join(os.path.dirname(__file__), '..')
        audio_path = os.path.join(project_root, "data/test.mp3")

        # Verify file exists
        assert os.path.exists(audio_path), "Audio file not found"
        print(f"✓ Test audio file exists: {audio_path}")

        # Verify file is not empty
        file_size = os.path.getsize(audio_path)
        assert file_size > 0, "Audio file is empty"
        print(f"✓ Audio file size: {file_size} bytes")

        return True

    except Exception as e:
        print(f"✗ Audio file test failed: {e}")
        import traceback
        traceback.print_exc()
        return False


def test_gpu_health():
    """Test GPU health check."""
    print("\n" + "="*60)
    print("TEST 2: GPU Health Check")
    print("="*60)

    try:
        # Test with cuda device (enforcing GPU requirement)
        print("\nRunning health check with device='cuda'...")
        logging.info("Starting GPU health check...")
        status = check_gpu_health(expected_device="cuda")
        logging.info("GPU health check completed")

        print(f"✓ Health check completed")
        print(f" - GPU available: {status.gpu_available}")
        print(f" - GPU working: {status.gpu_working}")
        print(f" - Device used: {status.device_used}")
        print(f" - Device name: {status.device_name}")
        print(f" - Memory total: {status.memory_total_gb:.2f} GB")
        print(f" - Memory available: {status.memory_available_gb:.2f} GB")
        print(f" - Test duration: {status.test_duration_seconds:.2f}s")
        print(f" - Error: {status.error}")

        # Test health monitor
        print("\nTesting HealthMonitor...")
        monitor = HealthMonitor(check_interval_minutes=1)
        monitor.start()
        print("✓ Health monitor started")

        time.sleep(1)

        latest = monitor.get_latest_status()
        assert latest is not None, "No status available from monitor"
        print(f"✓ Latest status retrieved: {latest.device_used}")

        monitor.stop()
        print("✓ Health monitor stopped")

        return True

    except Exception as e:
        print(f"✗ GPU health test failed: {e}")
        import traceback
        traceback.print_exc()
        return False


def test_job_queue():
    """Test job queue operations."""
    print("\n" + "="*60)
    print("TEST 3: Job Queue")
    print("="*60)

    # Create temp directory for testing
    import tempfile
    temp_dir = tempfile.mkdtemp(prefix="job_queue_test_")
    print(f"Using temp directory: {temp_dir}")

    try:
        # Initialize job queue
        print("\nInitializing job queue...")
        job_queue = JobQueue(max_queue_size=10, metadata_dir=temp_dir)
        job_queue.start()
        print("✓ Job queue started")

        # Use the actual test audio file (relative to project root)
        project_root = os.path.join(os.path.dirname(__file__), '..')
        audio_path = os.path.join(project_root, "data/test.mp3")

        # Test job submission
        print("\nSubmitting test job...")
        logging.info("Submitting transcription job to queue...")
        job_info = job_queue.submit_job(
            audio_path=audio_path,
            model_name="tiny",
            device="cuda",  # Enforcing GPU requirement
            output_format="txt"
        )
        job_id = job_info["job_id"]
        logging.info(f"Job submitted: {job_id}")
        print(f"✓ Job submitted: {job_id}")
        print(f" - Status: {job_info['status']}")
        print(f" - Queue position: {job_info['queue_position']}")

        # Test job status retrieval
        print("\nRetrieving job status...")
        logging.info("About to call get_job_status()...")
        status = job_queue.get_job_status(job_id)
        logging.info(f"get_job_status() returned: {status['status']}")
        print(f"✓ Job status retrieved")
        print(f" - Status: {status['status']}")
        print(f" - Queue position: {status['queue_position']}")

        # Wait for job to process
        print("\nWaiting for job to process (max 30 seconds)...", flush=True)
        logging.info("Waiting for transcription to complete...")
        max_wait = 30
        start = time.time()
        while time.time() - start < max_wait:
            logging.info("Calling get_job_status()...")
            status = job_queue.get_job_status(job_id)
            print(f" Status: {status['status']}", flush=True)
            logging.info(f"Job status: {status['status']}")

            if status['status'] in ['completed', 'failed']:
                logging.info("Job completed or failed, breaking out of loop")
                break

            logging.info("Job still running, sleeping 2 seconds...")
            time.sleep(2)

        final_status = job_queue.get_job_status(job_id)
        print(f"\nFinal job status: {final_status['status']}")

        if final_status['status'] == 'completed':
            print(f"✓ Job completed successfully")
            print(f" - Result path: {final_status['result_path']}")
            print(f" - Processing time: {final_status['processing_time_seconds']:.2f}s")

            # Test result retrieval
            print("\nRetrieving job result...")
            logging.info("Calling get_job_result()...")
            result = job_queue.get_job_result(job_id)
            logging.info(f"Result retrieved: {len(result)} characters")
            print(f"✓ Result retrieved ({len(result)} characters)")
            print(f" Preview: {result[:100]}...")

        elif final_status['status'] == 'failed':
            print(f"✗ Job failed: {final_status['error']}")

        # Test persistence by stopping and restarting
        print("\nTesting persistence...")
        logging.info("Stopping job queue...")
        job_queue.stop(wait_for_current=False)
        print("✓ Job queue stopped")
        logging.info("Job queue stopped")

        logging.info("Restarting job queue...")
        job_queue2 = JobQueue(max_queue_size=10, metadata_dir=temp_dir)
        job_queue2.start()
        print("✓ Job queue restarted")
        logging.info("Job queue restarted")

        logging.info("Checking job status after restart...")
        status_after_restart = job_queue2.get_job_status(job_id)
        print(f"✓ Job still exists after restart: {status_after_restart['status']}")
        logging.info(f"Job status after restart: {status_after_restart['status']}")

        logging.info("Stopping job queue 2...")
        job_queue2.stop()
        logging.info("Job queue 2 stopped")

        # Cleanup
        import shutil
        shutil.rmtree(temp_dir)
        print(f"✓ Cleaned up temp directory")

        return final_status['status'] == 'completed'

    except Exception as e:
        print(f"✗ Job queue test failed: {e}")
        import traceback
        traceback.print_exc()
        return False


def main():
    """Run all tests."""
    print("\n" + "="*60)
    print("PHASE 1 COMPONENT TESTS")
    print("="*60)

    # Check GPU availability first - exit if no GPU
    check_gpu_available()

    results = {
        "Test Audio File": test_audio_file(),
        "GPU Health Check": test_gpu_health(),
        "Job Queue": test_job_queue(),
    }

    print("\n" + "="*60)
    print("TEST SUMMARY")
    print("="*60)

    for test_name, passed in results.items():
        status = "✓ PASSED" if passed else "✗ FAILED"
        print(f"{test_name:.<40} {status}")

    all_passed = all(results.values())
    print("\n" + "="*60)
    if all_passed:
        print("ALL TESTS PASSED ✓")
    else:
        print("SOME TESTS FAILED ✗")
    print("="*60)

    return 0 if all_passed else 1


if __name__ == "__main__":
    sys.exit(main())