- Implement circuit breaker pattern for GPU health checks (see the sketch after this list)
  - Prevents repeated failures with configurable thresholds
  - Three states: CLOSED, OPEN, HALF_OPEN
  - Integrated into GPU health monitoring
- Add comprehensive input validation and path sanitization
  - Path traversal attack prevention
  - Whitelist-based validation for models, devices, formats
  - Error message sanitization to prevent information leakage
  - File size limits and security checks
- Centralize startup logic across servers
  - Extract common startup procedures to utils/startup.py
  - Deduplicate GPU health checks and initialization code
  - Simplify both MCP and API server startup sequences
- Add proper Python package structure
  - Add __init__.py files to all modules
  - Improve package organization
- Add circuit breaker status API endpoints
  - GET /health/circuit-breaker - View circuit breaker stats
  - POST /health/circuit-breaker/reset - Reset circuit breaker
- Reorganize test files into tests/ directory
  - Rename and restructure test files for better organization
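The circuit breaker itself is not part of this test file; the following is a minimal sketch of the pattern the commit describes, assuming a simple consecutive-failure counter and a cooldown timer. Class, method, and parameter names here are illustrative, not the project's actual API.

```python
# Minimal circuit breaker sketch (illustrative names, not the project's implementation).
import time
from enum import Enum


class State(Enum):
    CLOSED = "closed"        # calls pass through; failures are counted
    OPEN = "open"            # calls are rejected until the cooldown expires
    HALF_OPEN = "half_open"  # one trial call decides whether to close again


class CircuitBreaker:
    def __init__(self, failure_threshold=3, reset_timeout=30.0):
        self.failure_threshold = failure_threshold
        self.reset_timeout = reset_timeout
        self.failures = 0
        self.state = State.CLOSED
        self.opened_at = 0.0

    def call(self, func, *args, **kwargs):
        if self.state is State.OPEN:
            if time.monotonic() - self.opened_at < self.reset_timeout:
                raise RuntimeError("circuit open: call rejected")
            self.state = State.HALF_OPEN  # cooldown elapsed, allow one trial call
        try:
            result = func(*args, **kwargs)
        except Exception:
            self.failures += 1
            if self.state is State.HALF_OPEN or self.failures >= self.failure_threshold:
                self.state = State.OPEN
                self.opened_at = time.monotonic()
            raise
        else:
            self.failures = 0
            self.state = State.CLOSED
            return result
```

In this scheme the breaker opens after `failure_threshold` consecutive failures, rejects calls until `reset_timeout` elapses, and a single successful half-open call closes it again.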
#!/usr/bin/env python3
"""
Test Phase 4: End-to-End Integration Testing

Comprehensive integration tests for the async job queue system.
Tests all scenarios from the DEV_PLAN.md Phase 4 checklist.
"""

import os
import sys
import time
import json
import logging
import requests
import subprocess
import signal
from pathlib import Path
from datetime import datetime

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

# Add src to path (go up one level from tests/ to root)
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

# Color codes for terminal output
class Colors:
    GREEN = '\033[92m'
    RED = '\033[91m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'
    CYAN = '\033[96m'
    END = '\033[0m'
    BOLD = '\033[1m'

def print_success(msg):
    print(f"{Colors.GREEN}✓ {msg}{Colors.END}")

def print_error(msg):
    print(f"{Colors.RED}✗ {msg}{Colors.END}")

def print_info(msg):
    print(f"{Colors.BLUE}ℹ {msg}{Colors.END}")

def print_warning(msg):
    print(f"{Colors.YELLOW}⚠ {msg}{Colors.END}")

def print_section(msg):
    print(f"\n{Colors.BOLD}{Colors.YELLOW}{'='*70}{Colors.END}")
    print(f"{Colors.BOLD}{Colors.YELLOW}{msg}{Colors.END}")
    print(f"{Colors.BOLD}{Colors.YELLOW}{'='*70}{Colors.END}\n")


class Phase4Tester:
    def __init__(self, api_url="http://localhost:8000", test_audio=None):
        self.api_url = api_url
        # Use relative path from project root if not provided
        if test_audio is None:
            project_root = Path(__file__).parent.parent
            test_audio = str(project_root / "data" / "test.mp3")
        self.test_audio = test_audio
        self.test_results = []
        self.server_process = None

        # Verify test audio exists
        if not os.path.exists(test_audio):
            raise FileNotFoundError(f"Test audio file not found: {test_audio}")

    def test(self, name, func):
        """Run a test and record result"""
        try:
            logger.info(f"Testing: {name}")
            print_info(f"Testing: {name}")
            func()
            logger.info(f"PASSED: {name}")
            print_success(f"PASSED: {name}")
            self.test_results.append((name, True, None))
            return True
        except AssertionError as e:
            logger.error(f"FAILED: {name} - {str(e)}")
            print_error(f"FAILED: {name}")
            print_error(f" Reason: {str(e)}")
            self.test_results.append((name, False, str(e)))
            return False
        except Exception as e:
            logger.error(f"ERROR: {name} - {str(e)}")
            print_error(f"ERROR: {name}")
            print_error(f" Exception: {str(e)}")
            self.test_results.append((name, False, f"Exception: {str(e)}"))
            return False

    def start_api_server(self, wait_time=5):
        """Start the API server in background"""
        print_info("Starting API server...")
        # Script is in project root, one level up from tests/
        script_path = Path(__file__).parent.parent / "run_api_server.sh"

        # Start server in background
        self.server_process = subprocess.Popen(
            [str(script_path)],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            preexec_fn=os.setsid
        )

        # Wait for server to start
        time.sleep(wait_time)

        # Verify server is running
        try:
            response = requests.get(f"{self.api_url}/health", timeout=5)
            if response.status_code == 200:
                print_success("API server started successfully")
                return True
        except requests.exceptions.RequestException:
            # Server not reachable yet; fall through to the failure path
            pass

        print_error("API server failed to start")
        return False

    def stop_api_server(self):
        """Stop the API server"""
        if self.server_process:
            print_info("Stopping API server...")
            os.killpg(os.getpgid(self.server_process.pid), signal.SIGTERM)
            self.server_process.wait(timeout=10)
            print_success("API server stopped")

    def wait_for_job_completion(self, job_id, timeout=60, poll_interval=2):
        """Poll job status until completed or failed"""
        start_time = time.time()
        last_status = None

        while time.time() - start_time < timeout:
            try:
                response = requests.get(f"{self.api_url}/jobs/{job_id}")
                assert response.status_code == 200, f"Failed to get job status: {response.status_code}"

                status_data = response.json()
                current_status = status_data['status']

                # Print status changes
                if current_status != last_status:
                    if status_data.get('queue_position') is not None:
                        print_info(f" Job status: {current_status}, queue position: {status_data['queue_position']}")
                    else:
                        print_info(f" Job status: {current_status}")
                    last_status = current_status

                if current_status == "completed":
                    return status_data
                elif current_status == "failed":
                    raise AssertionError(f"Job failed: {status_data.get('error', 'Unknown error')}")

                time.sleep(poll_interval)
            except requests.exceptions.RequestException as e:
                raise AssertionError(f"Request failed: {e}")

        raise AssertionError(f"Job did not complete within {timeout} seconds")

    # ========================================================================
    # TEST 1: Single Job Submission and Completion
    # ========================================================================
    def test_single_job_flow(self):
        """Test complete job flow: submit → poll → get result"""
        # Submit job
        print_info(" Submitting job...")
        response = requests.post(f"{self.api_url}/jobs", json={
            "audio_path": self.test_audio,
            "model_name": "large-v3",
            "output_format": "txt"
        })
        assert response.status_code == 200, f"Job submission failed: {response.status_code}"

        job_data = response.json()
        assert 'job_id' in job_data, "No job_id in response"
        # Status can be 'queued' or 'running' (if queue is empty and job starts immediately)
        assert job_data['status'] in ['queued', 'running'], f"Expected status 'queued' or 'running', got '{job_data['status']}'"

        job_id = job_data['job_id']
        print_success(f" Job submitted: {job_id}")

        # Wait for completion
        print_info(" Waiting for job completion...")
        final_status = self.wait_for_job_completion(job_id)

        assert final_status['status'] == 'completed', "Job did not complete"
        assert final_status['result_path'] is not None, "No result_path in completed job"
        assert final_status['processing_time_seconds'] is not None, "No processing time"
        print_success(f" Job completed in {final_status['processing_time_seconds']:.2f}s")

        # Get result
        print_info(" Retrieving result...")
        response = requests.get(f"{self.api_url}/jobs/{job_id}/result")
        assert response.status_code == 200, f"Failed to get result: {response.status_code}"

        result_text = response.text
        assert len(result_text) > 0, "Empty result"
        print_success(f" Got result: {len(result_text)} characters")

    # ========================================================================
    # TEST 2: Multiple Jobs in Queue (FIFO)
    # ========================================================================
    def test_multiple_jobs_fifo(self):
        """Test multiple jobs are processed in FIFO order"""
        job_ids = []

        # Submit 3 jobs
        print_info(" Submitting 3 jobs...")
        for i in range(3):
            response = requests.post(f"{self.api_url}/jobs", json={
                "audio_path": self.test_audio,
                "model_name": "tiny",  # Use tiny model for faster processing
                "output_format": "txt"
            })
            assert response.status_code == 200, f"Job {i+1} submission failed"

            job_data = response.json()
            job_ids.append(job_data['job_id'])
            print_info(f" Job {i+1} submitted: {job_data['job_id']}, queue_position: {job_data.get('queue_position', 0)}")

        # Wait for all jobs to complete
        print_info(" Waiting for all jobs to complete...")
        for i, job_id in enumerate(job_ids):
            print_info(f" Waiting for job {i+1}/{len(job_ids)}...")
            final_status = self.wait_for_job_completion(job_id, timeout=120)
            assert final_status['status'] == 'completed', f"Job {i+1} failed"

        print_success(f" All {len(job_ids)} jobs completed in FIFO order")

    # ========================================================================
    # TEST 3: GPU Health Check
    # ========================================================================
    def test_gpu_health_check(self):
        """Test GPU health check endpoint"""
        print_info(" Checking GPU health...")
        response = requests.get(f"{self.api_url}/health/gpu")
        assert response.status_code == 200, f"GPU health check failed: {response.status_code}"

        health_data = response.json()
        assert 'gpu_available' in health_data, "Missing gpu_available field"
        assert 'gpu_working' in health_data, "Missing gpu_working field"
        assert 'device_used' in health_data, "Missing device_used field"

        print_info(f" GPU Available: {health_data['gpu_available']}")
        print_info(f" GPU Working: {health_data['gpu_working']}")
        print_info(f" Device: {health_data['device_used']}")

        if health_data['gpu_available']:
            assert health_data['device_name'], "GPU available but no device_name"
            assert health_data['test_duration_seconds'] < 3, "GPU test took too long (might be using CPU)"
            print_success(f" GPU is healthy: {health_data['device_name']}")
        else:
            print_warning(" GPU not available on this system")

    # ========================================================================
    # TEST 4: Invalid Audio Path
    # ========================================================================
    def test_invalid_audio_path(self):
        """Test job submission with invalid audio path"""
        print_info(" Submitting job with invalid path...")
        response = requests.post(f"{self.api_url}/jobs", json={
            "audio_path": "/invalid/path/does/not/exist.mp3",
            "model_name": "large-v3"
        })

        # Should return 400 Bad Request
        assert response.status_code == 400, f"Expected 400, got {response.status_code}"

        error_data = response.json()
        assert 'detail' in error_data or 'error' in error_data, "No error message in response"
        print_success(" Invalid path rejected correctly")

    # ========================================================================
    # TEST 5: Job Not Found
    # ========================================================================
    def test_job_not_found(self):
        """Test retrieving non-existent job"""
        print_info(" Requesting non-existent job...")
        fake_job_id = "00000000-0000-0000-0000-000000000000"

        response = requests.get(f"{self.api_url}/jobs/{fake_job_id}")
        assert response.status_code == 404, f"Expected 404, got {response.status_code}"
        print_success(" Non-existent job handled correctly")

    # ========================================================================
    # TEST 6: Result Before Completion
    # ========================================================================
    def test_result_before_completion(self):
        """Test getting result for job that hasn't completed"""
        print_info(" Submitting job and trying to get result immediately...")

        # Submit job
        response = requests.post(f"{self.api_url}/jobs", json={
            "audio_path": self.test_audio,
            "model_name": "large-v3"
        })
        assert response.status_code == 200
        job_id = response.json()['job_id']

        # Try to get result immediately (job is still queued/running)
        time.sleep(0.5)
        response = requests.get(f"{self.api_url}/jobs/{job_id}/result")

        # Should return 409 Conflict or similar
        assert response.status_code in [409, 400, 404], f"Expected 4xx error, got {response.status_code}"
        print_success(" Result request before completion handled correctly")

        # Clean up: wait for job to complete
        self.wait_for_job_completion(job_id)

    # ========================================================================
    # TEST 7: List Jobs
    # ========================================================================
    def test_list_jobs(self):
        """Test listing jobs with filters"""
        print_info(" Testing job listing...")

        # List all jobs
        response = requests.get(f"{self.api_url}/jobs")
        assert response.status_code == 200, f"List jobs failed: {response.status_code}"

        jobs_data = response.json()
        assert 'jobs' in jobs_data, "No jobs array in response"
        assert isinstance(jobs_data['jobs'], list), "Jobs is not a list"
        print_info(f" Found {len(jobs_data['jobs'])} jobs")

        # List only completed jobs
        response = requests.get(f"{self.api_url}/jobs?status=completed")
        assert response.status_code == 200
        completed_jobs = response.json()['jobs']
        print_info(f" Found {len(completed_jobs)} completed jobs")

        # List with limit
        response = requests.get(f"{self.api_url}/jobs?limit=5")
        assert response.status_code == 200
        limited_jobs = response.json()['jobs']
        assert len(limited_jobs) <= 5, "Limit not respected"
        print_success(" Job listing works correctly")

    # ========================================================================
    # TEST 8: Server Restart with Job Persistence
    # ========================================================================
    def test_server_restart_persistence(self):
        """Test that jobs persist across server restarts"""
        print_info(" Testing job persistence across restart...")

        # Submit a job
        response = requests.post(f"{self.api_url}/jobs", json={
            "audio_path": self.test_audio,
            "model_name": "tiny"
        })
        assert response.status_code == 200
        job_id = response.json()['job_id']
        print_info(f" Submitted job: {job_id}")

        # Get job count before restart
        response = requests.get(f"{self.api_url}/jobs")
        jobs_before = len(response.json()['jobs'])
        print_info(f" Jobs before restart: {jobs_before}")

        # Restart server
        print_info(" Restarting server...")
        self.stop_api_server()
        time.sleep(2)
        assert self.start_api_server(wait_time=8), "Server failed to restart"

        # Check jobs after restart
        response = requests.get(f"{self.api_url}/jobs")
        assert response.status_code == 200
        jobs_after = len(response.json()['jobs'])
        print_info(f" Jobs after restart: {jobs_after}")

        # Check our specific job is still there (this is the key test)
        response = requests.get(f"{self.api_url}/jobs/{job_id}")
        assert response.status_code == 200, "Job not found after restart"

        # Note: Total count may differ due to job retention/cleanup, but persistence works if we can find the job
        if jobs_after < jobs_before:
            print_warning(f" Job count decreased ({jobs_before} -> {jobs_after}), may be due to cleanup")

        print_success(" Jobs persisted correctly across restart")

    # ========================================================================
    # TEST 9: Health Endpoint
    # ========================================================================
    def test_health_endpoint(self):
        """Test basic health endpoint"""
        print_info(" Checking health endpoint...")
        response = requests.get(f"{self.api_url}/health")
        assert response.status_code == 200, f"Health check failed: {response.status_code}"

        health_data = response.json()
        assert health_data['status'] == 'healthy', "Server not healthy"
        print_success(" Health endpoint OK")

    # ========================================================================
    # TEST 10: Models Endpoint
    # ========================================================================
    def test_models_endpoint(self):
        """Test models information endpoint"""
        print_info(" Checking models endpoint...")
        response = requests.get(f"{self.api_url}/models")
        assert response.status_code == 200, f"Models endpoint failed: {response.status_code}"

        models_data = response.json()
        assert 'available_models' in models_data, "No available_models field"
        assert 'available_devices' in models_data, "No available_devices field"
        assert len(models_data['available_models']) > 0, "No models listed"
        print_info(f" Available models: {len(models_data['available_models'])}")
        print_success(" Models endpoint OK")

    def print_summary(self):
        """Print test summary"""
        print_section("TEST SUMMARY")

        passed = sum(1 for _, result, _ in self.test_results if result)
        failed = len(self.test_results) - passed

        for name, result, error in self.test_results:
            if result:
                print_success(f"{name}")
            else:
                print_error(f"{name}")
                if error:
                    print(f" {error}")

        print(f"\n{Colors.BOLD}Total: {len(self.test_results)} | ", end="")
        print(f"{Colors.GREEN}Passed: {passed}{Colors.END} | ", end="")
        print(f"{Colors.RED}Failed: {failed}{Colors.END}\n")

        return failed == 0

    def run_all_tests(self, start_server=True):
        """Run all Phase 4 integration tests"""
        print_section("PHASE 4: END-TO-END INTEGRATION TESTING")

        try:
            # Start server if requested
            if start_server:
                if not self.start_api_server():
                    print_error("Failed to start API server. Aborting tests.")
                    return False
            else:
                # Verify server is already running
                try:
                    response = requests.get(f"{self.api_url}/health", timeout=5)
                    if response.status_code != 200:
                        print_error("Server is not responding. Please start it first.")
                        return False
                    print_info("Using existing API server")
                except requests.exceptions.RequestException:
                    print_error("Cannot connect to API server. Please start it first.")
                    return False

            # Run tests
            print_section("TEST 1: Single Job Submission and Completion")
            self.test("Single job flow (submit → poll → get result)", self.test_single_job_flow)

            print_section("TEST 2: Multiple Jobs (FIFO Order)")
            self.test("Multiple jobs in queue (FIFO)", self.test_multiple_jobs_fifo)

            print_section("TEST 3: GPU Health Check")
            self.test("GPU health check endpoint", self.test_gpu_health_check)

            print_section("TEST 4: Error Handling - Invalid Path")
            self.test("Invalid audio path rejection", self.test_invalid_audio_path)

            print_section("TEST 5: Error Handling - Job Not Found")
            self.test("Non-existent job handling", self.test_job_not_found)

            print_section("TEST 6: Error Handling - Result Before Completion")
            self.test("Result request before completion", self.test_result_before_completion)

            print_section("TEST 7: Job Listing")
            self.test("List jobs with filters", self.test_list_jobs)

            print_section("TEST 8: Health Endpoint")
            self.test("Basic health endpoint", self.test_health_endpoint)

            print_section("TEST 9: Models Endpoint")
            self.test("Models information endpoint", self.test_models_endpoint)

            print_section("TEST 10: Server Restart Persistence")
            self.test("Job persistence across server restart", self.test_server_restart_persistence)

            # Print summary
            success = self.print_summary()

            return success

        finally:
            # Cleanup
            if start_server and self.server_process:
                self.stop_api_server()


def main():
    """Main test runner"""
    import argparse

    parser = argparse.ArgumentParser(description='Phase 4 Integration Tests')
    parser.add_argument('--url', default='http://localhost:8000', help='API server URL')
    # Default to None so Phase4Tester uses relative path
    parser.add_argument('--audio', default=None,
                        help='Path to test audio file (default: <project_root>/data/test.mp3)')
    parser.add_argument('--no-start-server', action='store_true',
                        help='Do not start server (assume it is already running)')
    args = parser.parse_args()

    tester = Phase4Tester(api_url=args.url, test_audio=args.audio)
    success = tester.run_all_tests(start_server=not args.no_start_server)

    sys.exit(0 if success else 1)


if __name__ == '__main__':
    main()