fix gpu check at startup issue

Alihan
2025-10-12 03:09:04 +03:00
parent 06b8bc1304
commit d47c2843c3
6 changed files with 118 additions and 55 deletions

.gitignore vendored (3 changes)
View File

@@ -17,3 +17,6 @@ venv/
 logs/**
 User/**
 data/**
+models/*
+outputs/*
+api.logs

View File

@@ -11,6 +11,10 @@ export PYTHONPATH="/home/uad/agents/tools/mcp-transcriptor/src:$PYTHONPATH"
 # Set CUDA library path
 export LD_LIBRARY_PATH=/usr/local/cuda-12.4/targets/x86_64-linux/lib:$LD_LIBRARY_PATH

+# Set proxy for model downloads
+export HTTP_PROXY=http://192.168.1.212:8080
+export HTTPS_PROXY=http://192.168.1.212:8080
+
 # Set environment variables
 export CUDA_VISIBLE_DEVICES=1
 export WHISPER_MODEL_DIR="/home/uad/agents/tools/mcp-transcriptor/data/models"
@@ -27,7 +31,7 @@ export TRANSCRIPTION_FILENAME_PREFIX=""
 # API server configuration
 export API_HOST="0.0.0.0"
-export API_PORT="8000"
+export API_PORT="33767"

 # GPU Auto-Reset Configuration
 export GPU_RESET_COOLDOWN_MINUTES=5  # Minimum time between GPU reset attempts

View File

@@ -15,6 +15,10 @@ export PYTHONPATH="/home/uad/agents/tools/mcp-transcriptor/src:$PYTHONPATH"
 # Set CUDA library path
 export LD_LIBRARY_PATH=/usr/local/cuda-12.4/targets/x86_64-linux/lib:$LD_LIBRARY_PATH

+# Set proxy for model downloads
+export HTTP_PROXY=http://192.168.1.212:8080
+export HTTPS_PROXY=http://192.168.1.212:8080
+
 # Set environment variables
 export CUDA_VISIBLE_DEVICES=1
 export WHISPER_MODEL_DIR="/home/uad/agents/tools/mcp-transcriptor/data/models"
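
To sanity-check the new proxy settings before the server tries to fetch models, here is a minimal sketch (assumptions: the proxy at 192.168.1.212:8080 is up, and the downloader honors the standard HTTP_PROXY/HTTPS_PROXY variables the way requests does; huggingface.co stands in for whatever host the model download actually hits):

import os
import requests

# requests reads HTTP_PROXY/HTTPS_PROXY from the environment by default.
print("HTTPS_PROXY =", os.environ.get("HTTPS_PROXY"))
resp = requests.get("https://huggingface.co", timeout=10)  # routed via the proxy
print("proxy OK, status:", resp.status_code)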

View File

@@ -93,6 +93,7 @@ async def root():
         "GET /health/circuit-breaker": "Get circuit breaker stats",
         "POST /health/circuit-breaker/reset": "Reset circuit breaker",
         "GET /models": "Get available models information",
+        "POST /transcribe": "Upload audio file and submit transcription job",
         "POST /jobs": "Submit transcription job (async)",
         "GET /jobs/{job_id}": "Get job status",
         "GET /jobs/{job_id}/result": "Get job result",
@@ -123,6 +124,92 @@ async def get_models():
         raise HTTPException(status_code=500, detail=f"Failed to get model info: {str(e)}")
+
+
+@app.post("/transcribe")
+async def transcribe_upload(
+    file: UploadFile = File(...),
+    model: str = Form("medium"),
+    language: Optional[str] = Form(None),
+    output_format: str = Form("txt"),
+    beam_size: int = Form(5),
+    temperature: float = Form(0.0),
+    initial_prompt: Optional[str] = Form(None)
+):
+    """
+    Upload audio file and submit transcription job in one request.
+    Returns immediately with job_id. Poll GET /jobs/{job_id} for status.
+    """
+    temp_file_path = None
+    try:
+        # Save uploaded file to temp directory
+        upload_dir = Path(os.getenv("TRANSCRIPTION_OUTPUT_DIR", "/tmp")) / "uploads"
+        upload_dir.mkdir(parents=True, exist_ok=True)
+
+        # Create temp file with original filename
+        temp_file_path = upload_dir / file.filename
+        logger.info(f"Receiving upload: {file.filename} ({file.content_type})")
+
+        # Save uploaded file
+        with open(temp_file_path, "wb") as f:
+            content = await file.read()
+            f.write(content)
+        logger.info(f"Saved upload to: {temp_file_path}")
+
+        # Submit transcription job
+        job_info = job_queue.submit_job(
+            audio_path=str(temp_file_path),
+            model_name=model,
+            device="auto",
+            compute_type="auto",
+            language=language,
+            output_format=output_format,
+            beam_size=beam_size,
+            temperature=temperature,
+            initial_prompt=initial_prompt,
+            output_directory=None
+        )
+
+        return JSONResponse(
+            status_code=200,
+            content={
+                **job_info,
+                "message": f"File uploaded and job submitted. Poll /jobs/{job_info['job_id']} for status."
+            }
+        )
+    except queue_module.Full:
+        # Clean up temp file if queue is full
+        if temp_file_path and temp_file_path.exists():
+            temp_file_path.unlink()
+        logger.warning("Job queue is full, rejecting upload")
+        raise HTTPException(
+            status_code=503,
+            detail={
+                "error": "Queue full",
+                "message": "Job queue is full. Please try again later.",
+                "queue_size": job_queue._max_queue_size,
+                "max_queue_size": job_queue._max_queue_size
+            }
+        )
+    except Exception as e:
+        # Clean up temp file on error
+        if temp_file_path and temp_file_path.exists():
+            temp_file_path.unlink()
+        logger.error(f"Failed to process upload: {e}")
+        raise HTTPException(
+            status_code=500,
+            detail={
+                "error": "Upload failed",
+                "message": str(e)
+            }
+        )
+
+
 @app.post("/jobs")
 async def submit_job(request: SubmitJobRequest):
     """

View File

@@ -1,7 +1,7 @@
 """
 Test audio generator for GPU health checks.
-Generates realistic test audio with speech using TTS (text-to-speech).
+Returns path to existing test audio file - NO GENERATION, NO INTERNET.
 """

 import os
@@ -10,70 +10,35 @@ import tempfile

 def generate_test_audio(duration_seconds: float = 3.0, frequency: int = 440) -> str:
     """
-    Generate a test audio file with real speech for GPU health checks.
+    Return path to existing test audio file for GPU health checks.
+
+    NO AUDIO GENERATION - just returns path to pre-existing test file.
+    NO INTERNET CONNECTION REQUIRED.

     Args:
-        duration_seconds: Duration of audio in seconds (default: 3.0)
-        frequency: Legacy parameter, ignored (kept for backward compatibility)
+        duration_seconds: Duration hint (default: 3.0) - used for cache lookup
+        frequency: Legacy parameter, ignored

     Returns:
-        str: Path to temporary audio file
+        str: Path to test audio file

-    Implementation:
-        - Generate real speech using gTTS (Google Text-to-Speech)
-        - Fallback to pyttsx3 if gTTS fails or is unavailable
-        - Raises RuntimeError if both TTS engines fail
-        - Save as MP3 format
-        - Store in system temp directory
-        - Reuse same file if exists (cache)
+    Raises:
+        RuntimeError: If test audio file doesn't exist
     """
-    # Use a consistent filename in temp directory for caching
+    # Check for existing test audio in temp directory
     temp_dir = tempfile.gettempdir()
     audio_path = os.path.join(temp_dir, f"whisper_test_voice_{int(duration_seconds)}s.mp3")

     # Return cached file if it exists and is valid
     if os.path.exists(audio_path):
         try:
             # Verify file is readable and not empty
             if os.path.getsize(audio_path) > 0:
                 return audio_path
         except Exception:
             # If file is corrupted, regenerate it
             pass

-    # Generate speech with different text based on duration
-    if duration_seconds >= 3:
-        text = "This is a test of the Whisper speech recognition system. Testing one, two, three."
-    elif duration_seconds >= 2:
-        text = "This is a test of the Whisper system."
-    else:
-        text = "Testing Whisper."
-
-    # Try gTTS first (better quality, requires internet)
-    try:
-        from gtts import gTTS
-        tts = gTTS(text=text, lang='en', slow=False)
-        tts.save(audio_path)
-        if os.path.exists(audio_path) and os.path.getsize(audio_path) > 0:
-            return audio_path
-    except Exception as e:
-        print(f"gTTS failed ({e}), trying pyttsx3...")
-
-    # Fallback to pyttsx3 (offline, lower quality)
-    try:
-        import pyttsx3
-        engine = pyttsx3.init()
-        engine.save_to_file(text, audio_path)
-        engine.runAndWait()
-        # Verify file was created
-        if os.path.exists(audio_path) and os.path.getsize(audio_path) > 0:
-            return audio_path
-    except Exception as e:
-        raise RuntimeError(
-            f"Failed to generate test audio. Both gTTS and pyttsx3 failed. "
-            f"gTTS error: {e}. Please ensure TTS dependencies are installed: "
-            f"pip install gTTS pyttsx3"
-        )
+    # If no cached file, raise error - we don't generate anything
+    raise RuntimeError(
+        f"Test audio file not found: {audio_path}. "
+        f"Please ensure test audio exists before running GPU health checks. "
+        f"Expected file location: {audio_path}"
+    )

 def cleanup_test_audio() -> None:
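
Because the health check no longer generates audio, the expected clip has to be staged before startup. A minimal sketch (the source path is a placeholder; any short speech MP3 works, and the destination name follows the whisper_test_voice_{duration}s.mp3 pattern from the code above):

import os
import shutil
import tempfile

# Stage a known-good clip where generate_test_audio() now expects it.
src = "/path/to/short_speech_clip.mp3"  # placeholder source file
dst = os.path.join(tempfile.gettempdir(), "whisper_test_voice_3s.mp3")
shutil.copy(src, dst)
print(f"Staged test audio at {dst}")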

View File

@@ -12,7 +12,7 @@ environment=
     PYTHONPATH="/home/uad/agents/tools/mcp-transcriptor/src",
     CUDA_VISIBLE_DEVICES="0",
     API_HOST="0.0.0.0",
-    API_PORT="8000",
+    API_PORT="33767",
     WHISPER_MODEL_DIR="/home/uad/agents/tools/mcp-transcriptor/models",
     TRANSCRIPTION_OUTPUT_DIR="/home/uad/agents/tools/mcp-transcriptor/outputs",
     TRANSCRIPTION_BATCH_OUTPUT_DIR="/home/uad/agents/tools/mcp-transcriptor/outputs/batch",
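
After restarting under supervisor, a quick probe confirms the service answers on the relocated port (a minimal sketch; assumes the server is reachable on localhost and uses the GET /models endpoint listed earlier):

import requests

# GET /models is a cheap endpoint to verify the new port is live.
resp = requests.get("http://localhost:33767/models", timeout=5)
resp.raise_for_status()
print(resp.json())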