fix gpu check at startup issue

Alihan
2025-10-12 03:09:04 +03:00
parent 06b8bc1304
commit d47c2843c3
6 changed files with 118 additions and 55 deletions

.gitignore vendored (3 changes)
View File

@@ -17,3 +17,6 @@ venv/
 logs/**
 User/**
 data/**
+models/*
+outputs/*
+api.logs

View File

@@ -11,6 +11,10 @@ export PYTHONPATH="/home/uad/agents/tools/mcp-transcriptor/src:$PYTHONPATH"
 # Set CUDA library path
 export LD_LIBRARY_PATH=/usr/local/cuda-12.4/targets/x86_64-linux/lib:$LD_LIBRARY_PATH

+# Set proxy for model downloads
+export HTTP_PROXY=http://192.168.1.212:8080
+export HTTPS_PROXY=http://192.168.1.212:8080
+
 # Set environment variables
 export CUDA_VISIBLE_DEVICES=1
 export WHISPER_MODEL_DIR="/home/uad/agents/tools/mcp-transcriptor/data/models"
@@ -27,7 +31,7 @@ export TRANSCRIPTION_FILENAME_PREFIX=""
 # API server configuration
 export API_HOST="0.0.0.0"
-export API_PORT="8000"
+export API_PORT="33767"

 # GPU Auto-Reset Configuration
 export GPU_RESET_COOLDOWN_MINUTES=5  # Minimum time between GPU reset attempts

View File

@@ -15,6 +15,10 @@ export PYTHONPATH="/home/uad/agents/tools/mcp-transcriptor/src:$PYTHONPATH"
 # Set CUDA library path
 export LD_LIBRARY_PATH=/usr/local/cuda-12.4/targets/x86_64-linux/lib:$LD_LIBRARY_PATH

+# Set proxy for model downloads
+export HTTP_PROXY=http://192.168.1.212:8080
+export HTTPS_PROXY=http://192.168.1.212:8080
+
 # Set environment variables
 export CUDA_VISIBLE_DEVICES=1
 export WHISPER_MODEL_DIR="/home/uad/agents/tools/mcp-transcriptor/data/models"
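
To sanity-check the new proxy settings before the server tries to fetch models, here is a minimal sketch (assumptions: the proxy at 192.168.1.212:8080 is up, and the downloader honors the standard HTTP_PROXY/HTTPS_PROXY variables the way requests does; huggingface.co stands in for whatever host the model download actually hits):

import os
import requests

# requests reads HTTP_PROXY/HTTPS_PROXY from the environment by default.
print("HTTPS_PROXY =", os.environ.get("HTTPS_PROXY"))
resp = requests.get("https://huggingface.co", timeout=10)  # routed via the proxy
print("proxy OK, status:", resp.status_code)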

View File

@@ -93,6 +93,7 @@ async def root():
         "GET /health/circuit-breaker": "Get circuit breaker stats",
         "POST /health/circuit-breaker/reset": "Reset circuit breaker",
         "GET /models": "Get available models information",
+        "POST /transcribe": "Upload audio file and submit transcription job",
         "POST /jobs": "Submit transcription job (async)",
         "GET /jobs/{job_id}": "Get job status",
         "GET /jobs/{job_id}/result": "Get job result",
@@ -123,6 +124,92 @@ async def get_models():
         raise HTTPException(status_code=500, detail=f"Failed to get model info: {str(e)}")
+
+
+@app.post("/transcribe")
+async def transcribe_upload(
+    file: UploadFile = File(...),
+    model: str = Form("medium"),
+    language: Optional[str] = Form(None),
+    output_format: str = Form("txt"),
+    beam_size: int = Form(5),
+    temperature: float = Form(0.0),
+    initial_prompt: Optional[str] = Form(None)
+):
+    """
+    Upload audio file and submit transcription job in one request.
+    Returns immediately with job_id. Poll GET /jobs/{job_id} for status.
+    """
+    temp_file_path = None
+    try:
+        # Save uploaded file to temp directory
+        upload_dir = Path(os.getenv("TRANSCRIPTION_OUTPUT_DIR", "/tmp")) / "uploads"
+        upload_dir.mkdir(parents=True, exist_ok=True)
+
+        # Create temp file with original filename
+        temp_file_path = upload_dir / file.filename
+        logger.info(f"Receiving upload: {file.filename} ({file.content_type})")
+
+        # Save uploaded file
+        with open(temp_file_path, "wb") as f:
+            content = await file.read()
+            f.write(content)
+        logger.info(f"Saved upload to: {temp_file_path}")
+
+        # Submit transcription job
+        job_info = job_queue.submit_job(
+            audio_path=str(temp_file_path),
+            model_name=model,
+            device="auto",
+            compute_type="auto",
+            language=language,
+            output_format=output_format,
+            beam_size=beam_size,
+            temperature=temperature,
+            initial_prompt=initial_prompt,
+            output_directory=None
+        )
+
+        return JSONResponse(
+            status_code=200,
+            content={
+                **job_info,
+                "message": f"File uploaded and job submitted. Poll /jobs/{job_info['job_id']} for status."
+            }
+        )
+    except queue_module.Full:
+        # Clean up temp file if queue is full
+        if temp_file_path and temp_file_path.exists():
+            temp_file_path.unlink()
+        logger.warning("Job queue is full, rejecting upload")
+        raise HTTPException(
+            status_code=503,
+            detail={
+                "error": "Queue full",
+                "message": "Job queue is full. Please try again later.",
+                "queue_size": job_queue._max_queue_size,
+                "max_queue_size": job_queue._max_queue_size
+            }
+        )
+    except Exception as e:
+        # Clean up temp file on error
+        if temp_file_path and temp_file_path.exists():
+            temp_file_path.unlink()
+        logger.error(f"Failed to process upload: {e}")
+        raise HTTPException(
+            status_code=500,
+            detail={
+                "error": "Upload failed",
+                "message": str(e)
+            }
+        )
+
+
 @app.post("/jobs")
 async def submit_job(request: SubmitJobRequest):
     """

View File

@@ -1,7 +1,7 @@
 """
 Test audio generator for GPU health checks.
-Generates realistic test audio with speech using TTS (text-to-speech).
+Returns path to existing test audio file - NO GENERATION, NO INTERNET.
 """

 import os
@@ -10,70 +10,35 @@ import tempfile

 def generate_test_audio(duration_seconds: float = 3.0, frequency: int = 440) -> str:
     """
-    Generate a test audio file with real speech for GPU health checks.
+    Return path to existing test audio file for GPU health checks.
+
+    NO AUDIO GENERATION - just returns path to pre-existing test file.
+    NO INTERNET CONNECTION REQUIRED.

     Args:
-        duration_seconds: Duration of audio in seconds (default: 3.0)
-        frequency: Legacy parameter, ignored (kept for backward compatibility)
+        duration_seconds: Duration hint (default: 3.0) - used for cache lookup
+        frequency: Legacy parameter, ignored

     Returns:
-        str: Path to temporary audio file
+        str: Path to test audio file

-    Implementation:
-        - Generate real speech using gTTS (Google Text-to-Speech)
-        - Fallback to pyttsx3 if gTTS fails or is unavailable
-        - Raises RuntimeError if both TTS engines fail
-        - Save as MP3 format
-        - Store in system temp directory
-        - Reuse same file if exists (cache)
+    Raises:
+        RuntimeError: If test audio file doesn't exist
     """
-    # Use a consistent filename in temp directory for caching
+    # Check for existing test audio in temp directory
     temp_dir = tempfile.gettempdir()
     audio_path = os.path.join(temp_dir, f"whisper_test_voice_{int(duration_seconds)}s.mp3")

     # Return cached file if it exists and is valid
     if os.path.exists(audio_path):
         try:
             # Verify file is readable and not empty
             if os.path.getsize(audio_path) > 0:
                 return audio_path
         except Exception:
             # If file is corrupted, regenerate it
             pass

-    # Generate speech with different text based on duration
-    if duration_seconds >= 3:
-        text = "This is a test of the Whisper speech recognition system. Testing one, two, three."
-    elif duration_seconds >= 2:
-        text = "This is a test of the Whisper system."
-    else:
-        text = "Testing Whisper."
-
-    # Try gTTS first (better quality, requires internet)
-    try:
-        from gtts import gTTS
-        tts = gTTS(text=text, lang='en', slow=False)
-        tts.save(audio_path)
-        if os.path.exists(audio_path) and os.path.getsize(audio_path) > 0:
-            return audio_path
-    except Exception as e:
-        print(f"gTTS failed ({e}), trying pyttsx3...")
-
-    # Fallback to pyttsx3 (offline, lower quality)
-    try:
-        import pyttsx3
-        engine = pyttsx3.init()
-        engine.save_to_file(text, audio_path)
-        engine.runAndWait()
-        # Verify file was created
-        if os.path.exists(audio_path) and os.path.getsize(audio_path) > 0:
-            return audio_path
-    except Exception as e:
-        raise RuntimeError(
-            f"Failed to generate test audio. Both gTTS and pyttsx3 failed. "
-            f"gTTS error: {e}. Please ensure TTS dependencies are installed: "
-            f"pip install gTTS pyttsx3"
-        )
+    # If no cached file, raise error - we don't generate anything
+    raise RuntimeError(
+        f"Test audio file not found: {audio_path}. "
+        f"Please ensure test audio exists before running GPU health checks. "
+        f"Expected file location: {audio_path}"
+    )

 def cleanup_test_audio() -> None:
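
Because the health check no longer generates audio, the expected clip has to be staged before startup. A minimal sketch (the source path is a placeholder; any short speech MP3 works, and the destination name follows the whisper_test_voice_{duration}s.mp3 pattern from the code above):

import os
import shutil
import tempfile

# Stage a known-good clip where generate_test_audio() now expects it.
src = "/path/to/short_speech_clip.mp3"  # placeholder source file
dst = os.path.join(tempfile.gettempdir(), "whisper_test_voice_3s.mp3")
shutil.copy(src, dst)
print(f"Staged test audio at {dst}")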

View File

@@ -12,7 +12,7 @@ environment=
     PYTHONPATH="/home/uad/agents/tools/mcp-transcriptor/src",
     CUDA_VISIBLE_DEVICES="0",
     API_HOST="0.0.0.0",
-    API_PORT="8000",
+    API_PORT="33767",
     WHISPER_MODEL_DIR="/home/uad/agents/tools/mcp-transcriptor/models",
     TRANSCRIPTION_OUTPUT_DIR="/home/uad/agents/tools/mcp-transcriptor/outputs",
     TRANSCRIPTION_BATCH_OUTPUT_DIR="/home/uad/agents/tools/mcp-transcriptor/outputs/batch",
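
After restarting under supervisor, a quick probe confirms the service answers on the relocated port (a minimal sketch; assumes the server is reachable on localhost and uses the GET /models endpoint listed earlier):

import requests

# GET /models is a cheap endpoint to verify the new port is live.
resp = requests.get("http://localhost:33767/models", timeout=5)
resp.raise_for_status()
print(resp.json())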