ALIHAN DIKEL
2025-07-05 14:35:47 +03:00
7 changed files with 80 additions and 10 deletions

4
.gitignore vendored
View File

@@ -14,4 +14,6 @@ venv/
# Cython
*.pyd
logs/**
User/**
data/**

1
data Symbolic link
View File

@@ -0,0 +1 @@
/media/raid/agents/tools/mcp-transcriptor

6
mcp.logs Normal file
View File

@@ -0,0 +1,6 @@
{"jsonrpc":"2.0","id":1,"result":{"protocolVersion":"2025-03-26","capabilities":{"experimental":{},"prompts":{"listChanged":false},"resources":{"subscribe":false,"listChanged":false},"tools":{"listChanged":false}},"serverInfo":{"name":"fast-whisper-mcp-server","version":"1.9.4"}}}
INFO:mcp.server.lowlevel.server:Processing request of type ListToolsRequest
{"jsonrpc":"2.0","id":2,"result":{"tools":[{"name":"get_model_info_api","description":"\n Get available Whisper model information\n ","inputSchema":{"properties":{},"title":"get_model_info_apiArguments","type":"object"}},{"name":"transcribe","description":"\n Transcribe audio files using Faster Whisper\n\n Args:\n audio_path: Path to the audio file\n model_name: Model name (tiny, base, small, medium, large-v1, large-v2, large-v3)\n device: Execution device (cpu, cuda, auto)\n compute_type: Computation type (float16, int8, auto)\n language: Language code (such as zh, en, ja, etc., auto-detect by default)\n output_format: Output format (vtt, srt, json or txt)\n beam_size: Beam search size, larger values may improve accuracy but reduce speed\n temperature: Sampling temperature, greedy decoding\n initial_prompt: Initial prompt text, can help the model better understand context\n output_directory: Output directory path, defaults to the audio file's directory\n\n Returns:\n str: Transcription result, in VTT subtitle or JSON format\n ","inputSchema":{"properties":{"audio_path":{"title":"Audio Path","type":"string"},"model_name":{"default":"large-v3","title":"Model Name","type":"string"},"device":{"default":"auto","title":"Device","type":"string"},"compute_type":{"default":"auto","title":"Compute Type","type":"string"},"language":{"default":null,"title":"Language","type":"string"},"output_format":{"default":"vtt","title":"Output Format","type":"string"},"beam_size":{"default":5,"title":"Beam Size","type":"integer"},"temperature":{"default":0.0,"title":"Temperature","type":"number"},"initial_prompt":{"default":null,"title":"Initial Prompt","type":"string"},"output_directory":{"default":null,"title":"Output Directory","type":"string"}},"required":["audio_path"],"title":"transcribeArguments","type":"object"}},{"name":"batch_transcribe_audio","description":"\n Batch transcribe audio files in a folder\n\n Args:\n audio_folder: Path to the folder containing audio files\n output_folder: Output folder path, defaults to a 'transcript' subfolder in audio_folder\n model_name: Model name (tiny, base, small, medium, large-v1, large-v2, large-v3)\n device: Execution device (cpu, cuda, auto)\n compute_type: Computation type (float16, int8, auto)\n language: Language code (such as zh, en, ja, etc., auto-detect by default)\n output_format: Output format (vtt, srt, json or txt)\n beam_size: Beam search size, larger values may improve accuracy but reduce speed\n temperature: Sampling temperature, 0 means greedy decoding\n initial_prompt: Initial prompt text, can help the model better understand context\n parallel_files: Number of files to process in parallel (only effective in CPU mode)\n\n Returns:\n str: Batch processing summary, including processing time and success rate\n ","inputSchema":{"properties":{"audio_folder":{"title":"Audio Folder","type":"string"},"output_folder":{"default":null,"title":"Output Folder","type":"string"},"model_name":{"default":"large-v3","title":"Model Name","type":"string"},"device":{"default":"auto","title":"Device","type":"string"},"compute_type":{"default":"auto","title":"Compute Type","type":"string"},"language":{"default":null,"title":"Language","type":"string"},"output_format":{"default":"vtt","title":"Output Format","type":"string"},"beam_size":{"default":5,"title":"Beam Size","type":"integer"},"temperature":{"default":0.0,"title":"Temperature","type":"number"},"initial_prompt":{"default":null,"title":"Initial 
Prompt","type":"string"},"parallel_files":{"default":1,"title":"Parallel Files","type":"integer"}},"required":["audio_folder"],"title":"batch_transcribe_audioArguments","type":"object"}}]}}
INFO:mcp.server.lowlevel.server:Processing request of type CallToolRequest
INFO:model_manager:GPU test passed: NVIDIA GeForce RTX 3060 (12.5GB)
INFO:model_manager:Loading Whisper model: large-v3 device: cuda compute type: float16
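For reference, the CallToolRequest recorded above corresponds to a JSON-RPC tools/call message. A minimal client-side sketch follows (not part of this commit): the request id and audio path are illustrative, and the argument names mirror the tool schema shown in the tools/list response.

import json

# Hypothetical example of the tools/call request behind the CallToolRequest log line;
# only audio_path is required, the remaining arguments follow the schema defaults.
call_request = {
    "jsonrpc": "2.0",
    "id": 3,
    "method": "tools/call",
    "params": {
        "name": "transcribe",
        "arguments": {
            "audio_path": "/path/to/audio.wav",  # illustrative path
            "model_name": "large-v3",
            "device": "cuda",
            "compute_type": "float16",
            "output_format": "txt",
        },
    },
}
print(json.dumps(call_request))  # sent as one newline-delimited JSON-RPC message on stdin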

View File

@@ -4,7 +4,7 @@ Model Management Module
Responsible for loading, caching, and managing Whisper models
"""
import os
import os; print(os.environ.get("WHISPER_MODEL_DIR"))
import time
import logging
from typing import Dict, Any
@@ -17,6 +17,30 @@ logger = logging.getLogger(__name__)
# Global model instance cache
model_instances = {}
def test_gpu_driver():
    """Simple GPU driver test"""
    try:
        if not torch.cuda.is_available():
            logger.error("CUDA not available in PyTorch")
            raise RuntimeError("CUDA not available")
        gpu_count = torch.cuda.device_count()
        if gpu_count == 0:
            logger.error("No CUDA devices found")
            raise RuntimeError("No CUDA devices")
        # Quick GPU test
        test_tensor = torch.randn(10, 10).cuda()
        _ = test_tensor @ test_tensor.T
        device_name = torch.cuda.get_device_name(0)
        memory_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
        logger.info(f"GPU test passed: {device_name} ({memory_gb:.1f}GB)")
    except Exception as e:
        logger.error(f"GPU test failed: {e}")
        raise RuntimeError(f"GPU initialization failed: {e}")
def get_whisper_model(model_name: str, device: str, compute_type: str) -> Dict[str, Any]:
"""
Get or create Whisper model instance
@@ -46,9 +70,8 @@ def get_whisper_model(model_name: str, device: str, compute_type: str) -> Dict[s
raise ValueError(f"Invalid device: {device}. Valid devices: cpu, cuda")
if device == "cuda" and not torch.cuda.is_available():
logger.warning("CUDA not available, automatically switching to CPU")
device = "cpu"
compute_type = "int8"
logger.error("CUDA requested but not available")
raise RuntimeError("CUDA not available but explicitly requested")
if compute_type not in ["float16", "int8"]:
raise ValueError(f"Invalid compute type: {compute_type}. Valid compute types: float16, int8")
@@ -65,8 +88,9 @@ def get_whisper_model(model_name: str, device: str, compute_type: str) -> Dict[s
logger.info(f"Using cached model instance: {model_key}")
return model_instances[model_key]
# Clean GPU memory (if using CUDA)
# Test GPU driver before loading model and clean
if device == "cuda":
test_gpu_driver()
torch.cuda.empty_cache()
# Instantiate model
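The net effect of this hunk is that an explicit device="cuda" request now fails fast instead of silently falling back to CPU, and the GPU driver is exercised before the model loads. A minimal caller-side sketch, assuming the module is importable as model_manager (a name inferred from the model_manager logger in mcp.logs, not confirmed by this diff):

from model_manager import get_whisper_model  # import path is an assumption

try:
    # test_gpu_driver() now runs inside this call before the model is loaded
    instance = get_whisper_model("large-v3", device="cuda", compute_type="float16")
except RuntimeError as err:
    # Previously the call would have downgraded to device="cpu", compute_type="int8";
    # after this change the failure is surfaced to the caller instead.
    print(f"GPU unavailable: {err}")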

View File

@@ -20,4 +20,3 @@ mcp[cli]
# • CPU version:
# pip install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cpu
#
# You can use the command `nvcc --version` or `nvidia-smi` to check your CUDA version

37
run_server.sh Executable file
View File

@@ -0,0 +1,37 @@
#!/bin/bash
set -e
datetime_prefix() {
    date "+[%Y-%m-%d %H:%M:%S]"
}
# Get current user ID to avoid permission issues
USER_ID=$(id -u)
GROUP_ID=$(id -g)
# Set environment variables
export CUDA_VISIBLE_DEVICES=1
export WHISPER_MODEL_DIR="/home/uad/agents/tools/mcp-transcriptor/data/models"
export TRANSCRIPTION_OUTPUT_DIR="/media/raid/agents/tools/mcp-transcriptor/outputs"
export TRANSCRIPTION_BATCH_OUTPUT_DIR="/media/raid/agents/tools/mcp-transcriptor/outputs/batch"
export TRANSCRIPTION_MODEL="large-v3"
export TRANSCRIPTION_DEVICE="cuda"
export TRANSCRIPTION_COMPUTE_TYPE="cuda"
export TRANSCRIPTION_OUTPUT_FORMAT="txt"
export TRANSCRIPTION_BEAM_SIZE="2"
export TRANSCRIPTION_TEMPERATURE="0.0"
export TRANSCRIPTION_USE_TIMESTAMP="false"
export TRANSCRIPTION_FILENAME_PREFIX="test_"
# Log start of the script
echo "$(datetime_prefix) Starting whisper server script..."
echo "test: $WHISPER_MODEL_DIR"
# Optional: Verify required directories exist
if [ ! -d "$WHISPER_MODEL_DIR" ]; then
    echo "$(datetime_prefix) Error: Whisper model directory does not exist: $WHISPER_MODEL_DIR"
    exit 1
fi
# Run the Python script with the defined environment variables
/home/uad/agents/tools/mcp-transcriptor/venv/bin/python /home/uad/agents/tools/mcp-transcriptor/whisper_server.py 2>&1 | tee /home/uad/agents/tools/mcp-transcriptor/mcp.logs
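To exercise the script end to end, a hedged smoke-test sketch (not part of this commit): it spawns run_server.sh the way an MCP stdio client would and performs the initialize handshake that produced the first line of mcp.logs above. The script path and clientInfo values are assumptions.

import json
import subprocess

# Assumed location of the launcher; adjust to wherever run_server.sh actually lives.
proc = subprocess.Popen(
    ["/home/uad/agents/tools/mcp-transcriptor/run_server.sh"],
    stdin=subprocess.PIPE,
    stdout=subprocess.PIPE,
    text=True,
)

initialize = {
    "jsonrpc": "2.0",
    "id": 1,
    "method": "initialize",
    "params": {
        "protocolVersion": "2025-03-26",
        "capabilities": {},
        "clientInfo": {"name": "smoke-test", "version": "0.1"},  # illustrative values
    },
}
proc.stdin.write(json.dumps(initialize) + "\n")
proc.stdin.flush()
print(proc.stdout.readline())  # should match the serverInfo line captured in mcp.logs
proc.terminate()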

View File

@@ -32,7 +32,7 @@ DEFAULT_TEMPERATURE = float(os.getenv('TRANSCRIPTION_TEMPERATURE', '0.0'))
WHISPER_MODEL_DIR = os.getenv('WHISPER_MODEL_DIR', None)
# File naming configuration
USE_TIMESTAMP = os.getenv('TRANSCRIPTION_USE_TIMESTAMP', 'true').lower() == 'true'
USE_TIMESTAMP = os.getenv('TRANSCRIPTION_USE_TIMESTAMP', 'false').lower() == 'true'
FILENAME_PREFIX = os.getenv('TRANSCRIPTION_FILENAME_PREFIX', '')
FILENAME_SUFFIX = os.getenv('TRANSCRIPTION_FILENAME_SUFFIX', '')
@@ -98,6 +98,7 @@ def transcribe_audio(
    # Set transcription parameters
    options = {
        "verbose": True,
        "language": language,
        "vad_filter": True,
        "vad_parameters": {"min_silence_duration_ms": 500},
@@ -107,7 +108,7 @@ def transcribe_audio(
"word_timestamps": True,
"suppress_tokens": [-1],
"condition_on_previous_text": True,
"compression_ratio_threshold": 2.4
"compression_ratio_threshold": 2.4,
}
start_time = time.time()