refactor(whisper_server): 重构代码以模块化转录功能

将转录核心逻辑拆分为独立模块（transcriber.py、model_manager.py、audio_processor.py、formatters.py），提升代码可维护性和复用性。删除main.py文件，优化依赖管理并更新requirements.txt和pyproject.toml。
2025-03-22 05:23:56 +08:00
parent 38060d755a
commit 9d22de2ac9
9 changed files with 1175 additions and 515 deletions
--- a/audio_processor.py
+++ b/audio_processor.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+"""
+音频处理模块
+负责音频文件的验证和预处理
+"""
+
+import os
+import logging
+from typing import Union, Any
+from faster_whisper import decode_audio
+
+# 日志配置
+logger = logging.getLogger(__name__)
+
+def validate_audio_file(audio_path: str) -> str:
+    """
+    Check that audio_path points at a usable audio file.
+
+    Args:
+        audio_path: Path of the audio file to check.
+
+    Returns:
+        str: "ok" when every check passes, otherwise an error message.
+    """
+    # isfile() rather than exists(): a directory named like "x.mp3" is not audio.
+    if not os.path.isfile(audio_path):
+        return f"错误: 音频文件不存在: {audio_path}"
+
+    # The extension must be one of the accepted formats.
+    supported_formats = [".mp3", ".wav", ".m4a", ".flac", ".ogg", ".aac"]
+    file_ext = os.path.splitext(audio_path)[1].lower()
+    if file_ext not in supported_formats:
+        return f"错误: 不支持的音频格式: {file_ext}。支持的格式: {', '.join(supported_formats)}"
+
+    # Reject empty files; very large ones are accepted but logged.
+    try:
+        file_size = os.path.getsize(audio_path)
+        if file_size == 0:
+            return f"错误: 音频文件为空: {audio_path}"
+
+        # Warn (do not fail) when the file is larger than 1 GiB.
+        if file_size > 1024 * 1024 * 1024:
+            logger.warning(f"警告: 文件大小超过1GB，可能需要较长处理时间: {audio_path}")
+    except Exception as e:
+        logger.error(f"检查文件大小失败: {str(e)}")
+        return f"错误: 检查文件大小失败: {str(e)}"
+
+    return "ok"
+
+def process_audio(audio_path: str) -> Union[str, Any]:
+    """
+    Decode an audio file up front, falling back to the raw path on failure.
+
+    Args:
+        audio_path: Path of the audio file to decode.
+
+    Returns:
+        Union[str, Any]: The value returned by decode_audio, or audio_path itself when decoding raises.
+    """
+    # Pre-decoding is attempted so that more input formats can be handled.
+    try:
+        decoded = decode_audio(audio_path)
+    except Exception as decode_failure:
+        logger.warning(f"音频预处理失败，将直接使用文件路径: {str(decode_failure)}")
+        return audio_path
+    logger.info(f"成功预处理音频: {os.path.basename(audio_path)}")
+    return decoded
--- a/formatters.py
+++ b/formatters.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+"""
+格式化输出模块
+负责将转录结果格式化为不同的输出格式（VTT、SRT、JSON）
+"""
+
+import json
+from typing import List, Dict, Any
+
+def format_vtt(segments: List) -> str:
+    """
+    Render transcription segments as a WebVTT document.
+
+    Args:
+        segments: Segment objects exposing start, end and text.
+
+    Returns:
+        str: WebVTT text; segments whose stripped text is empty are skipped.
+    """
+    pieces = ["WEBVTT\n\n"]
+
+    for seg in segments:
+        cue_text = seg.text.strip()
+        if not cue_text:
+            continue
+        begin = format_timestamp(seg.start)
+        finish = format_timestamp(seg.end)
+        pieces.append(f"{begin} --> {finish}\n{cue_text}\n\n")
+
+    return "".join(pieces)
+
+def format_srt(segments: List) -> str:
+    """
+    Render transcription segments as SubRip (SRT) text.
+
+    Args:
+        segments: Segment objects exposing start, end and text.
+
+    Returns:
+        str: SRT text; cues are numbered from 1 and blank segments are skipped.
+    """
+    cues = []
+
+    for seg in segments:
+        cue_text = seg.text.strip()
+        if not cue_text:
+            continue
+
+        begin = format_timestamp_srt(seg.start)
+        finish = format_timestamp_srt(seg.end)
+        # Number by cues emitted so far, so skipped segments leave no gaps.
+        cues.append(f"{len(cues) + 1}\n{begin} --> {finish}\n{cue_text}\n\n")
+
+    return "".join(cues)
+
+def format_json(segments: List, info: Any) -> str:
+    """
+    Serialize transcription segments and run metadata as a JSON string.
+
+    Args:
+        segments: Segment objects exposing start, end, text and optionally words.
+        info: Transcription info exposing language and duration.
+
+    Returns:
+        str: Indented JSON with non-ASCII characters left unescaped.
+    """
+    def word_entries(segment):
+        # A missing or empty word list serializes as [].
+        if not (hasattr(segment, 'words') and segment.words):
+            return []
+        return [
+            {"word": w.word, "start": w.start, "end": w.end, "probability": w.probability}
+            for w in segment.words
+        ]
+
+    payload = {"segments": [], "language": info.language}
+    for position, segment in enumerate(segments):
+        entry = {"id": position, "start": segment.start, "end": segment.end}
+        entry["text"] = segment.text.strip()
+        entry["words"] = word_entries(segment)
+        payload["segments"].append(entry)
+    payload["language_probability"] = getattr(info, 'language_probability', None)
+    payload["duration"] = info.duration
+    payload["all_language_probs"] = getattr(info, 'all_language_probs', None)
+    return json.dumps(payload, indent=2, ensure_ascii=False)
+
+def format_timestamp(seconds: float) -> str:
+    """
+    Format a time offset as a WebVTT timestamp.
+
+    Args:
+        seconds: Offset in seconds.
+
+    Returns:
+        str: Timestamp in HH:MM:SS.mmm form, rounded to the nearest millisecond.
+    """
+    total_ms = round(seconds * 1000)  # round once so the seconds field can never print as 60.000
+    hours, rem = divmod(total_ms, 3_600_000)
+    minutes, rem = divmod(rem, 60_000)
+    return f"{hours:02d}:{minutes:02d}:{rem // 1000:02d}.{rem % 1000:03d}"
+
+def format_timestamp_srt(seconds: float) -> str:
+    """
+    Format a time offset as an SRT timestamp.
+
+    Args:
+        seconds: Offset in seconds.
+
+    Returns:
+        str: Timestamp in HH:MM:SS,mmm form, rounded to the nearest millisecond.
+    """
+    total_ms = round(seconds * 1000)  # integer math avoids float drift (1.001 used to print ,000)
+    hours, rem = divmod(total_ms, 3_600_000)
+    minutes, rem = divmod(rem, 60_000)
+    secs, msecs = divmod(rem, 1000)
+    return f"{hours:02d}:{minutes:02d}:{secs:02d},{msecs:03d}"
+
+def format_time(seconds: float) -> str:
+    """
+    Format a duration as a human-readable clock string.
+
+    Args:
+        seconds: Duration in seconds.
+
+    Returns:
+        str: Duration in HH:MM:SS form; the fractional part is dropped.
+    """
+    whole = int(seconds // 1)  # floor to whole seconds
+    hours, leftover = divmod(whole, 3600)
+    minutes, secs = divmod(leftover, 60)
+    return f"{hours:02d}:{minutes:02d}:{secs:02d}"
--- a/main.py
+++ b/main.py
@@ -1,6 +0,0 @@
-def main():
-    print("Hello from fast-whisper-mcp-server!")
-
-
-if __name__ == "__main__":
-    main()
--- a/model_manager.py
+++ b/model_manager.py
@@ -0,0 +1,176 @@
+#!/usr/bin/env python3
+"""
+模型管理模块
+负责Whisper模型的加载、缓存和管理
+"""
+
+import os
+import time
+import logging
+from typing import Dict, Any
+import torch
+from faster_whisper import WhisperModel, BatchedInferencePipeline
+
+# 日志配置
+logger = logging.getLogger(__name__)
+
+# 全局模型实例缓存
+model_instances = {}
+
+def get_whisper_model(model_name: str, device: str, compute_type: str) -> Dict[str, Any]:
+    """
+    Return the cached Whisper model bundle for a configuration, loading it on first use.
+
+    Args:
+        model_name: Model name (tiny, base, small, medium, large-v1, large-v2, large-v3)
+        device: Device to run on (cpu, cuda, auto)
+        compute_type: Compute type (float16, int8, auto)
+
+    Returns:
+        dict: The model instance together with its resolved configuration
+    """
+    global model_instances
+
+    # Validate the model name
+    valid_models = ["tiny", "base", "small", "medium", "large-v1", "large-v2", "large-v3"]
+    if model_name not in valid_models:
+        raise ValueError(f"无效的模型名称: {model_name}。有效的模型: {', '.join(valid_models)}")
+
+    # Auto-detect the device; this also selects the matching compute type
+    if device == "auto":
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        compute_type = "float16" if device == "cuda" else "int8"
+
+    # Validate device and compute type
+    if device not in ["cpu", "cuda"]:
+        raise ValueError(f"无效的设备: {device}。有效的设备: cpu, cuda")
+
+    if device == "cuda" and not torch.cuda.is_available():
+        logger.warning("CUDA不可用，自动切换到CPU")
+        device = "cpu"
+        compute_type = "int8"
+
+    # "auto" is a documented compute type: resolve it for an explicitly chosen device too
+    if compute_type == "auto":
+        compute_type = "float16" if device == "cuda" else "int8"
+
+    if compute_type not in ["float16", "int8"]:
+        raise ValueError(f"无效的计算类型: {compute_type}。有效的计算类型: float16, int8")
+
+    if device == "cpu" and compute_type == "float16":
+        logger.warning("CPU设备不支持float16计算类型，自动切换到int8")
+        compute_type = "int8"
+
+    # Build the cache key
+    model_key = f"{model_name}_{device}_{compute_type}"
+
+    # Reuse an instance that is already loaded
+    if model_key in model_instances:
+        logger.info(f"使用缓存的模型实例: {model_key}")
+        return model_instances[model_key]
+
+    # Release cached GPU memory before loading (CUDA only)
+    if device == "cuda":
+        torch.cuda.empty_cache()
+
+    # Load the model
+    try:
+        logger.info(f"加载Whisper模型: {model_name} 设备: {device} 计算类型: {compute_type}")
+
+        # Base model
+        model = WhisperModel(
+            model_name,
+            device=device,
+            compute_type=compute_type,
+            download_root=os.environ.get("WHISPER_MODEL_DIR", None)  # optional custom model directory
+        )
+
+        # Batched inference is only set up on CUDA devices
+        batched_model = None
+        batch_size = 0
+
+        if device == "cuda":
+            # CUDA is available here: an unavailable "cuda" request already fell back to cpu above
+            gpu_mem = torch.cuda.get_device_properties(0).total_memory
+            free_mem = gpu_mem - torch.cuda.memory_allocated()
+            # Pick the batch size from the memory estimate
+            if free_mem > 16e9:  # >16GB
+                batch_size = 32
+            elif free_mem > 12e9:  # >12GB
+                batch_size = 16
+            elif free_mem > 8e9:   # >8GB
+                batch_size = 8
+            elif free_mem > 4e9:   # >4GB
+                batch_size = 4
+            else:                # less memory
+                batch_size = 2
+
+            logger.info(f"可用GPU显存: {free_mem / 1e9:.2f} GB")
+            logger.info(f"启用批处理加速，批大小: {batch_size}")
+            batched_model = BatchedInferencePipeline(model=model)
+
+        # Assemble the result
+        result = {
+            'model': model,
+            'device': device,
+            'compute_type': compute_type,
+            'batched_model': batched_model,
+            'batch_size': batch_size,
+            'load_time': time.time()
+        }
+
+        # Cache the instance
+        model_instances[model_key] = result
+        return result
+
+    except Exception as e:
+        logger.error(f"加载模型失败: {str(e)}")
+        raise
+
+def get_model_info() -> str:
+    """
+    Describe the models, devices, languages and audio formats that are offered.
+
+    Returns:
+        str: JSON string with the model information (non-ASCII text is left unescaped)
+    """
+    import json
+
+    models = [
+        "tiny", "base", "small", "medium", "large-v1", "large-v2", "large-v3"
+    ]
+    devices = ["cpu", "cuda"] if torch.cuda.is_available() else ["cpu"]
+    compute_types = ["float16", "int8"] if torch.cuda.is_available() else ["int8"]
+
+    # Supported languages
+    languages = {
+        "zh": "中文", "en": "英语", "ja": "日语", "ko": "韩语", "de": "德语",
+        "fr": "法语", "es": "西班牙语", "ru": "俄语", "it": "意大利语",
+        "pt": "葡萄牙语", "nl": "荷兰语", "ar": "阿拉伯语", "hi": "印地语",
+        "tr": "土耳其语", "vi": "越南语", "th": "泰语", "id": "印尼语"
+    }
+
+    # Supported audio formats
+    audio_formats = [".mp3", ".wav", ".m4a", ".flac", ".ogg", ".aac"]
+
+    info = {
+        "available_models": models,
+        "default_model": "large-v3",
+        "available_devices": devices,
+        "default_device": "cuda" if torch.cuda.is_available() else "cpu",
+        "available_compute_types": compute_types,
+        "default_compute_type": "float16" if torch.cuda.is_available() else "int8",
+        "cuda_available": torch.cuda.is_available(),
+        "supported_languages": languages,
+        "supported_audio_formats": audio_formats,
+        "version": "0.1.1"
+    }
+
+    if torch.cuda.is_available():
+        info["gpu_info"] = {
+            "name": torch.cuda.get_device_name(0),
+            "memory_total": f"{torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB",
+            "memory_available": f"{torch.cuda.get_device_properties(0).total_memory / 1e9 - torch.cuda.memory_allocated() / 1e9:.2f} GB"
+        }
+
+    return json.dumps(info, indent=2, ensure_ascii=False)  # keep the Chinese names readable, not \uXXXX
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,4 +4,6 @@ version = "0.1.0"
 description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.12"
-dependencies = []
+dependencies = [
+    "faster-whisper>=1.1.1",
+]
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,22 @@
-faster-whisper>=0.9.0
+# uv pip install -r ./requirements.txt --index-url https://download.pytorch.org/whl/cu126
+faster-whisper
 torch==2.6.0+cu126
 torchaudio==2.6.0+cu126
-mcp[cli]>=1.2.0
+
+# uv pip install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu126
+# pip install "faster-whisper>=0.9.0"
+# pip install "mcp[cli]>=1.2.0"
+
+# PyTorch安装指南:
+#    请根据您的CUDA版本安装适当版本的PyTorch:
+#
+#    • CUDA 12.6:
+#      pip install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu126
+#
+#    • CUDA 12.1:
+#      pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu121
+#
+#    • CPU版本:
+#      pip install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cpu
+#
+#    可用命令`nvcc --version`或`nvidia-smi`查看CUDA版本
--- a/transcriber.py
+++ b/transcriber.py
@@ -0,0 +1,326 @@
+#!/usr/bin/env python3
+"""
+转录核心模块
+包含音频转录的核心逻辑
+"""
+
+import os
+import time
+import logging
+from typing import Dict, Any, Tuple, List, Optional, Union
+
+from model_manager import get_whisper_model
+from audio_processor import validate_audio_file, process_audio
+from formatters import format_vtt, format_srt, format_json, format_time
+
+# 日志配置
+logger = logging.getLogger(__name__)
+
+def transcribe_audio(
+    audio_path: str,
+    model_name: str = "large-v3",
+    device: str = "auto",
+    compute_type: str = "auto",
+    language: Optional[str] = None,
+    output_format: str = "vtt",
+    beam_size: int = 5,
+    temperature: float = 0.0,
+    initial_prompt: Optional[str] = None,
+    output_directory: Optional[str] = None
+) -> str:
+    """
+    Transcribe one audio file with Faster Whisper and save the result to disk.
+
+    Args:
+        audio_path: Path of the audio file
+        model_name: Model name (tiny, base, small, medium, large-v1, large-v2, large-v3)
+        device: Device to run on (cpu, cuda, auto)
+        compute_type: Compute type (float16, int8, auto)
+        language: Language code (e.g. zh, en, ja; auto-detected by default)
+        output_format: Output format (vtt, srt or json); anything else returns an error
+        beam_size: Beam search size; larger values may improve accuracy but are slower
+        temperature: Sampling temperature (0 means greedy decoding)
+        initial_prompt: Initial prompt text that can give the model more context
+        output_directory: Output directory; defaults to the directory of the audio file
+
+    Returns:
+        str: Status message naming the saved file, or a description of the error
+    """
+    # Reject unknown formats up front instead of writing JSON under an arbitrary extension
+    output_format = str(output_format).lower()
+    if output_format not in ("vtt", "srt", "json"):
+        return f"错误: 不支持的输出格式: {output_format}。支持的格式: vtt, srt, json"
+
+    # Validate the audio file
+    validation_result = validate_audio_file(audio_path)
+    if validation_result != "ok":
+        return validation_result
+
+    try:
+        # Get the model instance
+        model_instance = get_whisper_model(model_name, device, compute_type)
+
+        # Validate the language code
+        supported_languages = {
+            "zh": "中文", "en": "英语", "ja": "日语", "ko": "韩语", "de": "德语",
+            "fr": "法语", "es": "西班牙语", "ru": "俄语", "it": "意大利语",
+            "pt": "葡萄牙语", "nl": "荷兰语", "ar": "阿拉伯语", "hi": "印地语",
+            "tr": "土耳其语", "vi": "越南语", "th": "泰语", "id": "印尼语"
+        }
+
+        if language is not None and language not in supported_languages:
+            logger.warning(f"未知的语言代码: {language}，将使用自动检测")
+            language = None
+
+        # Transcription options
+        options = {
+            "language": language,
+            "vad_filter": True,  # enable voice activity detection
+            "vad_parameters": {"min_silence_duration_ms": 500},  # VAD tuning
+            "beam_size": beam_size,
+            "temperature": temperature,
+            "initial_prompt": initial_prompt,
+            "word_timestamps": True,  # word-level timestamps
+            "suppress_tokens": [-1],  # suppress special tokens
+            "condition_on_previous_text": True,  # condition on the previous text
+            "compression_ratio_threshold": 2.4  # compression-ratio threshold used to filter repetitive output
+        }
+
+        start_time = time.time()
+        logger.info(f"开始转录文件: {os.path.basename(audio_path)}")
+
+        # Pre-process the audio
+        audio_source = process_audio(audio_path)
+
+        # Run the transcription, preferring the batched pipeline
+        if model_instance['batched_model'] is not None and model_instance['device'] == 'cuda':
+            logger.info("使用批处理加速进行转录...")
+            segments, info = model_instance['batched_model'].transcribe(
+                audio_source,
+                batch_size=model_instance['batch_size'],  # passed separately from the shared options
+                **options
+            )
+        else:
+            logger.info("使用标准模型进行转录...")
+            segments, info = model_instance['model'].transcribe(audio_source, **options)
+
+        # Materialize the generator into a list
+        segment_list = list(segments)
+
+        if not segment_list:
+            return "转录失败，未获得结果"
+
+        # Log the transcription statistics
+        elapsed_time = time.time() - start_time
+        logger.info(f"转录完成，用时: {elapsed_time:.2f}秒，检测语言: {info.language}，音频长度: {info.duration:.2f}秒")
+
+        # Format the result (output_format was validated and lower-cased above)
+        if output_format == "vtt":
+            transcription_result = format_vtt(segment_list)
+        elif output_format == "srt":
+            transcription_result = format_srt(segment_list)
+        else:
+            transcription_result = format_json(segment_list, info)
+
+        # The result is written next to the audio file unless a directory was given
+        audio_filename = os.path.splitext(os.path.basename(audio_path))[0]
+        if output_directory is None:
+            output_dir = os.path.dirname(audio_path)
+        else:
+            output_dir = output_directory
+            os.makedirs(output_dir, exist_ok=True)  # make sure the directory exists
+
+        # Build a file name that carries a timestamp
+        timestamp = time.strftime("%Y%m%d%H%M%S")
+        output_filename = f"{audio_filename}_{timestamp}.{output_format}"
+        output_path = os.path.join(output_dir, output_filename)
+
+        # Write the result to the file
+        try:
+            with open(output_path, "w", encoding="utf-8") as f:
+                f.write(transcription_result)
+            logger.info(f"转录结果已保存到: {output_path}")
+            return f"转录成功，结果已保存到: {output_path}"
+        except Exception as e:
+            logger.error(f"保存转录结果失败: {str(e)}")
+            return f"转录成功，但保存结果失败: {str(e)}"
+
+    except Exception as e:
+        logger.error(f"转录失败: {str(e)}")
+        return f"转录过程中发生错误: {str(e)}"
+
+
+def report_progress(current: int, total: int, elapsed_time: float) -> str:
+    """
+    Build a one-line progress report.
+
+    Args:
+        current: Number of items processed so far
+        total: Total number of items
+        elapsed_time: Elapsed time in seconds
+
+    Returns:
+        str: Progress, elapsed time and estimated remaining time
+    """
+    progress = current / total * 100 if total > 0 else 0.0  # total == 0 must not divide by zero
+    eta = (elapsed_time / current) * (total - current) if current > 0 else 0
+    return (f"进度: {current}/{total} ({progress:.1f}%)" +
+            f" | 已用时间: {format_time(elapsed_time)}" +
+            f" | 预计剩余: {format_time(eta)}")
+
+def _list_audio_files(audio_folder: str, supported_formats: List[str]) -> List[str]:
+    """
+    List the audio files located directly inside a folder.
+
+    Args:
+        audio_folder: Folder to scan; subfolders are not searched.
+        supported_formats: Accepted lower-case extensions, including the dot.
+
+    Returns:
+        List[str]: Matching paths, sorted so the processing order is stable.
+    """
+    return sorted(
+        os.path.join(audio_folder, filename)
+        for filename in os.listdir(audio_folder)
+        if os.path.splitext(filename)[1].lower() in supported_formats
+    )
+
+
+def _saved_output_path(result: str) -> Optional[str]:
+    """
+    Extract the output path from a transcribe_audio status message.
+
+    Returns None unless the message reports that the result was saved, so
+    "no segments" and "transcribed but not saved" outcomes count as failures.
+    """
+    saved_prefix = "转录成功，结果已保存到: "  # must match transcribe_audio's success message
+    if result.startswith(saved_prefix):
+        return result[len(saved_prefix):]
+    return None
+
+
+def batch_transcribe(
+    audio_folder: str,
+    output_folder: str = None,
+    model_name: str = "large-v3",
+    device: str = "auto",
+    compute_type: str = "auto",
+    language: str = None,
+    output_format: str = "vtt",
+    beam_size: int = 5,
+    temperature: float = 0.0,
+    initial_prompt: str = None,
+    parallel_files: int = 1
+) -> str:
+    """
+    Transcribe every supported audio file found directly inside a folder.
+
+    Args:
+        audio_folder: Folder that contains the audio files
+        output_folder: Output folder; defaults to the "transcript" subfolder of audio_folder
+        model_name: Model name (tiny, base, small, medium, large-v1, large-v2, large-v3)
+        device: Device to run on (cpu, cuda, auto)
+        compute_type: Compute type (float16, int8, auto)
+        language: Language code (e.g. zh, en, ja; auto-detected by default)
+        output_format: Output format (vtt, srt or json)
+        beam_size: Beam search size; larger values may improve accuracy but are slower
+        temperature: Sampling temperature; 0 means greedy decoding
+        initial_prompt: Initial prompt text that can give the model more context
+        parallel_files: Number of files to process in parallel (see the note below)
+
+    Returns:
+        str: Batch summary with the total time and the success/failure counts
+
+    Note:
+        parallel_files is currently not used: files are processed one at a time.
+    """
+    if not os.path.isdir(audio_folder):
+        return f"错误: 文件夹不存在: {audio_folder}"
+
+    # Validate the output format before creating any directory
+    valid_formats = ["vtt", "srt", "json"]
+    if output_format.lower() not in valid_formats:
+        return f"错误: 不支持的输出格式: {output_format}。支持的格式: {', '.join(valid_formats)}"
+
+    # Default output folder
+    if output_folder is None:
+        output_folder = os.path.join(audio_folder, "transcript")
+
+    # Make sure the output directory exists
+    os.makedirs(output_folder, exist_ok=True)
+
+    # Collect the audio files
+    supported_formats = [".mp3", ".wav", ".m4a", ".flac", ".ogg", ".aac"]
+    audio_files = _list_audio_files(audio_folder, supported_formats)
+
+    if not audio_files:
+        return f"在 {audio_folder} 中未找到支持的音频文件。支持的格式: {', '.join(supported_formats)}"
+
+    # Record the start time
+    start_time = time.time()
+    total_files = len(audio_files)
+    logger.info(f"开始批量转录 {total_files} 个文件，输出格式: {output_format}")
+
+    # Preload the model so it is not loaded again for every file
+    try:
+        get_whisper_model(model_name, device, compute_type)
+        logger.info(f"已预加载模型: {model_name}")
+    except Exception as e:
+        logger.error(f"预加载模型失败: {str(e)}")
+        return f"批处理失败: 无法加载模型 {model_name}: {str(e)}"
+
+    # Per-file outcome lines and counters
+    results = []
+    success_count = 0
+    error_count = 0
+
+    # Process each file
+    for i, audio_path in enumerate(audio_files):
+        file_name = os.path.basename(audio_path)
+        elapsed = time.time() - start_time
+
+        # Report progress
+        progress_msg = report_progress(i, total_files, elapsed)
+        logger.info(f"{progress_msg} | 当前处理: {file_name}")
+
+        # Run the transcription
+        try:
+            result = transcribe_audio(
+                audio_path=audio_path,
+                model_name=model_name,
+                device=device,
+                compute_type=compute_type,
+                language=language,
+                output_format=output_format,
+                beam_size=beam_size,
+                temperature=temperature,
+                initial_prompt=initial_prompt,
+                output_directory=output_folder
+            )
+        except Exception as e:
+            logger.error(f"转录过程中发生错误: {file_name} - {str(e)}")
+            results.append(f"❌ 失败: {file_name} - {str(e)}")
+            error_count += 1
+            continue
+
+        # Every outcome is counted exactly once: saved results succeed, anything else fails
+        output_path = _saved_output_path(result)
+        if output_path is None:
+            logger.error(f"转录失败: {file_name} - {result}")
+            results.append(f"❌ 失败: {file_name} - {result}")
+            error_count += 1
+            continue
+
+        success_count += 1
+        results.append(f"✅ 成功: {file_name} -> {os.path.basename(output_path)}")
+
+    # Total time spent on the batch
+    total_transcription_time = time.time() - start_time
+    # Build the batch summary
+    summary = f"批处理完成，总转录时间: {format_time(total_transcription_time)}"
+    summary += f" | 成功: {success_count}/{total_files}"
+    summary += f" | 失败: {error_count}/{total_files}"
+    # Log the per-file outcomes
+    for result in results:
+        logger.info(result)
+    return summary
--- a/uv.lock
+++ b/uv.lock
@@ -0,0 +1,419 @@
+version = 1
+revision = 1
+requires-python = ">=3.12"
+
+[[package]]
+name = "av"
+version = "14.2.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f8/b6/83129e0337376214b0304893cbf0ad0a54718bb47845517fa5870439ca0b/av-14.2.0.tar.gz", hash = "sha256:132b5d52ca262b97b0356e8f48cbbe54d0ac232107a722ab8cc8c0c19eafa17b", size = 4063022 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5b/88/b56f5e5fa2486ee51413b043e08c7f5ed119c1e10b72725593da30adc28f/av-14.2.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:a3da3e951148291d70f6cb3fb37bf81580b01992e915ef1030108e4076f62d38", size = 22070132 },
+    { url = "https://files.pythonhosted.org/packages/89/36/787af232db9b3d5bbd5eb4d1d46c51b9669cba5b2273bb68a445cb281db8/av-14.2.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:6a6aae9e17aae4f2a97335825c0a701b763b72aaf89428f2a70bbdc83b64ad23", size = 27454954 },
+    { url = "https://files.pythonhosted.org/packages/d3/c3/a174388d393f1564ad4c1b8300eb4f3e972851a4d392c1eba66a6848749e/av-14.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:897be9a665c365dfcf0c10a257fe223521ed4d3b478e6b258f55f7cd13fdedd3", size = 37748788 },
+    { url = "https://files.pythonhosted.org/packages/f1/b4/96469f9e2b2763d49cd185be31a2512e52c9ff8526ee113cadfbab036850/av-14.2.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c9b5fc39524903c0bae26e856b7cff4b227f8472a9e8851b117a7711d3a01ac6", size = 36062884 },
+    { url = "https://files.pythonhosted.org/packages/ed/e8/cf60f3fcde3d0eedee3e9ff66b674a9b85bffc907dccebbc56fb5ac4a954/av-14.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14c5f00b0b60d127ac0cde46a5bce9b67e905ba93033fdd48ae550c0c05d51b8", size = 40040294 },
+    { url = "https://files.pythonhosted.org/packages/93/47/94b8fcfb8f102b45f2ca427b65a1243376d83d20c27f409170a4cc20e8ff/av-14.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:de04052374dbd36d9e8bcf2ead6501cc45e16bc13036d8cc17dacec96b7f6c51", size = 30857257 },
+    { url = "https://files.pythonhosted.org/packages/09/5b/cd6c553af8385e590b5f816093ecb6e267e3f00c2669f8323be8f62b96c3/av-14.2.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e745ac7db026f4f68e4b5aebeda0d6188d2fb78a26825e628b97ee7ccaadc7e0", size = 22029217 },
+    { url = "https://files.pythonhosted.org/packages/ce/bd/82c55b903fc1fc9428881742a10f5a4180a4f60ad2d75eb451acf85e7ceb/av-14.2.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:69e93ae8fd4e55247ebcc966a0bf1bcc7fcba2f6b9811eb622613c2615aec59f", size = 27412669 },
+    { url = "https://files.pythonhosted.org/packages/a9/a5/39b9705e23b8b2369a45d00de24cbe080d4cd0ad2907c9a72bd5b5e42141/av-14.2.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01dfdd042a1077e37308a9c2538eb7cfb01588b916c9083f66fbf1b94432fb1a", size = 37392185 },
+    { url = "https://files.pythonhosted.org/packages/56/4d/7b741803a88342d1e532d651be7a4a3f00a225dbc3a1648f8c447b64cc93/av-14.2.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c357421d4ec2f2eb919c0a4d48814328b93f456da12e8d751ca13be02920a82e", size = 35719211 },
+    { url = "https://files.pythonhosted.org/packages/44/58/5f156af35eb58857f3a1c21b0d9b1bbfa535c2b4cecd6e0789c2202ead08/av-14.2.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7aeec3413822ffacc67a4832a0254cb67a3cfe6e3774ed80c0fa1b349dd1fe2b", size = 39691118 },
+    { url = "https://files.pythonhosted.org/packages/5f/87/d7a5d6995f90b73b70554eea5ee9743ef1e2897be8117aa7a48e8c834239/av-14.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:b1c8b180cf339644f01b9a3c9a55aedbd1cf60ac60335f0254dcd6af3ba3fab4", size = 30827999 },
+]
+
+[[package]]
+name = "certifi"
+version = "2025.1.31"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/1c/ab/c9f1e32b7b1bf505bf26f0ef697775960db7932abeb7b516de930ba2705f/certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651", size = 167577 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/38/fc/bce832fd4fd99766c04d1ee0eead6b0ec6486fb100ae5e74c1d91292b982/certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe", size = 166393 },
+]
+
+[[package]]
+name = "charset-normalizer"
+version = "3.4.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/16/b0/572805e227f01586461c80e0fd25d65a2115599cc9dad142fee4b747c357/charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3", size = 123188 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0a/9a/dd1e1cdceb841925b7798369a09279bd1cf183cef0f9ddf15a3a6502ee45/charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545", size = 196105 },
+    { url = "https://files.pythonhosted.org/packages/d3/8c/90bfabf8c4809ecb648f39794cf2a84ff2e7d2a6cf159fe68d9a26160467/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7", size = 140404 },
+    { url = "https://files.pythonhosted.org/packages/ad/8f/e410d57c721945ea3b4f1a04b74f70ce8fa800d393d72899f0a40526401f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757", size = 150423 },
+    { url = "https://files.pythonhosted.org/packages/f0/b8/e6825e25deb691ff98cf5c9072ee0605dc2acfca98af70c2d1b1bc75190d/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa", size = 143184 },
+    { url = "https://files.pythonhosted.org/packages/3e/a2/513f6cbe752421f16d969e32f3583762bfd583848b763913ddab8d9bfd4f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d", size = 145268 },
+    { url = "https://files.pythonhosted.org/packages/74/94/8a5277664f27c3c438546f3eb53b33f5b19568eb7424736bdc440a88a31f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616", size = 147601 },
+    { url = "https://files.pythonhosted.org/packages/7c/5f/6d352c51ee763623a98e31194823518e09bfa48be2a7e8383cf691bbb3d0/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b", size = 141098 },
+    { url = "https://files.pythonhosted.org/packages/78/d4/f5704cb629ba5ab16d1d3d741396aec6dc3ca2b67757c45b0599bb010478/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d", size = 149520 },
+    { url = "https://files.pythonhosted.org/packages/c5/96/64120b1d02b81785f222b976c0fb79a35875457fa9bb40827678e54d1bc8/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a", size = 152852 },
+    { url = "https://files.pythonhosted.org/packages/84/c9/98e3732278a99f47d487fd3468bc60b882920cef29d1fa6ca460a1fdf4e6/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9", size = 150488 },
+    { url = "https://files.pythonhosted.org/packages/13/0e/9c8d4cb99c98c1007cc11eda969ebfe837bbbd0acdb4736d228ccaabcd22/charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1", size = 146192 },
+    { url = "https://files.pythonhosted.org/packages/b2/21/2b6b5b860781a0b49427309cb8670785aa543fb2178de875b87b9cc97746/charset_normalizer-3.4.1-cp312-cp312-win32.whl", hash = "sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35", size = 95550 },
+    { url = "https://files.pythonhosted.org/packages/21/5b/1b390b03b1d16c7e382b561c5329f83cc06623916aab983e8ab9239c7d5c/charset_normalizer-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f", size = 102785 },
+    { url = "https://files.pythonhosted.org/packages/38/94/ce8e6f63d18049672c76d07d119304e1e2d7c6098f0841b51c666e9f44a0/charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda", size = 195698 },
+    { url = "https://files.pythonhosted.org/packages/24/2e/dfdd9770664aae179a96561cc6952ff08f9a8cd09a908f259a9dfa063568/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313", size = 140162 },
+    { url = "https://files.pythonhosted.org/packages/24/4e/f646b9093cff8fc86f2d60af2de4dc17c759de9d554f130b140ea4738ca6/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9", size = 150263 },
+    { url = "https://files.pythonhosted.org/packages/5e/67/2937f8d548c3ef6e2f9aab0f6e21001056f692d43282b165e7c56023e6dd/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b", size = 142966 },
+    { url = "https://files.pythonhosted.org/packages/52/ed/b7f4f07de100bdb95c1756d3a4d17b90c1a3c53715c1a476f8738058e0fa/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11", size = 144992 },
+    { url = "https://files.pythonhosted.org/packages/96/2c/d49710a6dbcd3776265f4c923bb73ebe83933dfbaa841c5da850fe0fd20b/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f", size = 147162 },
+    { url = "https://files.pythonhosted.org/packages/b4/41/35ff1f9a6bd380303dea55e44c4933b4cc3c4850988927d4082ada230273/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd", size = 140972 },
+    { url = "https://files.pythonhosted.org/packages/fb/43/c6a0b685fe6910d08ba971f62cd9c3e862a85770395ba5d9cad4fede33ab/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2", size = 149095 },
+    { url = "https://files.pythonhosted.org/packages/4c/ff/a9a504662452e2d2878512115638966e75633519ec11f25fca3d2049a94a/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886", size = 152668 },
+    { url = "https://files.pythonhosted.org/packages/6c/71/189996b6d9a4b932564701628af5cee6716733e9165af1d5e1b285c530ed/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601", size = 150073 },
+    { url = "https://files.pythonhosted.org/packages/e4/93/946a86ce20790e11312c87c75ba68d5f6ad2208cfb52b2d6a2c32840d922/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd", size = 145732 },
+    { url = "https://files.pythonhosted.org/packages/cd/e5/131d2fb1b0dddafc37be4f3a2fa79aa4c037368be9423061dccadfd90091/charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407", size = 95391 },
+    { url = "https://files.pythonhosted.org/packages/27/f2/4f9a69cc7712b9b5ad8fdb87039fd89abba997ad5cbe690d1835d40405b0/charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971", size = 102702 },
+    { url = "https://files.pythonhosted.org/packages/0e/f6/65ecc6878a89bb1c23a086ea335ad4bf21a588990c3f535a227b9eea9108/charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85", size = 49767 },
+]
+
+[[package]]
+name = "colorama"
+version = "0.4.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 },
+]
+
+[[package]]
+name = "coloredlogs"
+version = "15.0.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "humanfriendly" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/cc/c7/eed8f27100517e8c0e6b923d5f0845d0cb99763da6fdee00478f91db7325/coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0", size = 278520 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5df57714cd/coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934", size = 46018 },
+]
+
+[[package]]
+name = "ctranslate2"
+version = "4.5.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "numpy" },
+    { name = "pyyaml" },
+    { name = "setuptools" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/30/54/d65d3ae24ffd82581e4b0823960d81cfe753dd8f118cf9ef2106632e1909/ctranslate2-4.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1a0509f172edc994aec6870fe0a90c799d85fd7ddf564059d25b60932ab2e2c4", size = 1357850 },
+    { url = "https://files.pythonhosted.org/packages/cc/46/3615f9bdb9bc18f05b4371bb974befc380b73f6ba415e813e9d7ac0c2fb5/ctranslate2-4.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c158f2ada6e3347388ad13c69e4a6a729ba40c035a400dd447995950ecf5e62f", size = 17460247 },
+    { url = "https://files.pythonhosted.org/packages/e2/f0/3be15ad93c44cf60cd014f8e6f9ee604fc992b671451e480fae40f79ef87/ctranslate2-4.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de3c5877fce31a0fcf3b5edbc8d4e6e22fd94a86c6b49680740ef41130efffc1", size = 38798520 },
+    { url = "https://files.pythonhosted.org/packages/66/97/e50a97b0025baac851ce68928ee51ceadc9f0f9e0b9b543dd32da56d5571/ctranslate2-4.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:a16a784ec7924166bdf3e86754feda0441f04d9851fc3412f34f1e2de7cbd51b", size = 19464880 },
+]
+
+[[package]]
+name = "fast-whisper-mcp-server"
+version = "0.1.0"
+source = { virtual = "." }
+dependencies = [
+    { name = "faster-whisper" },
+]
+
+[package.metadata]
+requires-dist = [{ name = "faster-whisper", specifier = ">=1.1.1" }]
+
+[[package]]
+name = "faster-whisper"
+version = "1.1.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "av" },
+    { name = "ctranslate2" },
+    { name = "huggingface-hub" },
+    { name = "onnxruntime" },
+    { name = "tokenizers" },
+    { name = "tqdm" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/be/53/195e5b42ede5f09453828d3b00d52bd952ed0e07a8e5c6497affefcfa3be/faster-whisper-1.1.1.tar.gz", hash = "sha256:50d27571970c1be0c2b2680a2593d5d12f9f5d2f10484f242a1afbe7cb946604", size = 1124684 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ad/69/28359d152f9e2ec1ff4dff3da47011b6346e9a472f89b409bb13017a7d1f/faster_whisper-1.1.1-py3-none-any.whl", hash = "sha256:5808dc334fb64fb4336921450abccfe5e313a859b31ba61def0ac7f639383d90", size = 1118368 },
+]
+
+[[package]]
+name = "filelock"
+version = "3.18.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/0a/10/c23352565a6544bdc5353e0b15fc1c563352101f30e24bf500207a54df9a/filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2", size = 18075 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215 },
+]
+
+[[package]]
+name = "flatbuffers"
+version = "25.2.10"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e4/30/eb5dce7994fc71a2f685d98ec33cc660c0a5887db5610137e60d8cbc4489/flatbuffers-25.2.10.tar.gz", hash = "sha256:97e451377a41262f8d9bd4295cc836133415cc03d8cb966410a4af92eb00d26e", size = 22170 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b8/25/155f9f080d5e4bc0082edfda032ea2bc2b8fab3f4d25d46c1e9dd22a1a89/flatbuffers-25.2.10-py2.py3-none-any.whl", hash = "sha256:ebba5f4d5ea615af3f7fd70fc310636fbb2bbd1f566ac0a23d98dd412de50051", size = 30953 },
+]
+
+[[package]]
+name = "fsspec"
+version = "2025.3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/34/f4/5721faf47b8c499e776bc34c6a8fc17efdf7fdef0b00f398128bc5dcb4ac/fsspec-2025.3.0.tar.gz", hash = "sha256:a935fd1ea872591f2b5148907d103488fc523295e6c64b835cfad8c3eca44972", size = 298491 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/56/53/eb690efa8513166adef3e0669afd31e95ffde69fb3c52ec2ac7223ed6018/fsspec-2025.3.0-py3-none-any.whl", hash = "sha256:efb87af3efa9103f94ca91a7f8cb7a4df91af9f74fc106c9c7ea0efd7277c1b3", size = 193615 },
+]
+
+[[package]]
+name = "huggingface-hub"
+version = "0.29.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "filelock" },
+    { name = "fsspec" },
+    { name = "packaging" },
+    { name = "pyyaml" },
+    { name = "requests" },
+    { name = "tqdm" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e5/f9/851f34b02970e8143d41d4001b2d49e54ef113f273902103823b8bc95ada/huggingface_hub-0.29.3.tar.gz", hash = "sha256:64519a25716e0ba382ba2d3fb3ca082e7c7eb4a2fc634d200e8380006e0760e5", size = 390123 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/40/0c/37d380846a2e5c9a3c6a73d26ffbcfdcad5fc3eacf42fdf7cff56f2af634/huggingface_hub-0.29.3-py3-none-any.whl", hash = "sha256:0b25710932ac649c08cdbefa6c6ccb8e88eef82927cacdb048efb726429453aa", size = 468997 },
+]
+
+[[package]]
+name = "humanfriendly"
+version = "10.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pyreadline3", marker = "sys_platform == 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/cc/3f/2c29224acb2e2df4d2046e4c73ee2662023c58ff5b113c4c1adac0886c43/humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc", size = 360702 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f0/0f/310fb31e39e2d734ccaa2c0fb981ee41f7bd5056ce9bc29b2248bd569169/humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477", size = 86794 },
+]
+
+[[package]]
+name = "idna"
+version = "3.10"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 },
+]
+
+[[package]]
+name = "mpmath"
+version = "1.3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198 },
+]
+
+[[package]]
+name = "numpy"
+version = "2.2.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e1/78/31103410a57bc2c2b93a3597340a8119588571f6a4539067546cb9a0bfac/numpy-2.2.4.tar.gz", hash = "sha256:9ba03692a45d3eef66559efe1d1096c4b9b75c0986b5dff5530c378fb8331d4f", size = 20270701 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a2/30/182db21d4f2a95904cec1a6f779479ea1ac07c0647f064dea454ec650c42/numpy-2.2.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a7b9084668aa0f64e64bd00d27ba5146ef1c3a8835f3bd912e7a9e01326804c4", size = 20947156 },
+    { url = "https://files.pythonhosted.org/packages/24/6d/9483566acfbda6c62c6bc74b6e981c777229d2af93c8eb2469b26ac1b7bc/numpy-2.2.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:dbe512c511956b893d2dacd007d955a3f03d555ae05cfa3ff1c1ff6df8851854", size = 14133092 },
+    { url = "https://files.pythonhosted.org/packages/27/f6/dba8a258acbf9d2bed2525cdcbb9493ef9bae5199d7a9cb92ee7e9b2aea6/numpy-2.2.4-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:bb649f8b207ab07caebba230d851b579a3c8711a851d29efe15008e31bb4de24", size = 5163515 },
+    { url = "https://files.pythonhosted.org/packages/62/30/82116199d1c249446723c68f2c9da40d7f062551036f50b8c4caa42ae252/numpy-2.2.4-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:f34dc300df798742b3d06515aa2a0aee20941c13579d7a2f2e10af01ae4901ee", size = 6696558 },
+    { url = "https://files.pythonhosted.org/packages/0e/b2/54122b3c6df5df3e87582b2e9430f1bdb63af4023c739ba300164c9ae503/numpy-2.2.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3f7ac96b16955634e223b579a3e5798df59007ca43e8d451a0e6a50f6bfdfba", size = 14084742 },
+    { url = "https://files.pythonhosted.org/packages/02/e2/e2cbb8d634151aab9528ef7b8bab52ee4ab10e076509285602c2a3a686e0/numpy-2.2.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f92084defa704deadd4e0a5ab1dc52d8ac9e8a8ef617f3fbb853e79b0ea3592", size = 16134051 },
+    { url = "https://files.pythonhosted.org/packages/8e/21/efd47800e4affc993e8be50c1b768de038363dd88865920439ef7b422c60/numpy-2.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7a4e84a6283b36632e2a5b56e121961f6542ab886bc9e12f8f9818b3c266bfbb", size = 15578972 },
+    { url = "https://files.pythonhosted.org/packages/04/1e/f8bb88f6157045dd5d9b27ccf433d016981032690969aa5c19e332b138c0/numpy-2.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:11c43995255eb4127115956495f43e9343736edb7fcdb0d973defd9de14cd84f", size = 17898106 },
+    { url = "https://files.pythonhosted.org/packages/2b/93/df59a5a3897c1f036ae8ff845e45f4081bb06943039ae28a3c1c7c780f22/numpy-2.2.4-cp312-cp312-win32.whl", hash = "sha256:65ef3468b53269eb5fdb3a5c09508c032b793da03251d5f8722b1194f1790c00", size = 6311190 },
+    { url = "https://files.pythonhosted.org/packages/46/69/8c4f928741c2a8efa255fdc7e9097527c6dc4e4df147e3cadc5d9357ce85/numpy-2.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:2aad3c17ed2ff455b8eaafe06bcdae0062a1db77cb99f4b9cbb5f4ecb13c5146", size = 12644305 },
+    { url = "https://files.pythonhosted.org/packages/2a/d0/bd5ad792e78017f5decfb2ecc947422a3669a34f775679a76317af671ffc/numpy-2.2.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cf4e5c6a278d620dee9ddeb487dc6a860f9b199eadeecc567f777daace1e9e7", size = 20933623 },
+    { url = "https://files.pythonhosted.org/packages/c3/bc/2b3545766337b95409868f8e62053135bdc7fa2ce630aba983a2aa60b559/numpy-2.2.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1974afec0b479e50438fc3648974268f972e2d908ddb6d7fb634598cdb8260a0", size = 14148681 },
+    { url = "https://files.pythonhosted.org/packages/6a/70/67b24d68a56551d43a6ec9fe8c5f91b526d4c1a46a6387b956bf2d64744e/numpy-2.2.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:79bd5f0a02aa16808fcbc79a9a376a147cc1045f7dfe44c6e7d53fa8b8a79392", size = 5148759 },
+    { url = "https://files.pythonhosted.org/packages/1c/8b/e2fc8a75fcb7be12d90b31477c9356c0cbb44abce7ffb36be39a0017afad/numpy-2.2.4-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:3387dd7232804b341165cedcb90694565a6015433ee076c6754775e85d86f1fc", size = 6683092 },
+    { url = "https://files.pythonhosted.org/packages/13/73/41b7b27f169ecf368b52533edb72e56a133f9e86256e809e169362553b49/numpy-2.2.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f527d8fdb0286fd2fd97a2a96c6be17ba4232da346931d967a0630050dfd298", size = 14081422 },
+    { url = "https://files.pythonhosted.org/packages/4b/04/e208ff3ae3ddfbafc05910f89546382f15a3f10186b1f56bd99f159689c2/numpy-2.2.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bce43e386c16898b91e162e5baaad90c4b06f9dcbe36282490032cec98dc8ae7", size = 16132202 },
+    { url = "https://files.pythonhosted.org/packages/fe/bc/2218160574d862d5e55f803d88ddcad88beff94791f9c5f86d67bd8fbf1c/numpy-2.2.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:31504f970f563d99f71a3512d0c01a645b692b12a63630d6aafa0939e52361e6", size = 15573131 },
+    { url = "https://files.pythonhosted.org/packages/a5/78/97c775bc4f05abc8a8426436b7cb1be806a02a2994b195945600855e3a25/numpy-2.2.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:81413336ef121a6ba746892fad881a83351ee3e1e4011f52e97fba79233611fd", size = 17894270 },
+    { url = "https://files.pythonhosted.org/packages/b9/eb/38c06217a5f6de27dcb41524ca95a44e395e6a1decdc0c99fec0832ce6ae/numpy-2.2.4-cp313-cp313-win32.whl", hash = "sha256:f486038e44caa08dbd97275a9a35a283a8f1d2f0ee60ac260a1790e76660833c", size = 6308141 },
+    { url = "https://files.pythonhosted.org/packages/52/17/d0dd10ab6d125c6d11ffb6dfa3423c3571befab8358d4f85cd4471964fcd/numpy-2.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:207a2b8441cc8b6a2a78c9ddc64d00d20c303d79fba08c577752f080c4007ee3", size = 12636885 },
+    { url = "https://files.pythonhosted.org/packages/fa/e2/793288ede17a0fdc921172916efb40f3cbc2aa97e76c5c84aba6dc7e8747/numpy-2.2.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8120575cb4882318c791f839a4fd66161a6fa46f3f0a5e613071aae35b5dd8f8", size = 20961829 },
+    { url = "https://files.pythonhosted.org/packages/3a/75/bb4573f6c462afd1ea5cbedcc362fe3e9bdbcc57aefd37c681be1155fbaa/numpy-2.2.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a761ba0fa886a7bb33c6c8f6f20213735cb19642c580a931c625ee377ee8bd39", size = 14161419 },
+    { url = "https://files.pythonhosted.org/packages/03/68/07b4cd01090ca46c7a336958b413cdbe75002286295f2addea767b7f16c9/numpy-2.2.4-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:ac0280f1ba4a4bfff363a99a6aceed4f8e123f8a9b234c89140f5e894e452ecd", size = 5196414 },
+    { url = "https://files.pythonhosted.org/packages/a5/fd/d4a29478d622fedff5c4b4b4cedfc37a00691079623c0575978d2446db9e/numpy-2.2.4-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:879cf3a9a2b53a4672a168c21375166171bc3932b7e21f622201811c43cdd3b0", size = 6709379 },
+    { url = "https://files.pythonhosted.org/packages/41/78/96dddb75bb9be730b87c72f30ffdd62611aba234e4e460576a068c98eff6/numpy-2.2.4-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f05d4198c1bacc9124018109c5fba2f3201dbe7ab6e92ff100494f236209c960", size = 14051725 },
+    { url = "https://files.pythonhosted.org/packages/00/06/5306b8199bffac2a29d9119c11f457f6c7d41115a335b78d3f86fad4dbe8/numpy-2.2.4-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2f085ce2e813a50dfd0e01fbfc0c12bbe5d2063d99f8b29da30e544fb6483b8", size = 16101638 },
+    { url = "https://files.pythonhosted.org/packages/fa/03/74c5b631ee1ded596945c12027649e6344614144369fd3ec1aaced782882/numpy-2.2.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:92bda934a791c01d6d9d8e038363c50918ef7c40601552a58ac84c9613a665bc", size = 15571717 },
+    { url = "https://files.pythonhosted.org/packages/cb/dc/4fc7c0283abe0981e3b89f9b332a134e237dd476b0c018e1e21083310c31/numpy-2.2.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ee4d528022f4c5ff67332469e10efe06a267e32f4067dc76bb7e2cddf3cd25ff", size = 17879998 },
+    { url = "https://files.pythonhosted.org/packages/e5/2b/878576190c5cfa29ed896b518cc516aecc7c98a919e20706c12480465f43/numpy-2.2.4-cp313-cp313t-win32.whl", hash = "sha256:05c076d531e9998e7e694c36e8b349969c56eadd2cdcd07242958489d79a7286", size = 6366896 },
+    { url = "https://files.pythonhosted.org/packages/3e/05/eb7eec66b95cf697f08c754ef26c3549d03ebd682819f794cb039574a0a6/numpy-2.2.4-cp313-cp313t-win_amd64.whl", hash = "sha256:188dcbca89834cc2e14eb2f106c96d6d46f200fe0200310fc29089657379c58d", size = 12739119 },
+]
+
+[[package]]
+name = "onnxruntime"
+version = "1.21.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "coloredlogs" },
+    { name = "flatbuffers" },
+    { name = "numpy" },
+    { name = "packaging" },
+    { name = "protobuf" },
+    { name = "sympy" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ff/21/593c9bc56002a6d1ea7c2236f4a648e081ec37c8d51db2383a9e83a63325/onnxruntime-1.21.0-cp312-cp312-macosx_13_0_universal2.whl", hash = "sha256:893d67c68ca9e7a58202fa8d96061ed86a5815b0925b5a97aef27b8ba246a20b", size = 33658780 },
+    { url = "https://files.pythonhosted.org/packages/4a/b4/33ec675a8ac150478091262824413e5d4acc359e029af87f9152e7c1c092/onnxruntime-1.21.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:37b7445c920a96271a8dfa16855e258dc5599235b41c7bbde0d262d55bcc105f", size = 14159975 },
+    { url = "https://files.pythonhosted.org/packages/8b/08/eead6895ed83b56711ca6c0d31d82f109401b9937558b425509e497d6fb4/onnxruntime-1.21.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9a04aafb802c1e5573ba4552f8babcb5021b041eb4cfa802c9b7644ca3510eca", size = 16019285 },
+    { url = "https://files.pythonhosted.org/packages/77/39/e83d56e3c215713b5263cb4d4f0c69e3964bba11634233d8ae04fc7e6bf3/onnxruntime-1.21.0-cp312-cp312-win_amd64.whl", hash = "sha256:7f801318476cd7003d636a5b392f7a37c08b6c8d2f829773f3c3887029e03f32", size = 11760975 },
+    { url = "https://files.pythonhosted.org/packages/f2/25/93f65617b06c741a58eeac9e373c99df443b02a774f4cb6511889757c0da/onnxruntime-1.21.0-cp313-cp313-macosx_13_0_universal2.whl", hash = "sha256:85718cbde1c2912d3a03e3b3dc181b1480258a229c32378408cace7c450f7f23", size = 33659581 },
+    { url = "https://files.pythonhosted.org/packages/f9/03/6b6829ee8344490ab5197f39a6824499ed097d1fc8c85b1f91c0e6767819/onnxruntime-1.21.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94dff3a61538f3b7b0ea9a06bc99e1410e90509c76e3a746f039e417802a12ae", size = 14160534 },
+    { url = "https://files.pythonhosted.org/packages/a6/81/e280ddf05f83ad5e0d066ef08e31515b17bd50bb52ef2ea713d9e455e67a/onnxruntime-1.21.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c1e704b0eda5f2bbbe84182437315eaec89a450b08854b5a7762c85d04a28a0a", size = 16018947 },
+    { url = "https://files.pythonhosted.org/packages/d3/ea/011dfc2536e46e2ea984d2c0256dc585ebb1352366dffdd98764f1f44ee4/onnxruntime-1.21.0-cp313-cp313-win_amd64.whl", hash = "sha256:19b630c6a8956ef97fb7c94948b17691167aa1aaf07b5f214fa66c3e4136c108", size = 11760731 },
+    { url = "https://files.pythonhosted.org/packages/47/6b/a00f31322e91c610c7825377ef0cad884483c30d1370b896d57e7032e912/onnxruntime-1.21.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3995c4a2d81719623c58697b9510f8de9fa42a1da6b4474052797b0d712324fe", size = 14172215 },
+    { url = "https://files.pythonhosted.org/packages/58/4b/98214f13ac1cd675dfc2713ba47b5722f55ce4fba526d2b2826f2682a42e/onnxruntime-1.21.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:36b18b8f39c0f84e783902112a0dd3c102466897f96d73bb83f6a6bff283a423", size = 15990612 },
+]
+
+[[package]]
+name = "packaging"
+version = "24.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d0/63/68dbb6eb2de9cb10ee4c9c14a0148804425e13c4fb20d61cce69f53106da/packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f", size = 163950 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 },
+]
+
+[[package]]
+name = "protobuf"
+version = "6.30.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/55/de/8216061897a67b2ffe302fd51aaa76bbf613001f01cd96e2416a4955dd2b/protobuf-6.30.1.tar.gz", hash = "sha256:535fb4e44d0236893d5cf1263a0f706f1160b689a7ab962e9da8a9ce4050b780", size = 429304 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/83/f6/28460c49a8a93229e2264cd35fd147153fb524cbd944789db6b6f3cc9b13/protobuf-6.30.1-cp310-abi3-win32.whl", hash = "sha256:ba0706f948d0195f5cac504da156d88174e03218d9364ab40d903788c1903d7e", size = 419150 },
+    { url = "https://files.pythonhosted.org/packages/96/82/7045f5b3f3e338a8ab5852d22ce9c31e0a40d8b0f150a3735dc494be769a/protobuf-6.30.1-cp310-abi3-win_amd64.whl", hash = "sha256:ed484f9ddd47f0f1bf0648806cccdb4fe2fb6b19820f9b79a5adf5dcfd1b8c5f", size = 431007 },
+    { url = "https://files.pythonhosted.org/packages/b0/b6/732d04d0cdf457d05b7cba83ae73735d91ceced2439735b4500e311c44a5/protobuf-6.30.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:aa4f7dfaed0d840b03d08d14bfdb41348feaee06a828a8c455698234135b4075", size = 417579 },
+    { url = "https://files.pythonhosted.org/packages/fc/22/29dd085f6e828ab0424e73f1bae9dbb9e8bb4087cba5a9e6f21dc614694e/protobuf-6.30.1-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:47cd320b7db63e8c9ac35f5596ea1c1e61491d8a8eb6d8b45edc44760b53a4f6", size = 317319 },
+    { url = "https://files.pythonhosted.org/packages/26/10/8863ba4baa4660e3f50ad9ae974c47fb63fa6d4089b15f7db82164b1c879/protobuf-6.30.1-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:e3083660225fa94748ac2e407f09a899e6a28bf9c0e70c75def8d15706bf85fc", size = 316213 },
+    { url = "https://files.pythonhosted.org/packages/a1/d6/683a3d470398e45b4ad9b6c95b7cbabc32f9a8daf454754f0e3df1edffa6/protobuf-6.30.1-py3-none-any.whl", hash = "sha256:3c25e51e1359f1f5fa3b298faa6016e650d148f214db2e47671131b9063c53be", size = 167064 },
+]
+
+[[package]]
+name = "pyreadline3"
+version = "3.5.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/0f/49/4cea918a08f02817aabae639e3d0ac046fef9f9180518a3ad394e22da148/pyreadline3-3.5.4.tar.gz", hash = "sha256:8d57d53039a1c75adba8e50dd3d992b28143480816187ea5efbd5c78e6c885b7", size = 99839 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5a/dc/491b7661614ab97483abf2056be1deee4dc2490ecbf7bff9ab5cdbac86e1/pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6", size = 83178 },
+]
+
+[[package]]
+name = "pyyaml"
+version = "6.0.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", size = 130631 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", size = 183873 },
+    { url = "https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", size = 173302 },
+    { url = "https://files.pythonhosted.org/packages/c3/93/9916574aa8c00aa06bbac729972eb1071d002b8e158bd0e83a3b9a20a1f7/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", size = 739154 },
+    { url = "https://files.pythonhosted.org/packages/95/0f/b8938f1cbd09739c6da569d172531567dbcc9789e0029aa070856f123984/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", size = 766223 },
+    { url = "https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", size = 767542 },
+    { url = "https://files.pythonhosted.org/packages/d4/00/dd137d5bcc7efea1836d6264f049359861cf548469d18da90cd8216cf05f/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", size = 731164 },
+    { url = "https://files.pythonhosted.org/packages/c9/1f/4f998c900485e5c0ef43838363ba4a9723ac0ad73a9dc42068b12aaba4e4/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", size = 756611 },
+    { url = "https://files.pythonhosted.org/packages/df/d1/f5a275fdb252768b7a11ec63585bc38d0e87c9e05668a139fea92b80634c/PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", size = 140591 },
+    { url = "https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", size = 156338 },
+    { url = "https://files.pythonhosted.org/packages/ef/e3/3af305b830494fa85d95f6d95ef7fa73f2ee1cc8ef5b495c7c3269fb835f/PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", size = 181309 },
+    { url = "https://files.pythonhosted.org/packages/45/9f/3b1c20a0b7a3200524eb0076cc027a970d320bd3a6592873c85c92a08731/PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1", size = 171679 },
+    { url = "https://files.pythonhosted.org/packages/7c/9a/337322f27005c33bcb656c655fa78325b730324c78620e8328ae28b64d0c/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", size = 733428 },
+    { url = "https://files.pythonhosted.org/packages/a3/69/864fbe19e6c18ea3cc196cbe5d392175b4cf3d5d0ac1403ec3f2d237ebb5/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", size = 763361 },
+    { url = "https://files.pythonhosted.org/packages/04/24/b7721e4845c2f162d26f50521b825fb061bc0a5afcf9a386840f23ea19fa/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", size = 759523 },
+    { url = "https://files.pythonhosted.org/packages/2b/b2/e3234f59ba06559c6ff63c4e10baea10e5e7df868092bf9ab40e5b9c56b6/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", size = 726660 },
+    { url = "https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597 },
+    { url = "https://files.pythonhosted.org/packages/14/0d/e2c3b43bbce3cf6bd97c840b46088a3031085179e596d4929729d8d68270/PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", size = 140527 },
+    { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446 },
+]
+
+[[package]]
+name = "requests"
+version = "2.32.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "certifi" },
+    { name = "charset-normalizer" },
+    { name = "idna" },
+    { name = "urllib3" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 },
+]
+
+[[package]]
+name = "setuptools"
+version = "77.0.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/81/ed/7101d53811fd359333583330ff976e5177c5e871ca8b909d1d6c30553aa3/setuptools-77.0.3.tar.gz", hash = "sha256:583b361c8da8de57403743e756609670de6fb2345920e36dc5c2d914c319c945", size = 1367236 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a9/07/99f2cefae815c66eb23148f15d79ec055429c38fa8986edcc712ab5f3223/setuptools-77.0.3-py3-none-any.whl", hash = "sha256:67122e78221da5cf550ddd04cf8742c8fe12094483749a792d56cd669d6cf58c", size = 1255678 },
+]
+
+[[package]]
+name = "sympy"
+version = "1.13.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "mpmath" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/11/8a/5a7fd6284fa8caac23a26c9ddf9c30485a48169344b4bd3b0f02fef1890f/sympy-1.13.3.tar.gz", hash = "sha256:b27fd2c6530e0ab39e275fc9b683895367e51d5da91baa8d3d64db2565fec4d9", size = 7533196 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/99/ff/c87e0622b1dadea79d2fb0b25ade9ed98954c9033722eb707053d310d4f3/sympy-1.13.3-py3-none-any.whl", hash = "sha256:54612cf55a62755ee71824ce692986f23c88ffa77207b30c1368eda4a7060f73", size = 6189483 },
+]
+
+[[package]]
+name = "tokenizers"
+version = "0.21.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "huggingface-hub" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/92/76/5ac0c97f1117b91b7eb7323dcd61af80d72f790b4df71249a7850c195f30/tokenizers-0.21.1.tar.gz", hash = "sha256:a1bb04dc5b448985f86ecd4b05407f5a8d97cb2c0532199b2a302a604a0165ab", size = 343256 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a5/1f/328aee25f9115bf04262e8b4e5a2050b7b7cf44b59c74e982db7270c7f30/tokenizers-0.21.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e78e413e9e668ad790a29456e677d9d3aa50a9ad311a40905d6861ba7692cf41", size = 2780767 },
+    { url = "https://files.pythonhosted.org/packages/ae/1a/4526797f3719b0287853f12c5ad563a9be09d446c44ac784cdd7c50f76ab/tokenizers-0.21.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:cd51cd0a91ecc801633829fcd1fda9cf8682ed3477c6243b9a095539de4aecf3", size = 2650555 },
+    { url = "https://files.pythonhosted.org/packages/4d/7a/a209b29f971a9fdc1da86f917fe4524564924db50d13f0724feed37b2a4d/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28da6b72d4fb14ee200a1bd386ff74ade8992d7f725f2bde2c495a9a98cf4d9f", size = 2937541 },
+    { url = "https://files.pythonhosted.org/packages/3c/1e/b788b50ffc6191e0b1fc2b0d49df8cff16fe415302e5ceb89f619d12c5bc/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:34d8cfde551c9916cb92014e040806122295a6800914bab5865deb85623931cf", size = 2819058 },
+    { url = "https://files.pythonhosted.org/packages/36/aa/3626dfa09a0ecc5b57a8c58eeaeb7dd7ca9a37ad9dd681edab5acd55764c/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aaa852d23e125b73d283c98f007e06d4595732104b65402f46e8ef24b588d9f8", size = 3133278 },
+    { url = "https://files.pythonhosted.org/packages/a4/4d/8fbc203838b3d26269f944a89459d94c858f5b3f9a9b6ee9728cdcf69161/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a21a15d5c8e603331b8a59548bbe113564136dc0f5ad8306dd5033459a226da0", size = 3144253 },
+    { url = "https://files.pythonhosted.org/packages/d8/1b/2bd062adeb7c7511b847b32e356024980c0ffcf35f28947792c2d8ad2288/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2fdbd4c067c60a0ac7eca14b6bd18a5bebace54eb757c706b47ea93204f7a37c", size = 3398225 },
+    { url = "https://files.pythonhosted.org/packages/8a/63/38be071b0c8e06840bc6046991636bcb30c27f6bb1e670f4f4bc87cf49cc/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dd9a0061e403546f7377df940e866c3e678d7d4e9643d0461ea442b4f89e61a", size = 3038874 },
+    { url = "https://files.pythonhosted.org/packages/ec/83/afa94193c09246417c23a3c75a8a0a96bf44ab5630a3015538d0c316dd4b/tokenizers-0.21.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:db9484aeb2e200c43b915a1a0150ea885e35f357a5a8fabf7373af333dcc8dbf", size = 9014448 },
+    { url = "https://files.pythonhosted.org/packages/ae/b3/0e1a37d4f84c0f014d43701c11eb8072704f6efe8d8fc2dcdb79c47d76de/tokenizers-0.21.1-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:ed248ab5279e601a30a4d67bdb897ecbe955a50f1e7bb62bd99f07dd11c2f5b6", size = 8937877 },
+    { url = "https://files.pythonhosted.org/packages/ac/33/ff08f50e6d615eb180a4a328c65907feb6ded0b8f990ec923969759dc379/tokenizers-0.21.1-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:9ac78b12e541d4ce67b4dfd970e44c060a2147b9b2a21f509566d556a509c67d", size = 9186645 },
+    { url = "https://files.pythonhosted.org/packages/5f/aa/8ae85f69a9f6012c6f8011c6f4aa1c96154c816e9eea2e1b758601157833/tokenizers-0.21.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:e5a69c1a4496b81a5ee5d2c1f3f7fbdf95e90a0196101b0ee89ed9956b8a168f", size = 9384380 },
+    { url = "https://files.pythonhosted.org/packages/e8/5b/a5d98c89f747455e8b7a9504910c865d5e51da55e825a7ae641fb5ff0a58/tokenizers-0.21.1-cp39-abi3-win32.whl", hash = "sha256:1039a3a5734944e09de1d48761ade94e00d0fa760c0e0551151d4dd851ba63e3", size = 2239506 },
+    { url = "https://files.pythonhosted.org/packages/e6/b6/072a8e053ae600dcc2ac0da81a23548e3b523301a442a6ca900e92ac35be/tokenizers-0.21.1-cp39-abi3-win_amd64.whl", hash = "sha256:0f0dcbcc9f6e13e675a66d7a5f2f225a736745ce484c1a4e07476a89ccdad382", size = 2435481 },
+]
+
+[[package]]
+name = "tqdm"
+version = "4.67.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540 },
+]
+
+[[package]]
+name = "typing-extensions"
+version = "4.12.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/df/db/f35a00659bc03fec321ba8bce9420de607a1d37f8342eee1863174c69557/typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8", size = 85321 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438 },
+]
+
+[[package]]
+name = "urllib3"
+version = "2.3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/aa/63/e53da845320b757bf29ef6a9062f5c669fe997973f966045cb019c3f4b66/urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d", size = 307268 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c8/19/4ec628951a74043532ca2cf5d97b7b14863931476d117c471e8e2b1eb39f/urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df", size = 128369 },
+]
--- a/whisper_server.py
+++ b/whisper_server.py
@@ -5,14 +5,12 @@
 """

 import os
-import json
 import logging
-import time
-from typing import Dict
-import torch
-from faster_whisper import WhisperModel, BatchedInferencePipeline, decode_audio
 from mcp.server.fastmcp import FastMCP

+from model_manager import get_model_info
+from transcriber import transcribe_audio, batch_transcribe
+
 # 日志配置
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -24,161 +22,12 @@ mcp = FastMCP(
    dependencies=["faster-whisper>=0.9.0", "torch==2.6.0+cu126", "torchaudio==2.6.0+cu126", "numpy>=1.20.0"]
 )

-# 全局模型实例缓存
-model_instances = {}
-
@mcp.tool()
-def get_model_info() -> str:
-    """获取可用的Whisper模型信息"""
-    models = [
-        "tiny", "base", "small", "medium", "large-v1", "large-v2", "large-v3"
-    ]
-    devices = ["cpu", "cuda"] if torch.cuda.is_available() else ["cpu"]
-    compute_types = ["float16", "int8"] if torch.cuda.is_available() else ["int8"]
-
-    # 支持的语言列表
-    languages = {
-        "zh": "中文", "en": "英语", "ja": "日语", "ko": "韩语", "de": "德语",
-        "fr": "法语", "es": "西班牙语", "ru": "俄语", "it": "意大利语",
-        "pt": "葡萄牙语", "nl": "荷兰语", "ar": "阿拉伯语", "hi": "印地语",
-        "tr": "土耳其语", "vi": "越南语", "th": "泰语", "id": "印尼语"
-    }
-
-    # 支持的音频格式
-    audio_formats = [".mp3", ".wav", ".m4a", ".flac", ".ogg", ".aac"]
-
-    info = {
-        "available_models": models,
-        "default_model": "large-v3",
-        "available_devices": devices,
-        "default_device": "cuda" if torch.cuda.is_available() else "cpu",
-        "available_compute_types": compute_types,
-        "default_compute_type": "float16" if torch.cuda.is_available() else "int8",
-        "cuda_available": torch.cuda.is_available(),
-        "supported_languages": languages,
-        "supported_audio_formats": audio_formats,
-        "version": "0.1.1"
-    }
-
-    if torch.cuda.is_available():
-        info["gpu_info"] = {
-            "name": torch.cuda.get_device_name(0),
-            "memory_total": f"{torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB",
-            "memory_available": f"{torch.cuda.get_device_properties(0).total_memory / 1e9 - torch.cuda.memory_allocated() / 1e9:.2f} GB"
-        }
-
-    return json.dumps(info, indent=2)
-
-def get_whisper_model(model_name: str, device: str, compute_type: str) -> Dict:
+def get_model_info_api() -> str:
    """
-    获取或创建Whisper模型实例
-
-    Args:
-        model_name: 模型名称 (tiny, base, small, medium, large-v1, large-v2, large-v3)
-        device: 运行设备 (cpu, cuda, auto)
-        compute_type: 计算类型 (float16, int8, auto)
-
-    Returns:
-        dict: 包含模型实例和配置的字典
+    获取可用的Whisper模型信息
    """
-    global model_instances
-
-    # 验证模型名称
-    valid_models = ["tiny", "base", "small", "medium", "large-v1", "large-v2", "large-v3"]
-    if model_name not in valid_models:
-        raise ValueError(f"无效的模型名称: {model_name}。有效的模型: {', '.join(valid_models)}")
-
-    # 自动检测设备
-    if device == "auto":
-        device = "cuda" if torch.cuda.is_available() else "cpu"
-        compute_type = "float16" if device == "cuda" else "int8"
-
-    # 验证设备和计算类型
-    if device not in ["cpu", "cuda"]:
-        raise ValueError(f"无效的设备: {device}。有效的设备: cpu, cuda")
-
-    if device == "cuda" and not torch.cuda.is_available():
-        logger.warning("CUDA不可用，自动切换到CPU")
-        device = "cpu"
-        compute_type = "int8"
-
-    if compute_type not in ["float16", "int8"]:
-        raise ValueError(f"无效的计算类型: {compute_type}。有效的计算类型: float16, int8")
-
-    if device == "cpu" and compute_type == "float16":
-        logger.warning("CPU设备不支持float16计算类型，自动切换到int8")
-        compute_type = "int8"
-
-    # 生成模型键
-    model_key = f"{model_name}_{device}_{compute_type}"
-
-    # 如果模型已实例化，直接返回
-    if model_key in model_instances:
-        logger.info(f"使用缓存的模型实例: {model_key}")
-        return model_instances[model_key]
-
-    # 清理GPU内存（如果使用CUDA）
-    if device == "cuda":
-        torch.cuda.empty_cache()
-
-    # 实例化模型
-    try:
-        logger.info(f"加载Whisper模型: {model_name} 设备: {device} 计算类型: {compute_type}")
-
-        # 基础模型
-        model = WhisperModel(
-            model_name,
-            device=device,
-            compute_type=compute_type,
-            download_root=os.environ.get("WHISPER_MODEL_DIR", None)  # 支持自定义模型目录
-        )
-
-        # 批处理设置 - 默认启用批处理以提高速度
-        batched_model = None
-        batch_size = 0
-
-        if device == "cuda":  # 只在CUDA设备上使用批处理
-            # 根据显存大小确定合适的批大小
-            if torch.cuda.is_available():
-                gpu_mem = torch.cuda.get_device_properties(0).total_memory
-                free_mem = gpu_mem - torch.cuda.memory_allocated()
-                # 根据GPU显存动态调整批大小
-                if free_mem > 16e9:  # >16GB
-                    batch_size = 32
-                elif free_mem > 12e9:  # >12GB
-                    batch_size = 16
-                elif free_mem > 8e9:   # >8GB
-                    batch_size = 8
-                elif free_mem > 4e9:   # >4GB
-                    batch_size = 4
-                else:                # 较小显存
-                    batch_size = 2
-
-                logger.info(f"可用GPU显存: {free_mem / 1e9:.2f} GB")
-            else:
-                batch_size = 8  # 默认值
-
-            logger.info(f"启用批处理加速，批大小: {batch_size}")
-            batched_model = BatchedInferencePipeline(model=model)
-
-        # 创建结果对象
-        result = {
-            'model': model,
-            'device': device,
-            'compute_type': compute_type,
-            'batched_model': batched_model,
-            'batch_size': batch_size,
-            'load_time': time.time()
-        }
-
-        # 缓存实例
-        model_instances[model_key] = result
-        return result
-
-    except Exception as e:
-        logger.error(f"加载模型失败: {str(e)}")
-        raise
-
+    return get_model_info()

@mcp.tool()
 def transcribe(audio_path: str, model_name: str = "large-v3", device: str = "auto",
@@ -203,200 +52,21 @@ def transcribe(audio_path: str, model_name: str = "large-v3", device: str = "aut
    Returns:
        str: 转录结果，格式为VTT字幕或JSON
    """
-    # 验证参数
-    if not os.path.exists(audio_path):
-        return f"错误: 音频文件不存在: {audio_path}"
-
-    # 验证文件格式
-    supported_formats = [".mp3", ".wav", ".m4a", ".flac", ".ogg", ".aac"]
-    file_ext = os.path.splitext(audio_path)[1].lower()
-    if file_ext not in supported_formats:
-        return f"错误: 不支持的音频格式: {file_ext}。支持的格式: {', '.join(supported_formats)}"
-
-    # 验证文件大小
-    try:
-        file_size = os.path.getsize(audio_path)
-        if file_size == 0:
-            return f"错误: 音频文件为空: {audio_path}"
-
-        # 大文件警告（超过1GB）
-        if file_size > 1024 * 1024 * 1024:
-            logger.warning(f"警告: 文件大小超过1GB，可能需要较长处理时间: {audio_path}")
-    except Exception as e:
-        logger.error(f"检查文件大小失败: {str(e)}")
-
-    try:
-        # 获取模型实例
-        model_instance = get_whisper_model(model_name, device, compute_type)
-
-        # 验证语言代码
-        if language is not None:
-            # 支持的语言列表
-            supported_languages = {
-                "zh": "中文", "en": "英语", "ja": "日语", "ko": "韩语", "de": "德语",
-                "fr": "法语", "es": "西班牙语", "ru": "俄语", "it": "意大利语",
-                "pt": "葡萄牙语", "nl": "荷兰语", "ar": "阿拉伯语", "hi": "印地语",
-                "tr": "土耳其语", "vi": "越南语", "th": "泰语", "id": "印尼语"
-            }
-
-            if language not in supported_languages:
-                logger.warning(f"未知的语言代码: {language}，将使用自动检测")
-                language = None
-
-        # 设置转录参数
-        options = {
-            "language": language,
-            "vad_filter": True,  # 使用语音活动检测
-            "vad_parameters": {"min_silence_duration_ms": 500},  # VAD参数优化
-            "beam_size": beam_size,
-            "temperature": temperature,
-            "initial_prompt": initial_prompt,
-            "word_timestamps": True,  # 启用单词级时间戳
-            "suppress_tokens": [-1],  # 抑制特殊标记
-            "condition_on_previous_text": True,  # 基于前文进行条件生成
-            "compression_ratio_threshold": 2.4  # 压缩比阈值，用于过滤重复内容
-        }
-
-        start_time = time.time()
-        logger.info(f"开始转录文件: {os.path.basename(audio_path)}")
-
-        # 尝试使用decode_audio预处理音频，以处理更多格式
-        try:
-            audio_data = decode_audio(audio_path)
-            audio_source = audio_data
-            logger.info(f"成功预处理音频: {os.path.basename(audio_path)}")
-        except Exception as audio_error:
-            logger.warning(f"音频预处理失败，将直接使用文件路径: {str(audio_error)}")
-            audio_source = audio_path
-
-        # 执行转录 - 优先使用批处理模型
-        if model_instance['batched_model'] is not None and model_instance['device'] == 'cuda':
-            logger.info("使用批处理加速进行转录...")
-            # 批处理模型需要单独设置batch_size参数
-            segments, info = model_instance['batched_model'].transcribe(
-                audio_source,
-                batch_size=model_instance['batch_size'],
-                **options
-            )
-        else:
-            logger.info("使用标准模型进行转录...")
-            segments, info = model_instance['model'].transcribe(audio_source, **options)
-
-        # 将生成器转换为列表
-        segment_list = list(segments)
-
-        if not segment_list:
-            return "转录失败，未获得结果"
-
-        # 记录转录信息
-        elapsed_time = time.time() - start_time
-        logger.info(f"转录完成，用时: {elapsed_time:.2f}秒，检测语言: {info.language}，音频长度: {info.duration:.2f}秒")
-
-        # 格式化转录结果
-        if output_format.lower() == "vtt":
-            transcription_result = format_vtt(segment_list)
-        elif output_format.lower() == "srt":
-            transcription_result = format_srt(segment_list)
-        else:
-            transcription_result = format_json(segment_list, info)
-
-        # 获取音频文件的目录和文件名
-        audio_dir = os.path.dirname(audio_path)
-        audio_filename = os.path.splitext(os.path.basename(audio_path))[0]
-
-        # 设置输出目录
-        if output_directory is None:
-            output_dir = audio_dir
-        else:
-            output_dir = output_directory
-            # 确保输出目录存在
-            os.makedirs(output_dir, exist_ok=True)
-
-        # 生成带有时间戳的文件名
-        timestamp = time.strftime("%Y%m%d%H%M%S")
-        output_filename = f"{audio_filename}_{timestamp}.{output_format.lower()}"
-        output_path = os.path.join(output_dir, output_filename)
-
-        # 将转录结果写入文件
-        try:
-            with open(output_path, "w", encoding="utf-8") as f:
-                f.write(transcription_result)
-            logger.info(f"转录结果已保存到: {output_path}")
-            return f"转录成功，结果已保存到: {output_path}"
-        except Exception as e:
-            logger.error(f"保存转录结果失败: {str(e)}")
-            return f"转录成功，但保存结果失败: {str(e)}"
-
-    except Exception as e:
-        logger.error(f"转录失败: {str(e)}")
-        return f"转录过程中发生错误: {str(e)}"
-def format_vtt(segments) -> str:
-    """将转录结果格式化为VTT"""
-    vtt_content = "WEBVTT\n\n"
-
-    for segment in segments:
-        start = format_timestamp(segment.start)
-        end = format_timestamp(segment.end)
-        text = segment.text.strip()
-
-        if text:
-            vtt_content += f"{start} --> {end}\n{text}\n\n"
-
-    return vtt_content
-
-def format_srt(segments) -> str:
-    """将转录结果格式化为SRT"""
-    srt_content = ""
-
-    for segment in segments:
-        start = format_timestamp_srt(segment.start)
-        end = format_timestamp_srt(segment.end)
-        text = segment.text.strip()
-
-        if text:
-            srt_content += f"{len(srt_content.splitlines()) + 1}\n{start} --> {end}\n{text}\n\n"
-
-    return srt_content
-
-def format_json(segments, info) -> str:
-    """将转录结果格式化为JSON"""
-    result = {
-        "segments": [{
-            "id": segments.index(segment),
-            "start": segment.start,
-            "end": segment.end,
-            "text": segment.text.strip(),
-            "words": [{
-                "word": word.word,
-                "start": word.start,
-                "end": word.end,
-                "probability": word.probability
-            } for word in segment.words] if hasattr(segment, 'words') and segment.words else []
-        } for segment in segments],
-        "language": info.language,
-        "language_probability": info.language_probability if hasattr(info, 'language_probability') else None,
-        "duration": info.duration,
-        "all_language_probs": info.all_language_probs if hasattr(info, 'all_language_probs') else None
-    }
-    return json.dumps(result, indent=2, ensure_ascii=False)
-
-def format_timestamp(seconds: float) -> str:
-    """格式化时间戳为VTT格式"""
-    hours = int(seconds // 3600)
-    minutes = int((seconds % 3600) // 60)
-    seconds = seconds % 60
-    return f"{hours:02d}:{minutes:02d}:{seconds:06.3f}"
-
-def format_timestamp_srt(seconds: float) -> str:
-    """格式化时间戳为SRT格式"""
-    hours = int(seconds // 3600)
-    minutes = int((seconds % 3600) // 60)
-    secs = int(seconds % 60)
-    msecs = int((seconds - int(seconds)) * 1000)
-    return f"{hours:02d}:{minutes:02d}:{secs:02d},{msecs:03d}"
+    return transcribe_audio(
+        audio_path=audio_path,
+        model_name=model_name,
+        device=device,
+        compute_type=compute_type,
+        language=language,
+        output_format=output_format,
+        beam_size=beam_size,
+        temperature=temperature,
+        initial_prompt=initial_prompt,
+        output_directory=output_directory
+    )

@mcp.tool()
-def batch_transcribe(audio_folder: str, output_folder: str = None, model_name: str = "large-v3",
+def batch_transcribe_audio(audio_folder: str, output_folder: str = None, model_name: str = "large-v3",
                    device: str = "auto", compute_type: str = "auto", language: str = None,
                    output_format: str = "vtt", beam_size: int = 5, temperature: float = 0.0,
                    initial_prompt: str = None, parallel_files: int = 1) -> str:
@@ -419,162 +89,19 @@ def batch_transcribe(audio_folder: str, output_folder: str = None, model_name: s
    Returns:
        str: 批处理结果摘要，包含处理时间和成功率
    """
-    if not os.path.isdir(audio_folder):
-        return f"错误: 文件夹不存在: {audio_folder}"
-
-    # 设置输出文件夹
-    if output_folder is None:
-        output_folder = os.path.join(audio_folder, "transcript")
-
-    # 确保输出目录存在
-    os.makedirs(output_folder, exist_ok=True)
-
-    # 验证输出格式
-    valid_formats = ["vtt", "srt", "json"]
-    if output_format.lower() not in valid_formats:
-        return f"错误: 不支持的输出格式: {output_format}。支持的格式: {', '.join(valid_formats)}"
-
-    # 获取所有音频文件
-    audio_files = []
-    supported_formats = [".mp3", ".wav", ".m4a", ".flac", ".ogg", ".aac"]
-
-    for filename in os.listdir(audio_folder):
-        file_ext = os.path.splitext(filename)[1].lower()
-        if file_ext in supported_formats:
-            audio_files.append(os.path.join(audio_folder, filename))
-
-    if not audio_files:
-        return f"在 {audio_folder} 中未找到支持的音频文件。支持的格式: {', '.join(supported_formats)}"
-
-    # 记录开始时间
-    start_time = time.time()
-    total_files = len(audio_files)
-    logger.info(f"开始批量转录 {total_files} 个文件，输出格式: {output_format}")
-
-    # 处理每个文件
-    results = []
-    success_count = 0
-    error_count = 0
-    total_audio_duration = 0
-
-        # 预加载模型以避免重复加载
-    try:
-        get_whisper_model(model_name, device, compute_type)
-        logger.info(f"已预加载模型: {model_name}")
-    except Exception as e:
-        logger.error(f"预加载模型失败: {str(e)}")
-        return f"批处理失败: 无法加载模型 {model_name}: {str(e)}"
-
-    # 处理进度报告函数
-    def report_progress(current, total, elapsed_time):
-        progress = current / total * 100
-        eta = (elapsed_time / current) * (total - current) if current > 0 else 0
-        return (f"进度: {current}/{total} ({progress:.1f}%)" +
-                f" | 已用时间: {format_time(elapsed_time)}" +
-                f" | 预计剩余: {format_time(eta)}")
-
-    # 格式化时间函数
-    def format_time(seconds):
-        hours = int(seconds // 3600)
-        minutes = int((seconds % 3600) // 60)
-        secs = int(seconds % 60)
-        return f"{hours:02d}:{minutes:02d}:{secs:02d}"
-
-    # 处理每个文件
-    for i, audio_path in enumerate(audio_files):
-        file_name = os.path.basename(audio_path)
-        elapsed = time.time() - start_time
-
-        # 报告进度
-        progress_msg = report_progress(i, total_files, elapsed)
-        logger.info(f"{progress_msg} | 当前处理: {file_name}")
-
-        # 设置输出文件路径
-        base_name = os.path.splitext(file_name)[0]
-        output_ext = "." + output_format.lower()
-        if output_format.lower() == "json":
-            output_ext = ".json"
-        elif output_format.lower() == "vtt":
-            output_ext = ".vtt"
-        elif output_format.lower() == "srt":
-            output_ext = ".srt"
-
-        output_path = os.path.join(output_folder, f"{base_name}{output_ext}")
-
-        # 执行转录
-        try:
-            result = transcribe(
-                audio_path=audio_path,
-                model_name=model_name,
-                device=device,
-                compute_type=compute_type,
-                language=language,
-                output_format=output_format,
-                beam_size=beam_size,
-                temperature=temperature,
-                initial_prompt=initial_prompt,
-                output_directory=output_folder
-            )
-
-            # 检查结果是否包含错误信息
-            if result.startswith("错误:") or result.startswith("转录过程中发生错误:"):
-                logger.error(f"转录失败: {file_name} - {result}")
-                results.append(f"❌ 失败: {file_name} - {result}")
-                error_count += 1
-                continue
-
-            # 如果转录成功，提取输出路径信息
-            if result.startswith("转录成功"):
-                # 从返回消息中提取输出路径
-                output_path = result.split(": ")[1] if ": " in result else "未知路径"
-                success_count += 1
-                results.append(f"✅ 成功: {file_name} -> {os.path.basename(output_path)}")
-                continue
-
-            # 检查转录结果是否已成功保存
-            if result.startswith("转录成功"):
-                logger.info(f"转录结果已保存: {file_name}")
-            else:
-                logger.error(f"转录未成功保存: {file_name} - {result}")
-                continue
-
-            # 提取音频时长（如果是JSON格式）
-            audio_duration = 0
-            if output_format.lower() == "json":
-                try:
-                    json_result = json.loads(result)
-                    audio_duration = json_result.get("duration", 0)
-                    total_audio_duration += audio_duration
-                except:
-                    pass
-
-            success_count += 1
-            duration_info = f" (时长: {audio_duration:.1f}秒)" if audio_duration > 0 else ""
-            results.append(f"✅ 成功: {file_name} -> {os.path.basename(output_path)}{duration_info}")
-
-        except Exception as e:
-            logger.error(f"转录失败: {file_name} - {str(e)}")
-            results.append(f"❌ 失败: {file_name} - {str(e)}")
-            error_count += 1
-
-    # 计算总时间和处理速度
-    total_time = time.time() - start_time
-    processing_speed = total_audio_duration / total_time if total_audio_duration > 0 and total_time > 0 else 0
-
-    # 生成摘要
-    summary = f"批处理完成，用时: {format_time(total_time)}\n"
-    summary += f"成功: {success_count}/{total_files} ({success_count/total_files*100:.1f}%)\n"
-    if error_count > 0:
-        summary += f"失败: {error_count}/{total_files} ({error_count/total_files*100:.1f}%)\n"
-    if total_audio_duration > 0:
-        summary += f"总音频时长: {total_audio_duration:.1f}秒\n"
-        summary += f"处理速度: {processing_speed:.2f}x 实时速度\n"
-    summary += f"输出目录: {output_folder}\n\n"
-
-    # 添加详细结果
-    summary += "详细结果:\n" + "\n".join(results)
-
-    return summary
+    return batch_transcribe(
+        audio_folder=audio_folder,
+        output_folder=output_folder,
+        model_name=model_name,
+        device=device,
+        compute_type=compute_type,
+        language=language,
+        output_format=output_format,
+        beam_size=beam_size,
+        temperature=temperature,
+        initial_prompt=initial_prompt,
+        parallel_files=parallel_files
+    )

 if __name__ == "__main__":
    # 运行服务器