Fast-Whisper-MCP-Server/whisper_server.py
BigUncleHomePC 9d22de2ac9 refactor(whisper_server): modularize the transcription logic
Split the core transcription logic into standalone modules (transcriber.py, model_manager.py, audio_processor.py, formatters.py) to improve maintainability and reuse. Removed main.py, streamlined dependency management, and updated requirements.txt and pyproject.toml.
2025-03-22 05:26:17 +08:00

#!/usr/bin/env python3
"""
基于Faster Whisper的语音识别MCP服务
提供高性能的音频转录功能,支持批处理加速和多种输出格式
"""
import os
import logging
from mcp.server.fastmcp import FastMCP
from model_manager import get_model_info
from transcriber import transcribe_audio, batch_transcribe
# Logging configuration
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Create the FastMCP server instance
mcp = FastMCP(
    name="fast-whisper-mcp-server",
    version="0.1.1",
    dependencies=["faster-whisper>=0.9.0", "torch==2.6.0+cu126", "torchaudio==2.6.0+cu126", "numpy>=1.20.0"]
)
@mcp.tool()
def get_model_info_api() -> str:
    """
    Get information about the available Whisper models.
    """
    return get_model_info()
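
# Example (illustrative only, not part of the upstream file): calling the tool function
# directly returns the same string an MCP client would receive from the tool:
#
#   print(get_model_info_api())
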
@mcp.tool()
def transcribe(audio_path: str, model_name: str = "large-v3", device: str = "auto",
               compute_type: str = "auto", language: str = None, output_format: str = "vtt",
               beam_size: int = 5, temperature: float = 0.0, initial_prompt: str = None,
               output_directory: str = None) -> str:
    """
    Transcribe an audio file with Faster Whisper.

    Args:
        audio_path: Path to the audio file
        model_name: Model name (tiny, base, small, medium, large-v1, large-v2, large-v3)
        device: Device to run on (cpu, cuda, auto)
        compute_type: Compute type (float16, int8, auto)
        language: Language code (e.g. zh, en, ja); defaults to automatic detection
        output_format: Output format (vtt, srt, or json)
        beam_size: Beam-search width; larger values may improve accuracy but reduce speed
        temperature: Sampling temperature; 0 means greedy decoding
        initial_prompt: Initial prompt text that can help the model better understand the context
        output_directory: Output directory path; defaults to the directory containing the audio file

    Returns:
        str: Transcription result, formatted as VTT/SRT subtitles or JSON
    """
    return transcribe_audio(
        audio_path=audio_path,
        model_name=model_name,
        device=device,
        compute_type=compute_type,
        language=language,
        output_format=output_format,
        beam_size=beam_size,
        temperature=temperature,
        initial_prompt=initial_prompt,
        output_directory=output_directory
    )
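
# Example (illustrative only, not part of the upstream file): since the tool is a plain
# decorated function, it can also be exercised directly; the audio path below is a
# hypothetical placeholder.
#
#   transcribe(
#       audio_path="/data/meeting.wav",   # hypothetical example path
#       model_name="medium",
#       language="en",
#       output_format="srt",
#   )
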
@mcp.tool()
def batch_transcribe_audio(audio_folder: str, output_folder: str = None, model_name: str = "large-v3",
                           device: str = "auto", compute_type: str = "auto", language: str = None,
                           output_format: str = "vtt", beam_size: int = 5, temperature: float = 0.0,
                           initial_prompt: str = None, parallel_files: int = 1) -> str:
    """
    Batch-transcribe all audio files in a folder.

    Args:
        audio_folder: Path to the folder containing the audio files
        output_folder: Output folder path; defaults to a transcript subfolder inside audio_folder
        model_name: Model name (tiny, base, small, medium, large-v1, large-v2, large-v3)
        device: Device to run on (cpu, cuda, auto)
        compute_type: Compute type (float16, int8, auto)
        language: Language code (e.g. zh, en, ja); defaults to automatic detection
        output_format: Output format (vtt, srt, or json)
        beam_size: Beam-search width; larger values may improve accuracy but reduce speed
        temperature: Sampling temperature; 0 means greedy decoding
        initial_prompt: Initial prompt text that can help the model better understand the context
        parallel_files: Number of files to process in parallel; only effective in CPU mode

    Returns:
        str: Batch-processing summary, including elapsed time and success rate
    """
    return batch_transcribe(
        audio_folder=audio_folder,
        output_folder=output_folder,
        model_name=model_name,
        device=device,
        compute_type=compute_type,
        language=language,
        output_format=output_format,
        beam_size=beam_size,
        temperature=temperature,
        initial_prompt=initial_prompt,
        parallel_files=parallel_files
    )
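
# Example (illustrative only, not part of the upstream file): batch-transcribe every audio
# file in a folder to SRT; the folder path is a hypothetical placeholder.
#
#   batch_transcribe_audio(
#       audio_folder="/data/podcasts",    # hypothetical example folder
#       output_format="srt",
#       parallel_files=2,                 # parallelism only applies in CPU mode
#   )
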
if __name__ == "__main__":
# 运行服务器
mcp.run()
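
# A minimal client-side sketch (not part of this file), assuming the standard MCP Python SDK
# stdio client is available; the script path and audio path below are hypothetical placeholders.
#
#   import asyncio
#   from mcp import ClientSession, StdioServerParameters
#   from mcp.client.stdio import stdio_client
#
#   async def main():
#       # Spawn this server as a subprocess over stdio
#       params = StdioServerParameters(command="python", args=["whisper_server.py"])
#       async with stdio_client(params) as (read, write):
#           async with ClientSession(read, write) as session:
#               await session.initialize()
#               # Call the "transcribe" tool exposed above
#               result = await session.call_tool(
#                   "transcribe", arguments={"audio_path": "/data/meeting.wav"}
#               )
#               print(result)
#
#   asyncio.run(main())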