Split the core transcription logic into separate modules (transcriber.py, model_manager.py, audio_processor.py, formatters.py) to improve code maintainability and reusability. Removed main.py, streamlined dependency management, and updated requirements.txt and pyproject.toml.
108 lines
4.0 KiB
Python
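As context for the refactor described above, the sketch below shows the module boundaries this entry point now depends on. The signatures are hypothetical, inferred only from the imports and keyword arguments used in this file; audio_processor.py and formatters.py are presumably consumed inside transcriber.py and are not shown.

# Hypothetical interfaces of the extracted modules (inferred from call sites, not actual source):

# model_manager.py
def get_model_info() -> str:
    """Return a description of the available Whisper models."""
    ...

# transcriber.py
def transcribe_audio(audio_path: str, model_name: str = "large-v3", device: str = "auto",
                     compute_type: str = "auto", language: str = None, output_format: str = "vtt",
                     beam_size: int = 5, temperature: float = 0.0, initial_prompt: str = None,
                     output_directory: str = None) -> str:
    """Transcribe a single file and return the formatted result."""
    ...

def batch_transcribe(audio_folder: str, output_folder: str = None, model_name: str = "large-v3",
                     device: str = "auto", compute_type: str = "auto", language: str = None,
                     output_format: str = "vtt", beam_size: int = 5, temperature: float = 0.0,
                     initial_prompt: str = None, parallel_files: int = 1) -> str:
    """Transcribe every audio file in a folder and return a summary."""
    ...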
#!/usr/bin/env python3
"""
Speech recognition MCP service based on Faster Whisper

Provides high-performance audio transcription, with support for batched
inference acceleration and multiple output formats.
"""

import os
import logging
from typing import Optional

from mcp.server.fastmcp import FastMCP

from model_manager import get_model_info
from transcriber import transcribe_audio, batch_transcribe

# Logging configuration
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Create the FastMCP server instance
mcp = FastMCP(
    name="fast-whisper-mcp-server",
    version="0.1.1",
    dependencies=["faster-whisper>=0.9.0", "torch==2.6.0+cu126", "torchaudio==2.6.0+cu126", "numpy>=1.20.0"]
)
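
# Note: the CUDA-specific pins above (torch==2.6.0+cu126, torchaudio==2.6.0+cu126)
# are not published on PyPI; they install from PyTorch's cu126 wheel index, e.g.:
#   pip install torch==2.6.0+cu126 torchaudio==2.6.0+cu126 \
#       --extra-index-url https://download.pytorch.org/whl/cu126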

@mcp.tool()
def get_model_info_api() -> str:
    """
    Get information about the available Whisper models
    """
    return get_model_info()


@mcp.tool()
def transcribe(audio_path: str, model_name: str = "large-v3", device: str = "auto",
               compute_type: str = "auto", language: Optional[str] = None, output_format: str = "vtt",
               beam_size: int = 5, temperature: float = 0.0, initial_prompt: Optional[str] = None,
               output_directory: Optional[str] = None) -> str:
    """
    Transcribe an audio file using Faster Whisper

    Args:
        audio_path: Path to the audio file
        model_name: Model name (tiny, base, small, medium, large-v1, large-v2, large-v3)
        device: Device to run on (cpu, cuda, auto)
        compute_type: Compute type (float16, int8, auto)
        language: Language code (e.g. zh, en, ja; auto-detected by default)
        output_format: Output format (vtt, srt, or json)
        beam_size: Beam search width; larger values may improve accuracy at the cost of speed
        temperature: Sampling temperature; 0 means greedy decoding
        initial_prompt: Initial prompt text that can help the model better understand the context
        output_directory: Output directory path; defaults to the directory containing the audio file

    Returns:
        str: Transcription result, formatted as VTT/SRT subtitles or JSON
    """
    return transcribe_audio(
        audio_path=audio_path,
        model_name=model_name,
        device=device,
        compute_type=compute_type,
        language=language,
        output_format=output_format,
        beam_size=beam_size,
        temperature=temperature,
        initial_prompt=initial_prompt,
        output_directory=output_directory
    )

@mcp.tool()
def batch_transcribe_audio(audio_folder: str, output_folder: Optional[str] = None, model_name: str = "large-v3",
                           device: str = "auto", compute_type: str = "auto", language: Optional[str] = None,
                           output_format: str = "vtt", beam_size: int = 5, temperature: float = 0.0,
                           initial_prompt: Optional[str] = None, parallel_files: int = 1) -> str:
    """
    Batch-transcribe all audio files in a folder

    Args:
        audio_folder: Path to the folder containing the audio files
        output_folder: Output folder path; defaults to a transcript subfolder under audio_folder
        model_name: Model name (tiny, base, small, medium, large-v1, large-v2, large-v3)
        device: Device to run on (cpu, cuda, auto)
        compute_type: Compute type (float16, int8, auto)
        language: Language code (e.g. zh, en, ja; auto-detected by default)
        output_format: Output format (vtt, srt, or json)
        beam_size: Beam search width; larger values may improve accuracy at the cost of speed
        temperature: Sampling temperature; 0 means greedy decoding
        initial_prompt: Initial prompt text that can help the model better understand the context
        parallel_files: Number of files to process in parallel (only effective in CPU mode)

    Returns:
        str: Batch-processing summary, including elapsed time and success rate
    """
    return batch_transcribe(
        audio_folder=audio_folder,
        output_folder=output_folder,
        model_name=model_name,
        device=device,
        compute_type=compute_type,
        language=language,
        output_format=output_format,
        beam_size=beam_size,
        temperature=temperature,
        initial_prompt=initial_prompt,
        parallel_files=parallel_files
    )

if __name__ == "__main__":
    # Run the server
    mcp.run()
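
A minimal sketch of exercising this server over stdio with the MCP Python SDK client. The server filename ("server.py") and the audio path are placeholders for illustration, not names from this repository.

import asyncio
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

# Launch this file as a stdio MCP server; "server.py" stands in for
# whatever this module is actually named in the repository.
server_params = StdioServerParameters(command="python", args=["server.py"])

async def main():
    async with stdio_client(server_params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # Call the transcribe tool; the audio path is a placeholder.
            result = await session.call_tool(
                "transcribe",
                {"audio_path": "/path/to/audio.mp3", "output_format": "srt"},
            )
            print(result)

asyncio.run(main())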