Files
Fast-Whisper-MCP-Server/formatters.py
BigUncleHomePC 9d22de2ac9 refactor(whisper_server): 重构代码以模块化转录功能
将转录核心逻辑拆分为独立模块(transcriber.py、model_manager.py、audio_processor.py、formatters.py),提升代码可维护性和复用性。删除main.py文件,优化依赖管理并更新requirements.txt和pyproject.toml。
2025-03-22 05:26:17 +08:00

131 lines
3.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
格式化输出模块
负责将转录结果格式化为不同的输出格式VTT、SRT、JSON
"""
import json
from typing import List, Dict, Any
def format_vtt(segments: List) -> str:
"""
将转录结果格式化为VTT
Args:
segments: 转录段落列表
Returns:
str: VTT格式的字幕内容
"""
vtt_content = "WEBVTT\n\n"
for segment in segments:
start = format_timestamp(segment.start)
end = format_timestamp(segment.end)
text = segment.text.strip()
if text:
vtt_content += f"{start} --> {end}\n{text}\n\n"
return vtt_content
def format_srt(segments: List) -> str:
"""
将转录结果格式化为SRT
Args:
segments: 转录段落列表
Returns:
str: SRT格式的字幕内容
"""
srt_content = ""
index = 1
for segment in segments:
start = format_timestamp_srt(segment.start)
end = format_timestamp_srt(segment.end)
text = segment.text.strip()
if text:
srt_content += f"{index}\n{start} --> {end}\n{text}\n\n"
index += 1
return srt_content
def format_json(segments: List, info: Any) -> str:
"""
将转录结果格式化为JSON
Args:
segments: 转录段落列表
info: 转录信息对象
Returns:
str: JSON格式的转录结果
"""
result = {
"segments": [{
"id": i,
"start": segment.start,
"end": segment.end,
"text": segment.text.strip(),
"words": [{
"word": word.word,
"start": word.start,
"end": word.end,
"probability": word.probability
} for word in segment.words] if hasattr(segment, 'words') and segment.words else []
} for i, segment in enumerate(segments)],
"language": info.language,
"language_probability": info.language_probability if hasattr(info, 'language_probability') else None,
"duration": info.duration,
"all_language_probs": info.all_language_probs if hasattr(info, 'all_language_probs') else None
}
return json.dumps(result, indent=2, ensure_ascii=False)
def format_timestamp(seconds: float) -> str:
"""
格式化时间戳为VTT格式
Args:
seconds: 秒数
Returns:
str: 格式化的时间戳 (HH:MM:SS.mmm)
"""
hours = int(seconds // 3600)
minutes = int((seconds % 3600) // 60)
seconds = seconds % 60
return f"{hours:02d}:{minutes:02d}:{seconds:06.3f}"
def format_timestamp_srt(seconds: float) -> str:
"""
格式化时间戳为SRT格式
Args:
seconds: 秒数
Returns:
str: 格式化的时间戳 (HH:MM:SS,mmm)
"""
hours = int(seconds // 3600)
minutes = int((seconds % 3600) // 60)
secs = int(seconds % 60)
msecs = int((seconds - int(seconds)) * 1000)
return f"{hours:02d}:{minutes:02d}:{secs:02d},{msecs:03d}"
def format_time(seconds: float) -> str:
"""
格式化时间为可读格式
Args:
seconds: 秒数
Returns:
str: 格式化的时间 (HH:MM:SS)
"""
hours = int(seconds // 3600)
minutes = int((seconds % 3600) // 60)
secs = int(seconds % 60)
return f"{hours:02d}:{minutes:02d}:{secs:02d}"