# Fast-Whisper-MCP-Server/audio_processor.py

#!/usr/bin/env python3
"""
Audio Processing Module
Responsible for audio file validation and preprocessing
"""

import os
import logging
from typing import Union, Any
from faster_whisper import decode_audio

# Module-level logger (named after this module)
logger = logging.getLogger(__name__)

def validate_audio_file(audio_path: str) -> str:
    """
    Validate that a path points to a usable audio file

    Checks are performed in this order: the path exists, it is a regular
    file (not a directory), its extension is one of the supported formats,
    and the file is not empty. Files larger than 1GB still pass validation
    but a warning is logged.

    Args:
        audio_path: Path to the audio file

    Returns:
        str: Validation result, "ok" indicates validation passed, otherwise
            returns an error message starting with "Error:"
    """
    # Validate that the path exists
    if not os.path.exists(audio_path):
        return f"Error: Audio file does not exist: {audio_path}"

    # os.path.exists() is also true for directories; a directory named
    # "something.mp3" must not be reported as a valid audio file
    if not os.path.isfile(audio_path):
        return f"Error: Path is not a file: {audio_path}"

    # Validate file format (extension comparison is case-insensitive)
    supported_formats = (".mp3", ".wav", ".m4a", ".flac", ".ogg", ".aac")
    file_ext = os.path.splitext(audio_path)[1].lower()
    if file_ext not in supported_formats:
        return f"Error: Unsupported audio format: {file_ext}. Supported formats: {', '.join(supported_formats)}"

    # Validate file size; only the filesystem call can fail here
    try:
        file_size = os.path.getsize(audio_path)
    except OSError as e:
        logger.error("Failed to check file size: %s", e)
        return f"Error: Failed to check file size: {str(e)}"

    if file_size == 0:
        return f"Error: Audio file is empty: {audio_path}"

    # Warning for large files (over 1GB); this does not fail validation
    if file_size > 1024 * 1024 * 1024:
        logger.warning(
            "Warning: File size exceeds 1GB, may require longer processing time: %s",
            audio_path,
        )

    return "ok"

def process_audio(audio_path: str) -> Union[str, Any]:
    """
    Decode an audio file ahead of time, falling back to its path on failure

    The file is passed to decode_audio; when that succeeds the decoded
    result is returned. Any exception raised along the way is logged as a
    warning and the untouched path is handed back instead, so the caller
    can still try to work with the file directly.

    Args:
        audio_path: Path to the audio file

    Returns:
        Union[str, Any]: Whatever decode_audio produced, or the original
            file path if preprocessing failed
    """
    try:
        # Decoding up front lets more input formats be handled
        decoded = decode_audio(audio_path)
        file_name = os.path.basename(audio_path)
        logger.info(f"Successfully preprocessed audio: {file_name}")
        return decoded
    except Exception as audio_error:
        # Best-effort step: never fail here, just fall back to the raw path
        reason = str(audio_error)
        logger.warning(f"Audio preprocessing failed, will use file path directly: {reason}")
        return audio_path