48 lines
1.7 KiB
Python
48 lines
1.7 KiB
Python
from dataclasses import dataclass, field
|
|
|
|
|
|
@dataclass
class VADHandlerArguments:
    """Configuration options for the voice activity detection (VAD) handler.

    Every option is a dataclass field with a default value, so the class can
    be instantiated without arguments. Each field also stores a human-readable
    description under the ``"help"`` key of its ``metadata`` mapping
    (presumably consumed by a command-line argument parser -- confirm against
    the caller).

    Durations are expressed in milliseconds and ``sample_rate`` in Hertz, as
    stated by the per-field help texts.
    """

    thresh: float = field(
        default=0.3,
        metadata={
            "help": "The threshold value for voice activity detection (VAD). Values typically range from 0 to 1, with higher values requiring higher confidence in speech detection."
        },
    )
    sample_rate: int = field(
        default=16000,
        metadata={
            "help": "The sample rate of the audio in Hertz. Default is 16000 Hz, which is a common setting for voice audio."
        },
    )
    min_silence_ms: int = field(
        default=250,
        metadata={
            "help": "Minimum length of silence intervals to be used for segmenting speech. Measured in milliseconds. Default is 250 ms."
        },
    )
    min_speech_ms: int = field(
        default=500,
        metadata={
            "help": "Minimum length of speech segments to be considered valid speech. Measured in milliseconds. Default is 500 ms."
        },
    )
    # Annotated as float (not int) because the default is infinity, which has
    # no integer representation.
    max_speech_ms: float = field(
        default=float("inf"),
        metadata={
            "help": "Maximum length of continuous speech before forcing a split. Default is infinite, allowing for uninterrupted speech segments."
        },
    )
    # The help text previously advertised 250 ms while the actual default is
    # 500; the text now matches the value so the runtime default is unchanged.
    speech_pad_ms: int = field(
        default=500,
        metadata={
            "help": "Amount of padding added to the beginning and end of detected speech segments. Measured in milliseconds. Default is 500 ms."
        },
    )
    audio_enhancement: bool = field(
        default=False,
        metadata={
            "help": "improves sound quality by applying techniques like noise reduction, equalization, and echo cancellation. Default is False."
        },
    )
|