Merge pull request #58 from huggingface/fix_stt_compile_mode

Assigning min new tokens to a compiled whisper graph on a thread brea…
This commit is contained in:
Andrés Marafioti
2024-08-28 12:09:22 +02:00
committed by GitHub
2 changed files with 2 additions and 7 deletions

View File

@@ -66,8 +66,9 @@ class WhisperSTTHandler(BaseHandler):
if self.compile_mode not in (None, "default"):
# generating more tokens than in previous calls will trigger CUDA graph capture
# one should warm up with a number of generated tokens above the max tokens targeted for subsequent generation
# hence, having min_new_tokens < max_new_tokens in the future doesn't make sense
warmup_gen_kwargs = {
"min_new_tokens": self.gen_kwargs["min_new_tokens"],
"min_new_tokens": self.gen_kwargs["max_new_tokens"], # Yes, assign max_new_tokens to min_new_tokens
"max_new_tokens": self.gen_kwargs["max_new_tokens"],
**self.gen_kwargs,
}

View File

@@ -33,12 +33,6 @@ class WhisperSTTHandlerArguments:
"help": "The maximum number of new tokens to generate. Default is 128."
},
)
stt_gen_min_new_tokens: int = field(
default=0,
metadata={
"help": "The minimum number of new tokens to generate. Default is 0."
},
)
stt_gen_num_beams: int = field(
default=1,
metadata={