Merge pull request #58 from huggingface/fix_stt_compile_mode
Assigning min new tokens to a compiled whisper graph on a thread brea…
This commit is contained in:
@@ -66,8 +66,9 @@ class WhisperSTTHandler(BaseHandler):
|
||||
if self.compile_mode not in (None, "default"):
|
||||
# generating more tokens than in any previous call will trigger CUDA graph capture
|
||||
# one should warm up with a number of generated tokens above the max tokens targeted for subsequent generation
|
||||
# hence, having min_new_tokens < max_new_tokens in the future doesn't make sense
|
||||
warmup_gen_kwargs = {
|
||||
"min_new_tokens": self.gen_kwargs["min_new_tokens"],
|
||||
"min_new_tokens": self.gen_kwargs["max_new_tokens"], # Yes, assign max_new_tokens to min_new_tokens
|
||||
"max_new_tokens": self.gen_kwargs["max_new_tokens"],
|
||||
**self.gen_kwargs,
|
||||
}
|
||||
|
||||
@@ -33,12 +33,6 @@ class WhisperSTTHandlerArguments:
|
||||
"help": "The maximum number of new tokens to generate. Default is 128."
|
||||
},
|
||||
)
|
||||
stt_gen_min_new_tokens: int = field(
|
||||
default=0,
|
||||
metadata={
|
||||
"help": "The minimum number of new tokens to generate. Default is 0."
|
||||
},
|
||||
)
|
||||
stt_gen_num_beams: int = field(
|
||||
default=1,
|
||||
metadata={
|
||||
|
||||
Reference in New Issue
Block a user