Merge pull request #58 from huggingface/fix_stt_compile_mode
Assigning min new tokens to a compiled whisper graph on a thread brea…
This commit is contained in:
@@ -66,8 +66,9 @@ class WhisperSTTHandler(BaseHandler):
 if self.compile_mode not in (None, "default"):
 # generating more tokens than previously will trigger CUDA graphs capture
 # one should warmup with a number of generated tokens above max tokens targeted for subsequent generation
+# hence, having min_new_tokens < max_new_tokens in the future doesn't make sense
 warmup_gen_kwargs = {
-"min_new_tokens": self.gen_kwargs["min_new_tokens"],
+"min_new_tokens": self.gen_kwargs["max_new_tokens"], # Yes, assign max_new_tokens to min_new_tokens
 "max_new_tokens": self.gen_kwargs["max_new_tokens"],
 **self.gen_kwargs,
 }
@@ -33,12 +33,6 @@ class WhisperSTTHandlerArguments:
 "help": "The maximum number of new tokens to generate. Default is 128."
 },
 )
-stt_gen_min_new_tokens: int = field(
-default=0,
-metadata={
-"help": "The minimum number of new tokens to generate. Default is 0."
-},
-)
 stt_gen_num_beams: int = field(
 default=1,
 metadata={
Reference in New Issue
Block a user