mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2023-11-04 02:52:44 +03:00
This turned out pretty well overall. The algorithm has been moved from main.cpp to whisper.cpp and can be reused for all subtitle types. This means that you can now specify the maximum length of the generated lines: simply provide the "-ml" argument, specifying the max length in number of characters.
This commit is contained in:
23
whisper.h
23
whisper.h
@@ -68,14 +68,21 @@ extern "C" {
|
||||
|
||||
typedef int whisper_token;
|
||||
|
||||
struct whisper_token_data {
|
||||
typedef struct whisper_token_data {
|
||||
whisper_token id; // token id
|
||||
whisper_token tid; // forced timestamp token id
|
||||
|
||||
float p; // probability of the token
|
||||
float pt; // probability of the timestamp token
|
||||
float ptsum; // sum of probabilities of all timestamp tokens
|
||||
};
|
||||
|
||||
// token-level timestamp data
|
||||
// do not use if you haven't computed token-level timestamps
|
||||
int64_t t0; // start time of the token
|
||||
int64_t t1; // end time of the token
|
||||
|
||||
float vlen; // voice length of the token
|
||||
} whisper_token_data;
|
||||
|
||||
// Allocates all memory needed for the model and loads the model from the given file.
|
||||
// Returns NULL on failure.
|
||||
@@ -129,7 +136,7 @@ extern "C" {
|
||||
// You can also implement your own sampling method using the whisper_get_probs() function.
|
||||
// whisper_sample_best() returns the token with the highest probability
|
||||
// whisper_sample_timestamp() returns the most probable timestamp token
|
||||
WHISPER_API struct whisper_token_data whisper_sample_best(struct whisper_context * ctx);
|
||||
WHISPER_API whisper_token_data whisper_sample_best(struct whisper_context * ctx);
|
||||
WHISPER_API whisper_token whisper_sample_timestamp(struct whisper_context * ctx);
|
||||
|
||||
// Return the id of the specified language, returns -1 if not found
|
||||
@@ -172,7 +179,7 @@ extern "C" {
|
||||
// Text segment callback
|
||||
// Called on every newly generated text segment
|
||||
// Use the whisper_full_...() functions to obtain the text segments
|
||||
typedef void (*whisper_new_segment_callback)(struct whisper_context * ctx, void * user_data);
|
||||
typedef void (*whisper_new_segment_callback)(struct whisper_context * ctx, int n_new, void * user_data);
|
||||
|
||||
struct whisper_full_params {
|
||||
enum whisper_sampling_strategy strategy;
|
||||
@@ -188,6 +195,12 @@ extern "C" {
|
||||
bool print_realtime;
|
||||
bool print_timestamps;
|
||||
|
||||
// [EXPERIMENTAL] token-level timestamps
|
||||
bool token_timestamps; // enable token-level timestamps
|
||||
float thold_pt; // timestamp token probability threshold (~0.01)
|
||||
float thold_ptsum; // timestamp token sum probability threshold (~0.01)
|
||||
int max_len; // max segment length in characters
|
||||
|
||||
const char * language;
|
||||
|
||||
struct {
|
||||
@@ -244,7 +257,7 @@ extern "C" {
|
||||
|
||||
// Get token data for the specified token in the specified segment.
|
||||
// This contains probabilities, timestamps, etc.
|
||||
WHISPER_API struct whisper_token_data whisper_full_get_token_data(struct whisper_context * ctx, int i_segment, int i_token);
|
||||
WHISPER_API whisper_token_data whisper_full_get_token_data(struct whisper_context * ctx, int i_segment, int i_token);
|
||||
|
||||
// Get the probability of the specified token in the specified segment.
|
||||
WHISPER_API float whisper_full_get_token_p(struct whisper_context * ctx, int i_segment, int i_token);
|
||||
|
||||
Reference in New Issue
Block a user