Mirror of https://github.com/ggerganov/whisper.cpp.git
ggml : sync latest repo (mostly refactoring changes)
Changed file: ggml.h (133 changed lines)
@@ -198,9 +198,11 @@
 #define GGML_MAX_PARAMS        256
 #define GGML_MAX_CONTEXTS      64
 #define GGML_MAX_OPT           4
-#define GGML_MAX_NAME          32
+#define GGML_MAX_NAME          48
 #define GGML_DEFAULT_N_THREADS 4
 
+#define GGML_UNUSED(x) (void)(x)
+
 #define GGML_ASSERT(x) \
     do { \
         if (!(x)) { \
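A note for readers skimming the hunk: the new GGML_UNUSED macro is the standard C idiom of casting a value to (void) to suppress unused-variable warnings. A tiny illustration (not part of the diff; compute_something is a hypothetical function):

    int rc = compute_something();  // hypothetical call
    GGML_UNUSED(rc);               // expands to (void)(rc); no -Wunused-variable warning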
@@ -209,6 +211,30 @@
         } \
     } while (0)
 
+// used to copy the number of elements and stride in bytes of tensors into local variables.
+// main purpose is to reduce code duplication and improve readability.
+//
+// example:
+//
+//    GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
+//    GGML_TENSOR_LOCALS(size_t,  nb1, src1, nb);
+//
+#define GGML_TENSOR_LOCALS_1(type, prefix, pointer, array) \
+    const type prefix##0 = (pointer)->array[0]; \
+    GGML_UNUSED(prefix##0);
+#define GGML_TENSOR_LOCALS_2(type, prefix, pointer, array) \
+    GGML_TENSOR_LOCALS_1    (type, prefix, pointer, array) \
+    const type prefix##1 = (pointer)->array[1]; \
+    GGML_UNUSED(prefix##1);
+#define GGML_TENSOR_LOCALS_3(type, prefix, pointer, array) \
+    GGML_TENSOR_LOCALS_2    (type, prefix, pointer, array) \
+    const type prefix##2 = (pointer)->array[2]; \
+    GGML_UNUSED(prefix##2);
+#define GGML_TENSOR_LOCALS(type, prefix, pointer, array) \
+    GGML_TENSOR_LOCALS_3    (type, prefix, pointer, array) \
+    const type prefix##3 = (pointer)->array[3]; \
+    GGML_UNUSED(prefix##3);
+
 #ifdef __cplusplus
 extern "C" {
 #endif
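To make the new macro concrete: GGML_TENSOR_LOCALS token-pastes the given prefix with the dimension index 0..3, so the example from the comment above, GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne), expands to the equivalent of:

    const int64_t ne10 = (src1)->ne[0]; GGML_UNUSED(ne10);
    const int64_t ne11 = (src1)->ne[1]; GGML_UNUSED(ne11);
    const int64_t ne12 = (src1)->ne[2]; GGML_UNUSED(ne12);
    const int64_t ne13 = (src1)->ne[3]; GGML_UNUSED(ne13);

The trailing GGML_UNUSED calls (added earlier in this diff) keep kernels warning-free when they only read some of the four dimensions.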
@@ -295,12 +321,15 @@ extern "C" {
         GGML_OP_SUM,
         GGML_OP_SUM_ROWS,
         GGML_OP_MEAN,
+        GGML_OP_ARGMAX,
         GGML_OP_REPEAT,
         GGML_OP_REPEAT_BACK,
         GGML_OP_ABS,
         GGML_OP_SGN,
         GGML_OP_NEG,
         GGML_OP_STEP,
+        GGML_OP_TANH,
+        GGML_OP_ELU,
         GGML_OP_RELU,
         GGML_OP_GELU,
         GGML_OP_GELU_QUICK,
@@ -332,9 +361,8 @@ extern "C" {
         GGML_OP_ROPE_BACK,
         GGML_OP_ALIBI,
         GGML_OP_CLAMP,
-        GGML_OP_CONV_1D_S1_PH,
-        GGML_OP_CONV_1D_S2_PH,
-        GGML_OP_CONV_2D_SK_P0,
+        GGML_OP_CONV_1D,
+        GGML_OP_CONV_2D,
 
         GGML_OP_FLASH_ATTN,
         GGML_OP_FLASH_FF,
@@ -444,6 +472,9 @@ extern "C" {
 
 
     // compute types
+
+    // NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
+    // This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
     enum ggml_task_type {
         GGML_TASK_INIT = 0,
         GGML_TASK_COMPUTE,
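For orientation, a sketch (not code from this commit) of how ggml op kernels branch on the task type carried in struct ggml_compute_params; after the linked llama.cpp PR, the INIT and FINALIZE passes run only for ops that explicitly enable them:

    switch (params->type) {
        case GGML_TASK_INIT:     /* optional one-time setup                */ break;
        case GGML_TASK_COMPUTE:  /* the actual work, run on every thread   */ break;
        case GGML_TASK_FINALIZE: /* optional reduction/cleanup             */ break;
    }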
@@ -469,6 +500,9 @@ extern "C" {
     GGML_API int64_t ggml_cycles(void);
     GGML_API int64_t ggml_cycles_per_ms(void);
 
+    GGML_API void    ggml_numa_init(void); // call once for better performance on NUMA systems
+    GGML_API bool    ggml_is_numa(void);   // true if init detected that system has >1 NUMA node
+
     GGML_API void    ggml_print_object (const struct ggml_object * obj);
     GGML_API void    ggml_print_objects(const struct ggml_context * ctx);
 
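A minimal usage sketch for the two new NUMA entry points, following their inline comments:

    #include "ggml.h"
    #include <stdio.h>

    int main(void) {
        ggml_numa_init();   // call once, early in the program
        if (ggml_is_numa()) {
            printf("more than one NUMA node detected\n");
        }
        return 0;
    }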
@@ -684,6 +718,11 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    // argmax along rows
+    GGML_API struct ggml_tensor * ggml_argmax(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     // if a is the same shape as b, and a is not parameter, return a
     // otherwise, return a new tensor: repeat(a) to fit in b
     GGML_API struct ggml_tensor * ggml_repeat(
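A hedged sketch of calling the new ggml_argmax; the tensor names and shapes are illustrative assumptions, since the header only promises "argmax along rows":

    // logits: n_vocab x n_rows, F32
    struct ggml_tensor * logits = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_vocab, n_rows);
    struct ggml_tensor * best   = ggml_argmax(ctx, logits); // one index per row (assumed)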
@@ -728,6 +767,22 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_tanh(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_tanh_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_elu(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_elu_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_relu(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
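The four new declarations mirror the existing unary-op pattern (abs/sgn/neg/step), where the _inplace variant writes into its argument and returns a view of it. Illustrative calls, assuming x is an existing F32 tensor:

    struct ggml_tensor * y = ggml_tanh(ctx, x);        // result in a new tensor
    struct ggml_tensor * z = ggml_elu_inplace(ctx, x); // modifies x, returns view(x)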
@@ -1033,13 +1088,15 @@ extern "C" {
     // rotary position embedding
     // if mode & 1 == 1, skip n_past elements
     // if mode & 2 == 1, GPT-NeoX style
+    // if mode & 4 == 1, ChatGLM style
     // TODO: avoid creating a new tensor every time
     GGML_API struct ggml_tensor * ggml_rope(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             int                   n_past,
             int                   n_dims,
-            int                   mode);
+            int                   mode,
+            int                   n_ctx);
 
     // in-place, returns view(a)
     GGML_API struct ggml_tensor * ggml_rope_inplace(
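Call-site impact of the signature change, with illustrative values (mode is the bit mask described in the comments above):

    // before: cur = ggml_rope(ctx, cur, n_past, n_rot, 0);
    // after:  callers must also pass the context size
    cur = ggml_rope(ctx, cur, n_past, n_rot, /*mode =*/ 0, /*n_ctx =*/ n_ctx);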
@@ -1047,7 +1104,8 @@ extern "C" {
             struct ggml_tensor  * a,
             int                   n_past,
             int                   n_dims,
-            int                   mode);
+            int                   mode,
+            int                   n_ctx);
 
     // rotary position embedding backward, i.e compute dx from dy
     // a - dy
@@ -1075,58 +1133,33 @@ extern "C" {
             float                 min,
             float                 max);
 
-    // TODO: implement general-purpose convolutions
-    // GGML_API struct ggml_tensor * ggml_conv_1d(
-    //        struct ggml_context * ctx,
-    //        struct ggml_tensor  * a,
-    //        struct ggml_tensor  * b,
-    //        int                   s0
-    //        int                   p0,
-    //        int                   d0);
-    //
-    // GGML_API struct ggml_tensor * ggml_conv_2d(
-    //        struct ggml_context * ctx,
-    //        struct ggml_tensor  * a,
-    //        struct ggml_tensor  * b,
-    //        int                   s0,
-    //        int                   s1,
-    //        int                   p0,
-    //        int                   p1,
-    //        int                   d0,
-    //        int                   d1);
-
-    // padding = half
-    // TODO: we don't support extra parameters for now
-    //       that's why we are hard-coding the stride, padding, and dilation
-    //       not great ..
-    // example:
-    // a:      3    80  768    1
-    // b:   3000    80    1    1
-    // res: 3000   768    1    1
-    // used in whisper
-    GGML_API struct ggml_tensor * ggml_conv_1d_s1_ph(
+    GGML_API struct ggml_tensor * ggml_conv_1d(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+            struct ggml_tensor  * b,
+            int                   s0,  // stride
+            int                   p0,  // padding
+            int                   d0); // dilation
 
-    // used in whisper
-    GGML_API struct ggml_tensor * ggml_conv_1d_s2_ph(
+    GGML_API struct ggml_tensor * ggml_conv_2d(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+            struct ggml_tensor  * b,
+            int                   s0,
+            int                   s1,
+            int                   p0,
+            int                   p1,
+            int                   d0,
+            int                   d1);
 
-    // kernel size is a->ne[0] x a->ne[1]
-    // stride is equal to kernel size
-    // padding is zero
-    // example:
-    // a:     16   16    3  768
-    // b:   1024 1024    3    1
-    // res:   64   64  768    1
-    // used in sam
-    GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0(
+    // conv_1d with padding = half
+    // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
+    GGML_API struct ggml_tensor* ggml_conv_1d_ph(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+            struct ggml_tensor  * b,
+            int                   s,
+            int                   d);
 
     GGML_API struct ggml_tensor * ggml_flash_attn(
             struct ggml_context * ctx,
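For callers of the removed helpers, the comments in this hunk imply the following replacements; the stride values are assumptions read off the old names (s1/s2 = hard-coded stride, ph = half padding, sk_p0 = stride equal to kernel size with zero padding):

    // was: ggml_conv_1d_s1_ph(ctx, a, b)
    res = ggml_conv_1d(ctx, a, b, /*s0 =*/ 1, /*p0 =*/ a->ne[0]/2, /*d0 =*/ 1);
    // was: ggml_conv_1d_s2_ph(ctx, a, b)
    res = ggml_conv_1d(ctx, a, b, /*s0 =*/ 2, /*p0 =*/ a->ne[0]/2, /*d0 =*/ 1);
    // or, for stride 1, via the new wrapper:
    res = ggml_conv_1d_ph(ctx, a, b, /*s =*/ 1, /*d =*/ 1);
    // was: ggml_conv_2d_sk_p0(ctx, a, b)
    res = ggml_conv_2d(ctx, a, b, a->ne[0], a->ne[1], 0, 0, 1, 1);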