Use Accelerate framework on Apple silicon

Huge performance improvement in the Encode step (almost 2x faster on a MacBook M1 Pro)

Also various extra optimizations:

- Multi-threaded NORM operator
- Faster GELU via F16 cast
This commit is contained in:
Georgi Gerganov
2022-10-17 21:44:16 +03:00
parent 130b5c02d6
commit 72d967bce4
5 changed files with 217 additions and 121 deletions

View File

@@ -15,7 +15,7 @@
#include <vector>
#define USE_FLASH_ATTN
#define USE_FLASH_FF
//#define USE_FLASH_FF
// available whisper models
enum e_model {
@@ -148,11 +148,11 @@ static const std::map<e_model, size_t> MEM_REQ_ENCODE = {
};
static const std::map<e_model, size_t> MEM_REQ_ENCODE_LAYER = {
{ MODEL_TINY, 64ull*MB },
{ MODEL_BASE, 84ull*MB },
{ MODEL_SMALL, 128ull*MB },
{ MODEL_MEDIUM, 172ull*MB },
{ MODEL_LARGE, 216ull*MB },
{ MODEL_TINY, 104ull*MB },
{ MODEL_BASE, 138ull*MB },
{ MODEL_SMALL, 208ull*MB },
{ MODEL_MEDIUM, 280ull*MB },
{ MODEL_LARGE, 354ull*MB },
};
static const std::map<e_model, size_t> MEM_REQ_DECODE = {