Use Accelerate framework on Apple silicon

Huge performance improvement in the Encode step (almost 2x on a MacBook M1 Pro).
Also various extra optimizations:
- Multi-threaded NORM operator
- Faster GELU via F16 cast
2023-11-04 02:52:44 +03:00 · 2022-10-17 21:44:16 +03:00
parent 130b5c02d6
commit 72d967bce4
5 changed files with 217 additions and 121 deletions
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -15,7 +15,7 @@
 #include <vector>

 #define USE_FLASH_ATTN
-#define USE_FLASH_FF
+//#define USE_FLASH_FF

 // available whisper models
 enum e_model {
@@ -148,11 +148,11 @@ static const std::map<e_model, size_t> MEM_REQ_ENCODE = {
 };

 static const std::map<e_model, size_t> MEM_REQ_ENCODE_LAYER = {
-    { MODEL_TINY,     64ull*MB },
-    { MODEL_BASE,     84ull*MB },
-    { MODEL_SMALL,   128ull*MB },
-    { MODEL_MEDIUM,  172ull*MB },
-    { MODEL_LARGE,   216ull*MB },
+    { MODEL_TINY,    104ull*MB },
+    { MODEL_BASE,    138ull*MB },
+    { MODEL_SMALL,   208ull*MB },
+    { MODEL_MEDIUM,  280ull*MB },
+    { MODEL_LARGE,   354ull*MB },
 };

 static const std::map<e_model, size_t> MEM_REQ_DECODE = {