whisper : add GPU support via cuBLAS (#834)

* make : add WHISPER_CUBLAS
* make : fix CUBLAS build
* whisper : disable Flash Attention + adjust memory buffers
* whisper : remove old commented code
* readme : add cuBLAS instructions
* cmake : add WHISPER_CUBLAS option
* gitignore : ignore build-cublas
2023-11-04 02:52:44 +03:00 · 2023-04-30 12:14:33 +03:00
parent 0ccd6746c9
commit 5fd1bdd7fc
10 changed files with 97 additions and 46 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -51,7 +51,7 @@ option(WHISPER_SANITIZE_UNDEFINED     "whisper: enable undefined sanitizer" OFF)
 option(WHISPER_BUILD_TESTS            "whisper: build tests"    ${WHISPER_STANDALONE})
 option(WHISPER_BUILD_EXAMPLES         "whisper: build examples" ${WHISPER_STANDALONE})

-option(WHISPER_SUPPORT_SDL2           "whisper: support for libSDL2" OFF)
+option(WHISPER_SDL2                   "whisper: support for libSDL2" OFF)

 if (APPLE)
    option(WHISPER_NO_ACCELERATE         "whisper: disable Accelerate framework" OFF)
@@ -62,7 +62,8 @@ if (APPLE)
    option(WHISPER_COREML                "whisper: enable Core ML framework" OFF)
    option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
 else()
-    option(WHISPER_SUPPORT_OPENBLAS      "whisper: support for OpenBLAS" OFF)
+    option(WHISPER_OPENBLAS              "whisper: support for OpenBLAS" OFF)
+    option(WHISPER_CUBLAS                "whisper: support for cuBLAS" OFF)
 endif()

 option(WHISPER_PERF "whisper: enable perf timings" OFF)
@@ -127,7 +128,7 @@ if (APPLE)
    endif()
 endif()

-if (WHISPER_SUPPORT_OPENBLAS)
+if (WHISPER_OPENBLAS)
    find_library(OPENBLAS_LIB
        NAMES openblas libopenblas
        )
@@ -141,6 +142,31 @@ if (WHISPER_SUPPORT_OPENBLAS)
    endif()
 endif()

+if (WHISPER_CUBLAS)
+    # FindCUDAToolkit and its CUDA:: imported targets first shipped with CMake 3.17
+    cmake_minimum_required(VERSION 3.17)
+    find_package(CUDAToolkit)
+
+    if (NOT CUDAToolkit_FOUND)
+        message(WARNING "cuBLAS not found")
+    else()
+        message(STATUS "cuBLAS found")
+        enable_language(CUDA)
+
+        # compiled into the library through ${GGML_CUDA_SOURCES} in add_library()
+        set(GGML_CUDA_SOURCES ggml-cuda.cu ggml-cuda.h)
+
+        # directory-scoped definition: seen by all targets in this directory
+        add_compile_definitions(GGML_USE_CUBLAS)
+
+        if (WHISPER_STATIC)
+            list(APPEND WHISPER_EXTRA_LIBS CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
+        else()
+            list(APPEND WHISPER_EXTRA_LIBS CUDA::cudart CUDA::cublas CUDA::cublasLt)
+        endif()
+    endif()
+endif()
+
 # compiler flags

 if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
@@ -247,6 +273,7 @@ set(TARGET whisper)
 add_library(${TARGET}
    ggml.h
    ggml.c
+    ${GGML_CUDA_SOURCES}
    whisper.h
    whisper.cpp
    )
@@ -279,6 +306,12 @@ if (BUILD_SHARED_LIBS)
        )
 endif()

+if (GGML_CUDA_SOURCES)
+    message(STATUS "GGML CUDA sources found, configuring CUDA architecture")
+    # OFF: CMake passes no architecture flags, so nvcc falls back to its own default
+    set_property(TARGET ${TARGET} PROPERTY CUDA_ARCHITECTURES OFF)
+endif()
+
 if (EMSCRIPTEN)
    set_target_properties(${TARGET} PROPERTIES COMPILE_FLAGS "-msimd128")
 endif()