whisper : add GPU support via cuBLAS (#834)

* make : add WHISPER_CUBLAS

* make : fix CUBLAS build

* whisper : disable Flash Attention + adjust memory buffers

* whisper : remove old commented code

* readme : add cuBLAS instructions

* cmake : add WHISPER_CUBLAS option

* gitignore : ignore build-cublas
This commit is contained in:
Georgi Gerganov
2023-04-30 12:14:33 +03:00
committed by GitHub
parent 0ccd6746c9
commit 5fd1bdd7fc
10 changed files with 97 additions and 46 deletions

View File

@@ -51,7 +51,7 @@ option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF)
option(WHISPER_BUILD_TESTS "whisper: build tests" ${WHISPER_STANDALONE})
option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDALONE})
option(WHISPER_SUPPORT_SDL2 "whisper: support for libSDL2" OFF)
option(WHISPER_SDL2 "whisper: support for libSDL2" OFF)
if (APPLE)
option(WHISPER_NO_ACCELERATE "whisper: disable Accelerate framework" OFF)
@@ -62,7 +62,8 @@ if (APPLE)
option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
else()
option(WHISPER_SUPPORT_OPENBLAS "whisper: support for OpenBLAS" OFF)
option(WHISPER_OPENBLAS "whisper: support for OpenBLAS" OFF)
option(WHISPER_CUBLAS "whisper: support for cuBLAS" OFF)
endif()
option(WHISPER_PERF "whisper: enable perf timings" OFF)
@@ -127,7 +128,7 @@ if (APPLE)
endif()
endif()
if (WHISPER_SUPPORT_OPENBLAS)
if (WHISPER_OPENBLAS)
find_library(OPENBLAS_LIB
NAMES openblas libopenblas
)
@@ -141,6 +142,31 @@ if (WHISPER_SUPPORT_OPENBLAS)
endif()
endif()
if (WHISPER_CUBLAS)
cmake_minimum_required(VERSION 3.17)
find_package(CUDAToolkit)
if (CUDAToolkit_FOUND)
message(STATUS "cuBLAS found")
enable_language(CUDA)
set(GGML_CUDA_SOURCES ggml-cuda.cu ggml-cuda.h)
add_compile_definitions(GGML_USE_CUBLAS)
if (WHISPER_STATIC)
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
else()
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
endif()
else()
message(WARNING "cuBLAS not found")
endif()
endif()
# compiler flags
if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
@@ -247,6 +273,7 @@ set(TARGET whisper)
add_library(${TARGET}
ggml.h
ggml.c
${GGML_CUDA_SOURCES}
whisper.h
whisper.cpp
)
@@ -279,6 +306,12 @@ if (BUILD_SHARED_LIBS)
)
endif()
if (GGML_CUDA_SOURCES)
message(STATUS "GGML CUDA sources found, configuring CUDA architecture")
set_property(TARGET whisper PROPERTY CUDA_ARCHITECTURES OFF)
set_property(TARGET whisper PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
endif()
if (EMSCRIPTEN)
set_target_properties(${TARGET} PROPERTIES COMPILE_FLAGS "-msimd128")
endif()