Mirror of https://github.com/abetlen/llama-cpp-python.git, synced 2023-09-07 17:34:22 +03:00
Update llama.cpp
CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## [Unreleased]

+### Added
+
+- (llama.cpp) Fix struct misalignment bug
+
 ## [0.1.64]

 ### Added
llama_cpp/llama_cpp.py
@@ -150,47 +150,43 @@ llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p)


 # struct llama_context_params {
+#     int seed;                              // RNG seed, -1 for random
 #     int n_ctx;                             // text context
 #     int n_batch;                           // prompt processing batch size
 #     int n_gpu_layers;                      // number of layers to store in VRAM
 #     int main_gpu;                          // the GPU that is used for scratch and small tensors
 #     float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
-#     bool low_vram;   // if true, reduce VRAM usage at the cost of performance
-#     int seed;        // RNG seed, -1 for random
+#     // called with a progress value between 0 and 1, pass NULL to disable
+#     llama_progress_callback progress_callback;
+#     // context pointer passed to the progress callback
+#     void * progress_callback_user_data;
+
+#     // Keep the booleans together to avoid misalignment during copy-by-value.
+#     bool low_vram;   // if true, reduce VRAM usage at the cost of performance
 #     bool f16_kv;     // use fp16 for KV cache
 #     bool logits_all; // the llama_eval() call computes all logits, not just the last one
 #     bool vocab_only; // only load the vocabulary, no weights
 #     bool use_mmap;   // use mmap if possible
 #     bool use_mlock;  // force system to keep model in RAM
 #     bool embedding;  // embedding mode only
-
-#     // called with a progress value between 0 and 1, pass NULL to disable
-#     llama_progress_callback progress_callback;
-#     // context pointer passed to the progress callback
-#     void * progress_callback_user_data;
 # };
 class llama_context_params(Structure):
     _fields_ = [
+        ("seed", c_int),
         ("n_ctx", c_int),
         ("n_batch", c_int),
         ("n_gpu_layers", c_int),
         ("main_gpu", c_int),
         ("tensor_split", c_float * LLAMA_MAX_DEVICES.value),
+        ("progress_callback", llama_progress_callback),
+        ("progress_callback_user_data", c_void_p),
         ("low_vram", c_bool),
-        ("seed", c_int),
         ("f16_kv", c_bool),
-        (
-            "logits_all",
-            c_bool,
-        ),
+        ("logits_all", c_bool),
         ("vocab_only", c_bool),
         ("use_mmap", c_bool),
         ("use_mlock", c_bool),
         ("embedding", c_bool),
-        ("progress_callback", llama_progress_callback),
-        ("progress_callback_user_data", c_void_p),
     ]
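For context on the reordering above: ctypes derives every field's offset from the declaration order of _fields_, so the Python Structure has to mirror the C struct member for member once llama.cpp moves seed to the top and groups the booleans; otherwise reads and writes land on the wrong bytes of the copied-by-value struct. The following is a minimal, self-contained sketch, not part of this commit; OldLayout, NewLayout, and the reduced field set are illustrative only. It prints the offsets ctypes assigns under the old and new orderings.

# Minimal sketch: how ctypes field order determines struct layout.
# OldLayout/NewLayout are illustrative names, not part of llama_cpp.
import ctypes
from ctypes import Structure, c_bool, c_int


class OldLayout(Structure):
    # pre-fix ordering: an int (seed) sits between the booleans
    _fields_ = [
        ("n_ctx", c_int),
        ("low_vram", c_bool),
        ("seed", c_int),
        ("f16_kv", c_bool),
    ]


class NewLayout(Structure):
    # post-fix ordering: ints first, booleans kept together at the end
    _fields_ = [
        ("seed", c_int),
        ("n_ctx", c_int),
        ("low_vram", c_bool),
        ("f16_kv", c_bool),
    ]


def dump(cls: type) -> None:
    # Print the offset ctypes computed for every field plus the total size.
    print(cls.__name__, "sizeof =", ctypes.sizeof(cls))
    for name, _ctype in cls._fields_:
        descriptor = getattr(cls, name)
        print(f"  {name:10s} offset={descriptor.offset:2d} size={descriptor.size}")


if __name__ == "__main__":
    dump(OldLayout)  # padding appears around the int wedged between the bools
    dump(NewLayout)  # booleans are contiguous, matching the regrouped C header

On a typical 64-bit platform this prints OldLayout at 16 bytes with padding before seed and NewLayout at 12 bytes with the booleans packed back to back; that kind of offset mismatch between the C header and the Python binding is exactly what the grouped-boolean layout ("Keep the booleans together to avoid misalignment during copy-by-value") is meant to prevent.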
Submodule vendor/llama.cpp updated: 8596af4277...2322ec223a
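On the caller side the field reorder should be transparent as long as parameters are set by name; only positional construction of the Structure would notice the change. A hedged usage sketch, assuming the installed llama_cpp package exposing llama_context_default_params as the bindings do for the corresponding llama.cpp function:

import llama_cpp

# Start from the defaults filled in by the C side, then override by name;
# attribute access resolves per field name, so the _fields_ order is irrelevant here.
params = llama_cpp.llama_context_default_params()
params.seed = 1234
params.n_ctx = 2048
params.low_vram = False
print(params.seed, params.n_ctx, params.n_batch)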