Mirror of https://github.com/abetlen/llama-cpp-python.git (synced 2023-09-07 17:34:22 +03:00)

Commit: Update llama.cpp
@@ -120,6 +120,8 @@ LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = ctypes.c_int(
 LLAMA_FTYPE_MOSTLY_Q4_2 = ctypes.c_int(5) # except 1d tensors
 LLAMA_FTYPE_MOSTYL_Q4_3 = ctypes.c_int(6) # except 1d tensors
 LLAMA_FTYPE_MOSTYL_Q8_0 = ctypes.c_int(7) # except 1d tensors
+LLAMA_FTYPE_MOSTYL_Q5_0 = ctypes.c_int(8) # except 1d tensors
+LLAMA_FTYPE_MOSTYL_Q5_1 = ctypes.c_int(9) # except 1d tensors
 
 
 # Functions
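These constants mirror llama.cpp's llama_ftype enum: each integer selects a quantization scheme applied to all tensors except 1-dimensional ones, and this commit adds the new Q5_0 and Q5_1 formats. Below is a minimal sketch of how code built on these bindings might resolve a user-facing name to the raw integer the C API expects; the _FTYPES table and ftype_from_name helper are hypothetical illustrations, not part of llama-cpp-python, and the MOSTYL spelling is reproduced verbatim from the bindings at this commit.

import ctypes

# Constants as defined in llama_cpp.py after this commit.
LLAMA_FTYPE_MOSTLY_Q4_2 = ctypes.c_int(5)  # except 1d tensors
LLAMA_FTYPE_MOSTYL_Q4_3 = ctypes.c_int(6)  # except 1d tensors
LLAMA_FTYPE_MOSTYL_Q8_0 = ctypes.c_int(7)  # except 1d tensors
LLAMA_FTYPE_MOSTYL_Q5_0 = ctypes.c_int(8)  # except 1d tensors
LLAMA_FTYPE_MOSTYL_Q5_1 = ctypes.c_int(9)  # except 1d tensors

# Hypothetical helper: map a quantization-type name to its enum value.
_FTYPES = {
    "q4_2": LLAMA_FTYPE_MOSTLY_Q4_2,
    "q4_3": LLAMA_FTYPE_MOSTYL_Q4_3,
    "q8_0": LLAMA_FTYPE_MOSTYL_Q8_0,
    "q5_0": LLAMA_FTYPE_MOSTYL_Q5_0,
    "q5_1": LLAMA_FTYPE_MOSTYL_Q5_1,
}

def ftype_from_name(name: str) -> int:
    """Return the raw int for a quantization type name, e.g. 'q5_1' -> 9."""
    return _FTYPES[name.lower()].value

if __name__ == "__main__":
    print(ftype_from_name("q5_0"))  # 8
    print(ftype_from_name("q5_1"))  # 9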
@@ -210,6 +212,12 @@ def llama_get_kv_cache_token_count(ctx: llama_context_p) -> c_int:
 _lib.llama_get_kv_cache_token_count.argtypes = [llama_context_p]
 _lib.llama_get_kv_cache_token_count.restype = c_int
 
+# Sets the current rng seed.
+def llama_set_rng_seed(ctx: llama_context_p, seed: c_int):
+    return _lib.llama_set_rng_seed(ctx, seed)
+
+_lib.llama_set_rng_seed.argtypes = [llama_context_p, c_int]
+_lib.llama_set_rng_seed.restype = None
 
 # Returns the size in bytes of the state (rng, logits, embedding and kv_cache)
 def llama_get_state_size(ctx: llama_context_p) -> c_size_t:
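The new llama_set_rng_seed binding follows the module's standard ctypes pattern: a thin Python wrapper around the C symbol, plus argtypes/restype declarations so ctypes marshals the arguments correctly. A minimal usage sketch, assuming the low-level API of this vintage (llama_context_default_params, llama_init_from_file, llama_free) and a hypothetical model path:

import llama_cpp

params = llama_cpp.llama_context_default_params()
# Hypothetical path; point this at a real GGML model file.
ctx = llama_cpp.llama_init_from_file(b"./models/7B/ggml-model.bin", params)

# Fix the RNG seed; ctypes converts the plain int to c_int via the
# argtypes declared in the binding.
llama_cpp.llama_set_rng_seed(ctx, 42)

llama_cpp.llama_free(ctx)

Re-seeding before generation makes stochastic sampling reproducible for a fixed prompt and sampling parameters, which is the usual reason to expose the RNG seed.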
vendor/llama.cpp (vendored): 2 changes
Submodule vendor/llama.cpp updated: 4afcc37869...0b2da20538