Mirror of https://github.com/abetlen/llama-cpp-python.git, synced 2023-09-07 17:34:22 +03:00
Add use_mmap flag to server
@@ -29,9 +29,10 @@ class Settings(BaseSettings):
     model: str
     n_ctx: int = 2048
     n_batch: int = 8
-    n_threads: int = ((os.cpu_count() or 2) // 2) or 1
+    n_threads: int = max((os.cpu_count() or 2) // 2, 1)
     f16_kv: bool = True
     use_mlock: bool = False # This causes a silent failure on platforms that don't support mlock (e.g. Windows) took forever to figure out...
+    use_mmap: bool = True
     embedding: bool = True
     last_n_tokens_size: int = 64
     logits_all: bool = False
@@ -54,6 +55,7 @@ llama = llama_cpp.Llama(
     settings.model,
     f16_kv=settings.f16_kv,
     use_mlock=settings.use_mlock,
+    use_mmap=settings.use_mmap,
     embedding=settings.embedding,
     logits_all=settings.logits_all,
     n_threads=settings.n_threads,
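Below is a minimal sketch of exercising the same flags directly with llama_cpp.Llama once this change is in place; the model path is a placeholder assumption, not part of the commit.

import os
import llama_cpp

# Placeholder model path; point this at a local GGML model file.
llama = llama_cpp.Llama(
    "./models/ggml-model.bin",
    use_mmap=True,    # memory-map the model file instead of reading it fully into RAM
    use_mlock=False,  # mlock fails silently on platforms that lack it (e.g. Windows)
    n_threads=max((os.cpu_count() or 2) // 2, 1),
)

Since Settings subclasses pydantic's BaseSettings, the new use_mmap field can presumably also be toggled for the server through its configuration (for example an environment variable), though that is an assumption about the server's settings handling rather than something shown in this diff.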