mirror of
https://github.com/abetlen/llama-cpp-python.git
synced 2023-09-07 17:34:22 +03:00
Add low_vram to server settings
This commit is contained in:
@@ -48,6 +48,10 @@ class Settings(BaseSettings):
         description="Use mmap.",
     )
     embedding: bool = Field(default=True, description="Whether to use embeddings.")
+    low_vram: bool = Field(
+        default=False,
+        description="Whether to use less VRAM. This will reduce performance.",
+    )
     last_n_tokens_size: int = Field(
         default=64,
         ge=0,
Reference in New Issue
Block a user