Add low_vram to server settings

2023-09-07 17:34:22 +03:00 · 2023-06-14 22:13:42 -04:00
parent 44b83cada5
commit 1e20be6d0c
1 changed file with 4 additions and 0 deletions
--- a/llama_cpp/server/app.py
+++ b/llama_cpp/server/app.py
@@ -48,6 +48,10 @@ class Settings(BaseSettings):
        description="Use mmap.",
    )
    # Boolean setting for embeddings; enabled (True) by default.
    embedding: bool = Field(default=True, description="Whether to use embeddings.")
    # Opt-in boolean setting (False by default). Per its description, enabling
    # it uses less VRAM at the cost of performance. How the value is consumed
    # is not visible in this part of the file.
    low_vram: bool = Field(
        default=False,
        description="Whether to use less VRAM. This will reduce performance.",
    )
    last_n_tokens_size: int = Field(
        default=64,
        ge=0,