Mirror of https://github.com/abetlen/llama-cpp-python.git (synced 2023-09-07 17:34:22 +03:00)
Update n_batch for server
@@ -28,7 +28,7 @@ from sse_starlette.sse import EventSourceResponse
 class Settings(BaseSettings):
     model: str
     n_ctx: int = 2048
-    n_batch: int = 8
+    n_batch: int = 512
     n_threads: int = max((os.cpu_count() or 2) // 2, 1)
     f16_kv: bool = True
     use_mlock: bool = False  # This causes a silent failure on platforms that don't support mlock (e.g. Windows) took forever to figure out...
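The field changed here, n_batch, is the number of prompt tokens llama.cpp evaluates per batch; raising the default from 8 to 512 (llama.cpp's own default) speeds up prompt ingestion in the server. Below is a minimal sketch of how a BaseSettings field like this can be overridden at launch time through an environment variable, assuming pydantic v1 (where BaseSettings lives in pydantic and env-var matching is case-insensitive by default); the snippet is illustrative and not code from this commit.

# Minimal sketch: pydantic v1 BaseSettings fills fields from matching
# environment variables, so n_batch can be tuned without code changes.
import os

os.environ["N_BATCH"] = "256"  # hypothetical override for illustration

from pydantic import BaseSettings

class Settings(BaseSettings):
    n_ctx: int = 2048
    n_batch: int = 512  # new default from this commit

settings = Settings()
print(settings.n_batch)  # -> 256, read from the environment, not the default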