diff --git a/llama_cpp/server/__main__.py b/llama_cpp/server/__main__.py index d19c45a..44ee1f0 100644 --- a/llama_cpp/server/__main__.py +++ b/llama_cpp/server/__main__.py @@ -30,7 +30,7 @@ class Settings(BaseSettings): n_batch: int = 8 n_threads: int = ((os.cpu_count() or 2) // 2) or 1 f16_kv: bool = True - use_mlock: bool = False # This causes a silent failure on platforms that don't support mlock (e.g. Windows) took forever to figure out... + use_mlock: bool = False # This causes a silent failure on platforms that don't support mlock (e.g. Windows) took forever to figure out... embedding: bool = True last_n_tokens_size: int = 64