From 14da46f16e46dba2a6964c8d0d7ddbce388182e5 Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Sun, 7 May 2023 19:33:17 -0400 Subject: [PATCH] Added cache size to settings object. --- llama_cpp/server/app.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py index f46f920..e74d17d 100644 --- a/llama_cpp/server/app.py +++ b/llama_cpp/server/app.py @@ -45,6 +45,10 @@ class Settings(BaseSettings): default=False, description="Use a cache to reduce processing times for evaluated prompts.", ) + cache_size: int = Field( + default=2 << 30, + description="The size of the cache in bytes. Only used if cache is True.", + ) vocab_only: bool = Field( default=False, description="Whether to only return the vocabulary." ) @@ -89,7 +93,7 @@ def create_app(settings: Optional[Settings] = None): verbose=settings.verbose, ) if settings.cache: - cache = llama_cpp.LlamaCache() + cache = llama_cpp.LlamaCache(capacity_bytes=settings.cache_size) llama.set_cache(cache) return app