From 14da46f16e46dba2a6964c8d0d7ddbce388182e5 Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Sun, 7 May 2023 19:33:17 -0400 Subject: [PATCH] Added cache size to settings object. --- llama_cpp/server/app.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py index f46f920..e74d17d 100644 --- a/llama_cpp/server/app.py +++ b/llama_cpp/server/app.py @@ -45,6 +45,10 @@ class Settings(BaseSettings): default=False, description="Use a cache to reduce processing times for evaluated prompts.", ) + cache_size: int = Field( + default=2 << 30, + description="The size of the cache in bytes. Only used if cache is True.", + ) vocab_only: bool = Field( default=False, description="Whether to only return the vocabulary." ) @@ -89,7 +93,7 @@ def create_app(settings: Optional[Settings] = None): verbose=settings.verbose, ) if settings.cache: - cache = llama_cpp.LlamaCache() + cache = llama_cpp.LlamaCache(capacity_bytes=settings.cache_size) llama.set_cache(cache) return app