Mirror of https://github.com/abetlen/llama-cpp-python.git (synced 2023-09-07 17:34:22 +03:00)
Added cache size to settings object.
@@ -45,6 +45,10 @@ class Settings(BaseSettings):
         default=False,
         description="Use a cache to reduce processing times for evaluated prompts.",
     )
+    cache_size: int = Field(
+        default=2 << 30,
+        description="The size of the cache in bytes. Only used if cache is True.",
+    )
     vocab_only: bool = Field(
         default=False, description="Whether to only return the vocabulary."
     )
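
For reference, the new default of 2 << 30 is just a bit-shift spelling of 2 GiB; a quick Python check (nothing here beyond standard arithmetic):

    # 2 << 30 shifts 2 left by 30 bits: 2 * 2**30 bytes = 2 GiB.
    assert 2 << 30 == 2 * 1024**3 == 2_147_483_648
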
@@ -89,7 +93,7 @@ def create_app(settings: Optional[Settings] = None):
         verbose=settings.verbose,
     )
     if settings.cache:
-        cache = llama_cpp.LlamaCache()
+        cache = llama_cpp.LlamaCache(capacity_bytes=settings.cache_size)
         llama.set_cache(cache)

     return app
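
Outside the server, the same wiring can be reproduced directly against the library. This is a minimal sketch, assuming llama_cpp is installed; the model path is a hypothetical placeholder:

    import llama_cpp

    # Hypothetical model path; point this at a real GGML model file.
    llama = llama_cpp.Llama(model_path="./models/ggml-model.bin")

    # Mirror what create_app now does when settings.cache is enabled:
    # build a LlamaCache with an explicit byte capacity and attach it.
    cache = llama_cpp.LlamaCache(capacity_bytes=2 << 30)
    llama.set_cache(cache)

Since Settings is a pydantic BaseSettings, the new field should also be reachable as an environment variable (e.g. CACHE_SIZE=1073741824 alongside CACHE=true), though that mapping follows from pydantic's defaults rather than anything shown in this diff.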