Mirror of https://github.com/abetlen/llama-cpp-python.git (synced 2023-09-07 17:34:22 +03:00)
Fix cache implementation breaking changes
@@ -58,6 +58,10 @@ class Settings(BaseSettings):
         default=False,
         description="Use a cache to reduce processing times for evaluated prompts.",
     )
+    cache_type: Literal["ram", "disk"] = Field(
+        default="ram",
+        description="The type of cache to use. Only used if cache is True.",
+    )
     cache_size: int = Field(
         default=2 << 30,
         description="The size of the cache in bytes. Only used if cache is True.",
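For context, a minimal sketch of how the cache-related settings read together after this hunk. The `cache: bool` declaration and the pydantic v1-style `BaseSettings` import are assumptions, inferred from `default=False` and the `settings.cache` check later in the diff; only the fields visible in the hunk are included.

from typing import Literal

from pydantic import BaseSettings, Field


class Settings(BaseSettings):
    # Enable prompt caching (assumed to be declared as a bool, given default=False).
    cache: bool = Field(
        default=False,
        description="Use a cache to reduce processing times for evaluated prompts.",
    )
    # Added in this commit: choose between an in-memory and an on-disk cache.
    cache_type: Literal["ram", "disk"] = Field(
        default="ram",
        description="The type of cache to use. Only used if cache is True.",
    )
    # The default capacity is 2 << 30 bytes, i.e. 2 GiB.
    cache_size: int = Field(
        default=2 << 30,
        description="The size of the cache in bytes. Only used if cache is True.",
    )

Since these are pydantic BaseSettings fields, they can typically be overridden through environment variables (e.g. CACHE=true, CACHE_TYPE=disk), though the exact variable names depend on how the server constructs Settings.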
@@ -108,6 +112,11 @@ def create_app(settings: Optional[Settings] = None):
         verbose=settings.verbose,
     )
     if settings.cache:
+        if settings.cache_type == "disk":
+            cache = llama_cpp.LlamaDiskCache(capacity_bytes=settings.cache_size)
+        else:
+            cache = llama_cpp.LlamaRAMCache(capacity_bytes=settings.cache_size)
+
         cache = llama_cpp.LlamaCache(capacity_bytes=settings.cache_size)
         llama.set_cache(cache)
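The same selection logic can be reproduced outside the server against the llama_cpp classes referenced in the hunk. A minimal sketch; the model path is a placeholder, and LlamaRAMCache/LlamaDiskCache are assumed to take only the capacity_bytes argument shown above.

import llama_cpp

# Placeholder path; point this at a real local model file.
llama = llama_cpp.Llama(model_path="./models/model.bin")

cache_type = "ram"        # mirrors Settings.cache_type ("ram" or "disk")
capacity_bytes = 2 << 30  # mirrors the default Settings.cache_size (2 GiB)

if cache_type == "disk":
    cache = llama_cpp.LlamaDiskCache(capacity_bytes=capacity_bytes)
else:
    cache = llama_cpp.LlamaRAMCache(capacity_bytes=capacity_bytes)

# Attach the cache so previously evaluated prompt state can be reused.
llama.set_cache(cache)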