mirror of
https://github.com/abetlen/llama-cpp-python.git
synced 2023-09-07 17:34:22 +03:00
Added cache size to settings object.
This commit is contained in:
@@ -45,6 +45,10 @@ class Settings(BaseSettings):
|
|||||||
default=False,
|
default=False,
|
||||||
description="Use a cache to reduce processing times for evaluated prompts.",
|
description="Use a cache to reduce processing times for evaluated prompts.",
|
||||||
)
|
)
|
||||||
|
cache_size: int = Field(
|
||||||
|
default=2 << 30,
|
||||||
|
description="The size of the cache in bytes. Only used if cache is True.",
|
||||||
|
)
|
||||||
vocab_only: bool = Field(
|
vocab_only: bool = Field(
|
||||||
default=False, description="Whether to only return the vocabulary."
|
default=False, description="Whether to only return the vocabulary."
|
||||||
)
|
)
|
||||||
@@ -89,7 +93,7 @@ def create_app(settings: Optional[Settings] = None):
|
|||||||
verbose=settings.verbose,
|
verbose=settings.verbose,
|
||||||
)
|
)
|
||||||
if settings.cache:
|
if settings.cache:
|
||||||
cache = llama_cpp.LlamaCache()
|
cache = llama_cpp.LlamaCache(capacity_bytes=settings.cache_size)
|
||||||
llama.set_cache(cache)
|
llama.set_cache(cache)
|
||||||
return app
|
return app
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user