Add experimental cache

Andrei Betlen
2023-04-15 12:03:09 -04:00
parent a6372a7ae5
commit 92c077136d
2 changed files with 69 additions and 5 deletions


@@ -35,6 +35,7 @@ class Settings(BaseSettings):
     embedding: bool = True
     last_n_tokens_size: int = 64
     logits_all: bool = False
+    cache: bool = False  # WARNING: This is an experimental feature

 app = FastAPI(
@@ -60,6 +61,9 @@ llama = llama_cpp.Llama(
     n_ctx=settings.n_ctx,
     last_n_tokens_size=settings.last_n_tokens_size,
 )
+if settings.cache:
+    cache = llama_cpp.LlamaCache()
+    llama.set_cache(cache)
 llama_lock = Lock()
@@ -68,7 +72,6 @@ def get_llama():
         yield llama

 class CreateCompletionRequest(BaseModel):
     prompt: Union[str, List[str]]
     suffix: Optional[str] = Field(None)
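
The second changed file in this commit (not shown here) presumably adds LlamaCache and Llama.set_cache, which the server wiring above calls when the cache setting is enabled. A minimal sketch of exercising the same experimental cache directly, outside the server, might look like the following; the model path is a placeholder and the exact constructor arguments are assumptions, not part of this commit:

import llama_cpp

# Load a model; the path below is a placeholder.
llama = llama_cpp.Llama(model_path="./models/ggml-model.bin")

# Create the experimental cache and attach it, mirroring the server code above.
cache = llama_cpp.LlamaCache()
llama.set_cache(cache)

# Completions that share a prompt prefix can now reuse cached state
# instead of re-evaluating the full prompt each time.
out = llama("Q: What is the capital of France? A:", max_tokens=16)
print(out["choices"][0]["text"])

Because cache is an ordinary pydantic Settings field, it can presumably be toggled for the server from the environment (for example CACHE=true), assuming default BaseSettings behavior with no env prefix.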