Allow user to set llama config from env vars

Andrei Betlen
2023-04-04 00:52:44 -04:00
parent 05eb2087d8
commit ffe34cf64d


@@ -11,6 +11,7 @@ uvicorn fastapi_server_chat:app --reload
 Then visit http://localhost:8000/docs to see the interactive API docs.
 """
+import os
 import json
 from typing import List, Optional, Literal, Union, Iterator
@@ -24,6 +25,13 @@ from sse_starlette.sse import EventSourceResponse
 class Settings(BaseSettings):
     model: str
+    n_ctx: int = 2048
+    n_batch: int = 2048
+    n_threads: int = os.cpu_count() or 1
+    f16_kv: bool = True
+    use_mlock: bool = True
+    embedding: bool = True
+    last_n_tokens_size: int = 64
 app = FastAPI(
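Since `Settings` subclasses pydantic's `BaseSettings`, each of these new fields can be overridden by an environment variable with the same name (matched case-insensitively in pydantic v1, which this code targets). A minimal sketch of that behaviour; the model path and values below are illustrative only:

```python
import os
from pydantic import BaseSettings  # pydantic v1 import path

class Settings(BaseSettings):
    model: str
    n_ctx: int = 2048
    use_mlock: bool = True
    n_threads: int = os.cpu_count() or 1

# Hypothetical values, normally exported in the shell before starting the server.
os.environ["MODEL"] = "./models/ggml-model.bin"
os.environ["N_CTX"] = "4096"
os.environ["USE_MLOCK"] = "false"

settings = Settings()
print(settings.model)      # ./models/ggml-model.bin
print(settings.n_ctx)      # 4096, parsed to int
print(settings.use_mlock)  # False, parsed to bool
print(settings.n_threads)  # falls back to os.cpu_count() when unset
```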
@@ -40,12 +48,13 @@ app.add_middleware(
 settings = Settings()
 llama = llama_cpp.Llama(
     settings.model,
-    f16_kv=True,
-    use_mlock=True,
-    embedding=True,
-    n_threads=6,
-    n_batch=2048,
-    n_ctx=2048,
+    f16_kv=settings.f16_kv,
+    use_mlock=settings.use_mlock,
+    embedding=settings.embedding,
+    n_threads=settings.n_threads,
+    n_batch=settings.n_batch,
+    n_ctx=settings.n_ctx,
+    last_n_tokens_size=settings.last_n_tokens_size,
 )
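With the constructor now reading every value from `settings`, the server's llama.cpp parameters can be changed at launch time without editing the code. One way to do that from Python, reusing the run command from this file's docstring; the model path and override values are illustrative assumptions:

```python
import os
import subprocess

# Illustrative overrides; any field not set here keeps the default defined in Settings.
env = dict(os.environ, MODEL="./models/7B/ggml-model.bin", N_CTX="4096", N_THREADS="8")
subprocess.run(["uvicorn", "fastapi_server_chat:app", "--reload"], env=env)
```

Exporting the same variables in the shell before invoking uvicorn directly has the same effect.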