mirror of https://github.com/abetlen/llama-cpp-python.git, synced 2023-09-07 17:34:22 +03:00
Allow user to set llama config from env vars
@@ -11,6 +11,7 @@ uvicorn fastapi_server_chat:app --reload
 
 Then visit http://localhost:8000/docs to see the interactive API docs.
 
 """
+import os
 import json
 from typing import List, Optional, Literal, Union, Iterator
@@ -24,6 +25,13 @@ from sse_starlette.sse import EventSourceResponse
 
 class Settings(BaseSettings):
     model: str
+    n_ctx: int = 2048
+    n_batch: int = 2048
+    n_threads: int = os.cpu_count() or 1
+    f16_kv: bool = True
+    use_mlock: bool = True
+    embedding: bool = True
+    last_n_tokens_size: int = 64
 
 
 app = FastAPI(
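For context on why adding these fields is enough: pydantic's BaseSettings (the v1 API this server uses) fills any field not set in code from a matching environment variable, case-insensitively, and coerces the string value to the declared type. A minimal sketch of that behavior, with a placeholder model path:

import os
from pydantic import BaseSettings  # pydantic v1; moved to pydantic-settings in v2

class Settings(BaseSettings):
    model: str
    n_ctx: int = 2048
    use_mlock: bool = True

# Field names match env vars case-insensitively, and values
# are coerced to the annotated types.
os.environ["MODEL"] = "./models/7B/ggml-model.bin"  # placeholder path
os.environ["N_CTX"] = "4096"
os.environ["USE_MLOCK"] = "false"

settings = Settings()
assert settings.n_ctx == 4096
assert settings.use_mlock is False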
@@ -40,12 +48,13 @@ app.add_middleware(
 settings = Settings()
 llama = llama_cpp.Llama(
     settings.model,
-    f16_kv=True,
-    use_mlock=True,
-    embedding=True,
-    n_threads=6,
-    n_batch=2048,
-    n_ctx=2048,
+    f16_kv=settings.f16_kv,
+    use_mlock=settings.use_mlock,
+    embedding=settings.embedding,
+    n_threads=settings.n_threads,
+    n_batch=settings.n_batch,
+    n_ctx=settings.n_ctx,
+    last_n_tokens_size=settings.last_n_tokens_size,
 )
 
 
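With the hard-coded arguments replaced, every Llama option above can be overridden at deploy time without touching the code, along the lines of MODEL=./models/7B/ggml-model.bin N_CTX=4096 USE_MLOCK=false uvicorn fastapi_server_chat:app (an illustrative invocation; the model path is a placeholder).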