From b47b9549d57f146a00ee19cd7d2bb294111abb67 Mon Sep 17 00:00:00 2001 From: Lucas Doyle Date: Sat, 29 Apr 2023 01:19:30 -0700 Subject: [PATCH] llama_cpp server: delete some ignored / unused parameters `n`, `presence_penalty`, `frequency_penalty`, `best_of`, `logit_bias`, `user`: not supported, excluded from the calls into llama. decision: delete it --- llama_cpp/llama_types.py | 2 -- llama_cpp/server/app.py | 30 +++--------------------------- 2 files changed, 3 insertions(+), 29 deletions(-) diff --git a/llama_cpp/llama_types.py b/llama_cpp/llama_types.py index b62ff1b..b8bdb08 100644 --- a/llama_cpp/llama_types.py +++ b/llama_cpp/llama_types.py @@ -60,8 +60,6 @@ class Completion(TypedDict): class ChatCompletionMessage(TypedDict): role: Union[Literal["assistant"], Literal["user"], Literal["system"]] content: str - user: NotRequired[str] - class ChatCompletionChoice(TypedDict): index: int diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py index 5d87e78..cc467db 100644 --- a/llama_cpp/server/app.py +++ b/llama_cpp/server/app.py @@ -83,13 +83,7 @@ class CreateCompletionRequest(BaseModel): # ignored, but marked as required for the sake of compatibility with openai's api model: str = model_field - n: Optional[int] = 1 logprobs: Optional[int] = Field(None) - presence_penalty: Optional[float] = 0 - frequency_penalty: Optional[float] = 0 - best_of: Optional[int] = 1 - logit_bias: Optional[Dict[str, float]] = Field(None) - user: Optional[str] = Field(None) # llama.cpp specific parameters top_k: int = 40 @@ -120,13 +114,7 @@ def create_completion( completion_or_chunks = llama( **request.dict( exclude={ - "model", - "n", - "frequency_penalty", - "presence_penalty", - "best_of", - "logit_bias", - "user", + "model" } ) ) @@ -141,7 +129,6 @@ class CreateEmbeddingRequest(BaseModel): # ignored, but marked as required for the sake of compatibility with openai's api model: str = model_field input: str - user: Optional[str] class Config: schema_extra = { @@ -161,7 +148,7 @@ CreateEmbeddingResponse = create_model_from_typeddict(llama_cpp.Embedding) def create_embedding( request: CreateEmbeddingRequest, llama: llama_cpp.Llama = Depends(get_llama) ): - return llama.create_embedding(**request.dict(exclude={"model", "user"})) + return llama.create_embedding(**request.dict(exclude={"model"})) class ChatCompletionRequestMessage(BaseModel): @@ -181,12 +168,6 @@ class CreateChatCompletionRequest(BaseModel): # ignored, but marked as required for the sake of compatibility with openai's api model: str = model_field - - n: Optional[int] = 1 - presence_penalty: Optional[float] = 0 - frequency_penalty: Optional[float] = 0 - logit_bias: Optional[Dict[str, float]] = Field(None) - user: Optional[str] = Field(None) # llama.cpp specific parameters repeat_penalty: float = 1.1 @@ -220,12 +201,7 @@ def create_chat_completion( completion_or_chunks = llama.create_chat_completion( **request.dict( exclude={ - "model", - "n", - "presence_penalty", - "frequency_penalty", - "logit_bias", - "user", + "model" } ), )