llama_cpp server: delete some ignored / unused parameters

`n`, `presence_penalty`, `frequency_penalty`, `best_of`, `logit_bias`, `user`: not supported and already excluded from the calls into llama. Decision: delete them.
Lucas Doyle
2023-04-29 01:19:30 -07:00
parent e40fcb0575
commit b47b9549d5
2 changed files with 3 additions and 29 deletions
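
Context on why outright deletion is safe here (not part of the commit): these request classes are pydantic BaseModels, and pydantic v1 era defaults ignore undeclared fields, so OpenAI clients that still send `n`, `user`, etc. get them silently dropped rather than rejected. A minimal sketch of that behaviour, assuming pydantic v1 defaults; the trimmed-down request model is illustrative only:

# Not part of the commit; a minimal sketch assuming pydantic v1 defaults.
# BaseModel's default config (Extra.ignore) drops undeclared fields, so
# deleting n, user, etc. from the models makes clients' extra fields
# disappear silently instead of causing validation errors.
from typing import Optional
from pydantic import BaseModel

class CreateCompletionRequest(BaseModel):  # heavily trimmed illustration
    model: str
    prompt: Optional[str] = ""

req = CreateCompletionRequest(model="local", prompt="hi", n=2, user="alice")
print(req.dict())  # {'model': 'local', 'prompt': 'hi'} -- extras ignored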

Changed file 1 of 2:

@@ -60,8 +60,6 @@ class Completion(TypedDict):
 class ChatCompletionMessage(TypedDict):
     role: Union[Literal["assistant"], Literal["user"], Literal["system"]]
     content: str
-    user: NotRequired[str]
-
 
 class ChatCompletionChoice(TypedDict):
     index: int

Changed file 2 of 2:

@@ -83,13 +83,7 @@ class CreateCompletionRequest(BaseModel):
 
     # ignored, but marked as required for the sake of compatibility with openai's api
     model: str = model_field
-    n: Optional[int] = 1
     logprobs: Optional[int] = Field(None)
-    presence_penalty: Optional[float] = 0
-    frequency_penalty: Optional[float] = 0
-    best_of: Optional[int] = 1
-    logit_bias: Optional[Dict[str, float]] = Field(None)
-    user: Optional[str] = Field(None)
 
     # llama.cpp specific parameters
     top_k: int = 40
@@ -120,13 +114,7 @@ def create_completion(
     completion_or_chunks = llama(
         **request.dict(
             exclude={
-                "model",
-                "n",
-                "frequency_penalty",
-                "presence_penalty",
-                "best_of",
-                "logit_bias",
-                "user",
+                "model"
             }
         )
     )
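
The `exclude` plumbing above is standard pydantic: `.dict(exclude=...)` filters the named fields out of the kwargs before they are splatted into `llama(...)`. A self-contained illustration (the `Req` model here is hypothetical, not from the codebase):

# Hypothetical model illustrating pydantic's .dict(exclude=...), the
# mechanism the handler above uses to keep "model" out of the kwargs.
from pydantic import BaseModel

class Req(BaseModel):
    model: str = "local"
    prompt: str = "hi"
    temperature: float = 0.8

print(Req().dict(exclude={"model"}))
# -> {'prompt': 'hi', 'temperature': 0.8}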
@@ -141,7 +129,6 @@ class CreateEmbeddingRequest(BaseModel):
     # ignored, but marked as required for the sake of compatibility with openai's api
     model: str = model_field
     input: str
-    user: Optional[str]
 
     class Config:
         schema_extra = {
@@ -161,7 +148,7 @@ CreateEmbeddingResponse = create_model_from_typeddict(llama_cpp.Embedding)
 def create_embedding(
     request: CreateEmbeddingRequest, llama: llama_cpp.Llama = Depends(get_llama)
 ):
-    return llama.create_embedding(**request.dict(exclude={"model", "user"}))
+    return llama.create_embedding(**request.dict(exclude={"model"}))
 
 
 class ChatCompletionRequestMessage(BaseModel):
@@ -181,12 +168,6 @@ class CreateChatCompletionRequest(BaseModel):
 
     # ignored, but marked as required for the sake of compatibility with openai's api
     model: str = model_field
-    n: Optional[int] = 1
-    presence_penalty: Optional[float] = 0
-    frequency_penalty: Optional[float] = 0
-    logit_bias: Optional[Dict[str, float]] = Field(None)
-    user: Optional[str] = Field(None)
-
 
     # llama.cpp specific parameters
     repeat_penalty: float = 1.1
@@ -220,12 +201,7 @@ def create_chat_completion(
     completion_or_chunks = llama.create_chat_completion(
         **request.dict(
             exclude={
-                "model",
-                "n",
-                "presence_penalty",
-                "frequency_penalty",
-                "logit_bias",
-                "user",
+                "model"
            }
        ),
    )
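
Net effect from a client's point of view, as a hedged smoke test; the endpoint path, port, and payload below are assumptions about a locally running server, not part of this commit:

# Hypothetical client-side check. Assumes the server is running locally on
# port 8000 and exposes the OpenAI-compatible /v1/completions route.
# "n" is no longer a declared field, so pydantic ignores it and one choice
# comes back instead of an "unsupported parameter" error.
import requests

resp = requests.post(
    "http://localhost:8000/v1/completions",
    json={"model": "local", "prompt": "Hello", "n": 2},
)
print(len(resp.json()["choices"]))  # expected: 1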