From 6595ad84bfd5360ac22e311f91eef3d78bdb65f2 Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Thu, 13 Apr 2023 00:28:00 -0400 Subject: [PATCH] Add field to disable resetting between generations --- llama_cpp/llama.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py index 67fefe5..db9a337 100644 --- a/llama_cpp/llama.py +++ b/llama_cpp/llama.py @@ -218,6 +218,7 @@ class Llama: top_p: float, temp: float, repeat_penalty: float, + reset: bool = True, ) -> Generator[ llama_cpp.llama_token, Optional[Sequence[llama_cpp.llama_token]], None ]: @@ -235,12 +236,14 @@ class Llama: top_p: The top-p sampling parameter. temp: The temperature parameter. repeat_penalty: The repeat penalty parameter. + reset: Whether to reset the model state. Yields: The generated tokens. """ assert self.ctx is not None - self.reset() + if reset: + self.reset() while True: self.eval(tokens) token = self.sample(