Mirror of https://github.com/abetlen/llama-cpp-python.git, synced 2023-09-07 17:34:22 +03:00
Fix streaming hang on last token when cache is on.
@@ -848,11 +848,6 @@ class Llama:
                 finish_reason = "length"
                 break

-        if self.cache:
-            if self.verbose:
-                print("Llama._create_completion: cache save", file=sys.stderr)
-            self.cache[prompt_tokens + completion_tokens] = self.save_state()
-
         if self.verbose:
             llama_cpp.llama_print_timings(self.ctx)

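Reading the two hunks together: the hunk above removes the cache save that ran right after the sampling loop, before the final streamed chunk was yielded, and the hunk below re-adds it after the last chunk has been yielded in the streaming path (and separately in the non-streaming path). A rough sketch of why that ordering matters inside a generator, using hypothetical names and a sleep as a stand-in for an expensive save_state():

import time

def save_then_stream_last(tokens):
    # Hypothetical illustration, not the library's code.
    *head, last = tokens
    for t in head:
        yield t
    time.sleep(2)          # expensive save before the final yield...
    yield last             # ...so the consumer stalls on the last token

def stream_then_save(tokens):
    for t in tokens:
        yield t            # last token reaches the consumer immediately
    time.sleep(2)          # expensive save runs only after the final yield

for tok in stream_then_save(["Hello", ",", " world"]):
    print(tok, flush=True)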
@@ -941,8 +936,17 @@ class Llama:
                     }
                 ],
             }
+            if self.cache:
+                if self.verbose:
+                    print("Llama._create_completion: cache save", file=sys.stderr)
+                self.cache[prompt_tokens + completion_tokens] = self.save_state()
             return

+        if self.cache:
+            if self.verbose:
+                print("Llama._create_completion: cache save", file=sys.stderr)
+            self.cache[prompt_tokens + completion_tokens] = self.save_state()
+
         text_str = text.decode("utf-8", errors="ignore")

         if echo:
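For context, a minimal way to exercise the affected path, assuming the Llama / LlamaCache API of this release; the model path is a placeholder:

from llama_cpp import Llama, LlamaCache

# Placeholder path; point this at a local ggml model file.
llm = Llama(model_path="./models/ggml-model.bin", verbose=True)
llm.set_cache(LlamaCache())  # enable the state cache

# With the cache on, streaming previously appeared to hang on the final
# chunk; after this commit the last chunk is yielded before the cache save.
for chunk in llm("Q: Name the planets in the solar system. A:",
                 max_tokens=32, stream=True):
    print(chunk["choices"][0]["text"], end="", flush=True)
print()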