Mirror of https://github.com/abetlen/llama-cpp-python.git, synced 2023-09-07 17:34:22 +03:00
Fix streaming hang on last token when cache is on.
@@ -848,11 +848,6 @@ class Llama:
                 finish_reason = "length"
                 break

-        if self.cache:
-            if self.verbose:
-                print("Llama._create_completion: cache save", file=sys.stderr)
-            self.cache[prompt_tokens + completion_tokens] = self.save_state()
-
         if self.verbose:
             llama_cpp.llama_print_timings(self.ctx)

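Reading the two hunks together: the hunk above removes the cache save that ran right after the sampling loop, before the final streamed chunk was yielded, and the hunk below re-adds it after the last chunk has been yielded in the streaming path (and separately in the non-streaming path). A rough sketch of why that ordering matters inside a generator, using hypothetical names and a sleep as a stand-in for an expensive save_state():

import time

def save_then_stream_last(tokens):
    # Hypothetical illustration, not the library's code.
    *head, last = tokens
    for t in head:
        yield t
    time.sleep(2)          # expensive save before the final yield...
    yield last             # ...so the consumer stalls on the last token

def stream_then_save(tokens):
    for t in tokens:
        yield t            # last token reaches the consumer immediately
    time.sleep(2)          # expensive save runs only after the final yield

for tok in stream_then_save(["Hello", ",", " world"]):
    print(tok, flush=True)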
@@ -941,8 +936,17 @@ class Llama:
                     }
                 ],
             }
+            if self.cache:
+                if self.verbose:
+                    print("Llama._create_completion: cache save", file=sys.stderr)
+                self.cache[prompt_tokens + completion_tokens] = self.save_state()
             return

+        if self.cache:
+            if self.verbose:
+                print("Llama._create_completion: cache save", file=sys.stderr)
+            self.cache[prompt_tokens + completion_tokens] = self.save_state()
+
         text_str = text.decode("utf-8", errors="ignore")

         if echo:
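For context, a minimal way to exercise the affected path, assuming the Llama / LlamaCache API of this release; the model path is a placeholder:

from llama_cpp import Llama, LlamaCache

# Placeholder path; point this at a local ggml model file.
llm = Llama(model_path="./models/ggml-model.bin", verbose=True)
llm.set_cache(LlamaCache())  # enable the state cache

# With the cache on, streaming previously appeared to hang on the final
# chunk; after this commit the last chunk is yielded before the cache save.
for chunk in llm("Q: Name the planets in the solar system. A:",
                 max_tokens=32, stream=True):
    print(chunk["choices"][0]["text"], end="", flush=True)
print()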