Detect multi-byte responses and wait

2023-09-07 17:34:22 +03:00 · 2023-04-28 12:50:30 +02:00
parent 5f81400fcb
commit c39547a986
2 changed files with 14 additions and 2 deletions
--- a/examples/low_level_api/low_level_api_chat_cpp.py
+++ b/examples/low_level_api/low_level_api_chat_cpp.py
@@ -96,7 +96,7 @@ specified) expect poor results""", file=sys.stderr)

 		print(file=sys.stderr)
 		print(f"system_info: n_threads = {self.params.n_threads} / {cpu_count()} \
-| {llama_cpp.llama_print_system_info().decode('utf8', errors='ignore')}", file=sys.stderr)
+| {llama_cpp.llama_print_system_info().decode('utf8')}", file=sys.stderr)

 		# determine the required inference memory per token:
 		if (self.params.mem_test):