Mirror of https://github.com/abetlen/llama-cpp-python.git (synced 2023-09-07 17:34:22 +03:00)
Remove usage of eval_tokens for cache check
@@ -735,10 +735,10 @@ class Llama:
             try:
                 cache_item = self.cache[prompt_tokens]
                 cache_prefix_len = Llama.longest_token_prefix(
-                    cache_item.eval_tokens, prompt_tokens
+                    cache_item.input_ids.tolist(), prompt_tokens
                 )
                 eval_prefix_len = Llama.longest_token_prefix(
-                    self.eval_tokens, prompt_tokens
+                    self._input_ids.tolist(), prompt_tokens
                 )
                 if cache_prefix_len > eval_prefix_len:
                     self.load_state(cache_item)
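For context, here is a minimal sketch of the prefix check this hunk adjusts. Llama.longest_token_prefix is the helper named in the diff; the standalone implementation and the example token ids below are assumptions written for illustration, not code copied from the library.

from itertools import takewhile
from typing import Sequence


def longest_token_prefix(a: Sequence[int], b: Sequence[int]) -> int:
    # Count the leading positions where both token sequences agree.
    return sum(1 for _ in takewhile(lambda pair: pair[0] == pair[1], zip(a, b)))


if __name__ == "__main__":
    # Hypothetical token ids standing in for cache_item.input_ids.tolist(),
    # self._input_ids.tolist(), and prompt_tokens.
    cached_tokens = [1, 2, 3, 4, 5]
    evaluated_tokens = [1, 2, 9]
    prompt_tokens = [1, 2, 3, 4, 7]

    cache_prefix_len = longest_token_prefix(cached_tokens, prompt_tokens)    # 4
    eval_prefix_len = longest_token_prefix(evaluated_tokens, prompt_tokens)  # 2

    # The commit keeps this decision rule but feeds it input_ids instead of
    # the eval_tokens deque: restore the cached state only when it shares a
    # longer prefix with the new prompt than the tokens already evaluated
    # in the current context.
    if cache_prefix_len > eval_prefix_len:
        print("would call self.load_state(cache_item)")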