mirror of
https://github.com/abetlen/llama-cpp-python.git
synced 2023-09-07 17:34:22 +03:00
Added tokenizer
This commit is contained in:
@@ -1416,8 +1416,10 @@ class LlamaTokenizer:
|
|||||||
def __init__(self, llama: Llama):
|
def __init__(self, llama: Llama):
|
||||||
self.llama = llama
|
self.llama = llama
|
||||||
|
|
||||||
def encode(self, text: str) -> List[int]:
|
def encode(self, text: str, add_bos: bool = True) -> List[int]:
|
||||||
return self.llama.tokenize(text.encode("utf-8", errors="ignore"))
|
return self.llama.tokenize(
|
||||||
|
text.encode("utf-8", errors="ignore"), add_bos=add_bos
|
||||||
|
)
|
||||||
|
|
||||||
def decode(self, tokens: List[int]) -> str:
|
def decode(self, tokens: List[int]) -> str:
|
||||||
return self.llama.detokenize(tokens).decode("utf-8", errors="ignore")
|
return self.llama.detokenize(tokens).decode("utf-8", errors="ignore")
|
||||||
|
|||||||
Reference in New Issue
Block a user