diff --git a/examples/high_level_api_inference.py b/examples/high_level_api_inference.py
index 136f22c..0fa9cb7 100644
--- a/examples/high_level_api_inference.py
+++ b/examples/high_level_api_inference.py
@@ -11,7 +11,7 @@ llm = Llama(model_path=args.model)
 
 output = llm(
     "Question: What are the names of the planets in the solar system? Answer: ",
-    max_tokens=1,
+    max_tokens=48,
     stop=["Q:", "\n"],
     echo=True,
 )
diff --git a/examples/high_level_api_streaming.py b/examples/high_level_api_streaming.py
index d744090..787bc6e 100644
--- a/examples/high_level_api_streaming.py
+++ b/examples/high_level_api_streaming.py
@@ -4,7 +4,7 @@ import argparse
 from llama_cpp import Llama
 
 parser = argparse.ArgumentParser()
-parser.add_argument("-m", "--model", type=str, default=".//models/...")
+parser.add_argument("-m", "--model", type=str, default="./models/...")
 args = parser.parse_args()
 
 llm = Llama(model_path=args.model)