Merge branch 'main' of github.com:abetlen/llama_cpp_python into main

2023-09-07 17:34:22 +03:00 · 2023-04-06 21:07:38 -04:00
parent 55279b679d c2e690b326
commit 930db37dd2
4 changed files with 8 additions and 7 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -163,4 +163,4 @@ cython_debug/
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+.idea/
--- a/examples/high_level_api/fastapi_server.py
+++ b/examples/high_level_api/fastapi_server.py
@@ -27,10 +27,10 @@ from sse_starlette.sse import EventSourceResponse
 class Settings(BaseSettings):
    model: str
    n_ctx: int = 2048
-    n_batch: int = 2048
-    n_threads: int = os.cpu_count() or 1
+    n_batch: int = 8
+    n_threads: int = int(os.cpu_count() / 2) or 1
    f16_kv: bool = True
-    use_mlock: bool = True
+    use_mlock: bool = False     # Defaults to False: enabling mlock fails silently on platforms that don't support it (e.g. Windows), which is hard to diagnose.
    embedding: bool = True
    last_n_tokens_size: int = 64

--- a/llama_cpp/server/main.py
+++ b/llama_cpp/server/main.py
@@ -27,10 +27,10 @@ from sse_starlette.sse import EventSourceResponse
 class Settings(BaseSettings):
    model: str
    n_ctx: int = 2048
-    n_batch: int = 2048
-    n_threads: int = os.cpu_count() or 1
+    n_batch: int = 8
+    n_threads: int = int(os.cpu_count() / 2) or 1
    f16_kv: bool = True
-    use_mlock: bool = True
+    use_mlock: bool = False     # Defaults to False: enabling mlock fails silently on platforms that don't support it (e.g. Windows), which is hard to diagnose.
    embedding: bool = True
    last_n_tokens_size: int = 64

--- a/setup.py
+++ b/setup.py
@@ -19,6 +19,7 @@ setup(
    entry_points={"console_scripts": ["llama_cpp.server=llama_cpp.server:main"]},
    install_requires=[
        "typing-extensions>=4.5.0",
+        "pydantic==1.10.7",
    ],
    extras_require={
        "server": ["uvicorn>=0.21.1", "fastapi>=0.95.0", "sse-starlette>=1.3.3"],