diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index e0424c6..30414f5 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -71,7 +71,7 @@ LLAMA_FILE_VERSION = ctypes.c_int(1)
 LLAMA_FILE_MAGIC = b"ggjt"
 LLAMA_FILE_MAGIC_UNVERSIONED = b"ggml"
 LLAMA_SESSION_MAGIC = b"ggsn"
-LLAMA_SESSION_VERSION = ctypes.c_int(0)
+LLAMA_SESSION_VERSION = ctypes.c_int(1)
 
 llama_context_p = c_void_p
 
@@ -239,7 +239,8 @@ _lib.llama_set_rng_seed.argtypes = [llama_context_p, c_int]
 _lib.llama_set_rng_seed.restype = None
 
 
-# Returns the size in bytes of the state (rng, logits, embedding and kv_cache)
+# Returns the maximum size in bytes of the state (rng, logits, embedding
+# and kv_cache); the actual size is often smaller after compacting tokens
 def llama_get_state_size(ctx: llama_context_p) -> c_size_t:
     return _lib.llama_get_state_size(ctx)
 
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index 58b367c..e216aa0 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 58b367c2d757c0ea12aec672382462b42204c724
+Subproject commit e216aa04633892b972d013719e38b59fd4917341