Add gpu_memory_utilization to engine arguments

Prevents GPU out-of-memory failures by capping vLLM's GPU memory utilization at 0.8 when constructing EngineArgs for both LLMGenerator and LLMScorer.
This commit is contained in:
STEVENTAN100
2025-10-21 10:32:54 +08:00
committed by GitHub
parent 22d21b9b23
commit 5478b117c8

View File

@@ -19,7 +19,7 @@ from itertools import cycle
class LLMGenerator:
def __init__(self, model_name, device, **model_args):
# Create a vllm LLM instance
engine_args = EngineArgs(model=model_name, device=device, **model_args)
engine_args = EngineArgs(model=model_name, gpu_memory_utilization=0.8, device=device, **model_args)
self.model = LLM(**vars(engine_args))
self.model_name = model_name
self.device = device
@@ -58,7 +58,7 @@ class LLMGenerator:
class LLMScorer:
def __init__(self, model_name, device, **model_args):
# Create a vllm LLM instance
engine_args = EngineArgs(model=model_name, device=device, **model_args)
engine_args = EngineArgs(model=model_name, gpu_memory_utilization=0.8, device=device, **model_args)
self.model = LLM(**vars(engine_args))
self.model_name = model_name
self.device = device
@@ -1315,4 +1315,4 @@ def run_documentation_task(
if __name__ == "__main__":
fire.Fire(run_documentation_task)
fire.Fire(run_documentation_task)