more compact operator formatting

Author: Alex Cheema
Date: 2024-08-22 14:12:00 +01:00
parent 14f2846a9c
commit f53056dede
29 changed files with 248 additions and 255 deletions

View File

@@ -12,3 +12,8 @@ allow_multiline_dictionary_keys = True
 each_dict_entry_on_separate_line = False
 allow_multiline_lambdas = True
 blank_line_before_nested_class_or_def = False
+arithmetic_precedence_indication = True
+no_spaces_around_selected_binary_operators = "*,/"
+coalesce_brackets = True
+space_between_ending_comma_and_closing_bracket = False
+split_before_expression_after_opening_paren = False
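
As a rough illustration of what these knobs do, the snippet below is hand-written from the documented semantics of these yapf options; it is not output generated by running yapf on this repo:

import json

node_id = "node-1"
elapsed_time_ns = 42_000_000

# coalesce_brackets plus split_before_expression_after_opening_paren = False
# keep the "({" of a call like json.dumps({...}) together on one line:
status = json.dumps({
  "type": "node_status",
  "node_id": node_id,
})

# no_spaces_around_selected_binary_operators = "*,/" drops the spaces around
# * and /, while arithmetic_precedence_indication = True keeps them around
# lower-precedence operators such as + and -, so spacing mirrors precedence:
elapsed_ms = elapsed_time_ns/1_000_000
total = 1 + 2*3
print(status, elapsed_ms, total)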

View File

@@ -2,24 +2,20 @@ from exo.inference.shard import Shard
 model_base_shards = {
   ### llama
-  "llama-3.1-8b":
-  {
+  "llama-3.1-8b": {
     "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32),
     "TinygradDynamicShardInferenceEngine": Shard(model_id="mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", start_layer=0, end_layer=0, n_layers=32),
   },
-  "llama-3.1-70b":
-  {
+  "llama-3.1-70b": {
     "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-70B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80),
     "TinygradDynamicShardInferenceEngine": Shard(model_id="NousResearch/Meta-Llama-3.1-70B", start_layer=0, end_layer=0, n_layers=80),
   },
   "llama-3.1-405b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-405B-4bit", start_layer=0, end_layer=0, n_layers=126),},
-  "llama-3-8b":
-  {
+  "llama-3-8b": {
     "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32),
     "TinygradDynamicShardInferenceEngine": Shard(model_id="TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-8B-R", start_layer=0, end_layer=0, n_layers=32),
   },
-  "llama-3-70b":
-  {
+  "llama-3-70b": {
     "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3-70B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80),
     "TinygradDynamicShardInferenceEngine": Shard(model_id="TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-70B-R", start_layer=0, end_layer=0, n_layers=80),
   },
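
For context on the reshaped registry, here is a minimal sketch of how a dict shaped like model_base_shards can be consumed; get_base_shard is a hypothetical helper written for illustration, not part of this commit:

from exo.inference.shard import Shard

# Registry shaped like model_base_shards above (one entry shown).
model_base_shards = {
  "llama-3.1-8b": {
    "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32),
  },
}

# Hypothetical helper: look up the base Shard for a model/engine pair.
def get_base_shard(model_id: str, engine_name: str) -> Shard:
  engines = model_base_shards.get(model_id, {})
  if engine_name not in engines:
    raise ValueError(f"no shard for {model_id!r} with engine {engine_name!r}")
  return engines[engine_name]

shard = get_base_shard("llama-3.1-8b", "MLXDynamicShardInferenceEngine")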

View File

@@ -84,8 +84,7 @@ class StandardNode(Node):
     asyncio.create_task(
       self.broadcast_opaque_status(
         request_id,
-        json.dumps(
-          {
+        json.dumps({
           "type": "node_status",
           "node_id": self.id,
           "status": "start_process_prompt",
@@ -95,8 +94,7 @@ class StandardNode(Node):
           "image_str": image_str,
           "inference_state": inference_state,
           "request_id": request_id,
-          }
-        ),
+        }),
       )
     )
     start_time = time.perf_counter_ns()
@@ -106,8 +104,7 @@ class StandardNode(Node):
     asyncio.create_task(
       self.broadcast_opaque_status(
         request_id,
-        json.dumps(
-          {
+        json.dumps({
           "type": "node_status",
           "node_id": self.id,
           "status": "end_process_prompt",
@@ -119,8 +116,7 @@ class StandardNode(Node):
           "request_id": request_id,
           "elapsed_time_ns": elapsed_time_ns,
           "result_size": resp.size if resp is not None else 0,
-          }
-        ),
+        }),
       )
     )
     return resp
@@ -166,8 +162,7 @@ class StandardNode(Node):
     asyncio.create_task(
       self.broadcast_opaque_status(
         request_id,
-        json.dumps(
-          {
+        json.dumps({
           "type": "node_status",
           "node_id": self.id,
           "status": "start_process_tensor",
@@ -177,8 +172,7 @@ class StandardNode(Node):
           "tensor_shape": tensor.shape,
           "request_id": request_id,
           "inference_state": inference_state,
-          }
-        ),
+        }),
       )
     )
     start_time = time.perf_counter_ns()
@@ -188,8 +182,7 @@ class StandardNode(Node):
     asyncio.create_task(
       self.broadcast_opaque_status(
         request_id,
-        json.dumps(
-          {
+        json.dumps({
           "type": "node_status",
           "node_id": self.id,
           "status": "end_process_tensor",
@@ -198,8 +191,7 @@ class StandardNode(Node):
           "request_id": request_id,
           "elapsed_time_ns": elapsed_time_ns,
           "result_size": resp.size if resp is not None else 0,
-          }
-        ),
+        }),
       )
     )
     return resp
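
The compacted json.dumps({...}) calls in these hunks all serialize the same node_status message. Below is a minimal standalone sketch of that payload shape; the field names come from the hunks above, while the values are placeholders rather than data from a real node:

import json
import time

start_time = time.perf_counter_ns()
# ... the inference engine would process the tensor here ...
elapsed_time_ns = time.perf_counter_ns() - start_time

# Payload shape passed to broadcast_opaque_status in this file;
# placeholder values stand in for the node's real identifiers.
status = json.dumps({
  "type": "node_status",
  "node_id": "node-1",
  "status": "end_process_tensor",
  "request_id": "req-123",
  "elapsed_time_ns": elapsed_time_ns,
  "result_size": 0,  # resp.size if resp is not None else 0
})
print(status)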