mirror of
https://github.com/exo-explore/exo.git
synced 2025-10-23 02:57:14 +03:00
more compact operator formatting
This commit is contained in:
@@ -12,3 +12,8 @@ allow_multiline_dictionary_keys = True
|
|||||||
each_dict_entry_on_separate_line = False
|
each_dict_entry_on_separate_line = False
|
||||||
allow_multiline_lambdas = True
|
allow_multiline_lambdas = True
|
||||||
blank_line_before_nested_class_or_def = False
|
blank_line_before_nested_class_or_def = False
|
||||||
|
arithmetic_precedence_indication = True
|
||||||
|
no_spaces_around_selected_binary_operators = "*,/"
|
||||||
|
coalesce_brackets = True
|
||||||
|
space_between_ending_comma_and_closing_bracket = False
|
||||||
|
split_before_expression_after_opening_paren = False
|
||||||
@@ -2,24 +2,20 @@ from exo.inference.shard import Shard
|
|||||||
|
|
||||||
model_base_shards = {
|
model_base_shards = {
|
||||||
### llama
|
### llama
|
||||||
"llama-3.1-8b":
|
"llama-3.1-8b": {
|
||||||
{
|
|
||||||
"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32),
|
"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32),
|
||||||
"TinygradDynamicShardInferenceEngine": Shard(model_id="mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", start_layer=0, end_layer=0, n_layers=32),
|
"TinygradDynamicShardInferenceEngine": Shard(model_id="mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", start_layer=0, end_layer=0, n_layers=32),
|
||||||
},
|
},
|
||||||
"llama-3.1-70b":
|
"llama-3.1-70b": {
|
||||||
{
|
|
||||||
"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-70B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80),
|
"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-70B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80),
|
||||||
"TinygradDynamicShardInferenceEngine": Shard(model_id="NousResearch/Meta-Llama-3.1-70B", start_layer=0, end_layer=0, n_layers=80),
|
"TinygradDynamicShardInferenceEngine": Shard(model_id="NousResearch/Meta-Llama-3.1-70B", start_layer=0, end_layer=0, n_layers=80),
|
||||||
},
|
},
|
||||||
"llama-3.1-405b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-405B-4bit", start_layer=0, end_layer=0, n_layers=126),},
|
"llama-3.1-405b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-405B-4bit", start_layer=0, end_layer=0, n_layers=126),},
|
||||||
"llama-3-8b":
|
"llama-3-8b": {
|
||||||
{
|
|
||||||
"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32),
|
"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32),
|
||||||
"TinygradDynamicShardInferenceEngine": Shard(model_id="TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-8B-R", start_layer=0, end_layer=0, n_layers=32),
|
"TinygradDynamicShardInferenceEngine": Shard(model_id="TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-8B-R", start_layer=0, end_layer=0, n_layers=32),
|
||||||
},
|
},
|
||||||
"llama-3-70b":
|
"llama-3-70b": {
|
||||||
{
|
|
||||||
"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3-70B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80),
|
"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3-70B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80),
|
||||||
"TinygradDynamicShardInferenceEngine": Shard(model_id="TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-70B-R", start_layer=0, end_layer=0, n_layers=80),
|
"TinygradDynamicShardInferenceEngine": Shard(model_id="TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-70B-R", start_layer=0, end_layer=0, n_layers=80),
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -84,8 +84,7 @@ class StandardNode(Node):
|
|||||||
asyncio.create_task(
|
asyncio.create_task(
|
||||||
self.broadcast_opaque_status(
|
self.broadcast_opaque_status(
|
||||||
request_id,
|
request_id,
|
||||||
json.dumps(
|
json.dumps({
|
||||||
{
|
|
||||||
"type": "node_status",
|
"type": "node_status",
|
||||||
"node_id": self.id,
|
"node_id": self.id,
|
||||||
"status": "start_process_prompt",
|
"status": "start_process_prompt",
|
||||||
@@ -95,8 +94,7 @@ class StandardNode(Node):
|
|||||||
"image_str": image_str,
|
"image_str": image_str,
|
||||||
"inference_state": inference_state,
|
"inference_state": inference_state,
|
||||||
"request_id": request_id,
|
"request_id": request_id,
|
||||||
}
|
}),
|
||||||
),
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
start_time = time.perf_counter_ns()
|
start_time = time.perf_counter_ns()
|
||||||
@@ -106,8 +104,7 @@ class StandardNode(Node):
|
|||||||
asyncio.create_task(
|
asyncio.create_task(
|
||||||
self.broadcast_opaque_status(
|
self.broadcast_opaque_status(
|
||||||
request_id,
|
request_id,
|
||||||
json.dumps(
|
json.dumps({
|
||||||
{
|
|
||||||
"type": "node_status",
|
"type": "node_status",
|
||||||
"node_id": self.id,
|
"node_id": self.id,
|
||||||
"status": "end_process_prompt",
|
"status": "end_process_prompt",
|
||||||
@@ -119,8 +116,7 @@ class StandardNode(Node):
|
|||||||
"request_id": request_id,
|
"request_id": request_id,
|
||||||
"elapsed_time_ns": elapsed_time_ns,
|
"elapsed_time_ns": elapsed_time_ns,
|
||||||
"result_size": resp.size if resp is not None else 0,
|
"result_size": resp.size if resp is not None else 0,
|
||||||
}
|
}),
|
||||||
),
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
return resp
|
return resp
|
||||||
@@ -166,8 +162,7 @@ class StandardNode(Node):
|
|||||||
asyncio.create_task(
|
asyncio.create_task(
|
||||||
self.broadcast_opaque_status(
|
self.broadcast_opaque_status(
|
||||||
request_id,
|
request_id,
|
||||||
json.dumps(
|
json.dumps({
|
||||||
{
|
|
||||||
"type": "node_status",
|
"type": "node_status",
|
||||||
"node_id": self.id,
|
"node_id": self.id,
|
||||||
"status": "start_process_tensor",
|
"status": "start_process_tensor",
|
||||||
@@ -177,8 +172,7 @@ class StandardNode(Node):
|
|||||||
"tensor_shape": tensor.shape,
|
"tensor_shape": tensor.shape,
|
||||||
"request_id": request_id,
|
"request_id": request_id,
|
||||||
"inference_state": inference_state,
|
"inference_state": inference_state,
|
||||||
}
|
}),
|
||||||
),
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
start_time = time.perf_counter_ns()
|
start_time = time.perf_counter_ns()
|
||||||
@@ -188,8 +182,7 @@ class StandardNode(Node):
|
|||||||
asyncio.create_task(
|
asyncio.create_task(
|
||||||
self.broadcast_opaque_status(
|
self.broadcast_opaque_status(
|
||||||
request_id,
|
request_id,
|
||||||
json.dumps(
|
json.dumps({
|
||||||
{
|
|
||||||
"type": "node_status",
|
"type": "node_status",
|
||||||
"node_id": self.id,
|
"node_id": self.id,
|
||||||
"status": "end_process_tensor",
|
"status": "end_process_tensor",
|
||||||
@@ -198,8 +191,7 @@ class StandardNode(Node):
|
|||||||
"request_id": request_id,
|
"request_id": request_id,
|
||||||
"elapsed_time_ns": elapsed_time_ns,
|
"elapsed_time_ns": elapsed_time_ns,
|
||||||
"result_size": resp.size if resp is not None else 0,
|
"result_size": resp.size if resp is not None else 0,
|
||||||
}
|
}),
|
||||||
),
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
return resp
|
return resp
|
||||||
|
|||||||
Reference in New Issue
Block a user