mirror of
https://github.com/exo-explore/exo.git
synced 2025-10-23 02:57:14 +03:00
more compact operator formatting
This commit is contained in:
@@ -12,3 +12,8 @@ allow_multiline_dictionary_keys = True
|
||||
each_dict_entry_on_separate_line = False
|
||||
allow_multiline_lambdas = True
|
||||
blank_line_before_nested_class_or_def = False
|
||||
arithmetic_precedence_indication = True
|
||||
no_spaces_around_selected_binary_operators = "*,/"
|
||||
coalesce_brackets = True
|
||||
space_between_ending_comma_and_closing_bracket = False
|
||||
split_before_expression_after_opening_paren = False
|
||||
@@ -2,24 +2,20 @@ from exo.inference.shard import Shard
|
||||
|
||||
model_base_shards = {
|
||||
### llama
|
||||
"llama-3.1-8b":
|
||||
{
|
||||
"llama-3.1-8b": {
|
||||
"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32),
|
||||
"TinygradDynamicShardInferenceEngine": Shard(model_id="mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", start_layer=0, end_layer=0, n_layers=32),
|
||||
},
|
||||
"llama-3.1-70b":
|
||||
{
|
||||
"llama-3.1-70b": {
|
||||
"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-70B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80),
|
||||
"TinygradDynamicShardInferenceEngine": Shard(model_id="NousResearch/Meta-Llama-3.1-70B", start_layer=0, end_layer=0, n_layers=80),
|
||||
},
|
||||
"llama-3.1-405b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-405B-4bit", start_layer=0, end_layer=0, n_layers=126),},
|
||||
"llama-3-8b":
|
||||
{
|
||||
"llama-3-8b": {
|
||||
"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32),
|
||||
"TinygradDynamicShardInferenceEngine": Shard(model_id="TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-8B-R", start_layer=0, end_layer=0, n_layers=32),
|
||||
},
|
||||
"llama-3-70b":
|
||||
{
|
||||
"llama-3-70b": {
|
||||
"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3-70B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80),
|
||||
"TinygradDynamicShardInferenceEngine": Shard(model_id="TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-70B-R", start_layer=0, end_layer=0, n_layers=80),
|
||||
},
|
||||
|
||||
@@ -84,8 +84,7 @@ class StandardNode(Node):
|
||||
asyncio.create_task(
|
||||
self.broadcast_opaque_status(
|
||||
request_id,
|
||||
json.dumps(
|
||||
{
|
||||
json.dumps({
|
||||
"type": "node_status",
|
||||
"node_id": self.id,
|
||||
"status": "start_process_prompt",
|
||||
@@ -95,8 +94,7 @@ class StandardNode(Node):
|
||||
"image_str": image_str,
|
||||
"inference_state": inference_state,
|
||||
"request_id": request_id,
|
||||
}
|
||||
),
|
||||
}),
|
||||
)
|
||||
)
|
||||
start_time = time.perf_counter_ns()
|
||||
@@ -106,8 +104,7 @@ class StandardNode(Node):
|
||||
asyncio.create_task(
|
||||
self.broadcast_opaque_status(
|
||||
request_id,
|
||||
json.dumps(
|
||||
{
|
||||
json.dumps({
|
||||
"type": "node_status",
|
||||
"node_id": self.id,
|
||||
"status": "end_process_prompt",
|
||||
@@ -119,8 +116,7 @@ class StandardNode(Node):
|
||||
"request_id": request_id,
|
||||
"elapsed_time_ns": elapsed_time_ns,
|
||||
"result_size": resp.size if resp is not None else 0,
|
||||
}
|
||||
),
|
||||
}),
|
||||
)
|
||||
)
|
||||
return resp
|
||||
@@ -166,8 +162,7 @@ class StandardNode(Node):
|
||||
asyncio.create_task(
|
||||
self.broadcast_opaque_status(
|
||||
request_id,
|
||||
json.dumps(
|
||||
{
|
||||
json.dumps({
|
||||
"type": "node_status",
|
||||
"node_id": self.id,
|
||||
"status": "start_process_tensor",
|
||||
@@ -177,8 +172,7 @@ class StandardNode(Node):
|
||||
"tensor_shape": tensor.shape,
|
||||
"request_id": request_id,
|
||||
"inference_state": inference_state,
|
||||
}
|
||||
),
|
||||
}),
|
||||
)
|
||||
)
|
||||
start_time = time.perf_counter_ns()
|
||||
@@ -188,8 +182,7 @@ class StandardNode(Node):
|
||||
asyncio.create_task(
|
||||
self.broadcast_opaque_status(
|
||||
request_id,
|
||||
json.dumps(
|
||||
{
|
||||
json.dumps({
|
||||
"type": "node_status",
|
||||
"node_id": self.id,
|
||||
"status": "end_process_tensor",
|
||||
@@ -198,8 +191,7 @@ class StandardNode(Node):
|
||||
"request_id": request_id,
|
||||
"elapsed_time_ns": elapsed_time_ns,
|
||||
"result_size": resp.size if resp is not None else 0,
|
||||
}
|
||||
),
|
||||
}),
|
||||
)
|
||||
)
|
||||
return resp
|
||||
|
||||
Reference in New Issue
Block a user