pr suggestions fix

josh
2024-11-18 23:02:03 -08:00
parent 0ac1b87f35
commit e991438e72
5 changed files with 25 additions and 34 deletions

View File

@@ -13,7 +13,7 @@ import signal
 import sys
 from exo import DEBUG, VERSION
 from exo.download.download_progress import RepoProgressEvent
-from exo.helpers import PrefixDict
+from exo.helpers import PrefixDict, shutdown
 from exo.inference.inference_engine import inference_engine_classes
 from exo.inference.shard import Shard
 from exo.inference.tokenizers import resolve_tokenizer
@@ -148,11 +148,6 @@ class PromptSession:
     self.timestamp = timestamp
     self.prompt = prompt
 
-def is_frozen():
-  return getattr(sys, 'frozen', False) or os.path.basename(sys.executable) == "exo" \
-    or ('Contents/MacOS' in str(os.path.dirname(sys.executable))) \
-    or '__nuitka__' in globals() or getattr(sys, '__compiled__', False)
-
 class ChatGPTAPI:
   def __init__(self, node: Node, inference_engine_classname: str, response_timeout: int = 90, on_chat_completion_request: Callable[[str, ChatCompletionRequest, str], None] = None):
     self.node = node
@@ -193,7 +188,6 @@ class ChatGPTAPI:
   async def handle_quit(self, request):
     print("Received quit signal")
-    from exo.main import shutdown
     response = web.json_response({"detail": "Quit signal received"}, status=200)
     await response.prepare(request)
     await response.write_eof()
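
With shutdown now exported from exo.helpers (see the import hunk above), the handler no longer needs the deferred local import that dodged the circular dependency on exo.main. A minimal sketch of the resulting quit handler; how shutdown is actually invoked is not shown in this diff, so the signal value and loop retrieval below are assumptions:

import asyncio
import signal
from aiohttp import web
from exo.helpers import shutdown

async def handle_quit(self, request):
  print("Received quit signal")
  # Flush the acknowledgement to the client before tearing anything down.
  response = web.json_response({"detail": "Quit signal received"}, status=200)
  await response.prepare(request)
  await response.write_eof()
  # Assumption: call the shared helper with an explicit signal and the running loop.
  await shutdown(signal.SIGINT, asyncio.get_running_loop())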

View File

@@ -10,7 +10,7 @@ from fnmatch import fnmatch
 from pathlib import Path
 from typing import Generator, Iterable, TypeVar, TypedDict
 from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
-from exo.helpers import DEBUG
+from exo.helpers import DEBUG, is_frozen
 from exo.download.download_progress import RepoProgressEvent, RepoFileProgressEvent, RepoProgressCallback, RepoFileProgressCallback
 from exo.inference.shard import Shard
 import aiofiles
@@ -18,11 +18,6 @@ from aiofiles import os as aios
 T = TypeVar("T")
 
-def is_frozen():
-  return getattr(sys, 'frozen', False) or os.path.basename(sys.executable) == "exo" \
-    or ('Contents/MacOS' in str(os.path.dirname(sys.executable))) \
-    or ('__compiled__' in globals())
-
 async def get_local_snapshot_dir(repo_id: str, revision: str = "main") -> Optional[Path]:
   refs_dir = get_repo_root(repo_id)/"refs"
   refs_file = refs_dir/revision
@@ -105,7 +100,7 @@ async def get_auth_headers():
 def get_repo_root(repo_id: str) -> Path:
   """Get the root directory for a given repo ID in the Hugging Face cache."""
   sanitized_repo_id = str(repo_id).replace("/", "--")
-  if "Qwen2.5-0.5B-Instruct-4bit" in str(repo_id) and is_frozen():
+  if is_frozen():
     repo_root = Path(sys.argv[0]).parent/f"models--{sanitized_repo_id}"
     return repo_root
   return get_hf_home()/"hub"/f"models--{sanitized_repo_id}"
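
The last hunk drops the hardcoded Qwen2.5-0.5B-Instruct-4bit check: in a frozen build, every repo now resolves to a models-- directory beside the executable instead of the Hugging Face cache. A sketch of the two roots this logic produces, using an illustrative repo ID and assuming get_hf_home() defaults to the standard ~/.cache/huggingface location:

import sys
from pathlib import Path

repo_id = "mlx-community/Llama-3.2-1B-Instruct-4bit"  # illustrative; any repo ID works now
sanitized = str(repo_id).replace("/", "--")

# Frozen build: model directory lives next to the binary.
frozen_root = Path(sys.argv[0]).parent / f"models--{sanitized}"
# Regular run: standard HF cache layout (assumed default location).
cache_root = Path.home() / ".cache" / "huggingface" / "hub" / f"models--{sanitized}"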

View File

@@ -1,4 +1,5 @@
 import os
+import sys
 import asyncio
 from typing import Callable, TypeVar, Optional, Dict, Generic, Tuple, List
 import socket
@@ -234,3 +235,22 @@ def get_all_ip_addresses():
   except:
     if DEBUG >= 1: print("Failed to get all IP addresses. Defaulting to localhost.")
     return ["localhost"]
+
+async def shutdown(signal, loop):
+  """Gracefully shutdown the server and close the asyncio loop."""
+  print(f"Received exit signal {signal.name}...")
+  print("Thank you for using exo.")
+  print_yellow_exo()
+  server_tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
+  [task.cancel() for task in server_tasks]
+  print(f"Cancelling {len(server_tasks)} outstanding tasks")
+  await asyncio.gather(*server_tasks, return_exceptions=True)
+  await server.stop()
+  loop.stop()
+
+def is_frozen():
+  return getattr(sys, 'frozen', False) or os.path.basename(sys.executable) == "exo" \
+    or ('Contents/MacOS' in str(os.path.dirname(sys.executable))) \
+    or '__nuitka__' in globals() or getattr(sys, '__compiled__', False)
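
One caveat worth flagging: shutdown was moved verbatim, so await server.stop() still references a server global that exo.helpers does not define, and the signal parameter shadows the stdlib module. A self-contained variant would take the server explicitly; a sketch of that (my suggestion, not part of this commit):

import asyncio

async def shutdown(sig, loop, server=None):
  """Cancel outstanding tasks, stop the server if one was passed, then stop the loop."""
  print(f"Received exit signal {sig.name}...")
  tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
  print(f"Cancelling {len(tasks)} outstanding tasks")
  for task in tasks:
    task.cancel()
  await asyncio.gather(*tasks, return_exceptions=True)
  if server is not None:
    await server.stop()
  loop.stop()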

View File

@@ -20,7 +20,7 @@ from exo.topology.ring_memory_weighted_partitioning_strategy import RingMemoryWe
 from exo.api import ChatGPTAPI
 from exo.download.shard_download import ShardDownloader, RepoProgressEvent, NoopShardDownloader
 from exo.download.hf.hf_shard_download import HFShardDownloader
-from exo.helpers import print_yellow_exo, find_available_port, DEBUG, get_system_info, get_or_create_node_id, get_all_ip_addresses, terminal_link
+from exo.helpers import print_yellow_exo, find_available_port, DEBUG, get_system_info, get_or_create_node_id, get_all_ip_addresses, terminal_link, shutdown
 from exo.inference.shard import Shard
 from exo.inference.inference_engine import get_inference_engine, InferenceEngine
 from exo.inference.dummy_inference_engine import DummyInferenceEngine
@@ -163,20 +163,6 @@ def throttled_broadcast(shard: Shard, event: RepoProgressEvent):
 shard_downloader.on_progress.register("broadcast").on_next(throttled_broadcast)
 
-async def shutdown(signal, loop):
-  """Gracefully shutdown the server and close the asyncio loop."""
-  print(f"Received exit signal {signal.name}...")
-  print("Thank you for using exo.")
-  print_yellow_exo()
-  server_tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
-  [task.cancel() for task in server_tasks]
-  print(f"Cancelling {len(server_tasks)} outstanding tasks")
-  await asyncio.gather(*server_tasks, return_exceptions=True)
-  await server.stop()
-  loop.stop()
-
 async def run_model_cli(node: Node, inference_engine: InferenceEngine, model_name: str, prompt: str):
   inference_class = inference_engine.__class__.__name__
   shard = build_base_shard(model_name, inference_class)
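
With the local definition deleted, main.py takes shutdown from the shared exo.helpers import above. The diff does not show how it is registered, but the usual asyncio pattern for hooking a coroutine like this to SIGINT/SIGTERM looks as follows (registration code assumed, not part of the hunk):

import asyncio
import signal

loop = asyncio.new_event_loop()
for s in (signal.SIGINT, signal.SIGTERM):
  # Late-bound default captures each signal; the handler schedules the coroutine on the loop.
  loop.add_signal_handler(s, lambda s=s: asyncio.create_task(shutdown(s, loop)))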

View File

@@ -21,11 +21,7 @@ def run():
     "--macos-app-name=exo",
     "--macos-app-mode=gui",
     "--macos-app-version=0.0.1",
-    "--include-module=exo.inference.mlx.models.llama",
-    "--include-module=exo.inference.mlx.models.deepseek_v2",
-    "--include-module=exo.inference.mlx.models.base",
-    "--include-module=exo.inference.mlx.models.llava",
-    "--include-module=exo.inference.mlx.models.qwen2",
+    "--include-module=exo.inference.mlx.models.*",
     "--include-distribution-meta=mlx",
     "--include-module=mlx._reprlib_fix",
     "--include-module=mlx._os_warning",