super secret feature

This commit is contained in:
Alex Cheema
2025-10-07 21:28:18 +01:00
parent 823a4fcd95
commit 51c57f77c2
14 changed files with 1786 additions and 194 deletions

176
.gitignore vendored
View File

@@ -19,178 +19,4 @@ dist/
# for the gitingest enthusiasts
digest.txt
__pycache__/
.venv*
test_weights.npz
.exo_used_ports
.exo_node_id
.idea
.DS_Store
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
/.Python
/develop-eggs/
/dist/
/downloads/
/eggs/
/.eggs/
/lib/
/lib64/
/parts/
/sdist/
/var/
/wheels/
/share/python-wheels/
/*.egg-info/
/.installed.cfg
/*.egg
/MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
Untitled.ipynb
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
**/*.xcodeproj/*
.aider*
exo/tinychat/images/*.png
.chainlit/

Binary file not shown.

After

Width:  |  Height:  |  Size: 99 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.6 KiB

After

Width:  |  Height:  |  Size: 99 KiB

BIN
dashboard/favicon.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.2 KiB

View File

@@ -15,8 +15,3 @@ sync:
sync-clean:
uv sync --all-packages --force-reinstall --no-cache
clean:
rm -rf **/__pycache__
rm -rf rust/target
rm -rf .venv

View File

@@ -36,6 +36,7 @@ dependencies = [
"exo_pyo3_bindings", # rust bindings
"anyio>=4.10.0",
"bidict>=0.23.1",
"chainlit>=2.8.3",
]
[project.scripts]

View File

@@ -1,4 +1,5 @@
import argparse
import os
from dataclasses import dataclass
from typing import Self
@@ -19,6 +20,12 @@ from exo.utils.channels import Receiver, channel
from exo.utils.pydantic_ext import CamelCaseModel
from exo.worker.download.impl_shard_downloader import exo_shard_downloader
from exo.worker.main import Worker
from exo.utils.chainlit_ui import (
ChainlitConfig,
ChainlitLaunchError,
launch_chainlit,
terminate_process,
)
# TODO: Entrypoint refactor
@@ -162,8 +169,29 @@ def main():
logger_setup(EXO_LOG, args.verbosity)
logger.info("Starting EXO")
ui_proc = None
if args.with_chainlit:
cfg = ChainlitConfig(
port=args.chainlit_port,
host=args.chainlit_host,
ui_dir=os.path.abspath(os.path.join(os.path.dirname(__file__), "ui")),
)
try:
ui_proc = launch_chainlit(cfg)
logger.info(
f"Chainlit running at http://{cfg.host}:{cfg.port} (UI -> API http://localhost:8000/v1)"
)
except ChainlitLaunchError as e:
logger.error(str(e))
logger_cleanup()
raise
node = anyio.run(Node.create, args)
anyio.run(node.run)
try:
anyio.run(node.run)
finally:
if ui_proc is not None:
terminate_process(ui_proc)
logger_cleanup()
@@ -174,6 +202,10 @@ class Args(CamelCaseModel):
spawn_api: bool = False
api_port: PositiveInt = 8000
tb_only: bool = False
# Chainlit options
with_chainlit: bool = False
chainlit_port: PositiveInt = 8001
chainlit_host: str = "127.0.0.1"
@classmethod
def parse(cls) -> Self:
@@ -216,6 +248,23 @@ class Args(CamelCaseModel):
action="store_true",
dest="tb_only",
)
parser.add_argument(
"--with-chainlit",
action="store_true",
dest="with_chainlit",
)
parser.add_argument(
"--chainlit-port",
type=int,
dest="chainlit_port",
default=8001,
)
parser.add_argument(
"--chainlit-host",
type=str,
dest="chainlit_host",
default="127.0.0.1",
)
args = parser.parse_args()
return cls(**vars(args)) # pyright: ignore[reportAny] - We are intentionally validating here, we can't do it statically

View File

@@ -95,19 +95,6 @@ MODEL_CARDS: dict[str, ModelCard] = {
n_layers=61,
),
),
"kimi-k2-instruct-4bit": ModelCard(
short_id="kimi-k2-instruct-4bit",
model_id="mlx-community/Kimi-K2-Instruct-4bit",
name="Kimi K2 Instruct (4-bit)",
description="""Kimi K2 is a large language model trained on the Kimi K2 dataset.""",
tags=[],
metadata=ModelMetadata(
model_id=ModelId("mlx-community/Kimi-K2-Instruct-4bit"),
pretty_name="Kimi K2 Instruct (4-bit)",
storage_size=Memory.from_bytes(577597603840),
n_layers=61,
),
),
# llama-3.1
"llama-3.1-8b": ModelCard(
short_id="llama-3.1-8b",

14
src/exo/ui/chainlit.md Normal file
View File

@@ -0,0 +1,14 @@
# Welcome to Chainlit! 🚀🤖
Hi there, Developer! 👋 We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
## Useful Links 🔗
- **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
- **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬
We can't wait to see what you create with Chainlit! Happy coding! 💻😊
## Welcome screen
To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.

120
src/exo/ui/chainlit_app.py Normal file
View File

@@ -0,0 +1,120 @@
from typing import TYPE_CHECKING, Awaitable, Callable, Dict, Iterable, Optional, Protocol, Sequence, cast, final, runtime_checkable
import chainlit as cl
import chainlit.callbacks as callbacks
from chainlit.action import Action
from chainlit.context import context
from chainlit.message import ErrorMessage
from chainlit.chat_context import chat_context
from exo.shared.models.model_cards import MODEL_CARDS
from openai import OpenAI
from openai.types.chat import ChatCompletionMessageParam
# OpenAI-compatible client aimed at the local API endpoint; the key looks like
# a placeholder value (presumably the local server does not validate it).
client = OpenAI(api_key="sk-local", base_url="http://localhost:8000/v1")

# Short ids of all registered model cards, in the registry's iteration order.
MODELS = [*MODEL_CARDS]
@final
class ThreadModelStore:
    """Lookup from a Chainlit thread ID to the model picked for that thread.

    The mapping is stored on the class itself, so all callers share a single
    registry and no instance is ever required.
    """

    # Shared on purpose: one mapping for the whole process.
    _by_thread_id: Dict[str, str] = {}

    @classmethod
    def set_model(cls, thread_id: str, model_name: str) -> None:
        """Record *model_name* for *thread_id*, replacing any earlier choice."""
        cls._by_thread_id.update({thread_id: model_name})

    @classmethod
    def get_model(cls, thread_id: str, default_model: str) -> str:
        """Return the model recorded for *thread_id*, else *default_model*."""
        try:
            return cls._by_thread_id[thread_id]
        except KeyError:
            return default_model
# Typed aliases for the Chainlit callback decorators, kept to satisfy strict
# type checking: at runtime the names are bound to the real decorators, while
# type checkers only ever see the precise stub signatures in the else-branch.
if not TYPE_CHECKING:
    on_chat_start_dec = callbacks.on_chat_start
    on_message_dec = callbacks.on_message
    action_callback_dec = callbacks.action_callback
else:
    def on_chat_start_dec(func: Callable[[], Awaitable[None]]) -> Callable[[], Awaitable[None]]: ...

    def on_message_dec(func: Callable[[cl.Message], Awaitable[None]]) -> Callable[[cl.Message], Awaitable[None]]: ...

    def action_callback_dec(name: str) -> Callable[[Callable[[Action], Awaitable[None]]], Callable[[Action], Awaitable[None]]]: ...
@on_chat_start_dec
async def start() -> None:
    """Open a chat by offering every known model as an inline action button."""
    # One button per model; they all share the "set_model" action name and
    # differ only in their label, which the action callback reads back.
    model_buttons: list[Action] = []
    for model_name in MODELS:
        model_buttons.append(Action(name="set_model", payload={}, label=model_name))

    prompt = cl.Message(
        content="Choose a model to use for this conversation:",
        actions=model_buttons,
    )
    await prompt.send()
@action_callback_dec("set_model")
async def on_model_selected(action: Action) -> None:
    """Remember the clicked model for the current thread and confirm it."""
    current_thread = context.session.thread_id

    # An unrecognised label falls back to the first known model.
    if action.label in MODELS:
        selected_model = action.label
    else:
        selected_model = MODELS[0]

    ThreadModelStore.set_model(current_thread, selected_model)
    confirmation = cl.Message(f"Model set to: {selected_model}")
    await confirmation.send()
@on_message_dec
async def on_message(msg: cl.Message) -> None:
    """Answer a user message by streaming a chat completion into the chat.

    The model previously chosen for the current thread is used, falling back
    to the first entry of ``MODELS`` when none was chosen. Any failure is shown
    to the user as an inline error message instead of being raised.
    """
    thread_id = context.session.thread_id
    model = ThreadModelStore.get_model(thread_id, MODELS[0])
    user_text = cast("str | None", getattr(msg, "content", None)) or ""

    history_raw: list[dict[str, str]] = cast(list[dict[str, str]], chat_context.to_openai())
    # NOTE(review): Chainlit appears to record the incoming user message in the
    # chat context before this handler runs - confirm against the Chainlit
    # version in use. Appending unconditionally would then send the user turn
    # twice, so it is only appended when it is not already the last entry.
    last_turn = history_raw[-1] if history_raw else None
    already_recorded = (
        last_turn is not None
        and last_turn.get("role") == "user"
        and last_turn.get("content") == user_text
    )
    if not already_recorded:
        history_raw.append({"role": "user", "content": user_text})

    def to_openai_params(items: list[dict[str, str]]) -> list[ChatCompletionMessageParam]:
        return cast(list[ChatCompletionMessageParam], items)

    history = to_openai_params(history_raw)

    assistant_msg: cl.Message = cl.Message(content="")

    # Minimal structural types describing the parts of a streamed chunk that
    # are read below; they exist for the type checker only.
    @runtime_checkable
    class _HasContent(Protocol):
        content: Optional[str]

    @runtime_checkable
    class _ChoiceDelta(Protocol):
        delta: Optional[_HasContent]
        message: Optional[_HasContent]

    @runtime_checkable
    class _Chunk(Protocol):
        choices: Sequence[_ChoiceDelta]

    try:
        # NOTE(review): ``client`` is the synchronous OpenAI client, so both
        # this call and the iteration below block the event loop while waiting
        # for the server. Consider an async client if responsiveness matters.
        stream_any: object = client.chat.completions.create(
            model=model,
            messages=history,
            stream=True,
        )
        stream = cast(Iterable[_Chunk], stream_any)
        for chunk in stream:
            if not chunk.choices:
                continue
            choice = chunk.choices[0]
            token: Optional[str] = None
            if getattr(choice, "delta", None) and getattr(choice.delta, "content", None):
                token = choice.delta.content  # type: ignore[attr-defined]
            if token:
                # stream_token exists on Message; ignore type checker limitations
                await assistant_msg.stream_token(token)
        await assistant_msg.send()
    except Exception as e:
        # The openai client's HTTP status errors carry the response code in
        # ``status_code``. Checking it avoids the false positives of searching
        # the message text for "404" (which also matches unrelated numbers).
        if getattr(e, "status_code", None) == 404:
            await ErrorMessage(
                (
                    f"No instance found for model {model}. You need to load an instance of {model} "
                    "first on the EXO dashboard: http://localhost:8000"
                )
            ).send()
            return
        await ErrorMessage(f"Request failed: {e!s}").send()

View File

@@ -0,0 +1,184 @@
from __future__ import annotations
import contextlib
import os
import shutil
import socket
import subprocess
import sys
import time
import urllib.request
from dataclasses import dataclass
from typing import final
@final
class ChainlitLaunchError(RuntimeError):
    """Signal that the Chainlit UI could not be started or never became ready."""
@dataclass(frozen=True, slots=True)
class ChainlitConfig:
port: int = 8001
host: str = "127.0.0.1"
app_path: str | None = None
ui_dir: str | None = None
def _is_port_open(host: str, port: int, timeout_s: float = 0.5) -> bool:
    """Return True when a TCP connection to ``host:port`` can be established.

    Args:
        host: Host name or IPv4/IPv6 address to probe.
        port: TCP port to probe.
        timeout_s: Seconds to wait for each connection attempt.

    Returns:
        True if something accepted the connection; False if it was refused,
        timed out, or the host could not be resolved.
    """
    # create_connection resolves the host for every address family, so IPv6
    # literals such as "::1" work; an AF_INET-only socket raises for them.
    try:
        with socket.create_connection((host, port), timeout=timeout_s):
            return True
    except OSError:
        # Covers refusal, timeout and resolution failure (socket.gaierror).
        return False
def _wait_http_ready(url: str, timeout_s: float = 15.0) -> bool:
    """Poll *url* until it answers successfully or *timeout_s* elapses.

    Args:
        url: URL to request repeatedly.
        timeout_s: Overall time budget in seconds. Each individual request may
            itself take up to one second, so the call can overrun slightly.

    Returns:
        True as soon as a request succeeds, False if the budget ran out first
        (a non-positive budget returns False without issuing any request).
    """
    # Monotonic clock: a wall-clock adjustment (NTP, manual change) must not
    # shorten or stretch the wait.
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=1):
                return True
        except Exception:
            # Deliberately broad: any failure (refused, reset, HTTP error,
            # malformed reply) simply means "not ready yet".
            remaining = deadline - time.monotonic()
            time.sleep(max(0.0, min(0.25, remaining)))
    return False
def _find_chainlit_executable() -> list[str]:
    """Build the argv prefix used to invoke Chainlit.

    Returns:
        A one-element list holding the ``chainlit`` console script found on
        PATH, or ``[sys.executable, "-m", "chainlit"]`` when no such script
        is found.
    """
    console_script = shutil.which("chainlit")
    if not console_script:
        # No console script on PATH: run the module with this interpreter.
        return [sys.executable, "-m", "chainlit"]
    return [console_script]
def _default_app_path() -> str:
    """Return the absolute path of the bundled Chainlit app script.

    The script is expected at ``ui/chainlit_app.py`` next to this module's
    parent directory; the path is computed, not checked for existence.
    """
    module_dir = os.path.dirname(__file__)
    return os.path.abspath(os.path.join(module_dir, os.pardir, "ui", "chainlit_app.py"))
def _stage_public_assets(app_dir: str, ui_dir: str | None) -> None:
    """Best-effort: place the EXO logo, favicon and avatars in ``public/`` folders.

    Assets are taken from the repository's ``dashboard/`` directory (located
    relative to this file) and placed under ``<app_dir>/public`` and, when
    *ui_dir* is given, ``<ui_dir>/public``. Each asset is handled on its own:
    a missing source or a filesystem error skips that asset only, and an
    existing destination is never removed unless its source is available.
    """
    repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
    dashboard = os.path.join(repo_root, "dashboard")
    logo_png = os.path.join(dashboard, "exo-logo-hq-square-black-bg.png")
    logo_jpg = os.path.join(dashboard, "exo-logo-hq-square-black-bg.jpg")
    logo_webp = os.path.join(dashboard, "exo-logo-hq-square-black-bg.webp")
    favicon_ico = os.path.join(dashboard, "favicon.ico")

    def _place(src: str, dst: str, *, symlink_ok: bool) -> None:
        # Without a source there is nothing to install; keep whatever exists.
        if not os.path.isfile(src):
            return
        try:
            os.makedirs(os.path.dirname(dst), exist_ok=True)
            # Start from a clean slot so re-runs are idempotent and a dangling
            # symlink left by an earlier run does not get in the way.
            if os.path.lexists(dst):
                os.unlink(dst)
            if symlink_ok:
                try:
                    os.symlink(src, dst)
                    return
                except (OSError, NotImplementedError):
                    pass  # symlinks unavailable here; fall back to a copy
            shutil.copyfile(src, dst)
        except (OSError, shutil.Error):
            # Non-fatal: a missing logo must not block the UI.
            pass

    public_dirs = [os.path.join(app_dir, "public")]
    if ui_dir:
        public_dirs.append(os.path.join(ui_dir, "public"))

    # dict.fromkeys drops a duplicate when ui_dir is the app's own directory.
    for pub_dir in dict.fromkeys(public_dirs):
        avatars_dir = os.path.join(pub_dir, "avatars")
        # Logos per docs
        _place(logo_png, os.path.join(pub_dir, "logo_dark.png"), symlink_ok=True)
        _place(logo_png, os.path.join(pub_dir, "logo_light.png"), symlink_ok=True)
        # Favicon (serve a real local file; avoid symlinks for server path checks)
        _place(logo_png, os.path.join(pub_dir, "favicon.png"), symlink_ok=False)
        # Provide a .ico fallback
        _place(favicon_ico, os.path.join(pub_dir, "favicon.ico"), symlink_ok=False)
        # Always local copies for avatars to satisfy is_path_inside checks
        _place(logo_png, os.path.join(avatars_dir, "assistant.png"), symlink_ok=False)
        _place(logo_png, os.path.join(avatars_dir, "default.png"), symlink_ok=False)
        # Extra avatar formats as fallback
        _place(logo_jpg, os.path.join(avatars_dir, "assistant.jpg"), symlink_ok=False)
        _place(logo_webp, os.path.join(avatars_dir, "assistant.webp"), symlink_ok=False)


def launch_chainlit(
    cfg: ChainlitConfig,
    *,
    foreground: bool = False,
    wait_ready: bool = True,
    ready_timeout_s: float = 20.0,
) -> subprocess.Popen[bytes]:
    """Start the Chainlit UI and, optionally, wait until it serves HTTP.

    Args:
        cfg: Host, port, app script and optional UI directory to use.
        foreground: When True, replace the current process with Chainlit via
            ``os.execvpe``; the call then never returns.
        wait_ready: When True, block until ``http://host:port`` answers, the
            process exits, or *ready_timeout_s* elapses.
        ready_timeout_s: Time budget in seconds for the readiness wait.

    Returns:
        The running Chainlit process. Its stdout/stderr are pipes that are
        drained by background threads, so callers should not read them.

    Raises:
        ChainlitLaunchError: If the port is already in use, the app script is
            missing, the process cannot be started, or it does not become
            ready in time (the message then includes the captured output).
    """
    # Function-scope imports: nothing else in this module needs these names.
    import threading
    from collections import deque
    from typing import IO

    if _is_port_open(cfg.host, cfg.port):
        raise ChainlitLaunchError(f"Port {cfg.port} already in use on {cfg.host}")

    app_path = cfg.app_path or _default_app_path()
    if not os.path.exists(app_path):
        raise ChainlitLaunchError(f"Chainlit app not found at {app_path}")

    env = os.environ.copy()
    _stage_public_assets(os.path.dirname(app_path), cfg.ui_dir)

    cmd = [*_find_chainlit_executable(), "run", app_path, "--host", cfg.host, "--port", str(cfg.port)]
    cwd = cfg.ui_dir or None

    if foreground:
        if cwd is not None:
            os.chdir(cwd)
        os.execvpe(cmd[0], cmd, env)
        raise AssertionError("os.execvpe should not return")

    try:
        proc = subprocess.Popen(
            cmd,
            env=env,
            cwd=cwd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except OSError as err:
        # e.g. executable vanished or cwd does not exist
        raise ChainlitLaunchError(f"Failed to start Chainlit ({cmd[0]}): {err}") from err

    # The pipes must be read continuously: an unread pipe eventually fills up
    # and blocks the child on write. Only a bounded tail is kept, which is
    # enough for the diagnostics below and cannot grow without limit.
    tail_lock = threading.Lock()
    out_tail: deque[bytes] = deque(maxlen=200)
    err_tail: deque[bytes] = deque(maxlen=200)

    def _drain(pipe: IO[bytes], tail: deque[bytes]) -> None:
        # ValueError/OSError: the pipe was closed underneath us; just stop.
        with contextlib.suppress(ValueError, OSError):
            for line in pipe:
                with tail_lock:
                    tail.append(line)

    drainers: list[threading.Thread] = []
    for pipe, tail in ((proc.stdout, out_tail), (proc.stderr, err_tail)):
        if pipe is not None:
            worker = threading.Thread(target=_drain, args=(pipe, tail), daemon=True)
            worker.start()
            drainers.append(worker)

    if wait_ready:
        url = f"http://{cfg.host}:{cfg.port}"
        deadline = time.monotonic() + ready_timeout_s
        ok = False
        # Probe in short slices so a child that died early is noticed at once
        # instead of after the whole timeout.
        while not ok and proc.poll() is None:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            ok = _wait_http_ready(url, timeout_s=min(remaining, 1.0))

        if not ok:
            # Stop and reap the child so no orphan/zombie is left behind.
            if proc.poll() is None:
                proc.terminate()
                try:
                    proc.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    proc.kill()
                    with contextlib.suppress(subprocess.TimeoutExpired):
                        proc.wait(timeout=5)
            # Give the drain threads a moment to collect the final output.
            for worker in drainers:
                worker.join(timeout=1)
            with tail_lock:
                out = b"".join(out_tail)
                err = b"".join(err_tail)
            raise ChainlitLaunchError(
                (
                    f"Chainlit did not become ready on {cfg.host}:{cfg.port}.\n"
                    f"STDOUT:\n{out.decode(errors='ignore')}\n\n"
                    f"STDERR:\n{err.decode(errors='ignore')}"
                )
            )

    return proc
def terminate_process(proc: subprocess.Popen[bytes], *, timeout_s: float = 5.0) -> None:
    """Stop *proc*, escalating from terminate to kill, and reap it.

    Args:
        proc: The child process to stop; an already finished process is left
            untouched.
        timeout_s: Seconds to wait after ``terminate()`` before killing, and
            again after ``kill()`` for the process to be reaped.

    This is best-effort cleanup: operating-system errors are swallowed and
    nothing is raised if the process still cannot be reaped in time.
    """
    if proc.poll() is not None:
        return  # already exited and reaped
    try:
        proc.terminate()
        try:
            proc.wait(timeout=timeout_s)
        except subprocess.TimeoutExpired:
            proc.kill()
            # Wait again so the killed child is reaped (no zombie) and
            # ``returncode`` is populated for the caller.
            with contextlib.suppress(subprocess.TimeoutExpired):
                proc.wait(timeout=timeout_s)
    except OSError:
        # e.g. the process disappeared between the checks above
        return

1416
uv.lock generated

File diff suppressed because it is too large Load Diff