version: 2.1
orbs:
  python: circleci/python@2

commands:
  run_chatgpt_api_test:
    parameters:
      inference_engine:
        type: string
      model_id:
        type: string
      expected_output:
        type: string
      prompt:
        type: string
    steps:
      - run:
          name: Run chatgpt api integration test (<< parameters.inference_engine >>, << parameters.model_id >>)
          command: |
            source env/bin/activate

            # Set CLANG=1 for tinygrad only
            if [ "<< parameters.inference_engine >>" = "tinygrad" ]; then
              pip install llvmlite
              export TOKENIZERS_PARALLELISM=true SUPPORT_BF16=0 CLANG=1
            fi

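            # The two instances are cross-wired for discovery: node1 listens on 5678 and
            # broadcasts on 5679, while node2 does the reverse. Separate EXO_HOME cache
            # directories keep the nodes from sharing state on the same machine.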
            # Start first instance
            EXO_HOME="$(pwd)/.exo_cache_node1" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine << parameters.inference_engine >> \
              --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 \
              --chatgpt-api-response-timeout 900 --disable-tui > output1.log &
            PID1=$!
            tail -f output1.log &
            TAIL1=$!

            # Start second instance
            EXO_HOME="$(pwd)/.exo_cache_node2" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine << parameters.inference_engine >> \
              --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 \
              --chatgpt-api-response-timeout 900 --disable-tui > output2.log &
            PID2=$!
            tail -f output2.log &
            TAIL2=$!

            # Remember to kill the tail processes at the end
            trap 'kill $TAIL1 $TAIL2' EXIT

            # Wait for discovery
            sleep 10

            # Function to check if processes are still running
            check_processes() {
              if ! kill -0 $PID1 2>/dev/null; then
                echo "First instance (PID $PID1) died unexpectedly. Log output:"
                cat output1.log
                exit 1
              fi
              if ! kill -0 $PID2 2>/dev/null; then
                echo "Second instance (PID $PID2) died unexpectedly. Log output:"
                cat output2.log
                exit 1
              fi
            }

            # Check processes before proceeding
            check_processes

            echo "Sending request to first instance..."
            response_1=$(curl -s http://localhost:8000/v1/chat/completions \
              -H "Content-Type: application/json" \
              -d '{
                "model": "<< parameters.model_id >>",
                "messages": [{"role": "user", "content": "<< parameters.prompt >>"}],
                "temperature": 0.7
              }')
            echo "Response 1: $response_1"

            # Check processes after first response
            check_processes

            echo "Sending request to second instance..."
            response_2=$(curl -s http://localhost:8001/v1/chat/completions \
              -H "Content-Type: application/json" \
              -d '{
                "model": "<< parameters.model_id >>",
                "messages": [{"role": "user", "content": "<< parameters.prompt >>"}],
                "temperature": 0.7
              }')
            echo "Response 2: $response_2"

            # Check processes after second response
            check_processes

            # Stop both instances
            kill $PID1 $PID2

            echo ""

            # Extract content using jq and check if it contains expected output
            content1=$(echo "$response_1" | jq -r '.choices[0].message.content')
            content2=$(echo "$response_2" | jq -r '.choices[0].message.content')

            if [[ "$content1" != *"<< parameters.expected_output >>"* ]] || [[ "$content2" != *"<< parameters.expected_output >>"* ]]; then
              echo "Test failed: Response does not match '<< parameters.expected_output >>'"
              echo "Response 1 content: $content1"
              echo ""
              echo "Response 2 content: $content2"
              echo "Output of first instance:"
              cat output1.log
              echo "Output of second instance:"
              cat output2.log
              exit 1
            else
              echo "Test passed: Response from both nodes matches '<< parameters.expected_output >>'"
            fi

jobs:
  unit_test:
    macos:
      xcode: "16.0.0"
    resource_class: m2pro.large
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            brew install python@3.12
            python3.12 -m venv env
            source env/bin/activate
      - run:
          name: Install dependencies
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
      - run:
          name: Run tests
          command: |
            source env/bin/activate
            # set TEMPERATURE to 0 for deterministic sampling
            echo "Running inference engine tests..."
            METAL_DEVICE_WRAPPER_TYPE=1 METAL_DEBUG_ERROR_MODE=0 METAL_XCODE=1 TEMPERATURE=0 python3 -m exo.inference.test_inference_engine
            echo "Running tokenizer tests..."
            python3 ./test/test_tokenizers.py
            python3 ./test/test_model_helpers.py

  discovery_integration_test:
    macos:
      xcode: "16.0.0"
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            brew install python@3.12
            python3.12 -m venv env
            source env/bin/activate
      - run:
          name: Install dependencies
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
      - run:
          name: Run discovery integration test
          command: |
            source env/bin/activate
            DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --disable-tui > output1.log 2>&1 &
            PID1=$!
            DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --disable-tui > output2.log 2>&1 &
            PID2=$!
            sleep 10
            kill $PID1 $PID2
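            # Each node should log the other as a connected, healthy peer, and neither
            # should report a failed peer connection.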
            if grep -q "Peer statuses: {\\'node2\\': \\'is_connected=True, health_check=True" output1.log && ! grep -q "Failed to connect peers:" output1.log && grep -q "Peer statuses: {\\'node1\\': \\'is_connected=True, health_check=True" output2.log && ! grep -q "Failed to connect peers:" output2.log; then
              echo "Test passed: Both instances discovered each other"
              exit 0
            else
              echo "Test failed: Devices did not discover each other"
              echo "Output of first instance:"
              cat output1.log
              echo "Output of second instance:"
              cat output2.log
              exit 1
            fi

  chatgpt_api_integration_test_mlx:
    macos:
      xcode: "16.0.0"
    resource_class: m2pro.large
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            brew install python@3.12
            python3.12 -m venv env
            source env/bin/activate
      - run:
          name: Install dependencies
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
      - run_chatgpt_api_test:
          inference_engine: mlx
          model_id: llama-3.2-1b
          prompt: "Keep responses concise. Who was the king of pop?"
          expected_output: "Michael Jackson"

  chatgpt_api_integration_test_dummy:
    macos:
      xcode: "16.0.0"
    resource_class: m2pro.large
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            brew install python@3.12
            python3.12 -m venv env
            source env/bin/activate
      - run:
          name: Install dependencies
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
      - run_chatgpt_api_test:
          inference_engine: dummy
          model_id: dummy
          prompt: "Dummy prompt."
          expected_output: "dummy"

  chatgpt_api_integration_test_tinygrad:
    macos:
      xcode: "16.0.0"
    resource_class: m2pro.large
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            brew install python@3.12
            python3.12 -m venv env
            source env/bin/activate
      - run:
          name: Install dependencies
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
      - run_chatgpt_api_test:
          inference_engine: tinygrad
          model_id: llama-3.2-1b
          prompt: "Keep responses concise. Who was the king of pop?"
          expected_output: "Michael Jackson"

  chatgpt_api_integration_test_tinygrad_linux:
    machine:
      image: ubuntu-2204:current
    resource_class: xlarge
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            export DEBIAN_FRONTEND=noninteractive
            export DEBCONF_NONINTERACTIVE_SEEN=true
            sudo apt-get update
            sudo add-apt-repository -y ppa:deadsnakes/ppa
            sudo apt-get update
            sudo apt-get install -y python3.12 python3.12-venv clang
            python3.12 -m venv env
            source env/bin/activate
      - run:
          name: Install dependencies
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
      - run_chatgpt_api_test:
          inference_engine: tinygrad
          model_id: llama-3.2-1b
          prompt: "Keep responses concise. Who was the king of pop?"
          expected_output: "Michael Jackson"

  measure_pip_sizes:
    macos:
      xcode: "16.0.0"
    steps:
      - checkout
      - run:
          name: Set up Python
          command: |
            brew install python@3.12
            python3.12 -m venv env
            source env/bin/activate
      - run:
          name: Install dependencies and measure sizes
          command: |
            source env/bin/activate
            pip install --upgrade pip
            pip install .
            python ./extra/pipsize.py --json ./pipsize.json
      - store_artifacts:
          path: ./pipsize.json
          destination: pip-sizes.json

  check_line_count:
    docker:
      - image: cimg/python:3.10
    steps:
      - checkout
      - run:
          name: Setup git for PR comparison
          command: |
            if [[ -n "$CIRCLE_PULL_REQUEST" ]]; then
              PR_NUMBER=$(echo $CIRCLE_PULL_REQUEST | rev | cut -d'/' -f1 | rev)
              BASE_BRANCH=$(curl -s -H "Circle-Token: $CIRCLE_TOKEN" \
                "https://circleci.com/api/v2/project/github/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME/pipeline/$CIRCLE_WORKFLOW_ID" \
                | jq -r '.target_branch')
              git clone -b $BASE_BRANCH --single-branch \
                https://github.com/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME.git \
                base_branch
            fi
      - run:
          name: Install dependencies
          command: |
            python -m pip install --upgrade pip
            pip install tabulate
      - run:
          name: Run line count check
          command: |
            if [[ -n "$CIRCLE_PULL_REQUEST" ]]; then
              python extra/line_counter.py base_branch .
            else
              python extra/line_counter.py .
            fi
      - store_artifacts:
          path: line-count-snapshot.json
          destination: line-count-snapshot.json
      - store_artifacts:
          path: line-count-diff.json
          destination: line-count-diff.json
      - run:
          name: Create test results directory
          command: |
            mkdir -p test-results/line-count
            cp line-count-*.json test-results/line-count/
      - store_test_results:
          path: test-results

workflows:
  version: 2
  build_and_test:
    jobs:
      - check_line_count:
          filters:
            branches:
              only: /.*/
            tags:
              only: /.*/
      - unit_test
      - discovery_integration_test
      - chatgpt_api_integration_test_mlx
      - chatgpt_api_integration_test_tinygrad
      - chatgpt_api_integration_test_tinygrad_linux
      - chatgpt_api_integration_test_dummy
      - measure_pip_sizes
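
# Only check_line_count declares tag filters; CircleCI skips tag-triggered builds for
# jobs without an explicit tags filter, so the remaining jobs run on branch pushes only.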