Support benchmark runs on different network interface types

2025-10-23 02:57:14 +03:00 · 2024-12-17 21:03:00 +00:00
parent 2f0b543a1e
commit 023ddc207e
2 changed files with 31 additions and 14 deletions
--- a/.github/workflows/bench_job.yml
+++ b/.github/workflows/bench_job.yml
@@ -13,6 +13,9 @@ on:
      calling_job_name:
        required: true
        type: string
+      network_interface:
+        required: true
+        type: string
 jobs:
  generate-matrix:
    runs-on: ubuntu-latest
@@ -122,7 +125,7 @@ jobs:
          sudo taskpolicy -d default -g default -a -t 0 -l 0 .venv/bin/exo \
            --node-id="${MY_NODE_ID}" \
            --node-id-filter="${ALL_NODE_IDS}" \
-            --interface-type-filter="Ethernet" \
+            --interface-type-filter="${{ inputs.network_interface }}" \
            --disable-tui \
            --max-generate-tokens 250 \
            --chatgpt-api-port 52415 > output1.log 2>&1 &
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -17,6 +17,7 @@ jobs:
      config: '{"M4PRO_GPU16_24GB": 1}'
      model: ${{ matrix.model }}
      calling_job_name: 'single-m4-pro'
+      network_interface: 'Ethernet'
    secrets: inherit

  two-m4-pro-cluster:
@@ -28,30 +29,43 @@ jobs:
      config: '{"M4PRO_GPU16_24GB": 2}'
      model: ${{ matrix.model }}
      calling_job_name: 'two-m4-pro-cluster'
+      network_interface: 'Ethernet'
    secrets: inherit

+  # two-m4-pro-cluster-thunderbolt:
+  #   strategy:
+  #     matrix:
+  #       model: ['llama-3.2-1b', 'llama-3.2-3b', 'llama-3.1-8b']
+  #   uses: ./.github/workflows/bench_job.yml
+  #   with:
+  #     config: '{"M4PRO_GPU16_24GB": 2}'
+  #     model: ${{ matrix.model }}
+  #     calling_job_name: 'two-m4-pro-cluster-thunderbolt'
+  #     network_interface: 'Thunderbolt'
+  #   secrets: inherit
+
  three-m4-pro-cluster:
    strategy:
      matrix:
        model: ['llama-3.2-1b', 'llama-3.2-3b', 'llama-3.1-8b', 'llama-3.3-70b']
-      # Optional: add fail-fast: false if you want all matrix jobs to continue even if one fails
      fail-fast: false
    uses: ./.github/workflows/bench_job.yml
    with:
      config: '{"M4PRO_GPU16_24GB": 3}'
      model: ${{ matrix.model }}
      calling_job_name: 'three-m4-pro-cluster'
+      network_interface: 'Ethernet'
    secrets: inherit

-  # test-m3-single-node:
-  #   strategy:
-  #     matrix:
-  #       model: ['llama-3.2-1b']
-  #     # Optional: add fail-fast: false if you want all matrix jobs to continue even if one fails
-  #     fail-fast: false
-  #   uses: ./.github/workflows/bench_job.yml
-  #   with:
-  #     config: '{"M3MAX_GPU40_128GB": 1}'
-  #     model: ${{ matrix.model }}
-  #     calling_job_name: 'test-m3-cluster'
-  #   secrets: inherit
+  test-m3-single-node:
+    strategy:
+      matrix:
+        model: ['llama-3.2-1b']
+      fail-fast: false
+    uses: ./.github/workflows/bench_job.yml
+    with:
+      config: '{"M3MAX_GPU40_128GB": 1}'
+      model: ${{ matrix.model }}
+      calling_job_name: 'test-m3-cluster'
+      network_interface: 'Ethernet'
+    secrets: inherit