-
Notifications
You must be signed in to change notification settings - Fork 1.1k
269 lines (232 loc) · 9 KB
/
build_and_test.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
name: Build and Test
on:
push:
branches: [ '*' ]
tags: [ '*' ]
pull_request:
branches: [ '*' ]
env:
PYTHON_VERSION: "3.12"
jobs:
check_line_count:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install tabulate
- name: Run line count check
run: |
if [[ -n "${{ github.event.pull_request }}" ]]; then
git fetch origin ${{ github.base_ref }}
git clone -b ${{ github.base_ref }} --single-branch \
https://github.com/${{ github.repository }}.git base_branch
python extra/line_counter.py base_branch .
else
python extra/line_counter.py .
fi
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: line-count-results
path: |
line-count-snapshot.json
line-count-diff.json
unit_test:
runs-on: macos-15
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Install dependencies
run: |
python -m venv env
source env/bin/activate
pip install --upgrade pip
pip install .
- name: Run tests
run: |
source env/bin/activate
# set TEMPERATURE to 0 for deterministic sampling
echo "Running inference engine tests..."
METAL_DEVICE_WRAPPER_TYPE=1 METAL_DEBUG_ERROR_MODE=0 METAL_XCODE=1 TEMPERATURE=0 python3 -m exo.inference.test_inference_engine
echo "Running tokenizer tests..."
python3 ./test/test_tokenizers.py
python3 ./test/test_model_helpers.py
discovery_integration_test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Install dependencies
run: |
python -m venv env
source env/bin/activate
pip install --upgrade pip
pip install .
- name: Run discovery integration test
run: |
source env/bin/activate
DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --disable-tui > output1.log 2>&1 &
PID1=$!
DEBUG_DISCOVERY=7 DEBUG=7 exo --node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --disable-tui > output2.log 2>&1 &
PID2=$!
sleep 10
kill $PID1 $PID2
if grep -q "Peer statuses: {\\'node2\\': \\'is_connected=True, health_check=True" output1.log && ! grep -q "Failed to connect peers:" output1.log && grep -q "Peer statuses: {\\'node1\\': \\'is_connected=True, health_check=True" output2.log && ! grep -q "Failed to connect peers:" output2.log; then
echo "Test passed: Both instances discovered each other"
exit 0
else
echo "Test failed: Devices did not discover each other"
echo "Output of first instance:"
cat output1.log
echo "Output of second instance:"
cat output2.log
exit 1
fi
chatgpt_api_tests:
runs-on: ${{ matrix.inference_engine == 'tinygrad' && 'ubuntu-latest' || 'macos-15' }}
strategy:
matrix:
inference_engine: [mlx, tinygrad, dummy]
include:
- inference_engine: mlx
model_id: llama-3.2-1b
prompt: "Keep responses concise. Who was the king of pop?"
expected_output: "Michael Jackson"
- inference_engine: tinygrad
model_id: llama-3.2-1b
prompt: "Keep responses concise. Who was the king of pop?"
expected_output: "Michael Jackson"
- inference_engine: dummy
model_id: dummy
prompt: "Dummy prompt."
expected_output: "dummy"
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Install dependencies
run: |
python -m venv env
source env/bin/activate
pip install --upgrade pip
pip install .
if [ "${{ matrix.inference_engine }}" = "tinygrad" ]; then
pip install llvmlite
fi
- name: Run ChatGPT API test
env:
TOKENIZERS_PARALLELISM: ${{ matrix.inference_engine == 'tinygrad' && 'true' || 'false' }}
SUPPORT_BF16: ${{ matrix.inference_engine == 'tinygrad' && '0' || '' }}
CLANG: ${{ matrix.inference_engine == 'tinygrad' && '1' || '' }}
run: |
source env/bin/activate
# Start first instance
HF_HOME="$(pwd)/.hf_cache_node1" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine ${{ matrix.inference_engine }} \
--node-id "node1" --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 \
--chatgpt-api-response-timeout 900 --disable-tui > output1.log &
PID1=$!
tail -f output1.log &
TAIL1=$!
# Start second instance
HF_HOME="$(pwd)/.hf_cache_node2" DEBUG_DISCOVERY=7 DEBUG=7 exo --inference-engine ${{ matrix.inference_engine }} \
--node-id "node2" --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 \
--chatgpt-api-response-timeout 900 --disable-tui > output2.log &
PID2=$!
tail -f output2.log &
TAIL2=$!
# Remember to kill the tail processes at the end
trap 'kill $TAIL1 $TAIL2' EXIT
# Wait for discovery
sleep 10
# Function to check if processes are still running
check_processes() {
if ! kill -0 $PID1 2>/dev/null; then
echo "First instance (PID $PID1) died unexpectedly. Log output:"
cat output1.log
exit 1
fi
if ! kill -0 $PID2 2>/dev/null; then
echo "Second instance (PID $PID2) died unexpectedly. Log output:"
cat output2.log
exit 1
fi
}
# Check processes before proceeding
check_processes
echo "Sending request to first instance..."
response_1=$(curl -s http://localhost:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "${{ matrix.model_id }}",
"messages": [{"role": "user", "content": "${{ matrix.prompt }}"}],
"temperature": 0.7
}')
echo "Response 1: $response_1"
# Check processes after first response
check_processes
echo "Sending request to second instance..."
response_2=$(curl -s http://localhost:8001/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "${{ matrix.model_id }}",
"messages": [{"role": "user", "content": "${{ matrix.prompt }}"}],
"temperature": 0.7
}')
echo "Response 2: $response_2"
# Check processes after second response
check_processes
# Stop both instances
kill $PID1 $PID2
echo ""
# Extract content using jq and check if it contains expected output
content1=$(echo "$response_1" | jq -r '.choices[0].message.content')
content2=$(echo "$response_2" | jq -r '.choices[0].message.content')
if [[ "$content1" != *"${{ matrix.expected_output }}"* ]] || [[ "$content2" != *"${{ matrix.expected_output }}"* ]]; then
echo "Test failed: Response does not match '${{ matrix.expected_output }}'"
echo "Response 1 content: $content1"
echo ""
echo "Response 2 content: $content2"
echo "Output of first instance:"
cat output1.log
echo "Output of second instance:"
cat output2.log
exit 1
else
echo "Test passed: Response from both nodes matches '${{ matrix.expected_output }}'"
fi
measure_pip_sizes:
runs-on: macos-15
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Install dependencies and measure sizes
run: |
python -m venv env
source env/bin/activate
pip install --upgrade pip
pip install .
python ./extra/pipsize.py --json ./pipsize.json
- name: Upload pip sizes artifact
uses: actions/upload-artifact@v4
with:
name: pip-sizes
path: ./pipsize.json