|
10 | 10 | - .github/workflows/mlx.yml |
11 | 11 | - backends/mlx/** |
12 | 12 | - extension/llm/export/** |
| 13 | + - extension/audio/** |
| 14 | + - examples/models/parakeet/** |
| 15 | + - examples/models/voxtral_realtime/** |
13 | 16 | workflow_dispatch: |
14 | 17 |
|
15 | 18 | permissions: {} |
@@ -104,3 +107,370 @@ jobs: |
104 | 107 | echo "::error::Too many test failures: $FAILED > $MAX_FAILURES" |
105 | 108 | exit 1 |
106 | 109 | fi |
| 110 | +
|
# Parakeet TDT on MLX: export the model, build the C++ runner, run it on a
# downloaded WAV file and require the word "Phoebe" in the runner output.
# NOTE: this job key belongs under the workflow's top-level `jobs:` mapping.
test-mlx-parakeet:
  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  with:
    job-name: test-mlx-parakeet
    runner: macos-14-xlarge
    python-version: "3.12"
    submodules: recursive
    # On pull requests test the PR head commit; otherwise the triggering commit.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    timeout: 90
    script: |
      set -eux

      echo "::group::Install ExecuTorch"
      ${CONDA_RUN} python install_executorch.py > /dev/null
      echo "::endgroup::"

      echo "::group::Install Parakeet requirements"
      ${CONDA_RUN} pip install -r examples/models/parakeet/install_requirements.txt
      echo "::endgroup::"

      ${CONDA_RUN} pip list

      echo "::group::Export Parakeet"
      ${CONDA_RUN} python -m executorch.examples.models.parakeet.export_parakeet_tdt \
        --backend mlx \
        --dtype bf16 \
        --qlinear_encoder 4w \
        --qlinear_encoder_group_size 128 \
        --qlinear 4w \
        --qlinear_group_size 128 \
        --output-dir /tmp/parakeet_mlx
      echo "::endgroup::"

      echo "::group::Build Parakeet MLX runner"
      ${CONDA_RUN} make parakeet-mlx
      echo "::endgroup::"

      echo "::group::Run Parakeet MLX runner"
      # --fail makes curl exit non-zero on an HTTP error instead of saving the
      # error page as the audio file.
      curl -fL https://dldata-public.s3.us-east-2.amazonaws.com/2086-149220-0033.wav -o /tmp/test_audio.wav
      # Capture the exit status explicitly: with `set -e` a failing command
      # substitution would otherwise abort before the log below is printed.
      RUNNER_STATUS=0
      OUTPUT=$(./cmake-out/examples/models/parakeet/parakeet_runner \
        --model_path /tmp/parakeet_mlx/model.pte \
        --audio_path /tmp/test_audio.wav \
        --tokenizer_path /tmp/parakeet_mlx/tokenizer.model 2>&1) || RUNNER_STATUS=$?
      echo "Runner output:"
      echo "$OUTPUT"
      if [ "${RUNNER_STATUS}" -ne 0 ]; then
        echo "Failed: runner exited with status ${RUNNER_STATUS}"
        exit "${RUNNER_STATUS}"
      fi
      if echo "$OUTPUT" | grep -iq "Phoebe"; then
        echo "Success: 'Phoebe' found in output"
      else
        echo "Failed: Expected 'Phoebe' not found in output"
        exit 1
      fi
      echo "::endgroup::"
| 163 | +
|
# Voxtral on MLX: export via the HF-based export script, build the C++ runner,
# run it on a downloaded WAV file with a fixed prompt and require the word
# "poem" in the runner output.
# NOTE: this job key belongs under the workflow's top-level `jobs:` mapping.
test-mlx-voxtral:
  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  secrets: inherit
  with:
    job-name: test-mlx-voxtral
    runner: macos-14-xlarge
    python-version: "3.12"
    submodules: recursive
    # On pull requests test the PR head commit; otherwise the triggering commit.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    # NOTE(review): the script below never references SECRET_EXECUTORCH_HF_TOKEN
    # directly; confirm whether the export step picks the token up some other way.
    secrets-env: EXECUTORCH_HF_TOKEN
    timeout: 90
    script: |
      set -eux

      echo "::group::Install ExecuTorch"
      ${CONDA_RUN} python install_executorch.py > /dev/null
      echo "::endgroup::"

      echo "::group::Install Voxtral requirements"
      ${CONDA_RUN} pip install mistral_common librosa soundfile datasets
      # optimum-executorch is installed at the commit pinned in the repository.
      OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
      ${CONDA_RUN} pip install "optimum-executorch @ git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}"
      echo "::endgroup::"

      ${CONDA_RUN} pip list

      echo "::group::Export Voxtral"
      ${CONDA_RUN} python -m executorch.backends.mlx.examples.voxtral.export_voxtral_hf \
        --output-dir /tmp/voxtral_mlx \
        --dtype bf16 \
        --qlinear 4w
      echo "::endgroup::"

      echo "::group::Build Voxtral MLX runner"
      ${CONDA_RUN} make voxtral-mlx
      echo "::endgroup::"

      echo "::group::Run Voxtral MLX runner"
      # --fail makes curl exit non-zero on an HTTP error instead of saving the
      # error page as the tokenizer or audio file.
      curl -fL https://huggingface.co/mistralai/Voxtral-Mini-3B-2507/resolve/main/tekken.json -o /tmp/tekken.json
      curl -fL https://github.com/voxserv/audio_quality_testing_samples/raw/refs/heads/master/testaudio/16000/test01_20s.wav -o /tmp/test_audio.wav
      # Capture the exit status explicitly: with `set -e` a failing command
      # substitution would otherwise abort before the log below is printed.
      RUNNER_STATUS=0
      OUTPUT=$(./cmake-out/examples/models/voxtral/voxtral_runner \
        --model_path /tmp/voxtral_mlx/model.pte \
        --tokenizer_path /tmp/tekken.json \
        --audio_path /tmp/test_audio.wav \
        --processor_path /tmp/voxtral_mlx/preprocessor.pte \
        --prompt "What is happening in this audio?" \
        --temperature 0 2>&1) || RUNNER_STATUS=$?
      echo "Runner output:"
      echo "$OUTPUT"
      if [ "${RUNNER_STATUS}" -ne 0 ]; then
        echo "Failed: runner exited with status ${RUNNER_STATUS}"
        exit "${RUNNER_STATUS}"
      fi
      if echo "$OUTPUT" | grep -iq "poem"; then
        echo "Success: 'poem' found in output"
      else
        echo "Failed: Expected 'poem' not found in output"
        exit 1
      fi
      echo "::endgroup::"
| 220 | +
|
# Voxtral Realtime on MLX (streaming): download the checkpoint, export the
# streaming mel-spectrogram preprocessor and the model, build the C++ runner,
# run it on a downloaded WAV file and require the word "Phoebe" in the output.
# NOTE: this job key belongs under the workflow's top-level `jobs:` mapping.
test-mlx-voxtral-realtime:
  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  secrets: inherit
  with:
    job-name: test-mlx-voxtral-realtime
    runner: macos-14-xlarge
    python-version: "3.12"
    submodules: recursive
    # On pull requests test the PR head commit; otherwise the triggering commit.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    secrets-env: EXECUTORCH_HF_TOKEN
    timeout: 90
    script: |
      set -eux

      echo "::group::Install ExecuTorch"
      ${CONDA_RUN} python install_executorch.py > /dev/null
      echo "::endgroup::"

      echo "::group::Install Voxtral Realtime requirements"
      ${CONDA_RUN} pip install safetensors
      echo "::endgroup::"

      ${CONDA_RUN} pip list

      echo "::group::Download model"
      # The download is issued twice: once on its own, then again inside a
      # command substitution to capture the printed snapshot path.
      # NOTE(review): the second call is presumably served from the local
      # cache populated by the first - confirm before collapsing them.
      HF_TOKEN="${SECRET_EXECUTORCH_HF_TOKEN}" ${CONDA_RUN} python -c "from huggingface_hub import snapshot_download; snapshot_download('mistralai/Voxtral-Mini-4B-Realtime-2602')"
      MODEL_PATH=$(HF_TOKEN="${SECRET_EXECUTORCH_HF_TOKEN}" ${CONDA_RUN} python -c "from huggingface_hub import snapshot_download; print(snapshot_download('mistralai/Voxtral-Mini-4B-Realtime-2602'))")
      echo "Model path: ${MODEL_PATH}"
      echo "::endgroup::"

      echo "::group::Export preprocessor"
      # The preprocessor is the first artifact written into this directory, so
      # create it up front rather than relying on the export tool to do so.
      mkdir -p /tmp/voxtral_rt_mlx
      ${CONDA_RUN} python -m executorch.extension.audio.mel_spectrogram \
        --feature_size 128 \
        --streaming \
        --backend mlx \
        --output_file /tmp/voxtral_rt_mlx/preprocessor.pte
      echo "::endgroup::"

      echo "::group::Export Voxtral Realtime (streaming)"
      ${CONDA_RUN} python -m executorch.examples.models.voxtral_realtime.export_voxtral_rt \
        --model-path "${MODEL_PATH}" \
        --backend mlx \
        --streaming \
        --output-dir /tmp/voxtral_rt_mlx \
        --qlinear-encoder 4w \
        --qlinear 4w \
        --qembedding 8w \
        --qembedding-group-size 128
      echo "::endgroup::"

      echo "::group::Build Voxtral Realtime MLX runner"
      ${CONDA_RUN} make voxtral_realtime-mlx
      echo "::endgroup::"

      echo "::group::Run Voxtral Realtime MLX runner"
      # --fail makes curl exit non-zero on an HTTP error instead of saving the
      # error page as the audio file.
      curl -fL https://dldata-public.s3.us-east-2.amazonaws.com/2086-149220-0033.wav -o /tmp/test_audio.wav
      # Capture the exit status explicitly: with `set -e` a failing command
      # substitution would otherwise abort before the log below is printed.
      RUNNER_STATUS=0
      OUTPUT=$(./cmake-out/examples/models/voxtral_realtime/voxtral_realtime_runner \
        --model_path /tmp/voxtral_rt_mlx/model.pte \
        --tokenizer_path "${MODEL_PATH}/tekken.json" \
        --preprocessor_path /tmp/voxtral_rt_mlx/preprocessor.pte \
        --audio_path /tmp/test_audio.wav \
        --streaming 2>&1) || RUNNER_STATUS=$?
      echo "Runner output:"
      echo "$OUTPUT"
      if [ "${RUNNER_STATUS}" -ne 0 ]; then
        echo "Failed: runner exited with status ${RUNNER_STATUS}"
        exit "${RUNNER_STATUS}"
      fi
      if echo "$OUTPUT" | grep -iq "Phoebe"; then
        echo "Success: 'Phoebe' found in output"
      else
        echo "Failed: Expected 'Phoebe' not found in output"
        exit 1
      fi
      echo "::endgroup::"
| 292 | +
|
# Whisper (openai/whisper-tiny) on MLX: export the model, run the Python
# inference script with its sample audio and require "Mr. Quilter" in the
# transcription.
# NOTE: this job key belongs under the workflow's top-level `jobs:` mapping.
test-mlx-whisper:
  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  secrets: inherit
  with:
    job-name: test-mlx-whisper
    runner: macos-14-xlarge
    python-version: "3.12"
    submodules: recursive
    # On pull requests test the PR head commit; otherwise the triggering commit.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    secrets-env: EXECUTORCH_HF_TOKEN
    timeout: 90
    script: |
      set -eux

      # This step only installs ExecuTorch; no CMake configure is run here.
      echo "::group::Install ExecuTorch"
      ${CONDA_RUN} python install_executorch.py > /dev/null
      echo "::endgroup::"

      echo "::group::Install Whisper requirements"
      ${CONDA_RUN} pip install -U "huggingface_hub[cli]<1.0"
      # Quote the token so it is always passed as exactly one argument.
      ${CONDA_RUN} huggingface-cli login --token "${SECRET_EXECUTORCH_HF_TOKEN}"
      ${CONDA_RUN} pip install transformers soundfile datasets librosa
      echo "::endgroup::"

      ${CONDA_RUN} pip list

      echo "::group::Export Whisper"
      ${CONDA_RUN} python -m executorch.backends.mlx.examples.whisper.export_whisper \
        --model-id "openai/whisper-tiny" \
        --output-dir /tmp/whisper_mlx \
        --dtype bf16 \
        --qlinear 4w
      echo "::endgroup::"

      echo "::group::Run Whisper inference"
      # Capture the exit status explicitly: with `set -e` a failing command
      # substitution would otherwise abort before the log below is printed.
      RUN_STATUS=0
      OUTPUT=$(${CONDA_RUN} python -m executorch.backends.mlx.examples.whisper.run_whisper \
        --model-dir /tmp/whisper_mlx \
        --use-sample-audio 2>&1) || RUN_STATUS=$?
      echo "$OUTPUT"
      if [ "${RUN_STATUS}" -ne 0 ]; then
        echo "Failed: inference exited with status ${RUN_STATUS}"
        exit "${RUN_STATUS}"
      fi
      # -F matches the phrase literally; without it the "." would match any character.
      if echo "$OUTPUT" | grep -Fiq "Mr. Quilter"; then
        echo "Success: 'Mr. Quilter' found in transcription"
      else
        echo "Failed: Expected 'Mr. Quilter' not found in transcription"
        exit 1
      fi
      echo "::endgroup::"
| 339 | +
|
| 340 | +
|
# stories110M on MLX: build ExecuTorch and the Llama runner via CMake workflow
# presets, download the checkpoint and tokenizer, export with the MLX backend
# enabled, then run the C++ llama runner on a short prompt.
# NOTE: this job key belongs under the workflow's top-level `jobs:` mapping.
test-mlx-stories110m:
  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  with:
    job-name: test-mlx-stories110m
    runner: macos-14-xlarge
    python-version: "3.12"
    submodules: recursive
    # On pull requests test the PR head commit; otherwise the triggering commit.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    timeout: 90
    script: |
      set -eux

      echo "::group::Install ExecuTorch"
      ${CONDA_RUN} python install_executorch.py > /dev/null
      echo "::endgroup::"

      echo "::group::Install Llama requirements"
      ${CONDA_RUN} sh examples/models/llama/install_requirements.sh
      echo "::endgroup::"

      ${CONDA_RUN} pip list

      echo "::group::Build ExecuTorch with MLX delegate"
      ${CONDA_RUN} cmake --workflow --preset mlx-release
      echo "::endgroup::"

      echo "::group::Build Llama runner with MLX"
      pushd examples/models/llama
      ${CONDA_RUN} cmake --workflow --preset llama-release
      popd
      echo "::endgroup::"

      echo "::group::Download stories110M artifacts"
      # -f fails the step on an HTTP error instead of silently saving the error
      # page as the checkpoint/tokenizer; -S still reports errors despite -s.
      curl -fLsS "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt" --output stories110M.pt
      curl -fLsS "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model" --output tokenizer.model
      echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
      echo "::endgroup::"

      echo "::group::Create tokenizer.bin"
      ${CONDA_RUN} python -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin
      echo "::endgroup::"

      # The group title names the module that is actually invoked below.
      echo "::group::Export stories110M with MLX backend via export_llm"
      ${CONDA_RUN} python -m extension.llm.export.export_llm \
        base.checkpoint=stories110M.pt \
        base.params=params.json \
        model.use_kv_cache=true \
        model.dtype_override=fp32 \
        backend.mlx.enabled=true \
        quantization.qmode=4w \
        quantization.group_size=32 \
        export.output_name=/tmp/stories110m_mlx.pte
      echo "::endgroup::"

      # Only the runner's exit status is checked here (via `set -e`); the
      # generated text itself is not asserted on.
      echo "::group::Run inference with C++ llama runner"
      ./cmake-out/examples/models/llama/llama_main \
        --model_path=/tmp/stories110m_mlx.pte \
        --tokenizer_path=tokenizer.bin \
        --prompt="Once upon a time," \
        --temperature=0 \
        --seq_len=10
      echo "::endgroup::"
| 403 | +
|
# HF LLMs on MLX: for every combination of model, custom-op flag and
# quantization config, export the model, run Python inference on a fixed
# prompt and require "Paris" in the output.
# NOTE: this job key belongs under the workflow's top-level `jobs:` mapping.
test-mlx-llm:
  strategy:
    # Let the remaining matrix entries finish even when one of them fails.
    fail-fast: false
    matrix:
      model:
        - id: "unsloth/Llama-3.2-1B-Instruct"
          name: "llama-1b"
        - id: "unsloth/Qwen3-0.6B"
          name: "qwen3-0.6b"
        - id: "unsloth/gemma-3-1b-it"
          name: "gemma3-1b"
      use-custom: [false, true]
      qconfig: ["4w", "nvfp4"]
  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  secrets: inherit
  with:
    # Job name is <model>[-custom]-<qconfig>; "-custom" is added only when use-custom is true.
    job-name: test-mlx-llm-${{ matrix.model.name }}${{ matrix.use-custom && '-custom' || '' }}-${{ matrix.qconfig }}
    runner: macos-14-xlarge
    python-version: "3.12"
    submodules: recursive
    # On pull requests test the PR head commit; otherwise the triggering commit.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    secrets-env: EXECUTORCH_HF_TOKEN
    timeout: 90
    script: |
      set -eux

      MODEL_ID="${{ matrix.model.id }}"
      MODEL_NAME="${{ matrix.model.name }}"
      USE_CUSTOM="${{ matrix.use-custom }}"
      QCONFIG="${{ matrix.qconfig }}"

      # Extra export flags, passed only for the use-custom matrix entries.
      CUSTOM_ARGS=""
      if [ "${USE_CUSTOM}" = "true" ]; then
        CUSTOM_ARGS="--use-custom-sdpa --use-custom-kv-cache"
      fi

      echo "::group::Install ExecuTorch and configure MLX build"
      ${CONDA_RUN} python install_executorch.py > /dev/null
      ${CONDA_RUN} cmake --preset mlx-release
      echo "::endgroup::"

      echo "::group::Install LLM requirements"
      ${CONDA_RUN} pip install -U "huggingface_hub[cli]<1.0"
      # Quote the token so it is always passed as exactly one argument.
      ${CONDA_RUN} huggingface-cli login --token "${SECRET_EXECUTORCH_HF_TOKEN}"
      # optimum-executorch is installed at the commit pinned in the repository.
      OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
      ${CONDA_RUN} pip install transformers "optimum-executorch @ git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}"
      echo "::endgroup::"

      ${CONDA_RUN} pip list

      echo "::group::Export ${MODEL_NAME}"
      # CUSTOM_ARGS is intentionally left unquoted: it must split into separate
      # flags, and expand to nothing when empty.
      ${CONDA_RUN} python -m executorch.backends.mlx.examples.llm.export_llm_hf \
        --model-id "${MODEL_ID}" \
        --output "/tmp/${MODEL_NAME}.pte" \
        --qlinear "${QCONFIG}" \
        --qembedding "${QCONFIG}" \
        ${CUSTOM_ARGS}
      echo "::endgroup::"

      echo "::group::Run ${MODEL_NAME} inference"
      # Capture the exit status explicitly: with `set -e` a failing command
      # substitution would otherwise abort before the log below is printed.
      RUN_STATUS=0
      OUTPUT=$(${CONDA_RUN} python -m executorch.backends.mlx.examples.llm.run_llm_hf \
        --pte "/tmp/${MODEL_NAME}.pte" \
        --model-id "${MODEL_ID}" \
        --prompt "What is the capital of France?" \
        --max-new-tokens 50 2>&1) || RUN_STATUS=$?
      echo "$OUTPUT"
      if [ "${RUN_STATUS}" -ne 0 ]; then
        echo "Failed: inference exited with status ${RUN_STATUS}"
        exit "${RUN_STATUS}"
      fi
      if echo "$OUTPUT" | grep -iq "Paris"; then
        echo "Success: 'Paris' found in output"
      else
        echo "Failed: Expected 'Paris' not found in output"
        exit 1
      fi
      echo "::endgroup::"
0 commit comments