Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 8 additions & 24 deletions .ci/scripts/build_llama_android.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,12 @@ if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
fi
which "${PYTHON_EXECUTABLE}"

install_executorch_and_backend_lib() {
echo "Installing executorch and xnnpack backend"
build_llama_android() {
echo "Building llama runner for Android..."
pushd extension/llm/tokenizers
echo "Updating tokenizers submodule"
git submodule update --init
popd
clean_executorch_install_folders
mkdir cmake-android-out
ANDROID_NDK=${ANDROID_NDK:-/opt/ndk}
Expand All @@ -26,30 +30,10 @@ install_executorch_and_backend_lib() {
-DBUCK2="${BUCK2}" \
-DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
-DANDROID_ABI="${ANDROID_ABI}" \
-DCMAKE_INSTALL_PREFIX=cmake-android-out \
-DCMAKE_BUILD_TYPE=Release \
-DXNNPACK_ENABLE_ARM_BF16=OFF \
-Bcmake-android-out .

cmake --build cmake-android-out -j4 --target install --config Release
cmake --build cmake-android-out -j4 --target llama_main --config Release
}

build_llama_runner() {
echo "Building llama runner for Android..."
pushd extension/llm/tokenizers
echo "Updating tokenizers submodule"
git submodule update --init
popd
ANDROID_ABI=arm64-v8a
cmake -DBUCK2="${BUCK2}" \
-DBUILD_TESTING=OFF \
-DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK"/build/cmake/android.toolchain.cmake \
-DANDROID_ABI="${ANDROID_ABI}" \
-DCMAKE_INSTALL_PREFIX=cmake-android-out \
-DCMAKE_BUILD_TYPE=Release \
-Bcmake-android-out/examples/models/llama examples/models/llama

cmake --build cmake-android-out/examples/models/llama -j4 --config Release
}
install_executorch_and_backend_lib
build_llama_runner
build_llama_android
31 changes: 8 additions & 23 deletions .ci/scripts/test_llama.sh
Original file line number Diff line number Diff line change
Expand Up @@ -149,35 +149,21 @@ fi

which "${PYTHON_EXECUTABLE}"

cmake_install_executorch_libraries() {
echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
cmake_build_llama() {
echo "Building llama runner"
pushd extension/llm/tokenizers
echo "Updating tokenizers submodule"
git submodule update --init
popd
rm -rf cmake-out
retry cmake --preset llm \
-DEXECUTORCH_BUILD_TESTS=ON \
-DBUILD_TESTING=OFF \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
-DEXECUTORCH_BUILD_QNN="$QNN" \
-DEXECUTORCH_ENABLE_LOGGING=ON \
-DQNN_SDK_ROOT="$QNN_SDK_ROOT"
cmake --build cmake-out -j9 --target install --config "$CMAKE_BUILD_TYPE"
}

cmake_build_llama_runner() {
echo "Building llama runner"
pushd extension/llm/tokenizers
echo "Updating tokenizers submodule"
git submodule update --init
popd
dir="examples/models/llama"
if [[ "$CMAKE_BUILD_TYPE" == "Debug" ]]; then
PRESET="llama-debug"
else
PRESET="llama-release"
fi
pushd "${dir}"
cmake --workflow --preset "${PRESET}"
popd
cmake --build cmake-out -j9 --target llama_main --config "$CMAKE_BUILD_TYPE"
}

cleanup_files() {
Expand Down Expand Up @@ -269,8 +255,7 @@ if [[ "${BUILD_TOOL}" == "buck2" ]]; then
# shellcheck source=/dev/null
$BUCK run examples/models/llama:main -- ${RUNTIME_ARGS} > result.txt
elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
cmake_install_executorch_libraries
cmake_build_llama_runner
cmake_build_llama
# Run llama runner
NOW=$(date +"%H:%M:%S")
echo "Starting to run llama runner at ${NOW}"
Expand Down
12 changes: 2 additions & 10 deletions .ci/scripts/test_llama_torchao_lowbit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@ echo "Update tokenizers submodule"
git submodule update --init
popd

# Install ET with CMake
# Build llama runner with torchao
cmake -DPYTHON_EXECUTABLE=python \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DEXECUTORCH_ENABLE_LOGGING=1 \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
Expand All @@ -41,14 +40,7 @@ cmake -DPYTHON_EXECUTABLE=python \
-DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-Bcmake-out .
cmake --build cmake-out -j16 --config Release --target install

# Install llama runner with torchao
cmake -DPYTHON_EXECUTABLE=python \
-DCMAKE_BUILD_TYPE=Release \
-Bcmake-out/examples/models/llama \
examples/models/llama
cmake --build cmake-out/examples/models/llama -j16 --config Release
cmake --build cmake-out -j16 --config Release --target llama_main

# Download stories llama110m artifacts
download_stories_model_artifacts
Expand Down
16 changes: 5 additions & 11 deletions .ci/scripts/test_lora.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,15 @@ set -exu
# shellcheck source=/dev/null
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

cmake_install_executorch_libraries() {
echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
rm -rf cmake-out
cmake --preset llm-release -DEXECUTORCH_ENABLE_LOGGING=ON
cmake --build --preset llm-release-install
}

cmake_build_llama_runner() {
cmake_build_llama() {
echo "Building llama runner"
pushd extension/llm/tokenizers
echo "Updating tokenizers submodule"
git submodule update --init
popd
make llama-cpu
rm -rf cmake-out
cmake --preset llm-release -DEXECUTORCH_ENABLE_LOGGING=ON
cmake --build cmake-out -j9 --target llama_main --config Release
}

cleanup_files() {
Expand Down Expand Up @@ -57,8 +52,7 @@ HF_QWEN_PATH=$(python -c "from huggingface_hub import snapshot_download; print(s
echo "Model downloaded to: $HF_QWEN_PATH"

### BUILD LLAMA RUNNER.
cmake_install_executorch_libraries
cmake_build_llama_runner
cmake_build_llama

# Runner constants.
RUNTIME_ARGS="--tokenizer_path=${HF_QWEN_PATH}/ --temperature=0 --seq_len=100 --warmup=1"
Expand Down
16 changes: 5 additions & 11 deletions .ci/scripts/test_lora_multimethod.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,15 @@ set -exu
# shellcheck source=/dev/null
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

cmake_install_executorch_libraries() {
echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
rm -rf cmake-out
cmake --preset llm-release -DEXECUTORCH_ENABLE_LOGGING=ON
cmake --build --preset llm-release-install
}

cmake_build_llama_runner() {
cmake_build_llama() {
echo "Building llama runner"
pushd extension/llm/tokenizers
echo "Updating tokenizers submodule"
git submodule update --init
popd
make llama-cpu
rm -rf cmake-out
cmake --preset llm-release -DEXECUTORCH_ENABLE_LOGGING=ON
cmake --build cmake-out -j9 --target llama_main --config Release
}

cleanup_files() {
Expand Down Expand Up @@ -55,8 +50,7 @@ $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
--config examples/models/qwen3/config/qwen3_multimethod.yaml

### BUILD LLAMA RUNNER ###
cmake_install_executorch_libraries
cmake_build_llama_runner
cmake_build_llama

# Runner constants.
RUNTIME_ARGS="--tokenizer_path=${HF_QWEN_PATH}/ --temperature=0 --seq_len=100 --warmup=1"
Expand Down
11 changes: 1 addition & 10 deletions .ci/scripts/test_torchao_huggingface_checkpoints.sh
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,6 @@ fi
if [[ "$TEST_WITH_RUNNER" -eq 1 ]]; then
echo "[runner] Building and testing llama_main ..."
cmake -DPYTHON_EXECUTABLE=python \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DEXECUTORCH_ENABLE_LOGGING=1 \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
Expand All @@ -163,15 +162,7 @@ if [[ "$TEST_WITH_RUNNER" -eq 1 ]]; then
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-DEXECUTORCH_BUILD_KERNELS_TORCHAO=${EXECUTORCH_BUILD_KERNELS_TORCHAO} \
-Bcmake-out .
cmake --build cmake-out -j16 --config Release --target install


# Install llama runner
cmake -DPYTHON_EXECUTABLE=python \
-DCMAKE_BUILD_TYPE=Release \
-Bcmake-out/examples/models/llama \
examples/models/llama
cmake --build cmake-out/examples/models/llama -j16 --config Release
cmake --build cmake-out -j16 --config Release --target llama_main

# Run the model
./cmake-out/examples/models/llama/llama_main --model_path=$MODEL_OUT --tokenizer_path="${HF_MODEL_DIR}/tokenizer.json" --prompt="Once upon a time,"
Expand Down
4 changes: 1 addition & 3 deletions .github/workflows/mlx.yml
Original file line number Diff line number Diff line change
Expand Up @@ -365,9 +365,7 @@ jobs:
echo "::endgroup::"
echo "::group::Build Llama runner with MLX"
pushd examples/models/llama
${CONDA_RUN} cmake --workflow --preset llama-release
popd
${CONDA_RUN} cmake --build cmake-out --target llama_main
echo "::endgroup::"
echo "::group::Download stories110M artifacts"
Expand Down
9 changes: 8 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1140,6 +1140,7 @@ endif()
if(EXECUTORCH_BUILD_KERNELS_LLM)
# TODO: move all custom kernels to ${CMAKE_CURRENT_SOURCE_DIR}/kernels/custom
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/custom_ops)
executorch_target_link_options_shared_lib(custom_ops)
list(APPEND _executorch_kernels custom_ops_aot_lib)
endif()

Expand Down Expand Up @@ -1271,7 +1272,7 @@ if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
endif()

if(EXECUTORCH_BUILD_KERNELS_LLM)
list(APPEND _executor_runner_libs $<LINK_LIBRARY:WHOLE_ARCHIVE,custom_ops>)
list(APPEND _executor_runner_libs custom_ops)
endif()

if(EXECUTORCH_ENABLE_EVENT_TRACER)
Expand Down Expand Up @@ -1322,6 +1323,12 @@ if(EXECUTORCH_BUILD_ANDROID_JNI)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/android)
endif()

# Examples — EXCLUDE_FROM_ALL so they only build when explicitly requested
# (e.g., cmake --build cmake-out --target llama_main).
add_subdirectory(
${CMAKE_CURRENT_SOURCE_DIR}/examples/models/llama EXCLUDE_FROM_ALL
)

include(Test.cmake)

install(
Expand Down
71 changes: 71 additions & 0 deletions CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,35 @@
"install"
],
"jobs": 0
},
{
"name": "llama-release-build",
"displayName": "Build llama_main (Release)",
"configurePreset": "llm-release",
"configuration": "Release",
"targets": [
"llama_main"
],
"jobs": 0
},
{
"name": "llama-cuda-build",
"displayName": "Build llama_main with CUDA (Release)",
"configurePreset": "llm-release-cuda",
"configuration": "Release",
"targets": [
"llama_main"
],
"jobs": 0
},
{
"name": "llama-cuda-debug-build",
"displayName": "Build llama_main with CUDA (Debug)",
"configurePreset": "llm-debug-cuda",
"targets": [
"llama_main"
],
"jobs": 0
}
],
"workflowPresets": [
Expand Down Expand Up @@ -584,6 +613,48 @@
"name": "mlx-debug-install"
}
]
},
{
"name": "llama-release",
"displayName": "Configure and build llama_main (Release, CPU)",
"steps": [
{
"type": "configure",
"name": "llm-release"
},
{
"type": "build",
"name": "llama-release-build"
}
]
},
{
"name": "llama-cuda",
"displayName": "Configure and build llama_main (Release, CUDA)",
"steps": [
{
"type": "configure",
"name": "llm-release-cuda"
},
{
"type": "build",
"name": "llama-cuda-build"
}
]
},
{
"name": "llama-cuda-debug",
"displayName": "Configure and build llama_main (Debug, CUDA)",
"steps": [
{
"type": "configure",
"name": "llm-debug-cuda"
},
{
"type": "build",
"name": "llama-cuda-debug-build"
}
]
}
]
}
12 changes: 3 additions & 9 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -342,28 +342,22 @@ silero-vad-cpu:
@echo " Binary: cmake-out/examples/models/silero_vad/silero_vad_stream_runner"

llama-cpu:
@echo "==> Building and installing ExecuTorch..."
cmake --workflow --preset llm-release
@echo "==> Building Llama runner (CPU)..."
cd examples/models/llama && cmake --workflow --preset llama-release
cmake --workflow --preset llama-release
@echo ""
@echo "✓ Build complete!"
@echo " Binary: cmake-out/examples/models/llama/llama_main"

llama-cuda:
@echo "==> Building and installing ExecuTorch with CUDA..."
cmake --workflow --preset llm-release-cuda
@echo "==> Building Llama runner with CUDA..."
cd examples/models/llama && cmake --workflow --preset llama-cuda
cmake --workflow --preset llama-cuda
@echo ""
@echo "✓ Build complete!"
@echo " Binary: cmake-out/examples/models/llama/llama_main"

llama-cuda-debug:
@echo "==> Building and installing ExecuTorch with CUDA (debug mode)..."
cmake --workflow --preset llm-debug-cuda
@echo "==> Building Llama runner with CUDA (debug mode)..."
cd examples/models/llama && cmake --workflow --preset llama-cuda-debug
cmake --workflow --preset llama-cuda-debug
@echo ""
@echo "✓ Build complete!"
@echo " Binary: cmake-out/examples/models/llama/llama_main"
Expand Down
Loading
Loading