Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/transcription-parakeet/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Makefile
!CMakeLists.txt
!cmake/*.cmake
!vcpkg/**/*.cmake
!vcpkg-overlay-ports/**/*.cmake
compile_commands.json
.vscode/
.idea/
Expand Down
6 changes: 6 additions & 0 deletions packages/transcription-parakeet/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,12 @@ if(ANDROID)
-Wl,-z,max-page-size=16384
-Wl,-z,common-page-size=16384
)
# [DO NOT MERGE] Device-Farm Mali RCA (qvac PR #2476): the abort-capture
# instrumentation in ParakeetModel.cpp calls __android_log_print, which lives
# in liblog. Without linking it the symbol is undefined in the .bare and the
# addon fails to dlopen at runtime (bare reports ADDON_NOT_FOUND). Strip with
# the rest of the test scaffolding.
target_link_libraries(${qvac_lib_infer_parakeet}_module PRIVATE log)
endif()

if(APPLE)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
#include <stdexcept>
#include <vector>

#ifdef __ANDROID__
#include <android/log.h>
#endif

#include <parakeet/parakeet.h>

#include "ggml.h"
Expand Down Expand Up @@ -124,13 +128,39 @@ void ggmlLogTrampoline(ggml_log_level level, const char * text, void * /*user_da
ggml_log_buf().erase(0, nl + 1);
if (line.empty()) continue;
QLOG(ggmlLevelToPriority(ggml_log_buf_level()), line);
#ifdef __ANDROID__
// [DO NOT MERGE] Device-Farm RCA (qvac PR #2476): mirror every ggml line
// to logcat so the Vulkan device-caps banner (emitted at DEBUG, and a
// no-op through QLOG unless setLogger() is on) is visible in Device-Farm
// logs. Strip with the rest of the test scaffolding.
__android_log_print(ANDROID_LOG_INFO, "ggml", "%s", line.c_str());
#endif
}
}

// [DO NOT MERGE] Device-Farm Mali RCA (qvac PR #2476): the RN/bare runtime
// does not forward native stderr to logcat, so a ggml_abort()/GGML_ASSERT
// failure is invisible in Device-Farm logs. ggml exposes a synchronous abort
// callback (ggml_set_abort_callback, llama.cpp upstream) invoked with the
// fully-formatted "file:line: message" immediately before abort(). Forward it
// straight to logcat via __android_log_print — NOT QLOG, which is a no-op
// unless setLogger() is called from JS and would hop to the very JS thread
// that is aborting. Strip with the rest of the test scaffolding.
void ggmlAbortTrampoline(const char * message) {
  // Abort hook handed to ggml: forwards the abort message to a sink that is
  // visible on the current platform (logcat on Android, stderr elsewhere).
  // A null message is replaced by a placeholder so "%s" never receives a
  // null pointer.
  const char * text = "(null)";
  if (message != nullptr) {
    text = message;
  }
#if defined(__ANDROID__)
  __android_log_print(ANDROID_LOG_FATAL, "ggml_abort", "%s", text);
#else
  std::fprintf(stderr, "ggml_abort: %s\n", text);
#endif
}

void installGgmlLogTrampolineOnce() {
  // Registers the ggml log and abort hooks exactly once per process, no
  // matter how many times (or from how many threads) this is called.
  static std::once_flag hooksInstalled;
  const auto registerHooks = [] {
    ggml_log_set(&ggmlLogTrampoline, nullptr);
    // [DO NOT MERGE] capture the Mali GGML_ASSERT — see ggmlAbortTrampoline.
    ggml_set_abort_callback(&ggmlAbortTrampoline);
  };
  std::call_once(hooksInstalled, registerHooks);
}

Expand Down Expand Up @@ -221,17 +251,9 @@ void ParakeetModel::cleanupTempFile_() {
void ParakeetModel::load() {
if (is_loaded_) return;

// Force useGPU to false in Android until Vulkan and OpenCL are stabilized
#ifdef __ANDROID__
if (cfg_.useGPU) {
QLOG(
logger::Priority::WARNING,
"Parakeet: useGPU=true is currently ignored on Android "
"(GPU backends disabled at engine boundary pending Vulkan/Mali "
"and OpenCL/Adreno driver fixes); falling back to CPU.");
cfg_.useGPU = false;
}
#endif
// DO NOT MERGE (device-farm Vulkan test): the Android useGPU=false guard is
// removed so useGPU=true reaches the engine and the Parakeet Android tests
// exercise the Vulkan backend on the Device Farm Adreno/Mali devices.

QLOG(logger::Priority::INFO,
"Loading Parakeet GGUF (modelType hint: " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,20 @@ async function runMobilePerfCase (t, opts) {

t.ok(receivedStats.length >= NUM_TRANSCRIPTIONS, `${modelLabel} ${epLabel} should receive JobEnded stats for every run (got ${receivedStats.length})`)
t.ok(timings.length === NUM_TRANSCRIPTIONS, `${modelLabel} ${epLabel} should complete ${NUM_TRANSCRIPTIONS} transcriptions (got ${timings.length})`)

// DO NOT MERGE (device-farm Vulkan validation): assert the Android GPU run
// actually selected the Vulkan backend (backendId 3), not OpenCL (4) or a
// CPU fallback (0). Both forced-Vulkan Device Farm devices (Adreno + Mali)
// must report Vulkan. BackendId: 0=CPU 1=Metal 2=CUDA 3=Vulkan 4=OpenCL.
if (useGPU && platform.startsWith('android')) {
const finalStats = receivedStats.length > 0
? receivedStats[receivedStats.length - 1].stats
: null
const backendId = finalStats ? finalStats.backendId : null
t.ok(backendId === 3,
`${modelLabel} ${epLabel} Android use_gpu=true must select Vulkan (backendId=3); got ${backendId}`)
}

console.log(`✅ Mobile perf case ${modelLabel} ${epLabel} completed successfully!\n`)
} finally {
console.log('=== Cleanup ===')
Expand Down
3 changes: 3 additions & 0 deletions packages/transcription-parakeet/vcpkg-configuration.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,8 @@
"spirv-headers"
]
}
],
"overlay-ports": [
"./vcpkg-overlay-ports"
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Detect the Vulkan version shipped with the Android NDK by parsing
# vulkan_core.h from the NDK sysroot.
#
# Inputs:
#   ENV{ANDROID_NDK_HOME}   - root directory of the Android NDK (required).
#   CMAKE_HOST_SYSTEM_NAME  - used to pick the prebuilt toolchain directory
#                             (<ndk>/toolchains/llvm/prebuilt/<host>-*).
# Output:
#   vulkan_version (caller's scope) - "<major>.<minor>.<VK_HEADER_VERSION>",
#                                     e.g. "1.3.275".
# Errors:
#   Stops with FATAL_ERROR when ANDROID_NDK_HOME is unset, the prebuilt host
#   directory or vulkan_core.h cannot be found, or the version macros cannot
#   be parsed.
function(detect_ndk_vulkan_version)
  # Without this guard an unset ANDROID_NDK_HOME would glob from the
  # filesystem root and fail with a misleading "host directory" error.
  set(ndk_home "$ENV{ANDROID_NDK_HOME}")
  if("${ndk_home}" STREQUAL "")
    message(FATAL_ERROR
      "ANDROID_NDK_HOME is not set; cannot locate the NDK Vulkan headers")
  endif()

  string(TOLOWER "${CMAKE_HOST_SYSTEM_NAME}" host_system_name_lower)

  # The prebuilt directory is named <host>-<arch> (e.g. linux-x86_64); take
  # the first match for this host.
  file(GLOB host_dirs LIST_DIRECTORIES true
    "${ndk_home}/toolchains/llvm/prebuilt/${host_system_name_lower}-*")
  if(NOT host_dirs)
    message(FATAL_ERROR "Could not find NDK host directory for ${host_system_name_lower}")
  endif()
  list(GET host_dirs 0 host_dir)

  # Use the path returned by the glob directly instead of rebuilding it from
  # the directory name.
  set(vulkan_core_h "${host_dir}/sysroot/usr/include/vulkan/vulkan_core.h")
  if(NOT EXISTS "${vulkan_core_h}")
    message(FATAL_ERROR "vulkan_core.h not found at ${vulkan_core_h}")
  endif()

  file(READ "${vulkan_core_h}" header_content)

  # Patch component: the plain VK_HEADER_VERSION number.
  string(REGEX MATCH "VK_HEADER_VERSION ([0-9]+)" version_match "${header_content}")
  if(NOT version_match)
    message(FATAL_ERROR "Could not extract VK_HEADER_VERSION from ${vulkan_core_h}")
  endif()
  set(header_version_3 "${CMAKE_MATCH_1}")

  # Major/minor components: the 2nd and 3rd arguments of
  # VK_MAKE_API_VERSION(variant, major, minor, ...) in
  # VK_HEADER_VERSION_COMPLETE. The 1st argument (variant) is not used.
  string(REGEX MATCH
    "VK_HEADER_VERSION_COMPLETE VK_MAKE_API_VERSION\\(([0-9]+), ([0-9]+), ([0-9]+)"
    version_match "${header_content}")
  if(NOT version_match)
    message(FATAL_ERROR "Could not extract VK_HEADER_VERSION_COMPLETE from ${vulkan_core_h}")
  endif()
  set(major "${CMAKE_MATCH_2}")
  set(minor "${CMAKE_MATCH_3}")

  set(vulkan_version "${major}.${minor}.${header_version_3}" PARENT_SCOPE)
endfunction()
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
diff --git a/src/ggml-vulkan/CMakeLists.txt b/src/ggml-vulkan/CMakeLists.txt
index 715a263a..3d92ac5d 100644
--- a/src/ggml-vulkan/CMakeLists.txt
+++ b/src/ggml-vulkan/CMakeLists.txt
@@ -7,6 +7,7 @@ if (POLICY CMP0147)
endif()

find_package(Vulkan COMPONENTS glslc REQUIRED)
+find_package(SPIRV-Headers QUIET)

if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# Parallel build object files
@@ -87,6 +88,11 @@ if (Vulkan_FOUND)
)

target_link_libraries(ggml-vulkan PRIVATE Vulkan::Vulkan)
+
+ if (TARGET SPIRV-Headers::SPIRV-Headers)
+ target_link_libraries(ggml-vulkan PRIVATE SPIRV-Headers::SPIRV-Headers)
+ endif()
+
target_include_directories(ggml-vulkan PRIVATE ${CMAKE_CURRENT_BINARY_DIR})

# Workaround to the "can't dereference invalidated vector iterator" bug in clang-cl debug build
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
# ggml-speech: OVERLAY pinned to the tetherto/qvac-ext-ggml PR #14 head
# (commit e6578d01) = the Adreno-Vulkan guards PLUS the generic descriptor-set
# pool grow-on-demand fix for the Mali-G715 EOU-Vulkan crash. This is the CLEAN
# commit that lands on PR #14 (no RCA instrumentation). DO NOT MERGE this
# overlay: device-farm validation vehicle only. Pairs with the Android
# default-features below dropping OpenCL so the Parakeet Android tests exercise
# Vulkan.

# Fetch the ggml sources from GitHub at the pinned commit given by REF and
# expose the extracted tree as SOURCE_PATH. SHA512 must correspond to the
# archive for that exact REF, so update both together when re-pinning.
# NOTE(review): HEAD_REF names a feature branch and is presumably only
# consulted for `--head` builds — confirm against the vcpkg documentation.
vcpkg_from_github(
OUT_SOURCE_PATH SOURCE_PATH
REPO tetherto/qvac-ext-ggml
REF e6578d0139235e0abd40804faf01f19bed15e414
SHA512 1ac36882d355781b81f7e77b3cf06c04bcf7f7b90905d3f3ec5391bacf260e046860471b0fc1cb4c1b80093de16e88611938f932ee8b74190950463d8b928c28
HEAD_REF QVAC-19213-adreno-vulkan-shmem-fix
)

# Translate the vcpkg features selected for this port into the ggml backend
# switches passed to the configure step. Each switch is ON only when its
# feature was requested and OFF otherwise.
if("metal" IN_LIST FEATURES)
  set(GGML_METAL ON)
else()
  set(GGML_METAL OFF)
endif()

# Opt-in only: the chatterbox Q-variant mul_mv + bias/residual fusion
# produces zero tokens on parakeet's EOU q8_0 joint network. Consumers whose
# models stay clear of that pattern can enable the feature for the speedup.
if("metal-fuse-mv-bias" IN_LIST FEATURES)
  set(GGML_METAL_FUSE_MV_BIAS ON)
else()
  set(GGML_METAL_FUSE_MV_BIAS OFF)
endif()

if("vulkan" IN_LIST FEATURES)
  set(GGML_VULKAN ON)
else()
  set(GGML_VULKAN OFF)
endif()

if("opencl" IN_LIST FEATURES)
  set(GGML_OPENCL ON)
else()
  set(GGML_OPENCL OFF)
endif()

# CUDA additionally needs an explicit compiler: look in the well-known CUDA
# install locations first, then fall back to a regular (required) search.
# GGML_CUDA_COMPILER_OPTION stays empty when CUDA is not requested.
if("cuda" IN_LIST FEATURES)
  set(GGML_CUDA ON)
  find_program(NVCC_EXECUTABLE nvcc
    PATHS
      /usr/local/cuda/bin
      /usr/local/cuda-12.8/bin
    NO_DEFAULT_PATH
  )
  if(NOT NVCC_EXECUTABLE)
    find_program(NVCC_EXECUTABLE nvcc REQUIRED)
  endif()
  set(GGML_CUDA_COMPILER_OPTION "-DCMAKE_CUDA_COMPILER=${NVCC_EXECUTABLE}")
  message(STATUS "CUDA compiler: ${NVCC_EXECUTABLE}")
else()
  set(GGML_CUDA OFF)
  set(GGML_CUDA_COMPILER_OPTION "")
endif()

# Android + Vulkan: download the Khronos Vulkan-Headers release whose version
# matches the NDK's vulkan_core.h, extract only its *.hpp files, and copy the
# resulting include/ tree into the ggml source tree under src/.
# NOTE(review): presumably needed because the NDK sysroot does not ship the
# C++ (vulkan.hpp) bindings — confirm.
if(VCPKG_TARGET_IS_ANDROID AND "vulkan" IN_LIST FEATURES)
  include("${CMAKE_CURRENT_LIST_DIR}/android-vulkan-version.cmake")
  detect_ndk_vulkan_version()
  message(STATUS "NDK Vulkan version: ${vulkan_version}")

  set(_vulkan_hpp_url
    "https://github.com/KhronosGroup/Vulkan-Headers/archive/refs/tags/v${vulkan_version}.tar.gz")
  set(_vulkan_hpp_archive "${SOURCE_PATH}/vulkan-hpp-${vulkan_version}.tar.gz")

  # Capture the transfer status explicitly so a failed download stops here
  # with the offending URL instead of surfacing later as an extract/copy error.
  file(DOWNLOAD
    "${_vulkan_hpp_url}"
    "${_vulkan_hpp_archive}"
    TLS_VERIFY ON
    STATUS _vulkan_hpp_status
  )
  # STATUS is a two-element list: numeric code (0 == success) and message.
  list(GET _vulkan_hpp_status 0 _vulkan_hpp_status_code)
  if(NOT _vulkan_hpp_status_code EQUAL 0)
    list(GET _vulkan_hpp_status 1 _vulkan_hpp_status_text)
    message(FATAL_ERROR
      "Failed to download ${_vulkan_hpp_url}: "
      "${_vulkan_hpp_status_text} (code ${_vulkan_hpp_status_code})")
  endif()

  file(ARCHIVE_EXTRACT
    INPUT "${_vulkan_hpp_archive}"
    DESTINATION "${SOURCE_PATH}"
    PATTERNS "*.hpp"
  )
  file(COPY "${SOURCE_PATH}/Vulkan-Headers-${vulkan_version}/include/"
    DESTINATION "${SOURCE_PATH}/src/")
endif()

# Extra configure options that apply only to specific target platforms.
# Start from an unset variable so nothing leaks in from the environment.
unset(PLATFORM_OPTIONS)

# iOS: build without the BLAS and Accelerate backends.
if(VCPKG_TARGET_IS_IOS)
  list(APPEND PLATFORM_OPTIONS
    -DGGML_BLAS=OFF
    -DGGML_ACCELERATE=OFF
  )
endif()

# Android uses a hybrid backend mode:
#   * GPU backends are built as MODULE .so files loaded at runtime via dlopen.
#   * The CPU backend is built as per-arch MODULE .so variants (one per
#     ARMv8.0/8.2/8.6/9.0/9.2 feature tier), also loaded via dlopen.
# The downstream addon installs the resulting
# libqvac-speech-ggml-cpu-android_armv*.so files alongside the .bare binary;
# the per-variant scoring in ggml-cpu's `ggml_backend_cpu_aarch64_score` then
# picks the highest tier the running device supports at first use. This pairs
# with the speech-branch `ggml-backend: android per-arch CPU variant dlopen
# fallback` patch (commit 9562ed04) so the variant lookup also succeeds when
# the consumer APK keeps native .so files compressed
# (AGP `useLegacyPackaging=false`).
if(VCPKG_TARGET_IS_ANDROID)
  set(_android_backend_options
    -DGGML_BACKEND_DL=ON
    -DGGML_CPU_ALL_VARIANTS=ON
    -DGGML_CPU_REPACK=ON
    -DGGML_VULKAN_DISABLE_COOPMAT=ON
    -DGGML_VULKAN_DISABLE_COOPMAT2=ON
  )
  list(APPEND PLATFORM_OPTIONS ${_android_backend_options})
endif()

# Vulkan builds need a small source patch. PR #13 (v0.10.2 sync) introduces
# an unconditional `#include <spirv/unified1/spirv.hpp>` in
# src/ggml-vulkan/ggml-vulkan.cpp, but the upstream ggml-vulkan CMakeLists.txt
# never finds spirv-headers nor wires its include dir into the ggml-vulkan
# target. The patch adds that wiring (and the vulkan feature in vcpkg.json
# depends on spirv-headers).
# TODO: push the equivalent fix upstream and drop this patch.
if("vulkan" IN_LIST FEATURES)
  set(_spirv_headers_patch
    "${CMAKE_CURRENT_LIST_DIR}/patches/0001-ggml-vulkan-find-spirv-headers.patch")
  vcpkg_apply_patches(
    SOURCE_PATH "${SOURCE_PATH}"
    PATCHES "${_spirv_headers_patch}"
  )
endif()

# Configure and install ggml.
# - Fixed options: static libraries, no native-CPU tuning, no ccache/OpenMP/
#   llamafile, and no tests or examples.
# - GPU backend switches come from the GGML_* variables computed from the
#   selected vcpkg features earlier in this portfile.
# - GGML_LIB_OUTPUT_PREFIX makes the produced libraries carry the
#   "qvac-speech-" prefix.
# - GGML_CUDA_COMPILER_OPTION and PLATFORM_OPTIONS are expanded unquoted on
#   purpose: when empty they contribute no argument at all.
vcpkg_cmake_configure(
SOURCE_PATH "${SOURCE_PATH}"
OPTIONS
-DBUILD_SHARED_LIBS=OFF
-DGGML_NATIVE=OFF
-DGGML_CCACHE=OFF
-DGGML_OPENMP=OFF
-DGGML_LLAMAFILE=OFF
-DGGML_BUILD_TESTS=OFF
-DGGML_BUILD_EXAMPLES=OFF
-DGGML_METAL=${GGML_METAL}
-DGGML_VULKAN=${GGML_VULKAN}
-DGGML_CUDA=${GGML_CUDA}
-DGGML_OPENCL=${GGML_OPENCL}
-DGGML_METAL_FUSE_MV_BIAS=${GGML_METAL_FUSE_MV_BIAS}
-DGGML_LIB_OUTPUT_PREFIX=qvac-speech-
${GGML_CUDA_COMPILER_OPTION}
${PLATFORM_OPTIONS}
)

vcpkg_cmake_install()

# Android dynamic-backend mode: ggml builds its MODULE backend .so files into
# the build tree's bin/ directory, and cmake install() doesn't move them by
# default. Copy whatever the release build produced into the package's lib/
# directory; do nothing when no such file exists.
if(VCPKG_TARGET_IS_ANDROID)
  set(_release_bin_dir "${CURRENT_BUILDTREES_DIR}/${TARGET_TRIPLET}-rel/bin")
  file(GLOB _backend_modules "${_release_bin_dir}/libqvac-speech-ggml-*.so")
  if(_backend_modules)
    file(INSTALL ${_backend_modules}
      DESTINATION "${CURRENT_PACKAGES_DIR}/lib"
    )
  endif()
endif()

vcpkg_cmake_config_fixup(PACKAGE_NAME ggml CONFIG_PATH lib/cmake/ggml)

# ggml installs ggml.pc under share/pkgconfig. For both the release and the
# debug package prefix, move it to lib/pkgconfig (when present) and drop the
# share/pkgconfig directory, then let vcpkg fix up the .pc files.
foreach(_pkg_prefix IN ITEMS
    "${CURRENT_PACKAGES_DIR}"
    "${CURRENT_PACKAGES_DIR}/debug")
  set(_pc_from "${_pkg_prefix}/share/pkgconfig/ggml.pc")
  if(EXISTS "${_pc_from}")
    file(MAKE_DIRECTORY "${_pkg_prefix}/lib/pkgconfig")
    file(RENAME "${_pc_from}" "${_pkg_prefix}/lib/pkgconfig/ggml.pc")
  endif()
  file(REMOVE_RECURSE "${_pkg_prefix}/share/pkgconfig")
endforeach()

vcpkg_fixup_pkgconfig()

# Headers and share/ content are only shipped from the release tree; remove
# the debug duplicates.
file(REMOVE_RECURSE
  "${CURRENT_PACKAGES_DIR}/debug/include"
  "${CURRENT_PACKAGES_DIR}/debug/share"
)

# Allow the release and debug trees to contain a different number of
# binaries. NOTE(review): presumably required because the Android backend
# modules are copied from the release build tree only — confirm.
set(VCPKG_POLICY_MISMATCHED_NUMBER_OF_BINARIES enabled)

# Ship the usage notes and the license with the package.
file(INSTALL "${CMAKE_CURRENT_LIST_DIR}/usage"
  DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}"
)
vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/LICENSE")
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
The package ggml provides CMake integration:

find_package(ggml CONFIG REQUIRED)
target_link_libraries(main PRIVATE ggml::ggml)

Available vcpkg features:
metal - Metal GPU backend (macOS/iOS, auto-enabled on Apple)
vulkan - Vulkan GPU backend
cuda - CUDA GPU backend
opencl - OpenCL GPU backend
Loading
Loading