From 6fc214ea955ebb36dc60fd6e791cec524a505701 Mon Sep 17 00:00:00 2001 From: Pratik Narola Date: Mon, 8 Jun 2026 13:04:38 +0530 Subject: [PATCH 01/10] DO NOT MERGE - QVAC-19214 transcription-parakeet: Vulkan device-farm test Overlay-pins ggml-speech to qvac-ext-ggml PR #14 (commit 8bf760f4 -- Adreno Vulkan support + the Parakeet quantized-matmul fix) and forces the Parakeet Android tests onto Vulkan, to validate on Device Farm (Samsung S25 Ultra / Adreno + Pixel 9 / Mali) that Vulkan runs Parakeet correctly and nothing else breaks. NOT for merge -- the overlay + GPU-guard removal are test-only. - vcpkg-overlay-ports/ggml-speech: pinned to qvac-ext-ggml@8bf760f4; Android default-features drop OpenCL (Vulkan-only) so the loader has no OpenCL .so and the tier policy selects Vulkan on Adreno. - vcpkg-configuration.json: register ./vcpkg-overlay-ports. - vcpkg.json: Android parakeet-cpp features=["vulkan"] + default-features:false. - ParakeetModel.cpp: remove the Android useGPU=false guard so useGPU=true reaches the engine. - mobile-perf-runner.js: assert Android use_gpu runs select Vulkan (backendId=3). 
--- packages/transcription-parakeet/.gitignore | 1 + .../parakeet/ParakeetModel.cpp | 14 +- .../test/integration/mobile-perf-runner.js | 14 ++ .../vcpkg-configuration.json | 3 + .../ggml-speech/android-vulkan-version.cmake | 37 ++++ .../0001-ggml-vulkan-find-spirv-headers.patch | 24 +++ .../ggml-speech/portfile.cmake | 172 ++++++++++++++++++ .../vcpkg-overlay-ports/ggml-speech/usage | 10 + .../ggml-speech/vcpkg.json | 53 ++++++ packages/transcription-parakeet/vcpkg.json | 3 +- 10 files changed, 319 insertions(+), 12 deletions(-) create mode 100644 packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/android-vulkan-version.cmake create mode 100644 packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/patches/0001-ggml-vulkan-find-spirv-headers.patch create mode 100644 packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake create mode 100644 packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/usage create mode 100644 packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/vcpkg.json diff --git a/packages/transcription-parakeet/.gitignore b/packages/transcription-parakeet/.gitignore index 1696b4b7f7..87220aa150 100644 --- a/packages/transcription-parakeet/.gitignore +++ b/packages/transcription-parakeet/.gitignore @@ -22,6 +22,7 @@ Makefile !CMakeLists.txt !cmake/*.cmake !vcpkg/**/*.cmake +!vcpkg-overlay-ports/**/*.cmake compile_commands.json .vscode/ .idea/ diff --git a/packages/transcription-parakeet/addon/src/model-interface/parakeet/ParakeetModel.cpp b/packages/transcription-parakeet/addon/src/model-interface/parakeet/ParakeetModel.cpp index adf4a736a6..717e32c30f 100644 --- a/packages/transcription-parakeet/addon/src/model-interface/parakeet/ParakeetModel.cpp +++ b/packages/transcription-parakeet/addon/src/model-interface/parakeet/ParakeetModel.cpp @@ -221,17 +221,9 @@ void ParakeetModel::cleanupTempFile_() { void ParakeetModel::load() { if (is_loaded_) return; - // Force useGPU to false in Android 
until Vulkan and OpenCL are stabilized -#ifdef __ANDROID__ - if (cfg_.useGPU) { - QLOG( - logger::Priority::WARNING, - "Parakeet: useGPU=true is currently ignored on Android " - "(GPU backends disabled at engine boundary pending Vulkan/Mali " - "and OpenCL/Adreno driver fixes); falling back to CPU."); - cfg_.useGPU = false; - } -#endif + // DO NOT MERGE (device-farm Vulkan test): the Android useGPU=false guard is + // removed so useGPU=true reaches the engine and the Parakeet Android tests + // exercise the Vulkan backend on the Device Farm Adreno/Mali devices. QLOG(logger::Priority::INFO, "Loading Parakeet GGUF (modelType hint: " + diff --git a/packages/transcription-parakeet/test/integration/mobile-perf-runner.js b/packages/transcription-parakeet/test/integration/mobile-perf-runner.js index f6fe6af565..8f446fc2d3 100644 --- a/packages/transcription-parakeet/test/integration/mobile-perf-runner.js +++ b/packages/transcription-parakeet/test/integration/mobile-perf-runner.js @@ -152,6 +152,20 @@ async function runMobilePerfCase (t, opts) { t.ok(receivedStats.length >= NUM_TRANSCRIPTIONS, `${modelLabel} ${epLabel} should receive JobEnded stats for every run (got ${receivedStats.length})`) t.ok(timings.length === NUM_TRANSCRIPTIONS, `${modelLabel} ${epLabel} should complete ${NUM_TRANSCRIPTIONS} transcriptions (got ${timings.length})`) + + // DO NOT MERGE (device-farm Vulkan validation): assert the Android GPU run + // actually selected the Vulkan backend (backendId 3), not OpenCL (4) or a + // CPU fallback (0). Both forced-Vulkan Device Farm devices (Adreno + Mali) + // must report Vulkan. BackendId: 0=CPU 1=Metal 2=CUDA 3=Vulkan 4=OpenCL. + if (useGPU && platform.startsWith('android')) { + const finalStats = receivedStats.length > 0 + ? receivedStats[receivedStats.length - 1].stats + : null + const backendId = finalStats ? 
finalStats.backendId : null + t.ok(backendId === 3, + `${modelLabel} ${epLabel} Android use_gpu=true must select Vulkan (backendId=3); got ${backendId}`) + } + console.log(`✅ Mobile perf case ${modelLabel} ${epLabel} completed successfully!\n`) } finally { console.log('=== Cleanup ===') diff --git a/packages/transcription-parakeet/vcpkg-configuration.json b/packages/transcription-parakeet/vcpkg-configuration.json index 04e7630640..9dc16448d2 100644 --- a/packages/transcription-parakeet/vcpkg-configuration.json +++ b/packages/transcription-parakeet/vcpkg-configuration.json @@ -15,5 +15,8 @@ "spirv-headers" ] } + ], + "overlay-ports": [ + "./vcpkg-overlay-ports" ] } diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/android-vulkan-version.cmake b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/android-vulkan-version.cmake new file mode 100644 index 0000000000..16ac7c0826 --- /dev/null +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/android-vulkan-version.cmake @@ -0,0 +1,37 @@ +# Detect the Vulkan version shipped with the Android NDK by parsing +# vulkan_core.h from the NDK sysroot. Sets `vulkan_version` in the +# caller's scope (e.g. "1.3.275"). 
+function(detect_ndk_vulkan_version) + string(TOLOWER "${CMAKE_HOST_SYSTEM_NAME}" host_system_name_lower) + + file(GLOB host_dirs LIST_DIRECTORIES true "$ENV{ANDROID_NDK_HOME}/toolchains/llvm/prebuilt/${host_system_name_lower}-*") + if(host_dirs) + list(GET host_dirs 0 host_dir) + get_filename_component(host_arch "${host_dir}" NAME) + set(vulkan_core_h "$ENV{ANDROID_NDK_HOME}/toolchains/llvm/prebuilt/${host_arch}/sysroot/usr/include/vulkan/vulkan_core.h") + else() + message(FATAL_ERROR "Could not find NDK host directory for ${host_system_name_lower}") + endif() + + if(NOT EXISTS "${vulkan_core_h}") + message(FATAL_ERROR "vulkan_core.h not found at ${vulkan_core_h}") + endif() + + file(READ "${vulkan_core_h}" header_content) + string(REGEX MATCH "VK_HEADER_VERSION ([0-9]+)" version_match "${header_content}") + if(version_match) + set(header_version_3 "${CMAKE_MATCH_1}") + else() + message(FATAL_ERROR "Could not extract VK_HEADER_VERSION from ${vulkan_core_h}") + endif() + + # Extract major.minor version from VK_HEADER_VERSION_COMPLETE for download URL + string(REGEX MATCH "VK_HEADER_VERSION_COMPLETE VK_MAKE_API_VERSION\\(([0-9]+), ([0-9]+), ([0-9]+)" version_match "${header_content}") + if(version_match) + set(major "${CMAKE_MATCH_2}") + set(minor "${CMAKE_MATCH_3}") + set(vulkan_version "${major}.${minor}.${header_version_3}" PARENT_SCOPE) + else() + message(FATAL_ERROR "Could not extract VK_HEADER_VERSION_COMPLETE from ${vulkan_core_h}") + endif() +endfunction() diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/patches/0001-ggml-vulkan-find-spirv-headers.patch b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/patches/0001-ggml-vulkan-find-spirv-headers.patch new file mode 100644 index 0000000000..7906d49727 --- /dev/null +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/patches/0001-ggml-vulkan-find-spirv-headers.patch @@ -0,0 +1,24 @@ +diff --git a/src/ggml-vulkan/CMakeLists.txt 
b/src/ggml-vulkan/CMakeLists.txt +index 715a263a..3d92ac5d 100644 +--- a/src/ggml-vulkan/CMakeLists.txt ++++ b/src/ggml-vulkan/CMakeLists.txt +@@ -7,6 +7,7 @@ if (POLICY CMP0147) + endif() + + find_package(Vulkan COMPONENTS glslc REQUIRED) ++find_package(SPIRV-Headers QUIET) + + if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + # Parallel build object files +@@ -87,6 +88,11 @@ if (Vulkan_FOUND) + ) + + target_link_libraries(ggml-vulkan PRIVATE Vulkan::Vulkan) ++ ++ if (TARGET SPIRV-Headers::SPIRV-Headers) ++ target_link_libraries(ggml-vulkan PRIVATE SPIRV-Headers::SPIRV-Headers) ++ endif() ++ + target_include_directories(ggml-vulkan PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) + + # Workaround to the "can't dereference invalidated vector iterator" bug in clang-cl debug build diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake new file mode 100644 index 0000000000..a943134660 --- /dev/null +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake @@ -0,0 +1,172 @@ +# ggml-speech: OVERLAY pinned to tetherto/qvac-ext-ggml PR #14 (commit +# 8bf760f4) -- Adreno 740 Vulkan support + the Parakeet quantized-matmul +# fix (disable integer-dot MMQ + force a quantized src0 -> f16 dequant on +# Qualcomm, so the int8-only shaders the Adreno SPIR-V compiler rejects are +# never used). DO NOT MERGE: device-farm validation overlay only. Pairs +# with the Android default-features below dropping OpenCL so the Parakeet +# Android tests exercise the Vulkan backend. 
+ +vcpkg_from_github( + OUT_SOURCE_PATH SOURCE_PATH + REPO tetherto/qvac-ext-ggml + REF 8bf760f41a3f510216c6f1a80ca8b5795c1b2ffd + SHA512 5c2f894cc719f97d7ca8a5e52be40be4639881d162d4b5a695ef4ce05ad7ee4083353a1d2dca8e8bab330d080348a8548a577fb40054a452be6ff104a129b946 + HEAD_REF QVAC-19213-adreno-vulkan-shmem-fix +) + +set(GGML_METAL OFF) +set(GGML_VULKAN OFF) +set(GGML_CUDA OFF) +set(GGML_OPENCL OFF) +set(GGML_METAL_FUSE_MV_BIAS OFF) + +if("metal" IN_LIST FEATURES) + set(GGML_METAL ON) +endif() + +# Off by default: the chatterbox Q-variant mul_mv + bias/residual fusion +# produces zero tokens on parakeet's EOU q8_0 joint network. Consumers +# whose models stay clear of that pattern can opt in for the speedup. +if("metal-fuse-mv-bias" IN_LIST FEATURES) + set(GGML_METAL_FUSE_MV_BIAS ON) +endif() + +if("vulkan" IN_LIST FEATURES) + set(GGML_VULKAN ON) +endif() + +set(GGML_CUDA_COMPILER_OPTION "") + +if("cuda" IN_LIST FEATURES) + set(GGML_CUDA ON) + find_program(NVCC_EXECUTABLE nvcc + PATHS /usr/local/cuda/bin /usr/local/cuda-12.8/bin + NO_DEFAULT_PATH + ) + if(NOT NVCC_EXECUTABLE) + find_program(NVCC_EXECUTABLE nvcc REQUIRED) + endif() + set(GGML_CUDA_COMPILER_OPTION "-DCMAKE_CUDA_COMPILER=${NVCC_EXECUTABLE}") + message(STATUS "CUDA compiler: ${NVCC_EXECUTABLE}") +endif() + +if("opencl" IN_LIST FEATURES) + set(GGML_OPENCL ON) +endif() + +if(VCPKG_TARGET_IS_ANDROID AND "vulkan" IN_LIST FEATURES) + include(${CMAKE_CURRENT_LIST_DIR}/android-vulkan-version.cmake) + detect_ndk_vulkan_version() + message(STATUS "NDK Vulkan version: ${vulkan_version}") + + file(DOWNLOAD + "https://github.com/KhronosGroup/Vulkan-Headers/archive/refs/tags/v${vulkan_version}.tar.gz" + "${SOURCE_PATH}/vulkan-hpp-${vulkan_version}.tar.gz" + TLS_VERIFY ON + ) + file(ARCHIVE_EXTRACT + INPUT "${SOURCE_PATH}/vulkan-hpp-${vulkan_version}.tar.gz" + DESTINATION "${SOURCE_PATH}" + PATTERNS "*.hpp" + ) + file(COPY "${SOURCE_PATH}/Vulkan-Headers-${vulkan_version}/include/" + DESTINATION 
"${SOURCE_PATH}/src/") +endif() + +set(PLATFORM_OPTIONS) + +if(VCPKG_TARGET_IS_IOS) + list(APPEND PLATFORM_OPTIONS -DGGML_BLAS=OFF -DGGML_ACCELERATE=OFF) +endif() + +# Hybrid Android backend mode: GPU backends as MODULE .so loaded at runtime +# via dlopen, CPU built as per-arch MODULE .so variants (one per ARMv8.0/ +# 8.2/8.6/9.0/9.2 feature tier) also loaded at runtime via dlopen. The +# downstream addon installs the resulting libqvac-speech-ggml-cpu-android_armv* +# .so files alongside the .bare binary; the per-variant scoring in +# ggml-cpu's `ggml_backend_cpu_aarch64_score` then picks the highest tier +# the running device supports at first use. Pairs with the speech-branch +# `ggml-backend: android per-arch CPU variant dlopen fallback` patch +# (commit 9562ed04) so the variant lookup also succeeds when the consumer +# APK keeps native .so files compressed (AGP `useLegacyPackaging=false`). +if(VCPKG_TARGET_IS_ANDROID) + list(APPEND PLATFORM_OPTIONS + -DGGML_BACKEND_DL=ON + -DGGML_CPU_ALL_VARIANTS=ON + -DGGML_CPU_REPACK=ON + -DGGML_VULKAN_DISABLE_COOPMAT=ON + -DGGML_VULKAN_DISABLE_COOPMAT2=ON + ) +endif() + +# PR #13 (v0.10.2 sync) introduces an unconditional +# `#include <spirv/unified1/spirv.hpp>` in src/ggml-vulkan/ggml-vulkan.cpp, +# but the upstream ggml-vulkan CMakeLists.txt never finds spirv-headers nor +# wires its include dir into the ggml-vulkan target. Apply a small patch +# so it does (and depend on spirv-headers in vcpkg.json's vulkan feature). +# TODO: push the equivalent fix upstream and drop this patch. 
+if("vulkan" IN_LIST FEATURES) + vcpkg_apply_patches( + SOURCE_PATH "${SOURCE_PATH}" + PATCHES + "${CMAKE_CURRENT_LIST_DIR}/patches/0001-ggml-vulkan-find-spirv-headers.patch" + ) +endif() + +vcpkg_cmake_configure( + SOURCE_PATH "${SOURCE_PATH}" + OPTIONS + -DBUILD_SHARED_LIBS=OFF + -DGGML_NATIVE=OFF + -DGGML_CCACHE=OFF + -DGGML_OPENMP=OFF + -DGGML_LLAMAFILE=OFF + -DGGML_BUILD_TESTS=OFF + -DGGML_BUILD_EXAMPLES=OFF + -DGGML_METAL=${GGML_METAL} + -DGGML_VULKAN=${GGML_VULKAN} + -DGGML_CUDA=${GGML_CUDA} + -DGGML_OPENCL=${GGML_OPENCL} + -DGGML_METAL_FUSE_MV_BIAS=${GGML_METAL_FUSE_MV_BIAS} + -DGGML_LIB_OUTPUT_PREFIX=qvac-speech- + ${GGML_CUDA_COMPILER_OPTION} + ${PLATFORM_OPTIONS} +) + +vcpkg_cmake_install() + +# Pick up the MODULE backend .so files ggml builds into the buildtree's +# bin/ directory (Android dynamic-backend mode). cmake install() doesn't +# move them by default. +if(VCPKG_TARGET_IS_ANDROID) + file(GLOB _backend_sos + "${CURRENT_BUILDTREES_DIR}/${TARGET_TRIPLET}-rel/bin/libqvac-speech-ggml-*.so" + ) + if(_backend_sos) + file(INSTALL ${_backend_sos} DESTINATION "${CURRENT_PACKAGES_DIR}/lib") + endif() +endif() + +vcpkg_cmake_config_fixup(PACKAGE_NAME ggml CONFIG_PATH lib/cmake/ggml) + +if(EXISTS "${CURRENT_PACKAGES_DIR}/share/pkgconfig/ggml.pc") + file(MAKE_DIRECTORY "${CURRENT_PACKAGES_DIR}/lib/pkgconfig") + file(RENAME "${CURRENT_PACKAGES_DIR}/share/pkgconfig/ggml.pc" + "${CURRENT_PACKAGES_DIR}/lib/pkgconfig/ggml.pc") +endif() +if(EXISTS "${CURRENT_PACKAGES_DIR}/debug/share/pkgconfig/ggml.pc") + file(MAKE_DIRECTORY "${CURRENT_PACKAGES_DIR}/debug/lib/pkgconfig") + file(RENAME "${CURRENT_PACKAGES_DIR}/debug/share/pkgconfig/ggml.pc" + "${CURRENT_PACKAGES_DIR}/debug/lib/pkgconfig/ggml.pc") +endif() +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/share/pkgconfig" + "${CURRENT_PACKAGES_DIR}/debug/share/pkgconfig") +vcpkg_fixup_pkgconfig() + +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include") +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/share") + 
+set(VCPKG_POLICY_MISMATCHED_NUMBER_OF_BINARIES enabled) + +file(INSTALL "${CMAKE_CURRENT_LIST_DIR}/usage" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}") +vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/LICENSE") diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/usage b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/usage new file mode 100644 index 0000000000..9b23041f03 --- /dev/null +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/usage @@ -0,0 +1,10 @@ +The package ggml provides CMake integration: + + find_package(ggml CONFIG REQUIRED) + target_link_libraries(main PRIVATE ggml::ggml) + +Available vcpkg features: + metal - Metal GPU backend (macOS/iOS, auto-enabled on Apple) + vulkan - Vulkan GPU backend + cuda - CUDA GPU backend + opencl - OpenCL GPU backend diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/vcpkg.json b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/vcpkg.json new file mode 100644 index 0000000000..2ece3c0404 --- /dev/null +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/vcpkg.json @@ -0,0 +1,53 @@ +{ + "name": "ggml-speech", + "version-date": "2026-06-04", + "description": "Speech-stack flavour of ggml from tetherto/qvac-ext-ggml@speech, including the ggml-org v0.10.2 sync (PR #13), the iOS Metal NULL-safety hardening from PR #10, GustavoA1604's Android per-arch CPU dlopen fallback (PR #11), the Mac M2 PAD test fix, and Adreno-aware Android OpenCL/Vulkan backend selection (PR #18, QVAC-18993): on Android the loader detects the GPU via Vulkan and only keeps OpenCL for Adreno > 700, CPU-only for Adreno 1..700, Vulkan/CPU for non-Adreno. Adds Adreno OpenCL elementwise kernels (sin/cos/abs/elu/leaky_relu) for Chatterbox S3Gen and robust Adreno-generation detection (PR #17, #19). 
Library filenames are prefixed libqvac-speech-ggml-* so they coexist with libqvac-ggml-* (fabric/llm) and libqvac-diffusion-ggml-* on the same Android device. Mutually exclusive with the regular ggml port in the same triplet -- pick one per build.", + "homepage": "https://github.com/tetherto/qvac-ext-ggml/tree/speech", + "license": "MIT", + "dependencies": [ + { + "name": "vcpkg-cmake", + "host": true + }, + { + "name": "vcpkg-cmake-config", + "host": true + } + ], + "default-features": [ + { + "name": "metal", + "platform": "osx | ios" + }, + { + "name": "vulkan", + "platform": "windows | linux | android" + } + ], + "features": { + "cuda": { + "description": "Enable CUDA GPU backend" + }, + "metal": { + "description": "Enable Metal GPU backend (macOS/iOS)" + }, + "metal-fuse-mv-bias": { + "description": "Compile in the Q-variant mul_mv + ADD(bias) [+ ADD(residual)] fusion in ggml-metal. Off by default: empirically produces zero tokens on parakeet's EOU q8_0 joint network. Opt in only after Metal A/B-validating your model against the fused path." 
+ }, + "opencl": { + "description": "Enable OpenCL GPU backend", + "dependencies": [ + "opencl" + ] + }, + "vulkan": { + "description": "Enable Vulkan GPU backend", + "dependencies": [ + { + "name": "spirv-headers", + "version>=": "1.4.341.0" + } + ] + } + } +} diff --git a/packages/transcription-parakeet/vcpkg.json b/packages/transcription-parakeet/vcpkg.json index d0bc9de28c..44e621aeff 100644 --- a/packages/transcription-parakeet/vcpkg.json +++ b/packages/transcription-parakeet/vcpkg.json @@ -11,7 +11,8 @@ { "name": "parakeet-cpp", "version>=": "2026-05-26#2", - "features": ["vulkan", "opencl"], + "features": ["vulkan"], + "default-features": false, "platform": "android" }, { From 2ab58bbf0a3bdf6b42a903bc10026d48636c83b2 Mon Sep 17 00:00:00 2001 From: Pratik Narola Date: Mon, 8 Jun 2026 16:17:29 +0530 Subject: [PATCH 02/10] DO NOT MERGE - QVAC-19214 parakeet: capture ggml_abort to logcat for Mali RCA Device-Farm Mali-G715 aborts in the EOU-q4_0 Vulkan run (ggml_abort), but the RN/bare runtime does not forward native stderr to logcat, so the exact GGML_ASSERT is invisible in Device-Farm logs. Install a synchronous ggml_set_abort_callback (llama.cpp upstream API) that forwards the formatted 'file:line: message' straight to logcat via __android_log_print before abort(), and mirror ggml log lines (incl. the DEBUG Vulkan device-caps banner) to logcat. Diagnostic only - strip with the rest of the device-farm test scaffolding. 
--- .../parakeet/ParakeetModel.cpp | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/packages/transcription-parakeet/addon/src/model-interface/parakeet/ParakeetModel.cpp b/packages/transcription-parakeet/addon/src/model-interface/parakeet/ParakeetModel.cpp index 717e32c30f..b007bffe12 100644 --- a/packages/transcription-parakeet/addon/src/model-interface/parakeet/ParakeetModel.cpp +++ b/packages/transcription-parakeet/addon/src/model-interface/parakeet/ParakeetModel.cpp @@ -14,6 +14,10 @@ #include #include +#ifdef __ANDROID__ +#include +#endif + #include #include "ggml.h" @@ -124,13 +128,39 @@ void ggmlLogTrampoline(ggml_log_level level, const char * text, void * /*user_da ggml_log_buf().erase(0, nl + 1); if (line.empty()) continue; QLOG(ggmlLevelToPriority(ggml_log_buf_level()), line); +#ifdef __ANDROID__ + // [DO NOT MERGE] Device-Farm RCA (qvac PR #2476): mirror every ggml line + // to logcat so the Vulkan device-caps banner (emitted at DEBUG, and a + // no-op through QLOG unless setLogger() is on) is visible in Device-Farm + // logs. Strip with the rest of the test scaffolding. + __android_log_print(ANDROID_LOG_INFO, "ggml", "%s", line.c_str()); +#endif } } +// [DO NOT MERGE] Device-Farm Mali RCA (qvac PR #2476): the RN/bare runtime +// does not forward native stderr to logcat, so a ggml_abort()/GGML_ASSERT +// failure is invisible in Device-Farm logs. ggml exposes a synchronous abort +// callback (ggml_set_abort_callback, llama.cpp upstream) invoked with the +// fully-formatted "file:line: message" immediately before abort(). Forward it +// straight to logcat via __android_log_print — NOT QLOG, which is a no-op +// unless setLogger() is called from JS and would hop to the very JS thread +// that is aborting. Strip with the rest of the test scaffolding. +void ggmlAbortTrampoline(const char * message) { + const char * msg = message ? 
message : "(null)"; +#ifdef __ANDROID__ + __android_log_print(ANDROID_LOG_FATAL, "ggml_abort", "%s", msg); +#else + std::fprintf(stderr, "ggml_abort: %s\n", msg); +#endif +} + void installGgmlLogTrampolineOnce() { static std::once_flag once; std::call_once(once, [] { ggml_log_set(&ggmlLogTrampoline, nullptr); + // [DO NOT MERGE] capture the Mali GGML_ASSERT — see ggmlAbortTrampoline. + ggml_set_abort_callback(&ggmlAbortTrampoline); }); } From 85b292c4f2286c9faec6d868d6fa0c99e9672b0d Mon Sep 17 00:00:00 2001 From: Pratik Narola Date: Mon, 8 Jun 2026 17:55:40 +0530 Subject: [PATCH 03/10] DO NOT MERGE - QVAC-19214 parakeet: fix addon load (link liblog) + force Vulkan on Adreno Two fixes so the Device-Farm Mali RCA run actually exercises the models: 1. Link liblog into the addon .bare. The abort-capture instrumentation's __android_log_print was an undefined symbol (no liblog in DT_NEEDED), so the addon failed to dlopen at runtime -> bare ADDON_NOT_FOUND -> 18/19 tests failed on both devices. Verified: rebuilt .bare now lists liblog.so in DT_NEEDED. 2. Re-add default-features:false to the android parakeet-cpp dep (dropped by the main merge), so OpenCL is not built and Adreno selects Vulkan (backendId=3), not OpenCL (backendId=4). Part of the DO-NOT-MERGE device-farm test scaffolding. --- packages/transcription-parakeet/CMakeLists.txt | 6 ++++++ packages/transcription-parakeet/vcpkg.json | 1 + 2 files changed, 7 insertions(+) diff --git a/packages/transcription-parakeet/CMakeLists.txt b/packages/transcription-parakeet/CMakeLists.txt index 116a28a28e..c8b5b07213 100644 --- a/packages/transcription-parakeet/CMakeLists.txt +++ b/packages/transcription-parakeet/CMakeLists.txt @@ -248,6 +248,12 @@ if(ANDROID) -Wl,-z,max-page-size=16384 -Wl,-z,common-page-size=16384 ) + # [DO NOT MERGE] Device-Farm Mali RCA (qvac PR #2476): the abort-capture + # instrumentation in ParakeetModel.cpp calls __android_log_print, which lives + # in liblog. 
Without linking it the symbol is undefined in the .bare and the + # addon fails to dlopen at runtime (bare reports ADDON_NOT_FOUND). Strip with + # the rest of the test scaffolding. + target_link_libraries(${qvac_lib_infer_parakeet}_module PRIVATE log) endif() if(APPLE) diff --git a/packages/transcription-parakeet/vcpkg.json b/packages/transcription-parakeet/vcpkg.json index 8ce33f12c6..96baaef19a 100644 --- a/packages/transcription-parakeet/vcpkg.json +++ b/packages/transcription-parakeet/vcpkg.json @@ -11,6 +11,7 @@ { "name": "parakeet-cpp", "version>=": "2026-06-04#0", + "default-features": false, "features": ["vulkan"], "platform": "android" }, From 41b6196e819d1e21465fdcb83f31d13a58248f2d Mon Sep 17 00:00:00 2001 From: Pratik Narola Date: Mon, 8 Jun 2026 18:57:05 +0530 Subject: [PATCH 04/10] DO NOT MERGE - QVAC-19214 parakeet: overlay ggml-speech -> Mali-RCA ggml (9be02126) Point the device-farm overlay at qvac-ext-ggml@QVAC-19214-mali-rca (9be02126 = PR #14's 8bf760f4 + a ggml_abort->logcat diagnostic). The Vulkan backend's GGML_ASSERT on Mali-G715 is otherwise invisible: bare drops native stderr and the addon abort callback never fires for the dlopen'd backend (separate linker namespace). This rebuilds ggml-speech from the instrumented commit so the next Device-Farm Mali EOU crash logs the exact assert under logcat tag ggml_abort. 
--- .../ggml-speech/portfile.cmake | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake index a943134660..1d5bcc761b 100644 --- a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake @@ -1,17 +1,19 @@ -# ggml-speech: OVERLAY pinned to tetherto/qvac-ext-ggml PR #14 (commit -# 8bf760f4) -- Adreno 740 Vulkan support + the Parakeet quantized-matmul -# fix (disable integer-dot MMQ + force a quantized src0 -> f16 dequant on -# Qualcomm, so the int8-only shaders the Adreno SPIR-V compiler rejects are -# never used). DO NOT MERGE: device-farm validation overlay only. Pairs -# with the Android default-features below dropping OpenCL so the Parakeet -# Android tests exercise the Vulkan backend. +# ggml-speech: OVERLAY pinned to tetherto/qvac-ext-ggml branch +# QVAC-19214-mali-rca (commit 9be02126) = PR #14's Adreno-Vulkan fix +# (8bf760f4) PLUS a DO-NOT-MERGE diagnostic that forwards ggml_abort's +# "file:line: GGML_ASSERT(...)" to Android logcat (tag ggml_abort), to +# capture the exact Mali-G715 EOU-Vulkan assertion that is otherwise +# invisible (bare drops native stderr; the addon abort callback never fires +# for the dlopen'd backend's separate linker namespace). DO NOT MERGE: +# device-farm RCA overlay only. Pairs with the Android default-features +# below dropping OpenCL so the Parakeet Android tests exercise Vulkan. 
vcpkg_from_github( OUT_SOURCE_PATH SOURCE_PATH REPO tetherto/qvac-ext-ggml - REF 8bf760f41a3f510216c6f1a80ca8b5795c1b2ffd - SHA512 5c2f894cc719f97d7ca8a5e52be40be4639881d162d4b5a695ef4ce05ad7ee4083353a1d2dca8e8bab330d080348a8548a577fb40054a452be6ff104a129b946 - HEAD_REF QVAC-19213-adreno-vulkan-shmem-fix + REF 9be02126c72d0a8f2404059b6bfa242efc354acf + SHA512 d7fedf1ca7256aec1b0c008e3b64ca6b4586fe5eadd6a311ac04076227c0ab3085c2f1376b85305d31628671402be9712ad65e46fb5a691fabdd6f324f060475 + HEAD_REF QVAC-19214-mali-rca ) set(GGML_METAL OFF) From 486d71a6daac702c29fe1670b3badf2c3a34572f Mon Sep 17 00:00:00 2001 From: Pratik Narola Date: Mon, 8 Jun 2026 20:52:20 +0530 Subject: [PATCH 05/10] DO NOT MERGE - QVAC-19214 parakeet: overlay ggml -> a066fa47 (name descriptor-set overflow pipeline) Bump the RCA overlay to qvac-ext-ggml@a066fa47 (= 9be02126 + a GGML_ABORT that prints the pipeline name/idx/size at the descriptor-set overflow). Next Mali EOU crash will name the exact under-requesting pipeline so we can write a targeted per-op request-count fix. 
--- .../vcpkg-overlay-ports/ggml-speech/portfile.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake index 1d5bcc761b..671bc1c61e 100644 --- a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake @@ -11,8 +11,8 @@ vcpkg_from_github( OUT_SOURCE_PATH SOURCE_PATH REPO tetherto/qvac-ext-ggml - REF 9be02126c72d0a8f2404059b6bfa242efc354acf - SHA512 d7fedf1ca7256aec1b0c008e3b64ca6b4586fe5eadd6a311ac04076227c0ab3085c2f1376b85305d31628671402be9712ad65e46fb5a691fabdd6f324f060475 + REF a066fa473bfedf5135a5959d4c0e4b833e78b457 + SHA512 c6eec91686342f5edffcf4182cc7399b698134ab33ef87fb4e051a3c4697057a318beadc0111cb125cafbc1078ff9da6c81e258d209a7cea1824aa8907b7796b HEAD_REF QVAC-19214-mali-rca ) From bf42746414cbb1bb9cba2463fe69dd382ab12c9a Mon Sep 17 00:00:00 2001 From: Pratik Narola Date: Mon, 8 Jun 2026 21:54:43 +0530 Subject: [PATCH 06/10] DO NOT MERGE - QVAC-19214 parakeet: overlay ggml -> 253602e3 (name the descriptor-set over-dispatcher) --- .../vcpkg-overlay-ports/ggml-speech/portfile.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake index 671bc1c61e..9b01342322 100644 --- a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake @@ -11,8 +11,8 @@ vcpkg_from_github( OUT_SOURCE_PATH SOURCE_PATH REPO tetherto/qvac-ext-ggml - REF a066fa473bfedf5135a5959d4c0e4b833e78b457 - SHA512 
c6eec91686342f5edffcf4182cc7399b698134ab33ef87fb4e051a3c4697057a318beadc0111cb125cafbc1078ff9da6c81e258d209a7cea1824aa8907b7796b + REF 253602e373a3633198237301bc5033f3c0eeb584 + SHA512 05c44c576f6185444148e2888f903da60ec903f5e866b8e50bc0d11abd7aa795ef8a3f36d3ed8ecea973210c6644fdc638a7ac939440048895ba412da3f739c3 HEAD_REF QVAC-19214-mali-rca ) From 1e7fceef67680f951e85bdc379583a2b436c6eb3 Mon Sep 17 00:00:00 2001 From: Pratik Narola Date: Mon, 8 Jun 2026 23:25:30 +0530 Subject: [PATCH 07/10] DO NOT MERGE - QVAC-19214 parakeet: overlay ggml -> ddfbf178 (descriptor-set grow-on-demand fix) Verification run for the actual fix: grow the Vulkan descriptor-set pool on demand so the Mali-G715 f16 small-tile matmul over-dispatch no longer overflows. Expect Mali EOU GPU to pass now (Adreno + Xclipse unaffected). --- .../vcpkg-overlay-ports/ggml-speech/portfile.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake index 9b01342322..12054d85e9 100644 --- a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake @@ -11,8 +11,8 @@ vcpkg_from_github( OUT_SOURCE_PATH SOURCE_PATH REPO tetherto/qvac-ext-ggml - REF 253602e373a3633198237301bc5033f3c0eeb584 - SHA512 05c44c576f6185444148e2888f903da60ec903f5e866b8e50bc0d11abd7aa795ef8a3f36d3ed8ecea973210c6644fdc638a7ac939440048895ba412da3f739c3 + REF ddfbf1788b0cc69495f06efa029e5e69f5ecc0e6 + SHA512 f06e0f58c5549dddc6e764c7796fc3c2cd6b96caba94bf1f6b6c495f90b3594646a39652e8ffaf1c0d063ef1c888c11a72057dc44fc09ee2eb9bed2333c8815b HEAD_REF QVAC-19214-mali-rca ) From 5b1d6d38cab805cf8f4653b73733aa1038573adb Mon Sep 17 00:00:00 2001 From: Pratik Narola Date: Tue, 9 Jun 2026 08:31:46 +0530 Subject: [PATCH 08/10] DO NOT MERGE - QVAC-19214 parakeet: overlay ggml 
-> 45c159d7 (fix grow-on-demand to actually allocate) --- .../vcpkg-overlay-ports/ggml-speech/portfile.cmake | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake index 12054d85e9..7aee066395 100644 --- a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake @@ -1,6 +1,7 @@ # ggml-speech: OVERLAY pinned to tetherto/qvac-ext-ggml branch -# QVAC-19214-mali-rca (commit 9be02126) = PR #14's Adreno-Vulkan fix -# (8bf760f4) PLUS a DO-NOT-MERGE diagnostic that forwards ggml_abort's +# QVAC-19214-mali-rca (commit 45c159d7) = PR #14's Adreno-Vulkan fix +# (8bf760f4) PLUS the generic descriptor-set grow-on-demand fix for the +# Mali-G715 EOU-Vulkan crash, PLUS a DO-NOT-MERGE diagnostic that forwards ggml_abort's # "file:line: GGML_ASSERT(...)" to Android logcat (tag ggml_abort), to # capture the exact Mali-G715 EOU-Vulkan assertion that is otherwise # invisible (bare drops native stderr; the addon abort callback never fires @@ -11,8 +12,8 @@ vcpkg_from_github( OUT_SOURCE_PATH SOURCE_PATH REPO tetherto/qvac-ext-ggml - REF ddfbf1788b0cc69495f06efa029e5e69f5ecc0e6 - SHA512 f06e0f58c5549dddc6e764c7796fc3c2cd6b96caba94bf1f6b6c495f90b3594646a39652e8ffaf1c0d063ef1c888c11a72057dc44fc09ee2eb9bed2333c8815b + REF 45c159d7490ed2946a4095d8d5aa0f14243a18d9 + SHA512 2cc5ac3680125108da2bf4a21534e28f7e38eb7a2aad61c8989dd7f7b81aa8843dc7991e1a8e9465dc41696ea60f0e60414202ede52c39dfcb238a5ed0f7d31c HEAD_REF QVAC-19214-mali-rca ) From 556712785ad6c2476c67af962b578cf94a264876 Mon Sep 17 00:00:00 2001 From: Pratik Narola Date: Tue, 9 Jun 2026 08:43:40 +0530 Subject: [PATCH 09/10] DO NOT MERGE - QVAC-19214 parakeet: fix ggml-speech SHA512 (GitHub archive-gen race served a transient hash) --- 
.../vcpkg-overlay-ports/ggml-speech/portfile.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake index 7aee066395..b00364ab25 100644 --- a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake @@ -13,7 +13,7 @@ vcpkg_from_github( OUT_SOURCE_PATH SOURCE_PATH REPO tetherto/qvac-ext-ggml REF 45c159d7490ed2946a4095d8d5aa0f14243a18d9 - SHA512 2cc5ac3680125108da2bf4a21534e28f7e38eb7a2aad61c8989dd7f7b81aa8843dc7991e1a8e9465dc41696ea60f0e60414202ede52c39dfcb238a5ed0f7d31c + SHA512 1cf6b6708d215130d24b9dfdbb56add0b6250627159dadca85633797866e54782fa476d2d3b3ac4754ad66e9c00c53ed1c1d28bdf9aeef196f52f86a136c5324 HEAD_REF QVAC-19214-mali-rca ) From a3780eef75c4cb4490f016887613fc23d8176439 Mon Sep 17 00:00:00 2001 From: Pratik Narola Date: Tue, 9 Jun 2026 10:24:45 +0530 Subject: [PATCH 10/10] DO NOT MERGE - QVAC-19214 parakeet: overlay ggml -> e6578d01 (clean PR #14 commit, no RCA instrumentation) --- .../ggml-speech/portfile.cmake | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake index b00364ab25..78ffc90e38 100644 --- a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake @@ -1,20 +1,17 @@ -# ggml-speech: OVERLAY pinned to tetherto/qvac-ext-ggml branch -# QVAC-19214-mali-rca (commit 45c159d7) = PR #14's Adreno-Vulkan fix -# (8bf760f4) PLUS the generic descriptor-set grow-on-demand fix for the -# Mali-G715 EOU-Vulkan crash, PLUS a DO-NOT-MERGE diagnostic that forwards ggml_abort's -# 
"file:line: GGML_ASSERT(...)" to Android logcat (tag ggml_abort), to -# capture the exact Mali-G715 EOU-Vulkan assertion that is otherwise -# invisible (bare drops native stderr; the addon abort callback never fires -# for the dlopen'd backend's separate linker namespace). DO NOT MERGE: -# device-farm RCA overlay only. Pairs with the Android default-features -# below dropping OpenCL so the Parakeet Android tests exercise Vulkan. +# ggml-speech: OVERLAY pinned to the tetherto/qvac-ext-ggml PR #14 head +# (commit e6578d01) = the Adreno-Vulkan guards PLUS the generic descriptor-set +# pool grow-on-demand fix for the Mali-G715 EOU-Vulkan crash. This is the CLEAN +# commit that lands on PR #14 (no RCA instrumentation). DO NOT MERGE this +# overlay: device-farm validation vehicle only. Pairs with the Android +# default-features below dropping OpenCL so the Parakeet Android tests exercise +# Vulkan. vcpkg_from_github( OUT_SOURCE_PATH SOURCE_PATH REPO tetherto/qvac-ext-ggml - REF 45c159d7490ed2946a4095d8d5aa0f14243a18d9 - SHA512 1cf6b6708d215130d24b9dfdbb56add0b6250627159dadca85633797866e54782fa476d2d3b3ac4754ad66e9c00c53ed1c1d28bdf9aeef196f52f86a136c5324 - HEAD_REF QVAC-19214-mali-rca + REF e6578d0139235e0abd40804faf01f19bed15e414 + SHA512 1ac36882d355781b81f7e77b3cf06c04bcf7f7b90905d3f3ec5391bacf260e046860471b0fc1cb4c1b80093de16e88611938f932ee8b74190950463d8b928c28 + HEAD_REF QVAC-19213-adreno-vulkan-shmem-fix ) set(GGML_METAL OFF)