tetherto · pratiknarola-t · Jun 8, 2026 · Jun 8, 2026 · Jun 8, 2026 · Jun 8, 2026
@@ -22,6 +22,7 @@ Makefile
 !CMakeLists.txt
 !cmake/*.cmake
 !vcpkg/**/*.cmake
+!vcpkg-overlay-ports/**/*.cmake
 compile_commands.json
 .vscode/
 .idea/

@@ -248,6 +248,12 @@ if(ANDROID)
       -Wl,-z,max-page-size=16384
       -Wl,-z,common-page-size=16384
   )
+  # [DO NOT MERGE] Device-Farm Mali RCA (qvac PR #2476): the abort-capture
+  # instrumentation in ParakeetModel.cpp calls __android_log_print, which lives
+  # in liblog. Without linking it the symbol is undefined in the .bare and the
+  # addon fails to dlopen at runtime (bare reports ADDON_NOT_FOUND). Strip with
+  # the rest of the test scaffolding.
+  target_link_libraries(${qvac_lib_infer_parakeet}_module PRIVATE log)
 endif()
 
 if(APPLE)

@@ -14,6 +14,10 @@
 #include <stdexcept>
 #include <vector>
 
+#ifdef __ANDROID__
+#include <android/log.h>
+#endif
+
 #include <parakeet/parakeet.h>
 
 #include "ggml.h"
@@ -124,13 +128,39 @@ void ggmlLogTrampoline(ggml_log_level level, const char * text, void * /*user_da
     ggml_log_buf().erase(0, nl + 1);
     if (line.empty()) continue;
     QLOG(ggmlLevelToPriority(ggml_log_buf_level()), line);
+#ifdef __ANDROID__
+    // [DO NOT MERGE] Device-Farm RCA (qvac PR #2476): mirror every ggml line
+    // to logcat so the Vulkan device-caps banner (emitted at DEBUG, and a
+    // no-op through QLOG unless setLogger() is on) is visible in Device-Farm
+    // logs. Strip with the rest of the test scaffolding.
+    __android_log_print(ANDROID_LOG_INFO, "ggml", "%s", line.c_str());
+#endif
   }
 }
 
+// [DO NOT MERGE] Device-Farm Mali RCA (qvac PR #2476): the RN/bare runtime
+// does not forward native stderr to logcat, so a ggml_abort()/GGML_ASSERT
+// failure is invisible in Device-Farm logs. ggml exposes a synchronous abort
+// callback (ggml_set_abort_callback, llama.cpp upstream) invoked with the
+// fully-formatted "file:line: message" immediately before abort(). Forward it
+// straight to logcat via __android_log_print — NOT QLOG, which is a no-op
+// unless setLogger() is called from JS and would hop to the very JS thread
+// that is aborting. Strip with the rest of the test scaffolding.
+void ggmlAbortTrampoline(const char * message) {
+  // Fall back to a printable placeholder so "%s" never receives a null pointer.
+  const char * text = "(null)";
+  if (message != nullptr) {
+    text = message;
+  }
+#if defined(__ANDROID__)
+  // Android: write straight to logcat at FATAL priority, tag "ggml_abort".
+  __android_log_print(ANDROID_LOG_FATAL, "ggml_abort", "%s", text);
+#else
+  // Every other platform: emit the same text on stderr.
+  std::fprintf(stderr, "ggml_abort: %s\n", text);
+#endif
+}
+
 void installGgmlLogTrampolineOnce() {
   static std::once_flag once;  // function-local flag shared by every call
   std::call_once(once, [] {  // the lambda body runs only on the first call
     ggml_log_set(&ggmlLogTrampoline, nullptr);  // register the log callback; no user data is passed
+    // [DO NOT MERGE] capture the Mali GGML_ASSERT — see ggmlAbortTrampoline.
+    ggml_set_abort_callback(&ggmlAbortTrampoline);  // register the abort-message callback alongside the logger
   });
 }
 
@@ -221,17 +251,9 @@ void ParakeetModel::cleanupTempFile_() {
 void ParakeetModel::load() {
   if (is_loaded_) return;
 
-  // Force useGPU to false in Android until Vulkan and OpenCL are stabilized
-#ifdef __ANDROID__
-  if (cfg_.useGPU) {
-    QLOG(
-        logger::Priority::WARNING,
-        "Parakeet: useGPU=true is currently ignored on Android "
-        "(GPU backends disabled at engine boundary pending Vulkan/Mali "
-        "and OpenCL/Adreno driver fixes); falling back to CPU.");
-    cfg_.useGPU = false;
-  }
-#endif
+  // DO NOT MERGE (device-farm Vulkan test): the Android useGPU=false guard is
+  // removed so useGPU=true reaches the engine and the Parakeet Android tests
+  // exercise the Vulkan backend on the Device Farm Adreno/Mali devices.
 
   QLOG(logger::Priority::INFO,
        "Loading Parakeet GGUF (modelType hint: " +

@@ -152,6 +152,20 @@ async function runMobilePerfCase (t, opts) {
 
     t.ok(receivedStats.length >= NUM_TRANSCRIPTIONS, `${modelLabel} ${epLabel} should receive JobEnded stats for every run (got ${receivedStats.length})`)
     t.ok(timings.length === NUM_TRANSCRIPTIONS, `${modelLabel} ${epLabel} should complete ${NUM_TRANSCRIPTIONS} transcriptions (got ${timings.length})`)
+
+    // DO NOT MERGE (device-farm Vulkan validation): assert the Android GPU run
+    // actually selected the Vulkan backend (backendId 3), not OpenCL (4) or a
+    // CPU fallback (0). Both forced-Vulkan Device Farm devices (Adreno + Mali)
+    // must report Vulkan. BackendId: 0=CPU 1=Metal 2=CUDA 3=Vulkan 4=OpenCL.
+    if (useGPU && platform.startsWith('android')) {
+      const finalStats = receivedStats.length > 0
+        ? receivedStats[receivedStats.length - 1].stats
+        : null
+      const backendId = finalStats ? finalStats.backendId : null
+      t.ok(backendId === 3,
+        `${modelLabel} ${epLabel} Android use_gpu=true must select Vulkan (backendId=3); got ${backendId}`)
+    }
+
     console.log(`✅ Mobile perf case ${modelLabel} ${epLabel} completed successfully!\n`)
   } finally {
     console.log('=== Cleanup ===')

@@ -15,5 +15,8 @@
         "spirv-headers"
       ]
     }
+  ],
+  "overlay-ports": [
+    "./vcpkg-overlay-ports"
   ]
 }
@@ -0,0 +1,37 @@
+# Detect the Vulkan version shipped with the Android NDK by parsing
+# vulkan_core.h from the NDK sysroot.
+#
+# Inputs:
+#   ENV{ANDROID_NDK_HOME}  - root of the NDK installation (required).
+#   CMAKE_HOST_SYSTEM_NAME - used (lower-cased) to pick the prebuilt
+#                            toolchain directory "<host>-*".
+#
+# Output:
+#   vulkan_version - set in the caller's scope as "<major>.<minor>.<patch>"
+#                    (e.g. "1.3.275"). <patch> is VK_HEADER_VERSION; <major>
+#                    and <minor> are the 2nd and 3rd arguments of the
+#                    VK_MAKE_API_VERSION(...) call that defines
+#                    VK_HEADER_VERSION_COMPLETE (the 1st is the API variant).
+#
+# Every failure (missing env var, missing toolchain directory, missing
+# header, unparsable header) stops the run with message(FATAL_ERROR).
+function(detect_ndk_vulkan_version)
+    # Without this guard an unset variable makes the glob below search
+    # "/toolchains/..." and report a misleading "host directory" error.
+    set(ndk_home "$ENV{ANDROID_NDK_HOME}")
+    if("${ndk_home}" STREQUAL "")
+        message(FATAL_ERROR "ANDROID_NDK_HOME is not set; cannot locate the NDK sysroot to detect the Vulkan version")
+    endif()
+
+    string(TOLOWER "${CMAKE_HOST_SYSTEM_NAME}" host_system_name_lower)
+
+    # The prebuilt toolchain lives in "<host>-<arch>" (arch is not known up
+    # front), so glob for it and take the first match.
+    file(GLOB host_dirs LIST_DIRECTORIES true "${ndk_home}/toolchains/llvm/prebuilt/${host_system_name_lower}-*")
+    if(NOT host_dirs)
+        message(FATAL_ERROR "Could not find NDK host directory for ${host_system_name_lower} under ${ndk_home}/toolchains/llvm/prebuilt")
+    endif()
+    list(GET host_dirs 0 host_dir)
+    get_filename_component(host_arch "${host_dir}" NAME)
+    set(vulkan_core_h "${ndk_home}/toolchains/llvm/prebuilt/${host_arch}/sysroot/usr/include/vulkan/vulkan_core.h")
+
+    if(NOT EXISTS "${vulkan_core_h}")
+        message(FATAL_ERROR "vulkan_core.h not found at ${vulkan_core_h}")
+    endif()
+
+    file(READ "${vulkan_core_h}" header_content)
+
+    # Patch component: "#define VK_HEADER_VERSION <n>".
+    string(REGEX MATCH "VK_HEADER_VERSION ([0-9]+)" version_match "${header_content}")
+    if(NOT version_match)
+        message(FATAL_ERROR "Could not extract VK_HEADER_VERSION from ${vulkan_core_h}")
+    endif()
+    set(header_version_3 "${CMAKE_MATCH_1}")
+
+    # Major/minor components. Group 1 is the API variant and is ignored;
+    # groups 2 and 3 are major and minor. Whitespace after the commas is
+    # optional so a reformatted header still parses.
+    string(REGEX MATCH "VK_HEADER_VERSION_COMPLETE VK_MAKE_API_VERSION\\(([0-9]+), *([0-9]+), *([0-9]+)" version_match "${header_content}")
+    if(NOT version_match)
+        message(FATAL_ERROR "Could not extract VK_HEADER_VERSION_COMPLETE from ${vulkan_core_h}")
+    endif()
+    set(major "${CMAKE_MATCH_2}")
+    set(minor "${CMAKE_MATCH_3}")
+
+    set(vulkan_version "${major}.${minor}.${header_version_3}" PARENT_SCOPE)
+endfunction()
@@ -0,0 +1,24 @@
+diff --git a/src/ggml-vulkan/CMakeLists.txt b/src/ggml-vulkan/CMakeLists.txt
+index 715a263a..3d92ac5d 100644
+--- a/src/ggml-vulkan/CMakeLists.txt
++++ b/src/ggml-vulkan/CMakeLists.txt
+@@ -7,6 +7,7 @@ if (POLICY CMP0147)
+ endif()
+
+ find_package(Vulkan COMPONENTS glslc REQUIRED)
++find_package(SPIRV-Headers QUIET)
+
+ if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+     # Parallel build object files
+@@ -87,6 +88,11 @@ if (Vulkan_FOUND)
+     )
+
+     target_link_libraries(ggml-vulkan PRIVATE Vulkan::Vulkan)
++
++    if (TARGET SPIRV-Headers::SPIRV-Headers)
++        target_link_libraries(ggml-vulkan PRIVATE SPIRV-Headers::SPIRV-Headers)
++    endif()
++
+     target_include_directories(ggml-vulkan PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
+
+     # Workaround to the "can't dereference invalidated vector iterator" bug in clang-cl debug build
@@ -0,0 +1,172 @@
+# ggml-speech: OVERLAY pinned to the tetherto/qvac-ext-ggml PR #14 head
+# (commit e6578d01) = the Adreno-Vulkan guards PLUS the generic descriptor-set
+# pool grow-on-demand fix for the Mali-G715 EOU-Vulkan crash. This is the CLEAN
+# commit that lands on PR #14 (no RCA instrumentation). DO NOT MERGE this
+# overlay: device-farm validation vehicle only. Pairs with the Android
+# default-features below dropping OpenCL so the Parakeet Android tests exercise
+# Vulkan.
+
+# Fetch the pinned commit of the fork; SHA512 is the checksum supplied for
+# that download.
+vcpkg_from_github(
+    OUT_SOURCE_PATH SOURCE_PATH
+    REPO tetherto/qvac-ext-ggml
+    REF e6578d0139235e0abd40804faf01f19bed15e414
+    SHA512 1ac36882d355781b81f7e77b3cf06c04bcf7f7b90905d3f3ec5391bacf260e046860471b0fc1cb4c1b80093de16e88611938f932ee8b74190950463d8b928c28
+    HEAD_REF QVAC-19213-adreno-vulkan-shmem-fix
+)
+
+# Every GPU backend is opt-in: GGML_<BACKEND> is ON exactly when the vcpkg
+# feature with the same lower-case name was requested, OFF otherwise.
+foreach(ggml_backend IN ITEMS METAL VULKAN CUDA OPENCL)
+    string(TOLOWER "${ggml_backend}" ggml_feature)
+    if("${ggml_feature}" IN_LIST FEATURES)
+        set(GGML_${ggml_backend} ON)
+    else()
+        set(GGML_${ggml_backend} OFF)
+    endif()
+endforeach()
+
+# The chatterbox Q-variant mul_mv + bias/residual fusion stays off unless
+# explicitly requested: it produces zero tokens on parakeet's EOU q8_0 joint
+# network. Consumers whose models avoid that pattern can opt in for the
+# speedup.
+if("metal-fuse-mv-bias" IN_LIST FEATURES)
+    set(GGML_METAL_FUSE_MV_BIAS ON)
+else()
+    set(GGML_METAL_FUSE_MV_BIAS OFF)
+endif()
+
+# CUDA builds also need the compiler path handed to the configure step.
+# Look in the well-known install locations first, then fall back to the
+# default search (which is REQUIRED to succeed).
+set(GGML_CUDA_COMPILER_OPTION "")
+if(GGML_CUDA)
+    find_program(NVCC_EXECUTABLE nvcc
+        PATHS /usr/local/cuda/bin /usr/local/cuda-12.8/bin
+        NO_DEFAULT_PATH
+    )
+    if(NOT NVCC_EXECUTABLE)
+        find_program(NVCC_EXECUTABLE nvcc REQUIRED)
+    endif()
+    message(STATUS "CUDA compiler: ${NVCC_EXECUTABLE}")
+    set(GGML_CUDA_COMPILER_OPTION "-DCMAKE_CUDA_COMPILER=${NVCC_EXECUTABLE}")
+endif()
+
+# Android + Vulkan: fetch the Vulkan-Headers release that matches the
+# version found in the NDK sysroot, extract only its *.hpp files, and copy
+# the release's include/ tree into the ggml source tree under src/.
+if(VCPKG_TARGET_IS_ANDROID AND "vulkan" IN_LIST FEATURES)
+    include("${CMAKE_CURRENT_LIST_DIR}/android-vulkan-version.cmake")
+    detect_ndk_vulkan_version()
+    message(STATUS "NDK Vulkan version: ${vulkan_version}")
+
+    set(vulkan_headers_url
+        "https://github.com/KhronosGroup/Vulkan-Headers/archive/refs/tags/v${vulkan_version}.tar.gz")
+    set(vulkan_headers_archive "${SOURCE_PATH}/vulkan-hpp-${vulkan_version}.tar.gz")
+
+    # Capture the transfer status explicitly so a failed download is reported
+    # with the URL and the reason, instead of surfacing later as an
+    # extraction error on a missing or truncated archive.
+    file(DOWNLOAD
+        "${vulkan_headers_url}"
+        "${vulkan_headers_archive}"
+        TLS_VERIFY ON
+        STATUS vulkan_headers_status
+    )
+    list(GET vulkan_headers_status 0 vulkan_headers_status_code)
+    if(NOT vulkan_headers_status_code EQUAL 0)
+        list(GET vulkan_headers_status 1 vulkan_headers_status_text)
+        # Do not leave a partial archive behind for the next run to trip on.
+        file(REMOVE "${vulkan_headers_archive}")
+        message(FATAL_ERROR
+            "Failed to download ${vulkan_headers_url}: ${vulkan_headers_status_text} (code ${vulkan_headers_status_code})")
+    endif()
+
+    file(ARCHIVE_EXTRACT
+        INPUT "${vulkan_headers_archive}"
+        DESTINATION "${SOURCE_PATH}"
+        PATTERNS "*.hpp"
+    )
+    file(COPY "${SOURCE_PATH}/Vulkan-Headers-${vulkan_version}/include/"
+         DESTINATION "${SOURCE_PATH}/src/")
+endif()
+
+# Per-platform configure arguments, appended to the common option list.
+unset(PLATFORM_OPTIONS)
+
+# iOS: build without BLAS and without Accelerate.
+if(VCPKG_TARGET_IS_IOS)
+    list(APPEND PLATFORM_OPTIONS
+        -DGGML_BLAS=OFF
+        -DGGML_ACCELERATE=OFF
+    )
+endif()
+
+# Android uses the hybrid backend mode: GPU backends are MODULE .so files
+# loaded at runtime via dlopen, and the CPU backend is built as per-arch
+# MODULE .so variants (one per ARMv8.0/8.2/8.6/9.0/9.2 feature tier) that
+# are also dlopen'ed at runtime. The downstream addon installs the resulting
+# libqvac-speech-ggml-cpu-android_armv*.so files next to the .bare binary;
+# ggml-cpu's `ggml_backend_cpu_aarch64_score` then selects, at first use, the
+# highest tier the running device supports. This pairs with the speech-branch
+# `ggml-backend: android per-arch CPU variant dlopen fallback` patch
+# (commit 9562ed04) so the variant lookup also succeeds when the consumer
+# APK keeps native .so files compressed (AGP `useLegacyPackaging=false`).
+if(VCPKG_TARGET_IS_ANDROID)
+    set(android_backend_options
+        -DGGML_BACKEND_DL=ON
+        -DGGML_CPU_ALL_VARIANTS=ON
+        -DGGML_CPU_REPACK=ON
+        -DGGML_VULKAN_DISABLE_COOPMAT=ON
+        -DGGML_VULKAN_DISABLE_COOPMAT2=ON
+    )
+    list(APPEND PLATFORM_OPTIONS ${android_backend_options})
+endif()
+
+# PR #13 (v0.10.2 sync) introduces an unconditional
+# `#include <spirv/unified1/spirv.hpp>` in src/ggml-vulkan/ggml-vulkan.cpp,
+# but the upstream ggml-vulkan CMakeLists.txt never finds spirv-headers nor
+# wires its include dir into the ggml-vulkan target. Apply a small patch
+# so it does (and depend on spirv-headers in vcpkg.json's vulkan feature).
+# TODO: push the equivalent fix upstream and drop this patch.
+# Vulkan builds need the SPIRV-Headers lookup patched into ggml-vulkan's
+# CMakeLists.txt before configuring.
+if("vulkan" IN_LIST FEATURES)
+    set(spirv_headers_patch
+        "${CMAKE_CURRENT_LIST_DIR}/patches/0001-ggml-vulkan-find-spirv-headers.patch")
+    vcpkg_apply_patches(
+        SOURCE_PATH "${SOURCE_PATH}"
+        PATCHES "${spirv_headers_patch}"
+    )
+endif()
+
+# Assemble the configure arguments first: fixed switches, then the
+# feature-driven backend toggles, then the optional CUDA compiler and the
+# per-platform extras (either of which may expand to nothing).
+set(ggml_configure_options
+    -DBUILD_SHARED_LIBS=OFF
+    -DGGML_NATIVE=OFF
+    -DGGML_CCACHE=OFF
+    -DGGML_OPENMP=OFF
+    -DGGML_LLAMAFILE=OFF
+    -DGGML_BUILD_TESTS=OFF
+    -DGGML_BUILD_EXAMPLES=OFF
+    -DGGML_METAL=${GGML_METAL}
+    -DGGML_VULKAN=${GGML_VULKAN}
+    -DGGML_CUDA=${GGML_CUDA}
+    -DGGML_OPENCL=${GGML_OPENCL}
+    -DGGML_METAL_FUSE_MV_BIAS=${GGML_METAL_FUSE_MV_BIAS}
+    -DGGML_LIB_OUTPUT_PREFIX=qvac-speech-
+    ${GGML_CUDA_COMPILER_OPTION}
+    ${PLATFORM_OPTIONS}
+)
+
+vcpkg_cmake_configure(
+    SOURCE_PATH "${SOURCE_PATH}"
+    OPTIONS ${ggml_configure_options}
+)
+
+vcpkg_cmake_install()
+
+# Android dynamic-backend mode: ggml leaves the MODULE backend .so files in
+# the release buildtree's bin/ directory and cmake install() does not move
+# them by default, so collect them into the package's lib/ here.
+if(VCPKG_TARGET_IS_ANDROID)
+    set(release_bin_dir "${CURRENT_BUILDTREES_DIR}/${TARGET_TRIPLET}-rel/bin")
+    file(GLOB _backend_sos "${release_bin_dir}/libqvac-speech-ggml-*.so")
+    if(_backend_sos)
+        file(INSTALL ${_backend_sos} DESTINATION "${CURRENT_PACKAGES_DIR}/lib")
+    endif()
+endif()
+
+vcpkg_cmake_config_fixup(PACKAGE_NAME ggml CONFIG_PATH lib/cmake/ggml)
+
+# Move ggml.pc from share/pkgconfig to lib/pkgconfig for both the release
+# and the debug package roots, then drop the emptied share/pkgconfig
+# directories before running the pkg-config fixup.
+foreach(package_root IN ITEMS "${CURRENT_PACKAGES_DIR}" "${CURRENT_PACKAGES_DIR}/debug")
+    if(EXISTS "${package_root}/share/pkgconfig/ggml.pc")
+        file(MAKE_DIRECTORY "${package_root}/lib/pkgconfig")
+        file(RENAME "${package_root}/share/pkgconfig/ggml.pc"
+                    "${package_root}/lib/pkgconfig/ggml.pc")
+    endif()
+endforeach()
+file(REMOVE_RECURSE
+    "${CURRENT_PACKAGES_DIR}/share/pkgconfig"
+    "${CURRENT_PACKAGES_DIR}/debug/share/pkgconfig"
+)
+vcpkg_fixup_pkgconfig()
+
+# Remove the debug copies of include/ and share/ from the package.
+file(REMOVE_RECURSE
+    "${CURRENT_PACKAGES_DIR}/debug/include"
+    "${CURRENT_PACKAGES_DIR}/debug/share"
+)
+
+set(VCPKG_POLICY_MISMATCHED_NUMBER_OF_BINARIES enabled)
+
+file(INSTALL "${CMAKE_CURRENT_LIST_DIR}/usage" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}")
+vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/LICENSE")
@@ -0,0 +1,10 @@
+The package ggml provides CMake integration:
+
+  find_package(ggml CONFIG REQUIRED)
+  target_link_libraries(main PRIVATE ggml::ggml)
+
+Available vcpkg features:
+  metal              - Metal GPU backend (macOS/iOS, auto-enabled on Apple)
+  metal-fuse-mv-bias - Opt-in Metal mul_mv + bias/residual fusion (off by default)
+  vulkan             - Vulkan GPU backend
+  cuda               - CUDA GPU backend
+  opencl             - OpenCL GPU backend