diff --git a/packages/transcription-parakeet/.gitignore b/packages/transcription-parakeet/.gitignore index 1696b4b7f7..87220aa150 100644 --- a/packages/transcription-parakeet/.gitignore +++ b/packages/transcription-parakeet/.gitignore @@ -22,6 +22,7 @@ Makefile !CMakeLists.txt !cmake/*.cmake !vcpkg/**/*.cmake +!vcpkg-overlay-ports/**/*.cmake compile_commands.json .vscode/ .idea/ diff --git a/packages/transcription-parakeet/CMakeLists.txt b/packages/transcription-parakeet/CMakeLists.txt index 116a28a28e..c8b5b07213 100644 --- a/packages/transcription-parakeet/CMakeLists.txt +++ b/packages/transcription-parakeet/CMakeLists.txt @@ -248,6 +248,12 @@ if(ANDROID) -Wl,-z,max-page-size=16384 -Wl,-z,common-page-size=16384 ) + # [DO NOT MERGE] Device-Farm Mali RCA (qvac PR #2476): the abort-capture + # instrumentation in ParakeetModel.cpp calls __android_log_print, which lives + # in liblog. Without linking it the symbol is undefined in the .bare and the + # addon fails to dlopen at runtime (bare reports ADDON_NOT_FOUND). Strip with + # the rest of the test scaffolding. 
+ target_link_libraries(${qvac_lib_infer_parakeet}_module PRIVATE log) endif() if(APPLE) diff --git a/packages/transcription-parakeet/addon/src/model-interface/parakeet/ParakeetModel.cpp b/packages/transcription-parakeet/addon/src/model-interface/parakeet/ParakeetModel.cpp index adf4a736a6..b007bffe12 100644 --- a/packages/transcription-parakeet/addon/src/model-interface/parakeet/ParakeetModel.cpp +++ b/packages/transcription-parakeet/addon/src/model-interface/parakeet/ParakeetModel.cpp @@ -14,6 +14,10 @@ #include #include +#ifdef __ANDROID__ +#include <android/log.h> +#endif + #include #include "ggml.h" @@ -124,13 +128,39 @@ void ggmlLogTrampoline(ggml_log_level level, const char * text, void * /*user_da ggml_log_buf().erase(0, nl + 1); if (line.empty()) continue; QLOG(ggmlLevelToPriority(ggml_log_buf_level()), line); +#ifdef __ANDROID__ + // [DO NOT MERGE] Device-Farm RCA (qvac PR #2476): mirror every ggml line + // to logcat so the Vulkan device-caps banner (emitted at DEBUG, and a + // no-op through QLOG unless setLogger() is on) is visible in Device-Farm + // logs. Strip with the rest of the test scaffolding. + __android_log_print(ANDROID_LOG_INFO, "ggml", "%s", line.c_str()); +#endif } } +// [DO NOT MERGE] Device-Farm Mali RCA (qvac PR #2476): the RN/bare runtime +// does not forward native stderr to logcat, so a ggml_abort()/GGML_ASSERT +// failure is invisible in Device-Farm logs. ggml exposes a synchronous abort +// callback (ggml_set_abort_callback, llama.cpp upstream) invoked with the +// fully-formatted "file:line: message" immediately before abort(). Forward it +// straight to logcat via __android_log_print — NOT QLOG, which is a no-op +// unless setLogger() is called from JS and would hop to the very JS thread +// that is aborting. Strip with the rest of the test scaffolding. +void ggmlAbortTrampoline(const char * message) { + const char * msg = message ? 
message : "(null)"; +#ifdef __ANDROID__ + __android_log_print(ANDROID_LOG_FATAL, "ggml_abort", "%s", msg); +#else + std::fprintf(stderr, "ggml_abort: %s\n", msg); +#endif +} + void installGgmlLogTrampolineOnce() { static std::once_flag once; std::call_once(once, [] { ggml_log_set(&ggmlLogTrampoline, nullptr); + // [DO NOT MERGE] capture the Mali GGML_ASSERT — see ggmlAbortTrampoline. + ggml_set_abort_callback(&ggmlAbortTrampoline); }); } @@ -221,17 +251,9 @@ void ParakeetModel::cleanupTempFile_() { void ParakeetModel::load() { if (is_loaded_) return; - // Force useGPU to false in Android until Vulkan and OpenCL are stabilized -#ifdef __ANDROID__ - if (cfg_.useGPU) { - QLOG( - logger::Priority::WARNING, - "Parakeet: useGPU=true is currently ignored on Android " - "(GPU backends disabled at engine boundary pending Vulkan/Mali " - "and OpenCL/Adreno driver fixes); falling back to CPU."); - cfg_.useGPU = false; - } -#endif + // DO NOT MERGE (device-farm Vulkan test): the Android useGPU=false guard is + // removed so useGPU=true reaches the engine and the Parakeet Android tests + // exercise the Vulkan backend on the Device Farm Adreno/Mali devices. 
QLOG(logger::Priority::INFO, "Loading Parakeet GGUF (modelType hint: " + diff --git a/packages/transcription-parakeet/test/integration/mobile-perf-runner.js b/packages/transcription-parakeet/test/integration/mobile-perf-runner.js index f6fe6af565..8f446fc2d3 100644 --- a/packages/transcription-parakeet/test/integration/mobile-perf-runner.js +++ b/packages/transcription-parakeet/test/integration/mobile-perf-runner.js @@ -152,6 +152,20 @@ async function runMobilePerfCase (t, opts) { t.ok(receivedStats.length >= NUM_TRANSCRIPTIONS, `${modelLabel} ${epLabel} should receive JobEnded stats for every run (got ${receivedStats.length})`) t.ok(timings.length === NUM_TRANSCRIPTIONS, `${modelLabel} ${epLabel} should complete ${NUM_TRANSCRIPTIONS} transcriptions (got ${timings.length})`) + + // DO NOT MERGE (device-farm Vulkan validation): assert the Android GPU run + // actually selected the Vulkan backend (backendId 3), not OpenCL (4) or a + // CPU fallback (0). Both forced-Vulkan Device Farm devices (Adreno + Mali) + // must report Vulkan. BackendId: 0=CPU 1=Metal 2=CUDA 3=Vulkan 4=OpenCL. + if (useGPU && platform.startsWith('android')) { + const finalStats = receivedStats.length > 0 + ? receivedStats[receivedStats.length - 1].stats + : null + const backendId = finalStats ? 
finalStats.backendId : null + t.ok(backendId === 3, + `${modelLabel} ${epLabel} Android use_gpu=true must select Vulkan (backendId=3); got ${backendId}`) + } + console.log(`✅ Mobile perf case ${modelLabel} ${epLabel} completed successfully!\n`) } finally { console.log('=== Cleanup ===') diff --git a/packages/transcription-parakeet/vcpkg-configuration.json b/packages/transcription-parakeet/vcpkg-configuration.json index 04e7630640..9dc16448d2 100644 --- a/packages/transcription-parakeet/vcpkg-configuration.json +++ b/packages/transcription-parakeet/vcpkg-configuration.json @@ -15,5 +15,8 @@ "spirv-headers" ] } + ], + "overlay-ports": [ + "./vcpkg-overlay-ports" ] } diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/android-vulkan-version.cmake b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/android-vulkan-version.cmake new file mode 100644 index 0000000000..16ac7c0826 --- /dev/null +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/android-vulkan-version.cmake @@ -0,0 +1,37 @@ +# Detect the Vulkan version shipped with the Android NDK by parsing +# vulkan_core.h from the NDK sysroot. Sets `vulkan_version` in the +# caller's scope (e.g. "1.3.275"). 
+function(detect_ndk_vulkan_version) + string(TOLOWER "${CMAKE_HOST_SYSTEM_NAME}" host_system_name_lower) + + file(GLOB host_dirs LIST_DIRECTORIES true "$ENV{ANDROID_NDK_HOME}/toolchains/llvm/prebuilt/${host_system_name_lower}-*") + if(host_dirs) + list(GET host_dirs 0 host_dir) + get_filename_component(host_arch "${host_dir}" NAME) + set(vulkan_core_h "$ENV{ANDROID_NDK_HOME}/toolchains/llvm/prebuilt/${host_arch}/sysroot/usr/include/vulkan/vulkan_core.h") + else() + message(FATAL_ERROR "Could not find NDK host directory for ${host_system_name_lower}") + endif() + + if(NOT EXISTS "${vulkan_core_h}") + message(FATAL_ERROR "vulkan_core.h not found at ${vulkan_core_h}") + endif() + + file(READ "${vulkan_core_h}" header_content) + string(REGEX MATCH "VK_HEADER_VERSION ([0-9]+)" version_match "${header_content}") + if(version_match) + set(header_version_3 "${CMAKE_MATCH_1}") + else() + message(FATAL_ERROR "Could not extract VK_HEADER_VERSION from ${vulkan_core_h}") + endif() + + # Extract major.minor version from VK_HEADER_VERSION_COMPLETE for download URL + string(REGEX MATCH "VK_HEADER_VERSION_COMPLETE VK_MAKE_API_VERSION\\(([0-9]+), ([0-9]+), ([0-9]+)" version_match "${header_content}") + if(version_match) + set(major "${CMAKE_MATCH_2}") + set(minor "${CMAKE_MATCH_3}") + set(vulkan_version "${major}.${minor}.${header_version_3}" PARENT_SCOPE) + else() + message(FATAL_ERROR "Could not extract VK_HEADER_VERSION_COMPLETE from ${vulkan_core_h}") + endif() +endfunction() diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/patches/0001-ggml-vulkan-find-spirv-headers.patch b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/patches/0001-ggml-vulkan-find-spirv-headers.patch new file mode 100644 index 0000000000..7906d49727 --- /dev/null +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/patches/0001-ggml-vulkan-find-spirv-headers.patch @@ -0,0 +1,24 @@ +diff --git a/src/ggml-vulkan/CMakeLists.txt 
b/src/ggml-vulkan/CMakeLists.txt +index 715a263a..3d92ac5d 100644 +--- a/src/ggml-vulkan/CMakeLists.txt ++++ b/src/ggml-vulkan/CMakeLists.txt +@@ -7,6 +7,7 @@ if (POLICY CMP0147) + endif() + + find_package(Vulkan COMPONENTS glslc REQUIRED) ++find_package(SPIRV-Headers QUIET) + + if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + # Parallel build object files +@@ -87,6 +88,11 @@ if (Vulkan_FOUND) + ) + + target_link_libraries(ggml-vulkan PRIVATE Vulkan::Vulkan) ++ ++ if (TARGET SPIRV-Headers::SPIRV-Headers) ++ target_link_libraries(ggml-vulkan PRIVATE SPIRV-Headers::SPIRV-Headers) ++ endif() ++ + target_include_directories(ggml-vulkan PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) + + # Workaround to the "can't dereference invalidated vector iterator" bug in clang-cl debug build diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake new file mode 100644 index 0000000000..78ffc90e38 --- /dev/null +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/portfile.cmake @@ -0,0 +1,172 @@ +# ggml-speech: OVERLAY pinned to the tetherto/qvac-ext-ggml PR #14 head +# (commit e6578d01) = the Adreno-Vulkan guards PLUS the generic descriptor-set +# pool grow-on-demand fix for the Mali-G715 EOU-Vulkan crash. This is the CLEAN +# commit that lands on PR #14 (no RCA instrumentation). DO NOT MERGE this +# overlay: device-farm validation vehicle only. Pairs with the Android +# default-features below dropping OpenCL so the Parakeet Android tests exercise +# Vulkan. 
+ +vcpkg_from_github( + OUT_SOURCE_PATH SOURCE_PATH + REPO tetherto/qvac-ext-ggml + REF e6578d0139235e0abd40804faf01f19bed15e414 + SHA512 1ac36882d355781b81f7e77b3cf06c04bcf7f7b90905d3f3ec5391bacf260e046860471b0fc1cb4c1b80093de16e88611938f932ee8b74190950463d8b928c28 + HEAD_REF QVAC-19213-adreno-vulkan-shmem-fix +) + +set(GGML_METAL OFF) +set(GGML_VULKAN OFF) +set(GGML_CUDA OFF) +set(GGML_OPENCL OFF) +set(GGML_METAL_FUSE_MV_BIAS OFF) + +if("metal" IN_LIST FEATURES) + set(GGML_METAL ON) +endif() + +# Off by default: the chatterbox Q-variant mul_mv + bias/residual fusion +# produces zero tokens on parakeet's EOU q8_0 joint network. Consumers +# whose models stay clear of that pattern can opt in for the speedup. +if("metal-fuse-mv-bias" IN_LIST FEATURES) + set(GGML_METAL_FUSE_MV_BIAS ON) +endif() + +if("vulkan" IN_LIST FEATURES) + set(GGML_VULKAN ON) +endif() + +set(GGML_CUDA_COMPILER_OPTION "") + +if("cuda" IN_LIST FEATURES) + set(GGML_CUDA ON) + find_program(NVCC_EXECUTABLE nvcc + PATHS /usr/local/cuda/bin /usr/local/cuda-12.8/bin + NO_DEFAULT_PATH + ) + if(NOT NVCC_EXECUTABLE) + find_program(NVCC_EXECUTABLE nvcc REQUIRED) + endif() + set(GGML_CUDA_COMPILER_OPTION "-DCMAKE_CUDA_COMPILER=${NVCC_EXECUTABLE}") + message(STATUS "CUDA compiler: ${NVCC_EXECUTABLE}") +endif() + +if("opencl" IN_LIST FEATURES) + set(GGML_OPENCL ON) +endif() + +if(VCPKG_TARGET_IS_ANDROID AND "vulkan" IN_LIST FEATURES) + include(${CMAKE_CURRENT_LIST_DIR}/android-vulkan-version.cmake) + detect_ndk_vulkan_version() + message(STATUS "NDK Vulkan version: ${vulkan_version}") + + file(DOWNLOAD + "https://github.com/KhronosGroup/Vulkan-Headers/archive/refs/tags/v${vulkan_version}.tar.gz" + "${SOURCE_PATH}/vulkan-hpp-${vulkan_version}.tar.gz" + TLS_VERIFY ON + ) + file(ARCHIVE_EXTRACT + INPUT "${SOURCE_PATH}/vulkan-hpp-${vulkan_version}.tar.gz" + DESTINATION "${SOURCE_PATH}" + PATTERNS "*.hpp" + ) + file(COPY "${SOURCE_PATH}/Vulkan-Headers-${vulkan_version}/include/" + DESTINATION 
"${SOURCE_PATH}/src/") +endif() + +set(PLATFORM_OPTIONS) + +if(VCPKG_TARGET_IS_IOS) + list(APPEND PLATFORM_OPTIONS -DGGML_BLAS=OFF -DGGML_ACCELERATE=OFF) +endif() + +# Hybrid Android backend mode: GPU backends as MODULE .so loaded at runtime +# via dlopen, CPU built as per-arch MODULE .so variants (one per ARMv8.0/ +# 8.2/8.6/9.0/9.2 feature tier) also loaded at runtime via dlopen. The +# downstream addon installs the resulting libqvac-speech-ggml-cpu-android_armv* +# .so files alongside the .bare binary; the per-variant scoring in +# ggml-cpu's `ggml_backend_cpu_aarch64_score` then picks the highest tier +# the running device supports at first use. Pairs with the speech-branch +# `ggml-backend: android per-arch CPU variant dlopen fallback` patch +# (commit 9562ed04) so the variant lookup also succeeds when the consumer +# APK keeps native .so files compressed (AGP `useLegacyPackaging=false`). +if(VCPKG_TARGET_IS_ANDROID) + list(APPEND PLATFORM_OPTIONS + -DGGML_BACKEND_DL=ON + -DGGML_CPU_ALL_VARIANTS=ON + -DGGML_CPU_REPACK=ON + -DGGML_VULKAN_DISABLE_COOPMAT=ON + -DGGML_VULKAN_DISABLE_COOPMAT2=ON + ) +endif() + +# PR #13 (v0.10.2 sync) introduces an unconditional +# `#include` of a spirv-headers header in src/ggml-vulkan/ggml-vulkan.cpp, +# but the upstream ggml-vulkan CMakeLists.txt never finds spirv-headers nor +# wires its include dir into the ggml-vulkan target. Apply a small patch +# so it does (and depend on spirv-headers in vcpkg.json's vulkan feature). +# TODO: push the equivalent fix upstream and drop this patch. 
+if("vulkan" IN_LIST FEATURES) + vcpkg_apply_patches( + SOURCE_PATH "${SOURCE_PATH}" + PATCHES + "${CMAKE_CURRENT_LIST_DIR}/patches/0001-ggml-vulkan-find-spirv-headers.patch" + ) +endif() + +vcpkg_cmake_configure( + SOURCE_PATH "${SOURCE_PATH}" + OPTIONS + -DBUILD_SHARED_LIBS=OFF + -DGGML_NATIVE=OFF + -DGGML_CCACHE=OFF + -DGGML_OPENMP=OFF + -DGGML_LLAMAFILE=OFF + -DGGML_BUILD_TESTS=OFF + -DGGML_BUILD_EXAMPLES=OFF + -DGGML_METAL=${GGML_METAL} + -DGGML_VULKAN=${GGML_VULKAN} + -DGGML_CUDA=${GGML_CUDA} + -DGGML_OPENCL=${GGML_OPENCL} + -DGGML_METAL_FUSE_MV_BIAS=${GGML_METAL_FUSE_MV_BIAS} + -DGGML_LIB_OUTPUT_PREFIX=qvac-speech- + ${GGML_CUDA_COMPILER_OPTION} + ${PLATFORM_OPTIONS} +) + +vcpkg_cmake_install() + +# Pick up the MODULE backend .so files ggml builds into the buildtree's +# bin/ directory (Android dynamic-backend mode). cmake install() doesn't +# move them by default. +if(VCPKG_TARGET_IS_ANDROID) + file(GLOB _backend_sos + "${CURRENT_BUILDTREES_DIR}/${TARGET_TRIPLET}-rel/bin/libqvac-speech-ggml-*.so" + ) + if(_backend_sos) + file(INSTALL ${_backend_sos} DESTINATION "${CURRENT_PACKAGES_DIR}/lib") + endif() +endif() + +vcpkg_cmake_config_fixup(PACKAGE_NAME ggml CONFIG_PATH lib/cmake/ggml) + +if(EXISTS "${CURRENT_PACKAGES_DIR}/share/pkgconfig/ggml.pc") + file(MAKE_DIRECTORY "${CURRENT_PACKAGES_DIR}/lib/pkgconfig") + file(RENAME "${CURRENT_PACKAGES_DIR}/share/pkgconfig/ggml.pc" + "${CURRENT_PACKAGES_DIR}/lib/pkgconfig/ggml.pc") +endif() +if(EXISTS "${CURRENT_PACKAGES_DIR}/debug/share/pkgconfig/ggml.pc") + file(MAKE_DIRECTORY "${CURRENT_PACKAGES_DIR}/debug/lib/pkgconfig") + file(RENAME "${CURRENT_PACKAGES_DIR}/debug/share/pkgconfig/ggml.pc" + "${CURRENT_PACKAGES_DIR}/debug/lib/pkgconfig/ggml.pc") +endif() +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/share/pkgconfig" + "${CURRENT_PACKAGES_DIR}/debug/share/pkgconfig") +vcpkg_fixup_pkgconfig() + +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include") +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/share") + 
+set(VCPKG_POLICY_MISMATCHED_NUMBER_OF_BINARIES enabled) + +file(INSTALL "${CMAKE_CURRENT_LIST_DIR}/usage" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}") +vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/LICENSE") diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/usage b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/usage new file mode 100644 index 0000000000..9b23041f03 --- /dev/null +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/usage @@ -0,0 +1,10 @@ +The package ggml provides CMake integration: + + find_package(ggml CONFIG REQUIRED) + target_link_libraries(main PRIVATE ggml::ggml) + +Available vcpkg features: + metal - Metal GPU backend (macOS/iOS, auto-enabled on Apple) + vulkan - Vulkan GPU backend + cuda - CUDA GPU backend + opencl - OpenCL GPU backend diff --git a/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/vcpkg.json b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/vcpkg.json new file mode 100644 index 0000000000..2ece3c0404 --- /dev/null +++ b/packages/transcription-parakeet/vcpkg-overlay-ports/ggml-speech/vcpkg.json @@ -0,0 +1,53 @@ +{ + "name": "ggml-speech", + "version-date": "2026-06-04", + "description": "Speech-stack flavour of ggml from tetherto/qvac-ext-ggml@speech, including the ggml-org v0.10.2 sync (PR #13), the iOS Metal NULL-safety hardening from PR #10, GustavoA1604's Android per-arch CPU dlopen fallback (PR #11), the Mac M2 PAD test fix, and Adreno-aware Android OpenCL/Vulkan backend selection (PR #18, QVAC-18993): on Android the loader detects the GPU via Vulkan and only keeps OpenCL for Adreno > 700, CPU-only for Adreno 1..700, Vulkan/CPU for non-Adreno. Adds Adreno OpenCL elementwise kernels (sin/cos/abs/elu/leaky_relu) for Chatterbox S3Gen and robust Adreno-generation detection (PR #17, #19). 
Library filenames are prefixed libqvac-speech-ggml-* so they coexist with libqvac-ggml-* (fabric/llm) and libqvac-diffusion-ggml-* on the same Android device. Mutually exclusive with the regular ggml port in the same triplet -- pick one per build.", + "homepage": "https://github.com/tetherto/qvac-ext-ggml/tree/speech", + "license": "MIT", + "dependencies": [ + { + "name": "vcpkg-cmake", + "host": true + }, + { + "name": "vcpkg-cmake-config", + "host": true + } + ], + "default-features": [ + { + "name": "metal", + "platform": "osx | ios" + }, + { + "name": "vulkan", + "platform": "windows | linux | android" + } + ], + "features": { + "cuda": { + "description": "Enable CUDA GPU backend" + }, + "metal": { + "description": "Enable Metal GPU backend (macOS/iOS)" + }, + "metal-fuse-mv-bias": { + "description": "Compile in the Q-variant mul_mv + ADD(bias) [+ ADD(residual)] fusion in ggml-metal. Off by default: empirically produces zero tokens on parakeet's EOU q8_0 joint network. Opt in only after Metal A/B-validating your model against the fused path." + }, + "opencl": { + "description": "Enable OpenCL GPU backend", + "dependencies": [ + "opencl" + ] + }, + "vulkan": { + "description": "Enable Vulkan GPU backend", + "dependencies": [ + { + "name": "spirv-headers", + "version>=": "1.4.341.0" + } + ] + } + } +} diff --git a/packages/transcription-parakeet/vcpkg.json b/packages/transcription-parakeet/vcpkg.json index 27ff0c1ab2..96baaef19a 100644 --- a/packages/transcription-parakeet/vcpkg.json +++ b/packages/transcription-parakeet/vcpkg.json @@ -11,7 +11,8 @@ { "name": "parakeet-cpp", "version>=": "2026-06-04#0", - "features": ["vulkan", "opencl"], + "default-features": false, + "features": ["vulkan"], "platform": "android" }, {