Merge pull request #241 from ruvnet/feat/ruvllm-wasm-publish

ruvnet · web-flow · commit 0f9f55bf669c · 2026-03-06T10:06:05.000-05:00
feat: ruvllm-wasm v2.0.0 — first functional WASM publish
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/crates/ruvllm-wasm/Cargo.toml b/crates/ruvllm-wasm/Cargo.toml
@@ -10,6 +10,9 @@ description = "WASM bindings for RuvLLM - browser-compatible LLM inference runti
 keywords = ["wasm", "llm", "inference", "browser", "webgpu"]
 categories = ["wasm", "api-bindings", "web-programming"]
 
+[package.metadata.wasm-pack.profile.release]
+wasm-opt = false
+
 [lib]
 crate-type = ["cdylib", "rlib"]
 
@@ -18,12 +21,12 @@ crate-type = ["cdylib", "rlib"]
 wasm-bindgen = "0.2"
 wasm-bindgen-futures = "0.4"
 js-sys = "0.3"
+# Core web-sys features (always needed)
 web-sys = { version = "0.3", features = [
     "console",
     "Performance",
     "Window",
     "Navigator",
-    # Web Workers support (enabled with parallel feature)
     "Worker",
     "WorkerOptions",
     "WorkerType",
@@ -33,39 +36,6 @@ web-sys = { version = "0.3", features = [
     "MessageEvent",
     "ErrorEvent",
     "DedicatedWorkerGlobalScope",
-    # WebGPU features (enabled with webgpu feature)
-    "Gpu",
-    "GpuAdapter",
-    "GpuAdapterInfo",
-    "GpuDevice",
-    "GpuQueue",
-    "GpuBuffer",
-    "GpuBufferDescriptor",
-    "GpuShaderModule",
-    "GpuShaderModuleDescriptor",
-    "GpuBindGroup",
-    "GpuBindGroupDescriptor",
-    "GpuBindGroupEntry",
-    "GpuBindGroupLayout",
-    "GpuBindGroupLayoutDescriptor",
-    "GpuBindGroupLayoutEntry",
-    "GpuBufferBinding",
-    "GpuBufferBindingLayout",
-    "GpuBufferBindingType",
-    "GpuComputePipeline",
-    "GpuComputePipelineDescriptor",
-    "GpuPipelineLayout",
-    "GpuPipelineLayoutDescriptor",
-    "GpuProgrammableStage",
-    "GpuCommandEncoder",
-    "GpuCommandEncoderDescriptor",
-    "GpuCommandBuffer",
-    "GpuComputePassEncoder",
-    "GpuComputePassDescriptor",
-    "gpu_map_mode",
-    "GpuRequestAdapterOptions",
-    "GpuDeviceDescriptor",
-    "GpuSupportedLimits",
 ] }
 
 # Serialization
@@ -76,16 +46,27 @@ serde_json = "1.0"
 # Error handling
 console_error_panic_hook = { version = "0.1", optional = true }
 
-# Byte casting for GPU buffers
-bytemuck = { version = "1.14", features = ["derive"] }
 
 [dev-dependencies]
 wasm-bindgen-test = "0.3"
 
 [features]
 default = ["console_error_panic_hook"]
-# WebGPU acceleration
-webgpu = []
+# WebGPU acceleration (adds GPU compute pipeline, shader compilation, buffer management)
+webgpu = ["web-sys/Gpu", "web-sys/GpuAdapter", "web-sys/GpuAdapterInfo",
+    "web-sys/GpuDevice", "web-sys/GpuQueue", "web-sys/GpuBuffer",
+    "web-sys/GpuBufferDescriptor", "web-sys/GpuShaderModule",
+    "web-sys/GpuShaderModuleDescriptor", "web-sys/GpuBindGroup",
+    "web-sys/GpuBindGroupDescriptor", "web-sys/GpuBindGroupEntry",
+    "web-sys/GpuBindGroupLayout", "web-sys/GpuBindGroupLayoutDescriptor",
+    "web-sys/GpuBindGroupLayoutEntry", "web-sys/GpuBufferBinding",
+    "web-sys/GpuBufferBindingLayout", "web-sys/GpuBufferBindingType",
+    "web-sys/GpuComputePipeline", "web-sys/GpuComputePipelineDescriptor",
+    "web-sys/GpuPipelineLayout", "web-sys/GpuPipelineLayoutDescriptor",
+    "web-sys/GpuProgrammableStage", "web-sys/GpuCommandEncoder",
+    "web-sys/GpuCommandEncoderDescriptor", "web-sys/GpuCommandBuffer",
+    "web-sys/GpuComputePassEncoder", "web-sys/GpuComputePassDescriptor",
+    "web-sys/GpuRequestAdapterOptions", "web-sys/GpuDeviceDescriptor"]
 # Enable parallel inference with Web Workers
 parallel = []
 # Enable SIMD optimizations (requires wasm-simd target feature)
diff --git a/docs/adr/ADR-084-ruvllm-wasm-publish.md b/docs/adr/ADR-084-ruvllm-wasm-publish.md
@@ -0,0 +1,83 @@
+# ADR-084: ruvllm-wasm — First Functional npm Publish
+
+**Status**: Accepted
+**Date**: 2026-03-06
+**Authors**: RuVector Team
+**Deciders**: ruv
+**Related**: ADR-083 (Brain Training Loops), Issue #238 (placeholder deprecation)
+
+## 1. Context
+
+The `@ruvector/ruvllm-wasm` npm package (v0.1.0) was a placeholder — published without compiled WASM binaries. It was deprecated in PR #239. Meanwhile, the Rust crate `ruvllm-wasm` (v2.0.0) contains substantial working code:
+
+| Subsystem | Status | Exports |
+|-----------|--------|---------|
+| KV Cache (two-tier FP32+u8) | Working | `KvCacheWasm`, `KvCacheConfigWasm` |
+| Memory (arena + buffer pool) | Working | `InferenceArenaWasm`, `BufferPoolWasm` |
+| Chat Templates (7 formats) | Working | `ChatTemplateWasm`, `ChatMessageWasm` |
+| HNSW Semantic Router | Working | `HnswRouterWasm`, `PatternWasm`, `RouteResultWasm` |
+| MicroLoRA (rank 1-4) | Working | `MicroLoraWasm`, `AdaptFeedbackWasm` |
+| SONA Instant Learning | Working | `SonaInstantWasm`, `SonaConfigWasm` |
+| Web Workers | Working | `ParallelInference`, feature detection |
+| WebGPU (matmul shader) | Feature-gated | `WebGpuInference`, `WebGpuContext` |
+| IntelligentLLM (combined) | Commented out | Pending API compatibility |
+
+## 2. Decision
+
+### 2.1 Fix WASM Build
+
+The Rust 1.91 compiler has a codegen bug where release-profile optimizations produce invalid WASM (type mismatch: `expected i32, found f64` in wasm-bindgen post-processing). Debug builds validate fine.
+
+**Workaround**: Build with `codegen-units=256` + `lto=off`. This prevents cross-function optimization passes that trigger the bug while still producing optimized output.
+
+```bash
+CARGO_PROFILE_RELEASE_CODEGEN_UNITS=256 \
+CARGO_PROFILE_RELEASE_LTO=off \
+wasm-pack build crates/ruvllm-wasm --target web --scope ruvector --release
+```
+
+Added `wasm-opt = false` to `[package.metadata.wasm-pack.profile.release]` since wasm-opt's validator also rejects the binary.
+
+### 2.2 Gate WebGPU Features
+
+The WebGPU `web-sys` features (32 in total: `gpu_map_mode`, `GpuSupportedLimits`, and 30 other GPU types) were compiled unconditionally, inflating binary size. The 30 GPU features that are actually used are now enabled only through the `webgpu` Cargo feature flag.
+
+Removed the unused `bytemuck` dependency and the `gpu_map_mode` / `GpuSupportedLimits` `web-sys` features (declared but never referenced in source).
+
+### 2.3 Publish as v2.0.0
+
+Published `@ruvector/ruvllm-wasm@2.0.0` to npm with:
+- Compiled WASM binary (~435 KB, ~150 KB gzipped)
+- TypeScript definitions (`.d.ts`)
+- ES module JS glue code
+- Accurate README with working API examples
+
+### 2.4 README
+
+Replaced placeholder README with accurate documentation covering all exported types, working code examples, and browser compatibility table.
+
+## 3. Files Modified
+
+| File | Changes |
+|------|---------|
+| `crates/ruvllm-wasm/Cargo.toml` | Gate WebGPU features, remove unused bytemuck/gpu_map_mode/GpuSupportedLimits, add wasm-opt=false |
+| `crates/ruvllm-wasm/pkg/README.md` | Complete rewrite with accurate API docs |
+| `crates/ruvllm-wasm/pkg/` | Generated: `.wasm`, `.js`, `.d.ts` files |
+
+## 4. Build Artifact Details
+
+| File | Size |
+|------|------|
+| `ruvllm_wasm_bg.wasm` | 435 KB |
+| `ruvllm_wasm.js` | 128 KB |
+| `ruvllm_wasm.d.ts` | 45 KB |
+
+## 5. Known Limitations
+
+| Area | Limitation | Resolution Path |
+|------|-----------|-----------------|
+| Rust 1.91 codegen bug | Release builds require the `codegen-units=256` + `lto=off` overrides and `wasm-opt = false` | Remove the workarounds once a Rust compiler release fixes the bug |
+| IntelligentLLMWasm | Commented out, references non-existent `HnswRouterConfigWasm` | Create config struct or pass params directly |
+| WebGPU attention | CPU fallback only (matmul has GPU path) | Implement attention WGSL shader pipeline |
+| Worker pool | Uses `setTimeout` polling instead of proper task completion signals | Implement message-based completion tracking |
+| GGUF model loading | Not yet wired (no `load_model_from_url`) | Requires streaming fetch + parser integration |