lintrunner

Gasoonjia · Gasoonjia · commit 028894ef8e45 · 2026-04-13T14:26:07.000-07:00
diff --git a/backends/cuda/CMakeLists.txt b/backends/cuda/CMakeLists.txt
@@ -109,7 +109,9 @@ set(_aoti_cuda_shim_sources runtime/shims/memory.cpp
 
 # Only build CUDA shims when CUDA language/toolchain is available.
 if(CMAKE_CUDA_COMPILER)
-  list(APPEND _aoti_cuda_shim_sources runtime/shims/int4mm.cu runtime/shims/rand.cu)
+  list(APPEND _aoti_cuda_shim_sources runtime/shims/int4mm.cu
+       runtime/shims/rand.cu
+  )
 endif()
 
 add_library(aoti_cuda_shims SHARED ${_aoti_cuda_shim_sources})
diff --git a/backends/cuda/runtime/cuda_backend.cpp b/backends/cuda/runtime/cuda_backend.cpp
@@ -80,8 +80,7 @@ namespace {
 constexpr char kSkipCopyOutputToCpuForMethod[] =
     "skip_copy_output_to_cpu_for_method";
 constexpr char kUseSharedCudaStream[] = "use_shared_cuda_stream";
-constexpr char kEnableCudaGraphForMethod[] =
-    "enable_cuda_graph_for_method";
+constexpr char kEnableCudaGraphForMethod[] = "enable_cuda_graph_for_method";
 constexpr int kCudaGraphWarmupSteps = 3;
 } // anonymous namespace
 
@@ -410,7 +409,9 @@ class ET_EXPERIMENTAL CudaBackend final
       cudaDeviceSynchronize();
       buffer_res->Free();
     } else {
-      ET_LOG(Info, "weights_blob '%s' not found or update fn is null",
+      ET_LOG(
+          Info,
+          "weights_blob '%s' not found or update fn is null",
           weights_blob_key.c_str());
     }
 
@@ -649,13 +650,17 @@ class ET_EXPERIMENTAL CudaBackend final
         void* static_ptr = nullptr;
         cudaError_t merr = cudaMalloc(&static_ptr, nbytes);
         ET_CHECK_OR_RETURN_ERROR(
-            merr == cudaSuccess, Internal,
+            merr == cudaSuccess,
+            Internal,
             "cudaMalloc for static input %zu failed: %s",
-            i, cudaGetErrorString(merr));
+            i,
+            cudaGetErrorString(merr));
 
         cudaMemcpy(
-            static_ptr, cpu_tensor->const_data_ptr(),
-            nbytes, cudaMemcpyHostToDevice);
+            static_ptr,
+            cpu_tensor->const_data_ptr(),
+            nbytes,
+            cudaMemcpyHostToDevice);
 
         handle->static_input_ptrs.push_back(static_ptr);
         handle->static_input_sizes.push_back(sizes_vec);
@@ -669,7 +674,8 @@ class ET_EXPERIMENTAL CudaBackend final
             slim::makeArrayRef(sizes_vec),
             slim::makeArrayRef(strides_vec),
             static_cast<slim::c10::ScalarType>(cpu_tensor->scalar_type()),
-            DEFAULT_CUDA_DEVICE, 0));
+            DEFAULT_CUDA_DEVICE,
+            0));
         continue;
       }
 
@@ -755,8 +761,8 @@ class ET_EXPERIMENTAL CudaBackend final
           "CUDA graph: beginning stream capture for '%s'",
           handle->method_name.c_str());
 
-      cudaError_t cerr = cudaStreamBeginCapture(
-          cuda_stream, cudaStreamCaptureModeRelaxed);
+      cudaError_t cerr =
+          cudaStreamBeginCapture(cuda_stream, cudaStreamCaptureModeRelaxed);
       ET_CHECK_OR_RETURN_ERROR(
           cerr == cudaSuccess,
           Internal,
@@ -791,16 +797,16 @@ class ET_EXPERIMENTAL CudaBackend final
 
     if (is_capture_step) {
       // End capture → instantiate graph
-      cudaError_t gerr =
-          cudaStreamEndCapture(cuda_stream, &handle->cuda_graph);
+      cudaError_t gerr = cudaStreamEndCapture(cuda_stream, &handle->cuda_graph);
       ET_CHECK_OR_RETURN_ERROR(
           gerr == cudaSuccess,
           Internal,
           "cudaStreamEndCapture failed: %s",
           cudaGetErrorString(gerr));
 
       gerr = cudaGraphInstantiate(
-          &handle->cuda_graph_exec, handle->cuda_graph,
+          &handle->cuda_graph_exec,
+          handle->cuda_graph,
           cudaGraphInstantiateFlagAutoFreeOnLaunch);
       ET_CHECK_OR_RETURN_ERROR(
           gerr == cudaSuccess,
diff --git a/backends/cuda/runtime/cuda_delegate_handle.h b/backends/cuda/runtime/cuda_delegate_handle.h
@@ -73,7 +73,7 @@ struct CudaDelegateHandle : public aoti::AOTIDelegateHandle {
   // These hold the tensor metadata; the underlying data pointers are fixed
   // addresses that CUDA graph replay will write to / read from.
   // SlimTensor pointers — owned by this handle.
-  std::vector<void*> static_input_ptrs;  // raw GPU data pointers for inputs
+  std::vector<void*> static_input_ptrs; // raw GPU data pointers for inputs
   std::vector<void*> static_output_ptrs; // raw GPU data pointers for outputs
   std::vector<std::vector<int64_t>> static_input_sizes;
   std::vector<std::vector<int64_t>> static_input_strides;
diff --git a/examples/models/qwen3_5_moe/export.py b/examples/models/qwen3_5_moe/export.py
@@ -434,7 +434,7 @@ def export_and_lower(model, config, args):
     prefill_dynamic_shapes = (
         {1: seq_dim},  # tokens
         {0: seq_dim},  # input_pos
-        None,          # temperature (static scalar)
+        None,  # temperature (static scalar)
     )
     with torch.no_grad():
         prefill_ep = export(
diff --git a/examples/models/qwen3_5_moe/main.cpp b/examples/models/qwen3_5_moe/main.cpp
@@ -157,11 +157,10 @@ int main(int argc, char** argv) {
 
   // Use a very small temperature for greedy to avoid division by zero
   // while keeping the Gumbel noise negligible relative to logit differences.
-  float temp_val = FLAGS_temperature <= 0.0
-      ? 1e-6f
-      : static_cast<float>(FLAGS_temperature);
-  auto temp_tensor = from_blob(
-      &temp_val, {1}, executorch::aten::ScalarType::Float);
+  float temp_val =
+      FLAGS_temperature <= 0.0 ? 1e-6f : static_cast<float>(FLAGS_temperature);
+  auto temp_tensor =
+      from_blob(&temp_val, {1}, executorch::aten::ScalarType::Float);
 
   // ---------------------------------------------------------------
   // Prefill

Original file line number	Diff line number	Diff line change
`@@ -434,7 +434,7 @@ def export_and_lower(model, config, args):`
`434`	`434`	`    prefill_dynamic_shapes = (`
`435`	`435`	`        {1: seq_dim},  # tokens`
`436`	`436`	`        {0: seq_dim},  # input_pos`
`437`		`-        None,          # temperature (static scalar)`
	`437`	`+        None,  # temperature (static scalar)`
`438`	`438`	`    )`
`439`	`439`	`    with torch.no_grad():`
`440`	`440`	`        prefill_ep = export(`