Skip to content

Commit 028894e

Browse files
committed
lintrunner
1 parent c7450dd commit 028894e

5 files changed

Lines changed: 28 additions & 21 deletions

File tree

backends/cuda/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,9 @@ set(_aoti_cuda_shim_sources runtime/shims/memory.cpp
109109

110110
# Only build CUDA shims when CUDA language/toolchain is available.
111111
if(CMAKE_CUDA_COMPILER)
112-
list(APPEND _aoti_cuda_shim_sources runtime/shims/int4mm.cu runtime/shims/rand.cu)
112+
list(APPEND _aoti_cuda_shim_sources runtime/shims/int4mm.cu
113+
runtime/shims/rand.cu
114+
)
113115
endif()
114116

115117
add_library(aoti_cuda_shims SHARED ${_aoti_cuda_shim_sources})

backends/cuda/runtime/cuda_backend.cpp

Lines changed: 19 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -80,8 +80,7 @@ namespace {
8080
constexpr char kSkipCopyOutputToCpuForMethod[] =
8181
"skip_copy_output_to_cpu_for_method";
8282
constexpr char kUseSharedCudaStream[] = "use_shared_cuda_stream";
83-
constexpr char kEnableCudaGraphForMethod[] =
84-
"enable_cuda_graph_for_method";
83+
constexpr char kEnableCudaGraphForMethod[] = "enable_cuda_graph_for_method";
8584
constexpr int kCudaGraphWarmupSteps = 3;
8685
} // anonymous namespace
8786

@@ -410,7 +409,9 @@ class ET_EXPERIMENTAL CudaBackend final
410409
cudaDeviceSynchronize();
411410
buffer_res->Free();
412411
} else {
413-
ET_LOG(Info, "weights_blob '%s' not found or update fn is null",
412+
ET_LOG(
413+
Info,
414+
"weights_blob '%s' not found or update fn is null",
414415
weights_blob_key.c_str());
415416
}
416417

@@ -649,13 +650,17 @@ class ET_EXPERIMENTAL CudaBackend final
649650
void* static_ptr = nullptr;
650651
cudaError_t merr = cudaMalloc(&static_ptr, nbytes);
651652
ET_CHECK_OR_RETURN_ERROR(
652-
merr == cudaSuccess, Internal,
653+
merr == cudaSuccess,
654+
Internal,
653655
"cudaMalloc for static input %zu failed: %s",
654-
i, cudaGetErrorString(merr));
656+
i,
657+
cudaGetErrorString(merr));
655658

656659
cudaMemcpy(
657-
static_ptr, cpu_tensor->const_data_ptr(),
658-
nbytes, cudaMemcpyHostToDevice);
660+
static_ptr,
661+
cpu_tensor->const_data_ptr(),
662+
nbytes,
663+
cudaMemcpyHostToDevice);
659664

660665
handle->static_input_ptrs.push_back(static_ptr);
661666
handle->static_input_sizes.push_back(sizes_vec);
@@ -669,7 +674,8 @@ class ET_EXPERIMENTAL CudaBackend final
669674
slim::makeArrayRef(sizes_vec),
670675
slim::makeArrayRef(strides_vec),
671676
static_cast<slim::c10::ScalarType>(cpu_tensor->scalar_type()),
672-
DEFAULT_CUDA_DEVICE, 0));
677+
DEFAULT_CUDA_DEVICE,
678+
0));
673679
continue;
674680
}
675681

@@ -755,8 +761,8 @@ class ET_EXPERIMENTAL CudaBackend final
755761
"CUDA graph: beginning stream capture for '%s'",
756762
handle->method_name.c_str());
757763

758-
cudaError_t cerr = cudaStreamBeginCapture(
759-
cuda_stream, cudaStreamCaptureModeRelaxed);
764+
cudaError_t cerr =
765+
cudaStreamBeginCapture(cuda_stream, cudaStreamCaptureModeRelaxed);
760766
ET_CHECK_OR_RETURN_ERROR(
761767
cerr == cudaSuccess,
762768
Internal,
@@ -791,16 +797,16 @@ class ET_EXPERIMENTAL CudaBackend final
791797

792798
if (is_capture_step) {
793799
// End capture → instantiate graph
794-
cudaError_t gerr =
795-
cudaStreamEndCapture(cuda_stream, &handle->cuda_graph);
800+
cudaError_t gerr = cudaStreamEndCapture(cuda_stream, &handle->cuda_graph);
796801
ET_CHECK_OR_RETURN_ERROR(
797802
gerr == cudaSuccess,
798803
Internal,
799804
"cudaStreamEndCapture failed: %s",
800805
cudaGetErrorString(gerr));
801806

802807
gerr = cudaGraphInstantiate(
803-
&handle->cuda_graph_exec, handle->cuda_graph,
808+
&handle->cuda_graph_exec,
809+
handle->cuda_graph,
804810
cudaGraphInstantiateFlagAutoFreeOnLaunch);
805811
ET_CHECK_OR_RETURN_ERROR(
806812
gerr == cudaSuccess,

backends/cuda/runtime/cuda_delegate_handle.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@ struct CudaDelegateHandle : public aoti::AOTIDelegateHandle {
7373
// These hold the tensor metadata; the underlying data pointers are fixed
7474
// addresses that CUDA graph replay will write to / read from.
7575
// SlimTensor pointers — owned by this handle.
76-
std::vector<void*> static_input_ptrs; // raw GPU data pointers for inputs
76+
std::vector<void*> static_input_ptrs; // raw GPU data pointers for inputs
7777
std::vector<void*> static_output_ptrs; // raw GPU data pointers for outputs
7878
std::vector<std::vector<int64_t>> static_input_sizes;
7979
std::vector<std::vector<int64_t>> static_input_strides;

examples/models/qwen3_5_moe/export.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -434,7 +434,7 @@ def export_and_lower(model, config, args):
434434
prefill_dynamic_shapes = (
435435
{1: seq_dim}, # tokens
436436
{0: seq_dim}, # input_pos
437-
None, # temperature (static scalar)
437+
None, # temperature (static scalar)
438438
)
439439
with torch.no_grad():
440440
prefill_ep = export(

examples/models/qwen3_5_moe/main.cpp

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -157,11 +157,10 @@ int main(int argc, char** argv) {
157157

158158
// Use a very small temperature for greedy to avoid division by zero
159159
// while keeping the Gumbel noise negligible relative to logit differences.
160-
float temp_val = FLAGS_temperature <= 0.0
161-
? 1e-6f
162-
: static_cast<float>(FLAGS_temperature);
163-
auto temp_tensor = from_blob(
164-
&temp_val, {1}, executorch::aten::ScalarType::Float);
160+
float temp_val =
161+
FLAGS_temperature <= 0.0 ? 1e-6f : static_cast<float>(FLAGS_temperature);
162+
auto temp_tensor =
163+
from_blob(&temp_val, {1}, executorch::aten::ScalarType::Float);
165164

166165
// ---------------------------------------------------------------
167166
// Prefill

0 commit comments

Comments
 (0)