Merge branch 'main' into kmorabia/security-coding-guide

kevalmorabia97 · web-flow · commit 5ecd4b0e5b53 · 2026-03-03T02:31:58.000+05:30
diff --git a/.github/workflows/example_tests.yml b/.github/workflows/example_tests.yml
@@ -60,7 +60,7 @@ jobs:
   torch-pr:
     needs: [check-file-changes, wait-checks]
     if: startsWith(github.ref, 'refs/heads/pull-request/') && needs.check-file-changes.outputs.any_changed == 'true'
-    strategy:
+    strategy: &torch_strategy
       fail-fast: false
       matrix:
         example: [llm_distill, llm_qat, llm_sparsity]
@@ -72,25 +72,21 @@ jobs:
     with:
       docker_image: "nvcr.io/nvidia/pytorch:${{ matrix.docker_image || '26.01' }}-py3"
       example: ${{ matrix.example }}
+      timeout_minutes: 30
       pip_install_extras: "[hf,dev-test]"
-      runner: linux-amd64-gpu-l4-latest-1
+      runner: linux-amd64-gpu-h100-latest-1
 
   torch-non-pr:
     if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
-    strategy:
-      fail-fast: false
-      matrix:
-        example: [llm_distill, llm_qat, llm_sparsity]
-        include:
-          - example: speculative_decoding
-            docker_image: "26.01"
+    strategy: *torch_strategy
     uses: ./.github/workflows/_example_tests_runner.yml
     secrets: inherit
     with:
       docker_image: "nvcr.io/nvidia/pytorch:${{ matrix.docker_image || '26.01' }}-py3"
       example: ${{ matrix.example }}
+      timeout_minutes: 30
       pip_install_extras: "[hf,dev-test]"
-      runner: linux-amd64-gpu-h100-latest-2
+      runner: linux-amd64-gpu-rtxpro6000-latest-2
 
   ##### TensorRT-LLM Example Tests #####
   trtllm-pr:
@@ -99,14 +95,14 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        example: [llm_ptq] # vlm_ptq temporarily disabled due to pipeline error
+        example: [llm_ptq, vlm_ptq]
     uses: ./.github/workflows/_example_tests_runner.yml
     secrets: inherit
     with:
-      docker_image: "nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc6.post3"
+      docker_image: "nvcr.io/nvidia/tensorrt-llm/release:1.3.0rc5"
       example: ${{ matrix.example }}
       pip_install_extras: "[hf,dev-test]"
-      runner: linux-amd64-gpu-h100-latest-1
+      runner: linux-amd64-gpu-rtxpro6000-latest-1
 
   trtllm-non-pr:
     if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
@@ -117,16 +113,16 @@ jobs:
     uses: ./.github/workflows/_example_tests_runner.yml
     secrets: inherit
     with:
-      docker_image: "nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc6.post3"
+      docker_image: "nvcr.io/nvidia/tensorrt-llm/release:1.3.0rc5"
       example: ${{ matrix.example }}
       pip_install_extras: "[hf,dev-test]"
-      runner: linux-amd64-gpu-h100-latest-2
+      runner: linux-amd64-gpu-rtxpro6000-latest-2
 
   ##### ONNX/TensorRT Example Tests #####
   onnx-pr:
     needs: [check-file-changes, wait-checks]
     if: startsWith(github.ref, 'refs/heads/pull-request/') && needs.check-file-changes.outputs.any_changed == 'true'
-    strategy:
+    strategy: &onnx_strategy
       fail-fast: false
       matrix:
         example: [diffusers, torch_onnx]
@@ -140,17 +136,14 @@ jobs:
 
   onnx-non-pr:
     if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
-    strategy:
-      fail-fast: false
-      matrix:
-        example: [diffusers, torch_onnx]
+    strategy: *onnx_strategy
     uses: ./.github/workflows/_example_tests_runner.yml
     secrets: inherit
     with:
       docker_image: "nvcr.io/nvidia/tensorrt:26.01-py3"
       example: ${{ matrix.example }}
       pip_install_extras: "[all,dev-test]"
-      runner: linux-amd64-gpu-l4-latest-1
+      runner: linux-amd64-gpu-rtxpro6000-latest-2
 
   ##### Required Check for PR #####
   example-pr-required-check:
diff --git a/.github/workflows/gpu_tests.yml b/.github/workflows/gpu_tests.yml
@@ -59,18 +59,23 @@ jobs:
   gpu-tests-pr:
     needs: [check-file-changes, wait-checks]
     if: needs.check-file-changes.outputs.any_changed == 'true'
-    strategy:
+    strategy: &gpu_strategy
       fail-fast: false
       matrix:
         include:
-          - example: cuda13-gpu
+          - example: gpu
+            timeout: 60
+            container_image: pytorch:26.01-py3
+          - example: gpu-megatron
             timeout: 90
-          - example: cuda13-gpu-megatron
-            timeout: 120
-    runs-on: linux-amd64-gpu-l4-latest-1
+            container_image: pytorch:26.01-py3
+          - example: gpu-trtllm
+            timeout: 30
+            container_image: tensorrt-llm/release:1.3.0rc5
+    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
     timeout-minutes: ${{ matrix.timeout }}
     container: &gpu_container
-      image: nvcr.io/nvidia/pytorch:26.01-py3
+      image: nvcr.io/nvidia/${{ matrix.container_image }}
       env:
         GIT_DEPTH: 1000 # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
         PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
@@ -82,18 +87,11 @@ jobs:
         run: |
           echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu" >> $GITHUB_ENV
       - name: Run gpu tests
-        run: pip install tox-current-env && tox -e ${{ matrix.example }} --current-env
+        run: pip install tox-current-env && tox -e cuda13-${{ matrix.example }} --current-env
   gpu-tests-non-pr:
     if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-          - example: cuda13-gpu
-            timeout: 90
-          - example: cuda13-gpu-megatron
-            timeout: 120
-    runs-on: linux-amd64-gpu-h100-latest-2
+    strategy: *gpu_strategy
+    runs-on: linux-amd64-gpu-rtxpro6000-latest-2
     timeout-minutes: ${{ matrix.timeout }}
     container: *gpu_container
     steps: *gpu_steps
diff --git a/tests/_test_utils/import_helper.py b/tests/_test_utils/import_helper.py
@@ -61,30 +61,10 @@ def skip_if_no_libcudnn():
         pytest.skip(f"{e}!", allow_module_level=True)
 
 
-def skip_if_no_megatron(*, te_required: bool = True, mamba_required: bool = False):
-    try:
-        import megatron  # noqa: F401
-    except ImportError:
-        pytest.skip("megatron not available", allow_module_level=True)
-
-    try:
-        import transformer_engine  # noqa: F401
-
-        has_te = True
-    except ImportError:
-        has_te = False
-
+def skip_if_no_mamba():
     try:
         import mamba_ssm  # noqa: F401
-
-        has_mamba = True
     except ImportError:
-        has_mamba = False
-
-    if te_required and not has_te:
-        pytest.skip("TE required for Megatron test", allow_module_level=True)
-
-    if mamba_required and not has_mamba:
         pytest.skip("Mamba required for Megatron test", allow_module_level=True)
 
 
diff --git a/tests/_test_utils/torch/megatron/models.py b/tests/_test_utils/torch/megatron/models.py
@@ -18,12 +18,8 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from _test_utils.import_helper import skip_if_no_megatron
-from huggingface_hub import constants as hf_constants
-
-skip_if_no_megatron()
-
 from _test_utils.torch.megatron.utils import initialize_for_megatron
+from huggingface_hub import constants as hf_constants
 from megatron.core.models.gpt import GPTModel
 from megatron.core.models.gpt.gpt_layer_specs import (
     get_gpt_layer_local_spec,
diff --git a/tests/_test_utils/torch/megatron/utils.py b/tests/_test_utils/torch/megatron/utils.py
@@ -18,10 +18,6 @@
 from warnings import warn
 
 import torch
-from _test_utils.import_helper import skip_if_no_megatron
-
-skip_if_no_megatron()
-
 from megatron.core import dist_checkpointing
 from megatron.core.inference.communication_utils import broadcast_from_last_pipeline_stage
 from megatron.core.inference.contexts import StaticInferenceContext
diff --git a/tests/gpu/onnx/quantization/autotune/test_workflow.py b/tests/gpu/onnx/quantization/autotune/test_workflow.py
@@ -35,6 +35,7 @@ def simple_conv_model():
     return _test_models._create_simple_conv_onnx_model()
 
 
+@pytest.mark.skip(reason="TODO: Fix test and enable")
 @pytest.mark.parametrize("use_trtexec", [True, False])
 def test_export_quantized_model(use_trtexec, simple_conv_model):
     """Test exporting quantized model with Q/DQ."""
diff --git a/tests/gpu/torch/quantization/test_hadamard.py b/tests/gpu/torch/quantization/test_hadamard.py
@@ -18,7 +18,14 @@
 import torch
 import torch.nn as nn
 
-pytest.importorskip("fast_hadamard_transform")
+fast_hadamard_transform = pytest.importorskip("fast_hadamard_transform")
+
+try:
+    fast_hadamard_transform.hadamard_transform(torch.randn(1, 2, device="cuda"))
+except Exception:
+    pytest.skip(
+        "fast_hadamard_transform CUDA kernels not available for this GPU", allow_module_level=True
+    )
 
 from _test_utils.torch.quantization.models import SDPAAttention
 
diff --git a/tests/gpu_megatron/_extensions b/tests/gpu_megatron/_extensions
diff --git a/tests/gpu_megatron/_extensions/test_torch_extensions.py b/tests/gpu_megatron/_extensions/test_torch_extensions.py
@@ -0,0 +1 @@
+../../gpu/_extensions/test_torch_extensions.py
diff --git a/tests/gpu_megatron/torch/nas/plugins/test_megatron_mamba_dynamic_modules.py b/tests/gpu_megatron/torch/nas/plugins/test_megatron_mamba_dynamic_modules.py
@@ -15,9 +15,9 @@
 
 
 import torch
-from _test_utils.import_helper import skip_if_no_megatron
+from _test_utils.import_helper import skip_if_no_mamba
 
-skip_if_no_megatron(mamba_required=True)
+skip_if_no_mamba()
 
 from _test_utils.torch.distributed.utils import spawn_multiprocess_job
 from _test_utils.torch.megatron.models import get_mcore_mamba_hybrid_model
diff --git a/tests/gpu_megatron/torch/prune/plugins/test_mcore_mamba_minitron_pruning.py b/tests/gpu_megatron/torch/prune/plugins/test_mcore_mamba_minitron_pruning.py
@@ -20,9 +20,9 @@
 
 import pytest
 import torch
-from _test_utils.import_helper import skip_if_no_megatron
+from _test_utils.import_helper import skip_if_no_mamba
 
-skip_if_no_megatron(mamba_required=True)
+skip_if_no_mamba()
 
 from _test_utils.torch.distributed.utils import spawn_multiprocess_job
 from _test_utils.torch.megatron.models import get_mcore_mamba_hybrid_model
diff --git a/tests/gpu_trtllm/_extensions/test_torch_extensions.py b/tests/gpu_trtllm/_extensions/test_torch_extensions.py
@@ -0,0 +1 @@
+../../gpu/_extensions/test_torch_extensions.py
diff --git a/tests/gpu_trtllm/torch/quantization/backends/test_fp8_per_tensor_gemm.py b/tests/gpu_trtllm/torch/quantization/backends/test_fp8_per_tensor_gemm.py
diff --git a/tests/gpu_trtllm/torch/quantization/backends/test_gemm_common.py b/tests/gpu_trtllm/torch/quantization/backends/test_gemm_common.py
diff --git a/tests/gpu_trtllm/torch/quantization/backends/test_gemm_registry.py b/tests/gpu_trtllm/torch/quantization/backends/test_gemm_registry.py
diff --git a/tests/gpu_trtllm/torch/quantization/backends/test_nvfp4_gemm.py b/tests/gpu_trtllm/torch/quantization/backends/test_nvfp4_gemm.py
diff --git a/tox.ini b/tox.ini
@@ -70,12 +70,19 @@ commands =
     # Coverage fails with "Can't combine line data with arc data" error so not using "--cov"
     python -m pytest tests/gpu
 
+[testenv:cuda13-gpu-trtllm]
+commands_pre =
+    # Install deps here so that it gets installed even in --current-env
+    pip install -e .[hf,dev-test]
+commands =
+    python -m pytest tests/gpu_trtllm
+
 [testenv:cuda13-gpu-megatron]
 commands_pre =
     # Install deps here so that it gets installed even in --current-env
     pip install -U megatron-core
     pip install --no-build-isolation git+https://github.com/state-spaces/mamba.git
-    pip install -e .[all,dev-test]
+    pip install -e .[hf,dev-test]
 commands =
     # Coverage fails with "Can't combine line data with arc data" error so not using "--cov"
     python -m pytest tests/gpu_megatron

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -0,0 +1 @@` |
| | `1` | `+../../gpu/_extensions/test_torch_extensions.py` |