diff --git a/VERSION b/VERSION
index 9b7a431d9f..448ada3bda 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-3.2.4
\ No newline at end of file
+3.2.5
\ No newline at end of file
diff --git a/src/VirtualClient/VirtualClient.Actions.UnitTests/SuperBenchmark/SuperBenchmarkExecutorTests.cs b/src/VirtualClient/VirtualClient.Actions.UnitTests/SuperBenchmark/SuperBenchmarkExecutorTests.cs
index 92523d692c..265505b5d6 100644
--- a/src/VirtualClient/VirtualClient.Actions.UnitTests/SuperBenchmark/SuperBenchmarkExecutorTests.cs
+++ b/src/VirtualClient/VirtualClient.Actions.UnitTests/SuperBenchmark/SuperBenchmarkExecutorTests.cs
@@ -149,7 +149,7 @@ public async Task SuperBenchmarkExecutorUsesTheExpectedScriptFilesOnExecution()
         public async Task SuperBenchmarkExecutorDeploySuperBenchContainer()
         {
             ProcessStartInfo expectedInfo = new ProcessStartInfo();
-            string expectedCommand = $"sb deploy --host-list localhost -i testContainer";
+            string expectedCommand = $"bash -c \"source ./venv/bin/activate && sb deploy --host-list localhost -i testContainer\"";
 
             bool commandExecuted = false;
             this.mockFixture.ProcessManager.OnCreateProcess = (exe, arguments, workingDir) =>
@@ -184,7 +184,7 @@ public async Task SuperBenchmarkExecutorDeploySuperBenchContainer()
         public async Task SuperBenchmarkExecutorRunsTheExpectedWorkloadCommand()
         {
             ProcessStartInfo expectedInfo = new ProcessStartInfo();
-            string expectedCommand = $"sb run --host-list localhost -c Test.yaml";
+            string expectedCommand = $"bash -c \"source ./venv/bin/activate && sb run --host-list localhost -c Test.yaml\"";
 
             bool commandExecuted = false;
             this.mockFixture.ProcessManager.OnCreateProcess = (exe, arguments, workingDir) =>
@@ -224,8 +224,8 @@ public async Task SuperBenchmarkExecutorExecutesTheCorrectCommandsWithInstallati
                 $"sudo chmod -R 2777 \"{this.mockFixture.PlatformSpecifics.CurrentDirectory}\"",
                 $"sudo git clone -b v0.0.1 https://github.com/microsoft/superbenchmark",
                 $"sudo bash initialize.sh testuser",
-                $"sb deploy --host-list localhost -i testContainer",
-                $"sb run --host-list localhost -c Test.yaml"
+                $"bash -c \"source ./venv/bin/activate && sb deploy --host-list localhost -i testContainer\"",
+                $"bash -c \"source ./venv/bin/activate && sb run --host-list localhost -c Test.yaml\""
             };
 
             int processCount = 0;
@@ -278,8 +278,8 @@ public async Task SuperBenchmarkExecutorExecutesTheCorrectCommandsWithInstallati
                 $"sudo chmod -R 2777 \"{this.mockFixture.PlatformSpecifics.CurrentDirectory}\"",
                 $"sudo git clone -b v0.0.1 https://github.com/microsoft/superbenchmark",
                 $"sudo bash initialize.sh testuser /docker/path",
-                $"sb deploy --host-list localhost -i testContainer",
-                $"sb run --host-list localhost -c Test.yaml"
+                $"bash -c \"source ./venv/bin/activate && sb deploy --host-list localhost -i testContainer\"",
+                $"bash -c \"source ./venv/bin/activate && sb run --host-list localhost -c Test.yaml\""
             };
 
             int processCount = 0;
@@ -320,7 +320,7 @@ public async Task SuperBenchmarkExecutorSkipsInitializationOfTheWorkloadForExecu
             ProcessStartInfo expectedInfo = new ProcessStartInfo();
             List<string> expectedCommands = new List<string>
             {
-                $"sb run --host-list localhost -c Test.yaml"
+                $"bash -c \"source ./venv/bin/activate && sb run --host-list localhost -c Test.yaml\""
             };
 
             int processCount = 0;
diff --git a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/SuperBenchmarkExecutor.cs b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/SuperBenchmarkExecutor.cs
index 1010966216..ca827c261c 100644
--- a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/SuperBenchmarkExecutor.cs
+++ b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/SuperBenchmarkExecutor.cs
@@ -20,7 +20,7 @@ namespace VirtualClient.Actions
     /// <summary>
     /// The SuperBenchmark workload executor.
     /// </summary>
-    [SupportedPlatforms("linux-x64", true)]
+    [SupportedPlatforms("linux-x64,linux-arm64", true)]
     public class SuperBenchmarkExecutor : VirtualClientComponent
     {
         private const string SuperBenchmarkRunShell = "RunSuperBenchmark.sh";
@@ -139,8 +139,9 @@ protected override async Task ExecuteAsync(EventContext telemetryContext, Cancel
             using (BackgroundOperations profiling = BackgroundOperations.BeginProfiling(this, cancellationToken))
             {
                 string commandArguments = this.GetCommandLineArguments();
+                string commandWithVenv = $"-c \"source ./venv/bin/activate && sb {commandArguments}\"";
 
-                using (IProcessProxy process = await this.ExecuteCommandAsync("sb", commandArguments, this.SuperBenchmarkDirectory, telemetryContext, cancellationToken, runElevated: false))
+                using (IProcessProxy process = await this.ExecuteCommandAsync("bash", commandWithVenv, this.SuperBenchmarkDirectory, telemetryContext, cancellationToken, runElevated: false))
                 {
                     if (!cancellationToken.IsCancellationRequested)
                     {
@@ -166,7 +167,6 @@ protected override async Task InitializeAsync(EventContext telemetryContext, Can
 
             if (!state.SuperBenchmarkInitialized)
             {
-                // This is to grant directory folders for 
                 await this.systemManager.MakeFilesExecutableAsync(this.PlatformSpecifics.CurrentDirectory, this.Platform, cancellationToken);
 
                 string cloneDir = this.PlatformSpecifics.Combine(this.PlatformSpecifics.PackagesDirectory, "superbenchmark");
@@ -191,7 +191,8 @@ protected override async Task InitializeAsync(EventContext telemetryContext, Can
                 }
 
                 await this.ExecuteSbCommandAsync("bash", initializeArgs, this.SuperBenchmarkDirectory, telemetryContext, cancellationToken, true);
-                await this.ExecuteSbCommandAsync("sb", $"deploy --host-list localhost -i {this.ContainerVersion}", this.SuperBenchmarkDirectory, telemetryContext, cancellationToken, false);
+                string commandWithVenv = $"-c \"source ./venv/bin/activate && sb deploy --host-list localhost -i {this.ContainerVersion}\"";
+                await this.ExecuteSbCommandAsync("bash", commandWithVenv, this.SuperBenchmarkDirectory, telemetryContext, cancellationToken, false);
 
                 state.SuperBenchmarkInitialized = true;
             }
diff --git a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/default.yaml b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/default.yaml
index b2f75f655c..b36cbb5b97 100644
--- a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/default.yaml
+++ b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/default.yaml
@@ -1,20 +1,24 @@
 # SuperBench Config
-version: v0.8
+version: v0.12
 superbench:
   enable: null
+  monitor:
+    enable: true
+    sample_duration: 1
+    sample_interval: 10
   var:
     default_local_mode: &default_local_mode
       enable: true
       modes:
         - name: local
-          proc_num: 1
+          proc_num: 8
           prefix: CUDA_VISIBLE_DEVICES={proc_rank}
           parallel: yes
     default_pytorch_mode: &default_pytorch_mode
       enable: true
       modes:
         - name: torch.distributed
-          proc_num: 1
+          proc_num: 8
           node_num: 1
       frameworks:
         - pytorch
@@ -22,33 +26,240 @@ superbench:
       duration: 0
       num_warmup: 16
       num_steps: 128
+      batch_size: 1
       precision:
         - float32
         - float16
       model_action:
         - train
   benchmarks:
+    gpu-burn:
+      enable: true
+      modes:
+        - name: local
+          proc_num: 1
+          parallel: no
+      parameters:
+        time: 300
+        doubles: true
+        tensor_core: true
+    nccl-bw:default:
+      enable: true
+      modes:
+        - name: local
+          proc_num: 1
+          parallel: no
+      parameters:
+        ngpus: 8
+    nccl-bw:gdr-only:
+      enable: true
+      modes:
+        - name: local
+          proc_num: 1
+          parallel: no
+          env:
+            NCCL_IB_PCI_RELAXED_ORDERING: '1'
+            NCCL_NET_GDR_LEVEL: '5'
+            NCCL_P2P_DISABLE: '1'
+            NCCL_SHM_DISABLE: '1'
+            NCCL_MIN_NCHANNELS: '16'
+            NCCL_IB_DISABLE: '0'
+      parameters:
+        ngpus: 8
+    ib-loopback:
+      enable: true
+      modes:
+        - name: local
+          proc_num: 4
+          prefix: PROC_RANK={proc_rank} IB_DEVICES=0,2,4,6 NUMA_NODES=1,0,3,2
+          parallel: yes
+        - name: local
+          proc_num: 4
+          prefix: PROC_RANK={proc_rank} IB_DEVICES=1,3,5,7 NUMA_NODES=1,0,3,2
+          parallel: yes
+    disk-benchmark:
+      enable: false
+      modes:
+        - name: local
+          proc_num: 1
+          parallel: no
+      parameters:
+        block_devices:
+          - /dev/nvme0n1
+    cpu-memory-bw-latency:
+      enable: false
+      modes:
+        - name: local
+          proc_num: 1
+          parallel: no
+      parameters:
+        tests:
+          - bandwidth_matrix
+          - latency_matrix
+          - max_bandwidth
+    mem-bw:
+      enable: true
+      modes:
+        - name: local
+          proc_num: 8
+          prefix: CUDA_VISIBLE_DEVICES={proc_rank} numactl -N $(({proc_rank}/2))
+          parallel: no
+    gpu-copy-bw:correctness:
+      enable: true
+      modes:
+        - name: local
+          parallel: no
+      parameters:
+        mem_type:
+          - htod
+          - dtoh
+          - dtod
+        copy_type:
+          - sm
+          - dma
+        size: 4096
+        num_warm_up: 0
+        num_loops: 1
+        check_data: true
+    gpu-copy-bw:perf:
+      enable: true
+      modes:
+        - name: local
+          parallel: no
+      parameters:
+        mem_type:
+          - htod
+          - dtoh
+          - dtod
+        copy_type:
+          - sm
+          - dma
+    nvbandwidth:
+      enable: true
+      modes:
+        - name: local
+          parallel: no
+      parameters:
+        buffer_size: 128
+        test_cases:
+          - host_to_device_memcpy_ce
+          - device_to_host_memcpy_ce
+          - host_to_device_memcpy_sm
+          - device_to_host_memcpy_sm
+        num_loops: 18
+        skip_verification: false
+        disable_affinity: false
+        use_mean: false
+    kernel-launch:
+      <<: *default_local_mode
+    gemm-flops:
+      <<: *default_local_mode
+    cudnn-function:
+      <<: *default_local_mode
+    cublas-function:
+      <<: *default_local_mode
+    matmul:
+      <<: *default_local_mode
+      frameworks:
+        - pytorch
+    sharding-matmul:
+      <<: *default_pytorch_mode
+    computation-communication-overlap:
+      <<: *default_pytorch_mode
+    ib-traffic:
+      enable: false
+      modes:
+        - name: mpi
+          proc_num: 8
+      parameters:
+        msg_size: 8388608
+        ib_dev: mlx5_$LOCAL_RANK
+        gpu_dev: $LOCAL_RANK
+        numa_dev: $((LOCAL_RANK/2))
+    gpcnet-network-test:
+      enable: false
+      modes:
+        - name: mpi
+          proc_num: 1
+          mca:
+            pml: ucx
+            btl: ^uct
+            btl_tcp_if_include: eth0
+          env:
+            UCX_NET_DEVICES: mlx5_0:1
+    gpcnet-network-load-test:
+      enable: false
+      modes:
+        - name: mpi
+          proc_num: 1
+          mca:
+            pml: ucx
+            btl: ^uct
+            btl_tcp_if_include: eth0
+          env:
+            UCX_NET_DEVICES: mlx5_0:1
+    tcp-connectivity:
+      enable: false
+      modes:
+        - name: local
+          parallel: no
+      parameters:
+        port: 22
+    ort-inference:
+      <<: *default_local_mode
+      parameters:
+        batch_size: 1
+    tensorrt-inference:
+      <<: *default_local_mode
+      parameters:
+        pytorch_models:
+          - resnet50
+          - resnet101
+          - resnet152
+          - densenet169
+          - densenet201
+          - bert-base
+          - bert-large
+        seq_length: 224
+        batch_size: 1
+        precision: int8
+    megatron-gpt:
+      modes:
+      - name: mpi
+        proc_num: 1
+        node_num: all
+      parameters:
+        code_base: /opt/superbench/third_party/Megatron/Megatron-DeepSpeed/
+        dataset_url: https://huggingface.co/datasets/suolyer/pile_bookcorpus2/raw/main/test.json
+        batch_size: 2048
+        num_warmup: 0
+        num_steps: 10
+        precision:
+          - float16
+          - bfloat16
+        deepspeed: yes
+        sequence_parallel: yes
+        use_rotary_position_embeddings: yes
     gpt_models:
       <<: *default_pytorch_mode
       models:
         - gpt2-small
+        - gpt2-large
       parameters:
         <<: *common_model_config
-        batch_size: 1
     bert_models:
       <<: *default_pytorch_mode
       models:
         - bert-base
+        - bert-large
       parameters:
         <<: *common_model_config
-        batch_size: 2
     lstm_models:
       <<: *default_pytorch_mode
       models:
         - lstm
       parameters:
         <<: *common_model_config
-        batch_size: 32
     resnet_models:
       <<: *default_pytorch_mode
       models:
@@ -57,7 +268,6 @@ superbench:
         - resnet152
       parameters:
         <<: *common_model_config
-        batch_size: 32
     densenet_models:
       <<: *default_pytorch_mode
       models:
@@ -65,7 +275,6 @@ superbench:
         - densenet201
       parameters:
         <<: *common_model_config
-        batch_size: 32
     vgg_models:
       <<: *default_pytorch_mode
       models:
@@ -74,5 +283,4 @@ superbench:
         - vgg16
         - vgg19
       parameters:
-        <<: *common_model_config
-        batch_size: 32
+        <<: *common_model_config
\ No newline at end of file
diff --git a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh
index 4e2d9262b1..18a7c4454c 100644
--- a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh
+++ b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh
@@ -1,13 +1,13 @@
+sudo apt update
+sudo apt install -y python3-venv python3-full
+
 # Ansible will use sudo which needs explicit password input. This command removes that step.
-echo '$1 ALL=(ALL) NOPASSWD:ALL' | (sudo EDITOR='tee -a' visudo) 
+echo "$1 ALL=(ALL) NOPASSWD:ALL" | (sudo EDITOR='tee -a' visudo)  # double quotes so $1 (the username argument) is expanded
 
 # Remove any existing system-installed Ansible to avoid version conflicts
 sudo apt remove -y ansible || true
 sudo pip3 uninstall -y ansible ansible-base ansible-core || true
 
-# Install ansible-core compatible with Python 3.8 (Ubuntu 20.04)
-python3 -m pip install --user "ansible-core>=2.12,<2.14"
-
 # Ensure the pip user-installed ansible is in PATH and takes precedence
 export PATH=/home/$1/.local/bin:$PATH
 
@@ -33,17 +33,17 @@ if [[ -n "${2:-}" ]]; then
   # Start Docker back up
   sudo systemctl start docker
 
-  # (Optional) Warm-up/check NVIDIA devices as you had in the commented section
-  # sudo docker run --rm --gpus all nvidia/cuda:11.0.3-base nvidia-smi
 else
   echo "No second argument provided; skipping Docker data-root configuration."
 fi
 
-# Command to install sb dependencies.
-python3 -m pip install .
+# Remove any virtual environment left over from a previous (possibly failed) run
+rm -rf ./venv
+# Create a new virtual environment
+python3 -m venv ./venv
+# Activate the virtual environment so the pip install and make steps below use it
+source ./venv/bin/activate
 
-# Command to build sb.
-make postinstall 
-
-# This command initiates /dev/nvidiactl and /dev/nvidia-uvm directories, which sb checks before running.
-sudo docker run --rm --gpus all nvidia/cuda:11.0.3-base nvidia-smi 
\ No newline at end of file
+# Commands to build sb.
+python3 -m pip install .
+make postinstall
\ No newline at end of file
diff --git a/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json b/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json
index 9d43ae6774..05ef279102 100644
--- a/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json
+++ b/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json
@@ -3,7 +3,7 @@
     "MinimumExecutionInterval": "00:01:00",
     "Metadata": {
         "RecommendedMinimumExecutionTime": "08:00:00",
-        "SupportedPlatforms": "linux-x64",
+        "SupportedPlatforms": "linux-x64,linux-arm64",
         "SupportedOperatingSystems": "Ubuntu",
         "PreliminaryRequirements": "Please note that the GPU hardware driver and related toolsets (e.g. Nvidia CUDA) MUST be already installed on the system for the workloads to function correctly."
     },
@@ -18,10 +18,10 @@
             "Parameters": {
                 "Scenario": "Models",
                 "Username": "$.Parameters.Username",
-                "Version": "0.9.0",
+                "Version": "0.12.0",
                 "DockerContainerPath": "$.Parameters.DockerContainerPath",
                 "ConfigurationFile": "$.Parameters.ConfigurationFile",
-                "ContainerVersion": "superbench/superbench:v0.9.0-cuda12.1"
+                "ContainerVersion": "superbench/superbench:v0.12.0-cuda12.9"
             }
         }
     ]
diff --git a/src/VirtualClient/VirtualClient.Main/profiles/SETUP-GPU-NVIDIA-GB200.json b/src/VirtualClient/VirtualClient.Main/profiles/SETUP-GPU-NVIDIA-GB200.json
new file mode 100644
index 0000000000..164eb54ef8
--- /dev/null
+++ b/src/VirtualClient/VirtualClient.Main/profiles/SETUP-GPU-NVIDIA-GB200.json
@@ -0,0 +1,53 @@
+{
+    "Description": "NVIDIA GB200 GPU Driver Installation Dependency",
+    "Metadata": {
+        "RecommendedMinimumExecutionTime": "00:10:00",
+        "SupportedPlatforms": "linux-arm64",
+        "SupportedOperatingSystems": "Linux",
+        "SupportedLinuxGpuModel": "NVIDIA GB200",
+        "SupportedLinuxDistros": "Ubuntu24",
+        "SpecialRequirements": "This is an NVIDIA GPU Driver dependency. It can only be installed on the system having an NVIDIA GB200 GPU card/chip."
+    },
+    "Parameters": {
+        "ConfigurationFile": "default.yaml",
+        "Username": "",
+        "LinuxCudaVersion": "13.1",
+        "LinuxDriverVersion": "590",
+        "LinuxLocalRunFile": "https://developer.download.nvidia.com/compute/cuda/13.1.1/local_installers/cuda_13.1.1_590.48.01_linux_sbsa.run"
+    },
+    "Dependencies": [
+        {
+            "Type": "DockerInstallation",
+            "Parameters": {
+                "Scenario": "InstallDocker"
+            }
+        },
+        {
+            "Type": "NvidiaCudaInstallation",
+            "Parameters": {
+                "Scenario": "InstallNvidiaCuda",
+                "LinuxCudaVersion": "$.Parameters.LinuxCudaVersion",
+                "LinuxDriverVersion": "$.Parameters.LinuxDriverVersion",
+                "Username": "$.Parameters.Username",
+                "LinuxLocalRunFile": "$.Parameters.LinuxLocalRunFile"
+            }
+        },
+        {
+            "Type": "NvidiaContainerToolkitInstallation",
+            "Parameters": {
+                "Scenario": "InstallNvidiaContainerToolkit"
+            }
+        },
+        {
+            "Type": "LinuxPackageInstallation",
+            "Parameters": {
+                "Scenario": "InstallLinuxPackages",
+                "Packages": "sshpass,python3-pip",
+                "Packages-Apt": "nvidia-common",
+                "Packages-Dnf": "nvidia-driver",
+                "Packages-Yum": "nvidia-driver",
+                "Packages-Zypper": ""
+            }
+        }
+    ]
+}
\ No newline at end of file