Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.2.4
3.2.5
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ public async Task SuperBenchmarkExecutorUsesTheExpectedScriptFilesOnExecution()
public async Task SuperBenchmarkExecutorDeploySuperBenchContainer()
{
ProcessStartInfo expectedInfo = new ProcessStartInfo();
string expectedCommand = $"sb deploy --host-list localhost -i testContainer";
string expectedCommand = $"bash -c \"source ./venv/bin/activate && sb deploy --host-list localhost -i testContainer\"";

bool commandExecuted = false;
this.mockFixture.ProcessManager.OnCreateProcess = (exe, arguments, workingDir) =>
Expand Down Expand Up @@ -184,7 +184,7 @@ public async Task SuperBenchmarkExecutorDeploySuperBenchContainer()
public async Task SuperBenchmarkExecutorRunsTheExpectedWorkloadCommand()
{
ProcessStartInfo expectedInfo = new ProcessStartInfo();
string expectedCommand = $"sb run --host-list localhost -c Test.yaml";
string expectedCommand = $"bash -c \"source ./venv/bin/activate && sb run --host-list localhost -c Test.yaml\"";

bool commandExecuted = false;
this.mockFixture.ProcessManager.OnCreateProcess = (exe, arguments, workingDir) =>
Expand Down Expand Up @@ -224,8 +224,8 @@ public async Task SuperBenchmarkExecutorExecutesTheCorrectCommandsWithInstallati
$"sudo chmod -R 2777 \"{this.mockFixture.PlatformSpecifics.CurrentDirectory}\"",
$"sudo git clone -b v0.0.1 https://github.com/microsoft/superbenchmark",
$"sudo bash initialize.sh testuser",
$"sb deploy --host-list localhost -i testContainer",
$"sb run --host-list localhost -c Test.yaml"
$"bash -c \"source ./venv/bin/activate && sb deploy --host-list localhost -i testContainer\"",
$"bash -c \"source ./venv/bin/activate && sb run --host-list localhost -c Test.yaml\""
};

int processCount = 0;
Expand Down Expand Up @@ -278,8 +278,8 @@ public async Task SuperBenchmarkExecutorExecutesTheCorrectCommandsWithInstallati
$"sudo chmod -R 2777 \"{this.mockFixture.PlatformSpecifics.CurrentDirectory}\"",
$"sudo git clone -b v0.0.1 https://github.com/microsoft/superbenchmark",
$"sudo bash initialize.sh testuser /docker/path",
$"sb deploy --host-list localhost -i testContainer",
$"sb run --host-list localhost -c Test.yaml"
$"bash -c \"source ./venv/bin/activate && sb deploy --host-list localhost -i testContainer\"",
$"bash -c \"source ./venv/bin/activate && sb run --host-list localhost -c Test.yaml\""
};

int processCount = 0;
Expand Down Expand Up @@ -320,7 +320,7 @@ public async Task SuperBenchmarkExecutorSkipsInitializationOfTheWorkloadForExecu
ProcessStartInfo expectedInfo = new ProcessStartInfo();
List<string> expectedCommands = new List<string>
{
$"sb run --host-list localhost -c Test.yaml"
$"bash -c \"source ./venv/bin/activate && sb run --host-list localhost -c Test.yaml\""
};

int processCount = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ namespace VirtualClient.Actions
/// <summary>
/// The SuperBenchmark workload executor.
/// </summary>
[SupportedPlatforms("linux-x64", true)]
[SupportedPlatforms("linux-x64,linux-arm64", true)]
public class SuperBenchmarkExecutor : VirtualClientComponent
{
private const string SuperBenchmarkRunShell = "RunSuperBenchmark.sh";
Expand Down Expand Up @@ -139,8 +139,9 @@ protected override async Task ExecuteAsync(EventContext telemetryContext, Cancel
using (BackgroundOperations profiling = BackgroundOperations.BeginProfiling(this, cancellationToken))
{
string commandArguments = this.GetCommandLineArguments();
string commandWithVenv = $"-c \"source ./venv/bin/activate && sb {commandArguments}\"";

using (IProcessProxy process = await this.ExecuteCommandAsync("sb", commandArguments, this.SuperBenchmarkDirectory, telemetryContext, cancellationToken, runElevated: false))
using (IProcessProxy process = await this.ExecuteCommandAsync("bash", commandWithVenv, this.SuperBenchmarkDirectory, telemetryContext, cancellationToken, runElevated: false))
{
if (!cancellationToken.IsCancellationRequested)
{
Expand All @@ -166,7 +167,6 @@ protected override async Task InitializeAsync(EventContext telemetryContext, Can

if (!state.SuperBenchmarkInitialized)
{
// This is to grant directory folders for
await this.systemManager.MakeFilesExecutableAsync(this.PlatformSpecifics.CurrentDirectory, this.Platform, cancellationToken);

string cloneDir = this.PlatformSpecifics.Combine(this.PlatformSpecifics.PackagesDirectory, "superbenchmark");
Expand All @@ -191,7 +191,8 @@ protected override async Task InitializeAsync(EventContext telemetryContext, Can
}

await this.ExecuteSbCommandAsync("bash", initializeArgs, this.SuperBenchmarkDirectory, telemetryContext, cancellationToken, true);
await this.ExecuteSbCommandAsync("sb", $"deploy --host-list localhost -i {this.ContainerVersion}", this.SuperBenchmarkDirectory, telemetryContext, cancellationToken, false);
string commandWithVenv = $"-c \"source ./venv/bin/activate && sb deploy --host-list localhost -i {this.ContainerVersion}\"";
await this.ExecuteSbCommandAsync("bash", commandWithVenv, this.SuperBenchmarkDirectory, telemetryContext, cancellationToken, false);

state.SuperBenchmarkInitialized = true;
}
Expand Down
228 changes: 218 additions & 10 deletions src/VirtualClient/VirtualClient.Actions/SuperBenchmark/default.yaml
Original file line number Diff line number Diff line change
@@ -1,54 +1,265 @@
# SuperBench Config
version: v0.8
version: v0.12
superbench:
enable: null
monitor:
enable: true
sample_duration: 1
sample_interval: 10
var:
default_local_mode: &default_local_mode
enable: true
modes:
- name: local
proc_num: 1
proc_num: 8
prefix: CUDA_VISIBLE_DEVICES={proc_rank}
parallel: yes
default_pytorch_mode: &default_pytorch_mode
enable: true
modes:
- name: torch.distributed
proc_num: 1
proc_num: 8
node_num: 1
frameworks:
- pytorch
common_model_config: &common_model_config
duration: 0
num_warmup: 16
num_steps: 128
batch_size: 1
precision:
- float32
- float16
model_action:
- train
benchmarks:
gpu-burn:
enable: true
modes:
- name: local
proc_num: 1
parallel: no
parameters:
time: 300
doubles: true
tensor_core: true
nccl-bw:default:
enable: true
modes:
- name: local
proc_num: 1
parallel: no
parameters:
ngpus: 8
nccl-bw:gdr-only:
enable: true
modes:
- name: local
proc_num: 1
parallel: no
env:
NCCL_IB_PCI_RELAXED_ORDERING: '1'
NCCL_NET_GDR_LEVEL: '5'
NCCL_P2P_DISABLE: '1'
NCCL_SHM_DISABLE: '1'
NCCL_MIN_NCHANNELS: '16'
NCCL_IB_DISABLE: '0'
parameters:
ngpus: 8
ib-loopback:
enable: true
modes:
- name: local
proc_num: 4
prefix: PROC_RANK={proc_rank} IB_DEVICES=0,2,4,6 NUMA_NODES=1,0,3,2
parallel: yes
- name: local
proc_num: 4
prefix: PROC_RANK={proc_rank} IB_DEVICES=1,3,5,7 NUMA_NODES=1,0,3,2
parallel: yes
disk-benchmark:
enable: false
modes:
- name: local
proc_num: 1
parallel: no
parameters:
block_devices:
- /dev/nvme0n1
cpu-memory-bw-latency:
enable: false
modes:
- name: local
proc_num: 1
parallel: no
parameters:
tests:
- bandwidth_matrix
- latency_matrix
- max_bandwidth
mem-bw:
enable: true
modes:
- name: local
proc_num: 8
prefix: CUDA_VISIBLE_DEVICES={proc_rank} numactl -N $(({proc_rank}/2))
parallel: no
gpu-copy-bw:correctness:
enable: true
modes:
- name: local
parallel: no
parameters:
mem_type:
- htod
- dtoh
- dtod
copy_type:
- sm
- dma
size: 4096
num_warm_up: 0
num_loops: 1
check_data: true
gpu-copy-bw:perf:
enable: true
modes:
- name: local
parallel: no
parameters:
mem_type:
- htod
- dtoh
- dtod
copy_type:
- sm
- dma
nvbandwidth:
enable: true
modes:
- name: local
parallel: no
parameters:
buffer_size: 128
test_cases:
- host_to_device_memcpy_ce
- device_to_host_memcpy_ce
- host_to_device_memcpy_sm
- device_to_host_memcpy_sm
num_loops: 18
skip_verification: false
disable_affinity: false
use_mean: false
kernel-launch:
<<: *default_local_mode
gemm-flops:
<<: *default_local_mode
cudnn-function:
<<: *default_local_mode
cublas-function:
<<: *default_local_mode
matmul:
<<: *default_local_mode
frameworks:
- pytorch
sharding-matmul:
<<: *default_pytorch_mode
computation-communication-overlap:
<<: *default_pytorch_mode
ib-traffic:
enable: false
modes:
- name: mpi
proc_num: 8
parameters:
msg_size: 8388608
ib_dev: mlx5_$LOCAL_RANK
gpu_dev: $LOCAL_RANK
numa_dev: $((LOCAL_RANK/2))
gpcnet-network-test:
enable: false
modes:
- name: mpi
proc_num: 1
mca:
pml: ucx
btl: ^uct
btl_tcp_if_include: eth0
env:
UCX_NET_DEVICES: mlx5_0:1
gpcnet-network-load-test:
enable: false
modes:
- name: mpi
proc_num: 1
mca:
pml: ucx
btl: ^uct
btl_tcp_if_include: eth0
env:
UCX_NET_DEVICES: mlx5_0:1
tcp-connectivity:
enable: false
modes:
- name: local
parallel: no
parameters:
port: 22
ort-inference:
<<: *default_local_mode
parameters:
batch_size: 1
tensorrt-inference:
<<: *default_local_mode
parameters:
pytorch_models:
- resnet50
- resnet101
- resnet152
- densenet169
- densenet201
- bert-base
- bert-large
seq_length: 224
batch_size: 1
precision: int8
megatron-gpt:
modes:
- name: mpi
proc_num: 1
node_num: all
parameters:
code_base: /opt/superbench/third_party/Megatron/Megatron-DeepSpeed/
dataset_url: https://huggingface.co/datasets/suolyer/pile_bookcorpus2/raw/main/test.json
batch_size: 2048
num_warmup: 0
num_steps: 10
precision:
- float16
- bfloat16
deepspeed: yes
sequence_parallel: yes
use_rotary_position_embeddings: yes
gpt_models:
<<: *default_pytorch_mode
models:
- gpt2-small
- gpt2-large
parameters:
<<: *common_model_config
batch_size: 1
bert_models:
<<: *default_pytorch_mode
models:
- bert-base
- bert-large
parameters:
<<: *common_model_config
batch_size: 2
lstm_models:
<<: *default_pytorch_mode
models:
- lstm
parameters:
<<: *common_model_config
batch_size: 32
resnet_models:
<<: *default_pytorch_mode
models:
Expand All @@ -57,15 +268,13 @@ superbench:
- resnet152
parameters:
<<: *common_model_config
batch_size: 32
densenet_models:
<<: *default_pytorch_mode
models:
- densenet169
- densenet201
parameters:
<<: *common_model_config
batch_size: 32
vgg_models:
<<: *default_pytorch_mode
models:
Expand All @@ -74,5 +283,4 @@ superbench:
- vgg16
- vgg19
parameters:
<<: *common_model_config
batch_size: 32
<<: *common_model_config
Loading
Loading