Skip to content
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ await this.mockFixture.StateManager.SaveStateAsync(expectedStateId, JObject.Pars
private IEnumerable<string> GetProfileExpectedCommands(PlatformID platform)
{
string setupCommand = "curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey "
+ "| sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \\\n "
+ "| sudo gpg --yes --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \\\n "
+ "&& curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \\\n "
+ " sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \\\n "
+ " sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public class CUDAAndNvidiaGPUDriverInstallationTests

private const string UpgradeCommand = "apt upgrade -y";
private const string InstallDriverCommand = "apt install nvidia-driver-550-server nvidia-dkms-550-server -y";
private const string InstallOpenSourceDriverCommand = "apt install nvidia-driver-550-open nvidia-dkms-550-open -y";
private const string InstallFabricManagerCommand = "apt install cuda-drivers-fabricmanager-550 -y";

private MockFixture fixture;
Expand Down Expand Up @@ -74,6 +75,25 @@ public void CUDAAndNvidiaGPUDriverInstallationDependencyThrowsForUnsupportedDist
Assert.AreEqual(ErrorReason.LinuxDistributionNotSupported, exc.Reason);
}

[Test]
public async Task CUDAAndNvidiaGPUDriverInstallationDependencyStartsCorrectProcessesOnExecuteWithOpenSourceDriver()
{
    // Arrange: configure the component for a Unix platform with the
    // open-source driver option turned on (all other options keep their defaults).
    this.SetupDefaultMockBehavior(PlatformID.Unix, openSourceDriver: true);

    // The commands registered with the process manager, in registration order.
    // The final entry is the "-open" driver package installation command.
    string[] expectedCommands =
    {
        UpdateCommand,
        BuildEssentialInstallationCommand,
        GetRunFileCommand,
        RunRunFileCommand,
        ExportPathCommand,
        ExportLibraryPathCommand,
        UpgradeCommand,
        InstallOpenSourceDriverCommand
    };

    foreach (string expectedCommand in expectedCommands)
    {
        this.SetupProcessManager("sudo", expectedCommand, Environment.CurrentDirectory);
    }

    // Act
    await this.component.ExecuteAsync(CancellationToken.None);

    // Assert: verify the expectations registered on the mock process manager.
    this.mockProcessManager.Verify();
}

[Test]
public async Task CUDAAndNvidiaGPUDriverInstallationDependencyStartsCorrectProcessesOnExecute()
{
Expand Down Expand Up @@ -153,7 +173,7 @@ public async Task CUDAAndNvidiaGPUDriverInstallationDependencyExecutesCorrectIns
this.mockProcessManager.Verify();
}

private void SetupDefaultMockBehavior(PlatformID platformID)
private void SetupDefaultMockBehavior(PlatformID platformID, bool openSourceDriver = false, bool cudaRequired = true)
{
this.fixture.Setup(platformID);
this.mockPackage = new DependencyPath("NvidiaDrivers", this.fixture.GetPackagePath("NvidiaDrivers"));
Expand All @@ -166,6 +186,8 @@ private void SetupDefaultMockBehavior(PlatformID platformID)
{ "LinuxDriverVersion", "550" },
{ "Username", "anyuser" },
{ "LinuxLocalRunFile", "https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run" },
{ "OpenSourceDriver", openSourceDriver },
{ "CudaRequired", cudaRequired },
{ "RebootRequired", false }
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ namespace VirtualClient.Dependencies
public class NvidiaContainerToolkitInstallationTests
{
private const string SetupCommand = "curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey "
+ "| sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \\\n "
+ "| sudo gpg --yes --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \\\n "
+ "&& curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \\\n "
+ " sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \\\n "
+ " sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public CudaAndNvidiaGPUDriverInstallation(IServiceCollection dependencies, IDict
this.stateManager = this.systemManager.StateManager;
this.fileSystem = this.systemManager.FileSystem;
this.packageManager = this.systemManager.PackageManager;
}
}

/// <summary>
/// The version of CUDA to be installed in Linux Systems
Expand All @@ -60,6 +60,22 @@ public string LinuxCudaVersion
}
}

/// <summary>
/// Gets or sets a value indicating whether the open-source variant of the Linux
/// driver is used (required for certain NVIDIA GPU systems such as GB200).
/// The value is stored in the component parameters under the property's own name;
/// the getter passes <c>false</c> as the fallback value.
/// </summary>
public bool OpenSourceDriver
{
    get
    {
        const bool fallback = false;
        string parameterName = nameof(CudaAndNvidiaGPUDriverInstallation.OpenSourceDriver);

        return this.Parameters.GetValue<bool>(parameterName, fallback);
    }

    set
    {
        string parameterName = nameof(CudaAndNvidiaGPUDriverInstallation.OpenSourceDriver);

        this.Parameters[parameterName] = value;
    }
}

/// <summary>
/// The version of Nvidia GPU driver to be installed in Linux Systems
/// </summary>
Expand Down Expand Up @@ -92,6 +108,22 @@ public string LinuxLocalRunFile
}
}

/// <summary>
/// Gets or sets a value indicating whether CUDA is installed directly onto the
/// host machine (not required for Superbench).
/// The value is stored in the component parameters under the property's own name;
/// the getter passes <c>true</c> as the fallback value.
/// </summary>
public bool CudaRequired
{
    get
    {
        const bool fallback = true;
        string parameterName = nameof(CudaAndNvidiaGPUDriverInstallation.CudaRequired);

        return this.Parameters.GetValue<bool>(parameterName, fallback);
    }

    set
    {
        string parameterName = nameof(CudaAndNvidiaGPUDriverInstallation.CudaRequired);

        this.Parameters[parameterName] = value;
    }
}

/// <summary>
/// Determines whether Reboot is required or not after Driver installation
/// </summary>
Expand Down Expand Up @@ -198,12 +230,22 @@ private async Task InstallCudaAndDriversAsync(LinuxDistribution linuxDistributio
new Dictionary<string, object>
{
{ "gpuVendor", "Nvidia" },
{ "gpuDriverVersion_nvidia", this.LinuxDriverVersion },
{ "cudaVersion", this.LinuxCudaVersion }
{ "gpuDriverVersion_nvidia", this.LinuxDriverVersion }
},
MetadataContract.DependenciesCategory,
true);

if (this.CudaRequired)
{
MetadataContract.Persist(
new Dictionary<string, object>
{
{ "cudaVersion", this.LinuxCudaVersion }
},
MetadataContract.DependenciesCategory,
true);
}

// The .bashrc file is used to define commands that should be run whenever the system
// is booted. For the purpose of the CUDA driver installation, we want to include extra
// paths in the $PATH environment variable post installation.
Expand Down Expand Up @@ -235,10 +277,14 @@ await this.fileSystem.File.WriteAllLinesAsync(
// keeping the commands in here for reference in case we may need cleanup in future.
// cleanupCommands,
prerequisiteCommands,
installationCommands,
postInstallationCommands
installationCommands
};

if (this.CudaRequired)
{
commandsLists.Add(postInstallationCommands);
}
Comment thread
saibulusu marked this conversation as resolved.

foreach (var commandsList in commandsLists)
{
foreach (string command in commandsList)
Expand Down Expand Up @@ -327,21 +373,30 @@ private List<string> PrerequisiteCommands(LinuxDistribution linuxDistribution)

private List<string> VersionSpecificInstallationCommands(LinuxDistribution linuxDistribution)
{
string runFileName = this.LinuxLocalRunFile.Split('/').Last();
List<string> commands = new List<string>()
List<string> commands = new List<string>();
if (this.CudaRequired)
{
$"wget {this.LinuxLocalRunFile}",
$"sh {runFileName} --silent --toolkit"
};
string runFileName = this.LinuxLocalRunFile.Split('/').Last();
commands.Add($"wget {this.LinuxLocalRunFile}");
commands.Add($"sh {runFileName} --silent --toolkit");
}

switch (linuxDistribution)
{
case LinuxDistribution.Debian:
case LinuxDistribution.Ubuntu:
commands.Add("apt update");
commands.Add("apt upgrade -y");
commands.Add($"apt install nvidia-driver-{this.LinuxDriverVersion}-server nvidia-dkms-{this.LinuxDriverVersion}-server -y");
commands.Add($"apt install cuda-drivers-fabricmanager-{this.LinuxDriverVersion} -y");

if (this.OpenSourceDriver)
{
commands.Add($"apt install nvidia-driver-{this.LinuxDriverVersion}-open nvidia-dkms-{this.LinuxDriverVersion}-open -y");
}
Comment thread
saibulusu marked this conversation as resolved.
else
{
commands.Add($"apt install nvidia-driver-{this.LinuxDriverVersion}-server nvidia-dkms-{this.LinuxDriverVersion}-server -y");
commands.Add($"apt install cuda-drivers-fabricmanager-{this.LinuxDriverVersion} -y");
}

break;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ private List<string> NvidiaContainerToolkitInstallationCommands(LinuxDistributio
case LinuxDistribution.Debian:

string setupCommand = "curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey " +
"| sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \\\n " +
"| sudo gpg --yes --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \\\n " +
Comment thread
saibulusu marked this conversation as resolved.
"&& curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \\\n " +
" sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \\\n " +
" sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,16 @@
"SupportedPlatforms": "linux-x64",
"SupportedOperatingSystems": "Linux",
"SupportedLinuxGpuModel": "NVIDIA A100",
"SupportedLinuxDistros": "Ubuntu20",
"SupportedLinuxDistros": "Ubuntu24",
"SpecialRequirements": "This is an NVIDIA GPU Driver dependency. It can only be installed on the system having an NVIDIA A100 GPU card/chip."
},
"Parameters": {
"ConfigurationFile": "default.yaml",
"Username": "",
"LinuxCudaVersion": "12.0",
"OpenSourceDriver": false,
"LinuxDriverVersion": "525",
"LinuxLocalRunFile": "https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda_12.0.0_525.60.13_linux.run"
"LinuxLocalRunFile": "https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda_12.0.0_525.60.13_linux.run",
"CudaRequired": true
},
"Dependencies": [
{
Expand All @@ -27,9 +28,11 @@
"Parameters": {
"Scenario": "InstallNvidiaCuda",
"LinuxCudaVersion": "$.Parameters.LinuxCudaVersion",
"OpenSourceDriver": "$.Parameters.OpenSourceDriver",
"LinuxDriverVersion": "$.Parameters.LinuxDriverVersion",
"Username": "$.Parameters.Username",
"LinuxLocalRunFile": "$.Parameters.LinuxLocalRunFile"
"LinuxLocalRunFile": "$.Parameters.LinuxLocalRunFile",
"CudaRequired": "$.Parameters.CudaRequired"
}
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@
"SpecialRequirements": "This is an NVIDIA GPU Driver dependency. It can only be installed on the system having an NVIDIA GB200 GPU card/chip."
},
"Parameters": {
"ConfigurationFile": "default.yaml",
"Username": "",
"LinuxCudaVersion": "13.1",
"OpenSourceDriver": true,
"LinuxDriverVersion": "590",
"LinuxLocalRunFile": "https://developer.download.nvidia.com/compute/cuda/13.1.1/local_installers/cuda_13.1.1_590.48.01_linux_sbsa.run"
"LinuxLocalRunFile": "https://developer.download.nvidia.com/compute/cuda/13.1.1/local_installers/cuda_13.1.1_590.48.01_linux_sbsa.run",
"CudaRequired": true
},
"Dependencies": [
{
Expand All @@ -27,9 +28,11 @@
"Parameters": {
"Scenario": "InstallNvidiaCuda",
"LinuxCudaVersion": "$.Parameters.LinuxCudaVersion",
"OpenSourceDriver": "$.Parameters.OpenSourceDriver",
"LinuxDriverVersion": "$.Parameters.LinuxDriverVersion",
"Username": "$.Parameters.Username",
"LinuxLocalRunFile": "$.Parameters.LinuxLocalRunFile"
"LinuxLocalRunFile": "$.Parameters.LinuxLocalRunFile",
"CudaRequired": "$.Parameters.CudaRequired"
}
},
{
Expand Down
Loading