diff --git a/deepspeed/inference/v2/inference_utils.py b/deepspeed/inference/v2/inference_utils.py index 7b2dd4237353..774f74d06ee4 100644 --- a/deepspeed/inference/v2/inference_utils.py +++ b/deepspeed/inference/v2/inference_utils.py @@ -8,6 +8,7 @@ import torch from enum import Enum, IntEnum +from deepspeed.utils.validation import ensure_nonzero_divisor class NormTypeEnum(Enum): @@ -102,4 +103,5 @@ def ceil_div(a: int, b: int) -> int: """ Return ceil(a / b). """ + ensure_nonzero_divisor(b, name="b") return -(-a // b) diff --git a/deepspeed/utils/groups.py b/deepspeed/utils/groups.py index a6f0a7228977..55f98d7faed0 100644 --- a/deepspeed/utils/groups.py +++ b/deepspeed/utils/groups.py @@ -29,6 +29,7 @@ from deepspeed.utils import log_dist from deepspeed.utils.bwc import bwc_tensor_model_parallel_world_size, bwc_pipeline_parallel_world_size from deepspeed.utils.exceptions import DeprecatedException +from deepspeed.utils.validation import ensure_nonzero_divisor from deepspeed.accelerator import get_accelerator # Expert parallel group that the current rank belongs to. @@ -63,6 +64,7 @@ def initialize(ep_size=1, mpu=None): def _ensure_divisibility(numerator, denominator): """Ensure that numerator is divisible by the denominator.""" + ensure_nonzero_divisor(denominator, name="denominator") assert numerator % denominator == 0, '{} is not divisible by {}'.format(numerator, denominator) diff --git a/deepspeed/utils/timer.py b/deepspeed/utils/timer.py index 0aa7be55d829..532edee7742e 100755 --- a/deepspeed/utils/timer.py +++ b/deepspeed/utils/timer.py @@ -4,6 +4,7 @@ # DeepSpeed Team import time +import numbers from numpy import mean from deepspeed.utils.logging import print_dist from deepspeed.accelerator import get_accelerator @@ -211,7 +212,16 @@ def __init__(self, config, batch_size, start_step=2, steps_per_output=None, moni self.global_step_count = 0 self.total_elapsed_time = 0 self.step_elapsed_time = 0 - self.steps_per_output = steps_per_output + if steps_per_output is not None: + if not isinstance(steps_per_output, numbers.Integral): + raise ValueError( + f"steps_per_output must be a positive integer or None, got {type(steps_per_output).__name__}" + ) + if steps_per_output <= 0: + raise ValueError(f"steps_per_output must be greater than 0, got {steps_per_output}") + self.steps_per_output = int(steps_per_output) + else: + self.steps_per_output = None self.monitor_memory = monitor_memory self.logging = logging_fn if self.logging is None: diff --git a/deepspeed/utils/validation.py b/deepspeed/utils/validation.py new file mode 100644 index 000000000000..28510169b5ff --- /dev/null +++ b/deepspeed/utils/validation.py @@ -0,0 +1,14 @@ +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +from typing import Any + + +def ensure_nonzero_divisor(divisor: Any, *, name: str = "divisor") -> None: + """ + Validate that a divisor is non-zero before modulo/division math. + """ + if divisor == 0: + raise ValueError(f"{name} must be non-zero") diff --git a/op_builder/hpu/fp_quantizer.py b/op_builder/hpu/fp_quantizer.py index c74affb55045..855e589bcdd1 100644 --- a/op_builder/hpu/fp_quantizer.py +++ b/op_builder/hpu/fp_quantizer.py @@ -56,6 +56,11 @@ def selective_dequantize(cls, val_q, scales, indexes, group_size, q_mantisa_bits def dequantize(cls, fp_out, input_q, scale, group_size, q_mantisa_bits, q_exponent_bits): orig_shape = fp_out.shape orig_dtype = fp_out.dtype + scale_tensor = scale if torch.is_tensor(scale) else torch.as_tensor(scale) + if not torch.all(torch.isfinite(scale_tensor)): + raise ValueError("dequantize scale must contain finite values") + if torch.any(scale_tensor == 0): + raise ValueError("dequantize scale must be non-zero") dequant_out = torch.ops.hpu.cast_from_fp8(input_q, (1.0 / scale), orig_dtype).view(orig_shape) fp_out.copy_(dequant_out) return fp_out diff --git a/tests/unit/inference/test_inference_utils.py b/tests/unit/inference/test_inference_utils.py new file mode 100644 index 000000000000..f985afc56f52 --- /dev/null +++ b/tests/unit/inference/test_inference_utils.py @@ -0,0 +1,18 @@ +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +import pytest + +from deepspeed.inference.v2.inference_utils import ceil_div + + +def test_ceil_div_basic_behavior(): + assert ceil_div(10, 4) == 3 + assert ceil_div(12, 4) == 3 + + +def test_ceil_div_rejects_zero_divisor(): + with pytest.raises(ValueError, match="b must be non-zero"): + ceil_div(10, 0) diff --git a/tests/unit/ops/fp_quantizer/test_fp_quantizer_scale_validation.py b/tests/unit/ops/fp_quantizer/test_fp_quantizer_scale_validation.py new file mode 100644 index 000000000000..f835269c2574 --- /dev/null +++ b/tests/unit/ops/fp_quantizer/test_fp_quantizer_scale_validation.py @@ -0,0 +1,27 @@ +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +import pytest +import torch + +from op_builder.hpu.fp_quantizer import FPQuantizer + + +def test_dequantize_rejects_non_finite_scale(): + fp_out = torch.zeros(4, dtype=torch.float16) + input_q = torch.zeros(4, dtype=torch.uint8) + scale = torch.tensor([float("inf")], dtype=torch.float32) + + with pytest.raises(ValueError, match="dequantize scale must contain finite values"): + FPQuantizer.dequantize(fp_out, input_q, scale, group_size=1, q_mantisa_bits=3, q_exponent_bits=4) + + +def test_dequantize_rejects_zero_scale(): + fp_out = torch.zeros(4, dtype=torch.float16) + input_q = torch.zeros(4, dtype=torch.uint8) + scale = torch.tensor([0.0], dtype=torch.float32) + + with pytest.raises(ValueError, match="dequantize scale must be non-zero"): + FPQuantizer.dequantize(fp_out, input_q, scale, group_size=1, q_mantisa_bits=3, q_exponent_bits=4) diff --git a/tests/unit/utils/test_groups.py b/tests/unit/utils/test_groups.py index 5cd35baf3510..5450b88a02da 100644 --- a/tests/unit/utils/test_groups.py +++ b/tests/unit/utils/test_groups.py @@ -3,7 +3,8 @@ # DeepSpeed Team -from deepspeed.utils.groups import _get_expert_parallel_ranks +import pytest +from deepspeed.utils.groups import _ensure_divisibility, _get_expert_parallel_ranks def test_get_expert_parallel_ranks(): @@ -36,3 +37,8 @@ def test_get_expert_parallel_ranks(): [5, 13], [7, 15], ] + + +def test_ensure_divisibility_rejects_zero_denominator(): + with pytest.raises(ValueError, match="denominator must be non-zero"): + _ensure_divisibility(8, 0) diff --git a/tests/unit/utils/test_timer.py b/tests/unit/utils/test_timer.py new file mode 100644 index 000000000000..9563d856d81d --- /dev/null +++ b/tests/unit/utils/test_timer.py @@ -0,0 +1,29 @@ +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +from types import SimpleNamespace +import pytest + +from deepspeed.utils.timer import ThroughputTimer + + +def _timer_config(): + return SimpleNamespace(enabled=True, synchronized=False) + + +def test_steps_per_output_rejects_zero(): + with pytest.raises(ValueError, match="steps_per_output must be greater than 0"): + ThroughputTimer(config=_timer_config(), batch_size=1, steps_per_output=0) + + +def test_steps_per_output_rejects_non_integral(): + with pytest.raises(ValueError, match="steps_per_output must be a positive integer or None"): + ThroughputTimer(config=_timer_config(), batch_size=1, steps_per_output=1.5) + + +def test_report_boundary_for_valid_steps_per_output(): + timer = ThroughputTimer(config=_timer_config(), batch_size=1, steps_per_output=3) + timer.global_step_count = 6 + assert timer._is_report_boundary()