2 changes: 2 additions & 0 deletions tests/pytorch/nvfp4/test_nvfp4_group_quantize.py

@@ -130,6 +130,8 @@ def check_group_quantization_nvfp4_versus_reference(
     [
         # edge case, zero tokens for all
         (0, 512),
+        # edge case, hidden dimension not a multiple of 128
+        (1024, 320),
         # full tile cases
         (256, 1024),
         (1024, 256),
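The new (1024, 320) case exercises a hidden dimension that is not a multiple of 128, which is what routes quantization onto the unfused fallback added in cast.cpp below. A minimal standalone sketch (illustrative only, not PR code) of the divisibility check that makes this shape an edge case:

// Illustrative sketch: 320 = 2 * 128 + 64, so 128-wide tiles cannot cover
// the hidden dimension exactly and the fused NVFP4 grouped kernel does not apply.
#include <cstdio>

int main() {
  const long hidden = 320;
  const bool fused_ok = (hidden % 128 == 0);
  std::printf("hidden=%ld remainder=%ld fused_ok=%s\n", hidden, hidden % 128,
              fused_ok ? "yes" : "no");  // prints: hidden=320 remainder=64 fused_ok=no
  return 0;
}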
13 changes: 12 additions & 1 deletion transformer_engine/pytorch/csrc/extensions/cast.cpp

@@ -9,6 +9,7 @@
 #include <algorithm>
 #include <cstdint>
 #include <memory>
+#include <mutex>
 #include <optional>
 #include <tuple>
 #include <utility>
@@ -1355,9 +1356,19 @@ std::vector<py::object> split_quantize(const at::Tensor &tensor,
   for (auto &quantizer : quantizer_cpp_list) {
     nvfp4_quantizers.push_back(static_cast<NVFP4Quantizer *>(quantizer.get()));
   }
-  bool contiguous_data_and_scale;
+  bool contiguous_data_and_scale = false;
   std::tie(output_py_list, output_cpp_list, contiguous_data_and_scale) =
       bulk_allocate_nvfp4_tensors(split_shapes, quantizer_list, nvfp4_quantizers);
+  if (!input_shape.empty() && input_shape.back() % 128 != 0) {
+    static std::once_flag once_unfused_nvfp4_fallback_warning;
+    std::call_once(once_unfused_nvfp4_fallback_warning, []() {
+      NVTE_WARN(
+          "Falling back to unfused NVFP4 quantization: the input tensor's inner "
+          "dimension is not a multiple of 128, so the fused NVFP4 grouped kernel "
+          "cannot be used. This may regress performance for this input shape.");
+    });
+    quantization_method = QuantizationMethod::UNFUSED;
+  }
   if (!contiguous_data_and_scale) {
     // Avoid fused quantize kernel if data is not contiguous
     quantization_method = QuantizationMethod::UNFUSED;
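For reference, a self-contained sketch of the warn-once pattern the new fallback relies on. NVTE_WARN is replaced here with a hypothetical warn() stand-in; the std::call_once idiom itself mirrors the diff above:

// Sketch under stated assumptions: warn() is a stand-in for NVTE_WARN.
#include <cstdio>
#include <mutex>

void warn(const char *msg) { std::fprintf(stderr, "WARN: %s\n", msg); }

void maybe_warn_unfused_fallback(long inner_dim) {
  if (inner_dim % 128 != 0) {
    // std::call_once runs the lambda exactly once per process, even if
    // many threads hit the fallback path concurrently.
    static std::once_flag once;
    std::call_once(once, [] {
      warn("inner dimension not a multiple of 128; "
           "falling back to unfused NVFP4 quantization");
    });
  }
}

int main() {
  maybe_warn_unfused_fallback(320);  // warns once
  maybe_warn_unfused_fallback(320);  // silent on repeated calls
  return 0;
}

The once-flag keeps the log clean when split_quantize is called every step with the same shapes, while still surfacing the fusion-disabling condition the first time it occurs.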