@@ -227,7 +227,7 @@ void compare_nvfp4_tensors(const std::string& name,
227227 }
228228 }
229229
230- constexpr bool print_detailed_summary = true ;
230+ bool print_detailed_summary = false ;
231231 if (print_detailed_summary) {
232232 // Always report summary - either success or failure
233233 std::cout << " === SUMMARY for tensor " << name << " ===" << std::endl;
@@ -492,7 +492,11 @@ void performTest(const ShapeRepresentation shape_rep,
492492 &offsets_tensor, sizeof (offsets_tensor));
493493 }
494494
495- nvte_group_quantize (in_group_tensor, out_group_tensor, 0 );
495+ QuantizationConfigWrapper quant_config;
496+ quant_config.set_use_fast_math (use_fast_math);
497+ quant_config.set_stochastic_rounding (false );
498+
499+ nvte_group_quantize_v2 (in_group_tensor, out_group_tensor, quant_config, 0 );
496500 cudaDeviceSynchronize ();
497501 auto err = cudaGetLastError ();
498502 ASSERT_EQ (err, cudaSuccess) << cudaGetErrorString (err);
@@ -548,14 +552,14 @@ void performTest(const ShapeRepresentation shape_rep,
548552
549553// Each entry: {shape_representation, num_tensors, logical_shape_M, logical_shape_K, [M_i...], [K_i...]}; the per-tensor M_i / K_i lists are present only for the dimension(s) that vary.
550554std::vector<std::vector<size_t >> grouped_input_config = {
551- // {SAME_BOTH_DIMS, 1, 128,128},
552- // {SAME_BOTH_DIMS, 2, 256,128},
553- // {VARYING_FIRST_DIM, 2, 512,128, 128,384},
554- // {VARYING_FIRST_DIM, 3, 1024,160, 128,384,512},
555- // {VARYING_FIRST_DIM, 4, 1536,160, 128,384,512,512},
556- // {VARYING_FIRST_DIM, 5, 4096,512, 128,256,384,1024,2304},
557- // {VARYING_LAST_DIM, 3, 256,896, 128,256,512},
558- // {VARYING_BOTH_DIMS, 2, 1,(128*128)+(256*256), 128,256, 128,256},
555+ {SAME_BOTH_DIMS, 1 , 128 ,128 },
556+ {SAME_BOTH_DIMS, 2 , 256 ,128 },
557+ {VARYING_FIRST_DIM, 2 , 512 ,128 , 128 ,384 },
558+ {VARYING_FIRST_DIM, 3 , 1024 ,160 , 128 ,384 ,512 },
559+ {VARYING_FIRST_DIM, 4 , 1536 ,160 , 128 ,384 ,512 ,512 },
560+ {VARYING_FIRST_DIM, 5 , 4096 ,512 , 128 ,256 ,384 ,1024 ,2304 },
561+ {VARYING_LAST_DIM, 3 , 256 ,896 , 128 ,256 ,512 },
562+ {VARYING_BOTH_DIMS, 2 , 1 ,(128 *128 )+(256 *256 ), 128 ,256 , 128 ,256 },
559563 {VARYING_BOTH_DIMS, 2 , 1 ,(256 *128 )+(512 *640 ), 256 ,512 , 128 ,640 },
560564};
561565
0 commit comments