diff --git a/src/onnx/parse_dequantizelinear.cpp b/src/onnx/parse_dequantizelinear.cpp index 94919867127..dab9ee2df97 100644 --- a/src/onnx/parse_dequantizelinear.cpp +++ b/src/onnx/parse_dequantizelinear.cpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -21,12 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ + #include #include #include -#include -#include -#include +#include +#include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { @@ -36,10 +36,10 @@ struct parse_dequantizelinear : op_parser { std::vector operators() const { return {{"DequantizeLinear"}}; } - instruction_ref parse(const op_desc& opd, + instruction_ref parse(const op_desc& /*opd*/, const onnx_parser& /*parser*/, const onnx_parser::node_info& info, - std::vector args) const + const std::vector& args) const { if(args.size() < 2 or args.size() > 3) { @@ -63,18 +63,18 @@ struct parse_dequantizelinear : op_parser } } - int axis = 1; + value options = {}; if(contains(info.attributes, "axis")) - axis = info.attributes.at("axis").i(); + { + options.insert({"axis", info.attributes.at("axis").i()}); + } - int block_size = 0; if(contains(info.attributes, "block_size")) - block_size = info.attributes.at("block_size").i(); - - args = transform_quantize_dequantize_linear_inputs( - info, opd.onnx_name, block_size, axis, args); + { + options.insert({"block_size", info.attributes.at("block_size").i()}); + } - return info.add_instruction(make_op("dequantizelinear"), args); + return op::builder::add("dequantizelinear", *info.mod, args, options).at(0); } }; diff --git a/src/onnx/parse_quantizelinear.cpp 
b/src/onnx/parse_quantizelinear.cpp index 91ae4de6692..8e8a334bc0e 100644 --- a/src/onnx/parse_quantizelinear.cpp +++ b/src/onnx/parse_quantizelinear.cpp @@ -22,12 +22,11 @@ * THE SOFTWARE. */ #include + #include #include -#include -#include -#include -#include +#include +#include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { @@ -37,10 +36,10 @@ struct parse_quantizelinear : op_parser { std::vector operators() const { return {{"QuantizeLinear"}}; } - instruction_ref parse(const op_desc& opd, + instruction_ref parse(const op_desc& /*opd*/, const onnx_parser& parser, const onnx_parser::node_info& info, - std::vector args) const + const std::vector& args) const { if(args.size() < 2 or args.size() > 3) { @@ -65,18 +64,23 @@ struct parse_quantizelinear : op_parser ", provided y_zero_point shape: " + to_string_range(args[2]->get_shape().lens())); } - int axis = 1; + value options = {}; if(contains(info.attributes, "axis")) - axis = info.attributes.at("axis").i(); + { + options.insert({"axis", info.attributes.at("axis").i()}); + } - int block_size = 0; if(contains(info.attributes, "block_size")) - block_size = info.attributes.at("block_size").i(); + { + options.insert({"block_size", info.attributes.at("block_size").i()}); + } std::optional output_type; if(contains(info.attributes, "output_dtype")) { - output_type = get_type(info.attributes.at("output_dtype").i()); + const auto& out_type = get_type(info.attributes.at("output_dtype").i()); + output_type = out_type; + options.insert({"output_type", out_type}); } if(output_type.has_value() and args.size() == 3 and @@ -88,65 +92,7 @@ struct parse_quantizelinear : op_parser +", y_zero_point type: " + to_string(args[2]->get_shape().type())); } - args = transform_quantize_dequantize_linear_inputs( - info, opd.onnx_name, block_size, axis, args); - - if(output_type == migraphx::shape::fp4x2_type) - { - // Parsing in pack_fp4 and unpack_fp4 for the FP4 case - auto q_ins = info.add_instruction( - 
make_op("quantizelinear", {{"out_type", migraphx::shape::float_type}}), args); - - // packing axis set to fastest dimension - auto quantized_shape = q_ins->get_shape(); - const auto& qs_strides = quantized_shape.strides(); - if(qs_strides.empty()) - { - MIGRAPHX_THROW("QuantizeLinear: MX type quantized_shape has no strides"); - } - int fast_axis = - std::min_element(qs_strides.cbegin(), qs_strides.cend()) - qs_strides.cbegin(); - bool odd_fast_axis = (quantized_shape.lens().at(fast_axis) % 2 == 1); - if(odd_fast_axis) - { - // pad fastest dimension by 1 if it is odd - std::vector padding(2 * quantized_shape.ndim(), 0); - padding.at(fast_axis * 2 + 1) = 1; - q_ins = info.add_instruction(make_op("pad", {{"pads", padding}}), q_ins); - } - // output is fp4x2_type - auto pack_ins = info.add_instruction(make_op("pack_fp4"), q_ins); - // output is fp8e4m3fn_type - auto unpack_ins = info.add_instruction(make_op("unpack_fp4"), pack_ins); - if(odd_fast_axis) - { - // slice off padded values - unpack_ins = info.add_instruction( - make_op("slice", - {{"axes", {fast_axis}}, - {"starts", {0}}, - {"ends", {quantized_shape.lens().at(fast_axis)}}}), - unpack_ins); - } - return unpack_ins; - } - - if(parser.opset_version < 19) - { - auto common_type = common_shape({args[0]->get_shape(), args[1]->get_shape()}).type(); - std::transform(args.begin(), args.begin() + 2, args.begin(), [&](auto ins) { - if(ins->get_shape().type() != common_type) - ins = info.add_instruction(make_op("convert", {{"target_type", common_type}}), - ins); - return ins; - }); - } - - if(output_type.has_value()) - return info.add_instruction(make_op("quantizelinear", {{"out_type", *output_type}}), - args); - else - return info.add_instruction(make_op("quantizelinear"), args); + return op::builder::add("quantizelinear", *info.mod, args, options).at(0); } }; diff --git a/src/onnx/quantize_dequantize_linear.cpp b/src/onnx/quantize_dequantize_linear.cpp index f7ac2b0f1f4..7113cafbce1 100644 --- 
a/src/onnx/quantize_dequantize_linear.cpp +++ b/src/onnx/quantize_dequantize_linear.cpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -23,10 +23,7 @@ */ #include -#include -#include -#include -#include +#include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { @@ -39,102 +36,8 @@ transform_quantize_dequantize_linear_inputs(const onnx_parser::node_info& info, int axis, std::vector args) { - const auto x = args.at(0); - const auto x_lens = x->get_shape().lens(); - const auto x_rank = x_lens.size(); - - instruction_ref y_scale = args.at(1); - const auto y_scale_lens = y_scale->get_shape().lens(); - const auto y_scale_rank = y_scale_lens.size(); - - // Per-tensor (per-layer) granularity - if(y_scale->get_shape().elements() == 1) - { - std::transform(args.begin() + 1, args.end(), args.begin() + 1, [&](auto ins) { - return info.add_instruction(make_op("multibroadcast", {{"out_lens", x_lens}}), ins); - }); - } - // Per-axis granularity - else if(y_scale_rank == 1) - { - axis = tune_axis(x_rank, axis, onnx_name); - if(x_lens[axis] != y_scale_lens[0]) - { - MIGRAPHX_THROW(onnx_name + - ": For per axis granularity the length of y_scale (actual: " + - to_string(y_scale_lens[0]) + ") must be equal to size of x on axis " + - to_string(axis) + "(actual: " + to_string(x_lens[axis]) + ")"); - } - - std::transform(args.begin() + 1, args.end(), args.begin() + 1, [&](auto ins) { - return info.add_instruction( - make_op("broadcast", {{"axis", axis}, {"out_lens", x_lens}}), ins); - }); - } - // Blocked granularity - else - { - axis = tune_axis(x_rank, axis, onnx_name); - - if(x_rank != y_scale_rank) - { - MIGRAPHX_THROW(onnx_name + ": x(rank: " + 
to_string(x_rank) + - ") and y_scale(rank: " + to_string(y_scale_rank) + - ") must be of same rank for block granularity"); - } - - for(auto i = 0u; i < x_lens.size(); ++i) - { - if(x_lens[i] != y_scale_lens[i] and i != axis) - { - MIGRAPHX_THROW(onnx_name + ": x(shape: " + to_string_range(x_lens) + - ") and y_scale(shape: " + to_string_range(y_scale_lens) + - ") shapes may only differ along provided axis(" + to_string(axis) + - ")"); - } - } - - // Given x shape (D0, ..., Di, ..., Dn), y_scale shape (S0, ... Si, ...Sn) and - // axis=i, the accepted range is [ceil(Di/Si), ceil(Di/(Si-1))-1] - float di = x_lens[axis]; - float si = y_scale_lens[axis]; - int block_size_min = std::ceil(di / si); - int block_size_max = std::ceil(di / (si - 1)) - 1; - // default block_size if not given is calculated (to support quark generated models): - if(block_size == 0) - block_size = block_size_min; - if(block_size < block_size_min or block_size > block_size_max) - MIGRAPHX_THROW(onnx_name + ": Block size(actual: " + to_string(block_size) + - ") must be within range [" + to_string(block_size_min) + ", " + - to_string(block_size_max) + "]"); - - std::transform(args.begin() + 1, args.end(), args.begin() + 1, [&](auto ins) { - if(block_size == 1) - return ins; - - ins = info.add_instruction(make_op("unsqueeze", {{"axes", {axis + 1}}}), ins); - - auto bc_lens = ins->get_shape().lens(); - bc_lens[axis + 1] = block_size; - ins = info.add_instruction(make_op("multibroadcast", {{"out_lens", bc_lens}}), ins); - - auto reshape_lens = x_lens; - reshape_lens[axis] = ins->get_shape().lens()[axis] * block_size; - ins = info.add_instruction(make_op("reshape", {{"dims", reshape_lens}}), ins); - - // Detect runt block - if(x_lens[axis] < reshape_lens[axis]) - { - ins = info.add_instruction( - make_op("slice", {{"axes", {axis}}, {"starts", {0}}, {"ends", {x_lens[axis]}}}), - ins); - } - - return ins; - }); - } - - return args; + return op::builder::transform_quantize_dequantize_linear_inputs( + info, 
onnx_name, block_size, axis, std::move(args)); } } // namespace onnx diff --git a/src/op/builder/dequantizelinear.cpp b/src/op/builder/dequantizelinear.cpp new file mode 100644 index 00000000000..bfee6ad6a04 --- /dev/null +++ b/src/op/builder/dequantizelinear.cpp @@ -0,0 +1,56 @@ +/* The MIT License (MIT) + * + * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace op { +namespace builder { + +/* Op builder for dequantizelinear: expands the scale and zero-point inputs to the shape of x and then adds the operator named by name(). */ +struct dequantizelinear : op_builder +{ + /* Axis used by the per-axis and blocked granularities (defaults to 1). */ + int axis = 1; + /* Block length for blocked granularity; 0 leaves it to the input transform to derive one from the shapes. */ + int block_size = 0; + + /* Exposes the two options under the keys "axis" and "block_size". */ + template + static auto reflect(Self& self, F f) + { + return pack(f(self.axis, "axis"), f(self.block_size, "block_size")); + } + + /* Runs args through transform_quantize_dequantize_linear_inputs and returns the single instruction built from make_op(name()). The instruction_ref parameter is unused; every instruction is created through m.add_instruction. */ + std::vector + insert(module& m, instruction_ref /*ins*/, const std::vector& args) const + { + auto args_new = + transform_quantize_dequantize_linear_inputs(m, name(), block_size, axis, args); + + return {m.add_instruction(make_op(name()), args_new)}; + } +}; + +} // namespace builder +} // namespace op +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/src/op/builder/include/migraphx/op/builder/quantize_dequantize_linear.hpp b/src/op/builder/include/migraphx/op/builder/quantize_dequantize_linear.hpp new file mode 100644 index 00000000000..00fa2939291 --- /dev/null +++ b/src/op/builder/include/migraphx/op/builder/quantize_dequantize_linear.hpp @@ -0,0 +1,149 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_QUANTIZE_DEQUANTIZE_LINEAR_HPP +#define MIGRAPHX_GUARD_AMDMIGRAPHX_QUANTIZE_DEQUANTIZE_LINEAR_HPP + +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace op { +namespace builder { + +template +std::vector +transform_quantize_dequantize_linear_inputs(Builder& bldr, + const std::string& op_name, + int block_size, + int axis, + std::vector args) +{ + const auto x = args.at(0); + const auto x_lens = x->get_shape().lens(); + const auto x_rank = x_lens.size(); + + instruction_ref y_scale = args.at(1); + const auto y_scale_lens = y_scale->get_shape().lens(); + const auto y_scale_rank = y_scale_lens.size(); + + // Per-tensor (per-layer) granularity + if(y_scale->get_shape().elements() == 1) + { + std::transform(args.begin() + 1, args.end(), args.begin() + 1, [&](auto ins) { + return bldr.add_instruction(make_op("multibroadcast", {{"out_lens", x_lens}}), ins); + }); + } + // Per-axis granularity + else if(y_scale_rank == 1) + { + axis = tune_axis(x_rank, axis, op_name); + if(x_lens[axis] != y_scale_lens[0]) + { + MIGRAPHX_THROW(op_name + ": For per axis granularity the length of y_scale (actual: " + + to_string(y_scale_lens[0]) + ") must be equal to size of x on axis " + + to_string(axis) + "(actual: " + to_string(x_lens[axis]) + ")"); + } + + std::transform(args.begin() + 1, args.end(), args.begin() + 1, [&](auto ins) { + return bldr.add_instruction( + make_op("broadcast", {{"axis", axis}, {"out_lens", x_lens}}), ins); + }); + } + // Blocked granularity + else + { + axis = tune_axis(x_rank, axis, op_name); + + if(x_rank != y_scale_rank) + { + MIGRAPHX_THROW(op_name + ": x(rank: " + 
to_string(x_rank) + + ") and y_scale(rank: " + to_string(y_scale_rank) + + ") must be of same rank for block granularity"); + } + + for(auto i = 0u; i < x_lens.size(); ++i) + { + if(x_lens[i] != y_scale_lens[i] and i != axis) + { + MIGRAPHX_THROW(op_name + ": x(shape: " + to_string_range(x_lens) + + ") and y_scale(shape: " + to_string_range(y_scale_lens) + + ") shapes may only differ along provided axis(" + to_string(axis) + + ")"); + } + } + + // Given x shape (D0, ..., Di, ..., Dn), y_scale shape (S0, ... Si, ...Sn) and + // axis=i, the accepted range is [ceil(Di/Si), ceil(Di/(Si-1))-1] + float di = x_lens[axis]; + float si = y_scale_lens[axis]; + int block_size_min = std::ceil(di / si); + int block_size_max = std::ceil(di / (si - 1)) - 1; + // default block_size if not given is calculated (to support quark generated models): + if(block_size == 0) + block_size = block_size_min; + if(block_size < block_size_min or block_size > block_size_max) + MIGRAPHX_THROW(op_name + ": Block size(actual: " + to_string(block_size) + + ") must be within range [" + to_string(block_size_min) + ", " + + to_string(block_size_max) + "]"); + + std::transform(args.begin() + 1, args.end(), args.begin() + 1, [&](auto ins) { + if(block_size == 1) + return ins; + + ins = bldr.add_instruction(make_op("unsqueeze", {{"axes", {axis + 1}}}), ins); + + auto bc_lens = ins->get_shape().lens(); + bc_lens[axis + 1] = block_size; + ins = bldr.add_instruction(make_op("multibroadcast", {{"out_lens", bc_lens}}), ins); + + auto reshape_lens = x_lens; + reshape_lens[axis] = ins->get_shape().lens()[axis] * block_size; + ins = bldr.add_instruction(make_op("reshape", {{"dims", reshape_lens}}), ins); + + // Detect runt block + if(x_lens[axis] < reshape_lens[axis]) + { + ins = bldr.add_instruction( + make_op("slice", {{"axes", {axis}}, {"starts", {0}}, {"ends", {x_lens[axis]}}}), + ins); + } + + return ins; + }); + } + + return args; +} + +} // namespace builder +} // namespace op +} // namespace 
MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/src/op/builder/quantizelinear.cpp b/src/op/builder/quantizelinear.cpp new file mode 100644 index 00000000000..dfb9ac6d16e --- /dev/null +++ b/src/op/builder/quantizelinear.cpp @@ -0,0 +1,121 @@ +/* The MIT License (MIT) + * + * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace op { +namespace builder { + +/* Op builder for quantizelinear: expands the scale and zero-point inputs to the shape of x, then adds either the plain quantizelinear op or the pack/unpack sequence used for fp4x2 output. */ +struct quantizelinear : op_builder +{ + int axis = 1; + int block_size = 0; + /* Requested output type; when unset the quantizelinear op is created without an out_type. */ + std::optional output_type; + + /* Exposes the options under the keys "axis", "block_size" and "output_type". */ + template + static auto reflect(Self& self, F f) + { + return pack(f(self.axis, "axis"), + f(self.block_size, "block_size"), + f(self.output_type, "output_type")); + } + + /* fp4x2 path: quantizes to float_type, pads the smallest-stride axis by one element when its length is odd, runs pack_fp4 followed by unpack_fp4 and slices the padding off again. Throws when the quantized shape has no strides. NOTE(review): the pad index fast_axis * 2 + 1 only addresses the end-pad slot of fast_axis if pads are laid out as (begins..., ends...) and fast_axis is the last axis - confirm for non-standard layouts. */ + std::vector handle_fp4x2(module& m, + const std::vector& args) const + { + // Parsing in pack_fp4 and unpack_fp4 for the FP4 case + auto q_ins = m.add_instruction( + make_op("quantizelinear", {{"out_type", migraphx::shape::float_type}}), args); + + // packing axis set to fastest dimension + auto quantized_shape = q_ins->get_shape(); + const auto& qs_strides = quantized_shape.strides(); + if(qs_strides.empty()) + { + MIGRAPHX_THROW("QuantizeLinear: MX type quantized_shape has no strides"); + } + int fast_axis = + std::min_element(qs_strides.cbegin(), qs_strides.cend()) - qs_strides.cbegin(); + bool odd_fast_axis = (quantized_shape.lens().at(fast_axis) % 2 == 1); + if(odd_fast_axis) + { + // pad fastest dimension by 1 if it is odd + std::vector padding(2 * quantized_shape.ndim(), 0); + padding.at(fast_axis * 2 + 1) = 1; + q_ins = m.add_instruction(make_op("pad", {{"pads", padding}}), q_ins); + } + // output is fp4x2_type + auto pack_ins = m.add_instruction(make_op("pack_fp4"), q_ins); + // output is fp8e4m3fn_type + auto unpack_ins = m.add_instruction(make_op("unpack_fp4"), pack_ins); + if(odd_fast_axis) + { + // slice off padded values + unpack_ins = + m.add_instruction(make_op("slice", + {{"axes", {fast_axis}}, + {"starts", {0}}, + {"ends", {quantized_shape.lens().at(fast_axis)}}}), + unpack_ins); + } + return {unpack_ins}; + } + + /* Replaces args[0] and args[1] in place with convert instructions to their common type whenever their type differs from it; reads both elements, so args must hold at least two entries. */ + void convert_arg_to_common_type(module& m, std::vector& args) const + { + auto common_type = common_shape({args[0]->get_shape(), args[1]->get_shape()}).type(); + std::transform(args.begin(), args.begin() + 2, args.begin(), [&](auto 
ins) { + if(ins->get_shape().type() != common_type) + ins = m.add_instruction(make_op("convert", {{"target_type", common_type}}), ins); + return ins; + }); + } + + /* Expands the inputs, then returns the fp4x2 sequence or a single quantizelinear instruction (with out_type when output_type is set). The instruction_ref parameter is unused. NOTE(review): the common-type conversion runs for every non-fp4x2 call, while the ONNX parser code removed by this change applied it only for opset_version < 19 - confirm this is intended. */ + std::vector + insert(module& m, instruction_ref /*ins*/, const std::vector& args) const + { + auto args_new = + transform_quantize_dequantize_linear_inputs(m, name(), block_size, axis, args); + + if(output_type == migraphx::shape::fp4x2_type) + { + return handle_fp4x2(m, args_new); + } + + convert_arg_to_common_type(m, args_new); + + if(output_type.has_value()) + return {m.add_instruction(make_op("quantizelinear", {{"out_type", *output_type}}), + args_new)}; + else + return {m.add_instruction(make_op("quantizelinear"), args_new)}; + } +}; + +} // namespace builder +} // namespace op +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/test/op/builder/dequantizelinear_test.cpp b/test/op/builder/dequantizelinear_test.cpp new file mode 100644 index 00000000000..73cbd01624e --- /dev/null +++ b/test/op/builder/dequantizelinear_test.cpp @@ -0,0 +1,401 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include + +#include +#include +#include + +namespace { +/* Fixture shared by the tests below: owns a module with the parameters "x", "s" and "zp" plus the axis / block_size options handed to the dequantizelinear builder. The zero-point shape uses the type of x with the lens of the scale. */ +template +struct test_ctx +{ + using x_typ = x_t; + using s_typ = s_t; + + test_ctx(const std::vector& x_lens, + const std::vector& s_lens, + migraphx::shape::type_t x_type = migraphx::shape::get_type::value, + migraphx::shape::type_t s_type = migraphx::shape::get_type::value) + : x_shape{x_type, x_lens}, + s_shape{s_type, s_lens}, + zp_shape{x_type, s_lens}, + axis{1}, + block_size{0} + { + x = m.add_parameter("x", x_shape); + s = m.add_parameter("s", s_shape); + zp = m.add_parameter("zp", zp_shape); + } + + /* Builds a module through make_op_module from the current options and the parameters of m. */ + migraphx::module make_op_bldr() + { + return make_op_module( + "dequantizelinear", {{"axis", axis}, {"block_size", block_size}}, m.get_parameters()); + } + + /* Compares the hand-built module m with the module produced by the builder. */ + void expect() { EXPECT(m == make_op_bldr()); } + + void expect_verify(const std::vector& expected, const std::vector& result) + { + EXPECT(migraphx::verify::verify_rms_range(result, expected)); + } + + /* Replaces m with the builder output, moves it into a program compiled for the "ref" target, evaluates it on the given buffers and copies the last result into a vector. m is left moved-from afterwards. */ + std::vector + run_with_data(std::vector x_data, std::vector s_data, std::vector zp_data) + { + m = make_op_bldr(); + migraphx::program p{std::move(m)}; + p.compile(migraphx::make_target("ref")); + + migraphx::parameter_map params; + params["x"] = migraphx::argument(x_shape, x_data.data()); + params["s"] = migraphx::argument(s_shape, s_data.data()); + params["zp"] = migraphx::argument(zp_shape, zp_data.data()); + + auto result = p.eval(params).back(); + std::vector result_data(result.get_shape().elements()); + result.visit( + [&](auto output) { std::copy(output.begin(), output.end(), result_data.begin()); }); + return result_data; + } + + migraphx::module m; + migraphx::shape x_shape; + migraphx::shape s_shape; + migraphx::shape zp_shape; + int axis; + int 
block_size; + + migraphx::instruction_ref x; + migraphx::instruction_ref s; + migraphx::instruction_ref zp; +}; + +/* Context with a single-element scale / zero-point of lens {1}. */ +template +test_ctx per_tensor_ctx(const std::vector& x_lens) +{ + return test_ctx{x_lens, {1}}; +} + +/* Context with a rank-1 scale of length s_dim and the given axis; s_dim is not checked against x_lens. */ +template +test_ctx per_axis_ctx(const std::vector& x_lens, size_t s_dim, int axis) +{ + test_ctx ctx{x_lens, {s_dim}}; + ctx.axis = axis; + return ctx; +} + +/* Per-axis context whose scale length equals x_lens[axis]. */ +template +test_ctx per_axis_ctx_valid(const std::vector& x_lens, int axis) +{ + return per_axis_ctx(x_lens, x_lens[axis], axis); +} + +/* Context with explicit scale lens, axis and block_size. */ +template +test_ctx blocked_ctx(const std::vector& x_lens, + const std::vector& s_lens, + int axis, + int block_size) +{ + test_ctx ctx{x_lens, s_lens}; + ctx.axis = axis; + ctx.block_size = block_size; + return ctx; +} +} // namespace + +// per-tensor +TEST_CASE(dequantizelinear_per_tensor_op_builder_test) +{ + auto ctx = per_tensor_ctx({4, 3}); + migraphx::module& m = ctx.m; + + auto new_s = m.add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", ctx.x_shape.lens()}}), ctx.s); + auto new_zp = m.add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", ctx.x_shape.lens()}}), ctx.zp); + m.add_instruction(migraphx::make_op("dequantizelinear"), ctx.x, new_s, new_zp); + + ctx.expect(); +} + +// per-axis +TEST_CASE(dequantizelinear_per_axis_op_builder_test) +{ + auto ctx = per_axis_ctx_valid({4, 3}, 1); + migraphx::module& m = ctx.m; + + auto new_s = m.add_instruction( + migraphx::make_op("broadcast", {{"axis", ctx.axis}, {"out_lens", ctx.x_shape.lens()}}), + ctx.s); + auto new_zp = m.add_instruction( + migraphx::make_op("broadcast", {{"axis", ctx.axis}, {"out_lens", ctx.x_shape.lens()}}), + ctx.zp); + + m.add_instruction(migraphx::make_op("dequantizelinear"), ctx.x, new_s, new_zp); + + ctx.expect(); +} + +TEST_CASE(dequantizelinear_per_axis_invalid_shapes_op_builder_test) +{ + auto ctx = per_axis_ctx({4, 3}, 5, 1); + EXPECT(test::throws( + [&] { ctx.make_op_bldr(); }, + "dequantizelinear: For per axis granularity the length of y_scale (actual: 
5) must be " + "equal to size of x on axis 1(actual: 3)")); +} + +/* Axis 8 does not exist on the rank-2 input, so the builder is expected to throw. */ +TEST_CASE(dequantizelinear_per_axis_invalid_axis_op_builder_test) +{ + auto ctx = per_axis_ctx({4, 3}, 3, 8); + EXPECT(test::throws([&] { ctx.make_op_bldr(); }, + "DEQUANTIZELINEAR: axis is out of range.")); +} + +// blocked +/* x {4, 6} with a {4, 3} scale on axis 1: each scale entry is repeated twice along axis 1 via unsqueeze, multibroadcast and reshape. */ +TEST_CASE(dequantizelinear_blocked_op_builder_test) +{ + auto ctx = blocked_ctx({4, 6}, {4, 3}, 1, 2); + migraphx::module& m = ctx.m; + + auto i1 = m.add_instruction(migraphx::make_op("unsqueeze", {{"axes", {2}}}), ctx.s); + i1 = m.add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {4, 3, 2}}}), i1); + auto new_s = m.add_instruction(migraphx::make_op("reshape", {{"dims", {4, 6}}}), i1); + + i1 = m.add_instruction(migraphx::make_op("unsqueeze", {{"axes", {2}}}), ctx.zp); + i1 = m.add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {4, 3, 2}}}), i1); + auto new_zp = m.add_instruction(migraphx::make_op("reshape", {{"dims", {4, 6}}}), i1); + + m.add_instruction(migraphx::make_op("dequantizelinear"), ctx.x, new_s, new_zp); + + ctx.expect(); +} + +TEST_CASE(dequantizelinear_blocked_invalid_axis_op_builder_test) +{ + auto ctx = blocked_ctx({4, 3}, {4, 3}, 8, 2); + EXPECT(test::throws([&] { ctx.make_op_bldr(); }, + "DEQUANTIZELINEAR: axis is out of range.")); +} + +TEST_CASE(dequantizelinear_blocked_invalid_rank_op_builder_test) +{ + auto ctx = blocked_ctx({3, 4, 6}, {4, 3}, 1, 2); + EXPECT(test::throws([&] { ctx.make_op_bldr(); }, + "dequantizelinear: x(rank: 3) and y_scale(rank: 2) " + "must be of same rank for block granularity")); +} + +/* The shapes differ on axis 0 while the blocking axis is 1. */ +TEST_CASE(dequantizelinear_blocked_invalid_shape_op_builder_test) +{ + auto ctx = blocked_ctx({4, 6}, {5, 3}, 1, 2); + EXPECT( + test::throws([&] { ctx.make_op_bldr(); }, + "dequantizelinear: x(shape: 4, 6) and y_scale(shape: 5, " + "3) shapes may only differ along provided axis(1)")); +} + +/* For Di = 6 and Si = 3 the only accepted block size is 2, so 3 must be rejected. */ +TEST_CASE(dequantizelinear_blocked_invalid_blocksize_op_builder_test) +{ + auto ctx = blocked_ctx({4, 6}, {4, 3}, 1, 3); + 
EXPECT(test::throws( + [&] { ctx.make_op_bldr(); }, + "dequantizelinear: Block size(actual: 3) must be within range [2, 2]")); +} + +/* block_size 0 falls back to the minimum block size (2 here), so the expected graph equals the explicit block_size 2 case. */ +TEST_CASE(dequantizelinear_blocked_blocksize_zero_op_builder_test) +{ + auto ctx = blocked_ctx({4, 6}, {4, 3}, 1, 0); + migraphx::module& m = ctx.m; + + auto i1 = m.add_instruction(migraphx::make_op("unsqueeze", {{"axes", {2}}}), ctx.s); + i1 = m.add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {4, 3, 2}}}), i1); + auto new_s = m.add_instruction(migraphx::make_op("reshape", {{"dims", {4, 6}}}), i1); + + i1 = m.add_instruction(migraphx::make_op("unsqueeze", {{"axes", {2}}}), ctx.zp); + i1 = m.add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {4, 3, 2}}}), i1); + auto new_zp = m.add_instruction(migraphx::make_op("reshape", {{"dims", {4, 6}}}), i1); + + m.add_instruction(migraphx::make_op("dequantizelinear"), ctx.x, new_s, new_zp); + + ctx.expect(); +} + +/* With block_size 1 the scale and zero-point are passed to the op unchanged. */ +TEST_CASE(dequantizelinear_blocked_blocksize_one_op_builder_test) +{ + auto ctx = blocked_ctx({4, 3}, {4, 3}, 1, 1); + migraphx::module& m = ctx.m; + + m.add_instruction(migraphx::make_op("dequantizelinear"), ctx.x, ctx.s, ctx.zp); + + ctx.expect(); +} + +// verify tests +// per-tensor +TEST_CASE(dequantizelinear_verify_per_tensor_op_builder_test) +{ + /* + y = (x - zp) * s + + same 's' and 'zp' scalar is applied to each and every element of the input tensor + + E.g. 
For x = 64, zp = -128, s = 0.1 + y = (64 - (-128)) * 0.1 = 19.2 + + input: + { + -128, -64, 0, + 64, 127, 0, + 64, -64, -128, + 32, -32, 16 + } + + expected output: + { + 0, 6.4, 12.8, + 19.2, 25.5, 12.8, + 19.2, 6.4, 0, + 16, 9.6, 14.4 + } + */ + + auto ctx = per_tensor_ctx({4, 3}); + + std::vector x = {-128, -64, 0, 64, 127, 0, 64, -64, -128, 32, -32, 16}; + std::vector s = {0.1f}; + std::vector zp = {-128}; + + std::vector expected_result = { + 0, 6.4, 12.8, 19.2, 25.5, 12.8, 19.2, 6.4, 0, 16, 9.6, 14.4}; + + auto result = ctx.run_with_data(x, s, zp); + ctx.expect_verify(expected_result, result); +} + +// per-axis +TEST_CASE(dequantizelinear_verify_per_axis_op_builder_test) +{ + /* + different scale and zero-point is applied for the elements of the tensor along the specified + axis E.g.: s[1] = 1.0 and zp[1] = 1 will be applied for column 1, so: x = -64, zp = 1 y = (-64 - + (1)) * 1.0 = -65 + + input scale : {0.1, 1.0, 10.0} + input zero-points: {-128, 1, 64 } + 'axis': 1 + + input: + { + -128, -64, 0, + 64, 127, 0, + 64, -64, -128, + 32, -32, 16 + } + + expected output: + { + 0, -65, -640, + 19.2, 126, -640, + 19.2, -65, -1920, + 16, -33, -480 + } + */ + + auto ctx = per_axis_ctx_valid({4, 3}, 1); + + std::vector x = {-128, -64, 0, 64, 127, 0, 64, -64, -128, 32, -32, 16}; + std::vector s = {0.1f, 1.0f, 10.0f}; + std::vector zp = {-128, 1, 64}; + + std::vector expected_result = { + 0, -65, -640, 19.2, 126, -640, 19.2, -65, -1920, 16, -33, -480}; + + auto result = ctx.run_with_data(x, s, zp); + ctx.expect_verify(expected_result, result); +} + +// blocked +TEST_CASE(dequantizelinear_verify_blocked_op_builder_test) +{ + /* + input: + { + -128, -64, 0, 64, 127, 0, + 64, -64, -128, 32, -32, 16, + -16, -32, -64, -128, 0, 64, + 127, 0, 64, -64, -128, 32 + } + + the input will be split into blocks along the specified axis: + input_sliced: + { + -128, -64, 0, 64, 127, 0, + 64, -64, -128, 32, -32, 16, + -16, -32, -64,-128, 0, 64, + 127, 0, 64, -64, -128, 32 + } + + the 
scales and zero-points will be applied per block + E.g. for block 0 (elements along axis 1: -128, -64), + s[0] = 1.0, zp[0] = 1 + so for x = -64, + y = (-64 - (1)) * 1.0 = -65 + + or + for block 1 in the last row (elements along axis 1: 64, -64), + s[3,1] = 10, zp[3,1] = 1 + so for x = 64, + y = (64 - (1)) * 10 = 630 + and for x = -64 + y = (-64 - (1)) * 10 = -650 + + expected output: + { + -129, -65, -1, 5.4, 1270, 0, + 108, -148, -12.8, 3.2, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 630,-650, -12.8, 3.2 + } + */ + + auto ctx = blocked_ctx(/*x_lens*/ {4, 6}, /*s_lens*/ {4, 3}, /*axis*/ 1, /*block_size*/ 2); + + std::vector x = {-128, -64, 0, 64, 127, 0, 64, -64, -128, 32, -32, 16, + -16, -32, -64, -128, 0, 64, 127, 0, 64, -64, -128, 32}; + std::vector s = { + 1.0f, 0.1f, 10.0f, 2.0f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 10.0f, 0.1f}; + std::vector zp = {1, 10, 0, 10, 0, 1, 0, 0, 0, 0, 1, 0}; + + std::vector expected_result = {-129, -65, -1, 5.4, 1270, 0, 108, -148, + -12.8, 3.2, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 630, -650, -12.8, 3.2}; + + auto result = ctx.run_with_data(x, s, zp); + ctx.expect_verify(expected_result, result); +}