diff --git a/backends/openvino/quantizer/observers.py b/backends/openvino/quantizer/observers.py
index 6cda4561604..92a6533f619 100644
--- a/backends/openvino/quantizer/observers.py
+++ b/backends/openvino/quantizer/observers.py
@@ -19,6 +19,9 @@
     module_insertion,
     node_removal,
 )
+from nncf.quantization.algorithms.weight_compression.parameters import (  # type: ignore[import-untyped]
+    CompressedWeight,
+)
 from nncf.quantization.algorithms.weight_compression.config import (  # type: ignore[import-untyped]
     WeightCompressionParameters,
 )
@@ -71,9 +74,36 @@ def calculate_qparams(  # type: ignore[override]
         wc_param = self._wc_param
         wc_config = wc_param.compression_config
         reduction_axes = wc_param.reduction_axes
-        q_weight, scale, zp = do_integer_quantization(
+        nncf_compressed_weight = do_integer_quantization(
             NNCFTensor(weight), wc_config, reduction_axes=reduction_axes
         )
+
+        # Normalise both supported result shapes into (q_weight, scale, zp).
+        q_weight, scale, zp = None, None, None
+        if isinstance(nncf_compressed_weight, CompressedWeight):
+            q_weight = nncf_compressed_weight.tensor
+            scale = nncf_compressed_weight.scale
+            zp = nncf_compressed_weight.zero_point
+        elif isinstance(nncf_compressed_weight, tuple):
+            # Deprecated path, kept only for backwards compatibility with
+            # older NNCF commits that return a (q_weight, scale, zp) tuple.
+            # NOTE(review): confirm those commits still provide the
+            # CompressedWeight import above, or this branch is unreachable.
+            q_weight, scale, zp = nncf_compressed_weight
+
+        # The zero point is optional; the quantized weight and scale are not.
+        if q_weight is None or scale is None:
+            none_values = {
+                name: val
+                for name, val in (("quantized_weight", q_weight), ("scale", scale))
+                if val is None
+            }
+            msg = (
+                "Could not calculate quantization parameters for weight "
+                "compression observer. "
+                f"Result type: {type(nncf_compressed_weight).__name__}. "
+                f"None values: {none_values}"
+            )
+            raise ValueError(msg)
+
         zp = zp.data if zp is not None else None
         return q_weight.data, scale.data, zp
 