From 7128c6b7b085a6dd7932bad742d769a4bc1b0212 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 30 Jul 2025 07:26:41 -0700 Subject: [PATCH 01/15] Extract _batched_lu_factor() to simplify batched logic in _lu_factor() --- dpnp/linalg/dpnp_utils_linalg.py | 236 ++++++++++++++++--------------- 1 file changed, 123 insertions(+), 113 deletions(-) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 51cebb2815bb..795ad27dcfb5 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -275,6 +275,126 @@ def _batched_inv(a, res_type): return a_h.reshape(orig_shape) +def _batched_lu_factor(a, res_type): + """Compute pivoted LU decomposition for a batch of matrices.""" + + # TODO: Find out at which array sizes the best performance is obtained + # getrf_batch implementation shows slow results with large arrays on GPU. + # Use getrf_batch only on CPU. + # On GPU call getrf for each two-dimensional array by loop + use_batch = a.sycl_device.has_aspect_cpu + + a_sycl_queue = a.sycl_queue + a_usm_type = a.usm_type + _manager = dpu.SequentialOrderManager[a_sycl_queue] + + n = a.shape[-2] + orig_shape = a.shape + # get 3d input arrays by reshape + a = dpnp.reshape(a, (-1, n, n)) + batch_size = a.shape[0] + a_usm_arr = dpnp.get_usm_ndarray(a) + + if use_batch: + # `a` must be copied because getrf_batch destroys the input matrix + a_h = dpnp.empty_like(a, order="C", dtype=res_type) + ipiv_h = dpnp.empty( + (batch_size, n), + dtype=dpnp.int64, + order="C", + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + dev_info_h = [0] * batch_size + + ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, + dst=a_h.get_array(), + sycl_queue=a_sycl_queue, + depends=_manager.submitted_events, + ) + _manager.add_event_pair(ht_ev, copy_ev) + + ipiv_stride = n + a_stride = a_h.strides[0] + + # Call the LAPACK extension function _getrf_batch + # to perform LU decomposition of a batch of general matrices + ht_ev, getrf_ev = li._getrf_batch( + a_sycl_queue, + a_h.get_array(), + ipiv_h.get_array(), + dev_info_h, + n, + a_stride, + ipiv_stride, + batch_size, + depends=[copy_ev], + ) + _manager.add_event_pair(ht_ev, getrf_ev) + + dev_info_array = dpnp.array( + dev_info_h, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ) + + # Reshape the results back to their original shape + a_h = a_h.reshape(orig_shape) + ipiv_h = ipiv_h.reshape(orig_shape[:-1]) + dev_info_array = dev_info_array.reshape(orig_shape[:-2]) + + return (a_h, ipiv_h, dev_info_array) + + # Initialize lists for storing arrays and events for each batch + a_vecs = [None] * batch_size + ipiv_vecs = [None] * batch_size + dev_info_vecs = [None] * batch_size + + dep_evs = _manager.submitted_events + + # Process each batch + for i in range(batch_size): + # Copy each 2D slice to a new array because getrf will destroy + # the input matrix + a_vecs[i] = dpnp.empty_like(a[i], order="C", dtype=res_type) + + ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr[i], + dst=a_vecs[i].get_array(), + sycl_queue=a_sycl_queue, + depends=dep_evs, + ) + _manager.add_event_pair(ht_ev, copy_ev) + + ipiv_vecs[i] = dpnp.empty( + (n,), + dtype=dpnp.int64, + order="C", + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + dev_info_vecs[i] = [0] + + # Call the LAPACK extension function _getrf + # to perform LU decomposition on each batch in 'a_vecs[i]' + ht_ev, getrf_ev = li._getrf( + a_sycl_queue, + a_vecs[i].get_array(), + ipiv_vecs[i].get_array(), + dev_info_vecs[i], + depends=[copy_ev], + ) + _manager.add_event_pair(ht_ev, getrf_ev) + + # Reshape the results back to their original shape + out_a = dpnp.array(a_vecs, order="C").reshape(orig_shape) + out_ipiv = dpnp.array(ipiv_vecs).reshape(orig_shape[:-1]) + out_dev_info = dpnp.array( + dev_info_vecs, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ).reshape(orig_shape[:-2]) + + return (out_a, out_ipiv, out_dev_info) + + def _batched_solve(a, b, exec_q, res_usm_type, res_type): """ _batched_solve(a, b, exec_q, res_usm_type, res_type) @@ -901,125 +1021,15 @@ def _lu_factor(a, res_type): """ + if a.ndim > 2: + return _batched_lu_factor(a, res_type) + n = a.shape[-2] a_sycl_queue = a.sycl_queue a_usm_type = a.usm_type - - # TODO: Find out at which array sizes the best performance is obtained - # getrf_batch implementation shows slow results with large arrays on GPU. - # Use getrf_batch only on CPU. - # On GPU call getrf for each two-dimensional array by loop - use_batch = a.sycl_device.has_aspect_cpu - _manager = dpu.SequentialOrderManager[a_sycl_queue] - if a.ndim > 2: - orig_shape = a.shape - # get 3d input arrays by reshape - a = dpnp.reshape(a, (-1, n, n)) - batch_size = a.shape[0] - a_usm_arr = dpnp.get_usm_ndarray(a) - - if use_batch: - # `a` must be copied because getrf_batch destroys the input matrix - a_h = dpnp.empty_like(a, order="C", dtype=res_type) - ipiv_h = dpnp.empty( - (batch_size, n), - dtype=dpnp.int64, - order="C", - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) - dev_info_h = [0] * batch_size - - ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_usm_arr, - dst=a_h.get_array(), - sycl_queue=a_sycl_queue, - depends=_manager.submitted_events, - ) - _manager.add_event_pair(ht_ev, copy_ev) - - ipiv_stride = n - a_stride = a_h.strides[0] - - # Call the LAPACK extension function _getrf_batch - # to perform LU decomposition of a batch of general matrices - ht_ev, getrf_ev = li._getrf_batch( - a_sycl_queue, - a_h.get_array(), - ipiv_h.get_array(), - dev_info_h, - n, - a_stride, - ipiv_stride, - batch_size, - depends=[copy_ev], - ) - _manager.add_event_pair(ht_ev, getrf_ev) - - dev_info_array = dpnp.array( - dev_info_h, usm_type=a_usm_type, sycl_queue=a_sycl_queue - ) - - # Reshape the results back to their original shape - a_h = a_h.reshape(orig_shape) - ipiv_h = ipiv_h.reshape(orig_shape[:-1]) - dev_info_array = dev_info_array.reshape(orig_shape[:-2]) - - return (a_h, ipiv_h, dev_info_array) - - # Initialize lists for storing arrays and events for each batch - a_vecs = [None] * batch_size - ipiv_vecs = [None] * batch_size - dev_info_vecs = [None] * batch_size - - dep_evs = _manager.submitted_events - - # Process each batch - for i in range(batch_size): - # Copy each 2D slice to a new array because getrf will destroy - # the input matrix - a_vecs[i] = dpnp.empty_like(a[i], order="C", dtype=res_type) - - ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_usm_arr[i], - dst=a_vecs[i].get_array(), - sycl_queue=a_sycl_queue, - depends=dep_evs, - ) - _manager.add_event_pair(ht_ev, copy_ev) - - ipiv_vecs[i] = dpnp.empty( - (n,), - dtype=dpnp.int64, - order="C", - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) - dev_info_vecs[i] = [0] - - # Call the LAPACK extension function _getrf - # to perform LU decomposition on each batch in 'a_vecs[i]' - ht_ev, getrf_ev = li._getrf( - a_sycl_queue, - a_vecs[i].get_array(), - ipiv_vecs[i].get_array(), - dev_info_vecs[i], - depends=[copy_ev], - ) - _manager.add_event_pair(ht_ev, getrf_ev) - - # Reshape the results back to their original shape - out_a = dpnp.array(a_vecs, order="C").reshape(orig_shape) - out_ipiv = dpnp.array(ipiv_vecs).reshape(orig_shape[:-1]) - out_dev_info = dpnp.array( - dev_info_vecs, usm_type=a_usm_type, sycl_queue=a_sycl_queue - ).reshape(orig_shape[:-2]) - - return (out_a, out_ipiv, out_dev_info) - a_usm_arr = dpnp.get_usm_ndarray(a) # `a` must be copied because getrf destroys the input matrix From 8d491a09ea6534fdf35e9b874e8b02992ce52c35 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 4 Aug 2025 06:24:21 -0700 Subject: [PATCH 02/15] Allow F-contig input in getrf --- dpnp/backend/extensions/lapack/getrf.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index 5ba1a7cc9b65..65bd191265c0 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -190,10 +190,12 @@ std::pair } bool is_a_array_c_contig = a_array.is_c_contiguous(); + bool is_a_array_f_contig = a_array.is_f_contiguous(); bool is_ipiv_array_c_contig = ipiv_array.is_c_contiguous(); - if (!is_a_array_c_contig) { + if (!is_a_array_c_contig && !is_a_array_f_contig) { throw py::value_error("The input array " - "must be C-contiguous"); + "must be must be either C-contiguous " + "or F-contiguous"); } if (!is_ipiv_array_c_contig) { throw py::value_error("The array of pivot indices " From 0d4a33a1ad5ef363d7f8d4d1e160d8d286183da8 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 4 Aug 2025 06:25:59 -0700 Subject: [PATCH 03/15] Allow F-contig input in getrf_batch --- dpnp/backend/extensions/lapack/getrf_batch.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf_batch.cpp b/dpnp/backend/extensions/lapack/getrf_batch.cpp index 83d06816fce4..6ac61045653b 100644 --- a/dpnp/backend/extensions/lapack/getrf_batch.cpp +++ b/dpnp/backend/extensions/lapack/getrf_batch.cpp @@ -221,10 +221,12 @@ std::pair } bool is_a_array_c_contig = a_array.is_c_contiguous(); + bool is_a_array_f_contig = a_array.is_f_contiguous(); bool is_ipiv_array_c_contig = ipiv_array.is_c_contiguous(); - if (!is_a_array_c_contig) { + if (!is_a_array_c_contig && !is_a_array_f_contig) { throw py::value_error("The input array " - "must be C-contiguous"); + "must be must be either C-contiguous " + "or F-contiguous"); } if (!is_ipiv_array_c_contig) { throw py::value_error("The array of pivot indices " From 86381ca24192c9aa285d64c80043d0916975f6d7 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 5 Aug 2025 02:59:40 -0700 Subject: [PATCH 04/15] Implement dpnp_lu_factor() and update _lu_factor() --- dpnp/linalg/dpnp_utils_linalg.py | 108 ++++++++++++++++++++++++++----- 1 file changed, 92 insertions(+), 16 deletions(-) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 795ad27dcfb5..06655cfb5ebb 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -39,6 +39,7 @@ # pylint: disable=useless-import-alias from typing import NamedTuple +from warnings import warn import dpctl.tensor._tensor_impl as ti import dpctl.utils as dpu @@ -986,12 +987,29 @@ def _hermitian_svd(a, compute_uv): return dpnp.sort(s)[..., ::-1] +def _is_copy_required(a, res_type): + """ + Determine if `a` needs to be copied before LU decomposition. + This matches SciPy behavior: copy is needed unless input is suitable + for in-place modification. + """ + + if a.dtype != res_type: + return True + if not a.flags["F_CONTIGUOUS"]: + return True + if not a.flags["WRITABLE"]: + return True + + return False + + def _is_empty_2d(arr): # check size first for efficiency return arr.size == 0 and numpy.prod(arr.shape[-2:]) == 0 -def _lu_factor(a, res_type): +def _lu_factor(a, res_type, scipy=False, overwrite_a=False): """ Compute pivoted LU decomposition. @@ -1032,18 +1050,41 @@ def _lu_factor(a, res_type): a_usm_arr = dpnp.get_usm_ndarray(a) - # `a` must be copied because getrf destroys the input matrix - a_h = dpnp.empty_like(a, order="C", dtype=res_type) + if not scipy: + # Internal use case (e.g., det(), slogdet()). Always copy. + # `a` must be copied because getrf destroys the input matrix + a_h = dpnp.empty_like(a, order="C", dtype=res_type) - # use DPCTL tensor function to fill the сopy of the input array - # from the input array - ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_usm_arr, - dst=a_h.get_array(), - sycl_queue=a_sycl_queue, - depends=_manager.submitted_events, - ) - _manager.add_event_pair(ht_ev, copy_ev) + # use DPCTL tensor function to fill the сopy of the input array + # from the input array + ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, + dst=a_h.get_array(), + sycl_queue=a_sycl_queue, + depends=_manager.submitted_events, + ) + _manager.add_event_pair(ht_ev, copy_ev) + + else: + # SciPy-compatible behavior + # Copy is required if: + # - overwrite_a is False (always copy), + # - dtype mismatch, + # - not F-contiguous, + # - not writeable + if not overwrite_a or _is_copy_required(a, res_type): + a_h = dpnp.empty_like(a, order="F", dtype=res_type) + ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, + dst=a_h.get_array(), + sycl_queue=a_sycl_queue, + depends=_manager.submitted_events, + ) + _manager.add_event_pair(ht_ev, copy_ev) + else: + # input is suitable for in-place modification + a_h = a + copy_ev = None ipiv_h = dpnp.empty( n, @@ -1061,13 +1102,18 @@ def _lu_factor(a, res_type): a_h.get_array(), ipiv_h.get_array(), dev_info_h, - depends=[copy_ev], + depends=[copy_ev] if copy_ev is not None else [], ) _manager.add_event_pair(ht_ev, getrf_ev) - dev_info_array = dpnp.array( - dev_info_h, usm_type=a_usm_type, sycl_queue=a_sycl_queue - ) + # Return list if called in SciPy-compatible mode + # else dpnp.ndarray + if scipy: + dev_info_array = dev_info_h + else: + dev_info_array = dpnp.array( + dev_info_h, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ) # Return a tuple containing the factorized matrix 'a_h', # pivot indices 'ipiv_h' @@ -1075,6 +1121,36 @@ def _lu_factor(a, res_type): return (a_h, ipiv_h, dev_info_array) +def dpnp_lu_factor(a, overwrite_a=False, check_finite=True): + """Compute pivoted LU decomposition.""" + + res_type = _common_type(a) + + # accommodate empty arrays + if a.size == 0: + lu = dpnp.empty_like(a) + piv = dpnp.arange(0, dtype=dpnp.int32) + return lu, piv + + if check_finite: + if not dpnp.isfinite(a).all(): + raise ValueError("array must not contain infs or NaNs") + + lu, piv, dev_info = _lu_factor( + a, res_type, scipy=True, overwrite_a=overwrite_a + ) + + if any(dev_info): + diag_nums = ", ".join(str(v) for v in dev_info if v > 0) + warn( + f"Diagonal number {diag_nums} are exactly zero. Singular matrix.", + RuntimeWarning, + stacklevel=2, + ) + + return lu, piv + + def _multi_dot(arrays, order, i, j, out=None): """Actually do the multiplication with the given order.""" From 6e3ad5724d2256d6dfd70451088a051cf822a4c2 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 5 Aug 2025 05:00:41 -0700 Subject: [PATCH 05/15] Simplify lu_factor by moving logic to dpnp_lu_factor --- dpnp/linalg/dpnp_utils_linalg.py | 180 ++++++++++++++++++------------- 1 file changed, 106 insertions(+), 74 deletions(-) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 06655cfb5ebb..347d3b5d193b 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -1009,7 +1009,7 @@ def _is_empty_2d(arr): return arr.size == 0 and numpy.prod(arr.shape[-2:]) == 0 -def _lu_factor(a, res_type, scipy=False, overwrite_a=False): +def _lu_factor(a, res_type): """ Compute pivoted LU decomposition. @@ -1050,41 +1050,18 @@ def _lu_factor(a, res_type, scipy=False, overwrite_a=False): a_usm_arr = dpnp.get_usm_ndarray(a) - if not scipy: - # Internal use case (e.g., det(), slogdet()). Always copy. - # `a` must be copied because getrf destroys the input matrix - a_h = dpnp.empty_like(a, order="C", dtype=res_type) - - # use DPCTL tensor function to fill the сopy of the input array - # from the input array - ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_usm_arr, - dst=a_h.get_array(), - sycl_queue=a_sycl_queue, - depends=_manager.submitted_events, - ) - _manager.add_event_pair(ht_ev, copy_ev) + # `a` must be copied because getrf destroys the input matrix + a_h = dpnp.empty_like(a, order="C", dtype=res_type) - else: - # SciPy-compatible behavior - # Copy is required if: - # - overwrite_a is False (always copy), - # - dtype mismatch, - # - not F-contiguous, - # - not writeable - if not overwrite_a or _is_copy_required(a, res_type): - a_h = dpnp.empty_like(a, order="F", dtype=res_type) - ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_usm_arr, - dst=a_h.get_array(), - sycl_queue=a_sycl_queue, - depends=_manager.submitted_events, - ) - _manager.add_event_pair(ht_ev, copy_ev) - else: - # input is suitable for in-place modification - a_h = a - copy_ev = None + # use DPCTL tensor function to fill the сopy of the input array + # from the input array + ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, + dst=a_h.get_array(), + sycl_queue=a_sycl_queue, + depends=_manager.submitted_events, + ) + _manager.add_event_pair(ht_ev, copy_ev) ipiv_h = dpnp.empty( n, @@ -1102,18 +1079,13 @@ def _lu_factor(a, res_type, scipy=False, overwrite_a=False): a_h.get_array(), ipiv_h.get_array(), dev_info_h, - depends=[copy_ev] if copy_ev is not None else [], + depends=[copy_ev], ) _manager.add_event_pair(ht_ev, getrf_ev) - # Return list if called in SciPy-compatible mode - # else dpnp.ndarray - if scipy: - dev_info_array = dev_info_h - else: - dev_info_array = dpnp.array( - dev_info_h, usm_type=a_usm_type, sycl_queue=a_sycl_queue - ) + dev_info_array = dpnp.array( + dev_info_h, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ) # Return a tuple containing the factorized matrix 'a_h', # pivot indices 'ipiv_h' @@ -1121,36 +1093,6 @@ def _lu_factor(a, res_type, scipy=False, overwrite_a=False): return (a_h, ipiv_h, dev_info_array) -def dpnp_lu_factor(a, overwrite_a=False, check_finite=True): - """Compute pivoted LU decomposition.""" - - res_type = _common_type(a) - - # accommodate empty arrays - if a.size == 0: - lu = dpnp.empty_like(a) - piv = dpnp.arange(0, dtype=dpnp.int32) - return lu, piv - - if check_finite: - if not dpnp.isfinite(a).all(): - raise ValueError("array must not contain infs or NaNs") - - lu, piv, dev_info = _lu_factor( - a, res_type, scipy=True, overwrite_a=overwrite_a - ) - - if any(dev_info): - diag_nums = ", ".join(str(v) for v in dev_info if v > 0) - warn( - f"Diagonal number {diag_nums} are exactly zero. Singular matrix.", - RuntimeWarning, - stacklevel=2, - ) - - return lu, piv - - def _multi_dot(arrays, order, i, j, out=None): """Actually do the multiplication with the given order.""" @@ -2349,6 +2291,96 @@ def dpnp_lstsq(a, b, rcond=None): return x, resids, rank, s +def dpnp_lu_factor(a, overwrite_a=False, check_finite=True): + """ + dpnp_lu_factor(a, overwrite_a=False, check_finite=True) + + Compute pivoted LU decomposition (SciPy-compatible behavior). + + This function mimics the behavior of `scipy.linalg.lu_factor` including + support for `overwrite_a`, `check_finite` and 0-based pivot indexing. + + """ + + res_type = _common_type(a) + + # accommodate empty arrays + if a.size == 0: + lu = dpnp.empty_like(a) + piv = dpnp.arange(0, dtype=dpnp.int32) + return lu, piv + + if check_finite: + if not dpnp.isfinite(a).all(): + raise ValueError("array must not contain infs or NaNs") + + # if a.ndim > 2: + # return _batched_lu_factor_scipy(a, res_type, overwrite_a=overwrite_a) + + n = a.shape[-2] + + a_sycl_queue = a.sycl_queue + a_usm_type = a.usm_type + _manager = dpu.SequentialOrderManager[a_sycl_queue] + + a_usm_arr = dpnp.get_usm_ndarray(a) + + # SciPy-compatible behavior + # Copy is required if: + # - overwrite_a is False (always copy), + # - dtype mismatch, + # - not F-contiguous, + # - not writeable + if not overwrite_a or _is_copy_required(a, res_type): + a_h = dpnp.empty_like(a, order="F", dtype=res_type) + ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, + dst=a_h.get_array(), + sycl_queue=a_sycl_queue, + depends=_manager.submitted_events, + ) + _manager.add_event_pair(ht_ev, copy_ev) + else: + # input is suitable for in-place modification + a_h = a + copy_ev = None + + ipiv_h = dpnp.empty( + n, + dtype=dpnp.int64, + order="C", + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + dev_info_h = [0] + + # Call the LAPACK extension function _getrf + # to perform LU decomposition on the input matrix + ht_ev, getrf_ev = li._getrf( + a_sycl_queue, + a_h.get_array(), + ipiv_h.get_array(), + dev_info_h, + depends=[copy_ev] if copy_ev is not None else [], + ) + _manager.add_event_pair(ht_ev, getrf_ev) + + if any(dev_info_h): + diag_nums = ", ".join(str(v) for v in dev_info_h if v > 0) + warn( + f"Diagonal number {diag_nums} are exactly zero. Singular matrix.", + RuntimeWarning, + stacklevel=2, + ) + + # MKL lapack uses 1-origin while SciPy uses 0-origin + ipiv_h -= 1 + + # Return a tuple containing the factorized matrix 'a_h', + # pivot indices 'ipiv_h' + return (a_h, ipiv_h) + + def dpnp_matrix_power(a, n): """ dpnp_matrix_power(a, n) From 63138e9ffe7a912aea11a7cf1f79b31f7864c1ff Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 12 Aug 2025 03:15:05 -0700 Subject: [PATCH 06/15] Extend getrf to support non-square matrices --- dpnp/backend/extensions/lapack/getrf.cpp | 30 +++++++++++++++--------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index 65bd191265c0..66b91a4121fa 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -44,6 +44,7 @@ namespace py = pybind11; namespace type_utils = dpctl::tensor::type_utils; typedef sycl::event (*getrf_impl_fn_ptr_t)(sycl::queue &, + const std::int64_t, const std::int64_t, char *, std::int64_t, @@ -56,6 +57,7 @@ static getrf_impl_fn_ptr_t getrf_dispatch_vector[dpctl_td_ns::num_types]; template static sycl::event getrf_impl(sycl::queue &exec_q, + const std::int64_t m, const std::int64_t n, char *in_a, std::int64_t lda, @@ -82,11 +84,11 @@ static sycl::event getrf_impl(sycl::queue &exec_q, getrf_event = mkl_lapack::getrf( exec_q, - n, // The order of the square matrix A (0 ≤ n). + m, // The number of rows in the input matrix A (0 ≤ m). // It must be a non-negative integer. - n, // The number of columns in the square matrix A (0 ≤ n). + n, // The number of columns in the input matrix A (0 ≤ n). // It must be a non-negative integer. - a, // Pointer to the square matrix A (n x n). + a, // Pointer to the input matrix A (n x n). lda, // The leading dimension of matrix A. // It must be at least max(1, n). ipiv, // Pointer to the output array of pivot indices. @@ -99,7 +101,7 @@ static sycl::event getrf_impl(sycl::queue &exec_q, if (info < 0) { error_msg << "Parameter number " << -info - << " had an illegal value."; + << " had an illegal value"; } else if (info == scratchpad_size && e.detail() != 0) { error_msg @@ -168,13 +170,13 @@ std::pair if (a_array_nd != 2) { throw py::value_error( "The input array has ndim=" + std::to_string(a_array_nd) + - ", but a 2-dimensional array is expected."); + ", but a 2-dimensional array is expected"); } if (ipiv_array_nd != 1) { throw py::value_error("The array of pivot indices has ndim=" + std::to_string(ipiv_array_nd) + - ", but a 1-dimensional array is expected."); + ", but a 1-dimensional array is expected"); } // check compatibility of execution queue and allocation queue @@ -210,7 +212,7 @@ std::pair if (getrf_fn == nullptr) { throw py::value_error( "No getrf implementation defined for the provided type " - "of the input matrix."); + "of the input matrix"); } auto ipiv_types = dpctl_td_ns::usm_ndarray_types(); @@ -218,19 +220,25 @@ std::pair ipiv_types.typenum_to_lookup_id(ipiv_array.get_typenum()); if (ipiv_array_type_id != static_cast(dpctl_td_ns::typenum_t::INT64)) { - throw py::value_error("The type of 'ipiv_array' must be int64."); + throw py::value_error("The type of 'ipiv_array' must be int64"); } - const std::int64_t n = a_array.get_shape_raw()[0]; + const py::ssize_t *a_array_shape = a_array.get_shape_raw(); + const std::int64_t m = a_array_shape[0]; + const std::int64_t n = a_array_shape[1]; + const std::int64_t lda = std::max(1UL, m); + + if (ipiv_array.get_size() != std::min(m, n)) { + throw py::value_error("The size of 'ipiv_array' must be min(m, n)"); + } char *a_array_data = a_array.get_data(); - const std::int64_t lda = std::max(1UL, n); char *ipiv_array_data = ipiv_array.get_data(); std::int64_t *d_ipiv = reinterpret_cast(ipiv_array_data); std::vector host_task_events; - sycl::event getrf_ev = getrf_fn(exec_q, n, a_array_data, lda, d_ipiv, + sycl::event getrf_ev = getrf_fn(exec_q, m, n, a_array_data, lda, d_ipiv, dev_info, host_task_events, depends); sycl::event args_ev = dpctl::utils::keep_args_alive( From 8a17f678b42f8126beb938376a401af484524e97 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 12 Aug 2025 03:22:25 -0700 Subject: [PATCH 07/15] Raise NotImplementedError for bathed matrices --- dpnp/linalg/dpnp_utils_linalg.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 347d3b5d193b..557258c04070 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -2314,10 +2314,10 @@ def dpnp_lu_factor(a, overwrite_a=False, check_finite=True): if not dpnp.isfinite(a).all(): raise ValueError("array must not contain infs or NaNs") - # if a.ndim > 2: - # return _batched_lu_factor_scipy(a, res_type, overwrite_a=overwrite_a) + if a.ndim > 2: + raise NotImplementedError("Batched matrices are not supported") - n = a.shape[-2] + m, n = a.shape a_sycl_queue = a.sycl_queue a_usm_type = a.usm_type @@ -2346,7 +2346,7 @@ def dpnp_lu_factor(a, overwrite_a=False, check_finite=True): copy_ev = None ipiv_h = dpnp.empty( - n, + min(m, n), dtype=dpnp.int64, order="C", usm_type=a_usm_type, From 6fe2fe492cb36731c8ffe4a86dc0f5c0062d128f Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 12 Aug 2025 05:29:58 -0700 Subject: [PATCH 08/15] Add implementation of dpnp.linalg.lu_factor() --- dpnp/linalg/dpnp_iface_linalg.py | 64 ++++++++++++++++++++++++++++++++ dpnp/linalg/dpnp_utils_linalg.py | 2 +- 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 07ed0078be28..55381c917203 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -56,6 +56,7 @@ dpnp_eigh, dpnp_inv, dpnp_lstsq, + dpnp_lu_factor, dpnp_matrix_power, dpnp_matrix_rank, dpnp_multi_dot, @@ -79,6 +80,7 @@ "eigvalsh", "inv", "lstsq", + "lu_factor", "matmul", "matrix_norm", "matrix_power", @@ -901,6 +903,68 @@ def lstsq(a, b, rcond=None): return dpnp_lstsq(a, b, rcond=rcond) +def lu_factor(a, overwrite_a=False, check_finite=True): + """ + Compute the pivoted LU decomposition of a matrix. + + The decomposition is:: + + A = P @ L @ U + + where `P` is a permutation matrix, `L` is lower triangular with unit + diagonal elements, and `U` is upper triangular. + + Parameters + ---------- + a : (M, N) {dpnp.ndarray, usm_ndarray} + Input array to decompose. + overwrite_a : {None, bool}, optional + Whether to overwrite data in `a` (may increase performance) + Default: ``False``. + check_finite : {None, bool}, optional + Whether to check that the input matrix contains only finite numbers. + Disabling may give a performance gain, but may result in problems + (crashes, non-termination) if the inputs do contain infinities or NaNs. + + Returns + ------- + lu :(M, N) dpnp.ndarray + Matrix containing U in its upper triangle, and L in its lower triangle. + The unit diagonal elements of L are not stored. + piv (K, ): dpnp.ndarray + Pivot indices representing the permutation matrix P: + row i of matrix was interchanged with row piv[i]. + ``K = min(M, N)``. + + Warning + ------- + This function synchronizes in order to validate array elements + when ``check_finite=True``. + + Limitations + ----------- + Only two-dimensional input matrices are supported. + Otherwise, the function raises ``NotImplementedError`` exception. + + Examples + -------- + >>> import dpnp as np + >>> a = np.array([[4., 3.], [6., 3.]]) + >>> lu, piv = np.linalg.lu_factor(a) + >>> lu + array([[6. , 3. ], + [0.66666667, 1. ]]) + >>> piv + array([1, 1]) + + """ + + dpnp.check_supported_arrays_type(a) + assert_stacked_2d(a) + + return dpnp_lu_factor(a, overwrite_a=overwrite_a, check_finite=check_finite) + + def matmul(x1, x2, /): """ Computes the matrix product. diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 557258c04070..1b56e9359fcc 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -2307,7 +2307,7 @@ def dpnp_lu_factor(a, overwrite_a=False, check_finite=True): # accommodate empty arrays if a.size == 0: lu = dpnp.empty_like(a) - piv = dpnp.arange(0, dtype=dpnp.int32) + piv = dpnp.arange(0, dtype=dpnp.int64) return lu, piv if check_finite: From e781beb51d40d8c75d94b7091c3b0abfd0412fb3 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 12 Aug 2025 06:02:48 -0700 Subject: [PATCH 09/15] Add sycl_queue and usm_type tests --- dpnp/linalg/dpnp_utils_linalg.py | 13 +++++++------ dpnp/tests/test_sycl_queue.py | 12 ++++++++++++ dpnp/tests/test_usm_type.py | 12 ++++++++++++ 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 1b56e9359fcc..bc8d7dc748b1 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -2303,11 +2303,15 @@ def dpnp_lu_factor(a, overwrite_a=False, check_finite=True): """ res_type = _common_type(a) + a_sycl_queue = a.sycl_queue + a_usm_type = a.usm_type # accommodate empty arrays if a.size == 0: lu = dpnp.empty_like(a) - piv = dpnp.arange(0, dtype=dpnp.int64) + piv = dpnp.arange( + 0, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ) return lu, piv if check_finite: @@ -2317,12 +2321,7 @@ def dpnp_lu_factor(a, overwrite_a=False, check_finite=True): if a.ndim > 2: raise NotImplementedError("Batched matrices are not supported") - m, n = a.shape - - a_sycl_queue = a.sycl_queue - a_usm_type = a.usm_type _manager = dpu.SequentialOrderManager[a_sycl_queue] - a_usm_arr = dpnp.get_usm_ndarray(a) # SciPy-compatible behavior @@ -2345,6 +2344,8 @@ def dpnp_lu_factor(a, overwrite_a=False, check_finite=True): a_h = a copy_ev = None + m, n = a.shape + ipiv_h = dpnp.empty( min(m, n), dtype=dpnp.int64, diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py index 31dfb74f2cfa..7c08263e672c 100644 --- a/dpnp/tests/test_sycl_queue.py +++ b/dpnp/tests/test_sycl_queue.py @@ -1570,6 +1570,18 @@ def test_lstsq(self, m, n, nrhs, device): assert_sycl_queue_equal(param_queue, a.sycl_queue) assert_sycl_queue_equal(param_queue, b.sycl_queue) + @pytest.mark.parametrize( + "data", + [[[1.0, 2.0], [3.0, 5.0]], [[]]], + ) + def test_lu_factor(self, data, device): + a = dpnp.array(data, device=device) + result = dpnp.linalg.lu_factor(a) + + for param in result: + param_queue = param.sycl_queue + assert_sycl_queue_equal(param_queue, a.sycl_queue) + @pytest.mark.parametrize("n", [-1, 0, 1, 2, 3]) def test_matrix_power(self, n, device): x = dpnp.array([[1.0, 2.0], [3.0, 5.0]], device=device) diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py index aed316eca533..6f886f8ec3c7 100644 --- a/dpnp/tests/test_usm_type.py +++ b/dpnp/tests/test_usm_type.py @@ -1449,6 +1449,18 @@ def test_lstsq(self, m, n, nrhs, usm_type, usm_type_other): [usm_type, usm_type_other] ) + @pytest.mark.parametrize( + "data", + [[[1.0, 2.0], [3.0, 5.0]], [[]]], + ) + def test_lu_factor(self, data, usm_type): + a = dpnp.array(data, usm_type=usm_type) + result = dpnp.linalg.lu_factor(a) + + assert a.usm_type == usm_type + for param in result: + assert param.usm_type == a.usm_type + @pytest.mark.parametrize("n", [-1, 0, 1, 2, 3]) def test_matrix_power(self, n, usm_type): a = dpnp.array([[1, 2], [3, 5]], usm_type=usm_type) From 506d24da0a4eebae1608e5e0463262a1f8f61cd1 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 13 Aug 2025 03:38:05 -0700 Subject: [PATCH 10/15] Add TestLuFactor --- dpnp/tests/test_linalg.py | 133 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) diff --git a/dpnp/tests/test_linalg.py b/dpnp/tests/test_linalg.py index 931c631b5633..dff60b6ca0b9 100644 --- a/dpnp/tests/test_linalg.py +++ b/dpnp/tests/test_linalg.py @@ -15,6 +15,7 @@ ) import dpnp +import dpnp.linalg from .helper import ( assert_dtype_allclose, @@ -1868,6 +1869,138 @@ def test_lstsq_errors(self): assert_raises(TypeError, dpnp.linalg.lstsq, a_dp, b_dp, [-1]) +class TestLuFactor: + @staticmethod + def _apply_pivots_rows(A_dp, piv_dp): + m = A_dp.shape[0] + rows = dpnp.arange(m) + for i in range(int(piv_dp.shape[0])): + r = int(piv_dp[i].item()) + if i != r: + tmp = rows[i].copy() + rows[i] = rows[r] + rows[r] = tmp + return A_dp[rows] + + @staticmethod + def _split_lu(lu, m, n): + L = dpnp.tril(lu, k=-1) + dpnp.fill_diagonal(L, 1) + L = L[:, : min(m, n)] + U = dpnp.triu(lu)[: min(m, n), :] + return L, U + + @pytest.mark.parametrize( + "shape", [(1, 1), (2, 2), (3, 3), (1, 5), (5, 1), (2, 5), (5, 2)] + ) + @pytest.mark.parametrize("order", ["C", "F"]) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_lu_factor(self, shape, order, dtype): + a_np = generate_random_numpy_array(shape, dtype, order) + a_dp = dpnp.array(a_np, order=order) + + lu, piv = dpnp.linalg.lu_factor( + a_dp, check_finite=False, overwrite_a=False + ) + + # verify piv + assert piv.shape == (min(shape),) + assert piv.dtype == dpnp.int64 + if shape[0] > 0: + assert int(dpnp.min(piv)) >= 0 + assert int(dpnp.max(piv)) < shape[0] + + m, n = shape + L, U = self._split_lu(lu, m, n) + LU = L @ U + + A_cast = a_dp.astype(LU.dtype, copy=False) + PA = self._apply_pivots_rows(A_cast, piv) + + assert_allclose(LU, PA, rtol=1e-6, atol=1e-6) + + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_overwrite_inplace(self, dtype): + a_dp = dpnp.array([[4, 3], [6, 3]], dtype=dtype, order="F") + a_dp_orig = a_dp.copy() + lu, piv = dpnp.linalg.lu_factor( + a_dp, overwrite_a=True, check_finite=False + ) + + assert lu is a_dp + assert lu.flags["F_CONTIGUOUS"] is True + + L, U = self._split_lu(lu, 2, 2) + PA = self._apply_pivots_rows(a_dp_orig, piv) + LU = L @ U + + assert_allclose(LU, PA, rtol=1e-6, atol=1e-6) + + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_overwrite_copy(self, dtype): + a_dp = dpnp.array([[4, 3], [6, 3]], dtype=dtype, order="C") + a_dp_orig = a_dp.copy() + lu, piv = dpnp.linalg.lu_factor( + a_dp, overwrite_a=True, check_finite=False + ) + + assert lu is not a_dp + assert lu.flags["F_CONTIGUOUS"] is True + + L, U = self._split_lu(lu, 2, 2) + PA = self._apply_pivots_rows(a_dp_orig, piv) + LU = L @ U + + assert_allclose(LU, PA, rtol=1e-6, atol=1e-6) + + @pytest.mark.parametrize("shape", [(0, 0), (0, 2), (2, 0)]) + def test_empty_inputs(self, shape): + a_dp = dpnp.empty(shape, dtype=dpnp.default_float_type(), order="F") + lu, piv = dpnp.linalg.lu_factor(a_dp, check_finite=False) + assert lu.shape == shape + assert piv.shape == (min(shape),) + + @pytest.mark.parametrize( + "sl", + [ + (slice(None, None, 2), slice(None, None, 2)), + (slice(None, None, -1), slice(None, None, -1)), + ], + ) + def test_strided(self, sl): + base = ( + numpy.arange(7 * 7, dtype=dpnp.default_float_type()).reshape( + 7, 7, order="F" + ) + + 0.1 + ) + a_np = base[sl] + a_dp = dpnp.array(a_np) + + lu, piv = dpnp.linalg.lu_factor(a_dp, check_finite=False) + L, U = self._split_lu(lu, *a_dp.shape) + PA = self._apply_pivots_rows(a_dp, piv) + LU = L @ U + + assert_allclose(LU, PA, rtol=1e-6, atol=1e-6) + + def test_singular_matrix(self): + a_dp = dpnp.array([[1.0, 2.0], [2.0, 4.0]]) + with pytest.warns(RuntimeWarning, match="Singular matrix"): + dpnp.linalg.lu_factor(a_dp, check_finite=False) + + @pytest.mark.parametrize("bad", [numpy.inf, -numpy.inf, numpy.nan]) + def test_check_finite_raises(self, bad): + a_dp = dpnp.array([[1.0, 2.0], [3.0, bad]], order="F") + assert_raises( + ValueError, dpnp.linalg.lu_factor, a_dp, check_finite=True + ) + + def test_batched_not_supported(self): + a_dp = dpnp.ones((2, 2, 2)) + assert_raises(NotImplementedError, dpnp.linalg.lu_factor, a_dp) + + class TestMatrixPower: @pytest.mark.parametrize("dtype", get_all_dtypes()) @pytest.mark.parametrize( From 08f5ca343f80844cdcb37e7618d7a140313de315 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 13 Aug 2025 05:54:55 -0700 Subject: [PATCH 11/15] Add test_overwrite_copy_special to improve coverage --- dpnp/tests/test_linalg.py | 75 +++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 22 deletions(-) diff --git a/dpnp/tests/test_linalg.py b/dpnp/tests/test_linalg.py index f8c6026f070a..00b280483b4b 100644 --- a/dpnp/tests/test_linalg.py +++ b/dpnp/tests/test_linalg.py @@ -279,12 +279,15 @@ def test_cholesky_errors(self): class TestCond: - _norms = [None, -dpnp.inf, -2, -1, 1, 2, dpnp.inf, "fro"] + def setup_method(self): + numpy.random.seed(70) @pytest.mark.parametrize( - "shape", [(0, 4, 4), (4, 0, 3, 3)], ids=["(0, 4, 4)", "(4, 0, 3, 3)"] + "shape", [(0, 4, 4), (4, 0, 3, 3)], ids=["(0, 5, 3)", "(4, 0, 2, 3)"] + ) + @pytest.mark.parametrize( + "p", [None, -dpnp.inf, -2, -1, 1, 2, dpnp.inf, "fro"] ) - @pytest.mark.parametrize("p", _norms) def test_empty(self, shape, p): a = numpy.empty(shape) ia = dpnp.array(a) @@ -293,27 +296,26 @@ def test_empty(self, shape, p): expected = numpy.linalg.cond(a, p=p) assert_dtype_allclose(result, expected) - # TODO: uncomment once numpy 2.3.3 release is published - # @testing.with_requires("numpy>=2.3.3") @pytest.mark.parametrize( "dtype", get_all_dtypes(no_none=True, no_bool=True) ) @pytest.mark.parametrize( "shape", [(4, 4), (2, 4, 3, 3)], ids=["(4, 4)", "(2, 4, 3, 3)"] ) - @pytest.mark.parametrize("p", _norms) + @pytest.mark.parametrize( + "p", [None, -dpnp.inf, -2, -1, 1, 2, dpnp.inf, "fro"] + ) def test_basic(self, dtype, shape, p): a = generate_random_numpy_array(shape, dtype) ia = dpnp.array(a) result = dpnp.linalg.cond(ia, p=p) expected = numpy.linalg.cond(a, p=p) - # TODO: remove when numpy#29333 is released - if numpy_version() < "2.3.3": - expected = expected.real assert_dtype_allclose(result, expected, factor=16) - @pytest.mark.parametrize("p", _norms) + @pytest.mark.parametrize( + "p", [None, -dpnp.inf, -2, -1, 1, 2, dpnp.inf, "fro"] + ) def test_bool(self, p): a = numpy.array([[True, True], [True, False]]) ia = dpnp.array(a) @@ -322,7 +324,9 @@ def test_bool(self, p): expected = numpy.linalg.cond(a, p=p) assert_dtype_allclose(result, expected) - @pytest.mark.parametrize("p", _norms) + @pytest.mark.parametrize( + "p", [None, -dpnp.inf, -2, -1, 1, 2, dpnp.inf, "fro"] + ) def test_nan_to_inf(self, p): a = numpy.zeros((2, 2)) ia = dpnp.array(a) @@ -340,7 +344,9 @@ def test_nan_to_inf(self, p): else: assert_raises(dpnp.linalg.LinAlgError, dpnp.linalg.cond, ia, p=p) - @pytest.mark.parametrize("p", _norms) + @pytest.mark.parametrize( + "p", [None, -dpnp.inf, -2, -1, 1, 2, dpnp.inf, "fro"] + ) @pytest.mark.parametrize( "stride", [(-2, -3, 2, -2), (-2, 4, -4, -4), (2, 3, 4, 4), (-1, 3, 3, -3)], @@ -352,23 +358,21 @@ def test_nan_to_inf(self, p): ], ) def test_strided(self, p, stride): - A = generate_random_numpy_array( - (6, 8, 10, 10), seed_value=70, low=0, high=1 - ) - iA = dpnp.array(A) + A = numpy.random.rand(6, 8, 10, 10) + B = dpnp.asarray(A) slices = tuple(slice(None, None, stride[i]) for i in range(A.ndim)) - a, ia = A[slices], iA[slices] + a = A[slices] + b = B[slices] - result = dpnp.linalg.cond(ia, p=p) + result = dpnp.linalg.cond(b, p=p) expected = numpy.linalg.cond(a, p=p) assert_dtype_allclose(result, expected, factor=24) - @pytest.mark.parametrize("xp", [dpnp, numpy]) - def test_error(self, xp): + def test_error(self): # cond is not defined on empty arrays - a = xp.empty((2, 0)) + ia = dpnp.empty((2, 0)) with pytest.raises(ValueError): - xp.linalg.cond(a, p=1) + dpnp.linalg.cond(ia, p=1) class TestDet: @@ -1949,6 +1953,33 @@ def test_overwrite_copy(self, dtype): assert_allclose(LU, PA, rtol=1e-6, atol=1e-6) + def test_overwrite_copy_special(self): + # F-contig but dtype != res_type + a1 = dpnp.array([[4, 3], [6, 3]], dtype=dpnp.int32, order="F") + a1_orig = a1.copy() + + # F-contig, match dtype but read-only input + a2 = dpnp.array( + [[4, 3], [6, 3]], dtype=dpnp.default_float_type(), order="F" + ) + a2_orig = a2.copy() + a2.flags["WRITABLE"] = False + + for a_dp, a_orig in zip((a1, a1), (a1_orig, a2_orig)): + lu, piv = dpnp.linalg.lu_factor( + a_dp, overwrite_a=True, check_finite=False + ) + + assert lu is not a_dp + assert lu.flags["F_CONTIGUOUS"] is True + + L, U = self._split_lu(lu, 2, 2) + PA = self._apply_pivots_rows( + a_orig.astype(L.dtype, copy=False), piv + ) + LU = L @ U + assert_allclose(LU, PA, rtol=1e-6, atol=1e-6) + @pytest.mark.parametrize("shape", [(0, 0), (0, 2), (2, 0)]) def test_empty_inputs(self, shape): a_dp = dpnp.empty(shape, dtype=dpnp.default_float_type(), order="F") From c2f297161581208f77fb7b0b14261703164ef525 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 13 Aug 2025 07:50:00 -0700 Subject: [PATCH 12/15] Align test_linalg.py with master --- dpnp/tests/test_linalg.py | 49 ++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/dpnp/tests/test_linalg.py b/dpnp/tests/test_linalg.py index 00b280483b4b..fd87e0ffd51f 100644 --- a/dpnp/tests/test_linalg.py +++ b/dpnp/tests/test_linalg.py @@ -15,7 +15,6 @@ ) import dpnp -import dpnp.linalg from .helper import ( assert_dtype_allclose, @@ -279,15 +278,12 @@ def test_cholesky_errors(self): class TestCond: - def setup_method(self): - numpy.random.seed(70) + _norms = [None, -dpnp.inf, -2, -1, 1, 2, dpnp.inf, "fro"] @pytest.mark.parametrize( - "shape", [(0, 4, 4), (4, 0, 3, 3)], ids=["(0, 5, 3)", "(4, 0, 2, 3)"] - ) - @pytest.mark.parametrize( - "p", [None, -dpnp.inf, -2, -1, 1, 2, dpnp.inf, "fro"] + "shape", [(0, 4, 4), (4, 0, 3, 3)], ids=["(0, 4, 4)", "(4, 0, 3, 3)"] ) + @pytest.mark.parametrize("p", _norms) def test_empty(self, shape, p): a = numpy.empty(shape) ia = dpnp.array(a) @@ -296,26 +292,27 @@ def test_empty(self, shape, p): expected = numpy.linalg.cond(a, p=p) assert_dtype_allclose(result, expected) + # TODO: uncomment once numpy 2.3.3 release is published + # @testing.with_requires("numpy>=2.3.3") @pytest.mark.parametrize( "dtype", get_all_dtypes(no_none=True, no_bool=True) ) @pytest.mark.parametrize( "shape", [(4, 4), (2, 4, 3, 3)], ids=["(4, 4)", "(2, 4, 3, 3)"] ) - @pytest.mark.parametrize( - "p", [None, -dpnp.inf, -2, -1, 1, 2, dpnp.inf, "fro"] - ) + @pytest.mark.parametrize("p", _norms) def test_basic(self, dtype, shape, p): a = generate_random_numpy_array(shape, dtype) ia = dpnp.array(a) result = dpnp.linalg.cond(ia, p=p) expected = numpy.linalg.cond(a, p=p) + # TODO: remove when numpy#29333 is released + if numpy_version() < "2.3.3": + expected = expected.real assert_dtype_allclose(result, expected, factor=16) - @pytest.mark.parametrize( - "p", [None, -dpnp.inf, -2, -1, 1, 2, dpnp.inf, "fro"] - ) + @pytest.mark.parametrize("p", _norms) def test_bool(self, p): a = numpy.array([[True, True], [True, False]]) ia = dpnp.array(a) @@ -324,9 +321,7 @@ def test_bool(self, p): expected = numpy.linalg.cond(a, p=p) assert_dtype_allclose(result, expected) - @pytest.mark.parametrize( - "p", [None, -dpnp.inf, -2, -1, 1, 2, dpnp.inf, "fro"] - ) + @pytest.mark.parametrize("p", _norms) def test_nan_to_inf(self, p): a = numpy.zeros((2, 2)) ia = dpnp.array(a) @@ -344,9 +339,7 @@ def test_nan_to_inf(self, p): else: assert_raises(dpnp.linalg.LinAlgError, dpnp.linalg.cond, ia, p=p) - @pytest.mark.parametrize( - "p", [None, -dpnp.inf, -2, -1, 1, 2, dpnp.inf, "fro"] - ) + @pytest.mark.parametrize("p", _norms) @pytest.mark.parametrize( "stride", [(-2, -3, 2, -2), (-2, 4, -4, -4), (2, 3, 4, 4), (-1, 3, 3, -3)], @@ -358,21 +351,23 @@ def test_nan_to_inf(self, p): ], ) def test_strided(self, p, stride): - A = numpy.random.rand(6, 8, 10, 10) - B = dpnp.asarray(A) + A = generate_random_numpy_array( + (6, 8, 10, 10), seed_value=70, low=0, high=1 + ) + iA = dpnp.array(A) slices = tuple(slice(None, None, stride[i]) for i in range(A.ndim)) - a = A[slices] - b = B[slices] + a, ia = A[slices], iA[slices] - result = dpnp.linalg.cond(b, p=p) + result = dpnp.linalg.cond(ia, p=p) expected = numpy.linalg.cond(a, p=p) assert_dtype_allclose(result, expected, factor=24) - def test_error(self): + @pytest.mark.parametrize("xp", [dpnp, numpy]) + def test_error(self, xp): # cond is not defined on empty arrays - ia = dpnp.empty((2, 0)) + a = xp.empty((2, 0)) with pytest.raises(ValueError): - dpnp.linalg.cond(ia, p=1) + xp.linalg.cond(a, p=1) class TestDet: From 74f6c6351e60b5befcfc11d79a397339c5943510 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 19 Aug 2025 02:01:52 -0700 Subject: [PATCH 13/15] Apply comment --- dpnp/backend/extensions/lapack/getrf.cpp | 3 +-- dpnp/backend/extensions/lapack/getrf_batch.cpp | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index 66b91a4121fa..286797cb17e7 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -196,8 +196,7 @@ std::pair bool is_ipiv_array_c_contig = ipiv_array.is_c_contiguous(); if (!is_a_array_c_contig && !is_a_array_f_contig) { throw py::value_error("The input array " - "must be must be either C-contiguous " - "or F-contiguous"); + "must be contiguous"); } if (!is_ipiv_array_c_contig) { throw py::value_error("The array of pivot indices " diff --git a/dpnp/backend/extensions/lapack/getrf_batch.cpp b/dpnp/backend/extensions/lapack/getrf_batch.cpp index 6ac61045653b..ec87c8b1f2ae 100644 --- a/dpnp/backend/extensions/lapack/getrf_batch.cpp +++ b/dpnp/backend/extensions/lapack/getrf_batch.cpp @@ -225,8 +225,7 @@ std::pair bool is_ipiv_array_c_contig = ipiv_array.is_c_contiguous(); if (!is_a_array_c_contig && !is_a_array_f_contig) { throw py::value_error("The input array " - "must be must be either C-contiguous " - "or F-contiguous"); + "must be must contiguous"); } if (!is_ipiv_array_c_contig) { throw py::value_error("The array of pivot indices " From 3276f92bd6caf11edb18a38672250b55fc03609d Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 22 Aug 2025 06:09:18 -0700 Subject: [PATCH 14/15] Apply remarks --- dpnp/backend/extensions/lapack/getrf.cpp | 6 +++--- dpnp/backend/extensions/lapack/lapack_py.cpp | 2 +- dpnp/tests/test_linalg.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index 286797cb17e7..81231b5055af 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -71,7 +71,7 @@ static sycl::event getrf_impl(sycl::queue &exec_q, T *a = reinterpret_cast(in_a); const std::int64_t scratchpad_size = - mkl_lapack::getrf_scratchpad_size(exec_q, n, n, lda); + mkl_lapack::getrf_scratchpad_size(exec_q, m, n, lda); T *scratchpad = nullptr; std::stringstream error_msg; @@ -88,9 +88,9 @@ static sycl::event getrf_impl(sycl::queue &exec_q, // It must be a non-negative integer. n, // The number of columns in the input matrix A (0 ≤ n). // It must be a non-negative integer. - a, // Pointer to the input matrix A (n x n). + a, // Pointer to the input matrix A (m x n). lda, // The leading dimension of matrix A. - // It must be at least max(1, n). + // It must be at least max(1, m). ipiv, // Pointer to the output array of pivot indices. scratchpad, // Pointer to scratchpad memory to be used by MKL // routine for storing intermediate results. diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp index 4d5adfe09e4a..fb4dce4643b7 100644 --- a/dpnp/backend/extensions/lapack/lapack_py.cpp +++ b/dpnp/backend/extensions/lapack/lapack_py.cpp @@ -135,7 +135,7 @@ PYBIND11_MODULE(_lapack_impl, m) m.def("_getrf", &lapack_ext::getrf, "Call `getrf` from OneMKL LAPACK library to return " - "the LU factorization of a general n x n matrix", + "the LU factorization of a general m x n matrix", py::arg("sycl_queue"), py::arg("a_array"), py::arg("ipiv_array"), py::arg("dev_info"), py::arg("depends") = py::list()); diff --git a/dpnp/tests/test_linalg.py b/dpnp/tests/test_linalg.py index 7aa5942bab46..d4c2d540f7c4 100644 --- a/dpnp/tests/test_linalg.py +++ b/dpnp/tests/test_linalg.py @@ -1955,7 +1955,7 @@ def test_overwrite_copy_special(self): a2_orig = a2.copy() a2.flags["WRITABLE"] = False - for a_dp, a_orig in zip((a1, a1), (a1_orig, a2_orig)): + for a_dp, a_orig in zip((a1, a2), (a1_orig, a2_orig)): lu, piv = dpnp.linalg.lu_factor( a_dp, overwrite_a=True, check_finite=False ) From 4cf630df55f7e22af66aa8fb777c67b032579e56 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 22 Aug 2025 06:24:41 -0700 Subject: [PATCH 15/15] Simplify _apply_pivots_rows --- dpnp/tests/test_linalg.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/dpnp/tests/test_linalg.py b/dpnp/tests/test_linalg.py index d4c2d540f7c4..dd5daa99b74d 100644 --- a/dpnp/tests/test_linalg.py +++ b/dpnp/tests/test_linalg.py @@ -1863,13 +1863,17 @@ class TestLuFactor: @staticmethod def _apply_pivots_rows(A_dp, piv_dp): m = A_dp.shape[0] - rows = dpnp.arange(m) - for i in range(int(piv_dp.shape[0])): - r = int(piv_dp[i].item()) + + if m == 0 or piv_dp.size == 0: + return A_dp + + rows = list(range(m)) + piv_np = dpnp.asnumpy(piv_dp) + for i, r in enumerate(piv_np): if i != r: - tmp = rows[i].copy() - rows[i] = rows[r] - rows[r] = tmp + rows[i], rows[r] = rows[r], rows[i] + + rows = dpnp.asarray(rows) return A_dp[rows] @staticmethod