From a1e0027584886a7c1c6ef00d25adf0eaa9018332 Mon Sep 17 00:00:00 2001 From: Alexander Kalistratov Date: Tue, 25 Feb 2025 19:42:16 +0100 Subject: [PATCH 1/2] Fixing memory corruption in correlate kernel (#2333) - [x] Have you provided a meaningful PR description? - [x] Have you added a test, reproducer or referred to issue with a reproducer? - [x] Have you tested your changes locally for CPU and GPU devices? - [x] Have you made sure that new changes do not introduce compiler warnings? - [ ] Have you checked performance impact of proposed changes? - [x] If this PR is a work in progress, are you filing the PR as a draft? Temporary workaround for memory corruption inside correlate kernel. The root cause is still unknown, but the workaround seems to solve crashes --- .../statistics/sliding_window1d.hpp | 36 +++++++++++++++---- dpnp/tests/test_usm_type.py | 3 -- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/dpnp/backend/extensions/statistics/sliding_window1d.hpp b/dpnp/backend/extensions/statistics/sliding_window1d.hpp index be9bd85f8fa..278fdaaa83c 100644 --- a/dpnp/backend/extensions/statistics/sliding_window1d.hpp +++ b/dpnp/backend/extensions/statistics/sliding_window1d.hpp @@ -436,7 +436,7 @@ class PaddedSpan : public Span using size_type = SizeT; PaddedSpan(T *const data, const SizeT size, const SizeT pad) - : Span(data, size), pad_(pad) + : Span(data, size), pad_(pad) { } @@ -574,9 +574,20 @@ void submit_sliding_window1d(const PaddedSpan &a, } auto *const out_ptr = out.begin(); - auto *const out_end = out.end(); - results.store(&out_ptr[glid], - [out_end](auto &&ptr) { return ptr < out_end; }); + // auto *const out_end = out.end(); + + auto y_start = glid; + auto y_stop = + std::min(y_start + WorkPI * results.size_x(), out.size()); + uint32_t i = 0; + for (uint32_t y = y_start; y < y_stop; y += results.size_x()) { + out_ptr[y] = results[i++]; + } + // while the code itself seems to be valid, inside correlate + // kernel it results in memory 
corruption. Further investigation + // is needed. SAT-7693 + // results.store(&out_ptr[glid], + // [out_end](auto &&ptr) { return ptr < out_end; }); }); } @@ -635,9 +646,20 @@ void submit_sliding_window1d_small_kernel(const PaddedSpan &a, red); auto *const out_ptr = out.begin(); - auto *const out_end = out.end(); - results.store(&out_ptr[glid], - [out_end](auto &&ptr) { return ptr < out_end; }); + // auto *const out_end = out.end(); + + auto y_start = glid; + auto y_stop = + std::min(y_start + WorkPI * results.size_x(), out.size()); + uint32_t i = 0; + for (uint32_t y = y_start; y < y_stop; y += results.size_x()) { + out_ptr[y] = results[i++]; + } + // while the code itself seems to be valid, inside correlate + // kernel it results in memory corruption. Further investigation + // is needed. SAT-7693 + // results.store(&out_ptr[glid], + // [out_end](auto &&ptr) { return ptr < out_end; }); }); } diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py index 74fdd28d127..213f618f187 100644 --- a/dpnp/tests/test_usm_type.py +++ b/dpnp/tests/test_usm_type.py @@ -800,9 +800,6 @@ def test_1in_1out(func, data, usm_type): @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) @pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types) def test_2in_1out(func, data1, data2, usm_type_x, usm_type_y): - if func == "correlate" and is_win_platform(): - pytest.skip("due to SAT-7693") - x = dp.array(data1, usm_type=usm_type_x) y = dp.array(data2, usm_type=usm_type_y) z = getattr(dp, func)(x, y) From dea6121b6df70954953b06b150e5bc8d10813529 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Tue, 25 Feb 2025 20:05:41 +0100 Subject: [PATCH 2/2] Add entry to changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 16b18460ebf..c345b4d1a72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -103,6 +103,7 @@ Furthermore, a number of issues relating to running 
on NVIDIA GPUs have been res * Resolved an issue with wrong result returned by `dpnp.tensordot` for integer data types [#2296](https://github.com/IntelPython/dpnp/pull/2296) * Resolved `ValueError` exception raised by `dpnp.linalg.qr` with non-contiguous input array [#2314](https://github.com/IntelPython/dpnp/pull/2314) * Resolved an issue with wrong result returned by `dpnp.fft.fftn` and `dpnp.fft.rfftn` when running on NVIDIA GPU [#2332](https://github.com/IntelPython/dpnp/pull/2332) +* Added a workaround to prevent memory corruption in `dpnp.correlate` [#2333](https://github.com/IntelPython/dpnp/pull/2333) ## [0.16.1] - 12/06/2024