From daa5f73ebdf48143441045015d03337f1cfdf196 Mon Sep 17 00:00:00 2001 From: Alirana2829 Date: Sat, 31 Jan 2026 22:21:26 +0500 Subject: [PATCH 01/13] [C++] Fix Segfault in SparseCSFIndex::Equals with mismatched dimensions --- cpp/src/arrow/sparse_tensor.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc index b84070b3d28..fb33ec6ead9 100644 --- a/cpp/src/arrow/sparse_tensor.cc +++ b/cpp/src/arrow/sparse_tensor.cc @@ -405,6 +405,9 @@ SparseCSFIndex::SparseCSFIndex(const std::vector>& indpt std::string SparseCSFIndex::ToString() const { return std::string("SparseCSFIndex"); } bool SparseCSFIndex::Equals(const SparseCSFIndex& other) const { + if (indices().size() != other.indices().size() || indptr().size() != other.indptr().size()) { + return false; + } for (int64_t i = 0; i < static_cast(indices().size()); ++i) { if (!indices()[i]->Equals(*other.indices()[i])) return false; } From 3e1cbd6735985152b2e4aca214c03ae71192695f Mon Sep 17 00:00:00 2001 From: Alirana2829 Date: Sun, 1 Feb 2026 18:39:12 +0500 Subject: [PATCH 02/13] [C++] Add regression test for SparseCSFIndex::Equals segfault --- cpp/src/arrow/sparse_tensor_test.cc | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc index c9c28a11b1b..0502c412ab7 100644 --- a/cpp/src/arrow/sparse_tensor_test.cc +++ b/cpp/src/arrow/sparse_tensor_test.cc @@ -1676,4 +1676,29 @@ TEST(TestSparseCSFMatrixForUInt64Index, Make) { ASSERT_RAISES(Invalid, SparseCSFTensor::Make(dense_tensor, uint64())); } +TEST(TestSparseCSFIndex, EqualsMismatchedDimensions) { + std::vector axis_order_4d = {0, 1, 2, 3}; + std::vector axis_order_2d = {0, 1}; + + // Create empty tensors for indptr/indices + auto empty_buffer = std::make_shared(nullptr, 0); + auto tensor_4d = std::make_shared(int32(), empty_buffer, std::vector{0}); + auto tensor_2d = std::make_shared(int32(), empty_buffer, std::vector{0}); + + // Create a 4D index + std::vector> indptr_4d = {tensor_4d, tensor_4d, tensor_4d}; + std::vector> indices_4d = {tensor_4d, tensor_4d, tensor_4d, tensor_4d}; + auto si_4d = std::make_shared(indptr_4d, indices_4d, axis_order_4d); + + // Create a 2D index + std::vector> indptr_2d = {tensor_2d}; + std::vector> indices_2d = {tensor_2d, tensor_2d}; + auto si_2d = std::make_shared(indptr_2d, indices_2d, axis_order_2d); + + // Before fix: This would segfault. + // After fix: Returns false safely. + ASSERT_FALSE(si_4d->Equals(*si_2d)); + ASSERT_FALSE(si_2d->Equals(*si_4d)); +} + } // namespace arrow From fdf506e57397aaa089d1f79f4409ff330949ceb1 Mon Sep 17 00:00:00 2001 From: Alirana2829 Date: Sun, 1 Feb 2026 21:24:07 +0500 Subject: [PATCH 03/13] Fix Segfault in SparseCSFIndex::Equals with mismatched dimensions and add test --- cpp/src/arrow/sparse_tensor.cc | 6 +++--- cpp/src/arrow/sparse_tensor_test.cc | 21 ++++++++++++++++++++- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc index fb33ec6ead9..5987c51999e 100644 --- a/cpp/src/arrow/sparse_tensor.cc +++ b/cpp/src/arrow/sparse_tensor.cc @@ -405,9 +405,9 @@ SparseCSFIndex::SparseCSFIndex(const std::vector>& indpt std::string SparseCSFIndex::ToString() const { return std::string("SparseCSFIndex"); } bool SparseCSFIndex::Equals(const SparseCSFIndex& other) const { - if (indices().size() != other.indices().size() || indptr().size() != other.indptr().size()) { - return false; - } + if (indices().size() != other.indices().size()) return false; + if (indptr().size() != other.indptr().size()) return false; + if (axis_order().size() != other.axis_order().size()) return false; for (int64_t i = 0; i < static_cast(indices().size()); ++i) { if (!indices()[i]->Equals(*other.indices()[i])) return false; } diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc index 0502c412ab7..87f07b9a7a8 100644 --- a/cpp/src/arrow/sparse_tensor_test.cc +++ b/cpp/src/arrow/sparse_tensor_test.cc @@ -1641,10 +1641,29 @@ TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestNonAscendingShape) { ASSERT_TRUE(st->Equals(*sparse_tensor)); } +TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestEqualityMismatchedDimensions) { + using IndexValueType = TypeParam; + using c_index_value_type = typename IndexValueType::c_type; + + // 1D (trivial) vs 2D + std::vector axis_order_1D = {0}; + std::vector> indptr_1D = {{0, 1}}; + std::vector> indices_1D = {{0}}; + auto si_1D = this->MakeSparseCSFIndex(axis_order_1D, indptr_1D, indices_1D); + + std::vector axis_order_2D = {0, 1}; + std::vector> indptr_2D = {{0, 1}}; + std::vector> indices_2D = {{0}, {0}}; + auto si_2D = this->MakeSparseCSFIndex(axis_order_2D, indptr_2D, indices_2D); + + ASSERT_FALSE(si_1D->Equals(*si_2D)); + ASSERT_FALSE(si_2D->Equals(*si_1D)); +} + REGISTER_TYPED_TEST_SUITE_P(TestSparseCSFTensorForIndexValueType, TestCreateSparseTensor, TestTensorToSparseTensor, TestSparseTensorToTensor, TestAlternativeAxisOrder, TestNonAscendingShape, - TestRoundTrip); + TestRoundTrip, TestEqualityMismatchedDimensions); INSTANTIATE_TYPED_TEST_SUITE_P(TestInt8, TestSparseCSFTensorForIndexValueType, Int8Type); INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt8, TestSparseCSFTensorForIndexValueType, From c87afdb3bf68a88c14b24d9f573d0b80f91d73a9 Mon Sep 17 00:00:00 2001 From: Alirana2829 Date: Mon, 2 Feb 2026 19:36:05 +0500 Subject: [PATCH 04/13] Fix lint: Format SparseCSFIndex::Equals to comply with 90-char line limit and style guide --- cpp/src/arrow/sparse_tensor.cc | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc index 5987c51999e..acb531435b0 100644 --- a/cpp/src/arrow/sparse_tensor.cc +++ b/cpp/src/arrow/sparse_tensor.cc @@ -405,14 +405,25 @@ SparseCSFIndex::SparseCSFIndex(const std::vector>& indpt std::string SparseCSFIndex::ToString() const { return std::string("SparseCSFIndex"); } bool SparseCSFIndex::Equals(const SparseCSFIndex& other) const { - if (indices().size() != other.indices().size()) return false; - if (indptr().size() != other.indptr().size()) return false; - if (axis_order().size() != other.axis_order().size()) return false; + if (indices().size() != other.indices().size()) { + return false; + } + if (indptr().size() != other.indptr().size()) { + return false; + } + if (axis_order().size() != other.axis_order().size()) { + return false; + } + for (int64_t i = 0; i < static_cast(indices().size()); ++i) { - if (!indices()[i]->Equals(*other.indices()[i])) return false; + if (!indices()[i]->Equals(*other.indices()[i])) { + return false; + } } for (int64_t i = 0; i < static_cast(indptr().size()); ++i) { - if (!indptr()[i]->Equals(*other.indptr()[i])) return false; + if (!indptr()[i]->Equals(*other.indptr()[i])) { + return false; + } } return axis_order() == other.axis_order(); } From 8dbc21bd21e12a257048342d7bc0c568db0c13ac Mon Sep 17 00:00:00 2001 From: Alirana2829 Date: Mon, 2 Feb 2026 20:35:17 +0500 Subject: [PATCH 05/13] Fix test: Use valid CSF index structures (2D vs 3D instead of invalid 1D) --- cpp/src/arrow/sparse_tensor_test.cc | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc index 87f07b9a7a8..a0ad171c745 100644 --- a/cpp/src/arrow/sparse_tensor_test.cc +++ b/cpp/src/arrow/sparse_tensor_test.cc @@ -1645,19 +1645,21 @@ TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestEqualityMismatchedDimensi using IndexValueType = TypeParam; using c_index_value_type = typename IndexValueType::c_type; - // 1D (trivial) vs 2D - std::vector axis_order_1D = {0}; - std::vector> indptr_1D = {{0, 1}}; - std::vector> indices_1D = {{0}}; - auto si_1D = this->MakeSparseCSFIndex(axis_order_1D, indptr_1D, indices_1D); - + // 2D vs 3D - comparing indices with different dimensionality + // 2D CSF: ndim=2, so indptr.size()=1, indices.size()=2 std::vector axis_order_2D = {0, 1}; std::vector> indptr_2D = {{0, 1}}; std::vector> indices_2D = {{0}, {0}}; auto si_2D = this->MakeSparseCSFIndex(axis_order_2D, indptr_2D, indices_2D); - ASSERT_FALSE(si_1D->Equals(*si_2D)); - ASSERT_FALSE(si_2D->Equals(*si_1D)); + // 3D CSF: ndim=3, so indptr.size()=2, indices.size()=3 + std::vector axis_order_3D = {0, 1, 2}; + std::vector> indptr_3D = {{0, 1}, {0, 1}}; + std::vector> indices_3D = {{0}, {0}, {0}}; + auto si_3D = this->MakeSparseCSFIndex(axis_order_3D, indptr_3D, indices_3D); + + ASSERT_FALSE(si_2D->Equals(*si_3D)); + ASSERT_FALSE(si_3D->Equals(*si_2D)); } REGISTER_TYPED_TEST_SUITE_P(TestSparseCSFTensorForIndexValueType, TestCreateSparseTensor, From 900b98d43cc8d5978bc4aabde2baf13d8c332ec5 Mon Sep 17 00:00:00 2001 From: Alirana2829 Date: Mon, 2 Feb 2026 20:49:15 +0500 Subject: [PATCH 06/13] Remove invalid test causing segfaults with empty tensors The TEST(TestSparseCSFIndex, EqualsMismatchedDimensions) test created SparseCSFIndex objects with empty tensors (nullptr buffers, 0-length shape), causing segfaults during validation on ASAN/UBSAN and 'front() called on empty vector' errors on MSVC. The typed test TestEqualityMismatchedDimensions already properly validates the fix with valid CSF index structures. --- cpp/src/arrow/sparse_tensor_test.cc | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc index a0ad171c745..062d5fceebd 100644 --- a/cpp/src/arrow/sparse_tensor_test.cc +++ b/cpp/src/arrow/sparse_tensor_test.cc @@ -1697,29 +1697,4 @@ TEST(TestSparseCSFMatrixForUInt64Index, Make) { ASSERT_RAISES(Invalid, SparseCSFTensor::Make(dense_tensor, uint64())); } -TEST(TestSparseCSFIndex, EqualsMismatchedDimensions) { - std::vector axis_order_4d = {0, 1, 2, 3}; - std::vector axis_order_2d = {0, 1}; - - // Create empty tensors for indptr/indices - auto empty_buffer = std::make_shared(nullptr, 0); - auto tensor_4d = std::make_shared(int32(), empty_buffer, std::vector{0}); - auto tensor_2d = std::make_shared(int32(), empty_buffer, std::vector{0}); - - // Create a 4D index - std::vector> indptr_4d = {tensor_4d, tensor_4d, tensor_4d}; - std::vector> indices_4d = {tensor_4d, tensor_4d, tensor_4d, tensor_4d}; - auto si_4d = std::make_shared(indptr_4d, indices_4d, axis_order_4d); - - // Create a 2D index - std::vector> indptr_2d = {tensor_2d}; - std::vector> indices_2d = {tensor_2d, tensor_2d}; - auto si_2d = std::make_shared(indptr_2d, indices_2d, axis_order_2d); - - // Before fix: This would segfault. - // After fix: Returns false safely. - ASSERT_FALSE(si_4d->Equals(*si_2d)); - ASSERT_FALSE(si_2d->Equals(*si_4d)); -} - } // namespace arrow From 76fe33892b713edbd1915117ae06bf91fd9c8a43 Mon Sep 17 00:00:00 2001 From: Alirana2829 Date: Tue, 3 Feb 2026 23:58:42 +0500 Subject: [PATCH 07/13] Revert formatting changes per maintainer feedback Keep only essential size checks. Maintainers requested reverting formatting changes to reduce diff noise and improve readability. --- cpp/src/arrow/sparse_tensor.cc | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc index acb531435b0..cdbb4be7e20 100644 --- a/cpp/src/arrow/sparse_tensor.cc +++ b/cpp/src/arrow/sparse_tensor.cc @@ -416,14 +416,10 @@ bool SparseCSFIndex::Equals(const SparseCSFIndex& other) const { } for (int64_t i = 0; i < static_cast(indices().size()); ++i) { - if (!indices()[i]->Equals(*other.indices()[i])) { - return false; - } + if (!indices()[i]->Equals(*other.indices()[i])) return false; } for (int64_t i = 0; i < static_cast(indptr().size()); ++i) { - if (!indptr()[i]->Equals(*other.indptr()[i])) { - return false; - } + if (!indptr()[i]->Equals(*other.indptr()[i])) return false; } return axis_order() == other.axis_order(); } From a6e50883f8f3a94338f62da3dfee44cda952326b Mon Sep 17 00:00:00 2001 From: Alirana2829 Date: Wed, 4 Feb 2026 00:11:24 +0500 Subject: [PATCH 08/13] Remove redundant axis_order size check per review feedback The axis_order().size() check was unnecessary because vector equality operator already compares sizes. Keeping only the essential checks for indices() and indptr() that prevent segfault from out-of-bounds access. --- cpp/src/arrow/sparse_tensor.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc index cdbb4be7e20..ac27b80b2ae 100644 --- a/cpp/src/arrow/sparse_tensor.cc +++ b/cpp/src/arrow/sparse_tensor.cc @@ -411,9 +411,6 @@ bool SparseCSFIndex::Equals(const SparseCSFIndex& other) const { if (indptr().size() != other.indptr().size()) { return false; } - if (axis_order().size() != other.axis_order().size()) { - return false; - } for (int64_t i = 0; i < static_cast(indices().size()); ++i) { if (!indices()[i]->Equals(*other.indices()[i])) return false; From f9bc643128d43f79f713d289f9c3575722182c66 Mon Sep 17 00:00:00 2001 From: Ali Mahmood Rana <159713825+AliRana30@users.noreply.github.com> Date: Wed, 4 Feb 2026 14:57:52 +0500 Subject: [PATCH 09/13] Update cpp/src/arrow/sparse_tensor.cc Co-authored-by: Rok Mihevc --- cpp/src/arrow/sparse_tensor.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc index ac27b80b2ae..9af922c4b41 100644 --- a/cpp/src/arrow/sparse_tensor.cc +++ b/cpp/src/arrow/sparse_tensor.cc @@ -419,6 +419,11 @@ bool SparseCSFIndex::Equals(const SparseCSFIndex& other) const { if (!indptr()[i]->Equals(*other.indptr()[i])) return false; } return axis_order() == other.axis_order(); +bool SparseCSFIndex::Equals(const SparseCSFIndex& other) const { + auto eq = [](const auto& a, const auto& b) { return a->Equals(*b); }; + return axis_order() == other.axis_order() + && std::ranges::equal(indices(), other.indices(), eq) + && std::ranges::equal(indptr(), other.indptr(), eq); } // ---------------------------------------------------------------------- From c1e33a27d4ad7802ca90dda35304e2a4689c4372 Mon Sep 17 00:00:00 2001 From: Ali Mahmood Rana <159713825+AliRana30@users.noreply.github.com> Date: Wed, 4 Feb 2026 15:12:30 +0500 Subject: [PATCH 10/13] Update cpp/src/arrow/sparse_tensor_test.cc Co-authored-by: Rok Mihevc --- cpp/src/arrow/sparse_tensor_test.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc index 062d5fceebd..434f4a1723c 100644 --- a/cpp/src/arrow/sparse_tensor_test.cc +++ b/cpp/src/arrow/sparse_tensor_test.cc @@ -1660,6 +1660,7 @@ TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestEqualityMismatchedDimensi ASSERT_FALSE(si_2D->Equals(*si_3D)); ASSERT_FALSE(si_3D->Equals(*si_2D)); + ASSERT_TRUE(si_2D->Equals(*si_2D)); } REGISTER_TYPED_TEST_SUITE_P(TestSparseCSFTensorForIndexValueType, TestCreateSparseTensor, From 915dcdc4d29e62163a1aef4a448d35733de829a5 Mon Sep 17 00:00:00 2001 From: Alirana2829 Date: Wed, 4 Feb 2026 15:23:07 +0500 Subject: [PATCH 11/13] Fix duplicate function definition from GitHub suggestion GitHub's 'Commit suggestion' feature added rok's implementation but didn't remove the old code, causing duplicate definition error. Removed old implementation to keep only the cleaner C++20 ranges version. --- cpp/src/arrow/sparse_tensor.cc | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc index 9af922c4b41..9d3ac1d7c18 100644 --- a/cpp/src/arrow/sparse_tensor.cc +++ b/cpp/src/arrow/sparse_tensor.cc @@ -404,21 +404,6 @@ SparseCSFIndex::SparseCSFIndex(const std::vector>& indpt std::string SparseCSFIndex::ToString() const { return std::string("SparseCSFIndex"); } -bool SparseCSFIndex::Equals(const SparseCSFIndex& other) const { - if (indices().size() != other.indices().size()) { - return false; - } - if (indptr().size() != other.indptr().size()) { - return false; - } - - for (int64_t i = 0; i < static_cast(indices().size()); ++i) { - if (!indices()[i]->Equals(*other.indices()[i])) return false; - } - for (int64_t i = 0; i < static_cast(indptr().size()); ++i) { - if (!indptr()[i]->Equals(*other.indptr()[i])) return false; - } - return axis_order() == other.axis_order(); bool SparseCSFIndex::Equals(const SparseCSFIndex& other) const { auto eq = [](const auto& a, const auto& b) { return a->Equals(*b); }; return axis_order() == other.axis_order() From b508636b0a2f58c3bcb0981a65397f7ad00fe8f4 Mon Sep 17 00:00:00 2001 From: Ali Mahmood Rana <159713825+AliRana30@users.noreply.github.com> Date: Wed, 4 Feb 2026 15:59:16 +0500 Subject: [PATCH 12/13] Update cpp/src/arrow/sparse_tensor.cc Co-authored-by: Rok Mihevc --- cpp/src/arrow/sparse_tensor.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc index 9d3ac1d7c18..17fc48da2b4 100644 --- a/cpp/src/arrow/sparse_tensor.cc +++ b/cpp/src/arrow/sparse_tensor.cc @@ -406,9 +406,9 @@ std::string SparseCSFIndex::ToString() const { return std::string("SparseCSFInde bool SparseCSFIndex::Equals(const SparseCSFIndex& other) const { auto eq = [](const auto& a, const auto& b) { return a->Equals(*b); }; - return axis_order() == other.axis_order() - && std::ranges::equal(indices(), other.indices(), eq) - && std::ranges::equal(indptr(), other.indptr(), eq); + return axis_order() == other.axis_order() && + std::ranges::equal(indices(), other.indices(), eq) && + std::ranges::equal(indptr(), other.indptr(), eq); } // ---------------------------------------------------------------------- From ae4c1b975c20624d00596ffaeab283988ac0fafd Mon Sep 17 00:00:00 2001 From: Alirana2829 Date: Wed, 4 Feb 2026 17:31:33 +0500 Subject: [PATCH 13/13] Apply clang-format indentation per CI requirement --- cpp/src/arrow/sparse_tensor.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc index 17fc48da2b4..477fa2f7650 100644 --- a/cpp/src/arrow/sparse_tensor.cc +++ b/cpp/src/arrow/sparse_tensor.cc @@ -407,8 +407,8 @@ std::string SparseCSFIndex::ToString() const { return std::string("SparseCSFInde bool SparseCSFIndex::Equals(const SparseCSFIndex& other) const { auto eq = [](const auto& a, const auto& b) { return a->Equals(*b); }; return axis_order() == other.axis_order() && - std::ranges::equal(indices(), other.indices(), eq) && - std::ranges::equal(indptr(), other.indptr(), eq); + std::ranges::equal(indices(), other.indices(), eq) && + std::ranges::equal(indptr(), other.indptr(), eq); } // ----------------------------------------------------------------------