From 2b70a689bdca9fae15fe1ad9e262ae76cfed3e00 Mon Sep 17 00:00:00 2001 From: Alex Lindsay Date: Mon, 26 Jan 2026 10:04:53 -0700 Subject: [PATCH 01/10] We don't need ghosted vectors when we have a single rank --- include/numerics/petsc_vector.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/numerics/petsc_vector.h b/include/numerics/petsc_vector.h index 57391039a99..7b1f5882d9c 100644 --- a/include/numerics/petsc_vector.h +++ b/include/numerics/petsc_vector.h @@ -773,6 +773,12 @@ void PetscVector::init (const numeric_index_type n, { parallel_object_only(); + if (this->comm().size() == 1) + { + libmesh_assert(ghost.empty()); + this->init(n, n_local, fast, SERIAL); + } + PetscInt petsc_n=static_cast(n); PetscInt petsc_n_local=static_cast(n_local); PetscInt petsc_n_ghost=static_cast(ghost.size()); From 059d4707cae32e605254af7c8b982dff8968a9e5 Mon Sep 17 00:00:00 2001 From: Alex Lindsay Date: Mon, 26 Jan 2026 10:10:00 -0700 Subject: [PATCH 02/10] Try just using operator= for all localize calls The code path that used to exist after the `operator=` branch is incredibly non-performant. It led to a serial GPU run running 20% slower on residual evaluations than a 30 rank CPU run. After its removal, the GPU run residual evaluation is 3 times faster. That's how large the performance penalty is --- src/numerics/petsc_vector.C | 90 +++---------------------------------- 1 file changed, 5 insertions(+), 85 deletions(-) diff --git a/src/numerics/petsc_vector.C b/src/numerics/petsc_vector.C index 5cca270b57a..0e98bd665b2 100644 --- a/src/numerics/petsc_vector.C +++ b/src/numerics/petsc_vector.C @@ -579,99 +579,19 @@ void PetscVector::localize (NumericVector & v_local_in) const { parallel_object_only(); - this->_restore_array(); - - // Make sure the NumericVector passed in is really a PetscVector - PetscVector * v_local = cast_ptr *>(&v_local_in); - - libmesh_assert(v_local); - // v_local_in should be closed - libmesh_assert(v_local->closed()); - libmesh_assert_equal_to (v_local->size(), this->size()); - // 1) v_local_in is a large vector to hold the whole world - // 2) v_local_in is a ghosted vector - // 3) v_local_in is a parallel vector - // Cases 2) and 3) should be scalable - libmesh_assert(this->size()==v_local->local_size() || this->local_size()==v_local->local_size()); - - - if (v_local->type() == SERIAL && this->size() == v_local->local_size()) - { - WrappedPetsc scatter; - LibmeshPetscCall(VecScatterCreateToAll(_vec, scatter.get(), nullptr)); - VecScatterBeginEnd(this->comm(), scatter, _vec, v_local->_vec, INSERT_VALUES, SCATTER_FORWARD); - } - // Two vectors have the same size, and we should just do a simple copy. - // v_local could be either a parallel or ghosted vector - else if (this->local_size() == v_local->local_size()) - LibmeshPetscCall(VecCopy(_vec,v_local->_vec)); - else - libmesh_error_msg("Vectors are inconsistent"); + libmesh_assert(this->comm().verify(int(this->type()))); + libmesh_assert(this->comm().verify(int(v_local_in.type()))); - // Make sure ghost dofs are up to date - // We do not call "close" here to save a global reduction - if (v_local->type() == GHOSTED) - VecGhostUpdateBeginEnd(this->comm(), v_local->_vec, INSERT_VALUES, SCATTER_FORWARD); + v_local_in = *this; } template void PetscVector::localize (NumericVector & v_local_in, - const std::vector & send_list) const + const std::vector & /*send_list*/) const { - parallel_object_only(); - - libmesh_assert(this->comm().verify(int(this->type()))); - libmesh_assert(this->comm().verify(int(v_local_in.type()))); - - // FIXME: Workaround for a strange bug at large-scale. - // If we have ghosting, PETSc lets us just copy the solution, and - // doing so avoids a segfault? - if (v_local_in.type() == GHOSTED && - this->type() == PARALLEL) - { - v_local_in = *this; - return; - } - - // Normal code path begins here - - this->_restore_array(); - - // Make sure the NumericVector passed in is really a PetscVector - PetscVector * v_local = cast_ptr *>(&v_local_in); - - libmesh_assert(v_local); - libmesh_assert_equal_to (v_local->size(), this->size()); - libmesh_assert_less_equal (send_list.size(), v_local->size()); - - const numeric_index_type n_sl = - cast_int(send_list.size()); - - std::vector idx(n_sl + this->local_size()); - for (numeric_index_type i=0; i(send_list[i]); - for (auto i : make_range(this->local_size())) - idx[n_sl+i] = i + this->first_local_index(); - - // Create the index set & scatter objects - WrappedPetsc is; - PetscInt * idxptr = idx.empty() ? nullptr : idx.data(); - LibmeshPetscCall(ISCreateGeneral(this->comm().get(), n_sl+this->local_size(), - idxptr, PETSC_USE_POINTER, is.get())); - - WrappedPetsc scatter; - LibmeshPetscCall(VecScatterCreate(_vec, is, - v_local->_vec, is, - scatter.get())); - - // Perform the scatter - VecScatterBeginEnd(this->comm(), scatter, _vec, v_local->_vec, INSERT_VALUES, SCATTER_FORWARD); - - // Make sure ghost dofs are up to date - if (v_local->type() == GHOSTED) - v_local->close(); + this->localize(v_local_in); } From 99dd2208d1a839d06ea2c0844da976e645d07893 Mon Sep 17 00:00:00 2001 From: Alex Lindsay Date: Tue, 27 Jan 2026 14:56:41 -0700 Subject: [PATCH 03/10] Upgrade operator= to handle parallel to serial assignment --- include/numerics/petsc_vector.h | 6 +++++- src/numerics/petsc_vector.C | 28 +++++++++++++++++++++++++--- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/include/numerics/petsc_vector.h b/include/numerics/petsc_vector.h index 7b1f5882d9c..387b5006edd 100644 --- a/include/numerics/petsc_vector.h +++ b/include/numerics/petsc_vector.h @@ -121,7 +121,11 @@ class PetscVector final : public NumericVector /** * Copy assignment operator. - * Calls VecCopy after performing various checks. + * Supported assignments based on ParallelType combination (note that we lump ghosted into + * parallel for this method documentation): + * - Assign from parallel to parallel + * - Assign from serial to serial + * - Assign from parallel to serial * \returns A reference to *this as the derived type. */ PetscVector & operator= (const PetscVector & v); diff --git a/src/numerics/petsc_vector.C b/src/numerics/petsc_vector.C index 0e98bd665b2..3282a6f6ce8 100644 --- a/src/numerics/petsc_vector.C +++ b/src/numerics/petsc_vector.C @@ -517,11 +517,33 @@ PetscVector::operator = (const PetscVector & v) v._restore_array(); libmesh_assert_equal_to (this->size(), v.size()); - libmesh_assert_equal_to (this->local_size(), v.local_size()); libmesh_assert (v.closed()); - - LibmeshPetscCall(VecCopy (v._vec, this->_vec)); + // This is PETSc's only requirement for VecCopy + if (this->local_size() == v.local_size()) + LibmeshPetscCall(VecCopy(v._vec, this->_vec)); + else if (this->local_size() == v.size()) // scatter from parallel to serial + { + VecScatter scat; + + libmesh_assert(this->type() == SERIAL); + libmesh_assert(v.type() != SERIAL); + LibmeshPetscCall(VecDestroy(&this->_vec)); + LibmeshPetscCall(VecScatterCreateToAll(v._vec, &scat, &this->_vec)); + LibmeshPetscCall(VecScatterBegin(scat, v._vec, this->_vec, INSERT_VALUES, SCATTER_FORWARD)); + LibmeshPetscCall(VecScatterEnd(scat, v._vec, this->_vec, INSERT_VALUES, SCATTER_FORWARD)); + LibmeshPetscCall(VecScatterDestroy(&scat)); + } + else if (v.local_size() == this->size()) + { + libmesh_assert(v.type() == SERIAL); + libmesh_assert(this->type() != SERIAL); + libmesh_error_msg( + "Scattering from a serial vector on every rank to a parallel vector is not behavior we " + "define because we do not verify the serial vector is the same on each rank"); + } + else + libmesh_error_msg("Unhandled vector combination"); libmesh_assert(this->comm().verify(int(this->type()))); From a1f776a13eab7701ac2c93b02b1404009399cce8 Mon Sep 17 00:00:00 2001 From: Alex Lindsay Date: Tue, 27 Jan 2026 15:00:10 -0700 Subject: [PATCH 04/10] Use std::underlying_type --- src/numerics/petsc_vector.C | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/numerics/petsc_vector.C b/src/numerics/petsc_vector.C index 3282a6f6ce8..be9e31867ee 100644 --- a/src/numerics/petsc_vector.C +++ b/src/numerics/petsc_vector.C @@ -335,7 +335,8 @@ void PetscVector::add (const T a_in, const NumericVector & v_in) LibmeshPetscCall(VecAXPY(_vec, a, v->vec())); - libmesh_assert(this->comm().verify(int(this->type()))); + libmesh_assert(this->comm().verify( + static_cast::type>(this->type()))); if (this->type() == GHOSTED) VecGhostUpdateBeginEnd(this->comm(), _vec, INSERT_VALUES, SCATTER_FORWARD); @@ -375,7 +376,8 @@ void PetscVector::scale (const T factor_in) LibmeshPetscCall(VecScale(_vec, factor)); - libmesh_assert(this->comm().verify(int(this->type()))); + libmesh_assert(this->comm().verify( + static_cast::type>(this->type()))); if (this->type() == GHOSTED) VecGhostUpdateBeginEnd(this->comm(), _vec, INSERT_VALUES, SCATTER_FORWARD); @@ -416,7 +418,8 @@ void PetscVector::abs() LibmeshPetscCall(VecAbs(_vec)); - libmesh_assert(this->comm().verify(int(this->type()))); + libmesh_assert(this->comm().verify( + static_cast::type>(this->type()))); if (this->type() == GHOSTED) VecGhostUpdateBeginEnd(this->comm(), _vec, INSERT_VALUES, SCATTER_FORWARD); @@ -480,7 +483,8 @@ PetscVector::operator = (const T s_in) { LibmeshPetscCall(VecSet(_vec, s)); - libmesh_assert(this->comm().verify(int(this->type()))); + libmesh_assert(this->comm().verify( + static_cast::type>(this->type()))); if (this->type() == GHOSTED) VecGhostUpdateBeginEnd(this->comm(), _vec, INSERT_VALUES, SCATTER_FORWARD); @@ -545,7 +549,8 @@ PetscVector::operator = (const PetscVector & v) else libmesh_error_msg("Unhandled vector combination"); - libmesh_assert(this->comm().verify(int(this->type()))); + libmesh_assert(this->comm().verify( + static_cast::type>(this->type()))); if (this->type() == GHOSTED) VecGhostUpdateBeginEnd(this->comm(), _vec, INSERT_VALUES, SCATTER_FORWARD); @@ -601,8 +606,10 @@ void PetscVector::localize (NumericVector & v_local_in) const { parallel_object_only(); - libmesh_assert(this->comm().verify(int(this->type()))); - libmesh_assert(this->comm().verify(int(v_local_in.type()))); + libmesh_assert(this->comm().verify( + static_cast::type>(this->type()))); + libmesh_assert(this->comm().verify( + static_cast::type>(v_local_in.type()))); v_local_in = *this; } @@ -942,7 +949,8 @@ void PetscVector::pointwise_mult (const NumericVector & vec1, // Call PETSc function. LibmeshPetscCall(VecPointwiseMult(_vec, vec1_petsc->vec(), vec2_petsc->vec())); - libmesh_assert(this->comm().verify(int(this->type()))); + libmesh_assert(this->comm().verify( + static_cast::type>(this->type()))); if (this->type() == GHOSTED) VecGhostUpdateBeginEnd(this->comm(), _vec, INSERT_VALUES, SCATTER_FORWARD); @@ -965,7 +973,8 @@ void PetscVector::pointwise_divide (const NumericVector & vec1, // Call PETSc function. LibmeshPetscCall(VecPointwiseDivide(_vec, vec1_petsc->vec(), vec2_petsc->vec())); - libmesh_assert(this->comm().verify(int(this->type()))); + libmesh_assert(this->comm().verify( + static_cast::type>(this->type()))); if (this->type() == GHOSTED) VecGhostUpdateBeginEnd(this->comm(), _vec, INSERT_VALUES, SCATTER_FORWARD); From 701ab7ae9fa95da15d81977670545deb2d38338d Mon Sep 17 00:00:00 2001 From: Alex Lindsay Date: Fri, 30 Jan 2026 11:53:18 -0700 Subject: [PATCH 05/10] Register a one rank comm size as a sequential vector --- include/numerics/petsc_vector.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/numerics/petsc_vector.h b/include/numerics/petsc_vector.h index 387b5006edd..5b0a954e09e 100644 --- a/include/numerics/petsc_vector.h +++ b/include/numerics/petsc_vector.h @@ -707,7 +707,11 @@ void PetscVector::init (const numeric_index_type n, if (this->initialized()) this->clear(); - if (ptype == AUTOMATIC) + if (this->comm().size() == 1) + // This can help with some branching decisions and is also consistent with what PETSc does... a + // single rank Vec is going to be a sequential vector + this->_type = SERIAL; + else if (ptype == AUTOMATIC) { if (n == n_local) this->_type = SERIAL; From 281b9ace5f0d92a469b69223c051a3a77c825195 Mon Sep 17 00:00:00 2001 From: Alex Lindsay Date: Fri, 30 Jan 2026 13:10:54 -0700 Subject: [PATCH 06/10] Clean-up operator= --- src/numerics/petsc_vector.C | 61 +++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/src/numerics/petsc_vector.C b/src/numerics/petsc_vector.C index be9e31867ee..88c59225034 100644 --- a/src/numerics/petsc_vector.C +++ b/src/numerics/petsc_vector.C @@ -515,39 +515,54 @@ template PetscVector & PetscVector::operator = (const PetscVector & v) { + enum AssignmentType { ParallelToSerial, SerialToParallel, SameToSame, Error }; + parallel_object_only(); + if (this == &v) + return *this; + this->_restore_array(); v._restore_array(); libmesh_assert_equal_to (this->size(), v.size()); libmesh_assert (v.closed()); - // This is PETSc's only requirement for VecCopy - if (this->local_size() == v.local_size()) - LibmeshPetscCall(VecCopy(v._vec, this->_vec)); - else if (this->local_size() == v.size()) // scatter from parallel to serial - { - VecScatter scat; - - libmesh_assert(this->type() == SERIAL); - libmesh_assert(v.type() != SERIAL); - LibmeshPetscCall(VecDestroy(&this->_vec)); - LibmeshPetscCall(VecScatterCreateToAll(v._vec, &scat, &this->_vec)); - LibmeshPetscCall(VecScatterBegin(scat, v._vec, this->_vec, INSERT_VALUES, SCATTER_FORWARD)); - LibmeshPetscCall(VecScatterEnd(scat, v._vec, this->_vec, INSERT_VALUES, SCATTER_FORWARD)); - LibmeshPetscCall(VecScatterDestroy(&scat)); - } - else if (v.local_size() == this->size()) + AssignmentType assign_type = Error; + if (this->type() == SERIAL && v.type() != SERIAL && v.size() == this->local_size()) + assign_type = ParallelToSerial; + else if (this->type() != SERIAL && v.type() == SERIAL && this->size() == v.local_size()) + assign_type = SerialToParallel; + else if (this->local_size() == v.local_size()) + assign_type = SameToSame; + +#ifdef DEBUG + libmesh_assert(this->comm().verify( + static_cast::type>(assign_type))); +#endif + + switch (assign_type) { - libmesh_assert(v.type() == SERIAL); - libmesh_assert(this->type() != SERIAL); - libmesh_error_msg( - "Scattering from a serial vector on every rank to a parallel vector is not behavior we " - "define because we do not verify the serial vector is the same on each rank"); + case ParallelToSerial: + { + // scatter from parallel to serial + libmesh_assert(v.comm().size() > 1); + WrappedPetsc scatter; + LibmeshPetscCall(VecScatterCreateToAll(v._vec, scatter.get(), nullptr)); + VecScatterBeginEnd(v.comm(), scatter, v._vec, _vec, INSERT_VALUES, SCATTER_FORWARD); + break; + } + case SameToSame: + // serial to serial or parallel to parallel + LibmeshPetscCall(VecCopy(v._vec, _vec)); + break; + case SerialToParallel: + libmesh_not_implemented_msg( + "Scattering from a serial vector on every rank to a parallel vector is not behavior we " + "define because we do not verify the serial vector is the same on each rank"); + default: + libmesh_error_msg("Unhandled vector combination"); } - else - libmesh_error_msg("Unhandled vector combination"); libmesh_assert(this->comm().verify( static_cast::type>(this->type()))); From e364ebd8cc73629be4203e395574b8da605f5884 Mon Sep 17 00:00:00 2001 From: Alex Lindsay Date: Fri, 30 Jan 2026 14:33:47 -0700 Subject: [PATCH 07/10] Return after registering a one-rank ghosted vector as seq --- include/numerics/petsc_vector.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/numerics/petsc_vector.h b/include/numerics/petsc_vector.h index 5b0a954e09e..c3fb3608092 100644 --- a/include/numerics/petsc_vector.h +++ b/include/numerics/petsc_vector.h @@ -785,6 +785,7 @@ void PetscVector::init (const numeric_index_type n, { libmesh_assert(ghost.empty()); this->init(n, n_local, fast, SERIAL); + return; } PetscInt petsc_n=static_cast(n); From e8b717e09fc07c8f858fc50434e8a4b36c9e2c3c Mon Sep 17 00:00:00 2001 From: Alex Lindsay Date: Fri, 30 Jan 2026 14:34:02 -0700 Subject: [PATCH 08/10] More assertions --- src/numerics/petsc_vector.C | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/numerics/petsc_vector.C b/src/numerics/petsc_vector.C index 88c59225034..c10952f57af 100644 --- a/src/numerics/petsc_vector.C +++ b/src/numerics/petsc_vector.C @@ -518,6 +518,10 @@ PetscVector::operator = (const PetscVector & v) enum AssignmentType { ParallelToSerial, SerialToParallel, SameToSame, Error }; parallel_object_only(); + libmesh_assert(this->comm().verify( + static_cast::type>(this->type()))); + libmesh_assert(this->comm().verify( + static_cast::type>(v.type()))); if (this == &v) return *this; @@ -536,10 +540,8 @@ PetscVector::operator = (const PetscVector & v) else if (this->local_size() == v.local_size()) assign_type = SameToSame; -#ifdef DEBUG libmesh_assert(this->comm().verify( static_cast::type>(assign_type))); -#endif switch (assign_type) { From 1ee4fc37394153f634aa9268fea6861a8d80cbca Mon Sep 17 00:00:00 2001 From: Alex Lindsay Date: Mon, 2 Feb 2026 00:12:30 -0700 Subject: [PATCH 09/10] Create serial subvector if parent is serial --- src/numerics/petsc_vector.C | 46 ++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/src/numerics/petsc_vector.C b/src/numerics/petsc_vector.C index c10952f57af..069f2800003 100644 --- a/src/numerics/petsc_vector.C +++ b/src/numerics/petsc_vector.C @@ -1071,29 +1071,37 @@ void PetscVector::create_subvector(NumericVector & subvector, // If not, we use the appropriate PETSc routines to initialize it. if (!petsc_subvector->initialized()) { - libmesh_assert(petsc_subvector->_type == AUTOMATIC || petsc_subvector->_type == PARALLEL); - - if (supplying_global_rows) - // Initialize the petsc_subvector to have enough space to hold - // the entries which will be scattered into it. Note: such an - // init() function (where we let PETSc decide the number of local - // entries) is not currently offered by the PetscVector - // class. Should we differentiate here between sequential and - // parallel vector creation based on this->n_processors() ? - LibmeshPetscCall(VecCreateMPI(this->comm().get(), - PETSC_DECIDE, // n_local - cast_int(rows.size()), // n_global - &(petsc_subvector->_vec))); + if (this->type() == SERIAL) + { + libmesh_assert(this->comm().verify(rows.size())); + LibmeshPetscCall(VecCreateSeq(this->comm().get(), rows.size(), &(petsc_subvector->_vec))); + petsc_subvector->_type = SERIAL; + } else - LibmeshPetscCall(VecCreateMPI(this->comm().get(), - cast_int(rows.size()), - PETSC_DETERMINE, - &(petsc_subvector->_vec))); + { + if (supplying_global_rows) + // Initialize the petsc_subvector to have enough space to hold + // the entries which will be scattered into it. Note: such an + // init() function (where we let PETSc decide the number of local + // entries) is not currently offered by the PetscVector + // class. Should we differentiate here between sequential and + // parallel vector creation based on this->n_processors() ? + LibmeshPetscCall(VecCreateMPI(this->comm().get(), + PETSC_DECIDE, // n_local + cast_int(rows.size()), // n_global + &(petsc_subvector->_vec))); + else + LibmeshPetscCall(VecCreateMPI(this->comm().get(), + cast_int(rows.size()), + PETSC_DETERMINE, + &(petsc_subvector->_vec))); + + // We created a parallel vector + petsc_subvector->_type = PARALLEL; + } LibmeshPetscCall(VecSetFromOptions (petsc_subvector->_vec)); - // We created a parallel vector - petsc_subvector->_type = PARALLEL; // Mark the subvector as initialized petsc_subvector->_is_initialized = true; From 879862a1c68aa646fc8f70f971f8a2286d675d28 Mon Sep 17 00:00:00 2001 From: Alex Lindsay Date: Wed, 11 Feb 2026 01:35:42 -0700 Subject: [PATCH 10/10] remove truisms --- src/numerics/petsc_vector.C | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/numerics/petsc_vector.C b/src/numerics/petsc_vector.C index 069f2800003..a7bfab100c5 100644 --- a/src/numerics/petsc_vector.C +++ b/src/numerics/petsc_vector.C @@ -533,9 +533,9 @@ PetscVector::operator = (const PetscVector & v) libmesh_assert (v.closed()); AssignmentType assign_type = Error; - if (this->type() == SERIAL && v.type() != SERIAL && v.size() == this->local_size()) + if (this->type() == SERIAL && v.type() != SERIAL) assign_type = ParallelToSerial; - else if (this->type() != SERIAL && v.type() == SERIAL && this->size() == v.local_size()) + else if (this->type() != SERIAL && v.type() == SERIAL) assign_type = SerialToParallel; else if (this->local_size() == v.local_size()) assign_type = SameToSame;