From b2a8f9f2d35f5e341bb2bc7074e50615b0a551c1 Mon Sep 17 00:00:00 2001 From: Aiden Woodruff Date: Sun, 22 Jun 2025 15:27:49 -0400 Subject: [PATCH 1/4] add mbalanceEmpty test - test/mbalanceEmpty.cc: add test to try to balance with empty input parts, which may occur during cavity operator localization. - test/CMakeLists.txt: add mbalanceEmpty. - test/testing.cmake: add mbalanceEmpty. - add msplit dependencies on convert given ENABLE_SIMMETRIX. Signed-off-by: Aiden Woodruff --- test/CMakeLists.txt | 1 + test/mbalanceEmpty.cc | 54 +++++++++++++++++++++++++++++++++++++++++++ test/testing.cmake | 11 +++++++++ 3 files changed, 66 insertions(+) create mode 100644 test/mbalanceEmpty.cc diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d8a2cc1ee..85b3db547 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -112,6 +112,7 @@ if(ENABLE_ZOLTAN) endif() if(ENABLE_METIS) util_exe_func(mbalance mbalance.cc) + test_exe_func(mbalanceEmpty mbalanceEmpty.cc) endif() # Mesh improvement utilities diff --git a/test/mbalanceEmpty.cc b/test/mbalanceEmpty.cc new file mode 100644 index 000000000..3100522a2 --- /dev/null +++ b/test/mbalanceEmpty.cc @@ -0,0 +1,54 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int main(int argc, char* argv[]) { + pcu::Init(&argc, &argv); + try { + pcu::PCU PCU; + if (argc != 5) { + if (PCU.Self() == 0) + std::cerr << "USAGE: " + << std::endl; + throw std::runtime_error("invalid arguments"); + } + lion_set_verbosity(1); + gmi_register_mesh(); + // load model and mesh + int inParts = std::stoi(argv[3]); + int group = PCU.Self() / inParts; + auto loadPCU = PCU.Split(group, 0); + gmi_model* model = gmi_load(argv[1]); + apf::Mesh2* m = nullptr; + if (group == 0) { + m = apf::loadMdsMesh(model, argv[2], loadPCU.get()); + m->switchPCU(&PCU); + } + m = apf::expandMdsMesh(m, model, inParts, &PCU); + try { + std::unique_ptr balancer(apf::makeMETISbalancer(m)); + double 
imbalance = 1.1; + balancer->balance(nullptr, imbalance); + } catch (...) { + std::throw_with_nested(std::runtime_error("balancing failed")); + } + m->writeNative(argv[4]); + // destroy mds + m->destroyNative(); + apf::destroyMesh(m); + } catch (...) { + pcu::Finalize(); + return 1; + } + pcu::Finalize(); + return 0; +} diff --git a/test/testing.cmake b/test/testing.cmake index b075903c3..058c3ece1 100644 --- a/test/testing.cmake +++ b/test/testing.cmake @@ -442,6 +442,17 @@ if(ENABLE_METIS) 3 ) set_test_depends(TESTS msplit_6 DEPENDS msplit_2) + mpi_test(mbalanceEmpty 4 + ./mbalanceEmpty + "${MDIR}/pipe.dmg" "pipe.smb" 1 + "pipe_mbe_.smb" + ) + if(ENABLE_SIMMETRIX) + set_test_depends( + TESTS msplit_2 msplit_3 msplit_6 mbalanceEmpty + DEPENDS convert + ) + endif() endif() if(ENABLE_ZOLTAN) mpi_test(refineX 2 From 055487d39195aa4c3f9d481e20be75df62b05a49 Mon Sep 17 00:00:00 2001 From: Aiden Woodruff Date: Sun, 22 Jun 2025 22:28:14 -0400 Subject: [PATCH 2/4] fix MetisBalancer helper asserts - change asserts to debug since they are on helpers and any bad input should be programmer error. - metis/apfMETISbalancer.cc (gatherGraph): change precondition from |owned_xadj| > 1 to owned_xadj is non-empty. an empty xadj should just have the number 0. - (scatterPart): remove n_owned > 0. send/receive 0 should just be a no-op. 
Signed-off-by: Aiden Woodruff --- metis/apfMETISbalancer.cc | 16 ++++++++-------- test/mbalanceEmpty.cc | 1 + 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/metis/apfMETISbalancer.cc b/metis/apfMETISbalancer.cc index f86756968..a310716c2 100644 --- a/metis/apfMETISbalancer.cc +++ b/metis/apfMETISbalancer.cc @@ -33,9 +33,9 @@ static void gatherGraph( std::vector& xadj, std::vector& adjncy, std::vector& vtx_cts ) { - PCU_ALWAYS_ASSERT(PCU.Peers() > 1); - PCU_ALWAYS_ASSERT(owned_xadj.size() > 1); - PCU_ALWAYS_ASSERT(owned_adjncy.size() == size_t(owned_xadj.back())); + PCU_DEBUG_ASSERT(PCU.Peers() > 1); + PCU_DEBUG_ASSERT(!owned_xadj.empty()); + PCU_DEBUG_ASSERT(owned_adjncy.size() == size_t(owned_xadj.back())); auto t0 = pcu::Time(); int owned_vtx_ct = owned_xadj.size() - 1; int xadj_size = PCU.Add(owned_vtx_ct) + 1; @@ -92,12 +92,11 @@ static void scatterPart( const std::vector& vtx_cts, std::vector& owned_part, int n_owned ) { - PCU_ALWAYS_ASSERT(PCU.Peers() > 1); - PCU_ALWAYS_ASSERT(PCU.Self() != 0 || vtx_cts.size() == size_t(PCU.Peers())); - PCU_ALWAYS_ASSERT( + PCU_DEBUG_ASSERT(PCU.Peers() > 1); + PCU_DEBUG_ASSERT(PCU.Self() != 0 || vtx_cts.size() == size_t(PCU.Peers())); + PCU_DEBUG_ASSERT( std::accumulate(vtx_cts.begin(), vtx_cts.end(), 0UL) == part.size() ); - PCU_ALWAYS_ASSERT(n_owned > 0); auto t0 = pcu::Time(); owned_part.resize(n_owned); PCU.Begin(); @@ -171,7 +170,8 @@ static void remapPart(int nparts, std::vector& part, const std::vector 1.0); - if (mesh_->getPCU()->Peers() > APF_METIS_MAXRANKS) { + if (mesh_->getPCU()->Peers() == 1) return; // no work to be done. 
+ else if (mesh_->getPCU()->Peers() > APF_METIS_MAXRANKS) { fail( "METIS called with > " STRINGIFY(APF_METIS_MAXRANKS) " procs, which is unsupported due to memory requirements\n" diff --git a/test/mbalanceEmpty.cc b/test/mbalanceEmpty.cc index 3100522a2..42d607054 100644 --- a/test/mbalanceEmpty.cc +++ b/test/mbalanceEmpty.cc @@ -41,6 +41,7 @@ int main(int argc, char* argv[]) { } catch (...) { std::throw_with_nested(std::runtime_error("balancing failed")); } + m->verify(); m->writeNative(argv[4]); // destroy mds m->destroyNative(); From 947383fa9cb46e808228067cce5ebe019d9b343d Mon Sep 17 00:00:00 2001 From: Aiden Woodruff Date: Mon, 23 Jun 2025 12:45:46 -0400 Subject: [PATCH 3/4] Update test/mbalanceEmpty.cc usage string Co-authored-by: Cameron Smith --- test/mbalanceEmpty.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/mbalanceEmpty.cc b/test/mbalanceEmpty.cc index 42d607054..e0a038958 100644 --- a/test/mbalanceEmpty.cc +++ b/test/mbalanceEmpty.cc @@ -17,7 +17,7 @@ int main(int argc, char* argv[]) { pcu::PCU PCU; if (argc != 5) { if (PCU.Self() == 0) - std::cerr << "USAGE: " + std::cerr << "USAGE: " << std::endl; throw std::runtime_error("invalid arguments"); } From c0e7b55c58b05615fdd91c9d65ca1bd83ec7e56a Mon Sep 17 00:00:00 2001 From: Aiden Woodruff Date: Mon, 23 Jun 2025 15:28:52 -0400 Subject: [PATCH 4/4] require that inParts < PCU.Peers - test/mbalanceEmpty.cc: add info to USAGE text about new requirement. - check for inParts >= PCU.Peers and throw. - print exceptions with nested printing function. - alter scope so that PCU.Self() can be used from exception printing code. - add retval to only write pcu::Finalize once. 
Signed-off-by: Aiden Woodruff --- test/mbalanceEmpty.cc | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/test/mbalanceEmpty.cc b/test/mbalanceEmpty.cc index e0a038958..8b89705c6 100644 --- a/test/mbalanceEmpty.cc +++ b/test/mbalanceEmpty.cc @@ -11,13 +11,20 @@ #include #include +namespace { +void print_exception(const std::exception& e, int level = 0); +} + int main(int argc, char* argv[]) { + int retval = 0; pcu::Init(&argc, &argv); + { + pcu::PCU PCU; try { - pcu::PCU PCU; if (argc != 5) { if (PCU.Self() == 0) - std::cerr << "USAGE: " + std::cerr << "USAGE: \n" + "\nwhere inParts < PCU.Peers()" << std::endl; throw std::runtime_error("invalid arguments"); } @@ -25,6 +32,9 @@ int main(int argc, char* argv[]) { gmi_register_mesh(); // load model and mesh int inParts = std::stoi(argv[3]); + if (inParts < 1 || inParts >= PCU.Peers()) { + throw std::runtime_error("inParts must be >= 1 and < PCU.Peers()"); + } int group = PCU.Self() / inParts; auto loadPCU = PCU.Split(group, 0); gmi_model* model = gmi_load(argv[1]); @@ -46,10 +56,31 @@ int main(int argc, char* argv[]) { // destroy mds m->destroyNative(); apf::destroyMesh(m); + } catch (const std::exception& e) { + if (PCU.Self() == 0) { + std::cerr << "ERROR: "; + print_exception(e); + } + retval = 1; } catch (...) { - pcu::Finalize(); - return 1; + if (PCU.Self() == 0) + std::cerr << "Unknown exception occurred." << std::endl; + retval = 1; } + } // PCU object scope pcu::Finalize(); - return 0; + return retval; } + +namespace { + +void print_exception(const std::exception& e, int level) { + std::cerr << std::string(level * 2, ' ') << e.what() << '\n'; + try { + std::rethrow_if_nested(e); + } catch (const std::exception& nestedE) { + print_exception(nestedE, level + 1); + } catch (...) {} +} + +} // namespace