From 72c3227b3a0be94c0ac51e0c0206de5005d7b215 Mon Sep 17 00:00:00 2001 From: Pratyksh Gupta Date: Mon, 5 Jan 2026 02:29:58 +0530 Subject: [PATCH 1/8] Fix #2546: Implemented ADT-based probe search and batched AllReduce --- AUTHORS.md | 1 + SU2_CFD/src/output/CFlowOutput.cpp | 113 +++++++++++++++--- TestCases/parallel_regression.py | 17 +++ .../lam_flatplate_probes.cfg | 104 ++++++++++++++++ .../user_defined_functions/test_11_probes.cfg | 62 ++++++++++ 5 files changed, 281 insertions(+), 16 deletions(-) create mode 100644 TestCases/user_defined_functions/lam_flatplate_probes.cfg create mode 100644 TestCases/user_defined_functions/test_11_probes.cfg diff --git a/AUTHORS.md b/AUTHORS.md index 3d7e7eb8416b..f6580fadb892 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -123,6 +123,7 @@ Paul Zhang Pedro Gomes Peng Yan Pete Bachant +Pratyksh Gupta RaulFeijo55 Ruben Sanchez Ryan Barrett diff --git a/SU2_CFD/src/output/CFlowOutput.cpp b/SU2_CFD/src/output/CFlowOutput.cpp index 696a4c595475..aec97d39ec5b 100644 --- a/SU2_CFD/src/output/CFlowOutput.cpp +++ b/SU2_CFD/src/output/CFlowOutput.cpp @@ -33,6 +33,7 @@ #include "../../include/output/CFlowOutput.hpp" #include "../../../Common/include/geometry/CGeometry.hpp" +#include "../../../Common/include/adt/CADTPointsOnlyClass.hpp" #include "../../../Common/include/toolboxes/geometry_toolbox.hpp" #include "../../include/solvers/CSolver.hpp" #include "../../include/variables/CPrimitiveIndices.hpp" @@ -819,6 +820,38 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry const bool axisymmetric = config->GetAxisymmetric(); const auto* flowNodes = su2staticcast_p(solver[FLOW_SOL]->GetNodes()); + /*--- Count probes that need processing and use heuristic to decide ADT vs linear search. + ADT overhead is only worth it for larger numbers of probes. ---*/ + unsigned long nProbes = 0; + for (const auto& output : customOutputs) { + if (!output.skip && output.type == OperationType::PROBE && output.varIndices.empty()) { + ++nProbes; + } + } + + /*--- Heuristic: Build ADT if we have more than 10 probes. For small numbers of probes, + the overhead of building the ADT may not be worth it compared to linear search. ---*/ + const unsigned long ADT_THRESHOLD = 10; + const bool useADT = (nProbes > ADT_THRESHOLD); + + /*--- Build ADT for probe nearest neighbor search if heuristic suggests it. ---*/ + std::unique_ptr probeADT; + if (useADT) { + const unsigned long nPointDomain = geometry->GetnPointDomain(); + vector coords(nDim * nPointDomain); + vector pointIDs(nPointDomain); + + for (unsigned long iPoint = 0; iPoint < nPointDomain; ++iPoint) { + pointIDs[iPoint] = iPoint; + for (unsigned short iDim = 0; iDim < nDim; ++iDim) { + coords[iPoint * nDim + iDim] = geometry->nodes->GetCoord(iPoint, iDim); + } + } + + /*--- Build global ADT to find nearest nodes across all ranks. ---*/ + probeADT = std::make_unique(nDim, nPointDomain, coords.data(), pointIDs.data(), true); + } + for (auto& output : customOutputs) { if (output.skip) continue; @@ -849,19 +882,33 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry } su2double coord[3] = {}; for (auto iDim = 0u; iDim < nDim; ++iDim) coord[iDim] = std::stod(output.markers[iDim]); + /*--- Use ADT for efficient nearest neighbor search instead of brute force. ---*/ su2double minDist = std::numeric_limits::max(); unsigned long minPoint = 0; - for (auto iPoint = 0ul; iPoint < geometry->GetnPointDomain(); ++iPoint) { - const su2double dist = GeometryToolbox::SquaredDistance(nDim, coord, geometry->nodes->GetCoord(iPoint)); - if (dist < minDist) { - minDist = dist; - minPoint = iPoint; + int rankID = -1; + int rank; + SU2_MPI::Comm_rank(SU2_MPI::GetComm(), &rank); + + if (useADT && probeADT && !probeADT->IsEmpty()) { + /*--- Use ADT to find the nearest node efficiently (O(log n) instead of O(n)). ---*/ + probeADT->DetermineNearestNode(coord, minDist, minPoint, rankID); + + /*--- Check if this rank owns the nearest point. ---*/ + output.iPoint = (rankID == rank) ? minPoint : CustomOutput::PROBE_NOT_OWNED; + } else { + /*--- Use linear search for small numbers of probes or when ADT is not available. ---*/ + for (auto iPoint = 0ul; iPoint < geometry->GetnPointDomain(); ++iPoint) { + const su2double dist = GeometryToolbox::SquaredDistance(nDim, coord, geometry->nodes->GetCoord(iPoint)); + if (dist < minDist) { + minDist = dist; + minPoint = iPoint; + } } + /*--- Decide which rank owns the probe using Allreduce. ---*/ + su2double globMinDist; + SU2_MPI::Allreduce(&minDist, &globMinDist, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm()); + output.iPoint = fabs(minDist - globMinDist) < EPS ? minPoint : CustomOutput::PROBE_NOT_OWNED; } - /*--- Decide which rank owns the probe. ---*/ - su2double globMinDist; - SU2_MPI::Allreduce(&minDist, &globMinDist, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm()); - output.iPoint = fabs(minDist - globMinDist) < EPS ? minPoint : CustomOutput::PROBE_NOT_OWNED; if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) { std::cout << "Probe " << output.name << " is using global point " << geometry->nodes->GetGlobalIndex(output.iPoint) @@ -899,13 +946,7 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry }; if (output.type == OperationType::PROBE) { - su2double value = std::numeric_limits::max(); - if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) { - value = output.Eval(MakeFunctor(output.iPoint)); - } - su2double tmp = value; - SU2_MPI::Allreduce(&tmp, &value, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm()); - SetHistoryOutputValue(output.name, value); + /*--- Probe evaluation will be done after all outputs are processed, with batched AllReduce. ---*/ continue; } @@ -954,6 +995,46 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry } SetHistoryOutputValue(output.name, integral[0]); } + + /*--- Batch AllReduce for all probe values to reduce MPI communication overhead. ---*/ + if (nProbes > 0) { + /*--- Evaluate all probe values locally first. ---*/ + vector probeValues(nProbes); + unsigned long iProbe = 0; + for (auto& output : customOutputs) { + if (output.skip || output.type != OperationType::PROBE) continue; + su2double value = std::numeric_limits::max(); + if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) { + auto MakeFunctor = [&](unsigned long iPoint) { + return [&, iPoint](unsigned long i) { + if (i < CustomOutput::NOT_A_VARIABLE) { + const auto solIdx = i / CustomOutput::MAX_VARS_PER_SOLVER; + const auto varIdx = i % CustomOutput::MAX_VARS_PER_SOLVER; + if (solIdx == FLOW_SOL) { + return flowNodes->GetPrimitive(iPoint, varIdx); + } + return solver[solIdx]->GetNodes()->GetSolution(iPoint, varIdx); + } else { + return *output.otherOutputs[i - CustomOutput::NOT_A_VARIABLE]; + } + }; + }; + value = output.Eval(MakeFunctor(output.iPoint)); + } + probeValues[iProbe++] = value; + } + + /*--- Single AllReduce for all probe values. ---*/ + vector probeValuesGlobal(nProbes); + SU2_MPI::Allreduce(probeValues.data(), probeValuesGlobal.data(), nProbes, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm()); + + /*--- Set history output values for all probes. ---*/ + iProbe = 0; + for (auto& output : customOutputs) { + if (output.skip || output.type != OperationType::PROBE) continue; + SetHistoryOutputValue(output.name, probeValuesGlobal[iProbe++]); + } + } } // The "AddHistoryOutput(" must not be split over multiple lines to ensure proper python parsing diff --git a/TestCases/parallel_regression.py b/TestCases/parallel_regression.py index e980b665df6f..764b589c71f5 100755 --- a/TestCases/parallel_regression.py +++ b/TestCases/parallel_regression.py @@ -314,6 +314,23 @@ def main(): flatplate_udobj.test_vals = [-6.760101, -1.283906, -0.745653, 0.000587, -0.000038, 0.000977, -0.001015, 596.450000, 299.550000, 296.900000, 21.318000, 0.586640, 36.553000, 2.188800] test_list.append(flatplate_udobj) + # Probe performance test (15 probes, ADT path) - tests issue #2546 fix + probe_performance = TestCase('probe_performance_15') + probe_performance.cfg_dir = "user_defined_functions" + probe_performance.cfg_file = "lam_flatplate_probes.cfg" + probe_performance.test_iter = 20 + probe_performance.test_vals = [-6.676934, 99047.0, 98854.0, 98775.0] # RMS_DENSITY, probe1, probe8, probe15 + test_list.append(probe_performance) + + # Probe performance test (11 probes, ADT path) - additional check + probe_performance_11 = TestCase('probe_performance_11') + probe_performance_11.cfg_dir = "user_defined_functions" + probe_performance_11.cfg_file = "test_11_probes.cfg" + probe_performance_11.test_iter = 5 + probe_performance_11.test_vals = [-6.300237, 1.0141e+05, 1.0132e+05, 1.0093e+05] # RMS_DENSITY, probe1, probe6, probe11 + # Tolerances are typically 0.001 in TestCase.py + test_list.append(probe_performance_11) + # Laminar cylinder (steady) cylinder = TestCase('cylinder') cylinder.cfg_dir = "navierstokes/cylinder" diff --git a/TestCases/user_defined_functions/lam_flatplate_probes.cfg b/TestCases/user_defined_functions/lam_flatplate_probes.cfg new file mode 100644 index 000000000000..c3bffa77fa38 --- /dev/null +++ b/TestCases/user_defined_functions/lam_flatplate_probes.cfg @@ -0,0 +1,104 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% % +% SU2 configuration file % +% Case description: Test probe performance with multiple probes (ADT path). % +% Author: Probe Performance Fix % +% Date: Jan 2025 % +% File Version 8.3.0 "Harrier" % +% % +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% +SOLVER= NAVIER_STOKES +KIND_TURB_MODEL= NONE +RESTART_SOL= NO +% +% User defined expressions with multiple probes to test ADT path (>10 probes) +% This tests the probe performance fix for issue #2546 +CUSTOM_OUTPUTS= 'velocity : Macro{sqrt(pow(VELOCITY_X, 2) + pow(VELOCITY_Y, 2) + pow(VELOCITY_Z, 2))};\ + probe1 : Probe{PRESSURE}[0.001, 0.001, 0.01];\ + probe2 : Probe{PRESSURE}[0.002, 0.001, 0.02];\ + probe3 : Probe{PRESSURE}[0.003, 0.001, 0.03];\ + probe4 : Probe{PRESSURE}[0.004, 0.001, 0.04];\ + probe5 : Probe{PRESSURE}[0.005, 0.001, 0.05];\ + probe6 : Probe{PRESSURE}[0.006, 0.001, 0.06];\ + probe7 : Probe{PRESSURE}[0.007, 0.001, 0.07];\ + probe8 : Probe{PRESSURE}[0.008, 0.001, 0.08];\ + probe9 : Probe{PRESSURE}[0.009, 0.001, 0.09];\ + probe10 : Probe{PRESSURE}[0.001, 0.002, 0.01];\ + probe11 : Probe{PRESSURE}[0.002, 0.002, 0.02];\ + probe12 : Probe{PRESSURE}[0.003, 0.002, 0.03];\ + probe13 : Probe{PRESSURE}[0.004, 0.002, 0.04];\ + probe14 : Probe{PRESSURE}[0.005, 0.002, 0.05];\ + probe15 : Probe{PRESSURE}[0.006, 0.002, 0.06]' +% +SCREEN_OUTPUT= INNER_ITER, RMS_DENSITY, probe1, probe8, probe15 +HISTORY_OUTPUT = ITER, CUSTOM + +% -------------------- COMPRESSIBLE FREE-STREAM DEFINITION --------------------% +% +MACH_NUMBER= 0.1 +INIT_OPTION= TD_CONDITIONS +FREESTREAM_OPTION= TEMPERATURE_FS +FREESTREAM_TEMPERATURE= 297.62 +REYNOLDS_NUMBER= 600 +REYNOLDS_LENGTH= 0.02 + +% ---------------------- REFERENCE VALUE DEFINITION ---------------------------% +% +REF_ORIGIN_MOMENT_X = 0.00 +REF_ORIGIN_MOMENT_Y = 0.00 +REF_ORIGIN_MOMENT_Z = 0.00 +REF_LENGTH= 0.02 +REF_AREA= 0.02 +% +FLUID_MODEL= IDEAL_GAS +GAMMA_VALUE= 1.4 +GAS_CONSTANT= 287.87 +VISCOSITY_MODEL= CONSTANT_VISCOSITY +MU_CONSTANT= 0.001 + +% -------------------- BOUNDARY CONDITION DEFINITION --------------------------% +% +MARKER_HEATFLUX= ( y_minus, 0.0 ) +% +MARKER_SYM= ( y_plus ) +MARKER_PERIODIC= ( x_minus, x_plus, 0,0,0, 0,0,0, 0.01,0,0 ) +% +MARKER_INLET= ( z_minus, 300.0, 100000.0, 0.0, 0.0, 1.0 ) +MARKER_OUTLET= ( z_plus, 99000.0 ) +% +MARKER_PLOTTING= ( y_minus ) +MARKER_MONITORING= ( y_minus ) +MARKER_ANALYZE= ( z_minus, z_plus ) + +% ------------- COMMON PARAMETERS DEFINING THE NUMERICAL METHOD ---------------% +% +NUM_METHOD_GRAD= GREEN_GAUSS +CFL_NUMBER= 1e4 +CFL_ADAPT= NO +TIME_DISCRE_FLOW= EULER_IMPLICIT + +% ------------------------ LINEAR SOLVER DEFINITION ---------------------------% +% +LINEAR_SOLVER= FGMRES +LINEAR_SOLVER_PREC= ILU +LINEAR_SOLVER_ERROR= 0.2 +LINEAR_SOLVER_ITER= 5 + +% -------------------- FLOW NUMERICAL METHOD DEFINITION -----------------------% +% +CONV_NUM_METHOD_FLOW= ROE +MUSCL_FLOW= YES +SLOPE_LIMITER_FLOW= NONE + +% --------------------------- CONVERGENCE PARAMETERS --------------------------% +% +CONV_RESIDUAL_MINVAL= -11 +CONV_STARTITER= 0 +INNER_ITER= 20 + +% ------------------------- INPUT/OUTPUT INFORMATION --------------------------% +% +MESH_FORMAT= BOX +MESH_BOX_LENGTH= (0.01, 0.01, 0.1) +MESH_BOX_SIZE= (9, 17, 65) diff --git a/TestCases/user_defined_functions/test_11_probes.cfg b/TestCases/user_defined_functions/test_11_probes.cfg new file mode 100644 index 000000000000..3310b78ce9f1 --- /dev/null +++ b/TestCases/user_defined_functions/test_11_probes.cfg @@ -0,0 +1,62 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Test case: 11 probes (ADT path, >10) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +SOLVER= NAVIER_STOKES +KIND_TURB_MODEL= NONE +RESTART_SOL= NO + +CUSTOM_OUTPUTS= 'probe1 : Probe{PRESSURE}[0.001000, 0.001000, 0.010000]; probe2 : Probe{PRESSURE}[0.001700, 0.001700, 0.018000]; probe3 : Probe{PRESSURE}[0.002400, 0.002400, 0.026000]; probe4 : Probe{PRESSURE}[0.003100, 0.003100, 0.034000]; probe5 : Probe{PRESSURE}[0.003800, 0.003800, 0.042000]; probe6 : Probe{PRESSURE}[0.004500, 0.004500, 0.050000]; probe7 : Probe{PRESSURE}[0.005200, 0.005200, 0.058000]; probe8 : Probe{PRESSURE}[0.005900, 0.005900, 0.066000]; probe9 : Probe{PRESSURE}[0.006600, 0.006600, 0.074000]; probe10 : Probe{PRESSURE}[0.007300, 0.007300, 0.082000]; probe11 : Probe{PRESSURE}[0.008000, 0.008000, 0.090000]' + +SCREEN_OUTPUT= INNER_ITER, RMS_DENSITY, probe1, probe6, probe11 +HISTORY_OUTPUT = ITER, CUSTOM + +MACH_NUMBER= 0.1 +INIT_OPTION= TD_CONDITIONS +FREESTREAM_OPTION= TEMPERATURE_FS +FREESTREAM_TEMPERATURE= 297.62 +REYNOLDS_NUMBER= 600 +REYNOLDS_LENGTH= 0.02 + +REF_ORIGIN_MOMENT_X = 0.00 +REF_ORIGIN_MOMENT_Y = 0.00 +REF_ORIGIN_MOMENT_Z = 0.00 +REF_LENGTH= 0.02 +REF_AREA= 0.02 + +FLUID_MODEL= IDEAL_GAS +GAMMA_VALUE= 1.4 +GAS_CONSTANT= 287.87 +VISCOSITY_MODEL= CONSTANT_VISCOSITY +MU_CONSTANT= 0.001 + +MARKER_HEATFLUX= ( y_minus, 0.0 ) +MARKER_SYM= ( y_plus ) +MARKER_PERIODIC= ( x_minus, x_plus, 0,0,0, 0,0,0, 0.01,0,0 ) +MARKER_INLET= ( z_minus, 300.0, 100000.0, 0.0, 0.0, 1.0 ) +MARKER_OUTLET= ( z_plus, 99000.0 ) +MARKER_PLOTTING= ( y_minus ) +MARKER_MONITORING= ( y_minus ) +MARKER_ANALYZE= ( z_minus, z_plus ) + +NUM_METHOD_GRAD= GREEN_GAUSS +CFL_NUMBER= 1e4 +CFL_ADAPT= NO +TIME_DISCRE_FLOW= EULER_IMPLICIT + +LINEAR_SOLVER= FGMRES +LINEAR_SOLVER_PREC= ILU +LINEAR_SOLVER_ERROR= 0.2 +LINEAR_SOLVER_ITER= 5 + +CONV_NUM_METHOD_FLOW= ROE +MUSCL_FLOW= YES +SLOPE_LIMITER_FLOW= NONE + +CONV_RESIDUAL_MINVAL= -11 +CONV_STARTITER= 0 +INNER_ITER= 5 + +MESH_FORMAT= BOX +MESH_BOX_LENGTH= (0.01, 0.01, 0.1) +MESH_BOX_SIZE= (9, 17, 65) From 50cbf62a9c60742e556540c855b9f136146571f1 Mon Sep 17 00:00:00 2001 From: Pratyksh Gupta Date: Mon, 5 Jan 2026 10:25:47 +0530 Subject: [PATCH 2/8] Address review: Refactor duplicate code and cleanup tests --- SU2_CFD/src/output/CFlowOutput.cpp | 45 ++++---- TestCases/parallel_regression.py | 10 +- .../lam_flatplate_probes.cfg | 104 ------------------ 3 files changed, 20 insertions(+), 139 deletions(-) delete mode 100644 TestCases/user_defined_functions/lam_flatplate_probes.cfg diff --git a/SU2_CFD/src/output/CFlowOutput.cpp b/SU2_CFD/src/output/CFlowOutput.cpp index aec97d39ec5b..dee6af316eb1 100644 --- a/SU2_CFD/src/output/CFlowOutput.cpp +++ b/SU2_CFD/src/output/CFlowOutput.cpp @@ -819,6 +819,20 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry const bool adjoint = config->GetDiscrete_Adjoint(); const bool axisymmetric = config->GetAxisymmetric(); const auto* flowNodes = su2staticcast_p(solver[FLOW_SOL]->GetNodes()); + auto GetPointValue = [&](const auto& output, unsigned long iPoint) { + return [&](unsigned long i) { + if (i < CustomOutput::NOT_A_VARIABLE) { + const auto solIdx = i / CustomOutput::MAX_VARS_PER_SOLVER; + const auto varIdx = i % CustomOutput::MAX_VARS_PER_SOLVER; + if (solIdx == FLOW_SOL) { + return flowNodes->GetPrimitive(iPoint, varIdx); + } + return solver[solIdx]->GetNodes()->GetSolution(iPoint, varIdx); + } else { + return *output.otherOutputs[i - CustomOutput::NOT_A_VARIABLE]; + } + }; + }; /*--- Count probes that need processing and use heuristic to decide ADT vs linear search. ADT overhead is only worth it for larger numbers of probes. ---*/ @@ -830,7 +844,9 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry } /*--- Heuristic: Build ADT if we have more than 10 probes. For small numbers of probes, - the overhead of building the ADT may not be worth it compared to linear search. ---*/ + the overhead of building the ADT may not be worth it compared to linear search. + Note: If this threshold is increased, the regression test (probe_performance_11) + must be updated to ensure the ADT path is still tested. ---*/ const unsigned long ADT_THRESHOLD = 10; const bool useADT = (nProbes > ADT_THRESHOLD); @@ -930,19 +946,7 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry * (see ConvertVariableSymbolsToIndices). ---*/ auto MakeFunctor = [&](unsigned long iPoint) { - /*--- This returns another lambda that captures iPoint by value. ---*/ - return [&, iPoint](unsigned long i) { - if (i < CustomOutput::NOT_A_VARIABLE) { - const auto solIdx = i / CustomOutput::MAX_VARS_PER_SOLVER; - const auto varIdx = i % CustomOutput::MAX_VARS_PER_SOLVER; - if (solIdx == FLOW_SOL) { - return flowNodes->GetPrimitive(iPoint, varIdx); - } - return solver[solIdx]->GetNodes()->GetSolution(iPoint, varIdx); - } else { - return *output.otherOutputs[i - CustomOutput::NOT_A_VARIABLE]; - } - }; + return GetPointValue(output, iPoint); }; if (output.type == OperationType::PROBE) { @@ -1006,18 +1010,7 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry su2double value = std::numeric_limits::max(); if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) { auto MakeFunctor = [&](unsigned long iPoint) { - return [&, iPoint](unsigned long i) { - if (i < CustomOutput::NOT_A_VARIABLE) { - const auto solIdx = i / CustomOutput::MAX_VARS_PER_SOLVER; - const auto varIdx = i % CustomOutput::MAX_VARS_PER_SOLVER; - if (solIdx == FLOW_SOL) { - return flowNodes->GetPrimitive(iPoint, varIdx); - } - return solver[solIdx]->GetNodes()->GetSolution(iPoint, varIdx); - } else { - return *output.otherOutputs[i - CustomOutput::NOT_A_VARIABLE]; - } - }; + return GetPointValue(output, iPoint); }; value = output.Eval(MakeFunctor(output.iPoint)); } diff --git a/TestCases/parallel_regression.py b/TestCases/parallel_regression.py index 764b589c71f5..c85919f07adf 100755 --- a/TestCases/parallel_regression.py +++ b/TestCases/parallel_regression.py @@ -314,15 +314,7 @@ def main(): flatplate_udobj.test_vals = [-6.760101, -1.283906, -0.745653, 0.000587, -0.000038, 0.000977, -0.001015, 596.450000, 299.550000, 296.900000, 21.318000, 0.586640, 36.553000, 2.188800] test_list.append(flatplate_udobj) - # Probe performance test (15 probes, ADT path) - tests issue #2546 fix - probe_performance = TestCase('probe_performance_15') - probe_performance.cfg_dir = "user_defined_functions" - probe_performance.cfg_file = "lam_flatplate_probes.cfg" - probe_performance.test_iter = 20 - probe_performance.test_vals = [-6.676934, 99047.0, 98854.0, 98775.0] # RMS_DENSITY, probe1, probe8, probe15 - test_list.append(probe_performance) - - # Probe performance test (11 probes, ADT path) - additional check + # Probe performance test (11 probes, ADT path) probe_performance_11 = TestCase('probe_performance_11') probe_performance_11.cfg_dir = "user_defined_functions" probe_performance_11.cfg_file = "test_11_probes.cfg" diff --git a/TestCases/user_defined_functions/lam_flatplate_probes.cfg b/TestCases/user_defined_functions/lam_flatplate_probes.cfg deleted file mode 100644 index c3bffa77fa38..000000000000 --- a/TestCases/user_defined_functions/lam_flatplate_probes.cfg +++ /dev/null @@ -1,104 +0,0 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% % -% SU2 configuration file % -% Case description: Test probe performance with multiple probes (ADT path). % -% Author: Probe Performance Fix % -% Date: Jan 2025 % -% File Version 8.3.0 "Harrier" % -% % -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% -SOLVER= NAVIER_STOKES -KIND_TURB_MODEL= NONE -RESTART_SOL= NO -% -% User defined expressions with multiple probes to test ADT path (>10 probes) -% This tests the probe performance fix for issue #2546 -CUSTOM_OUTPUTS= 'velocity : Macro{sqrt(pow(VELOCITY_X, 2) + pow(VELOCITY_Y, 2) + pow(VELOCITY_Z, 2))};\ - probe1 : Probe{PRESSURE}[0.001, 0.001, 0.01];\ - probe2 : Probe{PRESSURE}[0.002, 0.001, 0.02];\ - probe3 : Probe{PRESSURE}[0.003, 0.001, 0.03];\ - probe4 : Probe{PRESSURE}[0.004, 0.001, 0.04];\ - probe5 : Probe{PRESSURE}[0.005, 0.001, 0.05];\ - probe6 : Probe{PRESSURE}[0.006, 0.001, 0.06];\ - probe7 : Probe{PRESSURE}[0.007, 0.001, 0.07];\ - probe8 : Probe{PRESSURE}[0.008, 0.001, 0.08];\ - probe9 : Probe{PRESSURE}[0.009, 0.001, 0.09];\ - probe10 : Probe{PRESSURE}[0.001, 0.002, 0.01];\ - probe11 : Probe{PRESSURE}[0.002, 0.002, 0.02];\ - probe12 : Probe{PRESSURE}[0.003, 0.002, 0.03];\ - probe13 : Probe{PRESSURE}[0.004, 0.002, 0.04];\ - probe14 : Probe{PRESSURE}[0.005, 0.002, 0.05];\ - probe15 : Probe{PRESSURE}[0.006, 0.002, 0.06]' -% -SCREEN_OUTPUT= INNER_ITER, RMS_DENSITY, probe1, probe8, probe15 -HISTORY_OUTPUT = ITER, CUSTOM - -% -------------------- COMPRESSIBLE FREE-STREAM DEFINITION --------------------% -% -MACH_NUMBER= 0.1 -INIT_OPTION= TD_CONDITIONS -FREESTREAM_OPTION= TEMPERATURE_FS -FREESTREAM_TEMPERATURE= 297.62 -REYNOLDS_NUMBER= 600 -REYNOLDS_LENGTH= 0.02 - -% ---------------------- REFERENCE VALUE DEFINITION ---------------------------% -% -REF_ORIGIN_MOMENT_X = 0.00 -REF_ORIGIN_MOMENT_Y = 0.00 -REF_ORIGIN_MOMENT_Z = 0.00 -REF_LENGTH= 0.02 -REF_AREA= 0.02 -% -FLUID_MODEL= IDEAL_GAS -GAMMA_VALUE= 1.4 -GAS_CONSTANT= 287.87 -VISCOSITY_MODEL= CONSTANT_VISCOSITY -MU_CONSTANT= 0.001 - -% -------------------- BOUNDARY CONDITION DEFINITION --------------------------% -% -MARKER_HEATFLUX= ( y_minus, 0.0 ) -% -MARKER_SYM= ( y_plus ) -MARKER_PERIODIC= ( x_minus, x_plus, 0,0,0, 0,0,0, 0.01,0,0 ) -% -MARKER_INLET= ( z_minus, 300.0, 100000.0, 0.0, 0.0, 1.0 ) -MARKER_OUTLET= ( z_plus, 99000.0 ) -% -MARKER_PLOTTING= ( y_minus ) -MARKER_MONITORING= ( y_minus ) -MARKER_ANALYZE= ( z_minus, z_plus ) - -% ------------- COMMON PARAMETERS DEFINING THE NUMERICAL METHOD ---------------% -% -NUM_METHOD_GRAD= GREEN_GAUSS -CFL_NUMBER= 1e4 -CFL_ADAPT= NO -TIME_DISCRE_FLOW= EULER_IMPLICIT - -% ------------------------ LINEAR SOLVER DEFINITION ---------------------------% -% -LINEAR_SOLVER= FGMRES -LINEAR_SOLVER_PREC= ILU -LINEAR_SOLVER_ERROR= 0.2 -LINEAR_SOLVER_ITER= 5 - -% -------------------- FLOW NUMERICAL METHOD DEFINITION -----------------------% -% -CONV_NUM_METHOD_FLOW= ROE -MUSCL_FLOW= YES -SLOPE_LIMITER_FLOW= NONE - -% --------------------------- CONVERGENCE PARAMETERS --------------------------% -% -CONV_RESIDUAL_MINVAL= -11 -CONV_STARTITER= 0 -INNER_ITER= 20 - -% ------------------------- INPUT/OUTPUT INFORMATION --------------------------% -% -MESH_FORMAT= BOX -MESH_BOX_LENGTH= (0.01, 0.01, 0.1) -MESH_BOX_SIZE= (9, 17, 65) From 3af399c96ee3b48a04e51cc155a16a291ddc8bc8 Mon Sep 17 00:00:00 2001 From: Pedro Gomes <38071223+pcarruscag@users.noreply.github.com> Date: Mon, 5 Jan 2026 10:13:50 -0800 Subject: [PATCH 3/8] Update SU2_CFD/src/output/CFlowOutput.cpp --- SU2_CFD/src/output/CFlowOutput.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SU2_CFD/src/output/CFlowOutput.cpp b/SU2_CFD/src/output/CFlowOutput.cpp index dee6af316eb1..f52de512f717 100644 --- a/SU2_CFD/src/output/CFlowOutput.cpp +++ b/SU2_CFD/src/output/CFlowOutput.cpp @@ -820,7 +820,7 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry const bool axisymmetric = config->GetAxisymmetric(); const auto* flowNodes = su2staticcast_p(solver[FLOW_SOL]->GetNodes()); auto GetPointValue = [&](const auto& output, unsigned long iPoint) { - return [&](unsigned long i) { + return [&, iPoint](unsigned long i) { if (i < CustomOutput::NOT_A_VARIABLE) { const auto solIdx = i / CustomOutput::MAX_VARS_PER_SOLVER; const auto varIdx = i % CustomOutput::MAX_VARS_PER_SOLVER; From 8d3ddcae27f0749a7509b7896b12198448345082 Mon Sep 17 00:00:00 2001 From: Pedro Gomes <38071223+pcarruscag@users.noreply.github.com> Date: Mon, 5 Jan 2026 11:43:13 -0800 Subject: [PATCH 4/8] Apply suggestions from code review --- SU2_CFD/src/output/CFlowOutput.cpp | 5 +---- TestCases/parallel_regression.py | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/SU2_CFD/src/output/CFlowOutput.cpp b/SU2_CFD/src/output/CFlowOutput.cpp index f52de512f717..746d6d483314 100644 --- a/SU2_CFD/src/output/CFlowOutput.cpp +++ b/SU2_CFD/src/output/CFlowOutput.cpp @@ -1009,10 +1009,7 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry if (output.skip || output.type != OperationType::PROBE) continue; su2double value = std::numeric_limits::max(); if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) { - auto MakeFunctor = [&](unsigned long iPoint) { - return GetPointValue(output, iPoint); - }; - value = output.Eval(MakeFunctor(output.iPoint)); + value = output.Eval(GetPointValue(output, output.iPoint)); } probeValues[iProbe++] = value; } diff --git a/TestCases/parallel_regression.py b/TestCases/parallel_regression.py index 2ce93f052703..ab8e3fcb1357 100755 --- a/TestCases/parallel_regression.py +++ b/TestCases/parallel_regression.py @@ -318,7 +318,7 @@ def main(): probe_performance_11 = TestCase('probe_performance_11') probe_performance_11.cfg_dir = "user_defined_functions" probe_performance_11.cfg_file = "test_11_probes.cfg" - probe_performance_11.test_iter = 5 + probe_performance_11.test_iter = 4 probe_performance_11.test_vals = [-6.300237, 1.0141e+05, 1.0132e+05, 1.0093e+05] # RMS_DENSITY, probe1, probe6, probe11 # Tolerances are typically 0.001 in TestCase.py test_list.append(probe_performance_11) From 542572eaa8de6f2e52b91c98269e0d0d20eff6f5 Mon Sep 17 00:00:00 2001 From: Pedro Gomes <38071223+pcarruscag@users.noreply.github.com> Date: Mon, 5 Jan 2026 12:52:49 -0800 Subject: [PATCH 5/8] Update SU2_CFD/src/output/CFlowOutput.cpp --- SU2_CFD/src/output/CFlowOutput.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/SU2_CFD/src/output/CFlowOutput.cpp b/SU2_CFD/src/output/CFlowOutput.cpp index 746d6d483314..a2e711f19b36 100644 --- a/SU2_CFD/src/output/CFlowOutput.cpp +++ b/SU2_CFD/src/output/CFlowOutput.cpp @@ -908,6 +908,7 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry if (useADT && probeADT && !probeADT->IsEmpty()) { /*--- Use ADT to find the nearest node efficiently (O(log n) instead of O(n)). ---*/ probeADT->DetermineNearestNode(coord, minDist, minPoint, rankID); + minDist = pow(minDist, 2); /*--- Check if this rank owns the nearest point. ---*/ output.iPoint = (rankID == rank) ? minPoint : CustomOutput::PROBE_NOT_OWNED; From 3a43e2a8a2571bd838442b290af9bdf822f29f94 Mon Sep 17 00:00:00 2001 From: Pratyksh Gupta Date: Tue, 6 Jan 2026 15:26:55 +0530 Subject: [PATCH 6/8] Update regression values for probe test case (Iter 3) --- TestCases/parallel_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TestCases/parallel_regression.py b/TestCases/parallel_regression.py index ab8e3fcb1357..bb94fe02a9e3 100755 --- a/TestCases/parallel_regression.py +++ b/TestCases/parallel_regression.py @@ -319,7 +319,7 @@ def main(): probe_performance_11.cfg_dir = "user_defined_functions" probe_performance_11.cfg_file = "test_11_probes.cfg" probe_performance_11.test_iter = 4 - probe_performance_11.test_vals = [-6.300237, 1.0141e+05, 1.0132e+05, 1.0093e+05] # RMS_DENSITY, probe1, probe6, probe11 + probe_performance_11.test_vals = [-6.285098, 1.0125e+05, 1.0132e+05, 9.9411e+04] # RMS_DENSITY, probe1, probe6, probe11 # Tolerances are typically 0.001 in TestCase.py test_list.append(probe_performance_11) From a256d393db241ebdc6a5c9e4e7fd77e10537377d Mon Sep 17 00:00:00 2001 From: Pratyksh Gupta Date: Sat, 10 Jan 2026 23:13:29 +0530 Subject: [PATCH 7/8] Refactor probe value handling and memory allocation --- SU2_CFD/src/output/CFlowOutput.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/SU2_CFD/src/output/CFlowOutput.cpp b/SU2_CFD/src/output/CFlowOutput.cpp index a2e711f19b36..1b1a78d0dc50 100644 --- a/SU2_CFD/src/output/CFlowOutput.cpp +++ b/SU2_CFD/src/output/CFlowOutput.cpp @@ -838,7 +838,7 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry ADT overhead is only worth it for larger numbers of probes. ---*/ unsigned long nProbes = 0; for (const auto& output : customOutputs) { - if (!output.skip && output.type == OperationType::PROBE && output.varIndices.empty()) { + if (!output.skip && output.type == OperationType::PROBE) { ++nProbes; } } @@ -1004,23 +1004,24 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry /*--- Batch AllReduce for all probe values to reduce MPI communication overhead. ---*/ if (nProbes > 0) { /*--- Evaluate all probe values locally first. ---*/ - vector probeValues(nProbes); - unsigned long iProbe = 0; + vector probeValues; + probeValues.reserve(nProbes); for (auto& output : customOutputs) { if (output.skip || output.type != OperationType::PROBE) continue; su2double value = std::numeric_limits::max(); if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) { value = output.Eval(GetPointValue(output, output.iPoint)); } - probeValues[iProbe++] = value; + probeValues.push_back(value); } /*--- Single AllReduce for all probe values. ---*/ - vector probeValuesGlobal(nProbes); - SU2_MPI::Allreduce(probeValues.data(), probeValuesGlobal.data(), nProbes, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm()); + unsigned long nProbesActual = probeValues.size(); + vector probeValuesGlobal(nProbesActual); + SU2_MPI::Allreduce(probeValues.data(), probeValuesGlobal.data(), nProbesActual, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm()); /*--- Set history output values for all probes. ---*/ - iProbe = 0; + unsigned long iProbe = 0; for (auto& output : customOutputs) { if (output.skip || output.type != OperationType::PROBE) continue; SetHistoryOutputValue(output.name, probeValuesGlobal[iProbe++]); From 43ab0e69227498a17623cf6c51b181f4c5322aea Mon Sep 17 00:00:00 2001 From: Pedro Gomes <38071223+pcarruscag@users.noreply.github.com> Date: Sat, 10 Jan 2026 14:56:07 -0800 Subject: [PATCH 8/8] Update TestCases/parallel_regression.py --- TestCases/parallel_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TestCases/parallel_regression.py b/TestCases/parallel_regression.py index df9bae36ec45..3a18ed9efbc5 100755 --- a/TestCases/parallel_regression.py +++ b/TestCases/parallel_regression.py @@ -319,7 +319,7 @@ def main(): probe_performance_11.cfg_dir = "user_defined_functions" probe_performance_11.cfg_file = "test_11_probes.cfg" probe_performance_11.test_iter = 4 - probe_performance_11.test_vals = [-6.285098, 1.0125e+05, 1.0132e+05, 9.9411e+04] # RMS_DENSITY, probe1, probe6, probe11 + probe_performance_11.test_vals = [-6.290748, 101020, 101050, 99123] # RMS_DENSITY, probe1, probe6, probe11 # Tolerances are typically 0.001 in TestCase.py test_list.append(probe_performance_11)