From 72c3227b3a0be94c0ac51e0c0206de5005d7b215 Mon Sep 17 00:00:00 2001
From: Pratyksh Gupta <pratykshgupta9999@gmail.com>
Date: Mon, 5 Jan 2026 02:29:58 +0530
Subject: [PATCH 1/8] Fix #2546: Implemented ADT-based probe search and batched
 AllReduce

---
 AUTHORS.md                                    |   1 +
 SU2_CFD/src/output/CFlowOutput.cpp            | 113 +++++++++++++++---
 TestCases/parallel_regression.py              |  17 +++
 .../lam_flatplate_probes.cfg                  | 104 ++++++++++++++++
 .../user_defined_functions/test_11_probes.cfg |  62 ++++++++++
 5 files changed, 281 insertions(+), 16 deletions(-)
 create mode 100644 TestCases/user_defined_functions/lam_flatplate_probes.cfg
 create mode 100644 TestCases/user_defined_functions/test_11_probes.cfg
diff --git a/AUTHORS.md b/AUTHORS.md
index 3d7e7eb8416b..f6580fadb892 100644
--- a/AUTHORS.md
+++ b/AUTHORS.md
@@ -123,6 +123,7 @@ Paul Zhang
 Pedro Gomes
 Peng Yan
 Pete Bachant
+Pratyksh Gupta
 RaulFeijo55
 Ruben Sanchez
 Ryan Barrett
diff --git a/SU2_CFD/src/output/CFlowOutput.cpp b/SU2_CFD/src/output/CFlowOutput.cpp
index 696a4c595475..aec97d39ec5b 100644
--- a/SU2_CFD/src/output/CFlowOutput.cpp
+++ b/SU2_CFD/src/output/CFlowOutput.cpp
@@ -33,6 +33,7 @@
 #include "../../include/output/CFlowOutput.hpp"
 
 #include "../../../Common/include/geometry/CGeometry.hpp"
+#include "../../../Common/include/adt/CADTPointsOnlyClass.hpp"
 #include "../../../Common/include/toolboxes/geometry_toolbox.hpp"
 #include "../../include/solvers/CSolver.hpp"
 #include "../../include/variables/CPrimitiveIndices.hpp"
@@ -819,6 +820,38 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
   const bool axisymmetric = config->GetAxisymmetric();
   const auto* flowNodes = su2staticcast_p<const CFlowVariable*>(solver[FLOW_SOL]->GetNodes());
 
+  /*--- Count probes that need processing and use heuristic to decide ADT vs linear search.
+        ADT overhead is only worth it for larger numbers of probes. ---*/
+  unsigned long nProbes = 0;
+  for (const auto& output : customOutputs) {
+    if (!output.skip && output.type == OperationType::PROBE && output.varIndices.empty()) {
+      ++nProbes;
+    }
+  }
+
+  /*--- Heuristic: Build ADT if we have more than 10 probes. For small numbers of probes,
+        the overhead of building the ADT may not be worth it compared to linear search. ---*/
+  const unsigned long ADT_THRESHOLD = 10;
+  const bool useADT = (nProbes > ADT_THRESHOLD);
+
+  /*--- Build ADT for probe nearest neighbor search if heuristic suggests it. ---*/
+  std::unique_ptr<CADTPointsOnlyClass> probeADT;
+  if (useADT) {
+    const unsigned long nPointDomain = geometry->GetnPointDomain();
+    vector<su2double> coords(nDim * nPointDomain);
+    vector<unsigned long> pointIDs(nPointDomain);
+
+    for (unsigned long iPoint = 0; iPoint < nPointDomain; ++iPoint) {
+      pointIDs[iPoint] = iPoint;
+      for (unsigned short iDim = 0; iDim < nDim; ++iDim) {
+        coords[iPoint * nDim + iDim] = geometry->nodes->GetCoord(iPoint, iDim);
+      }
+    }
+
+    /*--- Build global ADT to find nearest nodes across all ranks. ---*/
+    probeADT = std::make_unique<CADTPointsOnlyClass>(nDim, nPointDomain, coords.data(), pointIDs.data(), true);
+  }
+
   for (auto& output : customOutputs) {
     if (output.skip) continue;
 
@@ -849,19 +882,33 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
         }
         su2double coord[3] = {};
         for (auto iDim = 0u; iDim < nDim; ++iDim) coord[iDim] = std::stod(output.markers[iDim]);
+        /*--- Find the nearest node: ADT search when enabled, linear search otherwise. ---*/
         su2double minDist = std::numeric_limits<su2double>::max();
         unsigned long minPoint = 0;
-        for (auto iPoint = 0ul; iPoint < geometry->GetnPointDomain(); ++iPoint) {
-          const su2double dist = GeometryToolbox::SquaredDistance(nDim, coord, geometry->nodes->GetCoord(iPoint));
-          if (dist < minDist) {
-            minDist = dist;
-            minPoint = iPoint;
+        int rankID = -1;
+        int rank;
+        SU2_MPI::Comm_rank(SU2_MPI::GetComm(), &rank);
+
+        if (useADT && probeADT && !probeADT->IsEmpty()) {
+          /*--- Use ADT to find the nearest node efficiently (O(log n) instead of O(n)). ---*/
+          probeADT->DetermineNearestNode(coord, minDist, minPoint, rankID);
+
+          /*--- Check if this rank owns the nearest point. ---*/
+          output.iPoint = (rankID == rank) ? minPoint : CustomOutput::PROBE_NOT_OWNED;
+        } else {
+          /*--- Use linear search for small numbers of probes or when ADT is not available. ---*/
+          for (auto iPoint = 0ul; iPoint < geometry->GetnPointDomain(); ++iPoint) {
+            const su2double dist = GeometryToolbox::SquaredDistance(nDim, coord, geometry->nodes->GetCoord(iPoint));
+            if (dist < minDist) {
+              minDist = dist;
+              minPoint = iPoint;
+            }
           }
+          /*--- Decide which rank owns the probe using Allreduce. ---*/
+          su2double globMinDist;
+          SU2_MPI::Allreduce(&minDist, &globMinDist, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm());
+          output.iPoint = fabs(minDist - globMinDist) < EPS ? minPoint : CustomOutput::PROBE_NOT_OWNED;
         }
-        /*--- Decide which rank owns the probe. ---*/
-        su2double globMinDist;
-        SU2_MPI::Allreduce(&minDist, &globMinDist, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm());
-        output.iPoint = fabs(minDist - globMinDist) < EPS ? minPoint : CustomOutput::PROBE_NOT_OWNED;
         if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) {
           std::cout << "Probe " << output.name << " is using global point "
                     << geometry->nodes->GetGlobalIndex(output.iPoint)
@@ -899,13 +946,7 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
     };
 
     if (output.type == OperationType::PROBE) {
-      su2double value = std::numeric_limits<su2double>::max();
-      if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) {
-        value = output.Eval(MakeFunctor(output.iPoint));
-      }
-      su2double tmp = value;
-      SU2_MPI::Allreduce(&tmp, &value, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm());
-      SetHistoryOutputValue(output.name, value);
+      /*--- Probe evaluation will be done after all outputs are processed, with batched AllReduce. ---*/
       continue;
     }
 
@@ -954,6 +995,46 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
     }
     SetHistoryOutputValue(output.name, integral[0]);
   }
+
+  /*--- Batch AllReduce for all probe values to reduce MPI communication overhead. ---*/
+  if (nProbes > 0) {
+    /*--- Evaluate all probe values locally first. ---*/
+    vector<su2double> probeValues(nProbes);
+    unsigned long iProbe = 0;
+    for (auto& output : customOutputs) {
+      if (output.skip || output.type != OperationType::PROBE) continue;
+      su2double value = std::numeric_limits<su2double>::max();
+      if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) {
+        auto MakeFunctor = [&](unsigned long iPoint) {
+          return [&, iPoint](unsigned long i) {
+            if (i < CustomOutput::NOT_A_VARIABLE) {
+              const auto solIdx = i / CustomOutput::MAX_VARS_PER_SOLVER;
+              const auto varIdx = i % CustomOutput::MAX_VARS_PER_SOLVER;
+              if (solIdx == FLOW_SOL) {
+                return flowNodes->GetPrimitive(iPoint, varIdx);
+              }
+              return solver[solIdx]->GetNodes()->GetSolution(iPoint, varIdx);
+            } else {
+              return *output.otherOutputs[i - CustomOutput::NOT_A_VARIABLE];
+            }
+          };
+        };
+        value = output.Eval(MakeFunctor(output.iPoint));
+      }
+      probeValues[iProbe++] = value;
+    }
+
+    /*--- Single AllReduce for all probe values. ---*/
+    vector<su2double> probeValuesGlobal(nProbes);
+    SU2_MPI::Allreduce(probeValues.data(), probeValuesGlobal.data(), nProbes, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm());
+
+    /*--- Set history output values for all probes. ---*/
+    iProbe = 0;
+    for (auto& output : customOutputs) {
+      if (output.skip || output.type != OperationType::PROBE) continue;
+      SetHistoryOutputValue(output.name, probeValuesGlobal[iProbe++]);
+    }
+  }
 }
 
 // The "AddHistoryOutput(" must not be split over multiple lines to ensure proper python parsing
diff --git a/TestCases/parallel_regression.py b/TestCases/parallel_regression.py
index e980b665df6f..764b589c71f5 100755
--- a/TestCases/parallel_regression.py
+++ b/TestCases/parallel_regression.py
@@ -314,6 +314,23 @@ def main():
     flatplate_udobj.test_vals = [-6.760101, -1.283906, -0.745653, 0.000587, -0.000038, 0.000977, -0.001015, 596.450000, 299.550000, 296.900000, 21.318000, 0.586640, 36.553000, 2.188800]
     test_list.append(flatplate_udobj)
 
+    # Probe performance test (15 probes, ADT path) - tests issue #2546 fix
+    probe_performance = TestCase('probe_performance_15')
+    probe_performance.cfg_dir = "user_defined_functions"
+    probe_performance.cfg_file = "lam_flatplate_probes.cfg"
+    probe_performance.test_iter = 20
+    probe_performance.test_vals = [-6.676934, 99047.0, 98854.0, 98775.0]  # RMS_DENSITY, probe1, probe8, probe15
+    test_list.append(probe_performance)
+
+    # Probe performance test (11 probes, ADT path) - additional check
+    probe_performance_11 = TestCase('probe_performance_11')
+    probe_performance_11.cfg_dir = "user_defined_functions"
+    probe_performance_11.cfg_file = "test_11_probes.cfg"
+    probe_performance_11.test_iter = 5
+    probe_performance_11.test_vals = [-6.300237, 1.0141e+05, 1.0132e+05, 1.0093e+05] # RMS_DENSITY, probe1, probe6, probe11
+    # Tolerances are typically 0.001 in TestCase.py
+    test_list.append(probe_performance_11)
+
     # Laminar cylinder (steady)
     cylinder           = TestCase('cylinder')
     cylinder.cfg_dir   = "navierstokes/cylinder"
diff --git a/TestCases/user_defined_functions/lam_flatplate_probes.cfg b/TestCases/user_defined_functions/lam_flatplate_probes.cfg
new file mode 100644
index 000000000000..c3bffa77fa38
--- /dev/null
+++ b/TestCases/user_defined_functions/lam_flatplate_probes.cfg
@@ -0,0 +1,104 @@
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%                                                                              %
+% SU2 configuration file                                                       %
+% Case description: Test probe performance with multiple probes (ADT path).   %
+% Author: Probe Performance Fix                                               %
+% Date: Jan 2025                                                               %
+% File Version 8.3.0 "Harrier"                                               %
+%                                                                              %
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+SOLVER= NAVIER_STOKES
+KIND_TURB_MODEL= NONE
+RESTART_SOL= NO
+%
+% User defined expressions with multiple probes to test ADT path (>10 probes)
+% This tests the probe performance fix for issue #2546
+CUSTOM_OUTPUTS= 'velocity : Macro{sqrt(pow(VELOCITY_X, 2) + pow(VELOCITY_Y, 2) + pow(VELOCITY_Z, 2))};\
+                 probe1 : Probe{PRESSURE}[0.001, 0.001, 0.01];\
+                 probe2 : Probe{PRESSURE}[0.002, 0.001, 0.02];\
+                 probe3 : Probe{PRESSURE}[0.003, 0.001, 0.03];\
+                 probe4 : Probe{PRESSURE}[0.004, 0.001, 0.04];\
+                 probe5 : Probe{PRESSURE}[0.005, 0.001, 0.05];\
+                 probe6 : Probe{PRESSURE}[0.006, 0.001, 0.06];\
+                 probe7 : Probe{PRESSURE}[0.007, 0.001, 0.07];\
+                 probe8 : Probe{PRESSURE}[0.008, 0.001, 0.08];\
+                 probe9 : Probe{PRESSURE}[0.009, 0.001, 0.09];\
+                 probe10 : Probe{PRESSURE}[0.001, 0.002, 0.01];\
+                 probe11 : Probe{PRESSURE}[0.002, 0.002, 0.02];\
+                 probe12 : Probe{PRESSURE}[0.003, 0.002, 0.03];\
+                 probe13 : Probe{PRESSURE}[0.004, 0.002, 0.04];\
+                 probe14 : Probe{PRESSURE}[0.005, 0.002, 0.05];\
+                 probe15 : Probe{PRESSURE}[0.006, 0.002, 0.06]'
+%
+SCREEN_OUTPUT= INNER_ITER, RMS_DENSITY, probe1, probe8, probe15
+HISTORY_OUTPUT = ITER, CUSTOM
+
+% -------------------- COMPRESSIBLE FREE-STREAM DEFINITION --------------------%
+%
+MACH_NUMBER= 0.1
+INIT_OPTION= TD_CONDITIONS
+FREESTREAM_OPTION= TEMPERATURE_FS
+FREESTREAM_TEMPERATURE= 297.62
+REYNOLDS_NUMBER= 600
+REYNOLDS_LENGTH= 0.02
+
+% ---------------------- REFERENCE VALUE DEFINITION ---------------------------%
+%
+REF_ORIGIN_MOMENT_X = 0.00
+REF_ORIGIN_MOMENT_Y = 0.00
+REF_ORIGIN_MOMENT_Z = 0.00
+REF_LENGTH= 0.02
+REF_AREA= 0.02
+%
+FLUID_MODEL= IDEAL_GAS
+GAMMA_VALUE= 1.4
+GAS_CONSTANT= 287.87
+VISCOSITY_MODEL= CONSTANT_VISCOSITY
+MU_CONSTANT= 0.001
+
+% -------------------- BOUNDARY CONDITION DEFINITION --------------------------%
+%
+MARKER_HEATFLUX= ( y_minus, 0.0 )
+%
+MARKER_SYM= ( y_plus )
+MARKER_PERIODIC= ( x_minus, x_plus, 0,0,0, 0,0,0, 0.01,0,0 )
+%
+MARKER_INLET= ( z_minus, 300.0, 100000.0, 0.0, 0.0, 1.0 )
+MARKER_OUTLET= ( z_plus, 99000.0 )
+%
+MARKER_PLOTTING= ( y_minus )
+MARKER_MONITORING= ( y_minus )
+MARKER_ANALYZE= ( z_minus, z_plus )
+
+% ------------- COMMON PARAMETERS DEFINING THE NUMERICAL METHOD ---------------%
+%
+NUM_METHOD_GRAD= GREEN_GAUSS
+CFL_NUMBER= 1e4
+CFL_ADAPT= NO
+TIME_DISCRE_FLOW= EULER_IMPLICIT
+
+% ------------------------ LINEAR SOLVER DEFINITION ---------------------------%
+%
+LINEAR_SOLVER= FGMRES
+LINEAR_SOLVER_PREC= ILU
+LINEAR_SOLVER_ERROR= 0.2
+LINEAR_SOLVER_ITER= 5
+
+% -------------------- FLOW NUMERICAL METHOD DEFINITION -----------------------%
+%
+CONV_NUM_METHOD_FLOW= ROE
+MUSCL_FLOW= YES
+SLOPE_LIMITER_FLOW= NONE
+
+% --------------------------- CONVERGENCE PARAMETERS --------------------------%
+%
+CONV_RESIDUAL_MINVAL= -11
+CONV_STARTITER= 0
+INNER_ITER= 20
+
+% ------------------------- INPUT/OUTPUT INFORMATION --------------------------%
+%
+MESH_FORMAT= BOX
+MESH_BOX_LENGTH= (0.01, 0.01, 0.1)
+MESH_BOX_SIZE= (9, 17, 65)
diff --git a/TestCases/user_defined_functions/test_11_probes.cfg b/TestCases/user_defined_functions/test_11_probes.cfg
new file mode 100644
index 000000000000..3310b78ce9f1
--- /dev/null
+++ b/TestCases/user_defined_functions/test_11_probes.cfg
@@ -0,0 +1,62 @@
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Test case: 11 probes, i.e. more than the ADT threshold of 10, so the ADT search path is used
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+SOLVER= NAVIER_STOKES
+KIND_TURB_MODEL= NONE
+RESTART_SOL= NO
+
+CUSTOM_OUTPUTS= 'probe1 : Probe{PRESSURE}[0.001000, 0.001000, 0.010000]; probe2 : Probe{PRESSURE}[0.001700, 0.001700, 0.018000]; probe3 : Probe{PRESSURE}[0.002400, 0.002400, 0.026000]; probe4 : Probe{PRESSURE}[0.003100, 0.003100, 0.034000]; probe5 : Probe{PRESSURE}[0.003800, 0.003800, 0.042000]; probe6 : Probe{PRESSURE}[0.004500, 0.004500, 0.050000]; probe7 : Probe{PRESSURE}[0.005200, 0.005200, 0.058000]; probe8 : Probe{PRESSURE}[0.005900, 0.005900, 0.066000]; probe9 : Probe{PRESSURE}[0.006600, 0.006600, 0.074000]; probe10 : Probe{PRESSURE}[0.007300, 0.007300, 0.082000]; probe11 : Probe{PRESSURE}[0.008000, 0.008000, 0.090000]'
+
+SCREEN_OUTPUT= INNER_ITER, RMS_DENSITY, probe1, probe6, probe11
+HISTORY_OUTPUT = ITER, CUSTOM
+
+MACH_NUMBER= 0.1
+INIT_OPTION= TD_CONDITIONS
+FREESTREAM_OPTION= TEMPERATURE_FS
+FREESTREAM_TEMPERATURE= 297.62
+REYNOLDS_NUMBER= 600
+REYNOLDS_LENGTH= 0.02
+
+REF_ORIGIN_MOMENT_X = 0.00
+REF_ORIGIN_MOMENT_Y = 0.00
+REF_ORIGIN_MOMENT_Z = 0.00
+REF_LENGTH= 0.02
+REF_AREA= 0.02
+
+FLUID_MODEL= IDEAL_GAS
+GAMMA_VALUE= 1.4
+GAS_CONSTANT= 287.87
+VISCOSITY_MODEL= CONSTANT_VISCOSITY
+MU_CONSTANT= 0.001
+
+MARKER_HEATFLUX= ( y_minus, 0.0 )
+MARKER_SYM= ( y_plus )
+MARKER_PERIODIC= ( x_minus, x_plus, 0,0,0, 0,0,0, 0.01,0,0 )
+MARKER_INLET= ( z_minus, 300.0, 100000.0, 0.0, 0.0, 1.0 )
+MARKER_OUTLET= ( z_plus, 99000.0 )
+MARKER_PLOTTING= ( y_minus )
+MARKER_MONITORING= ( y_minus )
+MARKER_ANALYZE= ( z_minus, z_plus )
+
+NUM_METHOD_GRAD= GREEN_GAUSS
+CFL_NUMBER= 1e4
+CFL_ADAPT= NO
+TIME_DISCRE_FLOW= EULER_IMPLICIT
+
+LINEAR_SOLVER= FGMRES
+LINEAR_SOLVER_PREC= ILU
+LINEAR_SOLVER_ERROR= 0.2
+LINEAR_SOLVER_ITER= 5
+
+CONV_NUM_METHOD_FLOW= ROE
+MUSCL_FLOW= YES
+SLOPE_LIMITER_FLOW= NONE
+
+CONV_RESIDUAL_MINVAL= -11
+CONV_STARTITER= 0
+INNER_ITER= 5
+
+MESH_FORMAT= BOX
+MESH_BOX_LENGTH= (0.01, 0.01, 0.1)
+MESH_BOX_SIZE= (9, 17, 65)

From 50cbf62a9c60742e556540c855b9f136146571f1 Mon Sep 17 00:00:00 2001
From: Pratyksh Gupta <pratykshgupta9999@gmail.com>
Date: Mon, 5 Jan 2026 10:25:47 +0530
Subject: [PATCH 2/8] Address review: Refactor duplicate code and cleanup tests

---
 SU2_CFD/src/output/CFlowOutput.cpp            |  45 ++++----
 TestCases/parallel_regression.py              |  10 +-
 .../lam_flatplate_probes.cfg                  | 104 ------------------
 3 files changed, 20 insertions(+), 139 deletions(-)
 delete mode 100644 TestCases/user_defined_functions/lam_flatplate_probes.cfg

diff --git a/SU2_CFD/src/output/CFlowOutput.cpp b/SU2_CFD/src/output/CFlowOutput.cpp
index aec97d39ec5b..dee6af316eb1 100644
--- a/SU2_CFD/src/output/CFlowOutput.cpp
+++ b/SU2_CFD/src/output/CFlowOutput.cpp
@@ -819,6 +819,20 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
   const bool adjoint = config->GetDiscrete_Adjoint();
   const bool axisymmetric = config->GetAxisymmetric();
   const auto* flowNodes = su2staticcast_p<const CFlowVariable*>(solver[FLOW_SOL]->GetNodes());
+  auto GetPointValue = [&](const auto& output, unsigned long iPoint) {
+    return [&](unsigned long i) {
+      if (i < CustomOutput::NOT_A_VARIABLE) {
+        const auto solIdx = i / CustomOutput::MAX_VARS_PER_SOLVER;
+        const auto varIdx = i % CustomOutput::MAX_VARS_PER_SOLVER;
+        if (solIdx == FLOW_SOL) {
+          return flowNodes->GetPrimitive(iPoint, varIdx);
+        }
+        return solver[solIdx]->GetNodes()->GetSolution(iPoint, varIdx);
+      } else {
+        return *output.otherOutputs[i - CustomOutput::NOT_A_VARIABLE];
+      }
+    };
+  };
 
   /*--- Count probes that need processing and use heuristic to decide ADT vs linear search.
         ADT overhead is only worth it for larger numbers of probes. ---*/
@@ -830,7 +844,9 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
   }
 
   /*--- Heuristic: Build ADT if we have more than 10 probes. For small numbers of probes,
-        the overhead of building the ADT may not be worth it compared to linear search. ---*/
+        the overhead of building the ADT may not be worth it compared to linear search.
+        Note: If this threshold is increased, the regression test (probe_performance_11)
+        must be updated to ensure the ADT path is still tested. ---*/
   const unsigned long ADT_THRESHOLD = 10;
   const bool useADT = (nProbes > ADT_THRESHOLD);
 
@@ -930,19 +946,7 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
      * (see ConvertVariableSymbolsToIndices). ---*/
 
     auto MakeFunctor = [&](unsigned long iPoint) {
-      /*--- This returns another lambda that captures iPoint by value. ---*/
-      return [&, iPoint](unsigned long i) {
-        if (i < CustomOutput::NOT_A_VARIABLE) {
-          const auto solIdx = i / CustomOutput::MAX_VARS_PER_SOLVER;
-          const auto varIdx = i % CustomOutput::MAX_VARS_PER_SOLVER;
-          if (solIdx == FLOW_SOL) {
-            return flowNodes->GetPrimitive(iPoint, varIdx);
-          }
-          return solver[solIdx]->GetNodes()->GetSolution(iPoint, varIdx);
-        } else {
-          return *output.otherOutputs[i - CustomOutput::NOT_A_VARIABLE];
-        }
-      };
+      return GetPointValue(output, iPoint);
     };
 
     if (output.type == OperationType::PROBE) {
@@ -1006,18 +1010,7 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
       su2double value = std::numeric_limits<su2double>::max();
       if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) {
         auto MakeFunctor = [&](unsigned long iPoint) {
-          return [&, iPoint](unsigned long i) {
-            if (i < CustomOutput::NOT_A_VARIABLE) {
-              const auto solIdx = i / CustomOutput::MAX_VARS_PER_SOLVER;
-              const auto varIdx = i % CustomOutput::MAX_VARS_PER_SOLVER;
-              if (solIdx == FLOW_SOL) {
-                return flowNodes->GetPrimitive(iPoint, varIdx);
-              }
-              return solver[solIdx]->GetNodes()->GetSolution(iPoint, varIdx);
-            } else {
-              return *output.otherOutputs[i - CustomOutput::NOT_A_VARIABLE];
-            }
-          };
+          return GetPointValue(output, iPoint);
         };
         value = output.Eval(MakeFunctor(output.iPoint));
       }
diff --git a/TestCases/parallel_regression.py b/TestCases/parallel_regression.py
index 764b589c71f5..c85919f07adf 100755
--- a/TestCases/parallel_regression.py
+++ b/TestCases/parallel_regression.py
@@ -314,15 +314,7 @@ def main():
     flatplate_udobj.test_vals = [-6.760101, -1.283906, -0.745653, 0.000587, -0.000038, 0.000977, -0.001015, 596.450000, 299.550000, 296.900000, 21.318000, 0.586640, 36.553000, 2.188800]
     test_list.append(flatplate_udobj)
 
-    # Probe performance test (15 probes, ADT path) - tests issue #2546 fix
-    probe_performance = TestCase('probe_performance_15')
-    probe_performance.cfg_dir = "user_defined_functions"
-    probe_performance.cfg_file = "lam_flatplate_probes.cfg"
-    probe_performance.test_iter = 20
-    probe_performance.test_vals = [-6.676934, 99047.0, 98854.0, 98775.0]  # RMS_DENSITY, probe1, probe8, probe15
-    test_list.append(probe_performance)
-
-    # Probe performance test (11 probes, ADT path) - additional check
+    # Probe performance test (11 probes, ADT path)
     probe_performance_11 = TestCase('probe_performance_11')
     probe_performance_11.cfg_dir = "user_defined_functions"
     probe_performance_11.cfg_file = "test_11_probes.cfg"
diff --git a/TestCases/user_defined_functions/lam_flatplate_probes.cfg b/TestCases/user_defined_functions/lam_flatplate_probes.cfg
deleted file mode 100644
index c3bffa77fa38..000000000000
--- a/TestCases/user_defined_functions/lam_flatplate_probes.cfg
+++ /dev/null
@@ -1,104 +0,0 @@
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%                                                                              %
-% SU2 configuration file                                                       %
-% Case description: Test probe performance with multiple probes (ADT path).   %
-% Author: Probe Performance Fix                                               %
-% Date: Jan 2025                                                               %
-% File Version 8.3.0 "Harrier"                                               %
-%                                                                              %
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%
-SOLVER= NAVIER_STOKES
-KIND_TURB_MODEL= NONE
-RESTART_SOL= NO
-%
-% User defined expressions with multiple probes to test ADT path (>10 probes)
-% This tests the probe performance fix for issue #2546
-CUSTOM_OUTPUTS= 'velocity : Macro{sqrt(pow(VELOCITY_X, 2) + pow(VELOCITY_Y, 2) + pow(VELOCITY_Z, 2))};\
-                 probe1 : Probe{PRESSURE}[0.001, 0.001, 0.01];\
-                 probe2 : Probe{PRESSURE}[0.002, 0.001, 0.02];\
-                 probe3 : Probe{PRESSURE}[0.003, 0.001, 0.03];\
-                 probe4 : Probe{PRESSURE}[0.004, 0.001, 0.04];\
-                 probe5 : Probe{PRESSURE}[0.005, 0.001, 0.05];\
-                 probe6 : Probe{PRESSURE}[0.006, 0.001, 0.06];\
-                 probe7 : Probe{PRESSURE}[0.007, 0.001, 0.07];\
-                 probe8 : Probe{PRESSURE}[0.008, 0.001, 0.08];\
-                 probe9 : Probe{PRESSURE}[0.009, 0.001, 0.09];\
-                 probe10 : Probe{PRESSURE}[0.001, 0.002, 0.01];\
-                 probe11 : Probe{PRESSURE}[0.002, 0.002, 0.02];\
-                 probe12 : Probe{PRESSURE}[0.003, 0.002, 0.03];\
-                 probe13 : Probe{PRESSURE}[0.004, 0.002, 0.04];\
-                 probe14 : Probe{PRESSURE}[0.005, 0.002, 0.05];\
-                 probe15 : Probe{PRESSURE}[0.006, 0.002, 0.06]'
-%
-SCREEN_OUTPUT= INNER_ITER, RMS_DENSITY, probe1, probe8, probe15
-HISTORY_OUTPUT = ITER, CUSTOM
-
-% -------------------- COMPRESSIBLE FREE-STREAM DEFINITION --------------------%
-%
-MACH_NUMBER= 0.1
-INIT_OPTION= TD_CONDITIONS
-FREESTREAM_OPTION= TEMPERATURE_FS
-FREESTREAM_TEMPERATURE= 297.62
-REYNOLDS_NUMBER= 600
-REYNOLDS_LENGTH= 0.02
-
-% ---------------------- REFERENCE VALUE DEFINITION ---------------------------%
-%
-REF_ORIGIN_MOMENT_X = 0.00
-REF_ORIGIN_MOMENT_Y = 0.00
-REF_ORIGIN_MOMENT_Z = 0.00
-REF_LENGTH= 0.02
-REF_AREA= 0.02
-%
-FLUID_MODEL= IDEAL_GAS
-GAMMA_VALUE= 1.4
-GAS_CONSTANT= 287.87
-VISCOSITY_MODEL= CONSTANT_VISCOSITY
-MU_CONSTANT= 0.001
-
-% -------------------- BOUNDARY CONDITION DEFINITION --------------------------%
-%
-MARKER_HEATFLUX= ( y_minus, 0.0 )
-%
-MARKER_SYM= ( y_plus )
-MARKER_PERIODIC= ( x_minus, x_plus, 0,0,0, 0,0,0, 0.01,0,0 )
-%
-MARKER_INLET= ( z_minus, 300.0, 100000.0, 0.0, 0.0, 1.0 )
-MARKER_OUTLET= ( z_plus, 99000.0 )
-%
-MARKER_PLOTTING= ( y_minus )
-MARKER_MONITORING= ( y_minus )
-MARKER_ANALYZE= ( z_minus, z_plus )
-
-% ------------- COMMON PARAMETERS DEFINING THE NUMERICAL METHOD ---------------%
-%
-NUM_METHOD_GRAD= GREEN_GAUSS
-CFL_NUMBER= 1e4
-CFL_ADAPT= NO
-TIME_DISCRE_FLOW= EULER_IMPLICIT
-
-% ------------------------ LINEAR SOLVER DEFINITION ---------------------------%
-%
-LINEAR_SOLVER= FGMRES
-LINEAR_SOLVER_PREC= ILU
-LINEAR_SOLVER_ERROR= 0.2
-LINEAR_SOLVER_ITER= 5
-
-% -------------------- FLOW NUMERICAL METHOD DEFINITION -----------------------%
-%
-CONV_NUM_METHOD_FLOW= ROE
-MUSCL_FLOW= YES
-SLOPE_LIMITER_FLOW= NONE
-
-% --------------------------- CONVERGENCE PARAMETERS --------------------------%
-%
-CONV_RESIDUAL_MINVAL= -11
-CONV_STARTITER= 0
-INNER_ITER= 20
-
-% ------------------------- INPUT/OUTPUT INFORMATION --------------------------%
-%
-MESH_FORMAT= BOX
-MESH_BOX_LENGTH= (0.01, 0.01, 0.1)
-MESH_BOX_SIZE= (9, 17, 65)

From 3af399c96ee3b48a04e51cc155a16a291ddc8bc8 Mon Sep 17 00:00:00 2001
From: Pedro Gomes <38071223+pcarruscag@users.noreply.github.com>
Date: Mon, 5 Jan 2026 10:13:50 -0800
Subject: [PATCH 3/8] Update SU2_CFD/src/output/CFlowOutput.cpp

---
 SU2_CFD/src/output/CFlowOutput.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/SU2_CFD/src/output/CFlowOutput.cpp b/SU2_CFD/src/output/CFlowOutput.cpp
index dee6af316eb1..f52de512f717 100644
--- a/SU2_CFD/src/output/CFlowOutput.cpp
+++ b/SU2_CFD/src/output/CFlowOutput.cpp
@@ -820,7 +820,7 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
   const bool axisymmetric = config->GetAxisymmetric();
   const auto* flowNodes = su2staticcast_p<const CFlowVariable*>(solver[FLOW_SOL]->GetNodes());
   auto GetPointValue = [&](const auto& output, unsigned long iPoint) {
-    return [&](unsigned long i) {
+    return [&, iPoint](unsigned long i) {
       if (i < CustomOutput::NOT_A_VARIABLE) {
         const auto solIdx = i / CustomOutput::MAX_VARS_PER_SOLVER;
         const auto varIdx = i % CustomOutput::MAX_VARS_PER_SOLVER;

From 8d3ddcae27f0749a7509b7896b12198448345082 Mon Sep 17 00:00:00 2001
From: Pedro Gomes <38071223+pcarruscag@users.noreply.github.com>
Date: Mon, 5 Jan 2026 11:43:13 -0800
Subject: [PATCH 4/8] Apply suggestions from code review

---
 SU2_CFD/src/output/CFlowOutput.cpp | 5 +----
 TestCases/parallel_regression.py   | 2 +-
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/SU2_CFD/src/output/CFlowOutput.cpp b/SU2_CFD/src/output/CFlowOutput.cpp
index f52de512f717..746d6d483314 100644
--- a/SU2_CFD/src/output/CFlowOutput.cpp
+++ b/SU2_CFD/src/output/CFlowOutput.cpp
@@ -1009,10 +1009,7 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
       if (output.skip || output.type != OperationType::PROBE) continue;
       su2double value = std::numeric_limits<su2double>::max();
       if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) {
-        auto MakeFunctor = [&](unsigned long iPoint) {
-          return GetPointValue(output, iPoint);
-        };
-        value = output.Eval(MakeFunctor(output.iPoint));
+        value = output.Eval(GetPointValue(output, output.iPoint));
       }
       probeValues[iProbe++] = value;
     }
diff --git a/TestCases/parallel_regression.py b/TestCases/parallel_regression.py
index 2ce93f052703..ab8e3fcb1357 100755
--- a/TestCases/parallel_regression.py
+++ b/TestCases/parallel_regression.py
@@ -318,7 +318,7 @@ def main():
     probe_performance_11 = TestCase('probe_performance_11')
     probe_performance_11.cfg_dir = "user_defined_functions"
     probe_performance_11.cfg_file = "test_11_probes.cfg"
-    probe_performance_11.test_iter = 5
+    probe_performance_11.test_iter = 4
     probe_performance_11.test_vals = [-6.300237, 1.0141e+05, 1.0132e+05, 1.0093e+05] # RMS_DENSITY, probe1, probe6, probe11
     # Tolerances are typically 0.001 in TestCase.py
     test_list.append(probe_performance_11)

From 542572eaa8de6f2e52b91c98269e0d0d20eff6f5 Mon Sep 17 00:00:00 2001
From: Pedro Gomes <38071223+pcarruscag@users.noreply.github.com>
Date: Mon, 5 Jan 2026 12:52:49 -0800
Subject: [PATCH 5/8] Update SU2_CFD/src/output/CFlowOutput.cpp

---
 SU2_CFD/src/output/CFlowOutput.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/SU2_CFD/src/output/CFlowOutput.cpp b/SU2_CFD/src/output/CFlowOutput.cpp
index 746d6d483314..a2e711f19b36 100644
--- a/SU2_CFD/src/output/CFlowOutput.cpp
+++ b/SU2_CFD/src/output/CFlowOutput.cpp
@@ -908,6 +908,7 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
         if (useADT && probeADT && !probeADT->IsEmpty()) {
           /*--- Use ADT to find the nearest node efficiently (O(log n) instead of O(n)). ---*/
           probeADT->DetermineNearestNode(coord, minDist, minPoint, rankID);
+          minDist = pow(minDist, 2);
 
           /*--- Check if this rank owns the nearest point. ---*/
           output.iPoint = (rankID == rank) ? minPoint : CustomOutput::PROBE_NOT_OWNED;

From 3a43e2a8a2571bd838442b290af9bdf822f29f94 Mon Sep 17 00:00:00 2001
From: Pratyksh Gupta <pratykshgupta9999@gmail.com>
Date: Tue, 6 Jan 2026 15:26:55 +0530
Subject: [PATCH 6/8] Update regression values for probe test case (Iter 3)

---
 TestCases/parallel_regression.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/TestCases/parallel_regression.py b/TestCases/parallel_regression.py
index ab8e3fcb1357..bb94fe02a9e3 100755
--- a/TestCases/parallel_regression.py
+++ b/TestCases/parallel_regression.py
@@ -319,7 +319,7 @@ def main():
     probe_performance_11.cfg_dir = "user_defined_functions"
     probe_performance_11.cfg_file = "test_11_probes.cfg"
     probe_performance_11.test_iter = 4
-    probe_performance_11.test_vals = [-6.300237, 1.0141e+05, 1.0132e+05, 1.0093e+05] # RMS_DENSITY, probe1, probe6, probe11
+    probe_performance_11.test_vals = [-6.285098, 1.0125e+05, 1.0132e+05, 9.9411e+04] # RMS_DENSITY, probe1, probe6, probe11
     # Tolerances are typically 0.001 in TestCase.py
     test_list.append(probe_performance_11)
 

From a256d393db241ebdc6a5c9e4e7fd77e10537377d Mon Sep 17 00:00:00 2001
From: Pratyksh Gupta <pratykshgupta9999@gmail.com>
Date: Sat, 10 Jan 2026 23:13:29 +0530
Subject: [PATCH 7/8] Refactor probe value handling and memory allocation

---
 SU2_CFD/src/output/CFlowOutput.cpp | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/SU2_CFD/src/output/CFlowOutput.cpp b/SU2_CFD/src/output/CFlowOutput.cpp
index a2e711f19b36..1b1a78d0dc50 100644
--- a/SU2_CFD/src/output/CFlowOutput.cpp
+++ b/SU2_CFD/src/output/CFlowOutput.cpp
@@ -838,7 +838,7 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
         ADT overhead is only worth it for larger numbers of probes. ---*/
   unsigned long nProbes = 0;
   for (const auto& output : customOutputs) {
-    if (!output.skip && output.type == OperationType::PROBE && output.varIndices.empty()) {
+    if (!output.skip && output.type == OperationType::PROBE) {
       ++nProbes;
     }
   }
@@ -1004,23 +1004,24 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
   /*--- Batch AllReduce for all probe values to reduce MPI communication overhead. ---*/
   if (nProbes > 0) {
     /*--- Evaluate all probe values locally first. ---*/
-    vector<su2double> probeValues(nProbes);
-    unsigned long iProbe = 0;
+    vector<su2double> probeValues;
+    probeValues.reserve(nProbes);
     for (auto& output : customOutputs) {
       if (output.skip || output.type != OperationType::PROBE) continue;
       su2double value = std::numeric_limits<su2double>::max();
       if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) {
         value = output.Eval(GetPointValue(output, output.iPoint));
       }
-      probeValues[iProbe++] = value;
+      probeValues.push_back(value);
     }
 
     /*--- Single AllReduce for all probe values. ---*/
-    vector<su2double> probeValuesGlobal(nProbes);
-    SU2_MPI::Allreduce(probeValues.data(), probeValuesGlobal.data(), nProbes, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm());
+    unsigned long nProbesActual = probeValues.size();
+    vector<su2double> probeValuesGlobal(nProbesActual);
+    SU2_MPI::Allreduce(probeValues.data(), probeValuesGlobal.data(), nProbesActual, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm());
 
     /*--- Set history output values for all probes. ---*/
-    iProbe = 0;
+    unsigned long iProbe = 0;
     for (auto& output : customOutputs) {
       if (output.skip || output.type != OperationType::PROBE) continue;
       SetHistoryOutputValue(output.name, probeValuesGlobal[iProbe++]);

From 43ab0e69227498a17623cf6c51b181f4c5322aea Mon Sep 17 00:00:00 2001
From: Pedro Gomes <38071223+pcarruscag@users.noreply.github.com>
Date: Sat, 10 Jan 2026 14:56:07 -0800
Subject: [PATCH 8/8] Update TestCases/parallel_regression.py

---
 TestCases/parallel_regression.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/TestCases/parallel_regression.py b/TestCases/parallel_regression.py
index df9bae36ec45..3a18ed9efbc5 100755
--- a/TestCases/parallel_regression.py
+++ b/TestCases/parallel_regression.py
@@ -319,7 +319,7 @@ def main():
     probe_performance_11.cfg_dir = "user_defined_functions"
     probe_performance_11.cfg_file = "test_11_probes.cfg"
     probe_performance_11.test_iter = 4
-    probe_performance_11.test_vals = [-6.285098, 1.0125e+05, 1.0132e+05, 9.9411e+04] # RMS_DENSITY, probe1, probe6, probe11
+    probe_performance_11.test_vals = [-6.290748, 101020, 101050, 99123] # RMS_DENSITY, probe1, probe6, probe11
     # Tolerances are typically 0.001 in TestCase.py
     test_list.append(probe_performance_11)