From 6e309dff24a226492bb8705e736a00aff54a6a22 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Wed, 4 Feb 2026 10:39:04 +0100 Subject: [PATCH 01/23] working fp32 support for PDLP no presolve no crossover --- .../pdlp/solver_settings.hpp | 2 +- cpp/src/dual_simplex/sparse_matrix.cpp | 7 + cpp/src/linear_programming/cusparse_view.cu | 4 +- .../initial_scaling.cu | 2 +- .../optimal_batch_size_handler.cu | 2 +- .../optimization_problem.cu | 2 +- cpp/src/linear_programming/pdhg.cu | 2 +- cpp/src/linear_programming/pdlp.cu | 67 +++- .../pdlp_warm_start_data.cu | 2 +- .../localized_duality_gap_container.cu | 2 +- .../restart_strategy/pdlp_restart_strategy.cu | 2 +- .../weighted_average_solution.cu | 2 +- cpp/src/linear_programming/saddle_point.cu | 2 +- cpp/src/linear_programming/solve.cu | 290 ++++++++++-------- cpp/src/linear_programming/solver_settings.cu | 2 +- cpp/src/linear_programming/solver_solution.cu | 2 +- .../adaptive_step_size_strategy.cu | 2 +- .../convergence_information.cu | 2 +- .../infeasibility_information.cu | 2 +- .../termination_strategy.cu | 2 +- cpp/src/linear_programming/translate.hpp | 2 +- .../utilities/problem_checking.cu | 2 +- cpp/src/math_optimization/solution_writer.cu | 27 +- cpp/src/math_optimization/solution_writer.hpp | 5 +- cpp/src/math_optimization/solver_settings.cu | 30 +- cpp/src/mip/diversity/lns/rins.cu | 16 +- .../local_search/rounding/simple_rounding.cu | 2 +- cpp/src/mip/mip_constants.hpp | 2 + cpp/src/mip/problem/problem.cu | 2 +- cpp/src/mip/solution/solution.cu | 2 +- cpp/src/mip/solver_solution.cu | 6 +- cpp/tests/linear_programming/pdlp_test.cu | 115 ++++++- 32 files changed, 414 insertions(+), 197 deletions(-) diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp index 3b94fee14b..e7377165c2 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp @@ 
-208,7 +208,7 @@ class pdlp_solver_settings_t { bool detect_infeasibility{false}; bool strict_infeasibility{false}; i_t iteration_limit{std::numeric_limits::max()}; - double time_limit{std::numeric_limits::infinity()}; + f_t time_limit{std::numeric_limits::infinity()}; pdlp_solver_mode_t pdlp_solver_mode{pdlp_solver_mode_t::Stable3}; bool log_to_console{true}; std::string log_file{""}; diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp index 7edc7b1eb5..15379c132d 100644 --- a/cpp/src/dual_simplex/sparse_matrix.cpp +++ b/cpp/src/dual_simplex/sparse_matrix.cpp @@ -10,6 +10,7 @@ #include #include +#include // #include // #include @@ -850,6 +851,12 @@ f_t sparse_dot(const std::vector& xind, return dot; } +#if PDLP_INSTANTIATE_FLOAT +// Minimal float instantiation for LP usage +template class csc_matrix_t; +template class csr_matrix_t; +#endif + #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE template class csc_matrix_t; diff --git a/cpp/src/linear_programming/cusparse_view.cu b/cpp/src/linear_programming/cusparse_view.cu index bdd2aa0c81..02332da03d 100644 --- a/cpp/src/linear_programming/cusparse_view.cu +++ b/cpp/src/linear_programming/cusparse_view.cu @@ -946,7 +946,7 @@ cusparse_view_t::cusparse_view_t( { } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class cusparse_sp_mat_descr_wrapper_t; template class cusparse_dn_vec_descr_wrapper_t; template class cusparse_dn_mat_descr_wrapper_t; @@ -960,7 +960,7 @@ template class cusparse_view_t; #endif #if CUDA_VER_12_4_UP -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template void my_cusparsespmm_preprocess(cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, diff --git a/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu b/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu index 72474fe90a..451694ffa7 100644 --- a/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu +++ 
b/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu @@ -858,7 +858,7 @@ pdlp_initial_scaling_strategy_t::view() int* A_T_offsets, \ int* A_T_indices); -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/linear_programming/optimal_batch_size_handler/optimal_batch_size_handler.cu b/cpp/src/linear_programming/optimal_batch_size_handler/optimal_batch_size_handler.cu index 0abd6a0b63..d0fa98d83f 100644 --- a/cpp/src/linear_programming/optimal_batch_size_handler/optimal_batch_size_handler.cu +++ b/cpp/src/linear_programming/optimal_batch_size_handler/optimal_batch_size_handler.cu @@ -434,7 +434,7 @@ int optimal_batch_size_handler(const optimization_problem_t& op_proble return 0; } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template int optimal_batch_size_handler( const optimization_problem_t& op_problem, int max_batch_size); #endif diff --git a/cpp/src/linear_programming/optimization_problem.cu b/cpp/src/linear_programming/optimization_problem.cu index ba57141e9d..bd802325da 100644 --- a/cpp/src/linear_programming/optimization_problem.cu +++ b/cpp/src/linear_programming/optimization_problem.cu @@ -1062,7 +1062,7 @@ bool optimization_problem_t::has_quadratic_objective() const return !Q_values_.empty(); } // NOTE: Explicitly instantiate all types here in order to avoid linker error -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class optimization_problem_t; #endif #if MIP_INSTANTIATE_DOUBLE diff --git a/cpp/src/linear_programming/pdhg.cu b/cpp/src/linear_programming/pdhg.cu index 551772cf32..f094e37ddf 100644 --- a/cpp/src/linear_programming/pdhg.cu +++ b/cpp/src/linear_programming/pdhg.cu @@ -1159,7 +1159,7 @@ rmm::device_uvector& pdhg_solver_t::get_dual_solution() return current_saddle_point_state_.get_dual_solution(); } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class pdhg_solver_t; #endif #if MIP_INSTANTIATE_DOUBLE diff --git 
a/cpp/src/linear_programming/pdlp.cu b/cpp/src/linear_programming/pdlp.cu index 8a05f1b2a1..f68fa3e33b 100644 --- a/cpp/src/linear_programming/pdlp.cu +++ b/cpp/src/linear_programming/pdlp.cu @@ -41,6 +41,59 @@ namespace cuopt::linear_programming::detail { +// Templated wrapper for cuBLAS geam function +// cublasSgeam for float, cublasDgeam for double +template +inline cublasStatus_t cublasGeam(cublasHandle_t handle, + cublasOperation_t transa, + cublasOperation_t transb, + int m, + int n, + const T* alpha, + const T* A, + int lda, + const T* beta, + const T* B, + int ldb, + T* C, + int ldc); + +template <> +inline cublasStatus_t cublasGeam(cublasHandle_t handle, + cublasOperation_t transa, + cublasOperation_t transb, + int m, + int n, + const float* alpha, + const float* A, + int lda, + const float* beta, + const float* B, + int ldb, + float* C, + int ldc) +{ + return cublasSgeam(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc); +} + +template <> +inline cublasStatus_t cublasGeam(cublasHandle_t handle, + cublasOperation_t transa, + cublasOperation_t transb, + int m, + int n, + const double* alpha, + const double* A, + int lda, + const double* beta, + const double* B, + int ldb, + double* C, + int ldc) +{ + return cublasDgeam(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc); +} + template static size_t batch_size_handler(const problem_t& op_problem, const pdlp_solver_settings_t& settings) @@ -1869,7 +1922,7 @@ void pdlp_solver_t::transpose_primal_dual_to_row( rmm::device_uvector dual_slack_transposed( is_dual_slack_empty ? 
0 : primal_size_h_ * climber_strategies_.size(), stream_view_); - CUBLAS_CHECK(cublasDgeam(handle_ptr_->get_cublas_handle(), + CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), CUBLAS_OP_T, CUBLAS_OP_N, climber_strategies_.size(), @@ -1884,7 +1937,7 @@ void pdlp_solver_t::transpose_primal_dual_to_row( climber_strategies_.size())); if (!is_dual_slack_empty) { - CUBLAS_CHECK(cublasDgeam(handle_ptr_->get_cublas_handle(), + CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), CUBLAS_OP_T, CUBLAS_OP_N, climber_strategies_.size(), @@ -1898,7 +1951,7 @@ void pdlp_solver_t::transpose_primal_dual_to_row( dual_slack_transposed.data(), climber_strategies_.size())); } - CUBLAS_CHECK(cublasDgeam(handle_ptr_->get_cublas_handle(), + CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), CUBLAS_OP_T, CUBLAS_OP_N, climber_strategies_.size(), @@ -1945,7 +1998,7 @@ void pdlp_solver_t::transpose_primal_dual_back_to_col( rmm::device_uvector dual_slack_transposed( is_dual_slack_empty ? 0 : primal_size_h_ * climber_strategies_.size(), stream_view_); - CUBLAS_CHECK(cublasDgeam(handle_ptr_->get_cublas_handle(), + CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), CUBLAS_OP_T, CUBLAS_OP_N, primal_size_h_, @@ -1960,7 +2013,7 @@ void pdlp_solver_t::transpose_primal_dual_back_to_col( primal_size_h_)); if (!is_dual_slack_empty) { - CUBLAS_CHECK(cublasDgeam(handle_ptr_->get_cublas_handle(), + CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), CUBLAS_OP_T, CUBLAS_OP_N, primal_size_h_, @@ -1975,7 +2028,7 @@ void pdlp_solver_t::transpose_primal_dual_back_to_col( primal_size_h_)); } - CUBLAS_CHECK(cublasDgeam(handle_ptr_->get_cublas_handle(), + CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), CUBLAS_OP_T, CUBLAS_OP_N, dual_size_h_, @@ -2858,7 +2911,7 @@ pdlp_solver_t::get_current_termination_strategy() return current_termination_strategy_; } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class pdlp_solver_t; template __global__ void 
compute_weights_initial_primal_weight_from_squared_norms( diff --git a/cpp/src/linear_programming/pdlp_warm_start_data.cu b/cpp/src/linear_programming/pdlp_warm_start_data.cu index 3145552fc7..219d803ec5 100644 --- a/cpp/src/linear_programming/pdlp_warm_start_data.cu +++ b/cpp/src/linear_programming/pdlp_warm_start_data.cu @@ -178,7 +178,7 @@ void pdlp_warm_start_data_t::check_sizes() "All dual vectors should be of same size"); } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class pdlp_warm_start_data_t; #endif diff --git a/cpp/src/linear_programming/restart_strategy/localized_duality_gap_container.cu b/cpp/src/linear_programming/restart_strategy/localized_duality_gap_container.cu index 0938a3eccd..0549012996 100644 --- a/cpp/src/linear_programming/restart_strategy/localized_duality_gap_container.cu +++ b/cpp/src/linear_programming/restart_strategy/localized_duality_gap_container.cu @@ -144,7 +144,7 @@ localized_duality_gap_container_t::view() return v; } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template struct localized_duality_gap_container_t; #endif #if MIP_INSTANTIATE_DOUBLE diff --git a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu index 615a276584..41e5a38889 100644 --- a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu +++ b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu @@ -2523,7 +2523,7 @@ bool pdlp_restart_strategy_t::get_last_restart_was_average() const const typename localized_duality_gap_container_t::view_t duality_gap_view, \ F_TYPE* primal_product); -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/linear_programming/restart_strategy/weighted_average_solution.cu b/cpp/src/linear_programming/restart_strategy/weighted_average_solution.cu index d47fbba0c9..717183f410 100644 --- 
a/cpp/src/linear_programming/restart_strategy/weighted_average_solution.cu +++ b/cpp/src/linear_programming/restart_strategy/weighted_average_solution.cu @@ -139,7 +139,7 @@ i_t weighted_average_solution_t::get_iterations_since_last_restart() c return iterations_since_last_restart_; } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template __global__ void add_weight_sums(const float* primal_weight, const float* dual_weight, float* sum_primal_solution_weights, diff --git a/cpp/src/linear_programming/saddle_point.cu b/cpp/src/linear_programming/saddle_point.cu index 727bcbf0bc..b07d5486d6 100644 --- a/cpp/src/linear_programming/saddle_point.cu +++ b/cpp/src/linear_programming/saddle_point.cu @@ -166,7 +166,7 @@ rmm::device_uvector& saddle_point_state_t::get_next_AtY() return next_AtY_; } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class saddle_point_state_t; #endif diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index db15eed82d..4943cef743 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -568,75 +568,81 @@ optimization_problem_solution_t run_pdlp(detail::problem_t& sol.get_solve_time()); } - const bool do_crossover = settings.crossover; - i_t crossover_info = 0; - if (do_crossover && sol.get_termination_status() == pdlp_termination_status_t::Optimal) { - crossover_info = -1; - - dual_simplex::lp_problem_t lp(problem.handle_ptr, 1, 1, 1); - dual_simplex::lp_solution_t initial_solution(1, 1); - translate_to_crossover_problem(problem, sol, lp, initial_solution); - dual_simplex::simplex_solver_settings_t dual_simplex_settings; - dual_simplex_settings.time_limit = timer.remaining_time(); - dual_simplex_settings.iteration_limit = settings.iteration_limit; - dual_simplex_settings.concurrent_halt = settings.concurrent_halt; - dual_simplex::lp_solution_t vertex_solution(lp.num_rows, lp.num_cols); - std::vector vstatus(lp.num_cols); - 
dual_simplex::crossover_status_t crossover_status = dual_simplex::crossover( - lp, dual_simplex_settings, initial_solution, start_time, vertex_solution, vstatus); - pdlp_termination_status_t termination_status = pdlp_termination_status_t::TimeLimit; - auto to_termination_status = [](dual_simplex::crossover_status_t status) { - switch (status) { - case dual_simplex::crossover_status_t::OPTIMAL: return pdlp_termination_status_t::Optimal; - case dual_simplex::crossover_status_t::PRIMAL_FEASIBLE: - return pdlp_termination_status_t::PrimalFeasible; - case dual_simplex::crossover_status_t::DUAL_FEASIBLE: - return pdlp_termination_status_t::NumericalError; - case dual_simplex::crossover_status_t::NUMERICAL_ISSUES: - return pdlp_termination_status_t::NumericalError; - case dual_simplex::crossover_status_t::CONCURRENT_LIMIT: - return pdlp_termination_status_t::ConcurrentLimit; - case dual_simplex::crossover_status_t::TIME_LIMIT: - return pdlp_termination_status_t::TimeLimit; - default: return pdlp_termination_status_t::NumericalError; - } - }; - termination_status = to_termination_status(crossover_status); - if (crossover_status == dual_simplex::crossover_status_t::OPTIMAL) { crossover_info = 0; } - rmm::device_uvector final_primal_solution = - cuopt::device_copy(vertex_solution.x, problem.handle_ptr->get_stream()); - rmm::device_uvector final_dual_solution = - cuopt::device_copy(vertex_solution.y, problem.handle_ptr->get_stream()); - rmm::device_uvector final_reduced_cost = - cuopt::device_copy(vertex_solution.z, problem.handle_ptr->get_stream()); - - // Should be filled with more information from dual simplex - std::vector< - typename optimization_problem_solution_t::additional_termination_information_t> - info(1); - info[0].primal_objective = vertex_solution.user_objective; - info[0].number_of_steps_taken = vertex_solution.iterations; - auto crossover_end = std::chrono::high_resolution_clock::now(); - auto crossover_duration = - std::chrono::duration_cast(crossover_end - 
start_solver); - info[0].solve_time = crossover_duration.count() / 1000.0; - auto sol_crossover = optimization_problem_solution_t(final_primal_solution, - final_dual_solution, - final_reduced_cost, - problem.objective_name, - problem.var_names, - problem.row_names, - std::move(info), - {termination_status}); - sol.copy_from(problem.handle_ptr, sol_crossover); - CUOPT_LOG_CONDITIONAL_INFO( - !settings.inside_mip, "Crossover status %s", sol.get_termination_status_string().c_str()); - } - if (settings.method == method_t::Concurrent && settings.concurrent_halt != nullptr && - crossover_info == 0 && sol.get_termination_status() == pdlp_termination_status_t::Optimal) { - // We finished. Tell dual simplex to stop if it is still running. - CUOPT_LOG_CONDITIONAL_INFO(!settings.inside_mip, "PDLP finished. Telling others to stop"); - *settings.concurrent_halt = 1; + if constexpr (!std::is_same_v) { + cuopt_expects(!settings.crossover, + error_type_t::ValidationError, + "PDLP with crossover is not supported for float precision. 
Set crossover=false or use double precision."); + } else { + const bool do_crossover = settings.crossover; + i_t crossover_info = 0; + if (do_crossover && sol.get_termination_status() == pdlp_termination_status_t::Optimal) { + crossover_info = -1; + + dual_simplex::lp_problem_t lp(problem.handle_ptr, 1, 1, 1); + dual_simplex::lp_solution_t initial_solution(1, 1); + translate_to_crossover_problem(problem, sol, lp, initial_solution); + dual_simplex::simplex_solver_settings_t dual_simplex_settings; + dual_simplex_settings.time_limit = timer.remaining_time(); + dual_simplex_settings.iteration_limit = settings.iteration_limit; + dual_simplex_settings.concurrent_halt = settings.concurrent_halt; + dual_simplex::lp_solution_t vertex_solution(lp.num_rows, lp.num_cols); + std::vector vstatus(lp.num_cols); + dual_simplex::crossover_status_t crossover_status = dual_simplex::crossover( + lp, dual_simplex_settings, initial_solution, start_time, vertex_solution, vstatus); + pdlp_termination_status_t termination_status = pdlp_termination_status_t::TimeLimit; + auto to_termination_status = [](dual_simplex::crossover_status_t status) { + switch (status) { + case dual_simplex::crossover_status_t::OPTIMAL: return pdlp_termination_status_t::Optimal; + case dual_simplex::crossover_status_t::PRIMAL_FEASIBLE: + return pdlp_termination_status_t::PrimalFeasible; + case dual_simplex::crossover_status_t::DUAL_FEASIBLE: + return pdlp_termination_status_t::NumericalError; + case dual_simplex::crossover_status_t::NUMERICAL_ISSUES: + return pdlp_termination_status_t::NumericalError; + case dual_simplex::crossover_status_t::CONCURRENT_LIMIT: + return pdlp_termination_status_t::ConcurrentLimit; + case dual_simplex::crossover_status_t::TIME_LIMIT: + return pdlp_termination_status_t::TimeLimit; + default: return pdlp_termination_status_t::NumericalError; + } + }; + termination_status = to_termination_status(crossover_status); + if (crossover_status == dual_simplex::crossover_status_t::OPTIMAL) { 
crossover_info = 0; } + rmm::device_uvector final_primal_solution = + cuopt::device_copy(vertex_solution.x, problem.handle_ptr->get_stream()); + rmm::device_uvector final_dual_solution = + cuopt::device_copy(vertex_solution.y, problem.handle_ptr->get_stream()); + rmm::device_uvector final_reduced_cost = + cuopt::device_copy(vertex_solution.z, problem.handle_ptr->get_stream()); + + // Should be filled with more information from dual simplex + std::vector< + typename optimization_problem_solution_t::additional_termination_information_t> + info(1); + info[0].primal_objective = vertex_solution.user_objective; + info[0].number_of_steps_taken = vertex_solution.iterations; + auto crossover_end = std::chrono::high_resolution_clock::now(); + auto crossover_duration = + std::chrono::duration_cast(crossover_end - start_solver); + info[0].solve_time = crossover_duration.count() / 1000.0; + auto sol_crossover = optimization_problem_solution_t(final_primal_solution, + final_dual_solution, + final_reduced_cost, + problem.objective_name, + problem.var_names, + problem.row_names, + std::move(info), + {termination_status}); + sol.copy_from(problem.handle_ptr, sol_crossover); + CUOPT_LOG_CONDITIONAL_INFO( + !settings.inside_mip, "Crossover status %s", sol.get_termination_status_string().c_str()); + } + if (settings.method == method_t::Concurrent && settings.concurrent_halt != nullptr && + crossover_info == 0 && sol.get_termination_status() == pdlp_termination_status_t::Optimal) { + // We finished. Tell dual simplex to stop if it is still running. + CUOPT_LOG_CONDITIONAL_INFO(!settings.inside_mip, "PDLP finished. 
Telling others to stop"); + *settings.concurrent_halt = 1; + } } return sol; } @@ -1055,14 +1061,23 @@ optimization_problem_solution_t solve_lp_with_method( const timer_t& timer, bool is_batch_mode) { - if (settings.method == method_t::DualSimplex) { - return run_dual_simplex(problem, settings, timer); - } else if (settings.method == method_t::Barrier) { - return run_barrier(problem, settings, timer); - } else if (settings.method == method_t::Concurrent) { - return run_concurrent(problem, settings, timer, is_batch_mode); + if constexpr (std::is_same_v) { + if (settings.method == method_t::DualSimplex) { + return run_dual_simplex(problem, settings, timer); + } else if (settings.method == method_t::Barrier) { + return run_barrier(problem, settings, timer); + } else if (settings.method == method_t::Concurrent) { + return run_concurrent(problem, settings, timer, is_batch_mode); + } else { + return run_pdlp(problem, settings, timer, is_batch_mode); + } } else { - return run_pdlp(problem, settings, timer, is_batch_mode); + // Float precision only supports PDLP without presolve/crossover + cuopt_expects(settings.method == method_t::PDLP, + error_type_t::ValidationError, + "Float precision only supports PDLP method. DualSimplex, Barrier, and Concurrent " + "require double precision."); + return run_pdlp(problem, settings, timer, is_batch_mode); } } @@ -1131,31 +1146,38 @@ optimization_problem_solution_t solve_lp( std::unique_ptr> presolver; auto run_presolve = settings.presolve; run_presolve = run_presolve && settings.get_pdlp_warm_start_data().total_pdlp_iterations_ == -1; - if (!run_presolve && !settings_const.inside_mip) { - CUOPT_LOG_INFO("Third-party presolve is disabled, skipping"); - } - if (run_presolve) { - detail::sort_csr(op_problem); - // allocate no more than 10% of the time limit to presolve. - // Note that this is not the presolve time, but the time limit for presolve. 
- // But no less than 1 second, to avoid early timeout triggering known crashes - const double presolve_time_limit = - std::max(1.0, std::min(0.1 * lp_timer.remaining_time(), 60.0)); - presolver = std::make_unique>(); - auto result = presolver->apply(op_problem, - cuopt::linear_programming::problem_category_t::LP, - settings.dual_postsolve, - settings.tolerances.absolute_primal_tolerance, - settings.tolerances.relative_primal_tolerance, - presolve_time_limit); - if (!result.has_value()) { - return optimization_problem_solution_t( - pdlp_termination_status_t::PrimalInfeasible, op_problem.get_handle_ptr()->get_stream()); + if constexpr (!std::is_same_v) { + cuopt_expects(!run_presolve, + error_type_t::ValidationError, + "Only double precision is supported with third-party presolve (papilo). Set presolve=false or use double precision."); + } else { + if (!run_presolve && !settings_const.inside_mip) { + CUOPT_LOG_INFO("Third-party presolve is disabled, skipping"); + } + + if (run_presolve) { + detail::sort_csr(op_problem); + // allocate no more than 10% of the time limit to presolve. + // Note that this is not the presolve time, but the time limit for presolve. 
+ // But no less than 1 second, to avoid early timeout triggering known crashes + const double presolve_time_limit = + std::max(1.0, std::min(0.1 * lp_timer.remaining_time(), 60.0)); + presolver = std::make_unique>(); + auto result = presolver->apply(op_problem, + cuopt::linear_programming::problem_category_t::LP, + settings.dual_postsolve, + settings.tolerances.absolute_primal_tolerance, + settings.tolerances.relative_primal_tolerance, + presolve_time_limit); + if (!result.has_value()) { + return optimization_problem_solution_t( + pdlp_termination_status_t::PrimalInfeasible, op_problem.get_handle_ptr()->get_stream()); + } + problem = detail::problem_t(result->reduced_problem); + presolve_time = lp_timer.elapsed_time(); + CUOPT_LOG_INFO("Papilo presolve time: %f", presolve_time); } - problem = detail::problem_t(result->reduced_problem); - presolve_time = lp_timer.elapsed_time(); - CUOPT_LOG_INFO("Papilo presolve time: %f", presolve_time); } if (!settings_const.inside_mip) { @@ -1174,39 +1196,41 @@ optimization_problem_solution_t solve_lp( auto solution = solve_lp_with_method(problem, settings, lp_timer, is_batch_mode); - if (run_presolve) { - auto primal_solution = cuopt::device_copy(solution.get_primal_solution(), - op_problem.get_handle_ptr()->get_stream()); - auto dual_solution = - cuopt::device_copy(solution.get_dual_solution(), op_problem.get_handle_ptr()->get_stream()); - auto reduced_costs = - cuopt::device_copy(solution.get_reduced_cost(), op_problem.get_handle_ptr()->get_stream()); - bool status_to_skip = false; - - presolver->undo(primal_solution, - dual_solution, - reduced_costs, - cuopt::linear_programming::problem_category_t::LP, - status_to_skip, - settings.dual_postsolve, - op_problem.get_handle_ptr()->get_stream()); - - std::vector< - typename optimization_problem_solution_t::additional_termination_information_t> - term_vec = solution.get_additional_termination_informations(); - std::vector status_vec = solution.get_terminations_status(); - - // 
Create a new solution with the full problem solution - solution = - optimization_problem_solution_t(primal_solution, - dual_solution, - reduced_costs, - std::move(solution.get_pdlp_warm_start_data()), - op_problem.get_objective_name(), - op_problem.get_variable_names(), - op_problem.get_row_names(), - std::move(term_vec), - std::move(status_vec)); + if constexpr (std::is_same_v) { + if (run_presolve) { + auto primal_solution = cuopt::device_copy(solution.get_primal_solution(), + op_problem.get_handle_ptr()->get_stream()); + auto dual_solution = + cuopt::device_copy(solution.get_dual_solution(), op_problem.get_handle_ptr()->get_stream()); + auto reduced_costs = + cuopt::device_copy(solution.get_reduced_cost(), op_problem.get_handle_ptr()->get_stream()); + bool status_to_skip = false; + + presolver->undo(primal_solution, + dual_solution, + reduced_costs, + cuopt::linear_programming::problem_category_t::LP, + status_to_skip, + settings.dual_postsolve, + op_problem.get_handle_ptr()->get_stream()); + + std::vector< + typename optimization_problem_solution_t::additional_termination_information_t> + term_vec = solution.get_additional_termination_informations(); + std::vector status_vec = solution.get_terminations_status(); + + // Create a new solution with the full problem solution + solution = + optimization_problem_solution_t(primal_solution, + dual_solution, + reduced_costs, + std::move(solution.get_pdlp_warm_start_data()), + op_problem.get_objective_name(), + op_problem.get_variable_names(), + op_problem.get_row_names(), + std::move(term_vec), + std::move(status_vec)); + } } if (settings.sol_file != "") { @@ -1353,7 +1377,7 @@ optimization_problem_solution_t solve_lp( const cuopt::mps_parser::mps_data_model_t& data_model); \ template void set_pdlp_solver_mode(pdlp_solver_settings_t& settings); -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/linear_programming/solver_settings.cu 
b/cpp/src/linear_programming/solver_settings.cu index fc02b6f121..a38ed4fcc2 100644 --- a/cpp/src/linear_programming/solver_settings.cu +++ b/cpp/src/linear_programming/solver_settings.cu @@ -368,7 +368,7 @@ pdlp_solver_settings_t::get_pdlp_warm_start_data_view() const noexcept return pdlp_warm_start_data_view_; } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class pdlp_solver_settings_t; #endif diff --git a/cpp/src/linear_programming/solver_solution.cu b/cpp/src/linear_programming/solver_solution.cu index ff62340247..32fcc03dea 100644 --- a/cpp/src/linear_programming/solver_solution.cu +++ b/cpp/src/linear_programming/solver_solution.cu @@ -448,7 +448,7 @@ void optimization_problem_solution_t::write_to_sol_file( std::string(filename), status, objective_value, var_names_, solution); } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class optimization_problem_solution_t; #endif diff --git a/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu b/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu index 352070ce78..85ae32d18c 100644 --- a/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu +++ b/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu @@ -596,7 +596,7 @@ adaptive_step_size_strategy_t::view() F_TYPE * dual_step_size, \ int* pdhg_iteration); -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/linear_programming/termination_strategy/convergence_information.cu b/cpp/src/linear_programming/termination_strategy/convergence_information.cu index 6247cc7ed6..a382658182 100644 --- a/cpp/src/linear_programming/termination_strategy/convergence_information.cu +++ b/cpp/src/linear_programming/termination_strategy/convergence_information.cu @@ -986,7 +986,7 @@ convergence_information_t::to_primal_quality_adapter( primal_objective_.element(0, stream_view_)}; } -#if MIP_INSTANTIATE_FLOAT +#if 
PDLP_INSTANTIATE_FLOAT template class convergence_information_t; template __global__ void compute_remaining_stats_kernel( diff --git a/cpp/src/linear_programming/termination_strategy/infeasibility_information.cu b/cpp/src/linear_programming/termination_strategy/infeasibility_information.cu index 2f7e3b1132..f5c37f7f8c 100644 --- a/cpp/src/linear_programming/termination_strategy/infeasibility_information.cu +++ b/cpp/src/linear_programming/termination_strategy/infeasibility_information.cu @@ -745,7 +745,7 @@ typename infeasibility_information_t::view_t infeasibility_information return v; } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class infeasibility_information_t; template __global__ void compute_remaining_stats_kernel( diff --git a/cpp/src/linear_programming/termination_strategy/termination_strategy.cu b/cpp/src/linear_programming/termination_strategy/termination_strategy.cu index 1041bd98a8..3eb862dae9 100644 --- a/cpp/src/linear_programming/termination_strategy/termination_strategy.cu +++ b/cpp/src/linear_programming/termination_strategy/termination_strategy.cu @@ -681,7 +681,7 @@ void pdlp_termination_strategy_t::print_termination_criteria(i_t itera bool per_constraint_residual, \ int batch_size); -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/linear_programming/translate.hpp b/cpp/src/linear_programming/translate.hpp index 19f6c024ce..bf223d42d9 100644 --- a/cpp/src/linear_programming/translate.hpp +++ b/cpp/src/linear_programming/translate.hpp @@ -133,7 +133,7 @@ void translate_to_crossover_problem(const detail::problem_t& problem, std::vector slack(problem.n_constraints); std::vector tmp_x = cuopt::host_copy(sol.get_primal_solution(), stream); stream.synchronize(); - dual_simplex::matrix_vector_multiply(lp.A, 1.0, tmp_x, 0.0, slack); + dual_simplex::matrix_vector_multiply(lp.A, f_t(1.0), tmp_x, f_t(0.0), slack); CUOPT_LOG_DEBUG("Multiplied A and x"); 
lp.A.col_start.resize(problem.n_variables + problem.n_constraints + 1); diff --git a/cpp/src/linear_programming/utilities/problem_checking.cu b/cpp/src/linear_programming/utilities/problem_checking.cu index 879707c669..2a1a771012 100644 --- a/cpp/src/linear_programming/utilities/problem_checking.cu +++ b/cpp/src/linear_programming/utilities/problem_checking.cu @@ -340,7 +340,7 @@ bool problem_checking_t::has_crossing_bounds( #define INSTANTIATE(F_TYPE) template class problem_checking_t; -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/math_optimization/solution_writer.cu b/cpp/src/math_optimization/solution_writer.cu index 01e9c1838e..3cac57c59e 100644 --- a/cpp/src/math_optimization/solution_writer.cu +++ b/cpp/src/math_optimization/solution_writer.cu @@ -9,15 +9,18 @@ #include #include "solution_writer.hpp" +#include + #include namespace cuopt::linear_programming { +template void solution_writer_t::write_solution_to_sol_file(const std::string& filename, const std::string& status, - const double objective_value, + const f_t objective_value, const std::vector& variable_names, - const std::vector& variable_values) + const std::vector& variable_values) { raft::common::nvtx::range fun_scope("write final solution to .sol file"); std::ofstream file(filename.data()); @@ -27,7 +30,7 @@ void solution_writer_t::write_solution_to_sol_file(const std::string& filename, return; } - file.precision(std::numeric_limits::max_digits10 + 1); + file.precision(std::numeric_limits::max_digits10 + 1); file << "# Status: " << status << std::endl; @@ -39,4 +42,22 @@ void solution_writer_t::write_solution_to_sol_file(const std::string& filename, } } +#if PDLP_INSTANTIATE_FLOAT +template void solution_writer_t::write_solution_to_sol_file( + const std::string& filename, + const std::string& status, + const float objective_value, + const std::vector& variable_names, + const std::vector& variable_values); +#endif + +#if MIP_INSTANTIATE_DOUBLE 
+template void solution_writer_t::write_solution_to_sol_file( + const std::string& filename, + const std::string& status, + const double objective_value, + const std::vector& variable_names, + const std::vector& variable_values); +#endif + } // namespace cuopt::linear_programming diff --git a/cpp/src/math_optimization/solution_writer.hpp b/cpp/src/math_optimization/solution_writer.hpp index 0890bf260b..e187f64313 100644 --- a/cpp/src/math_optimization/solution_writer.hpp +++ b/cpp/src/math_optimization/solution_writer.hpp @@ -23,10 +23,11 @@ namespace cuopt::linear_programming { */ class solution_writer_t { public: + template static void write_solution_to_sol_file(const std::string& sol_file_path, const std::string& status, - const double objective_value, + const f_t objective_value, const std::vector& variable_names, - const std::vector& variable_values); + const std::vector& variable_values); }; } // namespace cuopt::linear_programming diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index 41c186e193..b21fa64b9d 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -58,21 +58,21 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings // clang-format off // Float parameters float_parameters = { - {CUOPT_TIME_LIMIT, &mip_settings.time_limit, 0.0, std::numeric_limits::infinity(), std::numeric_limits::infinity()}, - {CUOPT_TIME_LIMIT, &pdlp_settings.time_limit, 0.0, std::numeric_limits::infinity(), std::numeric_limits::infinity()}, - {CUOPT_ABSOLUTE_DUAL_TOLERANCE, &pdlp_settings.tolerances.absolute_dual_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_RELATIVE_DUAL_TOLERANCE, &pdlp_settings.tolerances.relative_dual_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_ABSOLUTE_PRIMAL_TOLERANCE, &pdlp_settings.tolerances.absolute_primal_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_RELATIVE_PRIMAL_TOLERANCE, &pdlp_settings.tolerances.relative_primal_tolerance, 0.0, 1e-1, 
1e-4}, - {CUOPT_ABSOLUTE_GAP_TOLERANCE, &pdlp_settings.tolerances.absolute_gap_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_RELATIVE_GAP_TOLERANCE, &pdlp_settings.tolerances.relative_gap_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_MIP_ABSOLUTE_TOLERANCE, &mip_settings.tolerances.absolute_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_MIP_RELATIVE_TOLERANCE, &mip_settings.tolerances.relative_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_MIP_INTEGRALITY_TOLERANCE, &mip_settings.tolerances.integrality_tolerance, 0.0, 1e-1, 1e-5}, - {CUOPT_MIP_ABSOLUTE_GAP, &mip_settings.tolerances.absolute_mip_gap, 0.0, CUOPT_INFINITY, 1e-10}, - {CUOPT_MIP_RELATIVE_GAP, &mip_settings.tolerances.relative_mip_gap, 0.0, 1e-1, 1e-4}, - {CUOPT_PRIMAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.primal_infeasible_tolerance, 0.0, 1e-1, 1e-10}, - {CUOPT_DUAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.dual_infeasible_tolerance, 0.0, 1e-1, 1e-10} + {CUOPT_TIME_LIMIT, &mip_settings.time_limit, f_t(0.0), std::numeric_limits::infinity(), std::numeric_limits::infinity()}, + {CUOPT_TIME_LIMIT, &pdlp_settings.time_limit, f_t(0.0), std::numeric_limits::infinity(), std::numeric_limits::infinity()}, + {CUOPT_ABSOLUTE_DUAL_TOLERANCE, &pdlp_settings.tolerances.absolute_dual_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_RELATIVE_DUAL_TOLERANCE, &pdlp_settings.tolerances.relative_dual_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_ABSOLUTE_PRIMAL_TOLERANCE, &pdlp_settings.tolerances.absolute_primal_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_RELATIVE_PRIMAL_TOLERANCE, &pdlp_settings.tolerances.relative_primal_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_ABSOLUTE_GAP_TOLERANCE, &pdlp_settings.tolerances.absolute_gap_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_RELATIVE_GAP_TOLERANCE, &pdlp_settings.tolerances.relative_gap_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_MIP_ABSOLUTE_TOLERANCE, &mip_settings.tolerances.absolute_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + 
{CUOPT_MIP_RELATIVE_TOLERANCE, &mip_settings.tolerances.relative_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_MIP_INTEGRALITY_TOLERANCE, &mip_settings.tolerances.integrality_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-5)}, + {CUOPT_MIP_ABSOLUTE_GAP, &mip_settings.tolerances.absolute_mip_gap, f_t(0.0), std::numeric_limits::infinity(), f_t(1e-10)}, + {CUOPT_MIP_RELATIVE_GAP, &mip_settings.tolerances.relative_mip_gap, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_PRIMAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.primal_infeasible_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-10)}, + {CUOPT_DUAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.dual_infeasible_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-10)} }; // Int parameters diff --git a/cpp/src/mip/diversity/lns/rins.cu b/cpp/src/mip/diversity/lns/rins.cu index 7456b59edd..ab5d17b601 100644 --- a/cpp/src/mip/diversity/lns/rins.cu +++ b/cpp/src/mip/diversity/lns/rins.cu @@ -180,7 +180,7 @@ void rins_t::run_rins() total_calls++; node_count_at_last_rins = node_count.load(); - time_limit = std::min(time_limit, dm.timer.remaining_time()); + time_limit = std::min(time_limit, static_cast(dm.timer.remaining_time())); CUOPT_LOG_DEBUG("Running RINS on solution with objective %g, fixing %d/%d", best_sol.get_user_objective(), vars_to_fix.size(), @@ -287,22 +287,22 @@ void rins_t::run_rins() if (branch_and_bound_status == dual_simplex::mip_status_t::OPTIMAL) { CUOPT_LOG_DEBUG("RINS submip optimal"); // do goldilocks update - fixrate = std::max(fixrate - 0.05, settings.min_fixrate); - time_limit = std::max(time_limit - 2, settings.min_time_limit); + fixrate = std::max(fixrate - f_t(0.05), static_cast(settings.min_fixrate)); + time_limit = std::max(time_limit - f_t(2), static_cast(settings.min_time_limit)); } else if (branch_and_bound_status == dual_simplex::mip_status_t::TIME_LIMIT) { CUOPT_LOG_DEBUG("RINS submip time limit"); // do goldilocks update - fixrate = std::min(fixrate + 0.05, settings.max_fixrate); - time_limit = 
std::min(time_limit + 2, settings.max_time_limit); + fixrate = std::min(fixrate + f_t(0.05), static_cast(settings.max_fixrate)); + time_limit = std::min(time_limit + f_t(2), static_cast(settings.max_time_limit)); } else if (branch_and_bound_status == dual_simplex::mip_status_t::INFEASIBLE) { CUOPT_LOG_DEBUG("RINS submip infeasible"); // do goldilocks update, decreasing fixrate - fixrate = std::max(fixrate - 0.05, settings.min_fixrate); + fixrate = std::max(fixrate - f_t(0.05), static_cast(settings.min_fixrate)); } else { CUOPT_LOG_DEBUG("RINS solution not found"); // do goldilocks update - fixrate = std::min(fixrate + 0.05, settings.max_fixrate); - time_limit = std::min(time_limit + 2, settings.max_time_limit); + fixrate = std::min(fixrate + f_t(0.05), static_cast(settings.max_fixrate)); + time_limit = std::min(time_limit + f_t(2), static_cast(settings.max_time_limit)); } cpu_fj_thread.stop_cpu_solver(); diff --git a/cpp/src/mip/local_search/rounding/simple_rounding.cu b/cpp/src/mip/local_search/rounding/simple_rounding.cu index 48b525dbba..dd67f1a452 100644 --- a/cpp/src/mip/local_search/rounding/simple_rounding.cu +++ b/cpp/src/mip/local_search/rounding/simple_rounding.cu @@ -179,7 +179,7 @@ void invoke_correct_integers(solution_t& solution, f_t tol) template void invoke_correct_integers(solution_t & solution, \ F_TYPE tol); -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/mip/mip_constants.hpp b/cpp/src/mip/mip_constants.hpp index 66f5ebd273..cf04df9b0f 100644 --- a/cpp/src/mip/mip_constants.hpp +++ b/cpp/src/mip/mip_constants.hpp @@ -11,3 +11,5 @@ #define MIP_INSTANTIATE_FLOAT CUOPT_INSTANTIATE_FLOAT #define MIP_INSTANTIATE_DOUBLE CUOPT_INSTANTIATE_DOUBLE + +#define PDLP_INSTANTIATE_FLOAT 1 diff --git a/cpp/src/mip/problem/problem.cu b/cpp/src/mip/problem/problem.cu index 8feaee5239..74a6144a7d 100644 --- a/cpp/src/mip/problem/problem.cu +++ b/cpp/src/mip/problem/problem.cu @@ -2070,7 +2070,7 @@ void 
problem_t::update_variable_bounds(const std::vector& var_ind RAFT_CHECK_CUDA(handle_ptr->get_stream()); } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class problem_t; #endif diff --git a/cpp/src/mip/solution/solution.cu b/cpp/src/mip/solution/solution.cu index 9e9a2d75f4..399ac6b6af 100644 --- a/cpp/src/mip/solution/solution.cu +++ b/cpp/src/mip/solution/solution.cu @@ -657,7 +657,7 @@ mip_solution_t solution_t::get_solution(bool output_feasible } } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class solution_t; #endif diff --git a/cpp/src/mip/solver_solution.cu b/cpp/src/mip/solver_solution.cu index 2ce6d5700c..c3fde8f3cf 100644 --- a/cpp/src/mip/solver_solution.cu +++ b/cpp/src/mip/solver_solution.cu @@ -208,8 +208,8 @@ void mip_solution_t::write_to_sol_file(std::string_view filename, status = "Infeasible"; } - double objective_value = get_objective_value(); - auto& var_names = get_variable_names(); + f_t objective_value = get_objective_value(); + auto& var_names = get_variable_names(); std::vector solution; solution.resize(solution_.size()); raft::copy(solution.data(), solution_.data(), solution_.size(), stream_view.value()); @@ -233,7 +233,7 @@ void mip_solution_t::log_summary() const CUOPT_LOG_INFO("Total Solve Time: %f", get_total_solve_time()); } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class mip_solution_t; #endif diff --git a/cpp/tests/linear_programming/pdlp_test.cu b/cpp/tests/linear_programming/pdlp_test.cu index 994fa89fef..17b9f61d0a 100644 --- a/cpp/tests/linear_programming/pdlp_test.cu +++ b/cpp/tests/linear_programming/pdlp_test.cu @@ -20,8 +20,10 @@ #include #include #include + #include #include +#include #include #include @@ -45,10 +47,13 @@ namespace cuopt::linear_programming::test { -constexpr double afiro_primal_objective = -464; - +constexpr double afiro_primal_objective = -464.0; +#if PDLP_INSTANTIATE_FLOAT +constexpr float afiro_primal_objective_f32 = -464.0f; +#endif // Accept a 
1% error -static bool is_incorrect_objective(double reference, double objective) +template +static bool is_incorrect_objective(f_t reference, f_t objective) { if (reference == 0) { return std::abs(objective) > 0.01; } if (objective == 0) { return std::abs(reference) > 0.01; } @@ -1867,6 +1872,110 @@ TEST(pdlp_class, some_climber_hit_iteration_limit) } } +#if PDLP_INSTANTIATE_FLOAT +TEST(pdlp_class, run_float32) +{ + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + + optimization_problem_solution_t solution = + solve_lp(&handle_, op_problem, solver_settings); + EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); + + EXPECT_FALSE(is_incorrect_objective( + afiro_primal_objective_f32, + solution.get_additional_termination_information().primal_objective)); +} + +TEST(pdlp_class, float32_dual_simplex_throws_validation_error) +{ + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::DualSimplex; + + optimization_problem_solution_t solution = + solve_lp(&handle_, op_problem, solver_settings); + EXPECT_EQ(solution.get_error_status().get_error_type(), cuopt::error_type_t::ValidationError); +} + +TEST(pdlp_class, float32_barrier_throws_validation_error) +{ + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto solver_settings = 
pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::Barrier; + + optimization_problem_solution_t solution = + solve_lp(&handle_, op_problem, solver_settings); + EXPECT_EQ(solution.get_error_status().get_error_type(), cuopt::error_type_t::ValidationError); +} + +TEST(pdlp_class, float32_concurrent_throws_validation_error) +{ + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::Concurrent; + + optimization_problem_solution_t solution = + solve_lp(&handle_, op_problem, solver_settings); + EXPECT_EQ(solution.get_error_status().get_error_type(), cuopt::error_type_t::ValidationError); +} + +TEST(pdlp_class, float32_presolve_throws_validation_error) +{ + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.presolve = true; + + optimization_problem_solution_t solution = + solve_lp(&handle_, op_problem, solver_settings); + EXPECT_EQ(solution.get_error_status().get_error_type(), cuopt::error_type_t::ValidationError); +} + +TEST(pdlp_class, float32_crossover_throws_validation_error) +{ + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.crossover = true; + + optimization_problem_solution_t solution 
= + solve_lp(&handle_, op_problem, solver_settings); + EXPECT_EQ(solution.get_error_status().get_error_type(), cuopt::error_type_t::ValidationError); +} +#endif + } // namespace cuopt::linear_programming::test CUOPT_TEST_PROGRAM_MAIN() From b9ac3630a9490285a81952d1af59ab69dbbc626c Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Wed, 4 Feb 2026 11:11:30 +0100 Subject: [PATCH 02/23] updade run_pdlp to allow for fp32 --- .../linear_programming/cuopt/run_pdlp.cu | 78 ++++++++++++------- 1 file changed, 48 insertions(+), 30 deletions(-) diff --git a/benchmarks/linear_programming/cuopt/run_pdlp.cu b/benchmarks/linear_programming/cuopt/run_pdlp.cu index 229c72a49b..78b4f42e9c 100644 --- a/benchmarks/linear_programming/cuopt/run_pdlp.cu +++ b/benchmarks/linear_programming/cuopt/run_pdlp.cu @@ -77,6 +77,11 @@ static void parse_arguments(argparse::ArgumentParser& program) .choices(0, 1); program.add_argument("--solution-path").help("Path where solution file will be generated"); + + program.add_argument("--pdlp-fp32") + .help("Use FP32 (float) precision instead of FP64 (double). 
Only PDLP method without presolve and crossover is supported.") + .default_value(false) + .implicit_value(true); } static cuopt::linear_programming::pdlp_solver_mode_t string_to_pdlp_solver_mode( @@ -94,15 +99,16 @@ static cuopt::linear_programming::pdlp_solver_mode_t string_to_pdlp_solver_mode( return cuopt::linear_programming::pdlp_solver_mode_t::Stable3; } -static cuopt::linear_programming::pdlp_solver_settings_t create_solver_settings( +template +static cuopt::linear_programming::pdlp_solver_settings_t create_solver_settings( const argparse::ArgumentParser& program) { - cuopt::linear_programming::pdlp_solver_settings_t settings = - cuopt::linear_programming::pdlp_solver_settings_t{}; + cuopt::linear_programming::pdlp_solver_settings_t settings = + cuopt::linear_programming::pdlp_solver_settings_t{}; - settings.time_limit = program.get("--time-limit"); + settings.time_limit = static_cast(program.get("--time-limit")); settings.iteration_limit = program.get("--iteration-limit"); - settings.set_optimality_tolerance(program.get("--optimality-tolerance")); + settings.set_optimality_tolerance(static_cast(program.get("--optimality-tolerance"))); settings.pdlp_solver_mode = string_to_pdlp_solver_mode(program.get("--pdlp-solver-mode")); settings.method = static_cast(program.get("--method")); @@ -112,23 +118,12 @@ static cuopt::linear_programming::pdlp_solver_settings_t create_sol return settings; } -int main(int argc, char* argv[]) +template +static int run_solver(const argparse::ArgumentParser& program, const raft::handle_t& handle_) { - // Parse binary arguments - argparse::ArgumentParser program("solve_LP"); - parse_arguments(program); - - try { - program.parse_args(argc, argv); - } catch (const std::runtime_error& err) { - std::cerr << err.what() << std::endl; - std::cerr << program; - return 1; - } - // Initialize solver settings from binary arguments - cuopt::linear_programming::pdlp_solver_settings_t settings = - create_solver_settings(program); + 
cuopt::linear_programming::pdlp_solver_settings_t settings = + create_solver_settings(program); bool use_pdlp_solver_mode = true; if (program.is_used("--pdlp-hyper-params-path")) { @@ -137,20 +132,13 @@ int main(int argc, char* argv[]) use_pdlp_solver_mode = false; } - // Setup up RMM memory pool - auto memory_resource = make_pool(); - rmm::mr::set_current_device_resource(memory_resource.get()); - - // Initialize raft handle and running stream - const raft::handle_t handle_{}; - // Parse MPS file - cuopt::mps_parser::mps_data_model_t op_problem = - cuopt::mps_parser::parse_mps(program.get("--path")); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(program.get("--path")); // Solve LP problem bool problem_checking = true; - cuopt::linear_programming::optimization_problem_solution_t solution = + cuopt::linear_programming::optimization_problem_solution_t solution = cuopt::linear_programming::solve_lp( &handle_, op_problem, settings, problem_checking, use_pdlp_solver_mode); @@ -160,3 +148,33 @@ int main(int argc, char* argv[]) return 0; } + +int main(int argc, char* argv[]) +{ + // Parse binary arguments + argparse::ArgumentParser program("solve_LP"); + parse_arguments(program); + + try { + program.parse_args(argc, argv); + } catch (const std::runtime_error& err) { + std::cerr << err.what() << std::endl; + std::cerr << program; + return 1; + } + + // Setup up RMM memory pool + auto memory_resource = make_pool(); + rmm::mr::set_current_device_resource(memory_resource.get()); + + // Initialize raft handle and running stream + const raft::handle_t handle_{}; + + // Run solver with appropriate precision + bool use_fp32 = program.get("--pdlp-fp32"); + if (use_fp32) { + return run_solver(program, handle_); + } else { + return run_solver(program, handle_); + } +} From c4e778efba8b54a15c5f6bbc2bcdb322077e9e60 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Wed, 11 Feb 2026 12:02:55 +0100 Subject: [PATCH 03/23] support fp32 with presolve --- 
.../linear_programming/cuopt/run_pdlp.cu | 20 +- cpp/src/linear_programming/solve.cu | 67 +++-- cpp/src/mip/presolve/gf2_presolve.cpp | 2 +- cpp/src/mip/presolve/third_party_presolve.cpp | 256 +++++++++++++----- cpp/src/mip/problem/presolve_data.cu | 2 +- cpp/tests/linear_programming/pdlp_test.cu | 33 ++- 6 files changed, 264 insertions(+), 116 deletions(-) diff --git a/benchmarks/linear_programming/cuopt/run_pdlp.cu b/benchmarks/linear_programming/cuopt/run_pdlp.cu index 78b4f42e9c..f8b08c7e31 100644 --- a/benchmarks/linear_programming/cuopt/run_pdlp.cu +++ b/benchmarks/linear_programming/cuopt/run_pdlp.cu @@ -70,11 +70,10 @@ static void parse_arguments(argparse::ArgumentParser& program) "Path to PDLP hyper-params file to configure PDLP solver. Has priority over PDLP solver " "modes."); - program.add_argument("--presolve") - .help("enable/disable presolve (default: true for MIP problems, false for LP problems)") - .default_value(0) - .scan<'i', int>() - .choices(0, 1); + program.add_argument("--presolver") + .help("Presolver to use. 
Possible values: None, Papilo, PSLP, Default") + .default_value("Default") + .choices("None", "Papilo", "PSLP", "Default"); program.add_argument("--solution-path").help("Path where solution file will be generated"); @@ -99,6 +98,15 @@ static cuopt::linear_programming::pdlp_solver_mode_t string_to_pdlp_solver_mode( return cuopt::linear_programming::pdlp_solver_mode_t::Stable3; } +static cuopt::linear_programming::presolver_t string_to_presolver(const std::string& presolver) +{ + if (presolver == "None") return cuopt::linear_programming::presolver_t::None; + if (presolver == "Papilo") return cuopt::linear_programming::presolver_t::Papilo; + if (presolver == "PSLP") return cuopt::linear_programming::presolver_t::PSLP; + if (presolver == "Default") return cuopt::linear_programming::presolver_t::Default; + return cuopt::linear_programming::presolver_t::Default; +} + template static cuopt::linear_programming::pdlp_solver_settings_t create_solver_settings( const argparse::ArgumentParser& program) @@ -113,7 +121,7 @@ static cuopt::linear_programming::pdlp_solver_settings_t create_solver string_to_pdlp_solver_mode(program.get("--pdlp-solver-mode")); settings.method = static_cast(program.get("--method")); settings.crossover = program.get("--crossover"); - settings.presolve = program.get("--presolve"); + settings.presolver = string_to_presolver(program.get("--presolver")); return settings; } diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index d8b9fbe617..f141091bf2 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -1126,6 +1126,7 @@ optimization_problem_solution_t solve_lp_with_method( } } else { // Float precision only supports PDLP without presolve/crossover + // TODO when running with cuopt_cli this doesn't show, should we just use CUOPT_LOG_INFO instead? cuopt_expects(settings.method == method_t::PDLP, error_type_t::ValidationError, "Float precision only supports PDLP method. 
DualSimplex, Barrier, and Concurrent " @@ -1300,41 +1301,39 @@ optimization_problem_solution_t solve_lp( auto solution = solve_lp_with_method(problem, settings, lp_timer, is_batch_mode); - if constexpr (std::is_same_v) { - if (run_presolve) { - auto primal_solution = cuopt::device_copy(solution.get_primal_solution(), - op_problem.get_handle_ptr()->get_stream()); - auto dual_solution = - cuopt::device_copy(solution.get_dual_solution(), op_problem.get_handle_ptr()->get_stream()); - auto reduced_costs = - cuopt::device_copy(solution.get_reduced_cost(), op_problem.get_handle_ptr()->get_stream()); - bool status_to_skip = false; - - presolver->undo(primal_solution, - dual_solution, - reduced_costs, - cuopt::linear_programming::problem_category_t::LP, - status_to_skip, - settings.dual_postsolve, - op_problem.get_handle_ptr()->get_stream()); + if (run_presolve) { + auto primal_solution = cuopt::device_copy(solution.get_primal_solution(), + op_problem.get_handle_ptr()->get_stream()); + auto dual_solution = + cuopt::device_copy(solution.get_dual_solution(), op_problem.get_handle_ptr()->get_stream()); + auto reduced_costs = + cuopt::device_copy(solution.get_reduced_cost(), op_problem.get_handle_ptr()->get_stream()); + bool status_to_skip = false; + + presolver->undo(primal_solution, + dual_solution, + reduced_costs, + cuopt::linear_programming::problem_category_t::LP, + status_to_skip, + settings.dual_postsolve, + op_problem.get_handle_ptr()->get_stream()); - std::vector< - typename optimization_problem_solution_t::additional_termination_information_t> - term_vec = solution.get_additional_termination_informations(); - std::vector status_vec = solution.get_terminations_status(); - - // Create a new solution with the full problem solution - solution = - optimization_problem_solution_t(primal_solution, - dual_solution, - reduced_costs, - std::move(solution.get_pdlp_warm_start_data()), - op_problem.get_objective_name(), - op_problem.get_variable_names(), - 
op_problem.get_row_names(), - std::move(term_vec), - std::move(status_vec)); - } + std::vector< + typename optimization_problem_solution_t::additional_termination_information_t> + term_vec = solution.get_additional_termination_informations(); + std::vector status_vec = solution.get_terminations_status(); + + // Create a new solution with the full problem solution + solution = + optimization_problem_solution_t(primal_solution, + dual_solution, + reduced_costs, + std::move(solution.get_pdlp_warm_start_data()), + op_problem.get_objective_name(), + op_problem.get_variable_names(), + op_problem.get_row_names(), + std::move(term_vec), + std::move(status_vec)); } if (settings.sol_file != "") { diff --git a/cpp/src/mip/presolve/gf2_presolve.cpp b/cpp/src/mip/presolve/gf2_presolve.cpp index b23526e155..af8ef4b771 100644 --- a/cpp/src/mip/presolve/gf2_presolve.cpp +++ b/cpp/src/mip/presolve/gf2_presolve.cpp @@ -247,7 +247,7 @@ papilo::PresolveStatus GF2Presolve::execute(const papilo::Problem& pro #define INSTANTIATE(F_TYPE) template class GF2Presolve; -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/mip/presolve/third_party_presolve.cpp b/cpp/src/mip/presolve/third_party_presolve.cpp index 3d0c17fde7..ab38be6536 100644 --- a/cpp/src/mip/presolve/third_party_presolve.cpp +++ b/cpp/src/mip/presolve/third_party_presolve.cpp @@ -30,6 +30,21 @@ namespace cuopt::linear_programming::detail { +// Helper to convert vector from one type to another (only when types differ) +template +std::vector convert_vector(const std::vector& src) +{ + if constexpr (std::is_same_v) { + return src; // No conversion needed + } else { + std::vector dst(src.size()); + for (size_t i = 0; i < src.size(); ++i) { + dst[i] = static_cast(src[i]); + } + return dst; + } +} + template papilo::Problem build_papilo_problem(const optimization_problem_t& op_problem, problem_category_t category, @@ -220,62 +235,70 @@ PSLPContext 
build_and_run_pslp_presolver(const optimization_problem_t& const auto& constr_ub = op_problem.get_constraint_upper_bounds(); const auto& var_types = op_problem.get_variable_types(); - // Copy data to host - std::vector h_coefficients(coefficients.size()); + // Copy data to host (using f_t type) + std::vector h_coefficients_ft(coefficients.size()); auto stream_view = op_problem.get_handle_ptr()->get_stream(); - raft::copy(h_coefficients.data(), coefficients.data(), coefficients.size(), stream_view); + raft::copy(h_coefficients_ft.data(), coefficients.data(), coefficients.size(), stream_view); std::vector h_offsets(offsets.size()); raft::copy(h_offsets.data(), offsets.data(), offsets.size(), stream_view); std::vector h_variables(variables.size()); raft::copy(h_variables.data(), variables.data(), variables.size(), stream_view); - std::vector h_obj_coeffs(obj_coeffs.size()); - raft::copy(h_obj_coeffs.data(), obj_coeffs.data(), obj_coeffs.size(), stream_view); - std::vector h_var_lb(var_lb.size()); - raft::copy(h_var_lb.data(), var_lb.data(), var_lb.size(), stream_view); - std::vector h_var_ub(var_ub.size()); - raft::copy(h_var_ub.data(), var_ub.data(), var_ub.size(), stream_view); - std::vector h_bounds(bounds.size()); - raft::copy(h_bounds.data(), bounds.data(), bounds.size(), stream_view); + std::vector h_obj_coeffs_ft(obj_coeffs.size()); + raft::copy(h_obj_coeffs_ft.data(), obj_coeffs.data(), obj_coeffs.size(), stream_view); + std::vector h_var_lb_ft(var_lb.size()); + raft::copy(h_var_lb_ft.data(), var_lb.data(), var_lb.size(), stream_view); + std::vector h_var_ub_ft(var_ub.size()); + raft::copy(h_var_ub_ft.data(), var_ub.data(), var_ub.size(), stream_view); + std::vector h_bounds_ft(bounds.size()); + raft::copy(h_bounds_ft.data(), bounds.data(), bounds.size(), stream_view); std::vector h_row_types(row_types.size()); raft::copy(h_row_types.data(), row_types.data(), row_types.size(), stream_view); - std::vector h_constr_lb(constr_lb.size()); - 
raft::copy(h_constr_lb.data(), constr_lb.data(), constr_lb.size(), stream_view); - std::vector h_constr_ub(constr_ub.size()); - raft::copy(h_constr_ub.data(), constr_ub.data(), constr_ub.size(), stream_view); + std::vector h_constr_lb_ft(constr_lb.size()); + raft::copy(h_constr_lb_ft.data(), constr_lb.data(), constr_lb.size(), stream_view); + std::vector h_constr_ub_ft(constr_ub.size()); + raft::copy(h_constr_ub_ft.data(), constr_ub.data(), constr_ub.size(), stream_view); std::vector h_var_types(var_types.size()); raft::copy(h_var_types.data(), var_types.data(), var_types.size(), stream_view); stream_view.synchronize(); if (maximize) { - for (size_t i = 0; i < h_obj_coeffs.size(); ++i) { - h_obj_coeffs[i] = -h_obj_coeffs[i]; + for (size_t i = 0; i < h_obj_coeffs_ft.size(); ++i) { + h_obj_coeffs_ft[i] = -h_obj_coeffs_ft[i]; } } - auto constr_bounds_empty = h_constr_lb.empty() && h_constr_ub.empty(); + auto constr_bounds_empty = h_constr_lb_ft.empty() && h_constr_ub_ft.empty(); if (constr_bounds_empty) { for (size_t i = 0; i < h_row_types.size(); ++i) { if (h_row_types[i] == 'L') { - h_constr_lb.push_back(-std::numeric_limits::infinity()); - h_constr_ub.push_back(h_bounds[i]); + h_constr_lb_ft.push_back(-std::numeric_limits::infinity()); + h_constr_ub_ft.push_back(h_bounds_ft[i]); } else if (h_row_types[i] == 'G') { - h_constr_lb.push_back(h_bounds[i]); - h_constr_ub.push_back(std::numeric_limits::infinity()); + h_constr_lb_ft.push_back(h_bounds_ft[i]); + h_constr_ub_ft.push_back(std::numeric_limits::infinity()); } else if (h_row_types[i] == 'E') { - h_constr_lb.push_back(h_bounds[i]); - h_constr_ub.push_back(h_bounds[i]); + h_constr_lb_ft.push_back(h_bounds_ft[i]); + h_constr_ub_ft.push_back(h_bounds_ft[i]); } } } // handle empty variable bounds - if (h_var_lb.empty()) { - h_var_lb = std::vector(num_cols, -std::numeric_limits::infinity()); + if (h_var_lb_ft.empty()) { + h_var_lb_ft = std::vector(num_cols, -std::numeric_limits::infinity()); } - if (h_var_ub.empty()) 
{ - h_var_ub = std::vector(num_cols, std::numeric_limits::infinity()); + if (h_var_ub_ft.empty()) { + h_var_ub_ft = std::vector(num_cols, std::numeric_limits::infinity()); } + // Convert to double for PSLP API if necessary (PSLP only accepts double*) + std::vector h_coefficients = convert_vector(h_coefficients_ft); + std::vector h_obj_coeffs = convert_vector(h_obj_coeffs_ft); + std::vector h_var_lb = convert_vector(h_var_lb_ft); + std::vector h_var_ub = convert_vector(h_var_ub_ft); + std::vector h_constr_lb = convert_vector(h_constr_lb_ft); + std::vector h_constr_ub = convert_vector(h_constr_ub_ft); + // Call PSLP presolver ctx.settings = default_settings(); ctx.settings->verbose = false; @@ -331,7 +354,7 @@ optimization_problem_t build_optimization_problem_from_pslp( // PSLP does not allow setting the objective offset, so we add the original objective offset to // the reduced objective offset obj_offset += original_obj_offset; - op_problem.set_objective_offset(obj_offset); + op_problem.set_objective_offset(static_cast(obj_offset)); op_problem.set_maximize(maximize); op_problem.set_problem_category(problem_category_t::LP); @@ -343,21 +366,65 @@ optimization_problem_t build_optimization_problem_from_pslp( return op_problem; } - op_problem.set_csr_constraint_matrix( - reduced_prob->Ax, nnz, reduced_prob->Ai, nnz, reduced_prob->Ap, n_rows + 1); + if constexpr (std::is_same_v) { + // PSLP uses double internally, so we can use the data directly + op_problem.set_csr_constraint_matrix( + reduced_prob->Ax, nnz, reduced_prob->Ai, nnz, reduced_prob->Ap, n_rows + 1); - std::vector h_obj_coeffs(n_cols); - std::copy(reduced_prob->c, reduced_prob->c + n_cols, h_obj_coeffs.begin()); - if (maximize) { - for (size_t i = 0; i < n_cols; ++i) { - h_obj_coeffs[i] = -h_obj_coeffs[i]; + std::vector h_obj_coeffs(n_cols); + std::copy(reduced_prob->c, reduced_prob->c + n_cols, h_obj_coeffs.begin()); + if (maximize) { + for (size_t i = 0; i < n_cols; ++i) { + h_obj_coeffs[i] = 
-h_obj_coeffs[i]; + } + } + op_problem.set_objective_coefficients(h_obj_coeffs.data(), n_cols); + op_problem.set_constraint_lower_bounds(reduced_prob->lhs, n_rows); + op_problem.set_constraint_upper_bounds(reduced_prob->rhs, n_rows); + op_problem.set_variable_lower_bounds(reduced_prob->lbs, n_cols); + op_problem.set_variable_upper_bounds(reduced_prob->ubs, n_cols); + } else { + // Convert PSLP double arrays to f_t + // Constraint matrix values (Ax) + std::vector h_Ax(nnz); + for (int i = 0; i < nnz; ++i) { + h_Ax[i] = static_cast(reduced_prob->Ax[i]); + } + op_problem.set_csr_constraint_matrix( + h_Ax.data(), nnz, reduced_prob->Ai, nnz, reduced_prob->Ap, n_rows + 1); + + // Objective coefficients + std::vector h_obj_coeffs(n_cols); + for (int i = 0; i < n_cols; ++i) { + h_obj_coeffs[i] = static_cast(reduced_prob->c[i]); + } + if (maximize) { + for (int i = 0; i < n_cols; ++i) { + h_obj_coeffs[i] = -h_obj_coeffs[i]; + } + } + op_problem.set_objective_coefficients(h_obj_coeffs.data(), n_cols); + + // Constraint bounds + std::vector h_constr_lb(n_rows); + std::vector h_constr_ub(n_rows); + for (int i = 0; i < n_rows; ++i) { + h_constr_lb[i] = static_cast(reduced_prob->lhs[i]); + h_constr_ub[i] = static_cast(reduced_prob->rhs[i]); } + op_problem.set_constraint_lower_bounds(h_constr_lb.data(), n_rows); + op_problem.set_constraint_upper_bounds(h_constr_ub.data(), n_rows); + + // Variable bounds + std::vector h_var_lb(n_cols); + std::vector h_var_ub(n_cols); + for (int i = 0; i < n_cols; ++i) { + h_var_lb[i] = static_cast(reduced_prob->lbs[i]); + h_var_ub[i] = static_cast(reduced_prob->ubs[i]); + } + op_problem.set_variable_lower_bounds(h_var_lb.data(), n_cols); + op_problem.set_variable_upper_bounds(h_var_ub.data(), n_cols); } - op_problem.set_objective_coefficients(h_obj_coeffs.data(), n_cols); - op_problem.set_constraint_lower_bounds(reduced_prob->lhs, n_rows); - op_problem.set_constraint_upper_bounds(reduced_prob->rhs, n_rows); - 
op_problem.set_variable_lower_bounds(reduced_prob->lbs, n_cols); - op_problem.set_variable_upper_bounds(reduced_prob->ubs, n_cols); return op_problem; } @@ -396,6 +463,7 @@ optimization_problem_t build_optimization_problem( obj.coefficients[i] = -obj.coefficients[i]; } } + op_problem.set_objective_coefficients(obj.coefficients.data(), obj.coefficients.size()); auto& constraint_matrix = papilo_problem.getConstraintMatrix(); @@ -430,8 +498,9 @@ optimization_problem_t build_optimization_problem( i_t nnz = constraint_matrix.getNnz(); assert(offsets[nrows] == nnz); - const int* cols = constraint_matrix.getConstraintMatrix().getColumns(); - const f_t* coeffs = constraint_matrix.getConstraintMatrix().getValues(); + const int* cols = constraint_matrix.getConstraintMatrix().getColumns(); + const f_t* coeffs = constraint_matrix.getConstraintMatrix().getValues(); + op_problem.set_csr_constraint_matrix( &(coeffs[start]), nnz, &(cols[start]), nnz, offsets.data(), nrows + 1); @@ -497,7 +566,8 @@ void set_presolve_methods(papilo::Presolve& presolver, if (category == problem_category_t::MIP) { // cuOpt custom GF2 presolver - presolver.addPresolveMethod(uptr(new cuopt::linear_programming::detail::GF2Presolve())); + presolver.addPresolveMethod( + uptr(new cuopt::linear_programming::detail::GF2Presolve())); } // fast presolvers presolver.addPresolveMethod(uptr(new papilo::SingletonCols())); @@ -535,7 +605,7 @@ void set_presolve_options(papilo::Presolve& presolver, problem_category_t category, f_t absolute_tolerance, f_t relative_tolerance, - double time_limit, + f_t time_limit, bool dual_postsolve, i_t num_cpu_threads) { @@ -625,7 +695,7 @@ std::optional> third_party_presolve_t papilo_presolver; - set_presolve_methods(papilo_presolver, category, dual_postsolve); + set_presolve_methods(papilo_presolver, category, dual_postsolve); set_presolve_options(papilo_presolver, category, absolute_tolerance, @@ -633,7 +703,7 @@ std::optional> third_party_presolve_t( + set_presolve_parameters( 
papilo_presolver, category, op_problem.get_n_constraints(), op_problem.get_n_variables()); // Disable papilo logs @@ -697,12 +767,15 @@ void third_party_presolve_t::undo(rmm::device_uvector& primal_sol } if (status_to_skip) { return; } + std::vector primal_sol_vec_h(primal_solution.size()); raft::copy(primal_sol_vec_h.data(), primal_solution.data(), primal_solution.size(), stream_view); std::vector dual_sol_vec_h(dual_solution.size()); raft::copy(dual_sol_vec_h.data(), dual_solution.data(), dual_solution.size(), stream_view); std::vector reduced_costs_vec_h(reduced_costs.size()); - raft::copy(reduced_costs_vec_h.data(), reduced_costs.data(), reduced_costs.size(), stream_view); + raft::copy( + reduced_costs_vec_h.data(), reduced_costs.data(), reduced_costs.size(), stream_view); + papilo::Solution reduced_sol(primal_sol_vec_h); if (dual_postsolve) { reduced_sol.dual = dual_sol_vec_h; @@ -734,26 +807,71 @@ void third_party_presolve_t::undo_pslp(rmm::device_uvector& prima rmm::device_uvector& reduced_costs, rmm::cuda_stream_view stream_view) { - std::vector h_primal_solution(primal_solution.size()); - std::vector h_dual_solution(dual_solution.size()); - std::vector h_reduced_costs(reduced_costs.size()); - raft::copy(h_primal_solution.data(), primal_solution.data(), primal_solution.size(), stream_view); - raft::copy(h_dual_solution.data(), dual_solution.data(), dual_solution.size(), stream_view); - raft::copy(h_reduced_costs.data(), reduced_costs.data(), reduced_costs.size(), stream_view); - - postsolve( - pslp_presolver_, h_primal_solution.data(), h_dual_solution.data(), h_reduced_costs.data()); - - auto uncrushed_sol = pslp_presolver_->sol; - int n_cols = uncrushed_sol->dim_x; - int n_rows = uncrushed_sol->dim_y; - - primal_solution.resize(n_cols, stream_view); - dual_solution.resize(n_rows, stream_view); - reduced_costs.resize(n_cols, stream_view); - raft::copy(primal_solution.data(), uncrushed_sol->x, n_cols, stream_view); - raft::copy(dual_solution.data(), 
uncrushed_sol->y, n_rows, stream_view); - raft::copy(reduced_costs.data(), uncrushed_sol->z, n_cols, stream_view); + if constexpr (std::is_same_v) { + // PSLP uses double internally, so we can use the data directly + std::vector h_primal_solution(primal_solution.size()); + std::vector h_dual_solution(dual_solution.size()); + std::vector h_reduced_costs(reduced_costs.size()); + raft::copy( + h_primal_solution.data(), primal_solution.data(), primal_solution.size(), stream_view); + raft::copy(h_dual_solution.data(), dual_solution.data(), dual_solution.size(), stream_view); + raft::copy(h_reduced_costs.data(), reduced_costs.data(), reduced_costs.size(), stream_view); + stream_view.synchronize(); + + postsolve( + pslp_presolver_, h_primal_solution.data(), h_dual_solution.data(), h_reduced_costs.data()); + + auto uncrushed_sol = pslp_presolver_->sol; + int n_cols = uncrushed_sol->dim_x; + int n_rows = uncrushed_sol->dim_y; + + primal_solution.resize(n_cols, stream_view); + dual_solution.resize(n_rows, stream_view); + reduced_costs.resize(n_cols, stream_view); + raft::copy(primal_solution.data(), uncrushed_sol->x, n_cols, stream_view); + raft::copy(dual_solution.data(), uncrushed_sol->y, n_rows, stream_view); + raft::copy(reduced_costs.data(), uncrushed_sol->z, n_cols, stream_view); + } else { + // Convert f_t to double for PSLP postsolve API + std::vector h_primal_solution_ft(primal_solution.size()); + std::vector h_dual_solution_ft(dual_solution.size()); + std::vector h_reduced_costs_ft(reduced_costs.size()); + raft::copy( + h_primal_solution_ft.data(), primal_solution.data(), primal_solution.size(), stream_view); + raft::copy(h_dual_solution_ft.data(), dual_solution.data(), dual_solution.size(), stream_view); + raft::copy(h_reduced_costs_ft.data(), reduced_costs.data(), reduced_costs.size(), stream_view); + stream_view.synchronize(); + + std::vector h_primal_solution = convert_vector(h_primal_solution_ft); + std::vector h_dual_solution = 
convert_vector(h_dual_solution_ft); + std::vector h_reduced_costs = convert_vector(h_reduced_costs_ft); + + postsolve( + pslp_presolver_, h_primal_solution.data(), h_dual_solution.data(), h_reduced_costs.data()); + + auto uncrushed_sol = pslp_presolver_->sol; + int n_cols = uncrushed_sol->dim_x; + int n_rows = uncrushed_sol->dim_y; + + // Convert double results back to f_t and copy to device + std::vector h_primal_out(n_cols); + std::vector h_dual_out(n_rows); + std::vector h_reduced_costs_out(n_cols); + for (int i = 0; i < n_cols; ++i) { + h_primal_out[i] = static_cast(uncrushed_sol->x[i]); + h_reduced_costs_out[i] = static_cast(uncrushed_sol->z[i]); + } + for (int i = 0; i < n_rows; ++i) { + h_dual_out[i] = static_cast(uncrushed_sol->y[i]); + } + + primal_solution.resize(n_cols, stream_view); + dual_solution.resize(n_rows, stream_view); + reduced_costs.resize(n_cols, stream_view); + raft::copy(primal_solution.data(), h_primal_out.data(), n_cols, stream_view); + raft::copy(dual_solution.data(), h_dual_out.data(), n_rows, stream_view); + raft::copy(reduced_costs.data(), h_reduced_costs_out.data(), n_cols, stream_view); + } stream_view.synchronize(); } @@ -795,7 +913,7 @@ void papilo_postsolve_deleter::operator()(papilo::PostsolveStorage* pt delete ptr; } -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template struct papilo_postsolve_deleter; template class third_party_presolve_t; #endif diff --git a/cpp/src/mip/problem/presolve_data.cu b/cpp/src/mip/problem/presolve_data.cu index d09754bc2b..5cd9befdfc 100644 --- a/cpp/src/mip/problem/presolve_data.cu +++ b/cpp/src/mip/problem/presolve_data.cu @@ -245,7 +245,7 @@ void presolve_data_t::papilo_uncrush_assignment( problem.handle_ptr->sync_stream(); } -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class presolve_data_t; #endif diff --git a/cpp/tests/linear_programming/pdlp_test.cu b/cpp/tests/linear_programming/pdlp_test.cu index 
8fb7b579f5..0bec43cd62 100644 --- a/cpp/tests/linear_programming/pdlp_test.cu +++ b/cpp/tests/linear_programming/pdlp_test.cu @@ -1962,7 +1962,7 @@ TEST(pdlp_class, float32_concurrent_throws_validation_error) EXPECT_EQ(solution.get_error_status().get_error_type(), cuopt::error_type_t::ValidationError); } -TEST(pdlp_class, float32_presolve_throws_validation_error) +TEST(pdlp_class, float32_papilo_presolve_works) { const raft::handle_t handle_{}; @@ -1970,13 +1970,36 @@ TEST(pdlp_class, float32_presolve_throws_validation_error) cuopt::mps_parser::mps_data_model_t op_problem = cuopt::mps_parser::parse_mps(path, true); - auto solver_settings = pdlp_solver_settings_t{}; - solver_settings.method = cuopt::linear_programming::method_t::PDLP; - solver_settings.presolve = true; + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.presolver = cuopt::linear_programming::presolver_t::Papilo; optimization_problem_solution_t solution = solve_lp(&handle_, op_problem, solver_settings); - EXPECT_EQ(solution.get_error_status().get_error_type(), cuopt::error_type_t::ValidationError); + EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); + EXPECT_FALSE(is_incorrect_objective( + afiro_primal_objective_f32, + solution.get_additional_termination_information().primal_objective)); +} + +TEST(pdlp_class, float32_pslp_presolve_works) +{ + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.presolver = cuopt::linear_programming::presolver_t::PSLP; + + optimization_problem_solution_t solution = + solve_lp(&handle_, op_problem, solver_settings); + 
EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); + EXPECT_FALSE(is_incorrect_objective( + afiro_primal_objective_f32, + solution.get_additional_termination_information().primal_objective)); } TEST(pdlp_class, float32_crossover_throws_validation_error) From 6a726685e113bd97b3a26137e93f0ec9ef7e57fe Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Wed, 11 Feb 2026 16:08:23 +0100 Subject: [PATCH 04/23] implement and toggle mixed precision --- cpp/src/linear_programming/cusparse_view.cu | 220 +++++++++++++++++- cpp/src/linear_programming/cusparse_view.hpp | 49 ++++ cpp/src/linear_programming/pdhg.cu | 108 ++++++--- cpp/src/linear_programming/pdlp.cu | 3 + cpp/src/linear_programming/pdlp_constants.hpp | 7 + 5 files changed, 350 insertions(+), 37 deletions(-) diff --git a/cpp/src/linear_programming/cusparse_view.cu b/cpp/src/linear_programming/cusparse_view.cu index 02332da03d..35e5bbf97e 100644 --- a/cpp/src/linear_programming/cusparse_view.cu +++ b/cpp/src/linear_programming/cusparse_view.cu @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -21,6 +22,14 @@ #include #include +#include +#include + +// Functor for double-to-float conversion on GPU +struct double_to_float_functor { + __host__ __device__ float operator()(double val) const { return static_cast(val); } +}; + namespace cuopt::linear_programming::detail { // cusparse_sp_mat_descr_wrapper_t implementation @@ -304,7 +313,12 @@ cusparse_view_t::cusparse_view_t( A_{op_problem_scaled.coefficients}, A_offsets_{op_problem_scaled.offsets}, A_indices_{op_problem_scaled.variables}, - climber_strategies_(climber_strategies) + climber_strategies_(climber_strategies), + A_float_{0, handle_ptr->get_stream()}, + A_T_float_{0, handle_ptr->get_stream()}, + buffer_non_transpose_mixed_{0, handle_ptr->get_stream()}, + buffer_transpose_mixed_{0, handle_ptr->get_stream()}, + mixed_precision_enabled_{false} { raft::common::nvtx::range fun_scope("Initializing cuSparse view"); @@ 
-583,6 +597,108 @@ cusparse_view_t::cusparse_view_t( handle_ptr->get_stream()); } #endif + + // Initialize mixed precision SpMV support + // Only when f_t = double and enable_mixed_precision_spmv is true + if constexpr (std::is_same_v && enable_mixed_precision_spmv) { + mixed_precision_enabled_ = true; + + // Create FP32 copies of A and A_T matrix values + A_float_.resize(op_problem_scaled.nnz, handle_ptr->get_stream()); + A_T_float_.resize(op_problem_scaled.nnz, handle_ptr->get_stream()); + + // Convert A values from double to float + thrust::transform(thrust::cuda::par.on(handle_ptr->get_stream()), + op_problem_scaled.coefficients.data(), + op_problem_scaled.coefficients.data() + op_problem_scaled.nnz, + A_float_.data(), + double_to_float_functor{}); + + // Convert A_T values from double to float + thrust::transform(thrust::cuda::par.on(handle_ptr->get_stream()), + A_T_.data(), + A_T_.data() + op_problem_scaled.nnz, + A_T_float_.data(), + double_to_float_functor{}); + + // Create FP32 matrix descriptors for mixed precision SpMV + RAFT_CUSPARSE_TRY(cusparseCreateCsr(&A_mixed_, + op_problem_scaled.n_constraints, + op_problem_scaled.n_variables, + op_problem_scaled.nnz, + const_cast(op_problem_scaled.offsets.data()), + const_cast(op_problem_scaled.variables.data()), + A_float_.data(), + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_32F)); + + RAFT_CUSPARSE_TRY(cusparseCreateCsr(&A_T_mixed_, + op_problem_scaled.n_variables, + op_problem_scaled.n_constraints, + op_problem_scaled.nnz, + const_cast(A_T_offsets_.data()), + const_cast(A_T_indices_.data()), + A_T_float_.data(), + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_32F)); + + // Compute buffer sizes for mixed precision SpMV + const rmm::device_scalar alpha_d{1.0, handle_ptr->get_stream()}; + const rmm::device_scalar beta_d{0.0, handle_ptr->get_stream()}; + + size_t buffer_size_non_transpose_mixed = + 
mixed_precision_spmv_buffersize(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + alpha_d.data(), + A_mixed_, + c, + beta_d.data(), + dual_solution, + CUSPARSE_SPMV_CSR_ALG2, + handle_ptr->get_stream()); + buffer_non_transpose_mixed_.resize(buffer_size_non_transpose_mixed, handle_ptr->get_stream()); + + size_t buffer_size_transpose_mixed = + mixed_precision_spmv_buffersize(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + alpha_d.data(), + A_T_mixed_, + dual_solution, + beta_d.data(), + c, + CUSPARSE_SPMV_CSR_ALG2, + handle_ptr->get_stream()); + buffer_transpose_mixed_.resize(buffer_size_transpose_mixed, handle_ptr->get_stream()); + +#if CUDA_VER_12_4_UP + // Preprocess mixed precision SpMV + mixed_precision_spmv_preprocess(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + alpha_d.data(), + A_mixed_, + c, + beta_d.data(), + dual_solution, + CUSPARSE_SPMV_CSR_ALG2, + buffer_non_transpose_mixed_.data(), + handle_ptr->get_stream()); + + mixed_precision_spmv_preprocess(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + alpha_d.data(), + A_T_mixed_, + dual_solution, + beta_d.data(), + c, + CUSPARSE_SPMV_CSR_ALG2, + buffer_transpose_mixed_.data(), + handle_ptr->get_stream()); +#endif + } } // Used by pdlp object for current and average termination condition @@ -625,7 +741,12 @@ cusparse_view_t::cusparse_view_t( A_{op_problem.coefficients}, A_offsets_{op_problem.offsets}, A_indices_{op_problem.variables}, - climber_strategies_(climber_strategies) + climber_strategies_(climber_strategies), + A_float_{0, handle_ptr->get_stream()}, + A_T_float_{0, handle_ptr->get_stream()}, + buffer_non_transpose_mixed_{0, handle_ptr->get_stream()}, + buffer_transpose_mixed_{0, handle_ptr->get_stream()}, + mixed_precision_enabled_{false} { #ifdef PDLP_DEBUG_MODE RAFT_CUDA_TRY(cudaDeviceSynchronize()); @@ -832,7 +953,12 @@ cusparse_view_t::cusparse_view_t( A_{existing_cusparse_view.A_}, 
A_offsets_{existing_cusparse_view.A_offsets_}, A_indices_{existing_cusparse_view.A_indices_}, - climber_strategies_(existing_cusparse_view.climber_strategies_) + climber_strategies_(existing_cusparse_view.climber_strategies_), + A_float_{0, handle_ptr->get_stream()}, + A_T_float_{0, handle_ptr->get_stream()}, + buffer_non_transpose_mixed_{0, handle_ptr->get_stream()}, + buffer_transpose_mixed_{0, handle_ptr->get_stream()}, + mixed_precision_enabled_{false} { #ifdef PDLP_DEBUG_MODE RAFT_CUDA_TRY(cudaDeviceSynchronize()); @@ -942,10 +1068,96 @@ cusparse_view_t::cusparse_view_t( A_(dummy_float), A_offsets_(dummy_int), A_indices_(dummy_int), - climber_strategies_(climber_strategies) + climber_strategies_(climber_strategies), + A_float_{0, handle_ptr->get_stream()}, + A_T_float_{0, handle_ptr->get_stream()}, + buffer_non_transpose_mixed_{0, handle_ptr->get_stream()}, + buffer_transpose_mixed_{0, handle_ptr->get_stream()}, + mixed_precision_enabled_{false} +{ +} + +// Update FP32 matrix copies after scaling (must be called after scale_problem()) +template +void cusparse_view_t::update_mixed_precision_matrices() +{ + if constexpr (std::is_same_v && enable_mixed_precision_spmv) { + if (!mixed_precision_enabled_) { return; } + + // The A_ and A_T_ references point to the scaled matrix data + // Update the FP32 copies with the scaled values + thrust::transform(thrust::cuda::par.on(handle_ptr_->get_stream()), + A_.data(), + A_.data() + A_.size(), + A_float_.data(), + double_to_float_functor{}); + + thrust::transform(thrust::cuda::par.on(handle_ptr_->get_stream()), + A_T_.data(), + A_T_.data() + A_T_.size(), + A_T_float_.data(), + double_to_float_functor{}); + + handle_ptr_->get_stream().synchronize(); + } +} + +// Mixed precision SpMV implementation: FP32 matrix with FP64 vectors and FP64 compute type +size_t mixed_precision_spmv_buffersize(cusparseHandle_t handle, + cusparseOperation_t opA, + const double* alpha, + cusparseSpMatDescr_t matA, // FP32 matrix + 
cusparseDnVecDescr_t vecX, // FP64 vector + const double* beta, + cusparseDnVecDescr_t vecY, // FP64 vector + cusparseSpMVAlg_t alg, + cudaStream_t stream) { + size_t bufferSize = 0; + RAFT_CUSPARSE_TRY(cusparseSetStream(handle, stream)); + RAFT_CUSPARSE_TRY(cusparseSpMV_bufferSize( + handle, opA, alpha, matA, vecX, beta, vecY, CUDA_R_64F, alg, &bufferSize)); + return bufferSize; } +void mixed_precision_spmv(cusparseHandle_t handle, + cusparseOperation_t opA, + const double* alpha, + cusparseSpMatDescr_t matA, // FP32 matrix + cusparseDnVecDescr_t vecX, // FP64 vector + const double* beta, + cusparseDnVecDescr_t vecY, // FP64 vector + cusparseSpMVAlg_t alg, + void* externalBuffer, + cudaStream_t stream) +{ + RAFT_CUSPARSE_TRY(cusparseSetStream(handle, stream)); + RAFT_CUSPARSE_TRY( + cusparseSpMV(handle, opA, alpha, matA, vecX, beta, vecY, CUDA_R_64F, alg, externalBuffer)); +} + +#if CUDA_VER_12_4_UP +void mixed_precision_spmv_preprocess(cusparseHandle_t handle, + cusparseOperation_t opA, + const double* alpha, + cusparseSpMatDescr_t matA, // FP32 matrix + cusparseDnVecDescr_t vecX, // FP64 vector + const double* beta, + cusparseDnVecDescr_t vecY, // FP64 vector + cusparseSpMVAlg_t alg, + void* externalBuffer, + cudaStream_t stream) +{ + static const auto func = + dynamic_load_runtime::function("cusparseSpMV_preprocess"); + if (func.has_value()) { + RAFT_CUSPARSE_TRY(cusparseSetStream(handle, stream)); + RAFT_CUSPARSE_TRY( + (*func)(handle, opA, alpha, matA, vecX, beta, vecY, CUDA_R_64F, alg, externalBuffer)); + } +} +#endif + #if PDLP_INSTANTIATE_FLOAT template class cusparse_sp_mat_descr_wrapper_t; template class cusparse_dn_vec_descr_wrapper_t; diff --git a/cpp/src/linear_programming/cusparse_view.hpp b/cpp/src/linear_programming/cusparse_view.hpp index 699c3aa6c7..ffd4ebd7ff 100644 --- a/cpp/src/linear_programming/cusparse_view.hpp +++ b/cpp/src/linear_programming/cusparse_view.hpp @@ -194,8 +194,57 @@ class cusparse_view_t { const rmm::device_uvector& 
A_indices_; const std::vector& climber_strategies_; + + // Mixed precision SpMV support (FP32 matrix with FP64 vectors/compute) + // Only used when enable_mixed_precision_spmv is true and f_t = double + rmm::device_uvector A_float_; // FP32 copy of A values + rmm::device_uvector A_T_float_; // FP32 copy of A_T values + cusparseSpMatDescr_t A_mixed_{nullptr}; // FP32 matrix descriptor for A + cusparseSpMatDescr_t A_T_mixed_{nullptr}; // FP32 matrix descriptor for A_T + rmm::device_uvector buffer_non_transpose_mixed_; // SpMV buffer for mixed precision A + rmm::device_uvector buffer_transpose_mixed_; // SpMV buffer for mixed precision A_T + bool mixed_precision_enabled_{false}; + + // Update FP32 matrix copies after scaling (must be called after scale_problem()) + void update_mixed_precision_matrices(); }; +// Mixed precision SpMV: FP32 matrix with FP64 vectors and FP64 compute type +// This is used for PDHG iterations when enable_mixed_precision_spmv is true +void mixed_precision_spmv(cusparseHandle_t handle, + cusparseOperation_t opA, + const double* alpha, + cusparseSpMatDescr_t matA, // FP32 matrix + cusparseDnVecDescr_t vecX, // FP64 vector + const double* beta, + cusparseDnVecDescr_t vecY, // FP64 vector + cusparseSpMVAlg_t alg, + void* externalBuffer, + cudaStream_t stream); + +size_t mixed_precision_spmv_buffersize(cusparseHandle_t handle, + cusparseOperation_t opA, + const double* alpha, + cusparseSpMatDescr_t matA, // FP32 matrix + cusparseDnVecDescr_t vecX, // FP64 vector + const double* beta, + cusparseDnVecDescr_t vecY, // FP64 vector + cusparseSpMVAlg_t alg, + cudaStream_t stream); + +#if CUDA_VER_12_4_UP +void mixed_precision_spmv_preprocess(cusparseHandle_t handle, + cusparseOperation_t opA, + const double* alpha, + cusparseSpMatDescr_t matA, // FP32 matrix + cusparseDnVecDescr_t vecX, // FP64 vector + const double* beta, + cusparseDnVecDescr_t vecY, // FP64 vector + cusparseSpMVAlg_t alg, + void* externalBuffer, + cudaStream_t stream); +#endif + #if 
CUDA_VER_12_4_UP template < typename T, diff --git a/cpp/src/linear_programming/pdhg.cu b/cpp/src/linear_programming/pdhg.cu index f094e37ddf..6341494c04 100644 --- a/cpp/src/linear_programming/pdhg.cu +++ b/cpp/src/linear_programming/pdhg.cu @@ -249,17 +249,31 @@ void pdhg_solver_t::compute_next_dual_solution(rmm::device_uvectorget_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), // 1 - cusparse_view_.A, - cusparse_view_.tmp_primal, - reusable_device_scalar_value_0_.data(), // 1 - cusparse_view_.dual_gradient, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view_.buffer_non_transpose.data(), - stream_view_)); + if constexpr (std::is_same_v && enable_mixed_precision_spmv) { + // Mixed precision SpMV: FP32 matrix with FP64 vectors + mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), // 1 + cusparse_view_.A_mixed_, + cusparse_view_.tmp_primal, + reusable_device_scalar_value_0_.data(), // 0 + cusparse_view_.dual_gradient, + CUSPARSE_SPMV_CSR_ALG2, + cusparse_view_.buffer_non_transpose_mixed_.data(), + stream_view_); + } else { + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), // 1 + cusparse_view_.A, + cusparse_view_.tmp_primal, + reusable_device_scalar_value_0_.data(), // 0 + cusparse_view_.dual_gradient, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view_.buffer_non_transpose.data(), + stream_view_)); + } // y - (sigma*dual_gradient) // max(min(0, sigma*constraint_upper+primal_product), sigma*constraint_lower+primal_product) @@ -287,17 +301,31 @@ void pdhg_solver_t::compute_At_y() // A_t @ y if (!batch_mode_) { - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), - cusparse_view_.A_T, - 
cusparse_view_.dual_solution, - reusable_device_scalar_value_0_.data(), - cusparse_view_.current_AtY, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view_.buffer_transpose.data(), - stream_view_)); + if constexpr (std::is_same_v && enable_mixed_precision_spmv) { + // Mixed precision SpMV: FP32 matrix with FP64 vectors + mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_T_mixed_, + cusparse_view_.dual_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.current_AtY, + CUSPARSE_SPMV_CSR_ALG2, + cusparse_view_.buffer_transpose_mixed_.data(), + stream_view_); + } else { + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_T, + cusparse_view_.dual_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.current_AtY, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view_.buffer_transpose.data(), + stream_view_)); + } } else { RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm( handle_ptr_->get_cusparse_handle(), @@ -319,17 +347,31 @@ void pdhg_solver_t::compute_A_x() { // A @ x if (!batch_mode_) { - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), - cusparse_view_.A, - cusparse_view_.reflected_primal_solution, - reusable_device_scalar_value_0_.data(), - cusparse_view_.dual_gradient, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view_.buffer_non_transpose.data(), - stream_view_)); + if constexpr (std::is_same_v && enable_mixed_precision_spmv) { + // Mixed precision SpMV: FP32 matrix with FP64 vectors + mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_mixed_, + 
cusparse_view_.reflected_primal_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.dual_gradient, + CUSPARSE_SPMV_CSR_ALG2, + cusparse_view_.buffer_non_transpose_mixed_.data(), + stream_view_); + } else { + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A, + cusparse_view_.reflected_primal_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.dual_gradient, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view_.buffer_non_transpose.data(), + stream_view_)); + } } else { RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm( handle_ptr_->get_cusparse_handle(), diff --git a/cpp/src/linear_programming/pdlp.cu b/cpp/src/linear_programming/pdlp.cu index f68fa3e33b..aa0f63d18f 100644 --- a/cpp/src/linear_programming/pdlp.cu +++ b/cpp/src/linear_programming/pdlp.cu @@ -2086,6 +2086,9 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co initial_scaling_strategy_.scale_problem(); + // Update FP32 matrix copies for mixed precision SpMV after scaling + pdhg_solver_.get_cusparse_view().update_mixed_precision_matrices(); + if (!settings_.hyper_params.compute_initial_step_size_before_scaling && !settings_.get_initial_step_size().has_value()) compute_initial_step_size(); diff --git a/cpp/src/linear_programming/pdlp_constants.hpp b/cpp/src/linear_programming/pdlp_constants.hpp index cf17cc985b..598b79ea61 100644 --- a/cpp/src/linear_programming/pdlp_constants.hpp +++ b/cpp/src/linear_programming/pdlp_constants.hpp @@ -71,4 +71,11 @@ template <> inline constexpr double safe_guard_for_extreme_values_in_primal_weight_computation = 1.0e-10; +// Mixed precision SpMV configuration for PDLP Stable3 mode +// When enabled, the constraint matrix A (and its transpose) are stored in FP32 +// while vectors and compute types remain in FP64 during PDHG iterations. 
+// This is NOT used during convergence checking which stays in full FP64. +// This only applies when f_t = double (FP64 mode). +inline constexpr bool enable_mixed_precision_spmv = true; + } // namespace cuopt::linear_programming::detail From ea37dd237d9245dca5a8b08a87de8ee6d10472f3 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Fri, 27 Feb 2026 14:33:40 +0100 Subject: [PATCH 05/23] cleanup and doc --- .../linear_programming/cuopt/run_pdlp.cu | 9 +- .../pdlp/solver_settings.hpp | 9 + cpp/src/pdlp/cpu_pdlp_warm_start_data.cu | 12 +- cpp/src/pdlp/cusparse_view.cu | 192 +++++++++--------- cpp/src/pdlp/cusparse_view.hpp | 6 +- cpp/src/pdlp/pdhg.cu | 94 +++++---- cpp/src/pdlp/pdhg.hpp | 3 +- cpp/src/pdlp/pdlp.cu | 3 +- cpp/src/pdlp/pdlp_constants.hpp | 7 - cpp/src/pdlp/solution_conversion.cu | 44 ++-- cpp/tests/linear_programming/pdlp_test.cu | 37 +++- docs/cuopt/source/lp-qp-features.rst | 14 ++ docs/cuopt/source/lp-qp-milp-settings.rst | 24 +++ 13 files changed, 279 insertions(+), 175 deletions(-) diff --git a/benchmarks/linear_programming/cuopt/run_pdlp.cu b/benchmarks/linear_programming/cuopt/run_pdlp.cu index a2e51cfd51..6e793997f3 100644 --- a/benchmarks/linear_programming/cuopt/run_pdlp.cu +++ b/benchmarks/linear_programming/cuopt/run_pdlp.cu @@ -78,7 +78,12 @@ static void parse_arguments(argparse::ArgumentParser& program) program.add_argument("--solution-path").help("Path where solution file will be generated"); program.add_argument("--pdlp-fp32") - .help("Use FP32 (float) precision instead of FP64 (double). Only PDLP method without presolve and crossover is supported.") + .help("Use FP32 (float) precision instead of FP64 (double). Only supported for PDLP method without crossover.") + .default_value(false) + .implicit_value(true); + + program.add_argument("--mixed-precision-spmv") + .help("Enable mixed precision SpMV (FP32 matrix, FP64 vectors) during PDHG iterations. 
Only supported for PDLP method in FP64.") .default_value(false) .implicit_value(true); } @@ -122,6 +127,8 @@ static cuopt::linear_programming::pdlp_solver_settings_t create_solver settings.method = static_cast(program.get("--method")); settings.crossover = program.get("--crossover"); settings.presolver = string_to_presolver(program.get("--presolver")); + settings.mixed_precision_spmv = + program.get("--mixed-precision-spmv"); return settings; } diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp index 61906c286a..59236d3531 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp @@ -239,6 +239,15 @@ class pdlp_solver_settings_t { i_t ordering{-1}; i_t barrier_dual_initial_point{-1}; bool eliminate_dense_columns{true}; + /** + * @brief Enable mixed precision SpMV during PDHG iterations (FP64 mode only). + * + * When true, the constraint matrix A and its transpose are stored in FP32 while + * vectors and compute type remain in FP64, reducing memory bandwidth during SpMV. + * Convergence checking and restarts always use the full FP64 matrix, so this does + * not reduce overall memory usage. Has no effect in FP32 mode. 
+ */ + bool mixed_precision_spmv{false}; bool save_best_primal_so_far{false}; bool first_primal_feasible{false}; presolver_t presolver{presolver_t::Default}; diff --git a/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu b/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu index f9a73dff06..dd196ab66f 100644 --- a/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu +++ b/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu @@ -108,14 +108,14 @@ pdlp_warm_start_data_t convert_to_gpu_warmstart( return gpu_data; } -// Explicit template instantiations +#if MIP_INSTANTIATE_DOUBLE template cpu_pdlp_warm_start_data_t convert_to_cpu_warmstart( const pdlp_warm_start_data_t&, rmm::cuda_stream_view); - -template pdlp_warm_start_data_t convert_to_gpu_warmstart( - const cpu_pdlp_warm_start_data_t&, rmm::cuda_stream_view); - -#if MIP_INSTANTIATE_FLOAT + template pdlp_warm_start_data_t convert_to_gpu_warmstart( + const cpu_pdlp_warm_start_data_t&, rmm::cuda_stream_view); +#endif + +#if PDLP_INSTANTIATE_FLOAT template cpu_pdlp_warm_start_data_t convert_to_cpu_warmstart( const pdlp_warm_start_data_t&, rmm::cuda_stream_view); diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index fa652f9299..80f94f7834 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -21,10 +21,8 @@ #include #include -#include -#include +#include -// Functor for double-to-float conversion on GPU struct double_to_float_functor { __host__ __device__ float operator()(double val) const { return static_cast(val); } }; @@ -285,7 +283,8 @@ cusparse_view_t::cusparse_view_t( rmm::device_uvector& _potential_next_dual_solution, rmm::device_uvector& _reflected_primal_solution, const std::vector& climber_strategies, - const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params) + const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params, + bool enable_mixed_precision_spmv) : batch_mode_(climber_strategies.size() > 1), handle_ptr_(handle_ptr), A{}, @@ -597,60 +596,79 @@ cusparse_view_t::cusparse_view_t( } #endif 
- // Initialize mixed precision SpMV support - // Only when f_t = double and enable_mixed_precision_spmv is true - if constexpr (std::is_same_v && enable_mixed_precision_spmv) { - mixed_precision_enabled_ = true; - - // Create FP32 copies of A and A_T matrix values - A_float_.resize(op_problem_scaled.nnz, handle_ptr->get_stream()); - A_T_float_.resize(op_problem_scaled.nnz, handle_ptr->get_stream()); - - // Convert A values from double to float - thrust::transform(thrust::cuda::par.on(handle_ptr->get_stream()), - op_problem_scaled.coefficients.data(), - op_problem_scaled.coefficients.data() + op_problem_scaled.nnz, - A_float_.data(), - double_to_float_functor{}); - - // Convert A_T values from double to float - thrust::transform(thrust::cuda::par.on(handle_ptr->get_stream()), - A_T_.data(), - A_T_.data() + op_problem_scaled.nnz, - A_T_float_.data(), - double_to_float_functor{}); - - // Create FP32 matrix descriptors for mixed precision SpMV - RAFT_CUSPARSE_TRY(cusparseCreateCsr(&A_mixed_, - op_problem_scaled.n_constraints, - op_problem_scaled.n_variables, - op_problem_scaled.nnz, - const_cast(op_problem_scaled.offsets.data()), - const_cast(op_problem_scaled.variables.data()), - A_float_.data(), - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_32F)); - - RAFT_CUSPARSE_TRY(cusparseCreateCsr(&A_T_mixed_, - op_problem_scaled.n_variables, - op_problem_scaled.n_constraints, - op_problem_scaled.nnz, - const_cast(A_T_offsets_.data()), - const_cast(A_T_indices_.data()), - A_T_float_.data(), - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_32F)); - - // Compute buffer sizes for mixed precision SpMV - const rmm::device_scalar alpha_d{1.0, handle_ptr->get_stream()}; - const rmm::device_scalar beta_d{0.0, handle_ptr->get_stream()}; - - size_t buffer_size_non_transpose_mixed = - mixed_precision_spmv_buffersize(handle_ptr_->get_cusparse_handle(), + if constexpr (std::is_same_v) { + if (enable_mixed_precision_spmv) { 
+ mixed_precision_enabled_ = true; + + A_float_.resize(op_problem_scaled.nnz, handle_ptr->get_stream()); + A_T_float_.resize(op_problem_scaled.nnz, handle_ptr->get_stream()); + + cub::DeviceTransform::Transform(op_problem_scaled.coefficients.data(), + A_float_.data(), + op_problem_scaled.nnz, + double_to_float_functor{}, + handle_ptr->get_stream().value()); + + cub::DeviceTransform::Transform(A_T_.data(), + A_T_float_.data(), + op_problem_scaled.nnz, + double_to_float_functor{}, + handle_ptr->get_stream().value()); + + RAFT_CUSPARSE_TRY(cusparseCreateCsr(&A_mixed_, + op_problem_scaled.n_constraints, + op_problem_scaled.n_variables, + op_problem_scaled.nnz, + const_cast(op_problem_scaled.offsets.data()), + const_cast(op_problem_scaled.variables.data()), + A_float_.data(), + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_32F)); + + RAFT_CUSPARSE_TRY(cusparseCreateCsr(&A_T_mixed_, + op_problem_scaled.n_variables, + op_problem_scaled.n_constraints, + op_problem_scaled.nnz, + const_cast(A_T_offsets_.data()), + const_cast(A_T_indices_.data()), + A_T_float_.data(), + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_32F)); + + const rmm::device_scalar alpha_d{1.0, handle_ptr->get_stream()}; + const rmm::device_scalar beta_d{0.0, handle_ptr->get_stream()}; + + size_t buffer_size_non_transpose_mixed = + mixed_precision_spmv_buffersize(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + alpha_d.data(), + A_mixed_, + c, + beta_d.data(), + dual_solution, + CUSPARSE_SPMV_CSR_ALG2, + handle_ptr->get_stream()); + buffer_non_transpose_mixed_.resize(buffer_size_non_transpose_mixed, + handle_ptr->get_stream()); + + size_t buffer_size_transpose_mixed = + mixed_precision_spmv_buffersize(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + alpha_d.data(), + A_T_mixed_, + dual_solution, + beta_d.data(), + c, + CUSPARSE_SPMV_CSR_ALG2, + handle_ptr->get_stream()); + 
buffer_transpose_mixed_.resize(buffer_size_transpose_mixed, handle_ptr->get_stream()); + +#if CUDA_VER_12_4_UP + mixed_precision_spmv_preprocess(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.data(), A_mixed_, @@ -658,11 +676,10 @@ cusparse_view_t::cusparse_view_t( beta_d.data(), dual_solution, CUSPARSE_SPMV_CSR_ALG2, + buffer_non_transpose_mixed_.data(), handle_ptr->get_stream()); - buffer_non_transpose_mixed_.resize(buffer_size_non_transpose_mixed, handle_ptr->get_stream()); - size_t buffer_size_transpose_mixed = - mixed_precision_spmv_buffersize(handle_ptr_->get_cusparse_handle(), + mixed_precision_spmv_preprocess(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.data(), A_T_mixed_, @@ -670,33 +687,10 @@ cusparse_view_t::cusparse_view_t( beta_d.data(), c, CUSPARSE_SPMV_CSR_ALG2, + buffer_transpose_mixed_.data(), handle_ptr->get_stream()); - buffer_transpose_mixed_.resize(buffer_size_transpose_mixed, handle_ptr->get_stream()); - -#if CUDA_VER_12_4_UP - // Preprocess mixed precision SpMV - mixed_precision_spmv_preprocess(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - alpha_d.data(), - A_mixed_, - c, - beta_d.data(), - dual_solution, - CUSPARSE_SPMV_CSR_ALG2, - buffer_non_transpose_mixed_.data(), - handle_ptr->get_stream()); - - mixed_precision_spmv_preprocess(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - alpha_d.data(), - A_T_mixed_, - dual_solution, - beta_d.data(), - c, - CUSPARSE_SPMV_CSR_ALG2, - buffer_transpose_mixed_.data(), - handle_ptr->get_stream()); #endif + } } } @@ -1080,22 +1074,20 @@ cusparse_view_t::cusparse_view_t( template void cusparse_view_t::update_mixed_precision_matrices() { - if constexpr (std::is_same_v && enable_mixed_precision_spmv) { + if constexpr (std::is_same_v) { if (!mixed_precision_enabled_) { return; } - // The A_ and A_T_ references point to the scaled matrix data - // Update the FP32 copies with the scaled values - 
thrust::transform(thrust::cuda::par.on(handle_ptr_->get_stream()), - A_.data(), - A_.data() + A_.size(), - A_float_.data(), - double_to_float_functor{}); - - thrust::transform(thrust::cuda::par.on(handle_ptr_->get_stream()), - A_T_.data(), - A_T_.data() + A_T_.size(), - A_T_float_.data(), - double_to_float_functor{}); + cub::DeviceTransform::Transform(A_.data(), + A_float_.data(), + A_.size(), + double_to_float_functor{}, + handle_ptr_->get_stream().value()); + + cub::DeviceTransform::Transform(A_T_.data(), + A_T_float_.data(), + A_T_.size(), + double_to_float_functor{}, + handle_ptr_->get_stream().value()); handle_ptr_->get_stream().synchronize(); } diff --git a/cpp/src/pdlp/cusparse_view.hpp b/cpp/src/pdlp/cusparse_view.hpp index e97531860c..4ada26d323 100644 --- a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -90,7 +90,8 @@ class cusparse_view_t { rmm::device_uvector& _potential_next_dual_solution, rmm::device_uvector& _reflected_primal_solution, const std::vector& climber_strategies, - const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params); + const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params, + bool enable_mixed_precision_spmv); cusparse_view_t(raft::handle_t const* handle_ptr, const problem_t& op_problem, @@ -196,7 +197,7 @@ class cusparse_view_t { const std::vector& climber_strategies_; // Mixed precision SpMV support (FP32 matrix with FP64 vectors/compute) - // Only used when enable_mixed_precision_spmv is true and f_t = double + // Only used when mixed_precision_enabled_ is true and f_t = double rmm::device_uvector A_float_; // FP32 copy of A values rmm::device_uvector A_T_float_; // FP32 copy of A_T values cusparseSpMatDescr_t A_mixed_{nullptr}; // FP32 matrix descriptor for A @@ -210,7 +211,6 @@ class cusparse_view_t { }; // Mixed precision SpMV: FP32 matrix with FP64 vectors and FP64 compute type -// This is used for PDHG iterations when enable_mixed_precision_spmv is true void mixed_precision_spmv(cusparseHandle_t 
handle, cusparseOperation_t opA, const double* alpha, diff --git a/cpp/src/pdlp/pdhg.cu b/cpp/src/pdlp/pdhg.cu index 6c83b171c7..a606eea8aa 100644 --- a/cpp/src/pdlp/pdhg.cu +++ b/cpp/src/pdlp/pdhg.cu @@ -41,7 +41,8 @@ pdhg_solver_t::pdhg_solver_t( bool is_legacy_batch_mode, // Batch mode with streams const std::vector& climber_strategies, const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params, - const std::vector>& new_bounds) + const std::vector>& new_bounds, + bool enable_mixed_precision_spmv) : batch_mode_(climber_strategies.size() > 1), handle_ptr_(handle_ptr), stream_view_(handle_ptr_->get_stream()), @@ -77,7 +78,8 @@ pdhg_solver_t::pdhg_solver_t( potential_next_dual_solution_, reflected_primal_, climber_strategies, - hyper_params}, + hyper_params, + enable_mixed_precision_spmv}, reusable_device_scalar_value_1_{1.0, stream_view_}, reusable_device_scalar_value_0_{0.0, stream_view_}, reusable_device_scalar_value_neg_1_{f_t(-1.0), stream_view_}, @@ -249,26 +251,28 @@ void pdhg_solver_t::compute_next_dual_solution(rmm::device_uvector && enable_mixed_precision_spmv) { - // Mixed precision SpMV: FP32 matrix with FP64 vectors - mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), // 1 - cusparse_view_.A_mixed_, - cusparse_view_.tmp_primal, - reusable_device_scalar_value_0_.data(), // 0 - cusparse_view_.dual_gradient, - CUSPARSE_SPMV_CSR_ALG2, - cusparse_view_.buffer_non_transpose_mixed_.data(), - stream_view_); - } else { + if constexpr (std::is_same_v) { + if (cusparse_view_.mixed_precision_enabled_) { + mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_mixed_, + cusparse_view_.tmp_primal, + reusable_device_scalar_value_0_.data(), + cusparse_view_.dual_gradient, + CUSPARSE_SPMV_CSR_ALG2, + cusparse_view_.buffer_non_transpose_mixed_.data(), + stream_view_); + } + } + if 
(!cusparse_view_.mixed_precision_enabled_) { RAFT_CUSPARSE_TRY( raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), // 1 + reusable_device_scalar_value_1_.data(), cusparse_view_.A, cusparse_view_.tmp_primal, - reusable_device_scalar_value_0_.data(), // 0 + reusable_device_scalar_value_0_.data(), cusparse_view_.dual_gradient, CUSPARSE_SPMV_CSR_ALG2, (f_t*)cusparse_view_.buffer_non_transpose.data(), @@ -301,19 +305,21 @@ void pdhg_solver_t::compute_At_y() // A_t @ y if (!batch_mode_) { - if constexpr (std::is_same_v && enable_mixed_precision_spmv) { - // Mixed precision SpMV: FP32 matrix with FP64 vectors - mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), - cusparse_view_.A_T_mixed_, - cusparse_view_.dual_solution, - reusable_device_scalar_value_0_.data(), - cusparse_view_.current_AtY, - CUSPARSE_SPMV_CSR_ALG2, - cusparse_view_.buffer_transpose_mixed_.data(), - stream_view_); - } else { + if constexpr (std::is_same_v) { + if (cusparse_view_.mixed_precision_enabled_) { + mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_T_mixed_, + cusparse_view_.dual_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.current_AtY, + CUSPARSE_SPMV_CSR_ALG2, + cusparse_view_.buffer_transpose_mixed_.data(), + stream_view_); + } + } + if (!cusparse_view_.mixed_precision_enabled_) { RAFT_CUSPARSE_TRY( raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, @@ -347,19 +353,21 @@ void pdhg_solver_t::compute_A_x() { // A @ x if (!batch_mode_) { - if constexpr (std::is_same_v && enable_mixed_precision_spmv) { - // Mixed precision SpMV: FP32 matrix with FP64 vectors - mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), - 
CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), - cusparse_view_.A_mixed_, - cusparse_view_.reflected_primal_solution, - reusable_device_scalar_value_0_.data(), - cusparse_view_.dual_gradient, - CUSPARSE_SPMV_CSR_ALG2, - cusparse_view_.buffer_non_transpose_mixed_.data(), - stream_view_); - } else { + if constexpr (std::is_same_v) { + if (cusparse_view_.mixed_precision_enabled_) { + mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_mixed_, + cusparse_view_.reflected_primal_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.dual_gradient, + CUSPARSE_SPMV_CSR_ALG2, + cusparse_view_.buffer_non_transpose_mixed_.data(), + stream_view_); + } + } + if (!cusparse_view_.mixed_precision_enabled_) { RAFT_CUSPARSE_TRY( raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, diff --git a/cpp/src/pdlp/pdhg.hpp b/cpp/src/pdlp/pdhg.hpp index 8ff45ac0ce..32722eae49 100644 --- a/cpp/src/pdlp/pdhg.hpp +++ b/cpp/src/pdlp/pdhg.hpp @@ -29,7 +29,8 @@ class pdhg_solver_t { bool is_legacy_batch_mode, const std::vector& climber_strategies, const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params, - const std::vector>& new_bounds); + const std::vector>& new_bounds, + bool enable_mixed_precision_spmv = true); saddle_point_state_t& get_saddle_point_state(); cusparse_view_t& get_cusparse_view(); diff --git a/cpp/src/pdlp/pdlp.cu b/cpp/src/pdlp/pdlp.cu index a23e57f39e..118f2a8036 100644 --- a/cpp/src/pdlp/pdlp.cu +++ b/cpp/src/pdlp/pdlp.cu @@ -141,7 +141,8 @@ pdlp_solver_t::pdlp_solver_t(problem_t& op_problem, is_legacy_batch_mode, climber_strategies_, settings_.hyper_params, - settings_.new_bounds}, + settings_.new_bounds, + settings_.mixed_precision_spmv}, initial_scaling_strategy_{handle_ptr_, op_problem_scaled_, settings_.hyper_params.default_l_inf_ruiz_iterations, diff --git 
a/cpp/src/pdlp/pdlp_constants.hpp b/cpp/src/pdlp/pdlp_constants.hpp index 598b79ea61..cf17cc985b 100644 --- a/cpp/src/pdlp/pdlp_constants.hpp +++ b/cpp/src/pdlp/pdlp_constants.hpp @@ -71,11 +71,4 @@ template <> inline constexpr double safe_guard_for_extreme_values_in_primal_weight_computation = 1.0e-10; -// Mixed precision SpMV configuration for PDLP Stable3 mode -// When enabled, the constraint matrix A (and its transpose) are stored in FP32 -// while vectors and compute types remain in FP64 during PDHG iterations. -// This is NOT used during convergence checking which stays in full FP64. -// This only applies when f_t = double (FP64 mode). -inline constexpr bool enable_mixed_precision_spmv = true; - } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/pdlp/solution_conversion.cu b/cpp/src/pdlp/solution_conversion.cu index ea3c681266..a0d4f88aef 100644 --- a/cpp/src/pdlp/solution_conversion.cu +++ b/cpp/src/pdlp/solution_conversion.cu @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -134,6 +135,18 @@ cuopt::cython::mip_ret_t gpu_mip_solution_t::to_mip_ret_t() // CPU LP Solution Conversion // =========================== +namespace { +template +cuopt::cython::cpu_buffer to_cpu_buffer(std::vector& src) +{ + if constexpr (std::is_same_v) { + return std::move(src); + } else { + return cuopt::cython::cpu_buffer(src.begin(), src.end()); + } +} +} // namespace + template cuopt::cython::linear_programming_ret_t cpu_lp_solution_t::to_cpu_linear_programming_ret_t() @@ -142,22 +155,22 @@ cpu_lp_solution_t::to_cpu_linear_programming_ret_t() cuopt::cython::linear_programming_ret_t ret; cpu_solutions_t cpu; - cpu.primal_solution_ = std::move(primal_solution_); - cpu.dual_solution_ = std::move(dual_solution_); - cpu.reduced_cost_ = std::move(reduced_cost_); + cpu.primal_solution_ = to_cpu_buffer(primal_solution_); + cpu.dual_solution_ = to_cpu_buffer(dual_solution_); + cpu.reduced_cost_ = to_cpu_buffer(reduced_cost_); if 
(!pdlp_warm_start_data_.current_primal_solution_.empty()) { - cpu.current_primal_solution_ = std::move(pdlp_warm_start_data_.current_primal_solution_); - cpu.current_dual_solution_ = std::move(pdlp_warm_start_data_.current_dual_solution_); - cpu.initial_primal_average_ = std::move(pdlp_warm_start_data_.initial_primal_average_); - cpu.initial_dual_average_ = std::move(pdlp_warm_start_data_.initial_dual_average_); - cpu.current_ATY_ = std::move(pdlp_warm_start_data_.current_ATY_); - cpu.sum_primal_solutions_ = std::move(pdlp_warm_start_data_.sum_primal_solutions_); - cpu.sum_dual_solutions_ = std::move(pdlp_warm_start_data_.sum_dual_solutions_); + cpu.current_primal_solution_ = to_cpu_buffer(pdlp_warm_start_data_.current_primal_solution_); + cpu.current_dual_solution_ = to_cpu_buffer(pdlp_warm_start_data_.current_dual_solution_); + cpu.initial_primal_average_ = to_cpu_buffer(pdlp_warm_start_data_.initial_primal_average_); + cpu.initial_dual_average_ = to_cpu_buffer(pdlp_warm_start_data_.initial_dual_average_); + cpu.current_ATY_ = to_cpu_buffer(pdlp_warm_start_data_.current_ATY_); + cpu.sum_primal_solutions_ = to_cpu_buffer(pdlp_warm_start_data_.sum_primal_solutions_); + cpu.sum_dual_solutions_ = to_cpu_buffer(pdlp_warm_start_data_.sum_dual_solutions_); cpu.last_restart_duality_gap_primal_solution_ = - std::move(pdlp_warm_start_data_.last_restart_duality_gap_primal_solution_); + to_cpu_buffer(pdlp_warm_start_data_.last_restart_duality_gap_primal_solution_); cpu.last_restart_duality_gap_dual_solution_ = - std::move(pdlp_warm_start_data_.last_restart_duality_gap_dual_solution_); + to_cpu_buffer(pdlp_warm_start_data_.last_restart_duality_gap_dual_solution_); ret.initial_primal_weight_ = pdlp_warm_start_data_.initial_primal_weight_; ret.initial_step_size_ = pdlp_warm_start_data_.initial_step_size_; @@ -222,4 +235,11 @@ template cuopt::cython::linear_programming_ret_t cpu_lp_solution_t::to_cpu_linear_programming_ret_t(); template cuopt::cython::mip_ret_t 
cpu_mip_solution_t::to_cpu_mip_ret_t(); +#if PDLP_INSTANTIATE_FLOAT +template cuopt::cython::linear_programming_ret_t +gpu_lp_solution_t::to_linear_programming_ret_t(); +template cuopt::cython::linear_programming_ret_t +cpu_lp_solution_t::to_cpu_linear_programming_ret_t(); +#endif + } // namespace cuopt::linear_programming diff --git a/cpp/tests/linear_programming/pdlp_test.cu b/cpp/tests/linear_programming/pdlp_test.cu index 3736a796c5..8f47cef65b 100644 --- a/cpp/tests/linear_programming/pdlp_test.cu +++ b/cpp/tests/linear_programming/pdlp_test.cu @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include @@ -77,6 +77,41 @@ TEST(pdlp_class, run_double) afiro_primal_objective, solution.get_additional_termination_information().primal_objective)); } +TEST(pdlp_class, mixed_precision_spmv) +{ + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto settings_mixed = pdlp_solver_settings_t{}; + settings_mixed.method = cuopt::linear_programming::method_t::PDLP; + settings_mixed.mixed_precision_spmv = true; + + optimization_problem_solution_t solution_mixed = + solve_lp(&handle_, op_problem, settings_mixed); + EXPECT_EQ((int)solution_mixed.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); + EXPECT_FALSE(is_incorrect_objective( + afiro_primal_objective, + solution_mixed.get_additional_termination_information().primal_objective)); + + auto settings_full = pdlp_solver_settings_t{}; + settings_full.method = cuopt::linear_programming::method_t::PDLP; + settings_full.mixed_precision_spmv = false; + + optimization_problem_solution_t solution_full = + solve_lp(&handle_, op_problem, settings_full); + EXPECT_EQ((int)solution_full.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); + EXPECT_FALSE(is_incorrect_objective( + afiro_primal_objective, + 
solution_full.get_additional_termination_information().primal_objective)); + + EXPECT_NEAR(solution_mixed.get_additional_termination_information().primal_objective, + solution_full.get_additional_termination_information().primal_objective, + 1e-2); +} + TEST(pdlp_class, run_double_very_low_accuracy) { const raft::handle_t handle_{}; diff --git a/docs/cuopt/source/lp-qp-features.rst b/docs/cuopt/source/lp-qp-features.rst index 4bd178ed53..9495998760 100644 --- a/docs/cuopt/source/lp-qp-features.rst +++ b/docs/cuopt/source/lp-qp-features.rst @@ -157,6 +157,20 @@ Batch Mode Users can submit a set of problems which will be solved in a batch. Problems will be solved at the same time in parallel to fully utilize the GPU. Checkout :ref:`self-hosted client ` example in thin client. +FP32 Precision Mode +------------------- + +By default, PDLP operates in FP64 (double) precision. Users can switch to FP32 (float) precision for the entire solve. FP32 uses half the memory of FP64 and allows PDHG iterations to be on average twice as fast, but it may require more iterations to converge due to reduced numerical accuracy. FP32 mode is only supported with the PDLP method (not concurrent) and without crossover. + +.. note:: The default precision is FP64 (double). + +Mixed Precision SpMV +-------------------- + +When running PDLP in FP64 mode, users can enable mixed precision sparse matrix-vector products (SpMV) during PDHG iterations. In this mode, the constraint matrix and its transpose are stored in FP32 while vectors and the compute type remain in FP64. This allows SpMV operations to be faster thanks to reduced memory bandwidth requirements, while maintaining FP64 accuracy in the accumulation. This will make PDHG iterations faster while limiting the potential negative impact on convergence (compared to running in FP32 mode). 
Convergence checking and restart logic always use the full FP64 matrix, so this mode does not reduce memory usage since both the FP32 and FP64 copies of the matrix are kept in memory. Mixed precision SpMV only applies in FP64 mode and has no effect when running in FP32. + +.. note:: The default value is false. + Multi-GPU Mode -------------- diff --git a/docs/cuopt/source/lp-qp-milp-settings.rst b/docs/cuopt/source/lp-qp-milp-settings.rst index bd1372f70e..de3de5c854 100644 --- a/docs/cuopt/source/lp-qp-milp-settings.rst +++ b/docs/cuopt/source/lp-qp-milp-settings.rst @@ -192,6 +192,30 @@ Per Constraint Residual .. note:: The default value is false. +FP32 Precision +^^^^^^^^^^^^^^ + +``CUOPT_PDLP_FP32`` controls whether PDLP should run in FP32 (float) precision instead of FP64 (double). +FP32 uses half the memory of FP64 and allows PDHG iterations to be on average twice as fast, +but it may require more iterations to converge due to reduced numerical accuracy. +FP32 mode is only supported with the PDLP method (not concurrent) and without crossover. + +.. note:: The default precision is FP64 (double). + +Mixed Precision SpMV +^^^^^^^^^^^^^^^^^^^^ + +``CUOPT_MIXED_PRECISION_SPMV`` controls whether PDLP should use mixed precision sparse matrix-vector +products (SpMV) during PDHG iterations. When enabled, the constraint matrix and its transpose are stored +in FP32 while vectors and the compute type remain in FP64. This allows SpMV operations to be faster +thanks to reduced memory bandwidth requirements, while maintaining FP64 accuracy in the accumulation. +This will make PDHG iterations faster while limiting the potential negative impact on convergence +(compared to running in FP32 mode). Convergence checking and restart logic always use the full FP64 +matrix, so this mode does not reduce memory usage since both the FP32 and FP64 copies of the matrix +are kept in memory. Mixed precision SpMV only applies in FP64 mode and has no effect when running in FP32. + +.. 
note:: The default value is false. + Barrier Solver Settings ^^^^^^^^^^^^^^^^^^^^^^^^ From f8f673a02a5cf5b4aea7bdca07115ec5e12b97d9 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Fri, 27 Feb 2026 14:50:26 +0100 Subject: [PATCH 06/23] update doc --- docs/cuopt/source/lp-qp-milp-settings.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/cuopt/source/lp-qp-milp-settings.rst b/docs/cuopt/source/lp-qp-milp-settings.rst index de3de5c854..4a8c4c44ae 100644 --- a/docs/cuopt/source/lp-qp-milp-settings.rst +++ b/docs/cuopt/source/lp-qp-milp-settings.rst @@ -198,6 +198,7 @@ FP32 Precision ``CUOPT_PDLP_FP32`` controls whether PDLP should run in FP32 (float) precision instead of FP64 (double). FP32 uses half the memory of FP64 and allows PDHG iterations to be on average twice as fast, but it may require more iterations to converge due to reduced numerical accuracy. +For an alternative that maintains FP64 accuracy while improving performance, see :ref:`Mixed Precision SpMV`. FP32 mode is only supported with the PDLP method (not concurrent) and without crossover. .. note:: The default precision is FP64 (double). 
From 64c54571de322714c729e54c8cad11a463602783 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Fri, 27 Feb 2026 15:02:02 +0100 Subject: [PATCH 07/23] style cleanup --- .../linear_programming/cuopt/run_pdlp.cu | 18 ++- cpp/src/math_optimization/solution_writer.hpp | 2 +- cpp/src/mip_heuristics/mip_constants.hpp | 2 +- .../presolve/third_party_presolve.cpp | 24 ++- cpp/src/pdlp/cpu_pdlp_warm_start_data.cu | 6 +- cpp/src/pdlp/cusparse_view.cu | 3 +- cpp/src/pdlp/cusparse_view.hpp | 8 +- cpp/src/pdlp/pdlp.cu | 148 +++++++++--------- cpp/src/pdlp/solve.cu | 105 +++++++------ cpp/tests/linear_programming/pdlp_test.cu | 32 ++-- 10 files changed, 177 insertions(+), 171 deletions(-) diff --git a/benchmarks/linear_programming/cuopt/run_pdlp.cu b/benchmarks/linear_programming/cuopt/run_pdlp.cu index 6e793997f3..2166a6474b 100644 --- a/benchmarks/linear_programming/cuopt/run_pdlp.cu +++ b/benchmarks/linear_programming/cuopt/run_pdlp.cu @@ -78,12 +78,16 @@ static void parse_arguments(argparse::ArgumentParser& program) program.add_argument("--solution-path").help("Path where solution file will be generated"); program.add_argument("--pdlp-fp32") - .help("Use FP32 (float) precision instead of FP64 (double). Only supported for PDLP method without crossover.") + .help( + "Use FP32 (float) precision instead of FP64 (double). Only supported for PDLP method without " + "crossover.") .default_value(false) .implicit_value(true); program.add_argument("--mixed-precision-spmv") - .help("Enable mixed precision SpMV (FP32 matrix, FP64 vectors) during PDHG iterations. Only supported for PDLP method in FP64.") + .help( + "Enable mixed precision SpMV (FP32 matrix, FP64 vectors) during PDHG iterations. 
Only " + "supported for PDLP method in FP64.") .default_value(false) .implicit_value(true); } @@ -121,14 +125,14 @@ static cuopt::linear_programming::pdlp_solver_settings_t create_solver settings.time_limit = static_cast(program.get("--time-limit")); settings.iteration_limit = program.get("--iteration-limit"); - settings.set_optimality_tolerance(static_cast(program.get("--optimality-tolerance"))); + settings.set_optimality_tolerance( + static_cast(program.get("--optimality-tolerance"))); settings.pdlp_solver_mode = string_to_pdlp_solver_mode(program.get("--pdlp-solver-mode")); settings.method = static_cast(program.get("--method")); - settings.crossover = program.get("--crossover"); - settings.presolver = string_to_presolver(program.get("--presolver")); - settings.mixed_precision_spmv = - program.get("--mixed-precision-spmv"); + settings.crossover = program.get("--crossover"); + settings.presolver = string_to_presolver(program.get("--presolver")); + settings.mixed_precision_spmv = program.get("--mixed-precision-spmv"); return settings; } diff --git a/cpp/src/math_optimization/solution_writer.hpp b/cpp/src/math_optimization/solution_writer.hpp index e187f64313..0ac1b64464 100644 --- a/cpp/src/math_optimization/solution_writer.hpp +++ b/cpp/src/math_optimization/solution_writer.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ diff --git a/cpp/src/mip_heuristics/mip_constants.hpp b/cpp/src/mip_heuristics/mip_constants.hpp index cf04df9b0f..47d3d22de4 100644 --- a/cpp/src/mip_heuristics/mip_constants.hpp +++ b/cpp/src/mip_heuristics/mip_constants.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ diff --git a/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp b/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp index cdc7da4ec8..bee0291b7c 100644 --- a/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp +++ b/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp @@ -371,14 +371,14 @@ optimization_problem_t build_optimization_problem_from_pslp( op_problem.set_csr_constraint_matrix( reduced_prob->Ax, nnz, reduced_prob->Ai, nnz, reduced_prob->Ap, n_rows + 1); - std::vector h_obj_coeffs(n_cols); - std::copy(reduced_prob->c, reduced_prob->c + n_cols, h_obj_coeffs.begin()); - if (maximize) { - for (size_t i = 0; i < n_cols; ++i) { - h_obj_coeffs[i] = -h_obj_coeffs[i]; - } + std::vector h_obj_coeffs(n_cols); + std::copy(reduced_prob->c, reduced_prob->c + n_cols, h_obj_coeffs.begin()); + if (maximize) { + for (size_t i = 0; i < n_cols; ++i) { + h_obj_coeffs[i] = -h_obj_coeffs[i]; } - op_problem.set_objective_coefficients(h_obj_coeffs.data(), n_cols); + } + op_problem.set_objective_coefficients(h_obj_coeffs.data(), n_cols); op_problem.set_constraint_lower_bounds(reduced_prob->lhs, n_rows); op_problem.set_constraint_upper_bounds(reduced_prob->rhs, n_rows); op_problem.set_variable_lower_bounds(reduced_prob->lbs, n_cols); @@ -498,8 +498,8 @@ optimization_problem_t build_optimization_problem( i_t nnz = constraint_matrix.getNnz(); assert(offsets[nrows] == nnz); - const int* cols = constraint_matrix.getConstraintMatrix().getColumns(); - const f_t* coeffs = constraint_matrix.getConstraintMatrix().getValues(); + const int* cols = constraint_matrix.getConstraintMatrix().getColumns(); + const f_t* coeffs = constraint_matrix.getConstraintMatrix().getValues(); op_problem.set_csr_constraint_matrix( &(coeffs[start]), nnz, &(cols[start]), nnz, offsets.data(), nrows + 1); @@ -566,8 +566,7 @@ 
void set_presolve_methods(papilo::Presolve& presolver, if (category == problem_category_t::MIP) { // cuOpt custom GF2 presolver - presolver.addPresolveMethod( - uptr(new cuopt::linear_programming::detail::GF2Presolve())); + presolver.addPresolveMethod(uptr(new cuopt::linear_programming::detail::GF2Presolve())); } // fast presolvers presolver.addPresolveMethod(uptr(new papilo::SingletonCols())); @@ -773,8 +772,7 @@ void third_party_presolve_t::undo(rmm::device_uvector& primal_sol std::vector dual_sol_vec_h(dual_solution.size()); raft::copy(dual_sol_vec_h.data(), dual_solution.data(), dual_solution.size(), stream_view); std::vector reduced_costs_vec_h(reduced_costs.size()); - raft::copy( - reduced_costs_vec_h.data(), reduced_costs.data(), reduced_costs.size(), stream_view); + raft::copy(reduced_costs_vec_h.data(), reduced_costs.data(), reduced_costs.size(), stream_view); papilo::Solution reduced_sol(primal_sol_vec_h); if (dual_postsolve) { diff --git a/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu b/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu index dd196ab66f..6038056052 100644 --- a/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu +++ b/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu @@ -111,10 +111,10 @@ pdlp_warm_start_data_t convert_to_gpu_warmstart( #if MIP_INSTANTIATE_DOUBLE template cpu_pdlp_warm_start_data_t convert_to_cpu_warmstart( const pdlp_warm_start_data_t&, rmm::cuda_stream_view); - template pdlp_warm_start_data_t convert_to_gpu_warmstart( - const cpu_pdlp_warm_start_data_t&, rmm::cuda_stream_view); +template pdlp_warm_start_data_t convert_to_gpu_warmstart( + const cpu_pdlp_warm_start_data_t&, rmm::cuda_stream_view); #endif - + #if PDLP_INSTANTIATE_FLOAT template cpu_pdlp_warm_start_data_t convert_to_cpu_warmstart( const pdlp_warm_start_data_t&, rmm::cuda_stream_view); diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index 80f94f7834..00903c986a 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -652,8 +652,7 @@ 
cusparse_view_t::cusparse_view_t( dual_solution, CUSPARSE_SPMV_CSR_ALG2, handle_ptr->get_stream()); - buffer_non_transpose_mixed_.resize(buffer_size_non_transpose_mixed, - handle_ptr->get_stream()); + buffer_non_transpose_mixed_.resize(buffer_size_non_transpose_mixed, handle_ptr->get_stream()); size_t buffer_size_transpose_mixed = mixed_precision_spmv_buffersize(handle_ptr_->get_cusparse_handle(), diff --git a/cpp/src/pdlp/cusparse_view.hpp b/cpp/src/pdlp/cusparse_view.hpp index 4ada26d323..2eb3358fe1 100644 --- a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -198,10 +198,10 @@ class cusparse_view_t { // Mixed precision SpMV support (FP32 matrix with FP64 vectors/compute) // Only used when mixed_precision_enabled_ is true and f_t = double - rmm::device_uvector A_float_; // FP32 copy of A values - rmm::device_uvector A_T_float_; // FP32 copy of A_T values - cusparseSpMatDescr_t A_mixed_{nullptr}; // FP32 matrix descriptor for A - cusparseSpMatDescr_t A_T_mixed_{nullptr}; // FP32 matrix descriptor for A_T + rmm::device_uvector A_float_; // FP32 copy of A values + rmm::device_uvector A_T_float_; // FP32 copy of A_T values + cusparseSpMatDescr_t A_mixed_{nullptr}; // FP32 matrix descriptor for A + cusparseSpMatDescr_t A_T_mixed_{nullptr}; // FP32 matrix descriptor for A_T rmm::device_uvector buffer_non_transpose_mixed_; // SpMV buffer for mixed precision A rmm::device_uvector buffer_transpose_mixed_; // SpMV buffer for mixed precision A_T bool mixed_precision_enabled_{false}; diff --git a/cpp/src/pdlp/pdlp.cu b/cpp/src/pdlp/pdlp.cu index 118f2a8036..20fc4d1ce3 100644 --- a/cpp/src/pdlp/pdlp.cu +++ b/cpp/src/pdlp/pdlp.cu @@ -1978,49 +1978,49 @@ void pdlp_solver_t::transpose_primal_dual_to_row( rmm::device_uvector dual_slack_transposed( is_dual_slack_empty ? 
0 : primal_size_h_ * climber_strategies_.size(), stream_view_); -RAFT_CUBLAS_TRY(cublasSetStream(handle_ptr_->get_cublas_handle(), stream_view_)); + RAFT_CUBLAS_TRY(cublasSetStream(handle_ptr_->get_cublas_handle(), stream_view_)); CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), - CUBLAS_OP_T, - CUBLAS_OP_N, - climber_strategies_.size(), - primal_size_h_, - reusable_device_scalar_value_1_.data(), - primal_to_transpose.data(), - primal_size_h_, - reusable_device_scalar_value_0_.data(), - nullptr, - climber_strategies_.size(), - primal_transposed.data(), - climber_strategies_.size())); + CUBLAS_OP_T, + CUBLAS_OP_N, + climber_strategies_.size(), + primal_size_h_, + reusable_device_scalar_value_1_.data(), + primal_to_transpose.data(), + primal_size_h_, + reusable_device_scalar_value_0_.data(), + nullptr, + climber_strategies_.size(), + primal_transposed.data(), + climber_strategies_.size())); if (!is_dual_slack_empty) { CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), - CUBLAS_OP_T, - CUBLAS_OP_N, - climber_strategies_.size(), - primal_size_h_, - reusable_device_scalar_value_1_.data(), - dual_slack_to_transpose.data(), - primal_size_h_, - reusable_device_scalar_value_0_.data(), - nullptr, - climber_strategies_.size(), - dual_slack_transposed.data(), - climber_strategies_.size())); + CUBLAS_OP_T, + CUBLAS_OP_N, + climber_strategies_.size(), + primal_size_h_, + reusable_device_scalar_value_1_.data(), + dual_slack_to_transpose.data(), + primal_size_h_, + reusable_device_scalar_value_0_.data(), + nullptr, + climber_strategies_.size(), + dual_slack_transposed.data(), + climber_strategies_.size())); } CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), - CUBLAS_OP_T, - CUBLAS_OP_N, - climber_strategies_.size(), - dual_size_h_, - reusable_device_scalar_value_1_.data(), - dual_to_transpose.data(), - dual_size_h_, - reusable_device_scalar_value_0_.data(), - nullptr, - climber_strategies_.size(), - dual_transposed.data(), - climber_strategies_.size())); + 
CUBLAS_OP_T, + CUBLAS_OP_N, + climber_strategies_.size(), + dual_size_h_, + reusable_device_scalar_value_1_.data(), + dual_to_transpose.data(), + dual_size_h_, + reusable_device_scalar_value_0_.data(), + nullptr, + climber_strategies_.size(), + dual_transposed.data(), + climber_strategies_.size())); // Copy that holds the tranpose to the original vector raft::copy(primal_to_transpose.data(), @@ -2055,50 +2055,50 @@ void pdlp_solver_t::transpose_primal_dual_back_to_col( rmm::device_uvector dual_slack_transposed( is_dual_slack_empty ? 0 : primal_size_h_ * climber_strategies_.size(), stream_view_); - RAFT_CUBLAS_TRY(cublasSetStream(handle_ptr_->get_cublas_handle(), stream_view_)); + RAFT_CUBLAS_TRY(cublasSetStream(handle_ptr_->get_cublas_handle(), stream_view_)); CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), - CUBLAS_OP_T, - CUBLAS_OP_N, - primal_size_h_, - climber_strategies_.size(), - reusable_device_scalar_value_1_.data(), - primal_to_transpose.data(), - climber_strategies_.size(), - reusable_device_scalar_value_0_.data(), - nullptr, - primal_size_h_, - primal_transposed.data(), - primal_size_h_)); + CUBLAS_OP_T, + CUBLAS_OP_N, + primal_size_h_, + climber_strategies_.size(), + reusable_device_scalar_value_1_.data(), + primal_to_transpose.data(), + climber_strategies_.size(), + reusable_device_scalar_value_0_.data(), + nullptr, + primal_size_h_, + primal_transposed.data(), + primal_size_h_)); if (!is_dual_slack_empty) { CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), - CUBLAS_OP_T, - CUBLAS_OP_N, - primal_size_h_, - climber_strategies_.size(), - reusable_device_scalar_value_1_.data(), - dual_slack_to_transpose.data(), - climber_strategies_.size(), - reusable_device_scalar_value_0_.data(), - nullptr, - primal_size_h_, - dual_slack_transposed.data(), - primal_size_h_)); + CUBLAS_OP_T, + CUBLAS_OP_N, + primal_size_h_, + climber_strategies_.size(), + reusable_device_scalar_value_1_.data(), + dual_slack_to_transpose.data(), + 
climber_strategies_.size(), + reusable_device_scalar_value_0_.data(), + nullptr, + primal_size_h_, + dual_slack_transposed.data(), + primal_size_h_)); } CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), - CUBLAS_OP_T, - CUBLAS_OP_N, - dual_size_h_, - climber_strategies_.size(), - reusable_device_scalar_value_1_.data(), - dual_to_transpose.data(), - climber_strategies_.size(), - reusable_device_scalar_value_0_.data(), - nullptr, - dual_size_h_, - dual_transposed.data(), - dual_size_h_)); + CUBLAS_OP_T, + CUBLAS_OP_N, + dual_size_h_, + climber_strategies_.size(), + reusable_device_scalar_value_1_.data(), + dual_to_transpose.data(), + climber_strategies_.size(), + reusable_device_scalar_value_0_.data(), + nullptr, + dual_size_h_, + dual_transposed.data(), + dual_size_h_)); // Copy that holds the tranpose to the original vector raft::copy(primal_to_transpose.data(), diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index 850a19c6dc..f7f7a5d63e 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -606,57 +606,63 @@ optimization_problem_solution_t run_pdlp(detail::problem_t& if constexpr (!std::is_same_v) { cuopt_expects(!settings.crossover, - error_type_t::ValidationError, - "PDLP with crossover is not supported for float precision. Set crossover=false or use double precision."); - } else { - const bool do_crossover = settings.crossover; - i_t crossover_info = 0; + error_type_t::ValidationError, + "PDLP with crossover is not supported for float precision. 
Set crossover=false " + "or use double precision."); + } else { + const bool do_crossover = settings.crossover; + i_t crossover_info = 0; if (do_crossover && sol.get_termination_status() == pdlp_termination_status_t::Optimal) { crossover_info = -1; - dual_simplex::lp_problem_t lp(problem.handle_ptr, 1, 1, 1); - dual_simplex::lp_solution_t initial_solution(1, 1); - translate_to_crossover_problem(problem, sol, lp, initial_solution); - dual_simplex::simplex_solver_settings_t dual_simplex_settings; - dual_simplex_settings.time_limit = settings.time_limit; - dual_simplex_settings.iteration_limit = settings.iteration_limit; - dual_simplex_settings.concurrent_halt = settings.concurrent_halt; - dual_simplex::lp_solution_t vertex_solution(lp.num_rows, lp.num_cols); - std::vector vstatus(lp.num_cols); - dual_simplex::crossover_status_t crossover_status = dual_simplex::crossover( - lp, dual_simplex_settings, initial_solution, timer.get_tic_start(), vertex_solution, vstatus); - pdlp_termination_status_t termination_status = pdlp_termination_status_t::TimeLimit; - auto to_termination_status = [](dual_simplex::crossover_status_t status) { - switch (status) { - case dual_simplex::crossover_status_t::OPTIMAL: return pdlp_termination_status_t::Optimal; - case dual_simplex::crossover_status_t::PRIMAL_FEASIBLE: - return pdlp_termination_status_t::PrimalFeasible; - case dual_simplex::crossover_status_t::DUAL_FEASIBLE: - return pdlp_termination_status_t::NumericalError; - case dual_simplex::crossover_status_t::NUMERICAL_ISSUES: - return pdlp_termination_status_t::NumericalError; - case dual_simplex::crossover_status_t::CONCURRENT_LIMIT: - return pdlp_termination_status_t::ConcurrentLimit; - case dual_simplex::crossover_status_t::TIME_LIMIT: - return pdlp_termination_status_t::TimeLimit; - default: return pdlp_termination_status_t::NumericalError; - } - }; - termination_status = to_termination_status(crossover_status); - if (crossover_status == dual_simplex::crossover_status_t::OPTIMAL) 
{ crossover_info = 0; } - rmm::device_uvector final_primal_solution = - cuopt::device_copy(vertex_solution.x, problem.handle_ptr->get_stream()); - rmm::device_uvector final_dual_solution = - cuopt::device_copy(vertex_solution.y, problem.handle_ptr->get_stream()); - rmm::device_uvector final_reduced_cost = - cuopt::device_copy(vertex_solution.z, problem.handle_ptr->get_stream()); - problem.handle_ptr->sync_stream(); - // Negate dual variables and reduced costs for maximization problems - if (problem.maximize) { - adjust_dual_solution_and_reduced_cost( - final_dual_solution, final_reduced_cost, problem.handle_ptr->get_stream()); + dual_simplex::lp_problem_t lp(problem.handle_ptr, 1, 1, 1); + dual_simplex::lp_solution_t initial_solution(1, 1); + translate_to_crossover_problem(problem, sol, lp, initial_solution); + dual_simplex::simplex_solver_settings_t dual_simplex_settings; + dual_simplex_settings.time_limit = settings.time_limit; + dual_simplex_settings.iteration_limit = settings.iteration_limit; + dual_simplex_settings.concurrent_halt = settings.concurrent_halt; + dual_simplex::lp_solution_t vertex_solution(lp.num_rows, lp.num_cols); + std::vector vstatus(lp.num_cols); + dual_simplex::crossover_status_t crossover_status = + dual_simplex::crossover(lp, + dual_simplex_settings, + initial_solution, + timer.get_tic_start(), + vertex_solution, + vstatus); + pdlp_termination_status_t termination_status = pdlp_termination_status_t::TimeLimit; + auto to_termination_status = [](dual_simplex::crossover_status_t status) { + switch (status) { + case dual_simplex::crossover_status_t::OPTIMAL: return pdlp_termination_status_t::Optimal; + case dual_simplex::crossover_status_t::PRIMAL_FEASIBLE: + return pdlp_termination_status_t::PrimalFeasible; + case dual_simplex::crossover_status_t::DUAL_FEASIBLE: + return pdlp_termination_status_t::NumericalError; + case dual_simplex::crossover_status_t::NUMERICAL_ISSUES: + return pdlp_termination_status_t::NumericalError; + case 
dual_simplex::crossover_status_t::CONCURRENT_LIMIT: + return pdlp_termination_status_t::ConcurrentLimit; + case dual_simplex::crossover_status_t::TIME_LIMIT: + return pdlp_termination_status_t::TimeLimit; + default: return pdlp_termination_status_t::NumericalError; + } + }; + termination_status = to_termination_status(crossover_status); + if (crossover_status == dual_simplex::crossover_status_t::OPTIMAL) { crossover_info = 0; } + rmm::device_uvector final_primal_solution = + cuopt::device_copy(vertex_solution.x, problem.handle_ptr->get_stream()); + rmm::device_uvector final_dual_solution = + cuopt::device_copy(vertex_solution.y, problem.handle_ptr->get_stream()); + rmm::device_uvector final_reduced_cost = + cuopt::device_copy(vertex_solution.z, problem.handle_ptr->get_stream()); problem.handle_ptr->sync_stream(); - } + // Negate dual variables and reduced costs for maximization problems + if (problem.maximize) { + adjust_dual_solution_and_reduced_cost( + final_dual_solution, final_reduced_cost, problem.handle_ptr->get_stream()); + problem.handle_ptr->sync_stream(); + } // Should be filled with more information from dual simplex std::vector< @@ -684,7 +690,7 @@ optimization_problem_solution_t run_pdlp(detail::problem_t& crossover_info == 0 && sol.get_termination_status() == pdlp_termination_status_t::Optimal) { // We finished. Tell dual simplex to stop if it is still running. CUOPT_LOG_CONDITIONAL_INFO(!settings.inside_mip, "PDLP finished. Telling others to stop"); - *settings.concurrent_halt = 1; + *settings.concurrent_halt = 1; } } return sol; @@ -1133,12 +1139,11 @@ optimization_problem_solution_t solve_lp_with_method( } } else { // Float precision only supports PDLP without presolve/crossover - // TODO when running with cuopt_cli this doesn't show, should we just use CUOPT_LOG_INFO instead? cuopt_expects(settings.method == method_t::PDLP, error_type_t::ValidationError, "Float precision only supports PDLP method. 
DualSimplex, Barrier, and Concurrent " "require double precision."); - return run_pdlp(problem, settings, timer, is_batch_mode); + return run_pdlp(problem, settings, timer, is_batch_mode); } } diff --git a/cpp/tests/linear_programming/pdlp_test.cu b/cpp/tests/linear_programming/pdlp_test.cu index 8f47cef65b..0eb9fb60ad 100644 --- a/cpp/tests/linear_programming/pdlp_test.cu +++ b/cpp/tests/linear_programming/pdlp_test.cu @@ -20,9 +20,9 @@ #include #include #include +#include #include #include -#include #include #include @@ -85,8 +85,8 @@ TEST(pdlp_class, mixed_precision_spmv) cuopt::mps_parser::mps_data_model_t op_problem = cuopt::mps_parser::parse_mps(path, true); - auto settings_mixed = pdlp_solver_settings_t{}; - settings_mixed.method = cuopt::linear_programming::method_t::PDLP; + auto settings_mixed = pdlp_solver_settings_t{}; + settings_mixed.method = cuopt::linear_programming::method_t::PDLP; settings_mixed.mixed_precision_spmv = true; optimization_problem_solution_t solution_mixed = @@ -96,8 +96,8 @@ TEST(pdlp_class, mixed_precision_spmv) afiro_primal_objective, solution_mixed.get_additional_termination_information().primal_objective)); - auto settings_full = pdlp_solver_settings_t{}; - settings_full.method = cuopt::linear_programming::method_t::PDLP; + auto settings_full = pdlp_solver_settings_t{}; + settings_full.method = cuopt::linear_programming::method_t::PDLP; settings_full.mixed_precision_spmv = false; optimization_problem_solution_t solution_full = @@ -1943,9 +1943,9 @@ TEST(pdlp_class, run_float32) solve_lp(&handle_, op_problem, solver_settings); EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); - EXPECT_FALSE(is_incorrect_objective( - afiro_primal_objective_f32, - solution.get_additional_termination_information().primal_objective)); + EXPECT_FALSE( + is_incorrect_objective(afiro_primal_objective_f32, + solution.get_additional_termination_information().primal_objective)); } TEST(pdlp_class, 
float32_dual_simplex_throws_validation_error) @@ -2011,9 +2011,9 @@ TEST(pdlp_class, float32_papilo_presolve_works) optimization_problem_solution_t solution = solve_lp(&handle_, op_problem, solver_settings); EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); - EXPECT_FALSE(is_incorrect_objective( - afiro_primal_objective_f32, - solution.get_additional_termination_information().primal_objective)); + EXPECT_FALSE( + is_incorrect_objective(afiro_primal_objective_f32, + solution.get_additional_termination_information().primal_objective)); } TEST(pdlp_class, float32_pslp_presolve_works) @@ -2031,9 +2031,9 @@ TEST(pdlp_class, float32_pslp_presolve_works) optimization_problem_solution_t solution = solve_lp(&handle_, op_problem, solver_settings); EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); - EXPECT_FALSE(is_incorrect_objective( - afiro_primal_objective_f32, - solution.get_additional_termination_information().primal_objective)); + EXPECT_FALSE( + is_incorrect_objective(afiro_primal_objective_f32, + solution.get_additional_termination_information().primal_objective)); } TEST(pdlp_class, float32_crossover_throws_validation_error) @@ -2044,8 +2044,8 @@ TEST(pdlp_class, float32_crossover_throws_validation_error) cuopt::mps_parser::mps_data_model_t op_problem = cuopt::mps_parser::parse_mps(path, true); - auto solver_settings = pdlp_solver_settings_t{}; - solver_settings.method = cuopt::linear_programming::method_t::PDLP; + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; solver_settings.crossover = true; optimization_problem_solution_t solution = From 342ba3210174e60234b669fd1ee807f2a61bcc3a Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Fri, 27 Feb 2026 15:41:24 +0100 Subject: [PATCH 08/23] use || for all pdlp float instanciation --- cpp/src/dual_simplex/sparse_matrix.cpp | 2 +- cpp/src/math_optimization/solution_writer.cu | 2 +- 
.../mip_heuristics/local_search/rounding/simple_rounding.cu | 2 +- cpp/src/mip_heuristics/problem/problem.cu | 2 +- cpp/src/mip_heuristics/solution/solution.cu | 2 +- cpp/src/mip_heuristics/solver_solution.cu | 2 +- cpp/src/pdlp/cpu_pdlp_warm_start_data.cu | 2 +- cpp/src/pdlp/cusparse_view.cu | 4 ++-- cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu | 2 +- .../optimal_batch_size_handler/optimal_batch_size_handler.cu | 2 +- cpp/src/pdlp/optimization_problem.cu | 2 +- cpp/src/pdlp/pdhg.cu | 2 +- cpp/src/pdlp/pdlp.cu | 2 +- cpp/src/pdlp/pdlp_warm_start_data.cu | 2 +- .../pdlp/restart_strategy/localized_duality_gap_container.cu | 2 +- cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu | 2 +- cpp/src/pdlp/restart_strategy/weighted_average_solution.cu | 2 +- cpp/src/pdlp/saddle_point.cu | 2 +- cpp/src/pdlp/solution_conversion.cu | 2 +- cpp/src/pdlp/solve.cu | 2 +- cpp/src/pdlp/solver_settings.cu | 2 +- cpp/src/pdlp/solver_solution.cu | 2 +- .../pdlp/step_size_strategy/adaptive_step_size_strategy.cu | 2 +- cpp/src/pdlp/termination_strategy/convergence_information.cu | 2 +- .../pdlp/termination_strategy/infeasibility_information.cu | 2 +- cpp/src/pdlp/termination_strategy/termination_strategy.cu | 2 +- cpp/src/pdlp/utilities/problem_checking.cu | 2 +- cpp/tests/linear_programming/pdlp_test.cu | 4 ++-- 28 files changed, 30 insertions(+), 30 deletions(-) diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp index 343cf41498..399114c28c 100644 --- a/cpp/src/dual_simplex/sparse_matrix.cpp +++ b/cpp/src/dual_simplex/sparse_matrix.cpp @@ -933,7 +933,7 @@ f_t sparse_dot(const std::vector& xind, return dot; } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT // Minimal float instantiation for LP usage template class csc_matrix_t; template class csr_matrix_t; diff --git a/cpp/src/math_optimization/solution_writer.cu b/cpp/src/math_optimization/solution_writer.cu index f98eb28bfb..880127546d 100644 --- 
a/cpp/src/math_optimization/solution_writer.cu +++ b/cpp/src/math_optimization/solution_writer.cu @@ -42,7 +42,7 @@ void solution_writer_t::write_solution_to_sol_file(const std::string& filename, } } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template void solution_writer_t::write_solution_to_sol_file( const std::string& filename, const std::string& status, diff --git a/cpp/src/mip_heuristics/local_search/rounding/simple_rounding.cu b/cpp/src/mip_heuristics/local_search/rounding/simple_rounding.cu index ee62ea906e..4f3a015a6c 100644 --- a/cpp/src/mip_heuristics/local_search/rounding/simple_rounding.cu +++ b/cpp/src/mip_heuristics/local_search/rounding/simple_rounding.cu @@ -179,7 +179,7 @@ void invoke_correct_integers(solution_t& solution, f_t tol) template void invoke_correct_integers(solution_t & solution, \ F_TYPE tol); -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/mip_heuristics/problem/problem.cu b/cpp/src/mip_heuristics/problem/problem.cu index 87a6683331..7990974d9f 100644 --- a/cpp/src/mip_heuristics/problem/problem.cu +++ b/cpp/src/mip_heuristics/problem/problem.cu @@ -2292,7 +2292,7 @@ void problem_t::update_variable_bounds(const std::vector& var_ind RAFT_CHECK_CUDA(handle_ptr->get_stream()); } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class problem_t; #endif diff --git a/cpp/src/mip_heuristics/solution/solution.cu b/cpp/src/mip_heuristics/solution/solution.cu index ccfc842b81..531d54372c 100644 --- a/cpp/src/mip_heuristics/solution/solution.cu +++ b/cpp/src/mip_heuristics/solution/solution.cu @@ -660,7 +660,7 @@ mip_solution_t solution_t::get_solution(bool output_feasible } } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class solution_t; #endif diff --git a/cpp/src/mip_heuristics/solver_solution.cu b/cpp/src/mip_heuristics/solver_solution.cu 
index 57d697ae2d..e497a21c8f 100644 --- a/cpp/src/mip_heuristics/solver_solution.cu +++ b/cpp/src/mip_heuristics/solver_solution.cu @@ -234,7 +234,7 @@ void mip_solution_t::log_summary() const CUOPT_LOG_INFO("Total Solve Time: %f", get_total_solve_time()); } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class mip_solution_t; #endif diff --git a/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu b/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu index 6038056052..b078bc4779 100644 --- a/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu +++ b/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu @@ -115,7 +115,7 @@ template pdlp_warm_start_data_t convert_to_gpu_warmstart( const cpu_pdlp_warm_start_data_t&, rmm::cuda_stream_view); #endif -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template cpu_pdlp_warm_start_data_t convert_to_cpu_warmstart( const pdlp_warm_start_data_t&, rmm::cuda_stream_view); diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index 00903c986a..6e167b5922 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -1148,7 +1148,7 @@ void mixed_precision_spmv_preprocess(cusparseHandle_t handle, } #endif -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class cusparse_sp_mat_descr_wrapper_t; template class cusparse_dn_vec_descr_wrapper_t; template class cusparse_dn_mat_descr_wrapper_t; @@ -1162,7 +1162,7 @@ template class cusparse_view_t; #endif #if CUDA_VER_12_4_UP -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template void my_cusparsespmm_preprocess(cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, diff --git a/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu b/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu index 5562bfa80b..b618550f6e 100644 --- a/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu +++ 
b/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu @@ -858,7 +858,7 @@ pdlp_initial_scaling_strategy_t::view() int* A_T_offsets, \ int* A_T_indices); -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/pdlp/optimal_batch_size_handler/optimal_batch_size_handler.cu b/cpp/src/pdlp/optimal_batch_size_handler/optimal_batch_size_handler.cu index 2fef13eb5f..cbfb03618d 100644 --- a/cpp/src/pdlp/optimal_batch_size_handler/optimal_batch_size_handler.cu +++ b/cpp/src/pdlp/optimal_batch_size_handler/optimal_batch_size_handler.cu @@ -434,7 +434,7 @@ int optimal_batch_size_handler(const optimization_problem_t& op_proble return 0; } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template int optimal_batch_size_handler( const optimization_problem_t& op_problem, int max_batch_size); #endif diff --git a/cpp/src/pdlp/optimization_problem.cu b/cpp/src/pdlp/optimization_problem.cu index 8537c3114a..302dd9cf16 100644 --- a/cpp/src/pdlp/optimization_problem.cu +++ b/cpp/src/pdlp/optimization_problem.cu @@ -1063,7 +1063,7 @@ bool optimization_problem_t::has_quadratic_objective() const return !Q_values_.empty(); } // NOTE: Explicitly instantiate all types here in order to avoid linker error -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class optimization_problem_t; #endif #if MIP_INSTANTIATE_DOUBLE diff --git a/cpp/src/pdlp/pdhg.cu b/cpp/src/pdlp/pdhg.cu index a606eea8aa..74df7fee01 100644 --- a/cpp/src/pdlp/pdhg.cu +++ b/cpp/src/pdlp/pdhg.cu @@ -1246,7 +1246,7 @@ rmm::device_uvector& pdhg_solver_t::get_dual_solution() return current_saddle_point_state_.get_dual_solution(); } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class pdhg_solver_t; #endif #if MIP_INSTANTIATE_DOUBLE diff --git a/cpp/src/pdlp/pdlp.cu b/cpp/src/pdlp/pdlp.cu index 20fc4d1ce3..81f7e3815d 100644 --- 
a/cpp/src/pdlp/pdlp.cu +++ b/cpp/src/pdlp/pdlp.cu @@ -2971,7 +2971,7 @@ pdlp_solver_t::get_current_termination_strategy() return current_termination_strategy_; } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class pdlp_solver_t; template __global__ void compute_weights_initial_primal_weight_from_squared_norms( diff --git a/cpp/src/pdlp/pdlp_warm_start_data.cu b/cpp/src/pdlp/pdlp_warm_start_data.cu index 9c293093ab..80abf015d8 100644 --- a/cpp/src/pdlp/pdlp_warm_start_data.cu +++ b/cpp/src/pdlp/pdlp_warm_start_data.cu @@ -178,7 +178,7 @@ void pdlp_warm_start_data_t::check_sizes() "All dual vectors should be of same size"); } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class pdlp_warm_start_data_t; #endif diff --git a/cpp/src/pdlp/restart_strategy/localized_duality_gap_container.cu b/cpp/src/pdlp/restart_strategy/localized_duality_gap_container.cu index b25fd34948..bb79e5b6e6 100644 --- a/cpp/src/pdlp/restart_strategy/localized_duality_gap_container.cu +++ b/cpp/src/pdlp/restart_strategy/localized_duality_gap_container.cu @@ -144,7 +144,7 @@ localized_duality_gap_container_t::view() return v; } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template struct localized_duality_gap_container_t; #endif #if MIP_INSTANTIATE_DOUBLE diff --git a/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu b/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu index 7656c7f2f0..149e99a431 100644 --- a/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu +++ b/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu @@ -2523,7 +2523,7 @@ bool pdlp_restart_strategy_t::get_last_restart_was_average() const const typename localized_duality_gap_container_t::view_t duality_gap_view, \ F_TYPE* primal_product); -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git 
a/cpp/src/pdlp/restart_strategy/weighted_average_solution.cu b/cpp/src/pdlp/restart_strategy/weighted_average_solution.cu index 5eb54eb215..70a448a9de 100644 --- a/cpp/src/pdlp/restart_strategy/weighted_average_solution.cu +++ b/cpp/src/pdlp/restart_strategy/weighted_average_solution.cu @@ -139,7 +139,7 @@ i_t weighted_average_solution_t::get_iterations_since_last_restart() c return iterations_since_last_restart_; } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template __global__ void add_weight_sums(const float* primal_weight, const float* dual_weight, float* sum_primal_solution_weights, diff --git a/cpp/src/pdlp/saddle_point.cu b/cpp/src/pdlp/saddle_point.cu index 7cf653dbaa..157e7fa389 100644 --- a/cpp/src/pdlp/saddle_point.cu +++ b/cpp/src/pdlp/saddle_point.cu @@ -166,7 +166,7 @@ rmm::device_uvector& saddle_point_state_t::get_next_AtY() return next_AtY_; } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class saddle_point_state_t; #endif diff --git a/cpp/src/pdlp/solution_conversion.cu b/cpp/src/pdlp/solution_conversion.cu index a0d4f88aef..873a340646 100644 --- a/cpp/src/pdlp/solution_conversion.cu +++ b/cpp/src/pdlp/solution_conversion.cu @@ -235,7 +235,7 @@ template cuopt::cython::linear_programming_ret_t cpu_lp_solution_t::to_cpu_linear_programming_ret_t(); template cuopt::cython::mip_ret_t cpu_mip_solution_t::to_cpu_mip_ret_t(); -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template cuopt::cython::linear_programming_ret_t gpu_lp_solution_t::to_linear_programming_ret_t(); template cuopt::cython::linear_programming_ret_t diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index f7f7a5d63e..cf44fc538f 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -1568,7 +1568,7 @@ std::unique_ptr> solve_lp( const cuopt::mps_parser::mps_data_model_t& data_model); \ template void set_pdlp_solver_mode(pdlp_solver_settings_t& settings); -#if 
PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/pdlp/solver_settings.cu b/cpp/src/pdlp/solver_settings.cu index 683f5121a0..7acfc7481c 100644 --- a/cpp/src/pdlp/solver_settings.cu +++ b/cpp/src/pdlp/solver_settings.cu @@ -382,7 +382,7 @@ pdlp_solver_settings_t::get_pdlp_warm_start_data_view() const noexcept return pdlp_warm_start_data_view_; } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class pdlp_solver_settings_t; #endif diff --git a/cpp/src/pdlp/solver_solution.cu b/cpp/src/pdlp/solver_solution.cu index e516b95d7b..10e6a80593 100644 --- a/cpp/src/pdlp/solver_solution.cu +++ b/cpp/src/pdlp/solver_solution.cu @@ -448,7 +448,7 @@ void optimization_problem_solution_t::write_to_sol_file( std::string(filename), status, objective_value, var_names_, solution); } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class optimization_problem_solution_t; #endif diff --git a/cpp/src/pdlp/step_size_strategy/adaptive_step_size_strategy.cu b/cpp/src/pdlp/step_size_strategy/adaptive_step_size_strategy.cu index bd6e8c63cf..d17a88dd29 100644 --- a/cpp/src/pdlp/step_size_strategy/adaptive_step_size_strategy.cu +++ b/cpp/src/pdlp/step_size_strategy/adaptive_step_size_strategy.cu @@ -578,7 +578,7 @@ adaptive_step_size_strategy_t::view() F_TYPE * dual_step_size, \ int* pdhg_iteration); -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/pdlp/termination_strategy/convergence_information.cu b/cpp/src/pdlp/termination_strategy/convergence_information.cu index 1bf6fead1e..ab0c921cc7 100644 --- a/cpp/src/pdlp/termination_strategy/convergence_information.cu +++ b/cpp/src/pdlp/termination_strategy/convergence_information.cu @@ -996,7 +996,7 @@ convergence_information_t::to_primal_quality_adapter( primal_objective_.element(0, stream_view_)}; } -#if 
PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class convergence_information_t; template __global__ void compute_remaining_stats_kernel( diff --git a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu index bc87207e83..dbb35b732d 100644 --- a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu +++ b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu @@ -745,7 +745,7 @@ typename infeasibility_information_t::view_t infeasibility_information return v; } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class infeasibility_information_t; template __global__ void compute_remaining_stats_kernel( diff --git a/cpp/src/pdlp/termination_strategy/termination_strategy.cu b/cpp/src/pdlp/termination_strategy/termination_strategy.cu index d682151dbc..7179df6a49 100644 --- a/cpp/src/pdlp/termination_strategy/termination_strategy.cu +++ b/cpp/src/pdlp/termination_strategy/termination_strategy.cu @@ -681,7 +681,7 @@ void pdlp_termination_strategy_t::print_termination_criteria(i_t itera bool per_constraint_residual, \ int batch_size); -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/pdlp/utilities/problem_checking.cu b/cpp/src/pdlp/utilities/problem_checking.cu index a3e088d2a3..b10850de27 100644 --- a/cpp/src/pdlp/utilities/problem_checking.cu +++ b/cpp/src/pdlp/utilities/problem_checking.cu @@ -340,7 +340,7 @@ bool problem_checking_t::has_crossing_bounds( #define INSTANTIATE(F_TYPE) template class problem_checking_t; -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/tests/linear_programming/pdlp_test.cu b/cpp/tests/linear_programming/pdlp_test.cu index 0eb9fb60ad..a46649be7c 100644 --- a/cpp/tests/linear_programming/pdlp_test.cu +++ 
b/cpp/tests/linear_programming/pdlp_test.cu @@ -47,7 +47,7 @@ namespace cuopt::linear_programming::test { constexpr double afiro_primal_objective = -464.0; -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT constexpr float afiro_primal_objective_f32 = -464.0f; #endif // Accept a 1% error @@ -1927,7 +1927,7 @@ TEST(pdlp_class, some_climber_hit_iteration_limit) } } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT TEST(pdlp_class, run_float32) { const raft::handle_t handle_{}; From 366fd6a96807725b67a8399b8065ffa1ae7d7360 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Fri, 27 Feb 2026 16:03:32 +0100 Subject: [PATCH 09/23] address PR comments --- cpp/src/math_optimization/solver_settings.cu | 6 +- cpp/src/pdlp/cusparse_view.cu | 84 +++++++++----------- cpp/src/pdlp/cusparse_view.hpp | 4 +- cpp/src/pdlp/pdhg.hpp | 2 +- cpp/src/pdlp/solve.cu | 22 +++-- 5 files changed, 59 insertions(+), 59 deletions(-) diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index 438390ebd6..696f9d34d4 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -69,10 +69,10 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_MIP_ABSOLUTE_TOLERANCE, &mip_settings.tolerances.absolute_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, {CUOPT_MIP_RELATIVE_TOLERANCE, &mip_settings.tolerances.relative_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, {CUOPT_MIP_INTEGRALITY_TOLERANCE, &mip_settings.tolerances.integrality_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-5)}, - {CUOPT_MIP_ABSOLUTE_GAP, &mip_settings.tolerances.absolute_mip_gap, f_t(0.0), std::numeric_limits::infinity(), f_t(1e-10)}, + {CUOPT_MIP_ABSOLUTE_GAP, &mip_settings.tolerances.absolute_mip_gap, f_t(0.0), std::numeric_limits::infinity(), std::max(f_t(1e-10), std::numeric_limits::epsilon())}, {CUOPT_MIP_RELATIVE_GAP, 
&mip_settings.tolerances.relative_mip_gap, f_t(0.0), f_t(1e-1), f_t(1e-4)}, - {CUOPT_PRIMAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.primal_infeasible_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-10)}, - {CUOPT_DUAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.dual_infeasible_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-10)}, + {CUOPT_PRIMAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.primal_infeasible_tolerance, f_t(0.0), f_t(1e-1), std::max(f_t(1e-10), std::numeric_limits::epsilon())}, + {CUOPT_DUAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.dual_infeasible_tolerance, f_t(0.0), f_t(1e-1), std::max(f_t(1e-10), std::numeric_limits::epsilon())}, {CUOPT_MIP_CUT_CHANGE_THRESHOLD, &mip_settings.cut_change_threshold, f_t(0.0), std::numeric_limits::infinity(), f_t(1e-3)}, {CUOPT_MIP_CUT_MIN_ORTHOGONALITY, &mip_settings.cut_min_orthogonality, f_t(0.0), f_t(1.0), f_t(0.5)} }; diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index 6e167b5922..23ff3711d8 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -597,47 +597,37 @@ cusparse_view_t::cusparse_view_t( #endif if constexpr (std::is_same_v) { - if (enable_mixed_precision_spmv) { + if (enable_mixed_precision_spmv && !batch_mode_) { mixed_precision_enabled_ = true; A_float_.resize(op_problem_scaled.nnz, handle_ptr->get_stream()); A_T_float_.resize(op_problem_scaled.nnz, handle_ptr->get_stream()); - cub::DeviceTransform::Transform(op_problem_scaled.coefficients.data(), - A_float_.data(), - op_problem_scaled.nnz, - double_to_float_functor{}, - handle_ptr->get_stream().value()); - - cub::DeviceTransform::Transform(A_T_.data(), - A_T_float_.data(), - op_problem_scaled.nnz, - double_to_float_functor{}, - handle_ptr->get_stream().value()); - - RAFT_CUSPARSE_TRY(cusparseCreateCsr(&A_mixed_, - op_problem_scaled.n_constraints, - op_problem_scaled.n_variables, - op_problem_scaled.nnz, - const_cast(op_problem_scaled.offsets.data()), - 
const_cast(op_problem_scaled.variables.data()), - A_float_.data(), - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_32F)); - - RAFT_CUSPARSE_TRY(cusparseCreateCsr(&A_T_mixed_, - op_problem_scaled.n_variables, - op_problem_scaled.n_constraints, - op_problem_scaled.nnz, - const_cast(A_T_offsets_.data()), - const_cast(A_T_indices_.data()), - A_T_float_.data(), - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_32F)); + RAFT_CUDA_TRY(cub::DeviceTransform::Transform(op_problem_scaled.coefficients.data(), + A_float_.data(), + op_problem_scaled.nnz, + double_to_float_functor{}, + handle_ptr->get_stream().value())); + + RAFT_CUDA_TRY(cub::DeviceTransform::Transform(A_T_.data(), + A_T_float_.data(), + op_problem_scaled.nnz, + double_to_float_functor{}, + handle_ptr->get_stream().value())); + + A_mixed_.create(op_problem_scaled.n_constraints, + op_problem_scaled.n_variables, + op_problem_scaled.nnz, + const_cast(op_problem_scaled.offsets.data()), + const_cast(op_problem_scaled.variables.data()), + A_float_.data()); + + A_T_mixed_.create(op_problem_scaled.n_variables, + op_problem_scaled.n_constraints, + op_problem_scaled.nnz, + const_cast(A_T_offsets_.data()), + const_cast(A_T_indices_.data()), + A_T_float_.data()); const rmm::device_scalar alpha_d{1.0, handle_ptr->get_stream()}; const rmm::device_scalar beta_d{0.0, handle_ptr->get_stream()}; @@ -1076,17 +1066,17 @@ void cusparse_view_t::update_mixed_precision_matrices() if constexpr (std::is_same_v) { if (!mixed_precision_enabled_) { return; } - cub::DeviceTransform::Transform(A_.data(), - A_float_.data(), - A_.size(), - double_to_float_functor{}, - handle_ptr_->get_stream().value()); - - cub::DeviceTransform::Transform(A_T_.data(), - A_T_float_.data(), - A_T_.size(), - double_to_float_functor{}, - handle_ptr_->get_stream().value()); + RAFT_CUDA_TRY(cub::DeviceTransform::Transform(A_.data(), + A_float_.data(), + A_.size(), + double_to_float_functor{}, + 
handle_ptr_->get_stream().value())); + + RAFT_CUDA_TRY(cub::DeviceTransform::Transform(A_T_.data(), + A_T_float_.data(), + A_T_.size(), + double_to_float_functor{}, + handle_ptr_->get_stream().value())); handle_ptr_->get_stream().synchronize(); } diff --git a/cpp/src/pdlp/cusparse_view.hpp b/cpp/src/pdlp/cusparse_view.hpp index 2eb3358fe1..e83915e37d 100644 --- a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -200,8 +200,8 @@ class cusparse_view_t { // Only used when mixed_precision_enabled_ is true and f_t = double rmm::device_uvector A_float_; // FP32 copy of A values rmm::device_uvector A_T_float_; // FP32 copy of A_T values - cusparseSpMatDescr_t A_mixed_{nullptr}; // FP32 matrix descriptor for A - cusparseSpMatDescr_t A_T_mixed_{nullptr}; // FP32 matrix descriptor for A_T + cusparse_sp_mat_descr_wrapper_t A_mixed_; // FP32 matrix descriptor for A + cusparse_sp_mat_descr_wrapper_t A_T_mixed_; // FP32 matrix descriptor for A_T rmm::device_uvector buffer_non_transpose_mixed_; // SpMV buffer for mixed precision A rmm::device_uvector buffer_transpose_mixed_; // SpMV buffer for mixed precision A_T bool mixed_precision_enabled_{false}; diff --git a/cpp/src/pdlp/pdhg.hpp b/cpp/src/pdlp/pdhg.hpp index 32722eae49..0a64e49efb 100644 --- a/cpp/src/pdlp/pdhg.hpp +++ b/cpp/src/pdlp/pdhg.hpp @@ -30,7 +30,7 @@ class pdhg_solver_t { const std::vector& climber_strategies, const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params, const std::vector>& new_bounds, - bool enable_mixed_precision_spmv = true); + bool enable_mixed_precision_spmv = false); saddle_point_state_t& get_saddle_point_state(); cusparse_view_t& get_cusparse_view(); diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index cf44fc538f..68526be45e 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -583,6 +583,21 @@ optimization_problem_solution_t run_pdlp(detail::problem_t& const timer_t& timer, bool is_batch_mode) { + if constexpr (!std::is_same_v) { + 
cuopt_expects(!settings.crossover, + error_type_t::ValidationError, + "PDLP with crossover is not supported for float precision. Set crossover=false " + "or use double precision."); + cuopt_expects(!is_batch_mode, + error_type_t::ValidationError, + "PDLP batch mode is not supported for float precision. Use double precision."); + } + cuopt_expects( + !is_batch_mode || !settings.mixed_precision_spmv, + error_type_t::ValidationError, + "Mixed-precision SpMV is not supported in batch mode. Set mixed_precision_spmv=false " + "or disable batch mode."); + auto start_solver = std::chrono::high_resolution_clock::now(); timer_t timer_pdlp(timer.remaining_time()); auto sol = run_pdlp_solver(problem, settings, timer, is_batch_mode); @@ -604,12 +619,7 @@ optimization_problem_solution_t run_pdlp(detail::problem_t& sol.get_solve_time()); } - if constexpr (!std::is_same_v) { - cuopt_expects(!settings.crossover, - error_type_t::ValidationError, - "PDLP with crossover is not supported for float precision. 
Set crossover=false " - "or use double precision."); - } else { + if constexpr (std::is_same_v) { const bool do_crossover = settings.crossover; i_t crossover_info = 0; if (do_crossover && sol.get_termination_status() == pdlp_termination_status_t::Optimal) { From abf13f3e65b9e8870092401eed7d320ac6cb0289 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Fri, 27 Feb 2026 17:14:19 +0100 Subject: [PATCH 10/23] add forgotten parameter --- cpp/src/math_optimization/solver_settings.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index 696f9d34d4..a211b418bb 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -60,6 +60,7 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings float_parameters = { {CUOPT_TIME_LIMIT, &mip_settings.time_limit, f_t(0.0), std::numeric_limits::infinity(), std::numeric_limits::infinity()}, {CUOPT_TIME_LIMIT, &pdlp_settings.time_limit, f_t(0.0), std::numeric_limits::infinity(), std::numeric_limits::infinity()}, + {CUOPT_WORK_LIMIT, &mip_settings.work_limit, f_t(0.0), std::numeric_limits::infinity(), std::numeric_limits::infinity()}, {CUOPT_ABSOLUTE_DUAL_TOLERANCE, &pdlp_settings.tolerances.absolute_dual_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, {CUOPT_RELATIVE_DUAL_TOLERANCE, &pdlp_settings.tolerances.relative_dual_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, {CUOPT_ABSOLUTE_PRIMAL_TOLERANCE, &pdlp_settings.tolerances.absolute_primal_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, From db6b2a33e3cb27f4ce803c408e7d9eae8784f588 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Mon, 2 Mar 2026 18:04:18 +0100 Subject: [PATCH 11/23] handle cuda version not supporting mixed precision --- cpp/src/pdlp/solve.cu | 5 +++++ cpp/tests/linear_programming/pdlp_test.cu | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index 
68526be45e..5c854671c5 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -592,6 +592,11 @@ optimization_problem_solution_t run_pdlp(detail::problem_t& error_type_t::ValidationError, "PDLP batch mode is not supported for float precision. Use double precision."); } +#if CUDART_VERSION < 12050 + cuopt_expects(!settings.mixed_precision_spmv, + error_type_t::ValidationError, + "Mixed-precision SpMV requires CUDA 12.5 or later."); +#endif cuopt_expects( !is_batch_mode || !settings.mixed_precision_spmv, error_type_t::ValidationError, diff --git a/cpp/tests/linear_programming/pdlp_test.cu b/cpp/tests/linear_programming/pdlp_test.cu index a46649be7c..dc27c74703 100644 --- a/cpp/tests/linear_programming/pdlp_test.cu +++ b/cpp/tests/linear_programming/pdlp_test.cu @@ -77,6 +77,7 @@ TEST(pdlp_class, run_double) afiro_primal_objective, solution.get_additional_termination_information().primal_objective)); } +#if CUDART_VERSION >= 12050 TEST(pdlp_class, mixed_precision_spmv) { const raft::handle_t handle_{}; @@ -111,6 +112,23 @@ TEST(pdlp_class, mixed_precision_spmv) solution_full.get_additional_termination_information().primal_objective, 1e-2); } +#else +TEST(pdlp_class, mixed_precision_spmv_rejected_before_cuda_12_5) +{ + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto settings = pdlp_solver_settings_t{}; + settings.method = cuopt::linear_programming::method_t::PDLP; + settings.mixed_precision_spmv = true; + + optimization_problem_solution_t solution = solve_lp(&handle_, op_problem, settings); + EXPECT_EQ(solution.get_error_status().get_error_type(), cuopt::error_type_t::ValidationError); +} +#endif TEST(pdlp_class, run_double_very_low_accuracy) { From a3dd383096ef4e179bf96ef78dff3624e18b41a4 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Tue, 3 Mar 2026 10:11:16 +0100 Subject: [PATCH 12/23] fix 
compilation issue following the recent main merge --- cpp/src/pdlp/optimization_problem.cu | 471 --------------------------- cpp/src/pdlp/solve_remote.cu | 9 + 2 files changed, 9 insertions(+), 471 deletions(-) diff --git a/cpp/src/pdlp/optimization_problem.cu b/cpp/src/pdlp/optimization_problem.cu index 2066bf5fd1..c43406bff9 100644 --- a/cpp/src/pdlp/optimization_problem.cu +++ b/cpp/src/pdlp/optimization_problem.cu @@ -1505,477 +1505,6 @@ void optimization_problem_t::copy_variable_types_to_host(var_t* output cudaMemcpy(output, variable_types_.data(), size * sizeof(var_t), cudaMemcpyDeviceToHost)); } -template -void optimization_problem_t::set_objective_name(const std::string& objective_name) -{ - objective_name_ = objective_name; -} -template -void optimization_problem_t::set_problem_name(const std::string& problem_name) -{ - problem_name_ = problem_name; -} -template -void optimization_problem_t::set_variable_names( - const std::vector& variable_names) -{ - var_names_ = variable_names; -} -template -void optimization_problem_t::set_row_names(const std::vector& row_names) -{ - row_names_ = row_names; -} - -// ============================================================================ -// Getters -// ============================================================================ - -template -i_t optimization_problem_t::get_n_variables() const -{ - return n_vars_; -} - -template -i_t optimization_problem_t::get_n_constraints() const -{ - return n_constraints_; -} - -template -i_t optimization_problem_t::get_nnz() const -{ - return A_.size(); -} - -template -i_t optimization_problem_t::get_n_integers() const -{ - if (variable_types_.size() == 0) return 0; - - return thrust::count(rmm::exec_policy(handle_ptr_->get_stream()), - variable_types_.begin(), - variable_types_.end(), - var_t::INTEGER); -} - -template -raft::handle_t const* optimization_problem_t::get_handle_ptr() const noexcept -{ - return handle_ptr_; -} - -template -const rmm::device_uvector& 
optimization_problem_t::get_constraint_matrix_values() - const -{ - return A_; -} - -template -rmm::device_uvector& optimization_problem_t::get_constraint_matrix_values() -{ - return A_; -} - -template -const rmm::device_uvector& optimization_problem_t::get_constraint_matrix_indices() - const -{ - return A_indices_; -} - -template -rmm::device_uvector& optimization_problem_t::get_constraint_matrix_indices() -{ - return A_indices_; -} - -template -const rmm::device_uvector& optimization_problem_t::get_constraint_matrix_offsets() - const -{ - return A_offsets_; -} - -template -rmm::device_uvector& optimization_problem_t::get_constraint_matrix_offsets() -{ - return A_offsets_; -} - -template -const rmm::device_uvector& optimization_problem_t::get_constraint_bounds() const -{ - return b_; -} - -template -rmm::device_uvector& optimization_problem_t::get_constraint_bounds() -{ - return b_; -} - -template -const rmm::device_uvector& optimization_problem_t::get_objective_coefficients() const -{ - return c_; -} - -template -rmm::device_uvector& optimization_problem_t::get_objective_coefficients() -{ - return c_; -} - -template -f_t optimization_problem_t::get_objective_scaling_factor() const -{ - return objective_scaling_factor_; -} - -template -f_t optimization_problem_t::get_objective_offset() const -{ - return objective_offset_; -} - -template -const std::vector& optimization_problem_t::get_quadratic_objective_values() const -{ - return Q_values_; -} - -template -const std::vector& optimization_problem_t::get_quadratic_objective_indices() const -{ - return Q_indices_; -} - -template -const std::vector& optimization_problem_t::get_quadratic_objective_offsets() const -{ - return Q_offsets_; -} - -template -const rmm::device_uvector& optimization_problem_t::get_variable_lower_bounds() const -{ - return variable_lower_bounds_; -} - -template -const rmm::device_uvector& optimization_problem_t::get_variable_upper_bounds() const -{ - return variable_upper_bounds_; -} - 
-template -rmm::device_uvector& optimization_problem_t::get_variable_lower_bounds() -{ - return variable_lower_bounds_; -} - -template -rmm::device_uvector& optimization_problem_t::get_variable_upper_bounds() -{ - return variable_upper_bounds_; -} -template -const rmm::device_uvector& optimization_problem_t::get_variable_types() const -{ - return variable_types_; -} - -template -const rmm::device_uvector& optimization_problem_t::get_constraint_lower_bounds() - const -{ - return constraint_lower_bounds_; -} - -template -const rmm::device_uvector& optimization_problem_t::get_constraint_upper_bounds() - const -{ - return constraint_upper_bounds_; -} - -template -rmm::device_uvector& optimization_problem_t::get_constraint_lower_bounds() -{ - return constraint_lower_bounds_; -} - -template -rmm::device_uvector& optimization_problem_t::get_constraint_upper_bounds() -{ - return constraint_upper_bounds_; -} - -template -const rmm::device_uvector& optimization_problem_t::get_row_types() const -{ - return row_types_; -} - -template -std::string optimization_problem_t::get_objective_name() const -{ - return objective_name_; -} - -template -std::string optimization_problem_t::get_problem_name() const -{ - return problem_name_; -} - -template -problem_category_t optimization_problem_t::get_problem_category() const -{ - return problem_category_; -} - -template -const std::vector& optimization_problem_t::get_variable_names() const -{ - return var_names_; -} - -template -const std::vector& optimization_problem_t::get_row_names() const -{ - return row_names_; -} - -template -bool optimization_problem_t::get_sense() const -{ - return maximize_; -} - -template -bool optimization_problem_t::empty() const -{ - return n_vars_ == 0 && n_constraints_ == 0; -} - -template -typename optimization_problem_t::view_t optimization_problem_t::view() const -{ - optimization_problem_t::view_t v; - v.n_vars = get_n_variables(); - v.n_constraints = get_n_constraints(); - v.nnz = get_nnz(); - v.A = 
raft::device_span{const_cast(get_constraint_matrix_values().data()), - get_constraint_matrix_values().size()}; - v.A_indices = raft::device_span{get_constraint_matrix_indices().data(), - get_constraint_matrix_indices().size()}; - v.A_offsets = raft::device_span{get_constraint_matrix_offsets().data(), - get_constraint_matrix_offsets().size()}; - v.b = - raft::device_span{get_constraint_bounds().data(), get_constraint_bounds().size()}; - v.c = raft::device_span{get_objective_coefficients().data(), - get_objective_coefficients().size()}; - v.variable_lower_bounds = raft::device_span{get_variable_lower_bounds().data(), - get_variable_lower_bounds().size()}; - v.variable_upper_bounds = raft::device_span{get_variable_upper_bounds().data(), - get_variable_upper_bounds().size()}; - v.constraint_lower_bounds = raft::device_span{get_constraint_lower_bounds().data(), - get_constraint_lower_bounds().size()}; - v.constraint_upper_bounds = raft::device_span{get_constraint_upper_bounds().data(), - get_constraint_upper_bounds().size()}; - return v; -} - -template -void optimization_problem_t::set_maximize(bool _maximize) -{ - maximize_ = _maximize; -} - -template -void optimization_problem_t::write_to_mps(const std::string& mps_file_path) -{ - cuopt::mps_parser::data_model_view_t data_model_view; - - // Set optimization sense - data_model_view.set_maximize(get_sense()); - - // Copy to host - auto stream = handle_ptr_->get_stream(); - auto constraint_matrix_values = cuopt::host_copy(get_constraint_matrix_values(), stream); - auto constraint_matrix_indices = cuopt::host_copy(get_constraint_matrix_indices(), stream); - auto constraint_matrix_offsets = cuopt::host_copy(get_constraint_matrix_offsets(), stream); - auto constraint_bounds = cuopt::host_copy(get_constraint_bounds(), stream); - auto objective_coefficients = cuopt::host_copy(get_objective_coefficients(), stream); - auto variable_lower_bounds = cuopt::host_copy(get_variable_lower_bounds(), stream); - auto 
variable_upper_bounds = cuopt::host_copy(get_variable_upper_bounds(), stream); - auto constraint_lower_bounds = cuopt::host_copy(get_constraint_lower_bounds(), stream); - auto constraint_upper_bounds = cuopt::host_copy(get_constraint_upper_bounds(), stream); - auto row_types = cuopt::host_copy(get_row_types(), stream); - - // Set constraint matrix in CSR format - if (get_nnz() != 0) { - data_model_view.set_csr_constraint_matrix(constraint_matrix_values.data(), - constraint_matrix_values.size(), - constraint_matrix_indices.data(), - constraint_matrix_indices.size(), - constraint_matrix_offsets.data(), - constraint_matrix_offsets.size()); - } - - // Set constraint bounds (RHS) - if (get_n_constraints() != 0) { - data_model_view.set_constraint_bounds(constraint_bounds.data(), constraint_bounds.size()); - } - - // Set objective coefficients - if (get_n_variables() != 0) { - data_model_view.set_objective_coefficients(objective_coefficients.data(), - objective_coefficients.size()); - } - - // Set objective scaling and offset - data_model_view.set_objective_scaling_factor(get_objective_scaling_factor()); - data_model_view.set_objective_offset(get_objective_offset()); - - // Set variable bounds - if (get_n_variables() != 0) { - data_model_view.set_variable_lower_bounds(variable_lower_bounds.data(), - variable_lower_bounds.size()); - data_model_view.set_variable_upper_bounds(variable_upper_bounds.data(), - variable_upper_bounds.size()); - } - - // Set row types (constraint types) - if (get_row_types().size() != 0) { - data_model_view.set_row_types(row_types.data(), row_types.size()); - } - - // Set constraint bounds (lower and upper) - if (get_constraint_lower_bounds().size() != 0 && get_constraint_upper_bounds().size() != 0) { - data_model_view.set_constraint_lower_bounds(constraint_lower_bounds.data(), - constraint_lower_bounds.size()); - data_model_view.set_constraint_upper_bounds(constraint_upper_bounds.data(), - constraint_upper_bounds.size()); - } - - // Create a 
temporary vector to hold the converted variable types - std::vector variable_types(get_n_variables()); - // Set variable types (convert from enum to char) - if (get_n_variables() != 0) { - auto enum_variable_types = cuopt::host_copy(get_variable_types(), stream); - - // Convert enum types to char types - for (size_t i = 0; i < variable_types.size(); ++i) { - variable_types[i] = (enum_variable_types[i] == var_t::INTEGER) ? 'I' : 'C'; - } - - data_model_view.set_variable_types(variable_types.data(), variable_types.size()); - } - - // Set problem and variable names if available - if (!get_problem_name().empty()) { data_model_view.set_problem_name(get_problem_name()); } - - if (!get_objective_name().empty()) { data_model_view.set_objective_name(get_objective_name()); } - - if (!get_variable_names().empty()) { data_model_view.set_variable_names(get_variable_names()); } - - if (!get_row_names().empty()) { data_model_view.set_row_names(get_row_names()); } - - cuopt::mps_parser::write_mps(data_model_view, mps_file_path); -} - -template -void optimization_problem_t::print_scaling_information() const -{ - auto stream = handle_ptr_->get_stream(); - std::vector constraint_matrix_values = - cuopt::host_copy(get_constraint_matrix_values(), stream); - std::vector constraint_rhs = cuopt::host_copy(get_constraint_bounds(), stream); - std::vector objective_coefficients = cuopt::host_copy(get_objective_coefficients(), stream); - std::vector variable_lower_bounds = cuopt::host_copy(get_variable_lower_bounds(), stream); - std::vector variable_upper_bounds = cuopt::host_copy(get_variable_upper_bounds(), stream); - std::vector constraint_lower_bounds = - cuopt::host_copy(get_constraint_lower_bounds(), stream); - std::vector constraint_upper_bounds = - cuopt::host_copy(get_constraint_upper_bounds(), stream); - - auto findMaxAbs = [](const std::vector& vec) -> f_t { - if (vec.empty()) { return 0.0; } - const f_t inf = std::numeric_limits::infinity(); - - const size_t sz = vec.size(); - f_t 
max_abs_val = f_t(0.0); - for (size_t i = 0; i < sz; ++i) { - const f_t val = std::abs(vec[i]); - if (val < inf) { max_abs_val = std::max(max_abs_val, val); } - } - return max_abs_val; - }; - - auto findMinAbs = [](const std::vector& vec) -> f_t { - if (vec.empty()) { return f_t(0.0); } - const size_t sz = vec.size(); - const f_t inf = std::numeric_limits::infinity(); - f_t min_abs_val = inf; - for (size_t i = 0; i < sz; ++i) { - const f_t val = std::abs(vec[i]); - if (val > f_t(0.0)) { min_abs_val = std::min(min_abs_val, val); } - } - return min_abs_val < inf ? min_abs_val : f_t(0.0); - }; - - f_t A_max = findMaxAbs(constraint_matrix_values); - f_t A_min = findMinAbs(constraint_matrix_values); - f_t b_max = findMaxAbs(constraint_rhs); - f_t b_min = findMinAbs(constraint_rhs); - f_t c_max = findMaxAbs(objective_coefficients); - f_t c_min = findMinAbs(objective_coefficients); - f_t x_lower_max = findMaxAbs(variable_lower_bounds); - f_t x_lower_min = findMinAbs(variable_lower_bounds); - f_t x_upper_max = findMaxAbs(variable_upper_bounds); - f_t x_upper_min = findMinAbs(variable_upper_bounds); - f_t cstr_lower_max = findMaxAbs(constraint_lower_bounds); - f_t cstr_lower_min = findMinAbs(constraint_lower_bounds); - f_t cstr_upper_max = findMaxAbs(constraint_upper_bounds); - f_t cstr_upper_min = findMinAbs(constraint_upper_bounds); - - f_t rhs_max = std::max(b_max, std::max(cstr_lower_max, cstr_upper_max)); - f_t rhs_min = std::min(b_min, std::min(cstr_lower_min, cstr_upper_min)); - - f_t bound_max = std::max(x_upper_max, x_lower_max); - f_t bound_min = std::min(x_upper_min, x_lower_min); - - CUOPT_LOG_INFO("Problem scaling:"); - CUOPT_LOG_INFO("Objective coefficents range: [%.0e, %.0e]", c_min, c_max); - CUOPT_LOG_INFO("Constraint matrix coefficients range: [%.0e, %.0e]", A_min, A_max); - CUOPT_LOG_INFO("Constraint rhs / bounds range: [%.0e, %.0e]", rhs_min, rhs_max); - CUOPT_LOG_INFO("Variable bounds range: [%.0e, %.0e]", bound_min, bound_max); - - auto safelog10 = 
[](f_t x) { return x > 0 ? std::log10(x) : 0.0; }; - - f_t obj_range = safelog10(c_max) - safelog10(c_min); - f_t A_range = safelog10(A_max) - safelog10(A_min); - f_t rhs_range = safelog10(rhs_max) - safelog10(rhs_min); - f_t bound_range = safelog10(bound_max) - safelog10(bound_min); - - if (obj_range >= 6.0 || A_range >= 6.0 || rhs_range >= 6.0 || bound_range >= 6.0) { - CUOPT_LOG_INFO( - "Warning: input problem contains a large range of coefficients: consider reformulating to " - "avoid numerical difficulties."); - } - CUOPT_LOG_INFO(""); -} - -template -bool optimization_problem_t::has_quadratic_objective() const -{ - return !Q_values_.empty(); -} // ============================================================================== // Template instantiations // ============================================================================== diff --git a/cpp/src/pdlp/solve_remote.cu b/cpp/src/pdlp/solve_remote.cu index a9bf7e3989..9ecd84b588 100644 --- a/cpp/src/pdlp/solve_remote.cu +++ b/cpp/src/pdlp/solve_remote.cu @@ -9,6 +9,7 @@ #include #include #include +#include #include namespace cuopt::linear_programming { @@ -108,6 +109,14 @@ std::unique_ptr> solve_mip_remote( } // Explicit template instantiations for remote execution stubs +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT +template std::unique_ptr> solve_lp_remote( + cpu_optimization_problem_t const&, + pdlp_solver_settings_t const&, + bool, + bool); +#endif + template std::unique_ptr> solve_lp_remote( cpu_optimization_problem_t const&, pdlp_solver_settings_t const&, From 14f905291dc61b970441f59708cbfcd393cca594 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Tue, 3 Mar 2026 11:45:04 +0100 Subject: [PATCH 13/23] fix cuda version guard to check cusparse version dynamically --- cpp/src/pdlp/cusparse_view.cu | 10 +++++++ cpp/src/pdlp/cusparse_view.hpp | 2 ++ cpp/src/pdlp/solve.cu | 11 ++++--- cpp/tests/linear_programming/pdlp_test.cu | 36 +++++++++++------------ 4 files changed, 35 insertions(+), 24 
deletions(-) diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index 23ff3711d8..64ec44f5ef 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -1138,6 +1138,16 @@ void mixed_precision_spmv_preprocess(cusparseHandle_t handle, } #endif +bool is_cusparse_runtime_mixed_precision_supported() +{ + int major = 0, minor = 0; + auto status = cusparseGetProperty(libraryPropertyType_t::MAJOR_VERSION, &major); + if (status != CUSPARSE_STATUS_SUCCESS) return false; + status = cusparseGetProperty(libraryPropertyType_t::MINOR_VERSION, &minor); + if (status != CUSPARSE_STATUS_SUCCESS) return false; + return (major > 12) || (major == 12 && minor >= 5); +} + #if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class cusparse_sp_mat_descr_wrapper_t; template class cusparse_dn_vec_descr_wrapper_t; diff --git a/cpp/src/pdlp/cusparse_view.hpp b/cpp/src/pdlp/cusparse_view.hpp index e83915e37d..416a0b1e5f 100644 --- a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -262,4 +262,6 @@ void my_cusparsespmm_preprocess(cusparseHandle_t handle, cudaStream_t stream); #endif +bool is_cusparse_runtime_mixed_precision_supported(); + } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index f09034da7f..584f3480f5 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -594,13 +594,12 @@ optimization_problem_solution_t run_pdlp(detail::problem_t& error_type_t::ValidationError, "PDLP batch mode is not supported for float precision. 
Use double precision."); } -#if CUDART_VERSION < 12050 - cuopt_expects(!settings.mixed_precision_spmv, - error_type_t::ValidationError, - "Mixed-precision SpMV requires CUDA 12.5 or later."); -#endif cuopt_expects( - !is_batch_mode || !settings.mixed_precision_spmv, + !(settings.mixed_precision_spmv && !detail::is_cusparse_runtime_mixed_precision_supported()), + error_type_t::ValidationError, + "Mixed-precision SpMV requires cuSPARSE runtime 12.5 or later."); + cuopt_expects( + !(is_batch_mode && settings.mixed_precision_spmv), error_type_t::ValidationError, "Mixed-precision SpMV is not supported in batch mode. Set mixed_precision_spmv=false " "or disable batch mode."); diff --git a/cpp/tests/linear_programming/pdlp_test.cu b/cpp/tests/linear_programming/pdlp_test.cu index dc27c74703..fb14e31894 100644 --- a/cpp/tests/linear_programming/pdlp_test.cu +++ b/cpp/tests/linear_programming/pdlp_test.cu @@ -6,6 +6,7 @@ /* clang-format on */ #include +#include #include #include #include @@ -77,9 +78,25 @@ TEST(pdlp_class, run_double) afiro_primal_objective, solution.get_additional_termination_information().primal_objective)); } -#if CUDART_VERSION >= 12050 TEST(pdlp_class, mixed_precision_spmv) { + using namespace cuopt::linear_programming::detail; + if (!is_cusparse_runtime_mixed_precision_supported()) { + const raft::handle_t handle_{}; + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto settings = pdlp_solver_settings_t{}; + settings.method = cuopt::linear_programming::method_t::PDLP; + settings.mixed_precision_spmv = true; + + optimization_problem_solution_t solution = + solve_lp(&handle_, op_problem, settings); + EXPECT_EQ(solution.get_error_status().get_error_type(), cuopt::error_type_t::ValidationError); + return; + } + const raft::handle_t handle_{}; auto path = make_path_absolute("linear_programming/afiro_original.mps"); @@ -112,23 +129,6 
@@ TEST(pdlp_class, mixed_precision_spmv) solution_full.get_additional_termination_information().primal_objective, 1e-2); } -#else -TEST(pdlp_class, mixed_precision_spmv_rejected_before_cuda_12_5) -{ - const raft::handle_t handle_{}; - - auto path = make_path_absolute("linear_programming/afiro_original.mps"); - cuopt::mps_parser::mps_data_model_t op_problem = - cuopt::mps_parser::parse_mps(path, true); - - auto settings = pdlp_solver_settings_t{}; - settings.method = cuopt::linear_programming::method_t::PDLP; - settings.mixed_precision_spmv = true; - - optimization_problem_solution_t solution = solve_lp(&handle_, op_problem, settings); - EXPECT_EQ(solution.get_error_status().get_error_type(), cuopt::error_type_t::ValidationError); -} -#endif TEST(pdlp_class, run_double_very_low_accuracy) { From ab9e8eb4e36d6d10a86af31cc2b2643f5dabebb1 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Tue, 3 Mar 2026 12:35:55 +0100 Subject: [PATCH 14/23] fix doc --- docs/cuopt/source/lp-qp-milp-settings.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/cuopt/source/lp-qp-milp-settings.rst b/docs/cuopt/source/lp-qp-milp-settings.rst index 4a8c4c44ae..050843a8df 100644 --- a/docs/cuopt/source/lp-qp-milp-settings.rst +++ b/docs/cuopt/source/lp-qp-milp-settings.rst @@ -203,6 +203,8 @@ FP32 mode is only supported with the PDLP method (not concurrent) and without cr .. note:: The default precision is FP64 (double). +.. 
_Mixed Precision SpMV: + Mixed Precision SpMV ^^^^^^^^^^^^^^^^^^^^ From 081a1640370b14be7a07466a7442d7164a533fde Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Thu, 5 Mar 2026 11:37:44 +0100 Subject: [PATCH 15/23] handle fp32 inside pdlp to allow exposing it at the c and Python layer, create the new notion of pdlp precision --- .../linear_programming/cuopt/run_pdlp.cu | 62 +++--- cpp/cuopt_cli.cpp | 26 ++- .../cuopt/linear_programming/constants.h | 7 + .../pdlp/solver_settings.hpp | 25 ++- cpp/src/math_optimization/solver_settings.cu | 5 +- cpp/src/pdlp/pdlp.cu | 2 +- cpp/src/pdlp/solution_conversion.cu | 44 +--- cpp/src/pdlp/solve.cu | 210 +++++++++++++++++- cpp/src/pdlp/solve_remote.cu | 9 - .../c_api_tests/c_api_test.c | 66 ++++++ .../c_api_tests/c_api_tests.cpp | 16 ++ .../c_api_tests/c_api_tests.h | 4 + cpp/tests/linear_programming/pdlp_test.cu | 151 +++++-------- .../cuopt-c/lp-qp-milp/lp-qp-milp-c-api.rst | 13 ++ docs/cuopt/source/lp-qp-features.rst | 17 +- docs/cuopt/source/lp-qp-milp-settings.rst | 42 ++-- .../linear_programming/test_lp_solver.py | 45 +++- 17 files changed, 515 insertions(+), 229 deletions(-) diff --git a/benchmarks/linear_programming/cuopt/run_pdlp.cu b/benchmarks/linear_programming/cuopt/run_pdlp.cu index f0334dd47d..a7838d773e 100644 --- a/benchmarks/linear_programming/cuopt/run_pdlp.cu +++ b/benchmarks/linear_programming/cuopt/run_pdlp.cu @@ -77,19 +77,12 @@ static void parse_arguments(argparse::ArgumentParser& program) program.add_argument("--solution-path").help("Path where solution file will be generated"); - program.add_argument("--pdlp-fp32") + program.add_argument("--pdlp-precision") .help( - "Use FP32 (float) precision instead of FP64 (double). Only supported for PDLP method without " - "crossover.") - .default_value(false) - .implicit_value(true); - - program.add_argument("--mixed-precision-spmv") - .help( - "Enable mixed precision SpMV (FP32 matrix, FP64 vectors) during PDHG iterations. 
Only " - "supported for PDLP method in FP64.") - .default_value(false) - .implicit_value(true); + "PDLP precision mode. default: native type, single: FP32 internally, " + "double: FP64 explicitly, mixed: mixed-precision SpMV (FP32 matrix, FP64 vectors).") + .default_value(std::string("default")) + .choices("default", "single", "double", "mixed"); } static cuopt::linear_programming::presolver_t string_to_presolver(const std::string& presolver) @@ -101,6 +94,15 @@ static cuopt::linear_programming::presolver_t string_to_presolver(const std::str return cuopt::linear_programming::presolver_t::Default; } +static cuopt::linear_programming::pdlp_precision_t string_to_pdlp_precision( + const std::string& precision) +{ + if (precision == "single") return cuopt::linear_programming::pdlp_precision_t::SinglePrecision; + if (precision == "double") return cuopt::linear_programming::pdlp_precision_t::DoublePrecision; + if (precision == "mixed") return cuopt::linear_programming::pdlp_precision_t::MixedPrecision; + return cuopt::linear_programming::pdlp_precision_t::DefaultPrecision; +} + static cuopt::linear_programming::pdlp_solver_mode_t string_to_pdlp_solver_mode( const std::string& mode) { @@ -116,33 +118,27 @@ static cuopt::linear_programming::pdlp_solver_mode_t string_to_pdlp_solver_mode( return cuopt::linear_programming::pdlp_solver_mode_t::Stable3; } -template -static cuopt::linear_programming::pdlp_solver_settings_t create_solver_settings( +static cuopt::linear_programming::pdlp_solver_settings_t create_solver_settings( const argparse::ArgumentParser& program) { - cuopt::linear_programming::pdlp_solver_settings_t settings = - cuopt::linear_programming::pdlp_solver_settings_t{}; + cuopt::linear_programming::pdlp_solver_settings_t settings{}; - settings.time_limit = static_cast(program.get("--time-limit")); + settings.time_limit = program.get("--time-limit"); settings.iteration_limit = program.get("--iteration-limit"); - settings.set_optimality_tolerance( - 
static_cast(program.get("--optimality-tolerance"))); + settings.set_optimality_tolerance(program.get("--optimality-tolerance")); settings.pdlp_solver_mode = string_to_pdlp_solver_mode(program.get("--pdlp-solver-mode")); settings.method = static_cast(program.get("--method")); - settings.crossover = program.get("--crossover"); - settings.presolver = string_to_presolver(program.get("--presolver")); - settings.mixed_precision_spmv = program.get("--mixed-precision-spmv"); + settings.crossover = program.get("--crossover"); + settings.presolver = string_to_presolver(program.get("--presolver")); + settings.pdlp_precision = string_to_pdlp_precision(program.get("--pdlp-precision")); return settings; } -template static int run_solver(const argparse::ArgumentParser& program, const raft::handle_t& handle_) { - // Initialize solver settings from binary arguments - cuopt::linear_programming::pdlp_solver_settings_t settings = - create_solver_settings(program); + auto settings = create_solver_settings(program); bool use_pdlp_solver_mode = true; if (program.is_used("--pdlp-hyper-params-path")) { @@ -152,12 +148,12 @@ static int run_solver(const argparse::ArgumentParser& program, const raft::handl } // Parse MPS file - cuopt::mps_parser::mps_data_model_t op_problem = - cuopt::mps_parser::parse_mps(program.get("--path")); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(program.get("--path")); // Solve LP problem bool problem_checking = true; - cuopt::linear_programming::optimization_problem_solution_t solution = + cuopt::linear_programming::optimization_problem_solution_t solution = cuopt::linear_programming::solve_lp( &handle_, op_problem, settings, problem_checking, use_pdlp_solver_mode); @@ -189,11 +185,5 @@ int main(int argc, char* argv[]) // Initialize raft handle and running stream const raft::handle_t handle_{}; - // Run solver with appropriate precision - bool use_fp32 = program.get("--pdlp-fp32"); - if (use_fp32) { - return 
run_solver(program, handle_); - } else { - return run_solver(program, handle_); - } + return run_solver(program, handle_); } diff --git a/cpp/cuopt_cli.cpp b/cpp/cuopt_cli.cpp index e9b1ee3719..53b586155a 100644 --- a/cpp/cuopt_cli.cpp +++ b/cpp/cuopt_cli.cpp @@ -77,8 +77,8 @@ inline auto make_async() { return std::make_shared& settings) { - return cuopt::init_logger_t(settings.get_parameter(CUOPT_LOG_FILE), - settings.get_parameter(CUOPT_LOG_TO_CONSOLE)); + return cuopt::init_logger_t(settings.template get_parameter(CUOPT_LOG_FILE), + settings.template get_parameter(CUOPT_LOG_TO_CONSOLE)); } /** @@ -287,6 +287,17 @@ int main(int argc, char* argv[]) .implicit_value(true); std::map arg_name_to_param_name; + + // Register --pdlp-precision with string-to-int mapping so that it flows + // through the settings_strings map like other settings. + program.add_argument("--pdlp-precision") + .help( + "PDLP precision mode. default: native type, single: FP32 internally, " + "double: FP64 explicitly, mixed: mixed-precision SpMV (FP32 matrix, FP64 vectors).") + .default_value(std::string("0")) + .choices("default", "single", "double", "mixed", "0", "1", "2", "3"); + arg_name_to_param_name["--pdlp-precision"] = CUOPT_PDLP_PRECISION; + { // Add all solver settings as arguments cuopt::linear_programming::solver_settings_t dummy_settings; @@ -341,11 +352,20 @@ int main(int argc, char* argv[]) return 1; } + // Map symbolic pdlp-precision names to integer values + static const std::map precision_name_to_value = { + {"default", "0"}, {"single", "1"}, {"double", "2"}, {"mixed", "3"}}; + // Read everything as a string std::map settings_strings; for (auto& [arg_name, param_name] : arg_name_to_param_name) { if (program.is_used(arg_name.c_str())) { - settings_strings[param_name] = program.get(arg_name.c_str()); + auto val = program.get(arg_name.c_str()); + if (param_name == CUOPT_PDLP_PRECISION) { + auto it = precision_name_to_value.find(val); + if (it != precision_name_to_value.end()) { 
val = it->second; } + } + settings_strings[param_name] = val; } } // Get the values diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h index 7eb0aa07d6..0c76df3346 100644 --- a/cpp/include/cuopt/linear_programming/constants.h +++ b/cpp/include/cuopt/linear_programming/constants.h @@ -74,6 +74,7 @@ #define CUOPT_NUM_GPUS "num_gpus" #define CUOPT_USER_PROBLEM_FILE "user_problem_file" #define CUOPT_RANDOM_SEED "random_seed" +#define CUOPT_PDLP_PRECISION "pdlp_precision" /* @brief MIP determinism mode constants */ #define CUOPT_MODE_OPPORTUNISTIC 0 @@ -125,6 +126,12 @@ #define CUOPT_METHOD_DUAL_SIMPLEX 2 #define CUOPT_METHOD_BARRIER 3 +/* @brief PDLP precision mode constants */ +#define CUOPT_PDLP_DEFAULT_PRECISION 0 +#define CUOPT_PDLP_SINGLE_PRECISION 1 +#define CUOPT_PDLP_DOUBLE_PRECISION 2 +#define CUOPT_PDLP_MIXED_PRECISION 3 + /* @brief File format constants for problem I/O */ #define CUOPT_FILE_FORMAT_MPS 0 diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp index 59236d3531..d3f59144cc 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp @@ -63,6 +63,21 @@ enum method_t : int { Barrier = CUOPT_METHOD_BARRIER }; +/** + * @brief Enum representing the PDLP precision modes. + * + * DefaultPrecision: Use the type of the problem (FP64 for double problems). + * SinglePrecision: Run PDLP internally in FP32, converting inputs and outputs. + * DoublePrecision: Explicitly run in FP64 (same as default for double problems). + * MixedPrecision: Use mixed precision SpMV (FP32 matrix with FP64 vectors/compute). 
+ */ +enum pdlp_precision_t : int { + DefaultPrecision = CUOPT_PDLP_DEFAULT_PRECISION, + SinglePrecision = CUOPT_PDLP_SINGLE_PRECISION, + DoublePrecision = CUOPT_PDLP_DOUBLE_PRECISION, + MixedPrecision = CUOPT_PDLP_MIXED_PRECISION +}; + template class pdlp_solver_settings_t { public: @@ -239,15 +254,7 @@ class pdlp_solver_settings_t { i_t ordering{-1}; i_t barrier_dual_initial_point{-1}; bool eliminate_dense_columns{true}; - /** - * @brief Enable mixed precision SpMV during PDHG iterations (FP64 mode only). - * - * When true, the constraint matrix A and its transpose are stored in FP32 while - * vectors and compute type remain in FP64, reducing memory bandwidth during SpMV. - * Convergence checking and restarts always use the full FP64 matrix, so this does - * not reduce overall memory usage. Has no effect in FP32 mode. - */ - bool mixed_precision_spmv{false}; + pdlp_precision_t pdlp_precision{pdlp_precision_t::DefaultPrecision}; bool save_best_primal_so_far{false}; bool first_primal_feasible{false}; presolver_t presolver{presolver_t::Default}; diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index a211b418bb..7435bb37fa 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -103,7 +103,8 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_PRESOLVE, reinterpret_cast(&mip_settings.presolver), CUOPT_PRESOLVE_DEFAULT, CUOPT_PRESOLVE_PSLP, CUOPT_PRESOLVE_DEFAULT}, {CUOPT_MIP_DETERMINISM_MODE, &mip_settings.determinism_mode, CUOPT_MODE_OPPORTUNISTIC, CUOPT_MODE_DETERMINISTIC, CUOPT_MODE_OPPORTUNISTIC}, {CUOPT_RANDOM_SEED, &mip_settings.seed, -1, std::numeric_limits::max(), -1}, - {CUOPT_MIP_RELIABILITY_BRANCHING, &mip_settings.reliability_branching, -1, std::numeric_limits::max(), -1} + {CUOPT_MIP_RELIABILITY_BRANCHING, &mip_settings.reliability_branching, -1, std::numeric_limits::max(), -1}, + {CUOPT_PDLP_PRECISION, 
reinterpret_cast(&pdlp_settings.pdlp_precision), CUOPT_PDLP_DEFAULT_PRECISION, CUOPT_PDLP_MIXED_PRECISION, CUOPT_PDLP_DEFAULT_PRECISION} }; // Bool parameters @@ -120,7 +121,7 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_CROSSOVER, &pdlp_settings.crossover, false}, {CUOPT_ELIMINATE_DENSE_COLUMNS, &pdlp_settings.eliminate_dense_columns, true}, {CUOPT_CUDSS_DETERMINISTIC, &pdlp_settings.cudss_deterministic, false}, - {CUOPT_DUAL_POSTSOLVE, &pdlp_settings.dual_postsolve, true} + {CUOPT_DUAL_POSTSOLVE, &pdlp_settings.dual_postsolve, true}, }; // String parameters string_parameters = { diff --git a/cpp/src/pdlp/pdlp.cu b/cpp/src/pdlp/pdlp.cu index 81f7e3815d..82e79098a7 100644 --- a/cpp/src/pdlp/pdlp.cu +++ b/cpp/src/pdlp/pdlp.cu @@ -142,7 +142,7 @@ pdlp_solver_t::pdlp_solver_t(problem_t& op_problem, climber_strategies_, settings_.hyper_params, settings_.new_bounds, - settings_.mixed_precision_spmv}, + settings_.pdlp_precision == pdlp_precision_t::MixedPrecision}, initial_scaling_strategy_{handle_ptr_, op_problem_scaled_, settings_.hyper_params.default_l_inf_ruiz_iterations, diff --git a/cpp/src/pdlp/solution_conversion.cu b/cpp/src/pdlp/solution_conversion.cu index ff92eea22f..7993445a08 100644 --- a/cpp/src/pdlp/solution_conversion.cu +++ b/cpp/src/pdlp/solution_conversion.cu @@ -13,7 +13,6 @@ #include #include #include -#include #include #include @@ -135,18 +134,6 @@ cuopt::cython::mip_ret_t gpu_mip_solution_t::to_mip_ret_t() // CPU LP Solution Conversion // =========================== -namespace { -template -cuopt::cython::cpu_buffer to_cpu_buffer(std::vector& src) -{ - if constexpr (std::is_same_v) { - return std::move(src); - } else { - return cuopt::cython::cpu_buffer(src.begin(), src.end()); - } -} -} // namespace - template cuopt::cython::linear_programming_ret_t cpu_lp_solution_t::to_cpu_linear_programming_ret_t() @@ -155,22 +142,22 @@ cpu_lp_solution_t::to_cpu_linear_programming_ret_t() 
cuopt::cython::linear_programming_ret_t ret; cpu_solutions_t cpu; - cpu.primal_solution_ = to_cpu_buffer(primal_solution_); - cpu.dual_solution_ = to_cpu_buffer(dual_solution_); - cpu.reduced_cost_ = to_cpu_buffer(reduced_cost_); + cpu.primal_solution_ = std::move(primal_solution_); + cpu.dual_solution_ = std::move(dual_solution_); + cpu.reduced_cost_ = std::move(reduced_cost_); if (!pdlp_warm_start_data_.current_primal_solution_.empty()) { - cpu.current_primal_solution_ = to_cpu_buffer(pdlp_warm_start_data_.current_primal_solution_); - cpu.current_dual_solution_ = to_cpu_buffer(pdlp_warm_start_data_.current_dual_solution_); - cpu.initial_primal_average_ = to_cpu_buffer(pdlp_warm_start_data_.initial_primal_average_); - cpu.initial_dual_average_ = to_cpu_buffer(pdlp_warm_start_data_.initial_dual_average_); - cpu.current_ATY_ = to_cpu_buffer(pdlp_warm_start_data_.current_ATY_); - cpu.sum_primal_solutions_ = to_cpu_buffer(pdlp_warm_start_data_.sum_primal_solutions_); - cpu.sum_dual_solutions_ = to_cpu_buffer(pdlp_warm_start_data_.sum_dual_solutions_); + cpu.current_primal_solution_ = std::move(pdlp_warm_start_data_.current_primal_solution_); + cpu.current_dual_solution_ = std::move(pdlp_warm_start_data_.current_dual_solution_); + cpu.initial_primal_average_ = std::move(pdlp_warm_start_data_.initial_primal_average_); + cpu.initial_dual_average_ = std::move(pdlp_warm_start_data_.initial_dual_average_); + cpu.current_ATY_ = std::move(pdlp_warm_start_data_.current_ATY_); + cpu.sum_primal_solutions_ = std::move(pdlp_warm_start_data_.sum_primal_solutions_); + cpu.sum_dual_solutions_ = std::move(pdlp_warm_start_data_.sum_dual_solutions_); cpu.last_restart_duality_gap_primal_solution_ = - to_cpu_buffer(pdlp_warm_start_data_.last_restart_duality_gap_primal_solution_); + std::move(pdlp_warm_start_data_.last_restart_duality_gap_primal_solution_); cpu.last_restart_duality_gap_dual_solution_ = - to_cpu_buffer(pdlp_warm_start_data_.last_restart_duality_gap_dual_solution_); + 
std::move(pdlp_warm_start_data_.last_restart_duality_gap_dual_solution_); ret.initial_primal_weight_ = pdlp_warm_start_data_.initial_primal_weight_; ret.initial_step_size_ = pdlp_warm_start_data_.initial_step_size_; @@ -235,11 +222,4 @@ template cuopt::cython::linear_programming_ret_t cpu_lp_solution_t::to_cpu_linear_programming_ret_t(); template cuopt::cython::mip_ret_t cpu_mip_solution_t::to_cpu_mip_ret_t(); -#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT -template cuopt::cython::linear_programming_ret_t -gpu_lp_solution_t::to_linear_programming_ret_t(); -template cuopt::cython::linear_programming_ret_t -cpu_lp_solution_t::to_cpu_linear_programming_ret_t(); -#endif - } // namespace cuopt::linear_programming diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index 584f3480f5..7dbd423c87 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -560,6 +560,188 @@ optimization_problem_solution_t run_dual_simplex( 0); } +#if PDLP_INSTANTIATE_FLOAT || CUOPT_INSTANTIATE_FLOAT + +struct double_to_float_op { + HDI float operator()(double val) const { return static_cast(val); } +}; + +struct float_to_double_op { + HDI double operator()(float val) const { return static_cast(val); } +}; + +template +static optimization_problem_solution_t run_pdlp_solver_in_fp32( + detail::problem_t& problem, + pdlp_solver_settings_t const& settings, + const timer_t& timer, + bool is_batch_mode) +{ + CUOPT_LOG_CONDITIONAL_INFO(!settings.inside_mip, "Running PDLP in FP32 precision"); + auto stream = problem.handle_ptr->get_stream(); + auto stream_val = stream.value(); + + auto gpu_double_to_float = [&](const rmm::device_uvector& src) { + rmm::device_uvector dst(src.size(), stream); + if (src.size() > 0) { + RAFT_CUDA_TRY(cub::DeviceTransform::Transform( + src.data(), dst.data(), src.size(), double_to_float_op{}, stream_val)); + } + return dst; + }; + + auto gpu_float_to_double = [&](const rmm::device_uvector& src) { + rmm::device_uvector dst(src.size(), stream); + if 
(src.size() > 0) { + RAFT_CUDA_TRY(cub::DeviceTransform::Transform( + src.data(), dst.data(), src.size(), float_to_double_op{}, stream_val)); + } + return dst; + }; + + // Convert double device vectors to float on the GPU + auto f_coefficients = gpu_double_to_float(problem.coefficients); + auto f_obj = gpu_double_to_float(problem.objective_coefficients); + auto f_clb = gpu_double_to_float(problem.constraint_lower_bounds); + auto f_cub = gpu_double_to_float(problem.constraint_upper_bounds); + + // Extract and convert variable bounds from double2 to separate float arrays on GPU + i_t n_vars = problem.n_variables; + rmm::device_uvector f_var_lb(n_vars, stream); + rmm::device_uvector f_var_ub(n_vars, stream); + if (n_vars > 0) { + auto out_zip = thrust::make_zip_iterator(thrust::make_tuple(f_var_lb.data(), f_var_ub.data())); + RAFT_CUDA_TRY(cub::DeviceTransform::Transform( + problem.variable_bounds.data(), + out_zip, + n_vars, + [] __device__(double2 b) { + return thrust::make_tuple(static_cast(b.x), static_cast(b.y)); + }, + stream_val)); + } + + // Build float optimization_problem_t from device pointers + optimization_problem_t float_op(problem.handle_ptr); + float_op.set_maximize(problem.maximize); + // Should it just be problem.objective_offset? 
+ float_op.set_objective_offset(static_cast(problem.presolve_data.objective_offset)); + float_op.set_objective_scaling_factor( + static_cast(problem.presolve_data.objective_scaling_factor)); + float_op.set_csr_constraint_matrix(f_coefficients.data(), + static_cast(f_coefficients.size()), + problem.variables.data(), + static_cast(problem.variables.size()), + problem.offsets.data(), + static_cast(problem.offsets.size())); + float_op.set_objective_coefficients(f_obj.data(), static_cast(f_obj.size())); + float_op.set_constraint_lower_bounds(f_clb.data(), static_cast(f_clb.size())); + float_op.set_constraint_upper_bounds(f_cub.data(), static_cast(f_cub.size())); + float_op.set_variable_lower_bounds(f_var_lb.data(), static_cast(f_var_lb.size())); + float_op.set_variable_upper_bounds(f_var_ub.data(), static_cast(f_var_ub.size())); + + float_op.set_variable_names(problem.var_names); + float_op.set_row_names(problem.row_names); + float_op.set_objective_name(problem.objective_name); + + detail::problem_t float_problem(float_op); + + auto objective_name = problem.objective_name; + auto var_names = problem.var_names; + auto row_names = problem.row_names; + // When crossover is off, free double-precision GPU memory to reduce peak usage. + // When crossover is on, run_pdlp needs the problem data after we return. 
+ if (!settings.crossover) { + { + [[maybe_unused]] auto discard = detail::problem_t(std::move(problem)); + } + } + + // Create float settings from double settings + pdlp_solver_settings_t fs; + fs.tolerances.absolute_dual_tolerance = + static_cast(settings.tolerances.absolute_dual_tolerance); + fs.tolerances.relative_dual_tolerance = + static_cast(settings.tolerances.relative_dual_tolerance); + fs.tolerances.absolute_primal_tolerance = + static_cast(settings.tolerances.absolute_primal_tolerance); + fs.tolerances.relative_primal_tolerance = + static_cast(settings.tolerances.relative_primal_tolerance); + fs.tolerances.absolute_gap_tolerance = + static_cast(settings.tolerances.absolute_gap_tolerance); + fs.tolerances.relative_gap_tolerance = + static_cast(settings.tolerances.relative_gap_tolerance); + fs.tolerances.primal_infeasible_tolerance = + static_cast(settings.tolerances.primal_infeasible_tolerance); + fs.tolerances.dual_infeasible_tolerance = + static_cast(settings.tolerances.dual_infeasible_tolerance); + fs.detect_infeasibility = settings.detect_infeasibility; + fs.strict_infeasibility = settings.strict_infeasibility; + fs.iteration_limit = settings.iteration_limit; + fs.time_limit = static_cast(settings.time_limit); + fs.pdlp_solver_mode = settings.pdlp_solver_mode; + fs.log_to_console = settings.log_to_console; + fs.log_file = settings.log_file; + fs.per_constraint_residual = settings.per_constraint_residual; + fs.save_best_primal_so_far = settings.save_best_primal_so_far; + fs.first_primal_feasible = settings.first_primal_feasible; + fs.eliminate_dense_columns = settings.eliminate_dense_columns; + fs.pdlp_precision = pdlp_precision_t::DefaultPrecision; + fs.method = method_t::PDLP; + fs.inside_mip = settings.inside_mip; + fs.hyper_params = settings.hyper_params; + fs.presolver = settings.presolver; + fs.num_gpus = settings.num_gpus; + fs.concurrent_halt = settings.concurrent_halt; + + detail::pdlp_solver_t solver(float_problem, fs, is_batch_mode); + if 
(settings.inside_mip) { solver.set_inside_mip(true); } + auto float_sol = solver.run_solver(timer); + + // Convert float solution back to double on GPU + auto dev_primal = gpu_float_to_double(float_sol.get_primal_solution()); + auto dev_dual = gpu_float_to_double(float_sol.get_dual_solution()); + auto dev_reduced = gpu_float_to_double(float_sol.get_reduced_cost()); + + // Convert termination info (small host-side struct, stays on CPU) + auto float_term_infos = float_sol.get_additional_termination_informations(); + using double_term_info_t = + typename optimization_problem_solution_t::additional_termination_information_t; + std::vector term_infos; + for (auto& fi : float_term_infos) { + double_term_info_t di; + di.number_of_steps_taken = fi.number_of_steps_taken; + di.total_number_of_attempted_steps = fi.total_number_of_attempted_steps; + di.l2_primal_residual = static_cast(fi.l2_primal_residual); + di.l2_relative_primal_residual = static_cast(fi.l2_relative_primal_residual); + di.l2_dual_residual = static_cast(fi.l2_dual_residual); + di.l2_relative_dual_residual = static_cast(fi.l2_relative_dual_residual); + di.primal_objective = static_cast(fi.primal_objective); + di.dual_objective = static_cast(fi.dual_objective); + di.gap = static_cast(fi.gap); + di.relative_gap = static_cast(fi.relative_gap); + di.max_primal_ray_infeasibility = static_cast(fi.max_primal_ray_infeasibility); + di.primal_ray_linear_objective = static_cast(fi.primal_ray_linear_objective); + di.max_dual_ray_infeasibility = static_cast(fi.max_dual_ray_infeasibility); + di.dual_ray_linear_objective = static_cast(fi.dual_ray_linear_objective); + di.solve_time = fi.solve_time; + di.solved_by_pdlp = fi.solved_by_pdlp; + term_infos.push_back(di); + } + + auto status_vec = float_sol.get_terminations_status(); + + return optimization_problem_solution_t(dev_primal, + dev_dual, + dev_reduced, + objective_name, + var_names, + row_names, + std::move(term_infos), + std::move(status_vec)); +} +#endif + template 
static optimization_problem_solution_t run_pdlp_solver( detail::problem_t& problem, @@ -574,6 +756,13 @@ static optimization_problem_solution_t run_pdlp_solver( return optimization_problem_solution_t{pdlp_termination_status_t::NumericalError, problem.handle_ptr->get_stream()}; } +#if PDLP_INSTANTIATE_FLOAT || CUOPT_INSTANTIATE_FLOAT + if constexpr (std::is_same_v) { + if (settings.pdlp_precision == pdlp_precision_t::SinglePrecision) { + return run_pdlp_solver_in_fp32(problem, settings, timer, is_batch_mode); + } + } +#endif detail::pdlp_solver_t solver(problem, settings, is_batch_mode); if (settings.inside_mip) { solver.set_inside_mip(true); } return solver.run_solver(timer); @@ -586,23 +775,22 @@ optimization_problem_solution_t run_pdlp(detail::problem_t& bool is_batch_mode) { if constexpr (!std::is_same_v) { - cuopt_expects(!settings.crossover, - error_type_t::ValidationError, - "PDLP with crossover is not supported for float precision. Set crossover=false " - "or use double precision."); cuopt_expects(!is_batch_mode, error_type_t::ValidationError, "PDLP batch mode is not supported for float precision. Use double precision."); } + cuopt_expects(!(settings.pdlp_precision == pdlp_precision_t::MixedPrecision && + !detail::is_cusparse_runtime_mixed_precision_supported()), + error_type_t::ValidationError, + "Mixed-precision SpMV requires cuSPARSE runtime 12.5 or later."); cuopt_expects( - !(settings.mixed_precision_spmv && !detail::is_cusparse_runtime_mixed_precision_supported()), - error_type_t::ValidationError, - "Mixed-precision SpMV requires cuSPARSE runtime 12.5 or later."); - cuopt_expects( - !(is_batch_mode && settings.mixed_precision_spmv), + !(is_batch_mode && settings.pdlp_precision == pdlp_precision_t::MixedPrecision), error_type_t::ValidationError, - "Mixed-precision SpMV is not supported in batch mode. Set mixed_precision_spmv=false " + "Mixed-precision SpMV is not supported in batch mode. 
Set pdlp_precision=0 (default) " "or disable batch mode."); + cuopt_expects(!(settings.pdlp_precision == pdlp_precision_t::SinglePrecision && is_batch_mode), + error_type_t::ValidationError, + "Single-precision PDLP is not supported in batch mode."); auto start_solver = std::chrono::high_resolution_clock::now(); timer_t timer_pdlp(timer.remaining_time()); @@ -1613,7 +1801,7 @@ std::unique_ptr> solve_lp( const cuopt::mps_parser::mps_data_model_t& data_model); \ template void set_pdlp_solver_mode(pdlp_solver_settings_t& settings); -#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/pdlp/solve_remote.cu b/cpp/src/pdlp/solve_remote.cu index 9ecd84b588..a9bf7e3989 100644 --- a/cpp/src/pdlp/solve_remote.cu +++ b/cpp/src/pdlp/solve_remote.cu @@ -9,7 +9,6 @@ #include #include #include -#include #include namespace cuopt::linear_programming { @@ -109,14 +108,6 @@ std::unique_ptr> solve_mip_remote( } // Explicit template instantiations for remote execution stubs -#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT -template std::unique_ptr> solve_lp_remote( - cpu_optimization_problem_t const&, - pdlp_solver_settings_t const&, - bool, - bool); -#endif - template std::unique_ptr> solve_lp_remote( cpu_optimization_problem_t const&, pdlp_solver_settings_t const&, diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_test.c b/cpp/tests/linear_programming/c_api_tests/c_api_test.c index ecf610041c..41b99aeebb 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_test.c +++ b/cpp/tests/linear_programming/c_api_tests/c_api_test.c @@ -2122,3 +2122,69 @@ cuopt_int_t test_cpu_only_mip_execution(const char* filename) cuOptDestroySolution(&solution); return status; } + +cuopt_int_t test_pdlp_precision_single(const char* filename, + cuopt_int_t* termination_status_ptr, + cuopt_float_t* objective_ptr) +{ + cuOptOptimizationProblem problem = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution 
solution = NULL; + cuopt_int_t status; + cuopt_int_t termination_status = -1; + cuopt_float_t objective_value; + + status = cuOptReadProblem(filename, &problem); + if (status != CUOPT_SUCCESS) { + printf("Error reading problem\n"); + goto DONE; + } + + status = cuOptCreateSolverSettings(&settings); + if (status != CUOPT_SUCCESS) { + printf("Error creating solver settings\n"); + goto DONE; + } + + status = cuOptSetIntegerParameter(settings, CUOPT_METHOD, CUOPT_METHOD_PDLP); + if (status != CUOPT_SUCCESS) { + printf("Error setting method\n"); + goto DONE; + } + + status = cuOptSetIntegerParameter(settings, CUOPT_PDLP_PRECISION, CUOPT_PDLP_SINGLE_PRECISION); + if (status != CUOPT_SUCCESS) { + printf("Error setting pdlp_precision\n"); + goto DONE; + } + + status = cuOptSolve(problem, settings, &solution); + if (status != CUOPT_SUCCESS) { + printf("Error solving problem with pdlp_precision=single\n"); + goto DONE; + } + + status = cuOptGetTerminationStatus(solution, &termination_status); + if (status != CUOPT_SUCCESS) { + printf("Error getting termination status\n"); + goto DONE; + } + *termination_status_ptr = termination_status; + + status = cuOptGetObjectiveValue(solution, &objective_value); + if (status != CUOPT_SUCCESS) { + printf("Error getting objective value\n"); + goto DONE; + } + *objective_ptr = objective_value; + + printf("PDLP precision=single test passed: status=%s, objective=%f\n", + termination_status_to_string(termination_status), + objective_value); + +DONE: + cuOptDestroyProblem(&problem); + cuOptDestroySolverSettings(&settings); + cuOptDestroySolution(&solution); + return status; +} diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp index 33fb42cc9d..5971292ebb 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp +++ b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp @@ -271,6 +271,22 @@ INSTANTIATE_TEST_SUITE_P(c_api, // Different instance 
std::make_tuple("/mip/bb_optimality.mps", 8, 60.0, 2))); +// ============================================================================= +// PDLP Precision Tests +// ============================================================================= + +TEST(c_api, pdlp_precision_single) +{ + const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); + std::string filename = rapidsDatasetRootDir + "/linear_programming/afiro_original.mps"; + cuopt_int_t termination_status; + cuopt_float_t objective; + EXPECT_EQ(test_pdlp_precision_single(filename.c_str(), &termination_status, &objective), + CUOPT_SUCCESS); + EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_OPTIMAL); + EXPECT_NEAR(objective, -464.7531, 1e-1); +} + // ============================================================================= // Solution Interface Polymorphism Tests // ============================================================================= diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_tests.h b/cpp/tests/linear_programming/c_api_tests/c_api_tests.h index e541316567..b7614c378b 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_tests.h +++ b/cpp/tests/linear_programming/c_api_tests/c_api_tests.h @@ -53,6 +53,10 @@ cuopt_int_t test_deterministic_bb(const char* filename, cuopt_int_t test_lp_solution_mip_methods(); cuopt_int_t test_mip_solution_lp_methods(); +cuopt_int_t test_pdlp_precision_single(const char* filename, + cuopt_int_t* termination_status_ptr, + cuopt_float_t* objective_ptr); + /* CPU-only execution tests (require env vars CUDA_VISIBLE_DEVICES="" and CUOPT_REMOTE_HOST) */ cuopt_int_t test_cpu_only_execution(const char* filename); cuopt_int_t test_cpu_only_mip_execution(const char* filename); diff --git a/cpp/tests/linear_programming/pdlp_test.cu b/cpp/tests/linear_programming/pdlp_test.cu index fb14e31894..d5a8d69008 100644 --- a/cpp/tests/linear_programming/pdlp_test.cu +++ b/cpp/tests/linear_programming/pdlp_test.cu @@ -48,9 +48,6 
@@ namespace cuopt::linear_programming::test { constexpr double afiro_primal_objective = -464.0; -#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT -constexpr float afiro_primal_objective_f32 = -464.0f; -#endif // Accept a 1% error template static bool is_incorrect_objective(f_t reference, f_t objective) @@ -78,7 +75,7 @@ TEST(pdlp_class, run_double) afiro_primal_objective, solution.get_additional_termination_information().primal_objective)); } -TEST(pdlp_class, mixed_precision_spmv) +TEST(pdlp_class, precision_mixed) { using namespace cuopt::linear_programming::detail; if (!is_cusparse_runtime_mixed_precision_supported()) { @@ -87,9 +84,9 @@ TEST(pdlp_class, mixed_precision_spmv) cuopt::mps_parser::mps_data_model_t op_problem = cuopt::mps_parser::parse_mps(path, true); - auto settings = pdlp_solver_settings_t{}; - settings.method = cuopt::linear_programming::method_t::PDLP; - settings.mixed_precision_spmv = true; + auto settings = pdlp_solver_settings_t{}; + settings.method = cuopt::linear_programming::method_t::PDLP; + settings.pdlp_precision = cuopt::linear_programming::pdlp_precision_t::MixedPrecision; optimization_problem_solution_t solution = solve_lp(&handle_, op_problem, settings); @@ -103,9 +100,9 @@ TEST(pdlp_class, mixed_precision_spmv) cuopt::mps_parser::mps_data_model_t op_problem = cuopt::mps_parser::parse_mps(path, true); - auto settings_mixed = pdlp_solver_settings_t{}; - settings_mixed.method = cuopt::linear_programming::method_t::PDLP; - settings_mixed.mixed_precision_spmv = true; + auto settings_mixed = pdlp_solver_settings_t{}; + settings_mixed.method = cuopt::linear_programming::method_t::PDLP; + settings_mixed.pdlp_precision = cuopt::linear_programming::pdlp_precision_t::MixedPrecision; optimization_problem_solution_t solution_mixed = solve_lp(&handle_, op_problem, settings_mixed); @@ -114,9 +111,9 @@ TEST(pdlp_class, mixed_precision_spmv) afiro_primal_objective, solution_mixed.get_additional_termination_information().primal_objective)); - 
auto settings_full = pdlp_solver_settings_t{}; - settings_full.method = cuopt::linear_programming::method_t::PDLP; - settings_full.mixed_precision_spmv = false; + auto settings_full = pdlp_solver_settings_t{}; + settings_full.method = cuopt::linear_programming::method_t::PDLP; + settings_full.pdlp_precision = cuopt::linear_programming::pdlp_precision_t::DefaultPrecision; optimization_problem_solution_t solution_full = solve_lp(&handle_, op_problem, settings_full); @@ -1945,132 +1942,106 @@ TEST(pdlp_class, some_climber_hit_iteration_limit) } } -#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT -TEST(pdlp_class, run_float32) +TEST(pdlp_class, precision_single) { const raft::handle_t handle_{}; auto path = make_path_absolute("linear_programming/afiro_original.mps"); - cuopt::mps_parser::mps_data_model_t op_problem = - cuopt::mps_parser::parse_mps(path, true); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); - auto solver_settings = pdlp_solver_settings_t{}; - solver_settings.method = cuopt::linear_programming::method_t::PDLP; + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.pdlp_precision = cuopt::linear_programming::pdlp_precision_t::SinglePrecision; - optimization_problem_solution_t solution = + optimization_problem_solution_t solution = solve_lp(&handle_, op_problem, solver_settings); EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); - EXPECT_FALSE( - is_incorrect_objective(afiro_primal_objective_f32, - solution.get_additional_termination_information().primal_objective)); -} - -TEST(pdlp_class, float32_dual_simplex_throws_validation_error) -{ - const raft::handle_t handle_{}; - - auto path = make_path_absolute("linear_programming/afiro_original.mps"); - cuopt::mps_parser::mps_data_model_t op_problem = - cuopt::mps_parser::parse_mps(path, true); - - auto solver_settings = 
pdlp_solver_settings_t{}; - solver_settings.method = cuopt::linear_programming::method_t::DualSimplex; - - optimization_problem_solution_t solution = - solve_lp(&handle_, op_problem, solver_settings); - EXPECT_EQ(solution.get_error_status().get_error_type(), cuopt::error_type_t::ValidationError); + EXPECT_FALSE(is_incorrect_objective( + afiro_primal_objective, solution.get_additional_termination_information().primal_objective)); } -TEST(pdlp_class, float32_barrier_throws_validation_error) +TEST(pdlp_class, precision_single_crossover) { const raft::handle_t handle_{}; auto path = make_path_absolute("linear_programming/afiro_original.mps"); - cuopt::mps_parser::mps_data_model_t op_problem = - cuopt::mps_parser::parse_mps(path, true); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); - auto solver_settings = pdlp_solver_settings_t{}; - solver_settings.method = cuopt::linear_programming::method_t::Barrier; + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.pdlp_precision = cuopt::linear_programming::pdlp_precision_t::SinglePrecision; + solver_settings.crossover = true; - optimization_problem_solution_t solution = + optimization_problem_solution_t solution = solve_lp(&handle_, op_problem, solver_settings); - EXPECT_EQ(solution.get_error_status().get_error_type(), cuopt::error_type_t::ValidationError); -} - -TEST(pdlp_class, float32_concurrent_throws_validation_error) -{ - const raft::handle_t handle_{}; - - auto path = make_path_absolute("linear_programming/afiro_original.mps"); - cuopt::mps_parser::mps_data_model_t op_problem = - cuopt::mps_parser::parse_mps(path, true); - - auto solver_settings = pdlp_solver_settings_t{}; - solver_settings.method = cuopt::linear_programming::method_t::Concurrent; + EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); - optimization_problem_solution_t solution = - 
solve_lp(&handle_, op_problem, solver_settings); - EXPECT_EQ(solution.get_error_status().get_error_type(), cuopt::error_type_t::ValidationError); + EXPECT_FALSE(is_incorrect_objective( + afiro_primal_objective, solution.get_additional_termination_information().primal_objective)); } -TEST(pdlp_class, float32_papilo_presolve_works) +TEST(pdlp_class, precision_single_concurrent) { const raft::handle_t handle_{}; auto path = make_path_absolute("linear_programming/afiro_original.mps"); - cuopt::mps_parser::mps_data_model_t op_problem = - cuopt::mps_parser::parse_mps(path, true); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); - auto solver_settings = pdlp_solver_settings_t{}; - solver_settings.method = cuopt::linear_programming::method_t::PDLP; - solver_settings.presolver = cuopt::linear_programming::presolver_t::Papilo; + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::Concurrent; + solver_settings.pdlp_precision = cuopt::linear_programming::pdlp_precision_t::SinglePrecision; - optimization_problem_solution_t solution = + optimization_problem_solution_t solution = solve_lp(&handle_, op_problem, solver_settings); EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); - EXPECT_FALSE( - is_incorrect_objective(afiro_primal_objective_f32, - solution.get_additional_termination_information().primal_objective)); + + EXPECT_FALSE(is_incorrect_objective( + afiro_primal_objective, solution.get_additional_termination_information().primal_objective)); } -TEST(pdlp_class, float32_pslp_presolve_works) +TEST(pdlp_class, precision_single_papilo_presolve) { const raft::handle_t handle_{}; auto path = make_path_absolute("linear_programming/afiro_original.mps"); - cuopt::mps_parser::mps_data_model_t op_problem = - cuopt::mps_parser::parse_mps(path, true); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); 
- auto solver_settings = pdlp_solver_settings_t{}; - solver_settings.method = cuopt::linear_programming::method_t::PDLP; - solver_settings.presolver = cuopt::linear_programming::presolver_t::PSLP; + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.pdlp_precision = cuopt::linear_programming::pdlp_precision_t::SinglePrecision; + solver_settings.presolver = cuopt::linear_programming::presolver_t::Papilo; - optimization_problem_solution_t solution = + optimization_problem_solution_t solution = solve_lp(&handle_, op_problem, solver_settings); EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); - EXPECT_FALSE( - is_incorrect_objective(afiro_primal_objective_f32, - solution.get_additional_termination_information().primal_objective)); + EXPECT_FALSE(is_incorrect_objective( + afiro_primal_objective, solution.get_additional_termination_information().primal_objective)); } -TEST(pdlp_class, float32_crossover_throws_validation_error) +TEST(pdlp_class, precision_single_pslp_presolve) { const raft::handle_t handle_{}; auto path = make_path_absolute("linear_programming/afiro_original.mps"); - cuopt::mps_parser::mps_data_model_t op_problem = - cuopt::mps_parser::parse_mps(path, true); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); - auto solver_settings = pdlp_solver_settings_t{}; - solver_settings.method = cuopt::linear_programming::method_t::PDLP; - solver_settings.crossover = true; + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.pdlp_precision = cuopt::linear_programming::pdlp_precision_t::SinglePrecision; + solver_settings.presolver = cuopt::linear_programming::presolver_t::PSLP; - optimization_problem_solution_t solution = + optimization_problem_solution_t solution = solve_lp(&handle_, op_problem, solver_settings); - 
EXPECT_EQ(solution.get_error_status().get_error_type(), cuopt::error_type_t::ValidationError); + EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); + EXPECT_FALSE(is_incorrect_objective( + afiro_primal_objective, solution.get_additional_termination_information().primal_objective)); } -#endif } // namespace cuopt::linear_programming::test diff --git a/docs/cuopt/source/cuopt-c/lp-qp-milp/lp-qp-milp-c-api.rst b/docs/cuopt/source/cuopt-c/lp-qp-milp/lp-qp-milp-c-api.rst index 43d15eca64..d9a42301cb 100644 --- a/docs/cuopt/source/cuopt-c/lp-qp-milp/lp-qp-milp-c-api.rst +++ b/docs/cuopt/source/cuopt-c/lp-qp-milp/lp-qp-milp-c-api.rst @@ -187,6 +187,7 @@ These constants are used as parameter names in the :c:func:`cuOptSetParameter`, .. doxygendefine:: CUOPT_SOLUTION_FILE .. doxygendefine:: CUOPT_NUM_CPU_THREADS .. doxygendefine:: CUOPT_USER_PROBLEM_FILE +.. doxygendefine:: CUOPT_PDLP_PRECISION .. _pdlp-solver-mode-constants: @@ -201,6 +202,18 @@ These constants are used to configure `CUOPT_PDLP_SOLVER_MODE` via :c:func:`cuOp .. doxygendefine:: CUOPT_PDLP_SOLVER_MODE_METHODICAL1 .. doxygendefine:: CUOPT_PDLP_SOLVER_MODE_FAST1 +.. _pdlp-precision-constants: + +PDLP Precision Constants +------------------------ + +These constants are used to configure `CUOPT_PDLP_PRECISION` via :c:func:`cuOptSetIntegerParameter`. + +.. doxygendefine:: CUOPT_PDLP_DEFAULT_PRECISION +.. doxygendefine:: CUOPT_PDLP_SINGLE_PRECISION +.. doxygendefine:: CUOPT_PDLP_DOUBLE_PRECISION +.. doxygendefine:: CUOPT_PDLP_MIXED_PRECISION + .. _method-constants: Method Constants diff --git a/docs/cuopt/source/lp-qp-features.rst b/docs/cuopt/source/lp-qp-features.rst index 9495998760..e3cbddbb05 100644 --- a/docs/cuopt/source/lp-qp-features.rst +++ b/docs/cuopt/source/lp-qp-features.rst @@ -157,19 +157,16 @@ Batch Mode Users can submit a set of problems which will be solved in a batch. Problems will be solved at the same time in parallel to fully utilize the GPU. 
Checkout :ref:`self-hosted client ` example in thin client. -FP32 Precision Mode -------------------- - -By default, PDLP operates in FP64 (double) precision. Users can switch to FP32 (float) precision for the entire solve. FP32 uses half the memory of FP64 and allows PDHG iterations to be on average twice as fast, but it may require more iterations to converge due to reduced numerical accuracy. FP32 mode is only supported with the PDLP method (not concurrent) and without crossover. - -.. note:: The default precision is FP64 (double). - -Mixed Precision SpMV +PDLP Precision Modes -------------------- -When running PDLP in FP64 mode, users can enable mixed precision sparse matrix-vector products (SpMV) during PDHG iterations. In this mode, the constraint matrix and its transpose are stored in FP32 while vectors and the compute type remain in FP64. This allows SpMV operations to be faster thanks to reduced memory bandwidth requirements, while maintaining FP64 accuracy in the accumulation. This will make PDHG iterations faster while limiting the potential negative impact on convergence (compared to running in FP32 mode). Convergence checking and restart logic always use the full FP64 matrix, so this mode does not reduce memory usage since both the FP32 and FP64 copies of the matrix are kept in memory. Mixed precision SpMV only applies in FP64 mode and has no effect when running in FP32. +By default, PDLP operates in the native precision of the problem type (FP64 for double-precision problems). The ``pdlp_precision`` parameter provides several modes: + +- **single**: Run PDLP internally in FP32, with automatic conversion of inputs and outputs. FP32 uses half the memory and allows PDHG iterations to be on average twice as fast, but may require more iterations to converge. Compatible with crossover (solution is converted back to FP64 before crossover) and concurrent mode (PDLP runs in FP32 while other solvers run in FP64). 
+- **mixed**: Use mixed precision SpMV during PDHG iterations. The constraint matrix is stored in FP32 while vectors and compute type remain in FP64, improving SpMV performance with limited impact on convergence. Convergence checking and restart logic always use the full FP64 matrix. +- **double**: Explicitly run in FP64 (same as default for double-precision problems). -.. note:: The default value is false. +.. note:: The default precision is the native type of the problem (FP64 for double). Multi-GPU Mode -------------- diff --git a/docs/cuopt/source/lp-qp-milp-settings.rst b/docs/cuopt/source/lp-qp-milp-settings.rst index 050843a8df..f429b8bf72 100644 --- a/docs/cuopt/source/lp-qp-milp-settings.rst +++ b/docs/cuopt/source/lp-qp-milp-settings.rst @@ -192,32 +192,26 @@ Per Constraint Residual .. note:: The default value is false. -FP32 Precision +PDLP Precision ^^^^^^^^^^^^^^ -``CUOPT_PDLP_FP32`` controls whether PDLP should run in FP32 (float) precision instead of FP64 (double). -FP32 uses half the memory of FP64 and allows PDHG iterations to be on average twice as fast, -but it may require more iterations to converge due to reduced numerical accuracy. -For an alternative that maintains FP64 accuracy while improving performance, see :ref:`Mixed Precision SpMV`. -FP32 mode is only supported with the PDLP method (not concurrent) and without crossover. - -.. note:: The default precision is FP64 (double). - -.. _Mixed Precision SpMV: - -Mixed Precision SpMV -^^^^^^^^^^^^^^^^^^^^ - -``CUOPT_MIXED_PRECISION_SPMV`` controls whether PDLP should use mixed precision sparse matrix-vector -products (SpMV) during PDHG iterations. When enabled, the constraint matrix and its transpose are stored -in FP32 while vectors and the compute type remain in FP64. This allows SpMV operations to be faster -thanks to reduced memory bandwidth requirements, while maintaining FP64 accuracy in the accumulation. 
-This will make PDHG iterations faster while limiting the potential negative impact on convergence -(compared to running in FP32 mode). Convergence checking and restart logic always use the full FP64 -matrix, so this mode does not reduce memory usage since both the FP32 and FP64 copies of the matrix -are kept in memory. Mixed precision SpMV only applies in FP64 mode and has no effect when running in FP32. - -.. note:: The default value is false. +``CUOPT_PDLP_PRECISION`` controls the precision mode used by the PDLP solver. The following modes are +available: + +- **default** (0): Use the native precision of the problem type (FP64 for double-precision problems). +- **single** (1): Run PDLP internally in FP32 (float). Inputs are converted from FP64 to FP32 before + solving and outputs are converted back to FP64. FP32 uses half the memory and allows PDHG iterations + to be on average twice as fast, but may require more iterations to converge due to reduced numerical + accuracy. Compatible with crossover (solution is converted back to FP64 before crossover runs) and + concurrent mode (the PDLP leg runs in FP32 while Dual Simplex and Barrier run in FP64). +- **double** (2): Explicitly run in FP64 (same as default for double-precision problems). +- **mixed** (3): Use mixed precision sparse matrix-vector products (SpMV) during PDHG iterations. The + constraint matrix and its transpose are stored in FP32 while vectors and the compute type remain in + FP64, improving SpMV performance. Convergence checking and restart logic always use the + full FP64 matrix, so this mode does not reduce overall memory usage. This provides a middle ground + between full FP64 and FP32: faster PDHG iterations with limited impact on convergence. + +.. note:: The default value is 0 (default precision). 
Barrier Solver Settings ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py b/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py index 9f94916ff0..0cbb534125 100644 --- a/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py +++ b/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py @@ -18,6 +18,7 @@ CUOPT_ITERATION_LIMIT, CUOPT_METHOD, CUOPT_MIP_HEURISTICS_ONLY, + CUOPT_PDLP_PRECISION, CUOPT_PDLP_SOLVER_MODE, CUOPT_PRIMAL_INFEASIBLE_TOLERANCE, CUOPT_RELATIVE_DUAL_TOLERANCE, @@ -604,10 +605,10 @@ def test_barrier(): A_offsets = np.array([0, 2, 4]) data_model_obj.set_csr_constraint_matrix(A_values, A_indices, A_offsets) - b = np.array([200, 160]) + b = np.array([200.0, 160.0]) data_model_obj.set_constraint_bounds(b) - c = np.array([5, 20]) + c = np.array([5.0, 20.0]) data_model_obj.set_objective_coefficients(c) row_types = np.array(["L", "L"]) @@ -722,3 +723,43 @@ def test_write_files(): assert float(line.split()[-1]) == pytest.approx(80) os.remove("afiro.sol") + + +def test_pdlp_precision_single(): + file_path = ( + RAPIDS_DATASET_ROOT_DIR + "/linear_programming/afiro_original.mps" + ) + data_model_obj = cuopt_mps_parser.ParseMps(file_path) + + settings = solver_settings.SolverSettings() + settings.set_parameter(CUOPT_METHOD, SolverMethod.PDLP) + settings.set_parameter(CUOPT_PDLP_PRECISION, 1) # Single + settings.set_optimality_tolerance(1e-4) + + solution = solver.Solve(data_model_obj, settings) + + assert solution.get_termination_status() == LPTerminationStatus.Optimal + assert solution.get_primal_objective() == pytest.approx( + -464.7531, rel=1e-1 + ) + assert solution.get_solved_by_pdlp() + + +def test_pdlp_precision_single_crossover(): + file_path = ( + RAPIDS_DATASET_ROOT_DIR + "/linear_programming/afiro_original.mps" + ) + data_model_obj = cuopt_mps_parser.ParseMps(file_path) + + settings = solver_settings.SolverSettings() + settings.set_parameter(CUOPT_METHOD, SolverMethod.PDLP) + 
settings.set_parameter(CUOPT_PDLP_PRECISION, 1) # Single + settings.set_parameter("crossover", True) + settings.set_optimality_tolerance(1e-4) + + solution = solver.Solve(data_model_obj, settings) + + assert solution.get_termination_status() == LPTerminationStatus.Optimal + assert solution.get_primal_objective() == pytest.approx( + -464.7531, rel=1e-1 + ) From f61ca9272e43063df193d678cba29a876de5f61e Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Fri, 6 Mar 2026 12:02:56 +0100 Subject: [PATCH 16/23] address PR comments --- cpp/cuopt_cli.cpp | 6 +- .../cuopt/linear_programming/constants.h | 8 +- .../optimization_problem.hpp | 8 + .../presolve/third_party_presolve.cpp | 225 +++++------------- cpp/src/pdlp/optimization_problem.cu | 93 ++++++++ cpp/src/pdlp/solve.cu | 86 +------ .../c_api_tests/c_api_test.c | 66 +++++ .../c_api_tests/c_api_tests.cpp | 12 + .../c_api_tests/c_api_tests.h | 4 + docs/cuopt/source/lp-qp-milp-settings.rst | 8 +- .../linear_programming/test_lp_solver.py | 19 +- 11 files changed, 288 insertions(+), 247 deletions(-) diff --git a/cpp/cuopt_cli.cpp b/cpp/cuopt_cli.cpp index 53b586155a..899a3118b3 100644 --- a/cpp/cuopt_cli.cpp +++ b/cpp/cuopt_cli.cpp @@ -294,8 +294,8 @@ int main(int argc, char* argv[]) .help( "PDLP precision mode. 
default: native type, single: FP32 internally, " "double: FP64 explicitly, mixed: mixed-precision SpMV (FP32 matrix, FP64 vectors).") - .default_value(std::string("0")) - .choices("default", "single", "double", "mixed", "0", "1", "2", "3"); + .default_value(std::string("-1")) + .choices("default", "single", "double", "mixed", "-1", "0", "1", "2"); arg_name_to_param_name["--pdlp-precision"] = CUOPT_PDLP_PRECISION; { @@ -354,7 +354,7 @@ int main(int argc, char* argv[]) // Map symbolic pdlp-precision names to integer values static const std::map precision_name_to_value = { - {"default", "0"}, {"single", "1"}, {"double", "2"}, {"mixed", "3"}}; + {"default", "-1"}, {"single", "0"}, {"double", "1"}, {"mixed", "2"}}; // Read everything as a string std::map settings_strings; diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h index 0c76df3346..d9dfbce16d 100644 --- a/cpp/include/cuopt/linear_programming/constants.h +++ b/cpp/include/cuopt/linear_programming/constants.h @@ -127,10 +127,10 @@ #define CUOPT_METHOD_BARRIER 3 /* @brief PDLP precision mode constants */ -#define CUOPT_PDLP_DEFAULT_PRECISION 0 -#define CUOPT_PDLP_SINGLE_PRECISION 1 -#define CUOPT_PDLP_DOUBLE_PRECISION 2 -#define CUOPT_PDLP_MIXED_PRECISION 3 +#define CUOPT_PDLP_DEFAULT_PRECISION -1 +#define CUOPT_PDLP_SINGLE_PRECISION 0 +#define CUOPT_PDLP_DOUBLE_PRECISION 1 +#define CUOPT_PDLP_MIXED_PRECISION 2 /* @brief File format constants for problem I/O */ #define CUOPT_FILE_FORMAT_MPS 0 diff --git a/cpp/include/cuopt/linear_programming/optimization_problem.hpp b/cpp/include/cuopt/linear_programming/optimization_problem.hpp index d0f624ebdf..8094a1c9e4 100644 --- a/cpp/include/cuopt/linear_programming/optimization_problem.hpp +++ b/cpp/include/cuopt/linear_programming/optimization_problem.hpp @@ -312,6 +312,14 @@ class optimization_problem_t : public optimization_problem_interface_t // Conversion // 
============================================================================ + /** + * @brief Convert this problem to a different floating-point precision. + * + * @tparam other_f_t Target floating-point type (e.g. float when this is double) + */ + template + optimization_problem_t convert_to_other_prec() const; + /** * @brief Returns nullptr since this is already a GPU problem. * @return nullptr diff --git a/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp b/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp index bee0291b7c..1dfcebe772 100644 --- a/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp +++ b/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp @@ -30,21 +30,6 @@ namespace cuopt::linear_programming::detail { -// Helper to convert vector from one type to another (only when types differ) -template -std::vector convert_vector(const std::vector& src) -{ - if constexpr (std::is_same_v) { - return src; // No conversion needed - } else { - std::vector dst(src.size()); - for (size_t i = 0; i < src.size(); ++i) { - dst[i] = static_cast(src[i]); - } - return dst; - } -} - template papilo::Problem build_papilo_problem(const optimization_problem_t& op_problem, problem_category_t category, @@ -235,70 +220,62 @@ PSLPContext build_and_run_pslp_presolver(const optimization_problem_t& const auto& constr_ub = op_problem.get_constraint_upper_bounds(); const auto& var_types = op_problem.get_variable_types(); - // Copy data to host (using f_t type) - std::vector h_coefficients_ft(coefficients.size()); + // Copy data to host + std::vector h_coefficients(coefficients.size()); auto stream_view = op_problem.get_handle_ptr()->get_stream(); - raft::copy(h_coefficients_ft.data(), coefficients.data(), coefficients.size(), stream_view); + raft::copy(h_coefficients.data(), coefficients.data(), coefficients.size(), stream_view); std::vector h_offsets(offsets.size()); raft::copy(h_offsets.data(), offsets.data(), offsets.size(), stream_view); std::vector 
h_variables(variables.size()); raft::copy(h_variables.data(), variables.data(), variables.size(), stream_view); - std::vector h_obj_coeffs_ft(obj_coeffs.size()); - raft::copy(h_obj_coeffs_ft.data(), obj_coeffs.data(), obj_coeffs.size(), stream_view); - std::vector h_var_lb_ft(var_lb.size()); - raft::copy(h_var_lb_ft.data(), var_lb.data(), var_lb.size(), stream_view); - std::vector h_var_ub_ft(var_ub.size()); - raft::copy(h_var_ub_ft.data(), var_ub.data(), var_ub.size(), stream_view); - std::vector h_bounds_ft(bounds.size()); - raft::copy(h_bounds_ft.data(), bounds.data(), bounds.size(), stream_view); + std::vector h_obj_coeffs(obj_coeffs.size()); + raft::copy(h_obj_coeffs.data(), obj_coeffs.data(), obj_coeffs.size(), stream_view); + std::vector h_var_lb(var_lb.size()); + raft::copy(h_var_lb.data(), var_lb.data(), var_lb.size(), stream_view); + std::vector h_var_ub(var_ub.size()); + raft::copy(h_var_ub.data(), var_ub.data(), var_ub.size(), stream_view); + std::vector h_bounds(bounds.size()); + raft::copy(h_bounds.data(), bounds.data(), bounds.size(), stream_view); std::vector h_row_types(row_types.size()); raft::copy(h_row_types.data(), row_types.data(), row_types.size(), stream_view); - std::vector h_constr_lb_ft(constr_lb.size()); - raft::copy(h_constr_lb_ft.data(), constr_lb.data(), constr_lb.size(), stream_view); - std::vector h_constr_ub_ft(constr_ub.size()); - raft::copy(h_constr_ub_ft.data(), constr_ub.data(), constr_ub.size(), stream_view); + std::vector h_constr_lb(constr_lb.size()); + raft::copy(h_constr_lb.data(), constr_lb.data(), constr_lb.size(), stream_view); + std::vector h_constr_ub(constr_ub.size()); + raft::copy(h_constr_ub.data(), constr_ub.data(), constr_ub.size(), stream_view); std::vector h_var_types(var_types.size()); raft::copy(h_var_types.data(), var_types.data(), var_types.size(), stream_view); stream_view.synchronize(); if (maximize) { - for (size_t i = 0; i < h_obj_coeffs_ft.size(); ++i) { - h_obj_coeffs_ft[i] = -h_obj_coeffs_ft[i]; + 
for (size_t i = 0; i < h_obj_coeffs.size(); ++i) { + h_obj_coeffs[i] = -h_obj_coeffs[i]; } } - auto constr_bounds_empty = h_constr_lb_ft.empty() && h_constr_ub_ft.empty(); + auto constr_bounds_empty = h_constr_lb.empty() && h_constr_ub.empty(); if (constr_bounds_empty) { for (size_t i = 0; i < h_row_types.size(); ++i) { if (h_row_types[i] == 'L') { - h_constr_lb_ft.push_back(-std::numeric_limits::infinity()); - h_constr_ub_ft.push_back(h_bounds_ft[i]); + h_constr_lb.push_back(-std::numeric_limits::infinity()); + h_constr_ub.push_back(h_bounds[i]); } else if (h_row_types[i] == 'G') { - h_constr_lb_ft.push_back(h_bounds_ft[i]); - h_constr_ub_ft.push_back(std::numeric_limits::infinity()); + h_constr_lb.push_back(h_bounds[i]); + h_constr_ub.push_back(std::numeric_limits::infinity()); } else if (h_row_types[i] == 'E') { - h_constr_lb_ft.push_back(h_bounds_ft[i]); - h_constr_ub_ft.push_back(h_bounds_ft[i]); + h_constr_lb.push_back(h_bounds[i]); + h_constr_ub.push_back(h_bounds[i]); } } } // handle empty variable bounds - if (h_var_lb_ft.empty()) { - h_var_lb_ft = std::vector(num_cols, -std::numeric_limits::infinity()); + if (h_var_lb.empty()) { + h_var_lb = std::vector(num_cols, -std::numeric_limits::infinity()); } - if (h_var_ub_ft.empty()) { - h_var_ub_ft = std::vector(num_cols, std::numeric_limits::infinity()); + if (h_var_ub.empty()) { + h_var_ub = std::vector(num_cols, std::numeric_limits::infinity()); } - // Convert to double for PSLP API if necessary (PSLP only accepts double*) - std::vector h_coefficients = convert_vector(h_coefficients_ft); - std::vector h_obj_coeffs = convert_vector(h_obj_coeffs_ft); - std::vector h_var_lb = convert_vector(h_var_lb_ft); - std::vector h_var_ub = convert_vector(h_var_ub_ft); - std::vector h_constr_lb = convert_vector(h_constr_lb_ft); - std::vector h_constr_ub = convert_vector(h_constr_ub_ft); - // Call PSLP presolver ctx.settings = default_settings(); ctx.settings->verbose = false; @@ -354,7 +331,7 @@ optimization_problem_t 
build_optimization_problem_from_pslp( // PSLP does not allow setting the objective offset, so we add the original objective offset to // the reduced objective offset obj_offset += original_obj_offset; - op_problem.set_objective_offset(static_cast(obj_offset)); + op_problem.set_objective_offset(obj_offset); op_problem.set_maximize(maximize); op_problem.set_problem_category(problem_category_t::LP); @@ -366,65 +343,21 @@ optimization_problem_t build_optimization_problem_from_pslp( return op_problem; } - if constexpr (std::is_same_v) { - // PSLP uses double internally, so we can use the data directly - op_problem.set_csr_constraint_matrix( - reduced_prob->Ax, nnz, reduced_prob->Ai, nnz, reduced_prob->Ap, n_rows + 1); - - std::vector h_obj_coeffs(n_cols); - std::copy(reduced_prob->c, reduced_prob->c + n_cols, h_obj_coeffs.begin()); - if (maximize) { - for (size_t i = 0; i < n_cols; ++i) { - h_obj_coeffs[i] = -h_obj_coeffs[i]; - } - } - op_problem.set_objective_coefficients(h_obj_coeffs.data(), n_cols); - op_problem.set_constraint_lower_bounds(reduced_prob->lhs, n_rows); - op_problem.set_constraint_upper_bounds(reduced_prob->rhs, n_rows); - op_problem.set_variable_lower_bounds(reduced_prob->lbs, n_cols); - op_problem.set_variable_upper_bounds(reduced_prob->ubs, n_cols); - } else { - // Convert PSLP double arrays to f_t - // Constraint matrix values (Ax) - std::vector h_Ax(nnz); - for (int i = 0; i < nnz; ++i) { - h_Ax[i] = static_cast(reduced_prob->Ax[i]); - } - op_problem.set_csr_constraint_matrix( - h_Ax.data(), nnz, reduced_prob->Ai, nnz, reduced_prob->Ap, n_rows + 1); + op_problem.set_csr_constraint_matrix( + reduced_prob->Ax, nnz, reduced_prob->Ai, nnz, reduced_prob->Ap, n_rows + 1); - // Objective coefficients - std::vector h_obj_coeffs(n_cols); - for (int i = 0; i < n_cols; ++i) { - h_obj_coeffs[i] = static_cast(reduced_prob->c[i]); - } - if (maximize) { - for (int i = 0; i < n_cols; ++i) { - h_obj_coeffs[i] = -h_obj_coeffs[i]; - } - } - 
op_problem.set_objective_coefficients(h_obj_coeffs.data(), n_cols); - - // Constraint bounds - std::vector h_constr_lb(n_rows); - std::vector h_constr_ub(n_rows); - for (int i = 0; i < n_rows; ++i) { - h_constr_lb[i] = static_cast(reduced_prob->lhs[i]); - h_constr_ub[i] = static_cast(reduced_prob->rhs[i]); - } - op_problem.set_constraint_lower_bounds(h_constr_lb.data(), n_rows); - op_problem.set_constraint_upper_bounds(h_constr_ub.data(), n_rows); - - // Variable bounds - std::vector h_var_lb(n_cols); - std::vector h_var_ub(n_cols); - for (int i = 0; i < n_cols; ++i) { - h_var_lb[i] = static_cast(reduced_prob->lbs[i]); - h_var_ub[i] = static_cast(reduced_prob->ubs[i]); + std::vector h_obj_coeffs(n_cols); + std::copy(reduced_prob->c, reduced_prob->c + n_cols, h_obj_coeffs.begin()); + if (maximize) { + for (size_t i = 0; i < n_cols; ++i) { + h_obj_coeffs[i] = -h_obj_coeffs[i]; } - op_problem.set_variable_lower_bounds(h_var_lb.data(), n_cols); - op_problem.set_variable_upper_bounds(h_var_ub.data(), n_cols); } + op_problem.set_objective_coefficients(h_obj_coeffs.data(), n_cols); + op_problem.set_constraint_lower_bounds(reduced_prob->lhs, n_rows); + op_problem.set_constraint_upper_bounds(reduced_prob->rhs, n_rows); + op_problem.set_variable_lower_bounds(reduced_prob->lbs, n_cols); + op_problem.set_variable_upper_bounds(reduced_prob->ubs, n_cols); return op_problem; } @@ -463,7 +396,6 @@ optimization_problem_t build_optimization_problem( obj.coefficients[i] = -obj.coefficients[i]; } } - op_problem.set_objective_coefficients(obj.coefficients.data(), obj.coefficients.size()); auto& constraint_matrix = papilo_problem.getConstraintMatrix(); @@ -641,24 +573,31 @@ template std::optional> third_party_presolve_t::apply_pslp( optimization_problem_t const& op_problem, const double time_limit) { - f_t original_obj_offset = op_problem.get_objective_offset(); - auto ctx = build_and_run_pslp_presolver(op_problem, maximize_, time_limit); + if constexpr (std::is_same_v) { + double 
original_obj_offset = op_problem.get_objective_offset(); + auto ctx = build_and_run_pslp_presolver(op_problem, maximize_, time_limit); - // Free previously allocated presolver and settings - if (pslp_presolver_ != nullptr) { free_presolver(pslp_presolver_); } - if (pslp_stgs_ != nullptr) { free_settings(pslp_stgs_); } + // Free previously allocated presolver and settings if they exist + if (pslp_presolver_ != nullptr) { free_presolver(pslp_presolver_); } + if (pslp_stgs_ != nullptr) { free_settings(pslp_stgs_); } - pslp_presolver_ = ctx.presolver; - pslp_stgs_ = ctx.settings; + pslp_presolver_ = ctx.presolver; + pslp_stgs_ = ctx.settings; - if (ctx.status == PresolveStatus_::INFEASIBLE || ctx.status == PresolveStatus_::UNBNDORINFEAS) { - return std::nullopt; - } + if (ctx.status == PresolveStatus_::INFEASIBLE || ctx.status == PresolveStatus_::UNBNDORINFEAS) { + return std::nullopt; + } - auto opt_problem = build_optimization_problem_from_pslp( - pslp_presolver_, op_problem.get_handle_ptr(), maximize_, original_obj_offset); + auto opt_problem = build_optimization_problem_from_pslp( + pslp_presolver_, op_problem.get_handle_ptr(), maximize_, original_obj_offset); - return std::make_optional(third_party_presolve_result_t{opt_problem, {}}); + return std::make_optional(third_party_presolve_result_t{opt_problem, {}}); + } else { + cuopt_expects(false, + error_type_t::ValidationError, + "PSLP presolver only supports double precision"); + return std::nullopt; + } } template @@ -830,45 +769,9 @@ void third_party_presolve_t::undo_pslp(rmm::device_uvector& prima raft::copy(dual_solution.data(), uncrushed_sol->y, n_rows, stream_view); raft::copy(reduced_costs.data(), uncrushed_sol->z, n_cols, stream_view); } else { - // Convert f_t to double for PSLP postsolve API - std::vector h_primal_solution_ft(primal_solution.size()); - std::vector h_dual_solution_ft(dual_solution.size()); - std::vector h_reduced_costs_ft(reduced_costs.size()); - raft::copy( - h_primal_solution_ft.data(), 
primal_solution.data(), primal_solution.size(), stream_view); - raft::copy(h_dual_solution_ft.data(), dual_solution.data(), dual_solution.size(), stream_view); - raft::copy(h_reduced_costs_ft.data(), reduced_costs.data(), reduced_costs.size(), stream_view); - stream_view.synchronize(); - - std::vector h_primal_solution = convert_vector(h_primal_solution_ft); - std::vector h_dual_solution = convert_vector(h_dual_solution_ft); - std::vector h_reduced_costs = convert_vector(h_reduced_costs_ft); - - postsolve( - pslp_presolver_, h_primal_solution.data(), h_dual_solution.data(), h_reduced_costs.data()); - - auto uncrushed_sol = pslp_presolver_->sol; - int n_cols = uncrushed_sol->dim_x; - int n_rows = uncrushed_sol->dim_y; - - // Convert double results back to f_t and copy to device - std::vector h_primal_out(n_cols); - std::vector h_dual_out(n_rows); - std::vector h_reduced_costs_out(n_cols); - for (int i = 0; i < n_cols; ++i) { - h_primal_out[i] = static_cast(uncrushed_sol->x[i]); - h_reduced_costs_out[i] = static_cast(uncrushed_sol->z[i]); - } - for (int i = 0; i < n_rows; ++i) { - h_dual_out[i] = static_cast(uncrushed_sol->y[i]); - } - - primal_solution.resize(n_cols, stream_view); - dual_solution.resize(n_rows, stream_view); - reduced_costs.resize(n_cols, stream_view); - raft::copy(primal_solution.data(), h_primal_out.data(), n_cols, stream_view); - raft::copy(dual_solution.data(), h_dual_out.data(), n_rows, stream_view); - raft::copy(reduced_costs.data(), h_reduced_costs_out.data(), n_cols, stream_view); + cuopt_expects(false, + error_type_t::ValidationError, + "PSLP postsolve only supports double precision"); } stream_view.synchronize(); diff --git a/cpp/src/pdlp/optimization_problem.cu b/cpp/src/pdlp/optimization_problem.cu index c43406bff9..504a1ec138 100644 --- a/cpp/src/pdlp/optimization_problem.cu +++ b/cpp/src/pdlp/optimization_problem.cu @@ -39,6 +39,7 @@ #include #include +#include #include #include @@ -1505,6 +1506,93 @@ void 
optimization_problem_t::copy_variable_types_to_host(var_t* output cudaMemcpy(output, variable_types_.data(), size * sizeof(var_t), cudaMemcpyDeviceToHost)); } +template +struct cast_op { + HDI To operator()(From val) const { return static_cast(val); } +}; + +template +rmm::device_uvector gpu_cast(const rmm::device_uvector& src, + rmm::cuda_stream_view stream) +{ + rmm::device_uvector dst(src.size(), stream); + if (src.size() > 0) { + RAFT_CUDA_TRY(cub::DeviceTransform::Transform( + src.data(), dst.data(), src.size(), cast_op{}, stream.value())); + } + return dst; +} + +template rmm::device_uvector gpu_cast(const rmm::device_uvector&, + rmm::cuda_stream_view); +template rmm::device_uvector gpu_cast(const rmm::device_uvector&, + rmm::cuda_stream_view); + +template +template +optimization_problem_t optimization_problem_t::convert_to_other_prec() + const +{ + auto stream = handle_ptr_->get_stream(); + + optimization_problem_t other(handle_ptr_); + + other.set_maximize(maximize_); + other.set_objective_offset(static_cast(objective_offset_)); + other.set_objective_scaling_factor(static_cast(objective_scaling_factor_)); + + if (A_.size() > 0) { + auto other_A = gpu_cast(A_, stream); + other.set_csr_constraint_matrix(other_A.data(), + static_cast(other_A.size()), + A_indices_.data(), + static_cast(A_indices_.size()), + A_offsets_.data(), + static_cast(A_offsets_.size())); + } + + if (c_.size() > 0) { + auto other_c = gpu_cast(c_, stream); + other.set_objective_coefficients(other_c.data(), static_cast(other_c.size())); + } + + if (b_.size() > 0) { + auto other_b = gpu_cast(b_, stream); + other.set_constraint_bounds(other_b.data(), static_cast(other_b.size())); + } + + if (constraint_lower_bounds_.size() > 0) { + auto other_clb = gpu_cast(constraint_lower_bounds_, stream); + other.set_constraint_lower_bounds(other_clb.data(), static_cast(other_clb.size())); + } + + if (constraint_upper_bounds_.size() > 0) { + auto other_cub = gpu_cast(constraint_upper_bounds_, stream); + 
other.set_constraint_upper_bounds(other_cub.data(), static_cast(other_cub.size())); + } + + if (variable_lower_bounds_.size() > 0) { + auto other_vlb = gpu_cast(variable_lower_bounds_, stream); + other.set_variable_lower_bounds(other_vlb.data(), static_cast(other_vlb.size())); + } + + if (variable_upper_bounds_.size() > 0) { + auto other_vub = gpu_cast(variable_upper_bounds_, stream); + other.set_variable_upper_bounds(other_vub.data(), static_cast(other_vub.size())); + } + + if (variable_types_.size() > 0) { + other.set_variable_types(variable_types_.data(), static_cast(variable_types_.size())); + } + + other.set_variable_names(var_names_); + other.set_row_names(row_names_); + other.set_objective_name(objective_name_); + other.set_problem_category(problem_category_); + + return other; +} + // ============================================================================== // Template instantiations // ============================================================================== @@ -1516,4 +1604,9 @@ template class optimization_problem_t; template class optimization_problem_t; #endif +#if PDLP_INSTANTIATE_FLOAT || MIP_INSTANTIATE_FLOAT +template optimization_problem_t +optimization_problem_t::convert_to_other_prec() const; +#endif + } // namespace cuopt::linear_programming diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index 7dbd423c87..dc359ee957 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -60,6 +60,10 @@ namespace cuopt::linear_programming { +template +extern rmm::device_uvector gpu_cast(const rmm::device_uvector& src, + rmm::cuda_stream_view stream); + // This serves as both a warm up but also a mandatory initial call to setup cuSparse and cuBLAS static void init_handler(const raft::handle_t* handle_ptr) { @@ -562,14 +566,6 @@ optimization_problem_solution_t run_dual_simplex( #if PDLP_INSTANTIATE_FLOAT || CUOPT_INSTANTIATE_FLOAT -struct double_to_float_op { - HDI float operator()(double val) const { return static_cast(val); } -}; 
- -struct float_to_double_op { - HDI double operator()(float val) const { return static_cast(val); } -}; - template static optimization_problem_solution_t run_pdlp_solver_in_fp32( detail::problem_t& problem, @@ -578,71 +574,13 @@ static optimization_problem_solution_t run_pdlp_solver_in_fp32( bool is_batch_mode) { CUOPT_LOG_CONDITIONAL_INFO(!settings.inside_mip, "Running PDLP in FP32 precision"); - auto stream = problem.handle_ptr->get_stream(); - auto stream_val = stream.value(); - - auto gpu_double_to_float = [&](const rmm::device_uvector& src) { - rmm::device_uvector dst(src.size(), stream); - if (src.size() > 0) { - RAFT_CUDA_TRY(cub::DeviceTransform::Transform( - src.data(), dst.data(), src.size(), double_to_float_op{}, stream_val)); - } - return dst; - }; - - auto gpu_float_to_double = [&](const rmm::device_uvector& src) { - rmm::device_uvector dst(src.size(), stream); - if (src.size() > 0) { - RAFT_CUDA_TRY(cub::DeviceTransform::Transform( - src.data(), dst.data(), src.size(), float_to_double_op{}, stream_val)); - } - return dst; - }; - - // Convert double device vectors to float on the GPU - auto f_coefficients = gpu_double_to_float(problem.coefficients); - auto f_obj = gpu_double_to_float(problem.objective_coefficients); - auto f_clb = gpu_double_to_float(problem.constraint_lower_bounds); - auto f_cub = gpu_double_to_float(problem.constraint_upper_bounds); - - // Extract and convert variable bounds from double2 to separate float arrays on GPU - i_t n_vars = problem.n_variables; - rmm::device_uvector f_var_lb(n_vars, stream); - rmm::device_uvector f_var_ub(n_vars, stream); - if (n_vars > 0) { - auto out_zip = thrust::make_zip_iterator(thrust::make_tuple(f_var_lb.data(), f_var_ub.data())); - RAFT_CUDA_TRY(cub::DeviceTransform::Transform( - problem.variable_bounds.data(), - out_zip, - n_vars, - [] __device__(double2 b) { - return thrust::make_tuple(static_cast(b.x), static_cast(b.y)); - }, - stream_val)); - } + auto stream = problem.handle_ptr->get_stream(); 
- // Build float optimization_problem_t from device pointers - optimization_problem_t float_op(problem.handle_ptr); - float_op.set_maximize(problem.maximize); - // Should it just be problem.objective_offset? + // Convert the optimization problem stored inside problem_t to float + auto float_op = problem.original_problem_ptr->template convert_to_other_prec(); float_op.set_objective_offset(static_cast(problem.presolve_data.objective_offset)); float_op.set_objective_scaling_factor( static_cast(problem.presolve_data.objective_scaling_factor)); - float_op.set_csr_constraint_matrix(f_coefficients.data(), - static_cast(f_coefficients.size()), - problem.variables.data(), - static_cast(problem.variables.size()), - problem.offsets.data(), - static_cast(problem.offsets.size())); - float_op.set_objective_coefficients(f_obj.data(), static_cast(f_obj.size())); - float_op.set_constraint_lower_bounds(f_clb.data(), static_cast(f_clb.size())); - float_op.set_constraint_upper_bounds(f_cub.data(), static_cast(f_cub.size())); - float_op.set_variable_lower_bounds(f_var_lb.data(), static_cast(f_var_lb.size())); - float_op.set_variable_upper_bounds(f_var_ub.data(), static_cast(f_var_ub.size())); - - float_op.set_variable_names(problem.var_names); - float_op.set_row_names(problem.row_names); - float_op.set_objective_name(problem.objective_name); detail::problem_t float_problem(float_op); @@ -698,10 +636,10 @@ static optimization_problem_solution_t run_pdlp_solver_in_fp32( if (settings.inside_mip) { solver.set_inside_mip(true); } auto float_sol = solver.run_solver(timer); - // Convert float solution back to double on GPU - auto dev_primal = gpu_float_to_double(float_sol.get_primal_solution()); - auto dev_dual = gpu_float_to_double(float_sol.get_dual_solution()); - auto dev_reduced = gpu_float_to_double(float_sol.get_reduced_cost()); + // Convert float solution back to double on GPU (gpu_cast defined in optimization_problem.cu) + auto dev_primal = gpu_cast(float_sol.get_primal_solution(), 
stream); + auto dev_dual = gpu_cast(float_sol.get_dual_solution(), stream); + auto dev_reduced = gpu_cast(float_sol.get_reduced_cost(), stream); // Convert termination info (small host-side struct, stays on CPU) auto float_term_infos = float_sol.get_additional_termination_informations(); @@ -786,7 +724,7 @@ optimization_problem_solution_t run_pdlp(detail::problem_t& cuopt_expects( !(is_batch_mode && settings.pdlp_precision == pdlp_precision_t::MixedPrecision), error_type_t::ValidationError, - "Mixed-precision SpMV is not supported in batch mode. Set pdlp_precision=0 (default) " + "Mixed-precision SpMV is not supported in batch mode. Set pdlp_precision=-1 (default) " "or disable batch mode."); cuopt_expects(!(settings.pdlp_precision == pdlp_precision_t::SinglePrecision && is_batch_mode), error_type_t::ValidationError, diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_test.c b/cpp/tests/linear_programming/c_api_tests/c_api_test.c index 41b99aeebb..996d60deae 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_test.c +++ b/cpp/tests/linear_programming/c_api_tests/c_api_test.c @@ -2123,6 +2123,72 @@ cuopt_int_t test_cpu_only_mip_execution(const char* filename) return status; } +cuopt_int_t test_pdlp_precision_mixed(const char* filename, + cuopt_int_t* termination_status_ptr, + cuopt_float_t* objective_ptr) +{ + cuOptOptimizationProblem problem = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; + cuopt_int_t status; + cuopt_int_t termination_status = -1; + cuopt_float_t objective_value; + + status = cuOptReadProblem(filename, &problem); + if (status != CUOPT_SUCCESS) { + printf("Error reading problem\n"); + goto DONE; + } + + status = cuOptCreateSolverSettings(&settings); + if (status != CUOPT_SUCCESS) { + printf("Error creating solver settings\n"); + goto DONE; + } + + status = cuOptSetIntegerParameter(settings, CUOPT_METHOD, CUOPT_METHOD_PDLP); + if (status != CUOPT_SUCCESS) { + printf("Error setting method\n"); + goto 
DONE; + } + + status = cuOptSetIntegerParameter(settings, CUOPT_PDLP_PRECISION, CUOPT_PDLP_MIXED_PRECISION); + if (status != CUOPT_SUCCESS) { + printf("Error setting pdlp_precision\n"); + goto DONE; + } + + status = cuOptSolve(problem, settings, &solution); + if (status != CUOPT_SUCCESS) { + printf("Error solving problem with pdlp_precision=mixed\n"); + goto DONE; + } + + status = cuOptGetTerminationStatus(solution, &termination_status); + if (status != CUOPT_SUCCESS) { + printf("Error getting termination status\n"); + goto DONE; + } + *termination_status_ptr = termination_status; + + status = cuOptGetObjectiveValue(solution, &objective_value); + if (status != CUOPT_SUCCESS) { + printf("Error getting objective value\n"); + goto DONE; + } + *objective_ptr = objective_value; + + printf("PDLP precision=mixed test passed: status=%s, objective=%f\n", + termination_status_to_string(termination_status), + objective_value); + +DONE: + cuOptDestroyProblem(&problem); + cuOptDestroySolverSettings(&settings); + cuOptDestroySolution(&solution); + return status; +} + cuopt_int_t test_pdlp_precision_single(const char* filename, cuopt_int_t* termination_status_ptr, cuopt_float_t* objective_ptr) diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp index 5971292ebb..4c252f8863 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp +++ b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp @@ -287,6 +287,18 @@ TEST(c_api, pdlp_precision_single) EXPECT_NEAR(objective, -464.7531, 1e-1); } +TEST(c_api, pdlp_precision_mixed) +{ + const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); + std::string filename = rapidsDatasetRootDir + "/linear_programming/afiro_original.mps"; + cuopt_int_t termination_status; + cuopt_float_t objective; + EXPECT_EQ(test_pdlp_precision_mixed(filename.c_str(), &termination_status, &objective), + CUOPT_SUCCESS); + 
EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_OPTIMAL); + EXPECT_NEAR(objective, -464.7531, 1e-1); +} + // ============================================================================= // Solution Interface Polymorphism Tests // ============================================================================= diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_tests.h b/cpp/tests/linear_programming/c_api_tests/c_api_tests.h index b7614c378b..402c7d06a5 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_tests.h +++ b/cpp/tests/linear_programming/c_api_tests/c_api_tests.h @@ -57,6 +57,10 @@ cuopt_int_t test_pdlp_precision_single(const char* filename, cuopt_int_t* termination_status_ptr, cuopt_float_t* objective_ptr); +cuopt_int_t test_pdlp_precision_mixed(const char* filename, + cuopt_int_t* termination_status_ptr, + cuopt_float_t* objective_ptr); + /* CPU-only execution tests (require env vars CUDA_VISIBLE_DEVICES="" and CUOPT_REMOTE_HOST) */ cuopt_int_t test_cpu_only_execution(const char* filename); cuopt_int_t test_cpu_only_mip_execution(const char* filename); diff --git a/docs/cuopt/source/lp-qp-milp-settings.rst b/docs/cuopt/source/lp-qp-milp-settings.rst index f429b8bf72..29c27a4ac2 100644 --- a/docs/cuopt/source/lp-qp-milp-settings.rst +++ b/docs/cuopt/source/lp-qp-milp-settings.rst @@ -198,14 +198,14 @@ PDLP Precision ``CUOPT_PDLP_PRECISION`` controls the precision mode used by the PDLP solver. The following modes are available: -- **default** (0): Use the native precision of the problem type (FP64 for double-precision problems). -- **single** (1): Run PDLP internally in FP32 (float). Inputs are converted from FP64 to FP32 before +- **default** (-1): Use the native precision of the problem type (FP64 for double-precision problems). +- **single** (0): Run PDLP internally in FP32 (float). Inputs are converted from FP64 to FP32 before solving and outputs are converted back to FP64. 
FP32 uses half the memory and allows PDHG iterations to be on average twice as fast, but may require more iterations to converge due to reduced numerical accuracy. Compatible with crossover (solution is converted back to FP64 before crossover runs) and concurrent mode (the PDLP leg runs in FP32 while Dual Simplex and Barrier run in FP64). -- **double** (2): Explicitly run in FP64 (same as default for double-precision problems). -- **mixed** (3): Use mixed precision sparse matrix-vector products (SpMV) during PDHG iterations. The +- **double** (1): Explicitly run in FP64 (same as default for double-precision problems). +- **mixed** (2): Use mixed precision sparse matrix-vector products (SpMV) during PDHG iterations. The constraint matrix and its transpose are stored in FP32 while vectors and the compute type remain in FP64, improving SpMV performance. Convergence checking and restart logic always use the full FP64 matrix, so this mode does not reduce overall memory usage. This provides a middle ground diff --git a/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py b/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py index 0cbb534125..a7ad426d36 100644 --- a/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py +++ b/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py @@ -733,7 +733,7 @@ def test_pdlp_precision_single(): settings = solver_settings.SolverSettings() settings.set_parameter(CUOPT_METHOD, SolverMethod.PDLP) - settings.set_parameter(CUOPT_PDLP_PRECISION, 1) # Single + settings.set_parameter(CUOPT_PDLP_PRECISION, 0) # Single settings.set_optimality_tolerance(1e-4) solution = solver.Solve(data_model_obj, settings) @@ -744,6 +744,23 @@ def test_pdlp_precision_single(): ) assert solution.get_solved_by_pdlp() +def test_pdlp_precision_mixed(): + file_path = ( + RAPIDS_DATASET_ROOT_DIR + "/linear_programming/afiro_original.mps" + ) + data_model_obj = cuopt_mps_parser.ParseMps(file_path) + + settings = 
solver_settings.SolverSettings() + settings.set_parameter(CUOPT_METHOD, SolverMethod.PDLP) + settings.set_parameter(CUOPT_PDLP_PRECISION, 2) # Mixed + settings.set_optimality_tolerance(1e-4) + + solution = solver.Solve(data_model_obj, settings) + + assert solution.get_termination_status() == LPTerminationStatus.Optimal + assert solution.get_primal_objective() == pytest.approx( + -464.7531, rel=1e-1 + ) def test_pdlp_precision_single_crossover(): file_path = ( From 7ef14d421f4316ddb01ea14fbc9b07eccd7a4ce4 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Fri, 6 Mar 2026 12:11:05 +0100 Subject: [PATCH 17/23] fix style --- .../mip_heuristics/presolve/third_party_presolve.cpp | 10 ++++------ cpp/src/pdlp/optimization_problem.cu | 5 ++--- .../cuopt/tests/linear_programming/test_lp_solver.py | 2 ++ 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp b/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp index 1dfcebe772..20a586f6fb 100644 --- a/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp +++ b/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp @@ -593,9 +593,8 @@ std::optional> third_party_presolve_t{opt_problem, {}}); } else { - cuopt_expects(false, - error_type_t::ValidationError, - "PSLP presolver only supports double precision"); + cuopt_expects( + false, error_type_t::ValidationError, "PSLP presolver only supports double precision"); return std::nullopt; } } @@ -769,9 +768,8 @@ void third_party_presolve_t::undo_pslp(rmm::device_uvector& prima raft::copy(dual_solution.data(), uncrushed_sol->y, n_rows, stream_view); raft::copy(reduced_costs.data(), uncrushed_sol->z, n_cols, stream_view); } else { - cuopt_expects(false, - error_type_t::ValidationError, - "PSLP postsolve only supports double precision"); + cuopt_expects( + false, error_type_t::ValidationError, "PSLP postsolve only supports double precision"); } stream_view.synchronize(); diff --git 
a/cpp/src/pdlp/optimization_problem.cu b/cpp/src/pdlp/optimization_problem.cu index 504a1ec138..f3e8c42c17 100644 --- a/cpp/src/pdlp/optimization_problem.cu +++ b/cpp/src/pdlp/optimization_problem.cu @@ -39,8 +39,8 @@ #include #include -#include #include +#include #include #include @@ -1512,8 +1512,7 @@ struct cast_op { }; template -rmm::device_uvector gpu_cast(const rmm::device_uvector& src, - rmm::cuda_stream_view stream) +rmm::device_uvector gpu_cast(const rmm::device_uvector& src, rmm::cuda_stream_view stream) { rmm::device_uvector dst(src.size(), stream); if (src.size() > 0) { diff --git a/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py b/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py index a7ad426d36..b325dbe474 100644 --- a/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py +++ b/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py @@ -744,6 +744,7 @@ def test_pdlp_precision_single(): ) assert solution.get_solved_by_pdlp() + def test_pdlp_precision_mixed(): file_path = ( RAPIDS_DATASET_ROOT_DIR + "/linear_programming/afiro_original.mps" @@ -762,6 +763,7 @@ def test_pdlp_precision_mixed(): -464.7531, rel=1e-1 ) + def test_pdlp_precision_single_crossover(): file_path = ( RAPIDS_DATASET_ROOT_DIR + "/linear_programming/afiro_original.mps" From e8bd9b681a6327e986c1933cd5aaccc772bb2a30 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Fri, 6 Mar 2026 13:41:26 +0100 Subject: [PATCH 18/23] add stream parameter and fix c api to handle if mixed is not supported --- .../cuopt/linear_programming/optimization_problem.hpp | 2 +- cpp/src/pdlp/optimization_problem.cu | 7 +++---- cpp/src/pdlp/solve.cu | 2 +- cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp | 7 +++++++ 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/cpp/include/cuopt/linear_programming/optimization_problem.hpp b/cpp/include/cuopt/linear_programming/optimization_problem.hpp index 8094a1c9e4..df78dd17c7 100644 --- 
a/cpp/include/cuopt/linear_programming/optimization_problem.hpp +++ b/cpp/include/cuopt/linear_programming/optimization_problem.hpp @@ -318,7 +318,7 @@ class optimization_problem_t : public optimization_problem_interface_t * @tparam other_f_t Target floating-point type (e.g. float when this is double) */ template - optimization_problem_t convert_to_other_prec() const; + optimization_problem_t convert_to_other_prec(rmm::cuda_stream_view stream) const; /** * @brief Returns nullptr since this is already a GPU problem. diff --git a/cpp/src/pdlp/optimization_problem.cu b/cpp/src/pdlp/optimization_problem.cu index f3e8c42c17..020d26083a 100644 --- a/cpp/src/pdlp/optimization_problem.cu +++ b/cpp/src/pdlp/optimization_problem.cu @@ -1529,10 +1529,9 @@ template rmm::device_uvector gpu_cast(const rmm::device_u template template -optimization_problem_t optimization_problem_t::convert_to_other_prec() - const +optimization_problem_t optimization_problem_t::convert_to_other_prec( + rmm::cuda_stream_view stream) const { - auto stream = handle_ptr_->get_stream(); optimization_problem_t other(handle_ptr_); @@ -1605,7 +1604,7 @@ template class optimization_problem_t; #if PDLP_INSTANTIATE_FLOAT || MIP_INSTANTIATE_FLOAT template optimization_problem_t -optimization_problem_t::convert_to_other_prec() const; +optimization_problem_t::convert_to_other_prec(rmm::cuda_stream_view) const; #endif } // namespace cuopt::linear_programming diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index dc359ee957..22fff31906 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -577,7 +577,7 @@ static optimization_problem_solution_t run_pdlp_solver_in_fp32( auto stream = problem.handle_ptr->get_stream(); // Convert the optimization problem stored inside problem_t to float - auto float_op = problem.original_problem_ptr->template convert_to_other_prec(); + auto float_op = problem.original_problem_ptr->template convert_to_other_prec(stream); 
float_op.set_objective_offset(static_cast(problem.presolve_data.objective_offset)); float_op.set_objective_scaling_factor( static_cast(problem.presolve_data.objective_scaling_factor)); diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp index 4c252f8863..ff08fae1a8 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp +++ b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -289,10 +290,16 @@ TEST(c_api, pdlp_precision_single) TEST(c_api, pdlp_precision_mixed) { + using namespace cuopt::linear_programming::detail; const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); std::string filename = rapidsDatasetRootDir + "/linear_programming/afiro_original.mps"; cuopt_int_t termination_status; cuopt_float_t objective; + if (!is_cusparse_runtime_mixed_precision_supported()) { + EXPECT_NE(test_pdlp_precision_mixed(filename.c_str(), &termination_status, &objective), + CUOPT_SUCCESS); + return; + } EXPECT_EQ(test_pdlp_precision_mixed(filename.c_str(), &termination_status, &objective), CUOPT_SUCCESS); EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_OPTIMAL); From 1227b35343199d483075ecbded44179e24665e54 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Fri, 6 Mar 2026 13:41:50 +0100 Subject: [PATCH 19/23] fix style --- cpp/src/pdlp/optimization_problem.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/pdlp/optimization_problem.cu b/cpp/src/pdlp/optimization_problem.cu index 020d26083a..9b3016a113 100644 --- a/cpp/src/pdlp/optimization_problem.cu +++ b/cpp/src/pdlp/optimization_problem.cu @@ -1532,7 +1532,6 @@ template optimization_problem_t optimization_problem_t::convert_to_other_prec( rmm::cuda_stream_view stream) const { - optimization_problem_t other(handle_ptr_); other.set_maximize(maximize_); @@ -1604,7 +1603,8 @@ template class 
optimization_problem_t; #if PDLP_INSTANTIATE_FLOAT || MIP_INSTANTIATE_FLOAT template optimization_problem_t -optimization_problem_t::convert_to_other_prec(rmm::cuda_stream_view) const; + optimization_problem_t::convert_to_other_prec( + rmm::cuda_stream_view) const; #endif } // namespace cuopt::linear_programming From 973346d6b8bd253a818d20ce19113eab82467cf5 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Fri, 6 Mar 2026 14:35:26 +0100 Subject: [PATCH 20/23] fix compile issue --- cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp index ff08fae1a8..5efc090418 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp +++ b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp @@ -14,11 +14,14 @@ #include #include -#include #include #include +namespace cuopt::linear_programming::detail { +bool is_cusparse_runtime_mixed_precision_supported(); +} + #include TEST(c_api, int_size) { EXPECT_EQ(test_int_size(), sizeof(int32_t)); } From 647191d4013d412afd63b0868bbc0afcfb4d9726 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Fri, 6 Mar 2026 16:08:54 +0100 Subject: [PATCH 21/23] modify c mixed precision test so that it can catch the issue both locally and in ci where there is a mismatch between the cuda version and the dynamic cusparse version --- cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp index 5efc090418..702ca5a364 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp +++ b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp @@ -296,11 +296,14 @@ TEST(c_api, pdlp_precision_mixed) using namespace cuopt::linear_programming::detail; const
std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); std::string filename = rapidsDatasetRootDir + "/linear_programming/afiro_original.mps"; - cuopt_int_t termination_status; + cuopt_int_t termination_status = -1; cuopt_float_t objective; if (!is_cusparse_runtime_mixed_precision_supported()) { - EXPECT_NE(test_pdlp_precision_mixed(filename.c_str(), &termination_status, &objective), - CUOPT_SUCCESS); + auto status = test_pdlp_precision_mixed(filename.c_str(), &termination_status, &objective); + bool solve_returned_error = (status != CUOPT_SUCCESS); + bool solve_returned_non_optimal = + (status == CUOPT_SUCCESS && termination_status != CUOPT_TERIMINATION_STATUS_OPTIMAL); + EXPECT_TRUE(solve_returned_error || solve_returned_non_optimal); return; } EXPECT_EQ(test_pdlp_precision_mixed(filename.c_str(), &termination_status, &objective), From 2c70fff81b49399a4c46ae517d74f68a560ccbe7 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Fri, 6 Mar 2026 16:09:10 +0100 Subject: [PATCH 22/23] style --- cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp index 702ca5a364..d39a970763 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp +++ b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp @@ -295,7 +295,7 @@ TEST(c_api, pdlp_precision_mixed) { using namespace cuopt::linear_programming::detail; const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); - std::string filename = rapidsDatasetRootDir + "/linear_programming/afiro_original.mps"; + std::string filename = rapidsDatasetRootDir + "/linear_programming/afiro_original.mps"; cuopt_int_t termination_status = -1; cuopt_float_t objective; if (!is_cusparse_runtime_mixed_precision_supported()) { From 22e75715206f0d7588c9c22f878d67548c40a7bd Mon Sep 17 00:00:00 2001 From: 
Nicolas Blin Date: Fri, 6 Mar 2026 18:51:29 +0100 Subject: [PATCH 23/23] removed mixed precision test from python test as it's inconvenient to check both cuda and dynamic cusparse version to go around the 12.5 issue --- .../linear_programming/test_lp_solver.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py b/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py index b325dbe474..e284ffc0ab 100644 --- a/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py +++ b/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py @@ -745,25 +745,6 @@ def test_pdlp_precision_single(): assert solution.get_solved_by_pdlp() -def test_pdlp_precision_mixed(): - file_path = ( - RAPIDS_DATASET_ROOT_DIR + "/linear_programming/afiro_original.mps" - ) - data_model_obj = cuopt_mps_parser.ParseMps(file_path) - - settings = solver_settings.SolverSettings() - settings.set_parameter(CUOPT_METHOD, SolverMethod.PDLP) - settings.set_parameter(CUOPT_PDLP_PRECISION, 2) # Mixed - settings.set_optimality_tolerance(1e-4) - - solution = solver.Solve(data_model_obj, settings) - - assert solution.get_termination_status() == LPTerminationStatus.Optimal - assert solution.get_primal_objective() == pytest.approx( - -464.7531, rel=1e-1 - ) - - def test_pdlp_precision_single_crossover(): file_path = ( RAPIDS_DATASET_ROOT_DIR + "/linear_programming/afiro_original.mps"