diff --git a/CMakeLists.txt b/CMakeLists.txt
index 79b2547..b385719 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -136,6 +136,7 @@ set(cddp_core_srcs
   src/cddp_core/logddp_solver.cpp
   src/cddp_core/ipddp_solver.cpp
   src/cddp_core/msipddp_solver.cpp
+  src/cddp_core/alddp_solver.cpp
 )
 
 set(dynamics_model_srcs
diff --git a/include/cddp-cpp/cddp.hpp b/include/cddp-cpp/cddp.hpp
index e1b2cf5..c8c54ac 100644
--- a/include/cddp-cpp/cddp.hpp
+++ b/include/cddp-cpp/cddp.hpp
@@ -32,6 +32,7 @@
 #include "cddp_core/logddp_solver.hpp"
 #include "cddp_core/ipddp_solver.hpp"
 #include "cddp_core/msipddp_solver.hpp"
+#include "cddp_core/alddp_solver.hpp"
 #include "cddp_core/helper.hpp"
 #include "cddp_core/boxqp.hpp"
 #include "cddp_core/qp_solver.hpp"
diff --git a/include/cddp-cpp/cddp_core/alddp_solver.hpp b/include/cddp-cpp/cddp_core/alddp_solver.hpp
new file mode 100644
index 0000000..7a1b84f
--- /dev/null
+++ b/include/cddp-cpp/cddp_core/alddp_solver.hpp
@@ -0,0 +1,141 @@
+/*
+ Copyright 2025 Tomo Sasaki
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      https://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef CDDP_ALDDP_SOLVER_HPP
+#define CDDP_ALDDP_SOLVER_HPP
+
+#include "cddp_core/boxqp.hpp"
+#include "cddp_core/cddp_solver_base.hpp"
+#include "cddp_core/constraint.hpp"
+#include <Eigen/Dense>
+#include <map>
+#include <string>
+#include <vector>
+
+namespace cddp {
+
+/**
+ * @brief Augmented Lagrangian DDP (ALDDP) solver implementation.
+ *
+ * Implements an ALTRO-like solver that uses augmented Lagrangian penalization
+ * to handle general nonlinear state and control constraints. Features:
+ * - AL outer loop with multiplier/penalty updates
+ * - BoxQP for control bounds (optional, like CLDDP)
+ * - Infeasible state trajectory initialization via slack controls
+ * - Square-root backward pass for numerical robustness
+ */
+class ALDDPSolver : public CDDPSolverBase {
+public:
+  ALDDPSolver();
+
+  void initialize(CDDP &context) override;
+  std::string getSolverName() const override;
+
+protected:
+  // CDDPSolverBase hooks
+  void preIterationSetup(CDDP &context) override;
+  bool backwardPass(CDDP &context) override;
+  bool checkEarlyConvergence(CDDP &context, int iter,
+                             std::string &reason) override;
+  ForwardPassResult forwardPass(CDDP &context, double alpha) override;
+  void applyForwardPassResult(CDDP &context,
+                              const ForwardPassResult &result) override;
+  bool checkConvergence(CDDP &context, double dJ, double dL, int iter,
+                        std::string &reason) override;
+  void postIterationUpdate(CDDP &context, bool forward_pass_success) override;
+  bool handleBackwardPassRegularizationLimit(
+      CDDP &context, std::string &termination_reason) override;
+  void recordIterationHistory(const CDDP &context) override;
+  void populateSolverSpecificSolution(CDDPSolution &solution,
+                                      const CDDP &context) override;
+  void printIteration(int iter, const CDDP &context) const override;
+  void printSolutionSummary(const CDDPSolution &solution) const override;
+
+private:
+  // --- AL variables ---
+  // Per-constraint, per-timestep Lagrange multipliers (dim = getDualDim())
+  std::map<std::string, std::vector<Eigen::VectorXd>> lambda_;
+  // Per-constraint, per-timestep penalty weights
+  std::map<std::string, std::vector<Eigen::VectorXd>> penalty_;
+  // Terminal constraint multipliers and penalties
+  std::map<std::string, Eigen::VectorXd> terminal_lambda_;
+  std::map<std::string, Eigen::VectorXd> terminal_penalty_;
+
+  // Constraint evaluation cache (only written by evaluateConstraints, read by backward/forward pass)
+  std::map<std::string, std::vector<Eigen::VectorXd>> G_;
+  std::map<std::string, Eigen::VectorXd> G_terminal_;
+  std::map<std::string, std::vector<Eigen::MatrixXd>> G_x_;
+  std::map<std::string, std::vector<Eigen::MatrixXd>> G_u_;
+
+  // BoxQP for control bounds
+  BoxQPSolver boxqp_solver_;
+
+  // AL outer loop state
+  int al_outer_iter_ = 0;
+  double inner_tolerance_ = 1e-2;
+  double max_constraint_violation_ = 0.0;
+  double prev_max_constraint_violation_ = 0.0;
+  double prev_outer_cost_ = 0.0;
+  bool inner_converged_ = false;
+  int inner_iter_count_ = 0;
+
+  // Infeasible initialization
+  bool infeasible_start_ = false;
+  double current_slack_penalty_ = 1000.0;
+  std::vector<Eigen::VectorXd> S_;   // Slack controls s_k (state_dim each)
+  std::vector<Eigen::VectorXd> k_s_; // Slack feedforward gains
+  std::vector<Eigen::MatrixXd> K_s_; // Slack feedback gains
+
+  // Square-root backward pass workspace
+  std::vector<Eigen::MatrixXd> S_chol_; // Upper-triangular Cholesky factors √P_k
+  std::vector<Eigen::VectorXd> p_;      // Cost-to-go gradient vectors
+
+  // --- Private helpers ---
+  void initializeMultipliersAndPenalties(CDDP &context);
+  void initializeSlackControls(CDDP &context);
+  void evaluateConstraints(CDDP &context);
+  void updateMultipliers(CDDP &context);
+  void updatePenalties(CDDP &context);
+  double computeMaxConstraintViolation() const;
+  double computeMaxSlackNorm() const;
+
+  // Backward pass variants
+  bool backwardPassStandard(CDDP &context);
+  bool backwardPassSqrt(CDDP &context);
+
+  // AL cost derivative augmentation helpers
+  void augmentRunningCostDerivatives(
+      const CDDP &context, int t,
+      const Eigen::VectorXd &x, const Eigen::VectorXd &u,
+      Eigen::VectorXd &l_x, Eigen::VectorXd &l_u,
+      Eigen::MatrixXd &l_xx, Eigen::MatrixXd &l_uu,
+      Eigen::MatrixXd &l_ux) const;
+
+  void augmentTerminalCostDerivatives(
+      const CDDP &context, const Eigen::VectorXd &x_N,
+      Eigen::VectorXd &V_x, Eigen::MatrixXd &V_xx) const;
+
+  // Compute AL merit for a given trajectory (thread-safe, no member writes)
+  double computeALMerit(
+      const CDDP &context,
+      const std::vector<Eigen::VectorXd> &X,
+      const std::vector<Eigen::VectorXd> &U,
+      const std::vector<Eigen::VectorXd> *S_trial = nullptr) const;
+};
+
+} // namespace cddp
+
+#endif // CDDP_ALDDP_SOLVER_HPP
diff --git a/include/cddp-cpp/cddp_core/barrier.hpp b/include/cddp-cpp/cddp_core/barrier.hpp
index df51776..34f46ad 100644
--- a/include/cddp-cpp/cddp_core/barrier.hpp
+++ b/include/cddp-cpp/cddp_core/barrier.hpp
@@ -59,8 +59,8 @@ class RelaxedLogBarrier {
    * @return Barrier function value.
    */
   double evaluate(const Constraint &constraint, const Eigen::VectorXd &state,
-                  const Eigen::VectorXd &control) const {
-    Eigen::VectorXd g_val = constraint.evaluate(state, control);
+                  const Eigen::VectorXd &control, int index = 0) const {
+    Eigen::VectorXd g_val = constraint.evaluate(state, control, index);
     Eigen::VectorXd L = constraint.getLowerBound();
     Eigen::VectorXd U = constraint.getUpperBound();
     int constraint_dim = g_val.size();
@@ -99,13 +99,13 @@ class RelaxedLogBarrier {
    */
   std::tuple<Eigen::VectorXd, Eigen::VectorXd>
   getGradients(const Constraint &constraint, const Eigen::VectorXd &state,
-               const Eigen::VectorXd &control) const {
+               const Eigen::VectorXd &control, int index = 0) const {
 
-    Eigen::VectorXd g_val = constraint.evaluate(state, control);
+    Eigen::VectorXd g_val = constraint.evaluate(state, control, index);
     Eigen::VectorXd L = constraint.getLowerBound();
     Eigen::VectorXd U = constraint.getUpperBound();
-    Eigen::MatrixXd Gx = constraint.getStateJacobian(state, control);
-    Eigen::MatrixXd Gu = constraint.getControlJacobian(state, control);
+    Eigen::MatrixXd Gx = constraint.getStateJacobian(state, control, index);
+    Eigen::MatrixXd Gu = constraint.getControlJacobian(state, control, index);
 
     int state_dim = state.size();
     int control_dim = control.size();
@@ -151,13 +151,13 @@ class RelaxedLogBarrier {
    */
   std::tuple<Eigen::MatrixXd, Eigen::MatrixXd, Eigen::MatrixXd>
   getHessians(const Constraint &constraint, const Eigen::VectorXd &state,
-              const Eigen::VectorXd &control) const {
+              const Eigen::VectorXd &control, int index = 0) const {
 
-    Eigen::VectorXd g_val = constraint.evaluate(state, control);
+    Eigen::VectorXd g_val = constraint.evaluate(state, control, index);
     Eigen::VectorXd L = constraint.getLowerBound();
     Eigen::VectorXd U = constraint.getUpperBound();
-    Eigen::MatrixXd Gx = constraint.getStateJacobian(state, control);
-    Eigen::MatrixXd Gu = constraint.getControlJacobian(state, control);
+    Eigen::MatrixXd Gx = constraint.getStateJacobian(state, control, index);
+    Eigen::MatrixXd Gu = constraint.getControlJacobian(state, control, index);
 
     int state_dim = state.size();
     int control_dim = control.size();
@@ -171,7 +171,7 @@ class RelaxedLogBarrier {
         Gux_constraint_vec;
     bool constraint_provides_hessians = true;
     try {
-      auto hess_tuple = constraint.getHessians(state, control);
+      auto hess_tuple = constraint.getHessians(state, control, index);
       Gxx_constraint_vec = std::get<0>(hess_tuple);
       Guu_constraint_vec = std::get<1>(hess_tuple);
       Gux_constraint_vec = std::get<2>(hess_tuple);
@@ -536,4 +536,4 @@ class DiscreteBarrierState {
 
 } // namespace cddp
 
-#endif // CDDP_BARRIER_HPP
\ No newline at end of file
+#endif // CDDP_BARRIER_HPP
diff --git a/include/cddp-cpp/cddp_core/cddp_core.hpp b/include/cddp-cpp/cddp_core/cddp_core.hpp
index 4ae0210..7b86d0e 100644
--- a/include/cddp-cpp/cddp_core/cddp_core.hpp
+++ b/include/cddp-cpp/cddp_core/cddp_core.hpp
@@ -45,7 +45,8 @@ enum class SolverType {
   CLDDP,   ///< Control-Limited Differential Dynamic Programming
   LogDDP,  ///< Log-Barrier Differential Dynamic Programming
   IPDDP,   ///< Interior Point Differential Dynamic Programming
-  MSIPDDP  ///< Multi-Shooting Interior Point Differential Dynamic Programming
+  MSIPDDP, ///< Multi-Shooting Interior Point Differential Dynamic Programming
+  ALDDP    ///< Augmented Lagrangian Differential Dynamic Programming
 };
 
 /**
diff --git a/include/cddp-cpp/cddp_core/logddp_solver.hpp b/include/cddp-cpp/cddp_core/logddp_solver.hpp
index 9c209f3..f0df630 100644
--- a/include/cddp-cpp/cddp_core/logddp_solver.hpp
+++ b/include/cddp-cpp/cddp_core/logddp_solver.hpp
@@ -56,12 +56,6 @@ class LogDDPSolver : public CDDPSolverBase {
   void printIteration(int iter, const CDDP &context) const override;
 
 private:
-  // Dynamics storage (forward-simulated trajectory)
-  std::vector<Eigen::VectorXd> F_;
-
-  // Constraint values g(x,u) - g_ub
-  std::map<std::string, std::vector<Eigen::VectorXd>> G_;
-
   // Log-barrier method
   std::unique_ptr<RelaxedLogBarrier> relaxed_log_barrier_;
   double mu_;
@@ -70,9 +64,6 @@ class LogDDPSolver : public CDDPSolverBase {
   // Filter-based line search
   double constraint_violation_;
 
-  // Multi-shooting parameters
-  int ms_segment_length_;
-
   /**
    * @brief Evaluate trajectory by computing cost, dynamics, and merit function.
    */
@@ -82,6 +73,22 @@ class LogDDPSolver : public CDDPSolverBase {
    * @brief Reset/initialize the filter for line search.
    */
   void resetFilter(CDDP &context);
+
+  void augmentRunningCostDerivatives(
+      const CDDP &context, int t, const Eigen::VectorXd &x,
+      const Eigen::VectorXd &u, Eigen::VectorXd &l_x, Eigen::VectorXd &l_u,
+      Eigen::MatrixXd &l_xx, Eigen::MatrixXd &l_uu,
+      Eigen::MatrixXd &l_ux) const;
+
+  void augmentTerminalCostDerivatives(const CDDP &context,
+                                      const Eigen::VectorXd &x_N,
+                                      Eigen::VectorXd &V_x,
+                                      Eigen::MatrixXd &V_xx) const;
+
+  double computeBarrierMerit(const CDDP &context,
+                             const std::vector<Eigen::VectorXd> &X,
+                             const std::vector<Eigen::VectorXd> &U,
+                             double *max_constraint_violation = nullptr) const;
 };
 
 } // namespace cddp
diff --git a/include/cddp-cpp/cddp_core/options.hpp b/include/cddp-cpp/cddp_core/options.hpp
index cee6bb9..656b215 100644
--- a/include/cddp-cpp/cddp_core/options.hpp
+++ b/include/cddp-cpp/cddp_core/options.hpp
@@ -164,6 +164,42 @@ namespace cddp
         double terminal_constraint_tolerance = 1e-6; ///< Tolerance for terminal constraint satisfaction.
     };
 
+    /**
+     * @brief Options for the ALDDP (Augmented Lagrangian DDP) solver.
+     *
+     * ALDDP uses an augmented Lagrangian method to handle general nonlinear
+     * state and control constraints. The solver alternates between an inner
+     * iLQR loop (which minimizes the augmented Lagrangian) and an outer loop
+     * that updates Lagrange multipliers and penalty weights.
+     */
+    struct ALDDPAlgorithmOptions
+    {
+        // Multiplier/penalty initialization
+        double lambda_init = 0.0;       ///< Initial Lagrange multiplier value.
+        double penalty_init = 1.0;      ///< Initial penalty weight.
+        double penalty_max = 1e8;       ///< Maximum penalty weight.
+        double penalty_update_factor = 10.0; ///< Factor to increase penalties.
+
+        // Inner loop control
+        int max_inner_iterations = 50;        ///< Max iLQR iterations per AL outer iteration.
+        double inner_tolerance_init = 1e-2;   ///< Initial inner iLQR convergence tolerance.
+        double inner_tolerance_factor = 0.1;  ///< Factor to tighten inner tolerance.
+        double inner_tolerance_min = 1e-6;    ///< Minimum inner tolerance.
+
+        // Outer loop control
+        int max_outer_iterations = 20;        ///< Max AL outer iterations.
+        double constraint_tolerance = 1e-6;   ///< Constraint satisfaction tolerance.
+
+        // Control constraint handling
+        bool use_boxqp_for_controls = true;   ///< Use BoxQP for control bounds (like CLDDP).
+
+        // Infeasible initialization (ALTRO Eqs. 30-32)
+        double slack_penalty = 1000.0;        ///< R_s weight on slack penalty term.
+
+        // Square-root backward pass (ALTRO Eqs. 19-29)
+        bool use_sqrt_backward_pass = false;  ///< QR-based factorization for numerical robustness.
+    };
+
     /**
      * @brief Main options structure for the CDDP solver.
      *
@@ -210,6 +246,8 @@ namespace cddp
         IPDDPAlgorithmOptions ipddp; ///< Comprehensive options for the IPDDP solver.
         MSIPDDPAlgorithmOptions
             msipddp;                 ///< Comprehensive options for the MSIPDDP solver.
+        ALDDPAlgorithmOptions
+            alddp;                   ///< Comprehensive options for the ALDDP solver.
 
         // Constructor with defaults (relies on member initializers)
         CDDPOptions() = default;
diff --git a/python/pycddp/__init__.py b/python/pycddp/__init__.py
index afb4dd7..466a0ec 100644
--- a/python/pycddp/__init__.py
+++ b/python/pycddp/__init__.py
@@ -24,6 +24,7 @@
     LogBarrierOptions,
     IPDDPOptions,
     MSIPDDPOptions,
+    ALDDPOptions,
 
     # Core solver
     CDDP,
diff --git a/python/src/bind_options.cpp b/python/src/bind_options.cpp
index 747e70a..2d2ba72 100644
--- a/python/src/bind_options.cpp
+++ b/python/src/bind_options.cpp
@@ -18,7 +18,8 @@ void bind_options(py::module_& m) {
         .value("CLDDP", cddp::SolverType::CLDDP)
         .value("LogDDP", cddp::SolverType::LogDDP)
         .value("IPDDP", cddp::SolverType::IPDDP)
-        .value("MSIPDDP", cddp::SolverType::MSIPDDP);
+        .value("MSIPDDP", cddp::SolverType::MSIPDDP)
+        .value("ALDDP", cddp::SolverType::ALDDP);
 
     // BoxQPOptions
     py::class_<cddp::BoxQPOptions>(m, "BoxQPOptions")
@@ -103,6 +104,23 @@ void bind_options(py::module_& m) {
         .def_readwrite("use_controlled_rollout", &cddp::MSIPDDPAlgorithmOptions::use_controlled_rollout)
         .def_readwrite("costate_var_init_scale", &cddp::MSIPDDPAlgorithmOptions::costate_var_init_scale);
 
+    // ALDDPAlgorithmOptions exposed as a flat Python binding.
+    py::class_<cddp::ALDDPAlgorithmOptions>(m, "ALDDPOptions")
+        .def(py::init<>())
+        .def_readwrite("lambda_init", &cddp::ALDDPAlgorithmOptions::lambda_init)
+        .def_readwrite("penalty_init", &cddp::ALDDPAlgorithmOptions::penalty_init)
+        .def_readwrite("penalty_max", &cddp::ALDDPAlgorithmOptions::penalty_max)
+        .def_readwrite("penalty_update_factor", &cddp::ALDDPAlgorithmOptions::penalty_update_factor)
+        .def_readwrite("max_inner_iterations", &cddp::ALDDPAlgorithmOptions::max_inner_iterations)
+        .def_readwrite("inner_tolerance_init", &cddp::ALDDPAlgorithmOptions::inner_tolerance_init)
+        .def_readwrite("inner_tolerance_factor", &cddp::ALDDPAlgorithmOptions::inner_tolerance_factor)
+        .def_readwrite("inner_tolerance_min", &cddp::ALDDPAlgorithmOptions::inner_tolerance_min)
+        .def_readwrite("max_outer_iterations", &cddp::ALDDPAlgorithmOptions::max_outer_iterations)
+        .def_readwrite("constraint_tolerance", &cddp::ALDDPAlgorithmOptions::constraint_tolerance)
+        .def_readwrite("use_boxqp_for_controls", &cddp::ALDDPAlgorithmOptions::use_boxqp_for_controls)
+        .def_readwrite("slack_penalty", &cddp::ALDDPAlgorithmOptions::slack_penalty)
+        .def_readwrite("use_sqrt_backward_pass", &cddp::ALDDPAlgorithmOptions::use_sqrt_backward_pass);
+
     // CDDPOptions
     py::class_<cddp::CDDPOptions>(m, "CDDPOptions")
         .def(py::init<>())
@@ -126,5 +144,6 @@ void bind_options(py::module_& m) {
         .def_readwrite("filter", &cddp::CDDPOptions::filter)
         .def_readwrite("log_barrier", &cddp::CDDPOptions::log_barrier)
         .def_readwrite("ipddp", &cddp::CDDPOptions::ipddp)
-        .def_readwrite("msipddp", &cddp::CDDPOptions::msipddp);
+        .def_readwrite("msipddp", &cddp::CDDPOptions::msipddp)
+        .def_readwrite("alddp", &cddp::CDDPOptions::alddp);
 }
diff --git a/python/src/bind_solver.cpp b/python/src/bind_solver.cpp
index a653c4d..938f1be 100644
--- a/python/src/bind_solver.cpp
+++ b/python/src/bind_solver.cpp
@@ -96,6 +96,7 @@ bool isKnownSolverName(const std::string &solver_name) {
     return solver_name == "CLDDP" || solver_name == "CLCDDP" ||
            solver_name == "LogDDP" || solver_name == "LOGDDP" ||
            solver_name == "IPDDP" || solver_name == "MSIPDDP" ||
+           solver_name == "ALDDP" ||
            cddp::CDDP::isSolverRegistered(solver_name);
 }
 
diff --git a/python/tests/test_constraints.py b/python/tests/test_constraints.py
index 627c6dd..9de9e35 100644
--- a/python/tests/test_constraints.py
+++ b/python/tests/test_constraints.py
@@ -112,6 +112,8 @@ def test_custom_python_constraint_with_solver():
     assert counters["evaluate"] > 0
     assert counters["state_jacobian"] > 0
     assert counters["control_jacobian"] > 0
+
+
 def test_constraint_base_is_rejected_cleanly():
     dt = 0.1
     opts = pycddp.CDDPOptions()
diff --git a/python/tests/test_solver_errors.py b/python/tests/test_solver_errors.py
index 83a8297..36fbe8f 100644
--- a/python/tests/test_solver_errors.py
+++ b/python/tests/test_solver_errors.py
@@ -31,6 +31,7 @@ def test_solve_by_name_raises_for_unknown_solver():
         ("CLDDP", "CLDDP"),
         ("CLCDDP", "CLDDP"),
         ("LOGDDP", "LogDDP"),
+        ("ALDDP", "ALDDP"),
     ],
 )
 def test_solve_by_name_accepts_core_aliases(solver_name, expected_solver_name):
diff --git a/src/cddp_core/alddp_solver.cpp b/src/cddp_core/alddp_solver.cpp
new file mode 100644
index 0000000..7f62cca
--- /dev/null
+++ b/src/cddp_core/alddp_solver.cpp
@@ -0,0 +1,1185 @@
+/*
+ Copyright 2025 Tomo Sasaki
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      https://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "cddp_core/alddp_solver.hpp"
+#include "cddp_core/boxqp.hpp"
+#include "cddp_core/cddp_core.hpp"
+#include <Eigen/Dense>
+#include <Eigen/QR>
+#include <cmath>
+#include <iomanip>
+#include <iostream>
+#include <limits>
+
+namespace cddp {
+
+// ============================================================================
+// Construction / Initialization
+// ============================================================================
+
+ALDDPSolver::ALDDPSolver() : boxqp_solver_(BoxQPOptions()) {}
+
+std::string ALDDPSolver::getSolverName() const { return "ALDDP"; }
+
+void ALDDPSolver::initialize(CDDP &context) {
+  const CDDPOptions &options = context.getOptions();
+  const auto &al_opts = options.alddp;
+  const int horizon = context.getHorizon();
+  const int control_dim = context.getControlDim();
+  const int state_dim = context.getStateDim();
+
+  // Warm start validation
+  if (options.warm_start) {
+    bool valid = (k_u_.size() == static_cast<size_t>(horizon) &&
+                  K_u_.size() == static_cast<size_t>(horizon));
+    if (valid && !k_u_.empty()) {
+      for (int t = 0; t < horizon; ++t) {
+        if (k_u_[t].size() != control_dim || K_u_[t].rows() != control_dim ||
+            K_u_[t].cols() != state_dim) {
+          valid = false;
+          break;
+        }
+      }
+    } else {
+      valid = false;
+    }
+
+    if (valid) {
+      if (options.verbose) {
+        std::cout << "ALDDP: Using warm start with existing solver state"
+                  << std::endl;
+      }
+      boxqp_solver_.setOptions(options.box_qp);
+      // The solver state (gains, multipliers, penalties) is reused, but the
+      // current seeded trajectory may have changed since the last solve.
+      // Refresh slack controls so warm starts remain consistent with the
+      // current X/U pair, including dynamically infeasible reseeds.
+      initializeSlackControls(context);
+      if (!context.X_.empty() && !context.U_.empty()) {
+        computeCost(context);
+      }
+      return;
+    } else if (options.verbose) {
+      std::cout << "ALDDP: Warning - warm start requested but no valid "
+                   "solver state found. Falling back to cold start."
+                << std::endl;
+    }
+  }
+
+  // Cold start
+  initializeGains(horizon, control_dim, state_dim);
+  boxqp_solver_ = BoxQPSolver(options.box_qp);
+
+  // Initialize AL state
+  al_outer_iter_ = 0;
+  inner_tolerance_ = al_opts.inner_tolerance_init;
+  max_constraint_violation_ = 0.0;
+  prev_max_constraint_violation_ = std::numeric_limits<double>::infinity();
+  prev_outer_cost_ = std::numeric_limits<double>::infinity();
+  inner_converged_ = false;
+  inner_iter_count_ = 0;
+  current_slack_penalty_ = al_opts.slack_penalty;
+
+  // Initialize multipliers and penalties
+  initializeMultipliersAndPenalties(context);
+
+  // Initialize slack controls for infeasible start
+  initializeSlackControls(context);
+
+  // Initialize sqrt backward pass workspace
+  if (al_opts.use_sqrt_backward_pass) {
+    S_chol_.resize(horizon + 1);
+    p_.resize(horizon + 1);
+    for (int t = 0; t <= horizon; ++t) {
+      S_chol_[t] = Eigen::MatrixXd::Zero(state_dim, state_dim);
+      p_[t] = Eigen::VectorXd::Zero(state_dim);
+    }
+  }
+
+  // Warn if max_iterations may be too low
+  int effective_budget = al_opts.max_inner_iterations * al_opts.max_outer_iterations;
+  if (options.max_iterations < effective_budget && options.verbose) {
+    std::cerr << "ALDDP: Warning - max_iterations (" << options.max_iterations
+              << ") may be too low for " << al_opts.max_outer_iterations
+              << " outer x " << al_opts.max_inner_iterations
+              << " inner iterations (= " << effective_budget << ")"
+              << std::endl;
+  }
+
+  if (!context.X_.empty() && !context.U_.empty()) {
+    computeCost(context);
+  }
+}
+
+void ALDDPSolver::initializeMultipliersAndPenalties(CDDP &context) {
+  const auto &al_opts = context.getOptions().alddp;
+  const int horizon = context.getHorizon();
+
+  lambda_.clear();
+  penalty_.clear();
+  G_.clear();
+  G_x_.clear();
+  G_u_.clear();
+
+  for (const auto &[name, constraint] : context.getConstraintSet()) {
+    // Skip ControlConstraint if handled by BoxQP
+    if (al_opts.use_boxqp_for_controls && name == "ControlConstraint") {
+      continue;
+    }
+    int dual_dim = constraint->getDualDim();
+    if (dual_dim == 0) {
+      dual_dim = constraint->getUpperBound().size();
+    }
+
+    lambda_[name].resize(horizon);
+    penalty_[name].resize(horizon);
+    G_[name].resize(horizon);
+    G_x_[name].resize(horizon);
+    G_u_[name].resize(horizon);
+
+    for (int t = 0; t < horizon; ++t) {
+      lambda_[name][t] = Eigen::VectorXd::Constant(dual_dim, al_opts.lambda_init);
+      penalty_[name][t] = Eigen::VectorXd::Constant(dual_dim, al_opts.penalty_init);
+    }
+  }
+
+  // Terminal constraints
+  terminal_lambda_.clear();
+  terminal_penalty_.clear();
+  G_terminal_.clear();
+
+  for (const auto &[name, constraint] : context.getTerminalConstraintSet()) {
+    int dual_dim = constraint->getDualDim();
+    if (dual_dim == 0) {
+      dual_dim = constraint->getUpperBound().size();
+    }
+    terminal_lambda_[name] = Eigen::VectorXd::Constant(dual_dim, al_opts.lambda_init);
+    terminal_penalty_[name] = Eigen::VectorXd::Constant(dual_dim, al_opts.penalty_init);
+  }
+}
+
+void ALDDPSolver::initializeSlackControls(CDDP &context) {
+  const int horizon = context.getHorizon();
+  const int state_dim = context.getStateDim();
+
+  S_.resize(horizon, Eigen::VectorXd::Zero(state_dim));
+  k_s_.resize(horizon, Eigen::VectorXd::Zero(state_dim));
+  K_s_.resize(horizon, Eigen::MatrixXd::Zero(state_dim, state_dim));
+
+  // Check if the initial trajectory is dynamically infeasible
+  double max_slack = 0.0;
+  for (int t = 0; t < horizon; ++t) {
+    Eigen::VectorXd x_next = context.getSystem().getDiscreteDynamics(
+        context.X_[t], context.U_[t], t * context.getTimestep());
+    S_[t] = context.X_[t + 1] - x_next;
+    max_slack = std::max(max_slack, S_[t].lpNorm<Eigen::Infinity>());
+  }
+
+  infeasible_start_ = (max_slack > 1e-8);
+  if (infeasible_start_ && context.getOptions().verbose) {
+    std::cout << "ALDDP: Infeasible start detected (max slack = "
+              << max_slack << "). Using slack controls." << std::endl;
+  }
+}
+
+// ============================================================================
+// Constraint Evaluation
+// ============================================================================
+
+void ALDDPSolver::evaluateConstraints(CDDP &context) {
+  const int horizon = context.getHorizon();
+
+  // Reset max violation tracker
+  max_constraint_violation_ = 0.0;
+
+  for (auto &[name, G_vec] : G_) {
+    const auto &constraint = context.getConstraintSet().at(name);
+    for (int t = 0; t < horizon; ++t) {
+      G_vec[t] = constraint->evaluate(context.X_[t], context.U_[t], t);
+      G_x_[name][t] = constraint->getStateJacobian(context.X_[t], context.U_[t], t);
+      G_u_[name][t] = constraint->getControlJacobian(context.X_[t], context.U_[t], t);
+
+      // Track max violation using the constraint's own method
+      double viol = constraint->computeViolation(context.X_[t], context.U_[t], t);
+      max_constraint_violation_ = std::max(max_constraint_violation_, viol);
+    }
+  }
+
+  // Terminal constraints
+  Eigen::VectorXd dummy_u = Eigen::VectorXd::Zero(context.getControlDim());
+  for (auto &[name, G_term] : G_terminal_) {
+    const auto &constraint = context.getTerminalConstraintSet().at(name);
+    G_term = constraint->evaluate(context.X_.back(), dummy_u, horizon);
+
+    double viol = constraint->computeViolation(context.X_.back(), dummy_u, horizon);
+    max_constraint_violation_ = std::max(max_constraint_violation_, viol);
+  }
+}
+
+double ALDDPSolver::computeMaxConstraintViolation() const {
+  // max_constraint_violation_ is computed during evaluateConstraints()
+  return max_constraint_violation_;
+}
+
+double ALDDPSolver::computeMaxSlackNorm() const {
+  double max_norm = 0.0;
+  for (const auto &s : S_) {
+    max_norm = std::max(max_norm, s.lpNorm<Eigen::Infinity>());
+  }
+  return max_norm;
+}
+
+// ============================================================================
+// AL Cost Derivative Augmentation
+// ============================================================================
+
+void ALDDPSolver::augmentRunningCostDerivatives(
+    const CDDP &context, int t,
+    const Eigen::VectorXd &x, const Eigen::VectorXd &u,
+    Eigen::VectorXd &l_x, Eigen::VectorXd &l_u,
+    Eigen::MatrixXd &l_xx, Eigen::MatrixXd &l_uu,
+    Eigen::MatrixXd &l_ux) const {
+
+  for (const auto &[name, G_vec] : G_) {
+    const auto &constraint = context.getConstraintSet().at(name);
+    const Eigen::VectorXd &g = G_vec[t];
+    const Eigen::VectorXd &upper = constraint->getUpperBound();
+    const Eigen::VectorXd &lower = constraint->getLowerBound();
+    const Eigen::VectorXd &lam = lambda_.at(name)[t];
+    const Eigen::VectorXd &pen = penalty_.at(name)[t];
+    const Eigen::MatrixXd &C_x = G_x_.at(name)[t];
+    const Eigen::MatrixXd &C_u = G_u_.at(name)[t];
+
+    const int dim = g.size();
+    for (int i = 0; i < dim; ++i) {
+      // Violation: c_i = g_i - upper_i (positive means violated)
+      double c_i = g(i) - upper(i);
+
+      // Determine if this component is active (I_mu masking, Eq. 3)
+      bool is_equality = (std::abs(lower(i) - upper(i)) < 1e-12);
+      bool is_active = is_equality || (c_i >= 0.0) || (lam(i) > 0.0);
+
+      if (!is_active) continue;
+
+      double effective_lambda = lam(i) + pen(i) * c_i;
+
+      // First-order augmentation
+      l_x += effective_lambda * C_x.row(i).transpose();
+      l_u += effective_lambda * C_u.row(i).transpose();
+
+      // Second-order augmentation (Gauss-Newton approximation)
+      l_xx += pen(i) * C_x.row(i).transpose() * C_x.row(i);
+      l_uu += pen(i) * C_u.row(i).transpose() * C_u.row(i);
+      l_ux += pen(i) * C_u.row(i).transpose() * C_x.row(i);
+    }
+  }
+}
+
+void ALDDPSolver::augmentTerminalCostDerivatives(
+    const CDDP &context, const Eigen::VectorXd &x_N,
+    Eigen::VectorXd &V_x, Eigen::MatrixXd &V_xx) const {
+
+  Eigen::VectorXd dummy_u = Eigen::VectorXd::Zero(context.getControlDim());
+
+  for (const auto &[name, g_term] : G_terminal_) {
+    const auto &constraint = context.getTerminalConstraintSet().at(name);
+    const Eigen::VectorXd &upper = constraint->getUpperBound();
+    const Eigen::VectorXd &lower = constraint->getLowerBound();
+    const Eigen::VectorXd &lam = terminal_lambda_.at(name);
+    const Eigen::VectorXd &pen = terminal_penalty_.at(name);
+    const Eigen::MatrixXd C_x =
+        constraint->getStateJacobian(x_N, dummy_u, context.getHorizon());
+
+    const int dim = g_term.size();
+    for (int i = 0; i < dim; ++i) {
+      double c_i = g_term(i) - upper(i);
+      bool is_equality = (std::abs(lower(i) - upper(i)) < 1e-12);
+      bool is_active = is_equality || (c_i >= 0.0) || (lam(i) > 0.0);
+
+      if (!is_active) continue;
+
+      double effective_lambda = lam(i) + pen(i) * c_i;
+      V_x += effective_lambda * C_x.row(i).transpose();
+      V_xx += pen(i) * C_x.row(i).transpose() * C_x.row(i);
+    }
+  }
+}
+
+// ============================================================================
+// AL Merit Function (thread-safe, no member writes)
+// ============================================================================
+
+double ALDDPSolver::computeALMerit(
+    const CDDP &context,
+    const std::vector<Eigen::VectorXd> &X,
+    const std::vector<Eigen::VectorXd> &U,
+    const std::vector<Eigen::VectorXd> *S_trial) const {
+
+  const auto &al_opts = context.getOptions().alddp;
+
+  // Base cost
+  double merit = 0.0;
+  for (int t = 0; t < context.getHorizon(); ++t) {
+    merit += context.getObjective().running_cost(X[t], U[t], t);
+  }
+  merit += context.getObjective().terminal_cost(X.back());
+
+  // Slack penalty
+  if (S_trial != nullptr) {
+    for (int t = 0; t < context.getHorizon(); ++t) {
+      merit += 0.5 * current_slack_penalty_ * (*S_trial)[t].squaredNorm();
+    }
+  }
+
+  // AL penalty terms for path constraints
+  Eigen::VectorXd dummy_u = Eigen::VectorXd::Zero(context.getControlDim());
+  for (const auto &[name, lam_vec] : lambda_) {
+    const auto &constraint = context.getConstraintSet().at(name);
+    const Eigen::VectorXd &upper = constraint->getUpperBound();
+    const Eigen::VectorXd &lower = constraint->getLowerBound();
+
+    for (int t = 0; t < context.getHorizon(); ++t) {
+      Eigen::VectorXd g = constraint->evaluate(X[t], U[t], t);
+      const Eigen::VectorXd &lam = lam_vec[t];
+      const Eigen::VectorXd &pen = penalty_.at(name)[t];
+
+      for (int i = 0; i < g.size(); ++i) {
+        double c_i = g(i) - upper(i);
+        bool is_equality = (std::abs(lower(i) - upper(i)) < 1e-12);
+        bool is_active = is_equality || (c_i >= 0.0) || (lam(i) > 0.0);
+        if (is_active) {
+          merit += lam(i) * c_i + 0.5 * pen(i) * c_i * c_i;
+        }
+      }
+    }
+  }
+
+  // AL penalty terms for terminal constraints
+  for (const auto &[name, lam] : terminal_lambda_) {
+    const auto &constraint = context.getTerminalConstraintSet().at(name);
+    const Eigen::VectorXd &upper = constraint->getUpperBound();
+    const Eigen::VectorXd &lower = constraint->getLowerBound();
+    Eigen::VectorXd g = constraint->evaluate(X.back(), dummy_u, context.getHorizon());
+    const Eigen::VectorXd &pen = terminal_penalty_.at(name);
+
+    for (int i = 0; i < g.size(); ++i) {
+      double c_i = g(i) - upper(i);
+      bool is_equality = (std::abs(lower(i) - upper(i)) < 1e-12);
+      bool is_active = is_equality || (c_i >= 0.0) || (lam(i) > 0.0);
+      if (is_active) {
+        merit += lam(i) * c_i + 0.5 * pen(i) * c_i * c_i;
+      }
+    }
+  }
+
+  return merit;
+}
+
+// ============================================================================
+// Backward Pass
+// ============================================================================
+
+bool ALDDPSolver::backwardPass(CDDP &context) {
+  if (context.getOptions().alddp.use_sqrt_backward_pass) {
+    return backwardPassSqrt(context);
+  }
+  return backwardPassStandard(context);
+}
+
+bool ALDDPSolver::backwardPassStandard(CDDP &context) {
+  const CDDPOptions &options = context.getOptions();
+  const auto &al_opts = options.alddp;
+  const int state_dim = context.getStateDim();
+  const int control_dim = context.getControlDim();
+  const int horizon = context.getHorizon();
+
+  auto control_constraint =
+      al_opts.use_boxqp_for_controls
+          ? context.getConstraint<ControlConstraint>("ControlConstraint")
+          : nullptr;
+
+  // Terminal cost derivatives with AL augmentation
+  Eigen::VectorXd V_x =
+      context.getObjective().getFinalCostGradient(context.X_.back());
+  Eigen::MatrixXd V_xx =
+      context.getObjective().getFinalCostHessian(context.X_.back());
+
+  augmentTerminalCostDerivatives(context, context.X_.back(), V_x, V_xx);
+
+  // Pre-allocate
+  Eigen::MatrixXd A(state_dim, state_dim);
+  Eigen::MatrixXd B(state_dim, control_dim);
+  Eigen::VectorXd Q_x(state_dim), Q_u(control_dim);
+  Eigen::MatrixXd Q_xx(state_dim, state_dim);
+  Eigen::MatrixXd Q_uu(control_dim, control_dim);
+  Eigen::MatrixXd Q_uu_reg(control_dim, control_dim);
+  Eigen::MatrixXd Q_ux(control_dim, state_dim);
+  Eigen::VectorXd k(control_dim);
+  Eigen::MatrixXd K(control_dim, state_dim);
+
+  dV_ = Eigen::Vector2d::Zero();
+  double norm_Vx = V_x.lpNorm<1>();
+  double Qu_error = 0.0;
+
+  for (int t = horizon - 1; t >= 0; --t) {
+    const Eigen::VectorXd &x = context.X_[t];
+    const Eigen::VectorXd &u = context.U_[t];
+
+    const auto [Fx, Fu] =
+        context.getSystem().getJacobians(x, u, t * context.getTimestep());
+
+    A = context.getTimestep() * Fx;
+    A.diagonal().array() += 1.0;
+    B = context.getTimestep() * Fu;
+
+    auto [l_x, l_u] = context.getObjective().getRunningCostGradients(x, u, t);
+    auto [l_xx, l_uu, l_ux] =
+        context.getObjective().getRunningCostHessians(x, u, t);
+
+    // Augment with AL constraint penalty terms
+    augmentRunningCostDerivatives(context, t, x, u, l_x, l_u, l_xx, l_uu, l_ux);
+
+    // Slack contribution to V_x through dynamics: x_{t+1} = f(x,u) + s
+    // The slack penalty 0.5 * R_s * ||s||^2 contributes to cost-to-go
+    if (infeasible_start_) {
+      // Slack penalty gradient: R_s * s_t contributes via the identity Jacobian
+      // Q_s = V_x' + R_s * s_t  (handled below after V_x' is available)
+      // For the standard backward pass, we incorporate slacks after computing V_x, V_xx
+    }
+
+    Q_x = l_x + A.transpose() * V_x;
+    Q_u = l_u + B.transpose() * V_x;
+    Q_xx = l_xx + A.transpose() * V_xx * A;
+    Q_ux = l_ux + B.transpose() * V_xx * A;
+    Q_uu = l_uu + B.transpose() * V_xx * B;
+
+    Q_uu_reg = Q_uu;
+    Q_uu_reg.diagonal().array() += context.regularization_;
+
+    // Check positive definiteness
+    Eigen::EigenSolver<Eigen::MatrixXd> es(Q_uu_reg);
+    if (es.eigenvalues().real().minCoeff() <= 0) {
+      if (options.debug) {
+        std::cerr << "ALDDP: Q_uu is not positive definite at time " << t
+                  << std::endl;
+      }
+      return false;
+    }
+
+    // Solve for control gains
+    if (control_constraint == nullptr) {
+      const Eigen::MatrixXd H = Q_uu_reg.inverse();
+      k = -H * Q_u;
+      K = -H * Q_ux;
+    } else {
+      const Eigen::VectorXd lb = control_constraint->rawLowerBound() - u;
+      const Eigen::VectorXd ub = control_constraint->rawUpperBound() - u;
+      const Eigen::VectorXd x0 = k_u_[t];
+
+      BoxQPResult qp_result = boxqp_solver_.solve(Q_uu_reg, Q_u, lb, ub, x0);
+
+      if (qp_result.status == BoxQPStatus::HESSIAN_NOT_PD ||
+          qp_result.status == BoxQPStatus::NO_DESCENT) {
+        if (options.debug) {
+          std::cerr << "ALDDP: BoxQP failed at time step " << t << std::endl;
+        }
+        return false;
+      }
+
+      k = qp_result.x;
+      K = Eigen::MatrixXd::Zero(control_dim, state_dim);
+      if (qp_result.free.sum() > 0) {
+        std::vector<int> free_idx;
+        for (int i = 0; i < control_dim; i++) {
+          if (qp_result.free(i)) {
+            free_idx.push_back(i);
+          }
+        }
+        Eigen::MatrixXd Q_ux_free(free_idx.size(), state_dim);
+        for (size_t i = 0; i < free_idx.size(); i++) {
+          Q_ux_free.row(i) = Q_ux.row(free_idx[i]);
+        }
+        Eigen::MatrixXd K_free = -qp_result.Hfree.solve(Q_ux_free);
+        for (size_t i = 0; i < free_idx.size(); i++) {
+          K.row(free_idx[i]) = K_free.row(i);
+        }
+      }
+    }
+
+    k_u_[t] = k;
+    K_u_[t] = K;
+
+    // Solve for slack gains (decoupled, ignoring Q_us/Q_su coupling)
+    if (infeasible_start_) {
+      // Q_ss = R_s*I + V_xx (from next step, V_xx is current since we're going backward)
+      Eigen::MatrixXd Q_ss = current_slack_penalty_ * Eigen::MatrixXd::Identity(state_dim, state_dim) + V_xx;
+      Eigen::VectorXd Q_s = V_x + current_slack_penalty_ * S_[t];
+      Eigen::MatrixXd Q_sx = V_xx * A;
+
+      // Regularize Q_ss
+      Q_ss.diagonal().array() += context.regularization_;
+
+      Eigen::LDLT<Eigen::MatrixXd> ldlt(Q_ss);
+      k_s_[t] = -ldlt.solve(Q_s);
+      K_s_[t] = -ldlt.solve(Q_sx);
+    }
+
+    // Expected cost change
+    Eigen::Vector2d dV_step;
+    dV_step << Q_u.dot(k), 0.5 * k.dot(Q_uu * k);
+    dV_ += dV_step;
+
+    // Add slack contribution to expected cost change
+    if (infeasible_start_) {
+      Eigen::MatrixXd Q_ss_unreg = current_slack_penalty_ * Eigen::MatrixXd::Identity(state_dim, state_dim) + V_xx;
+      Eigen::VectorXd Q_s = V_x + current_slack_penalty_ * S_[t];
+      dV_(0) += Q_s.dot(k_s_[t]);
+      dV_(1) += 0.5 * k_s_[t].dot(Q_ss_unreg * k_s_[t]);
+    }
+
+    // Update cost-to-go
+    V_x = Q_x + K.transpose() * Q_uu * k + Q_ux.transpose() * k +
+          K.transpose() * Q_u;
+    V_xx = Q_xx + K.transpose() * Q_uu * K + Q_ux.transpose() * K +
+           K.transpose() * Q_ux;
+    V_xx = 0.5 * (V_xx + V_xx.transpose());
+
+    norm_Vx += V_x.lpNorm<1>();
+    Qu_error = std::max(Qu_error, Q_u.lpNorm<Eigen::Infinity>());
+  }
+
+  double scaling_factor = options.termination_scaling_max_factor;
+  scaling_factor = std::max(scaling_factor, norm_Vx / (horizon * state_dim)) /
+                   scaling_factor;
+  context.inf_du_ = Qu_error / scaling_factor;
+
+  return true;
+}
+
+bool ALDDPSolver::backwardPassSqrt(CDDP &context) {
+  const CDDPOptions &options = context.getOptions();
+  const auto &al_opts = options.alddp;
+  const int state_dim = context.getStateDim();
+  const int control_dim = context.getControlDim();
+  const int horizon = context.getHorizon();
+
+  auto control_constraint =
+      al_opts.use_boxqp_for_controls
+          ? context.getConstraint<ControlConstraint>("ControlConstraint")
+          : nullptr;
+
+  // Terminal cost derivatives
+  Eigen::VectorXd V_x =
+      context.getObjective().getFinalCostGradient(context.X_.back());
+  Eigen::MatrixXd V_xx =
+      context.getObjective().getFinalCostHessian(context.X_.back());
+
+  augmentTerminalCostDerivatives(context, context.X_.back(), V_x, V_xx);
+
+  // Regularize terminal Hessian before Cholesky
+  V_xx.diagonal().array() += 1e-8;
+
+  // Compute Cholesky factor of terminal cost-to-go Hessian: S_N = √P_N
+  Eigen::LLT<Eigen::MatrixXd> llt_terminal(V_xx);
+  if (llt_terminal.info() != Eigen::Success) {
+    if (options.debug) {
+      std::cerr << "ALDDP sqrt: Terminal Hessian Cholesky failed" << std::endl;
+    }
+    return false;
+  }
+  S_chol_[horizon] = llt_terminal.matrixU(); // upper triangular
+  p_[horizon] = V_x;
+
+  dV_ = Eigen::Vector2d::Zero();
+  double norm_Vx = V_x.lpNorm<1>();
+  double Qu_error = 0.0;
+
+  for (int t = horizon - 1; t >= 0; --t) {
+    const Eigen::VectorXd &x = context.X_[t];
+    const Eigen::VectorXd &u = context.U_[t];
+
+    const auto [Fx, Fu] =
+        context.getSystem().getJacobians(x, u, t * context.getTimestep());
+
+    Eigen::MatrixXd A = context.getTimestep() * Fx;
+    A.diagonal().array() += 1.0;
+    Eigen::MatrixXd B = context.getTimestep() * Fu;
+
+    auto [l_x, l_u] = context.getObjective().getRunningCostGradients(x, u, t);
+    auto [l_xx, l_uu, l_ux] =
+        context.getObjective().getRunningCostHessians(x, u, t);
+
+    augmentRunningCostDerivatives(context, t, x, u, l_x, l_u, l_xx, l_uu, l_ux);
+
+    const Eigen::MatrixXd &S_next = S_chol_[t + 1]; // upper triangular √P'
+    const Eigen::VectorXd &p_next = p_[t + 1];
+
+    // Q-function first-order terms
+    Eigen::VectorXd Q_x = l_x + A.transpose() * p_next;
+    Eigen::VectorXd Q_u = l_u + B.transpose() * p_next;
+
+    // Compute Cholesky factors of running cost Hessians
+    // Add regularization to ensure PD
+    l_xx.diagonal().array() += context.regularization_;
+    l_uu.diagonal().array() += context.regularization_;
+
+    Eigen::LLT<Eigen::MatrixXd> llt_lxx(l_xx);
+    Eigen::LLT<Eigen::MatrixXd> llt_luu(l_uu);
+
+    if (llt_lxx.info() != Eigen::Success || llt_luu.info() != Eigen::Success) {
+      if (options.debug) {
+        std::cerr << "ALDDP sqrt: Running cost Hessian Cholesky failed at t="
+                  << t << std::endl;
+      }
+      return false;
+    }
+
+    Eigen::MatrixXd sqrt_lxx = llt_lxx.matrixU(); // upper triangular
+    Eigen::MatrixXd sqrt_luu = llt_luu.matrixU();
+
+    // Reconstruct V_xx' = S_next^T * S_next for explicit Q-function computation
+    Eigen::MatrixXd V_xx_next = S_next.transpose() * S_next;
+
+    // Build Q-function Hessians explicitly (use sqrt factors only for propagation)
+    Eigen::MatrixXd Q_xx = l_xx + A.transpose() * V_xx_next * A;
+    Eigen::MatrixXd Q_ux = l_ux + B.transpose() * V_xx_next * A;
+    Eigen::MatrixXd Q_uu = l_uu + B.transpose() * V_xx_next * B;
+
+    // Regularize Q_uu
+    Eigen::MatrixXd Q_uu_reg = Q_uu;
+    Q_uu_reg.diagonal().array() += context.regularization_;
+
+    // Check positive definiteness
+    Eigen::EigenSolver<Eigen::MatrixXd> es(Q_uu_reg);
+    if (es.eigenvalues().real().minCoeff() <= 0) {
+      if (options.debug) {
+        std::cerr << "ALDDP sqrt: Q_uu is not positive definite at time " << t
+                  << std::endl;
+      }
+      return false;
+    }
+
+    // Compute Z_uu = cholesky(Q_uu_reg) for gain computation
+    Eigen::LLT<Eigen::MatrixXd> llt_Quu(Q_uu_reg);
+    if (llt_Quu.info() != Eigen::Success) {
+      if (options.debug) {
+        std::cerr << "ALDDP sqrt: Q_uu Cholesky failed at t=" << t << std::endl;
+      }
+      return false;
+    }
+    Eigen::MatrixXd Z_uu = llt_Quu.matrixU(); // upper triangular
+
+    // Gains via triangular solves (Eqs. 22-23): K = -Z_uu^{-T} Z_uu^{-1} Q_ux
+    Eigen::VectorXd k_vec(control_dim);
+    Eigen::MatrixXd K_mat(control_dim, state_dim);
+
+    if (control_constraint == nullptr) {
+      // d = -solve(Z_uu, solve(Z_uu^T, Q_u))
+      Eigen::VectorXd tmp1 = Z_uu.transpose().triangularView<Eigen::Lower>().solve(Q_u);
+      k_vec = -Z_uu.triangularView<Eigen::Upper>().solve(tmp1);
+
+      Eigen::MatrixXd tmp2 = Z_uu.transpose().triangularView<Eigen::Lower>().solve(Q_ux);
+      K_mat = -Z_uu.triangularView<Eigen::Upper>().solve(tmp2);
+    } else {
+      // Reconstruct Q_uu for BoxQP
+      Eigen::MatrixXd Q_uu_full = Z_uu.transpose() * Z_uu;
+
+      const Eigen::VectorXd lb = control_constraint->rawLowerBound() - u;
+      const Eigen::VectorXd ub = control_constraint->rawUpperBound() - u;
+      const Eigen::VectorXd x0 = k_u_[t];
+
+      BoxQPResult qp_result = boxqp_solver_.solve(Q_uu_full, Q_u, lb, ub, x0);
+
+      if (qp_result.status == BoxQPStatus::HESSIAN_NOT_PD ||
+          qp_result.status == BoxQPStatus::NO_DESCENT) {
+        if (options.debug) {
+          std::cerr << "ALDDP sqrt: BoxQP failed at time step " << t << std::endl;
+        }
+        return false;
+      }
+
+      k_vec = qp_result.x;
+      K_mat = Eigen::MatrixXd::Zero(control_dim, state_dim);
+      if (qp_result.free.sum() > 0) {
+        std::vector<int> free_idx;
+        for (int i = 0; i < control_dim; i++) {
+          if (qp_result.free(i)) free_idx.push_back(i);
+        }
+        Eigen::MatrixXd Q_ux_free(free_idx.size(), state_dim);
+        for (size_t i = 0; i < free_idx.size(); i++) {
+          Q_ux_free.row(i) = Q_ux.row(free_idx[i]);
+        }
+        Eigen::MatrixXd K_free = -qp_result.Hfree.solve(Q_ux_free);
+        for (size_t i = 0; i < free_idx.size(); i++) {
+          K_mat.row(free_idx[i]) = K_free.row(i);
+        }
+      }
+    }
+
+    k_u_[t] = k_vec;
+    K_u_[t] = K_mat;
+
+    // Slack gains (same as standard path)
+    if (infeasible_start_) {
+      Eigen::MatrixXd Q_ss = current_slack_penalty_ * Eigen::MatrixXd::Identity(state_dim, state_dim)
+                            + S_next.transpose() * S_next;
+      Eigen::VectorXd Q_s = p_next + current_slack_penalty_ * S_[t];
+      Eigen::MatrixXd Q_sx = S_next.transpose() * S_next * A;
+      Q_ss.diagonal().array() += context.regularization_;
+
+      Eigen::LDLT<Eigen::MatrixXd> ldlt(Q_ss);
+      k_s_[t] = -ldlt.solve(Q_s);
+      K_s_[t] = -ldlt.solve(Q_sx);
+    }
+
+    // Expected cost change (Eq. 25)
+    Eigen::VectorXd Zuu_d = Z_uu * k_vec;
+    dV_(0) += Q_u.dot(k_vec);
+    dV_(1) += 0.5 * Zuu_d.dot(Zuu_d);
+
+    if (infeasible_start_) {
+      Eigen::MatrixXd Q_ss_unreg = current_slack_penalty_ * Eigen::MatrixXd::Identity(state_dim, state_dim)
+                                  + S_next.transpose() * S_next;
+      Eigen::VectorXd Q_s = p_next + current_slack_penalty_ * S_[t];
+      dV_(0) += Q_s.dot(k_s_[t]);
+      dV_(1) += 0.5 * k_s_[t].dot(Q_ss_unreg * k_s_[t]);
+    }
+
+    // Cost-to-go gradient update (standard Riccati)
+    p_[t] = Q_x + K_mat.transpose() * Q_uu * k_vec + Q_ux.transpose() * k_vec +
+            K_mat.transpose() * Q_u;
+
+    // Cost-to-go Hessian update (standard Riccati, then take Cholesky for sqrt storage)
+    Eigen::MatrixXd V_xx_new = Q_xx + K_mat.transpose() * Q_uu * K_mat +
+                               Q_ux.transpose() * K_mat + K_mat.transpose() * Q_ux;
+    V_xx_new = 0.5 * (V_xx_new + V_xx_new.transpose());
+    V_xx_new.diagonal().array() += 1e-8;
+
+    Eigen::LLT<Eigen::MatrixXd> llt_Vxx(V_xx_new);
+    if (llt_Vxx.info() != Eigen::Success) {
+      if (options.debug) {
+        std::cerr << "ALDDP sqrt: V_xx Cholesky failed at t=" << t << std::endl;
+      }
+      return false;
+    }
+    S_chol_[t] = llt_Vxx.matrixU();
+
+    norm_Vx += p_[t].lpNorm<1>();
+    Qu_error = std::max(Qu_error, Q_u.lpNorm<Eigen::Infinity>());
+  }
+
+  double scaling_factor = options.termination_scaling_max_factor;
+  scaling_factor = std::max(scaling_factor, norm_Vx / (horizon * state_dim)) /
+                   scaling_factor;
+  context.inf_du_ = Qu_error / scaling_factor;
+
+  return true;
+}
+
+// ============================================================================
+// Forward Pass
+// ============================================================================
+
+ForwardPassResult ALDDPSolver::forwardPass(CDDP &context, double alpha_pr) {
+  ForwardPassResult result;
+  result.success = false;
+  result.cost = std::numeric_limits<double>::infinity();
+  result.merit_function = std::numeric_limits<double>::infinity();
+  result.alpha_pr = alpha_pr;
+
+  result.state_trajectory = context.X_;
+  result.control_trajectory = context.U_;
+  result.state_trajectory[0] = context.getInitialState();
+
+  const auto &al_opts = context.getOptions().alddp;
+  auto control_constraint =
+      al_opts.use_boxqp_for_controls
+          ? context.getConstraint<ControlConstraint>("ControlConstraint")
+          : nullptr;
+
+  // Trial slack controls (local copy for thread safety)
+  std::vector<Eigen::VectorXd> S_trial;
+  if (infeasible_start_) {
+    S_trial = S_;
+  }
+
+  double J_new = 0.0;
+
+  for (int t = 0; t < context.getHorizon(); ++t) {
+    const Eigen::VectorXd &x = result.state_trajectory[t];
+    const Eigen::VectorXd delta_x = x - context.X_[t];
+
+    result.control_trajectory[t] =
+        result.control_trajectory[t] + alpha_pr * k_u_[t] + K_u_[t] * delta_x;
+
+    if (control_constraint != nullptr) {
+      result.control_trajectory[t] =
+          control_constraint->clamp(result.control_trajectory[t]);
+    }
+
+    J_new += context.getObjective().running_cost(x, result.control_trajectory[t], t);
+
+    // Dynamics with slack
+    result.state_trajectory[t + 1] = context.getSystem().getDiscreteDynamics(
+        x, result.control_trajectory[t], t * context.getTimestep());
+
+    if (infeasible_start_) {
+      S_trial[t] = S_[t] + alpha_pr * k_s_[t] + K_s_[t] * delta_x;
+      result.state_trajectory[t + 1] += S_trial[t];
+    }
+  }
+
+  J_new += context.getObjective().terminal_cost(result.state_trajectory.back());
+
+  // Compute AL merit function (thread-safe, uses local S_trial)
+  double merit_new = computeALMerit(
+      context, result.state_trajectory, result.control_trajectory,
+      infeasible_start_ ? &S_trial : nullptr);
+
+  // Armijo condition on the AL merit function
+  double current_merit = context.merit_function_;
+  double dJ = current_merit - merit_new;
+  double expected = -alpha_pr * (dV_(0) + 0.5 * alpha_pr * dV_(1));
+  double reduction_ratio =
+      expected > 0.0 ? dJ / expected : std::copysign(1.0, dJ);
+
+  result.success = reduction_ratio > context.getOptions().filter.armijo_constant;
+  result.cost = J_new;
+  result.merit_function = merit_new;
+
+  // Compute constraint violation for this trial (using local evaluation)
+  double max_viol = 0.0;
+  for (const auto &[name, lam_vec] : lambda_) {
+    const auto &constraint = context.getConstraintSet().at(name);
+    for (int t = 0; t < context.getHorizon(); ++t) {
+      double viol = constraint->computeViolation(
+          result.state_trajectory[t], result.control_trajectory[t], t);
+      max_viol = std::max(max_viol, viol);
+    }
+  }
+  Eigen::VectorXd dummy_u = Eigen::VectorXd::Zero(context.getControlDim());
+  for (const auto &[name, lam] : terminal_lambda_) {
+    const auto &constraint = context.getTerminalConstraintSet().at(name);
+    double viol = constraint->computeViolation(
+        result.state_trajectory.back(), dummy_u, context.getHorizon());
+    max_viol = std::max(max_viol, viol);
+  }
+  result.constraint_violation = max_viol;
+
+  // Stash slack trajectory for applyForwardPassResult
+  if (infeasible_start_ && result.success) {
+    // Store in the optional dynamics_trajectory field
+    result.dynamics_trajectory = S_trial;
+  }
+
+  return result;
+}
+
+void ALDDPSolver::applyForwardPassResult(CDDP &context,
+                                          const ForwardPassResult &result) {
+  // Apply base class updates (X_, U_, cost_, merit_function_, alpha_pr_)
+  CDDPSolverBase::applyForwardPassResult(context, result);
+
+  // Update merit to AL merit (not bare cost)
+  context.merit_function_ = result.merit_function;
+
+  // Update slack controls if infeasible start
+  if (infeasible_start_ && result.dynamics_trajectory.has_value()) {
+    S_ = result.dynamics_trajectory.value();
+  }
+}
+
+// ============================================================================
+// Iteration Setup and Convergence
+// ============================================================================
+
+void ALDDPSolver::preIterationSetup(CDDP &context) {
+  evaluateConstraints(context);
+  // max_constraint_violation_ is set by evaluateConstraints()
+
+  // Set initial merit function to AL merit
+  context.merit_function_ = computeALMerit(
+      context, context.X_, context.U_,
+      infeasible_start_ ? &S_ : nullptr);
+}
+
+bool ALDDPSolver::checkEarlyConvergence(CDDP &context, int /*iter*/,
+                                         std::string &reason) {
+  // Check if already fully converged after backward pass
+  const auto &al_opts = context.getOptions().alddp;
+
+  if (context.inf_du_ < al_opts.constraint_tolerance &&
+      max_constraint_violation_ < al_opts.constraint_tolerance &&
+      (!infeasible_start_ || computeMaxSlackNorm() < al_opts.constraint_tolerance)) {
+    reason = "OptimalSolutionFound";
+    return true;
+  }
+  return false;
+}
+
+bool ALDDPSolver::checkConvergence(CDDP &context, double dJ, double /*dL*/,
+                                    int /*iter*/, std::string &reason) {
+  const auto &al_opts = context.getOptions().alddp;
+
+  // Check for AL outer iteration limit
+  if (al_outer_iter_ >= al_opts.max_outer_iterations) {
+    reason = "MaxOuterIterationsReached";
+    return true;
+  }
+
+  // Check full convergence (outer)
+  bool constraints_satisfied = max_constraint_violation_ < al_opts.constraint_tolerance;
+  bool slacks_zero = !infeasible_start_ || computeMaxSlackNorm() < al_opts.constraint_tolerance;
+  bool dual_converged = context.inf_du_ < al_opts.constraint_tolerance;
+
+  if (constraints_satisfied && slacks_zero && dual_converged) {
+    reason = "OptimalSolutionFound";
+    return true;
+  }
+
+  // Check inner convergence
+  inner_iter_count_++;
+  bool inner_cost_converged = (dJ > 0.0 && dJ < context.getOptions().acceptable_tolerance);
+  bool inner_du_converged = context.inf_du_ < inner_tolerance_;
+  bool inner_iter_limit = inner_iter_count_ >= al_opts.max_inner_iterations;
+
+  if (inner_cost_converged || inner_du_converged || inner_iter_limit) {
+    inner_converged_ = true;
+    // Return false so postIterationUpdate can handle the AL outer update
+  }
+
+  return false;
+}
+
+// ============================================================================
+// AL Outer Loop (Multiplier/Penalty Updates)
+// ============================================================================
+
+void ALDDPSolver::postIterationUpdate(CDDP &context, bool /*forward_pass_success*/) {
+  if (!inner_converged_) return;
+
+  const auto &al_opts = context.getOptions().alddp;
+
+  // Re-evaluate constraints at current trajectory
+  evaluateConstraints(context);
+  // max_constraint_violation_ is set by evaluateConstraints()
+  double current_violation = max_constraint_violation_;
+
+  // Update multipliers (Eq. 17)
+  updateMultipliers(context);
+
+  // Check if violation improved sufficiently
+  if (current_violation > 0.5 * prev_max_constraint_violation_ || al_outer_iter_ == 0) {
+    // Insufficient progress -- increase penalties
+    updatePenalties(context);
+  }
+
+  // Increase slack penalty
+  if (infeasible_start_ && computeMaxSlackNorm() > al_opts.constraint_tolerance) {
+    current_slack_penalty_ *= al_opts.penalty_update_factor;
+    current_slack_penalty_ = std::min(current_slack_penalty_, al_opts.penalty_max);
+  }
+
+  // Cost divergence check
+  if (context.cost_ > 100.0 * prev_outer_cost_ && al_outer_iter_ > 0) {
+    if (context.getOptions().verbose) {
+      std::cerr << "ALDDP: Cost divergence detected (cost = " << context.cost_
+                << ", prev = " << prev_outer_cost_ << ")" << std::endl;
+    }
+  }
+
+  // Update state
+  prev_max_constraint_violation_ = current_violation;
+  prev_outer_cost_ = context.cost_;
+  al_outer_iter_++;
+  inner_converged_ = false;
+  inner_iter_count_ = 0;
+
+  // Tighten inner tolerance
+  inner_tolerance_ = std::max(
+      al_opts.inner_tolerance_min,
+      inner_tolerance_ * al_opts.inner_tolerance_factor);
+
+  // Reset regularization for new inner loop
+  context.regularization_ = context.getOptions().regularization.initial_value;
+
+  if (context.getOptions().verbose) {
+    std::cout << "  ALDDP outer iter " << al_outer_iter_
+              << ": violation=" << std::scientific << std::setprecision(2)
+              << current_violation
+              << " slack=" << computeMaxSlackNorm()
+              << " inner_tol=" << inner_tolerance_
+              << std::endl;
+  }
+}
+
+void ALDDPSolver::updateMultipliers(CDDP &context) {
+  // Path constraints (Eq. 17)
+  for (auto &[name, lam_vec] : lambda_) {
+    const auto &constraint = context.getConstraintSet().at(name);
+    const Eigen::VectorXd &upper = constraint->getUpperBound();
+    const Eigen::VectorXd &lower = constraint->getLowerBound();
+
+    for (int t = 0; t < context.getHorizon(); ++t) {
+      const Eigen::VectorXd &g = G_[name][t];
+      for (int i = 0; i < g.size(); ++i) {
+        double c_i = g(i) - upper(i);
+        bool is_equality = (std::abs(lower(i) - upper(i)) < 1e-12);
+        if (is_equality) {
+          lam_vec[t](i) += penalty_[name][t](i) * c_i;
+        } else {
+          lam_vec[t](i) = std::max(0.0, lam_vec[t](i) + penalty_[name][t](i) * c_i);
+        }
+      }
+    }
+  }
+
+  // Terminal constraints
+  Eigen::VectorXd dummy_u = Eigen::VectorXd::Zero(context.getControlDim());
+  for (auto &[name, lam] : terminal_lambda_) {
+    const auto &constraint = context.getTerminalConstraintSet().at(name);
+    const Eigen::VectorXd &upper = constraint->getUpperBound();
+    const Eigen::VectorXd &lower = constraint->getLowerBound();
+    const Eigen::VectorXd &g = G_terminal_[name];
+
+    for (int i = 0; i < g.size(); ++i) {
+      double c_i = g(i) - upper(i);
+      bool is_equality = (std::abs(lower(i) - upper(i)) < 1e-12);
+      if (is_equality) {
+        lam(i) += terminal_penalty_[name](i) * c_i;
+      } else {
+        lam(i) = std::max(0.0, lam(i) + terminal_penalty_[name](i) * c_i);
+      }
+    }
+  }
+}
+
+void ALDDPSolver::updatePenalties(CDDP &context) {
+  const auto &al_opts = context.getOptions().alddp;
+
+  for (auto &[name, pen_vec] : penalty_) {
+    for (int t = 0; t < context.getHorizon(); ++t) {
+      pen_vec[t] *= al_opts.penalty_update_factor;
+      pen_vec[t] = pen_vec[t].cwiseMin(al_opts.penalty_max);
+    }
+  }
+
+  for (auto &[name, pen] : terminal_penalty_) {
+    pen *= al_opts.penalty_update_factor;
+    pen = pen.cwiseMin(al_opts.penalty_max);
+  }
+}
+
+// ============================================================================
+// Regularization Limit Handler
+// ============================================================================
+
+bool ALDDPSolver::handleBackwardPassRegularizationLimit(
+    CDDP &context, std::string &termination_reason) {
+  const auto &al_opts = context.getOptions().alddp;
+
+  // Perform AL outer update in-place since postIterationUpdate won't fire
+  evaluateConstraints(context);
+  double current_violation = computeMaxConstraintViolation();
+
+  // Check if truly converged
+  if (current_violation < al_opts.constraint_tolerance &&
+      (!infeasible_start_ || computeMaxSlackNorm() < al_opts.constraint_tolerance)) {
+    termination_reason = "OptimalSolutionFound";
+    return true;
+  }
+
+  // Update multipliers and penalties
+  updateMultipliers(context);
+  if (current_violation > 0.5 * prev_max_constraint_violation_ || al_outer_iter_ == 0) {
+    updatePenalties(context);
+  }
+
+  prev_max_constraint_violation_ = current_violation;
+  max_constraint_violation_ = current_violation;
+  al_outer_iter_++;
+
+  if (al_outer_iter_ < al_opts.max_outer_iterations) {
+    // Reset regularization and return true (early exit with current best)
+    context.regularization_ = context.getOptions().regularization.initial_value;
+    termination_reason = "RegularizationLimitReached";
+    return true;
+  }
+
+  termination_reason = "MaxOuterIterationsReached";
+  return false;
+}
+
+// ============================================================================
+// History and Solution
+// ============================================================================
+
+void ALDDPSolver::recordIterationHistory(const CDDP &context) {
+  CDDPSolverBase::recordIterationHistory(context);
+  // AL-specific history could be added here
+  history_.primal_infeasibility.push_back(max_constraint_violation_);
+}
+
+void ALDDPSolver::populateSolverSpecificSolution(CDDPSolution &solution,
+                                                   const CDDP &context) {
+  solution.final_primal_infeasibility = max_constraint_violation_;
+  solution.final_dual_infeasibility = context.inf_du_;
+
+  // Append AL info to status message
+  std::string al_info = " (" + std::to_string(al_outer_iter_) + " AL outer iters)";
+  solution.status_message += al_info;
+}
+
+// ============================================================================
+// Printing
+// ============================================================================
+
+void ALDDPSolver::printIteration(int iter, const CDDP &context) const {
+  if (iter == 0) {
+    std::cout << std::setw(4) << "iter" << " " << std::setw(12) << "objective"
+              << " " << std::setw(10) << "inf_du" << " " << std::setw(10)
+              << "violation" << " " << std::setw(8) << "lg(rg)" << " "
+              << std::setw(8) << "alpha" << " " << std::setw(5) << "outer"
+              << std::endl;
+  }
+
+  std::cout << std::setw(4) << iter << " " << std::setw(12) << std::scientific
+            << std::setprecision(4) << context.cost_ << " " << std::setw(10)
+            << std::scientific << std::setprecision(2) << context.inf_du_
+            << " " << std::setw(10) << std::scientific << std::setprecision(2)
+            << max_constraint_violation_ << " " << std::setw(8) << std::fixed
+            << std::setprecision(1) << std::log10(context.regularization_)
+            << " " << std::setw(8) << std::fixed << std::setprecision(4)
+            << context.alpha_pr_ << " " << std::setw(5) << al_outer_iter_
+            << std::endl;
+}
+
+void ALDDPSolver::printSolutionSummary(const CDDPSolution &solution) const {
+  std::cout << "\n=== ALDDP Solution Summary ===" << std::endl;
+  std::cout << "Status: " << solution.status_message << std::endl;
+  std::cout << "Iterations: " << solution.iterations_completed << std::endl;
+  std::cout << "Solve time: " << std::fixed << std::setprecision(2)
+            << solution.solve_time_ms << " ms" << std::endl;
+  std::cout << "Final objective: " << std::scientific << std::setprecision(6)
+            << solution.final_objective << std::endl;
+  std::cout << "Primal infeasibility: " << std::scientific << std::setprecision(2)
+            << solution.final_primal_infeasibility << std::endl;
+  std::cout << "Dual infeasibility: " << std::scientific << std::setprecision(2)
+            << solution.final_dual_infeasibility << std::endl;
+  std::cout << "==============================\n" << std::endl;
+}
+
+} // namespace cddp
diff --git a/src/cddp_core/cddp_core.cpp b/src/cddp_core/cddp_core.cpp
index 65a3cf8..bfbcb45 100644
--- a/src/cddp_core/cddp_core.cpp
+++ b/src/cddp_core/cddp_core.cpp
@@ -19,6 +19,7 @@
 #include "cddp_core/ipddp_solver.hpp"   // For IPDDPSolver
 #include "cddp_core/logddp_solver.hpp"  // For LogDDPSolver
 #include "cddp_core/msipddp_solver.hpp" // For MSIPDDPSolver
+#include "cddp_core/alddp_solver.hpp"   // For ALDDPSolver
 #include "cddp_core/options.hpp"        // For CDDPOptions structure
 #include <cmath>                        // For std::min, std::max
 #include <functional>
@@ -262,10 +263,32 @@ std::string solverTypeToString(SolverType solver_type) {
     return "IPDDP";
   case SolverType::MSIPDDP:
     return "MSIPDDP";
+  case SolverType::ALDDP:
+    return "ALDDP";
   default:
     return "CLDDP"; // Default fallback
   }
 }
+
+std::string canonicalizeSolverType(const std::string &solver_type) {
+  if (solver_type == "CLCDDP" || solver_type == "CLDDP") {
+    return "CLDDP";
+  }
+  if (solver_type == "LogDDP" || solver_type == "LOGDDP") {
+    return "LogDDP";
+  }
+  if (solver_type == "IPDDP") {
+    return "IPDDP";
+  }
+  if (solver_type == "MSIPDDP") {
+    return "MSIPDDP";
+  }
+  if (solver_type == "ALDDP") {
+    return "ALDDP";
+  }
+
+  return solver_type;
+}
 } // namespace
 
 CDDPSolution CDDP::solve(SolverType solver_type) {
@@ -290,6 +313,8 @@ CDDP::createSolver(const std::string &solver_type) {
     return std::make_unique<IPDDPSolver>();
   } else if (solver_type == "MSIPDDP") {
     return std::make_unique<MSIPDDPSolver>();
+  } else if (solver_type == "ALDDP") {
+    return std::make_unique<ALDDPSolver>();
   }
 
   return nullptr; // Solver not found
@@ -297,31 +322,36 @@ CDDP::createSolver(const std::string &solver_type) {
 
 CDDPSolution CDDP::solve(const std::string &solver_type) {
   // This is where strategy selection and invocation will happen.
+  const std::string canonical_solver_type = canonicalizeSolverType(solver_type);
 
   initializeProblemIfNecessary(); // Ensure X_, U_ are sized etc.
 
-  // Strategy selection and instantiation
-  solver_ = createSolver(solver_type);
+  // Preserve solver state across repeated solves with the same algorithm so
+  // warm-start options can reuse stored gains.
+  if (!solver_ || solver_->getSolverName() != canonical_solver_type) {
+    solver_ = createSolver(canonical_solver_type);
+  }
 
   if (!solver_) {
     // Solver not found - return error solution
     CDDPSolution solution;
-    solution.solver_name = solver_type;
+    solution.solver_name = canonical_solver_type;
     solution.status_message =
-        "UnknownSolver - No solver registered for '" + solver_type + "'";
+        "UnknownSolver - No solver registered for '" + canonical_solver_type +
+        "'";
     solution.iterations_completed = 0;
     solution.solve_time_ms = 0.0;
     solution.final_objective = 0.0;
     solution.final_step_length = 1.0;
 
     if (options_.verbose) {
-      std::cout << "Solver type '" << solver_type
+      std::cout << "Solver type '" << canonical_solver_type
                 << "' not found. Available solvers: ";
       auto available = getRegisteredSolvers();
       for (const auto &name : available) {
         std::cout << name << " ";
       }
-      std::cout << "CLDDP LogDDP IPDDP MSIPDDP" << std::endl;
+      std::cout << "CLDDP LogDDP IPDDP MSIPDDP ALDDP" << std::endl;
     }
 
     return solution;
diff --git a/src/cddp_core/logddp_solver.cpp b/src/cddp_core/logddp_solver.cpp
index 04fa0b2..6d31a07 100644
--- a/src/cddp_core/logddp_solver.cpp
+++ b/src/cddp_core/logddp_solver.cpp
@@ -16,28 +16,22 @@ limitations under the License.
 
 #include "cddp_core/logddp_solver.hpp"
 #include "cddp_core/cddp_core.hpp"
-#include <chrono>
 #include <cmath>
-#include <execution>
-#include <future>
 #include <iomanip>
 #include <iostream>
+#include <limits>
 
 namespace cddp {
 
 LogDDPSolver::LogDDPSolver()
-    : mu_(1e-1), relaxation_delta_(1e-5), constraint_violation_(1e+7),
-      ms_segment_length_(5) {}
+    : mu_(1e-1), relaxation_delta_(1e-5), constraint_violation_(0.0) {}
 
 void LogDDPSolver::initialize(CDDP &context) {
   const CDDPOptions &options = context.getOptions();
+  const int horizon = context.getHorizon();
+  const int control_dim = context.getControlDim();
+  const int state_dim = context.getStateDim();
 
-  int horizon = context.getHorizon();
-  int control_dim = context.getControlDim();
-  int state_dim = context.getStateDim();
-
-  // Check if reference_state in objective and reference_state in context are
-  // the same
   if ((context.getReferenceState() - context.getObjective().getReferenceState())
           .norm() > 1e-6) {
     std::cerr << "LogDDP: Initial state and goal state in the objective "
@@ -47,19 +41,48 @@ void LogDDPSolver::initialize(CDDP &context) {
         "Initial state and goal state in the objective function do not match");
   }
 
-  // For warm starts, verify that existing state is valid
+  if (options.log_barrier.segment_length != horizon) {
+    throw std::runtime_error(
+        "LogDDP: segment_length must equal horizon in single-shooting mode");
+  }
+  if (options.log_barrier.rollout_type != "nonlinear") {
+    throw std::runtime_error(
+        "LogDDP: rollout_type must be 'nonlinear' in single-shooting mode");
+  }
+
+  auto initialize_line_search = [&]() {
+    context.alphas_.clear();
+    double alpha = options.line_search.initial_step_size;
+    for (int i = 0; i < options.line_search.max_iterations; ++i) {
+      context.alphas_.push_back(alpha);
+      alpha *= options.line_search.step_reduction_factor;
+    }
+    context.alpha_pr_ = options.line_search.initial_step_size;
+    dV_ = Eigen::Vector2d::Zero();
+  };
+
+  auto initialize_barrier = [&]() {
+    mu_ = options.log_barrier.barrier.mu_initial;
+    relaxation_delta_ = options.log_barrier.relaxed_log_barrier_delta;
+    if (!relaxed_log_barrier_) {
+      relaxed_log_barrier_ =
+          std::make_unique<RelaxedLogBarrier>(mu_, relaxation_delta_);
+    } else {
+      relaxed_log_barrier_->setBarrierCoeff(mu_);
+      relaxed_log_barrier_->setRelaxationDelta(relaxation_delta_);
+    }
+  };
+
   if (options.warm_start) {
     bool valid_warm_start =
         (k_u_.size() == static_cast<size_t>(horizon) &&
          K_u_.size() == static_cast<size_t>(horizon) &&
-         F_.size() == static_cast<size_t>(horizon) &&
-         context.X_.size() == static_cast<size_t>(horizon + 1) &&
          context.U_.size() == static_cast<size_t>(horizon));
 
     if (valid_warm_start && !k_u_.empty()) {
       for (int t = 0; t < horizon; ++t) {
         if (k_u_[t].size() != control_dim || K_u_[t].rows() != control_dim ||
-            K_u_[t].cols() != state_dim || F_[t].size() != state_dim) {
+            K_u_[t].cols() != state_dim || context.U_[t].size() != control_dim) {
           valid_warm_start = false;
           break;
         }
@@ -73,139 +96,39 @@ void LogDDPSolver::initialize(CDDP &context) {
         std::cout << "LogDDP: Using warm start with existing control gains"
                   << std::endl;
       }
-
-      // Reset barrier parameters for warm start
-      mu_ = options.log_barrier.barrier.mu_initial;
-      relaxation_delta_ = options.log_barrier.relaxed_log_barrier_delta;
-      if (!relaxed_log_barrier_) {
-        relaxed_log_barrier_ =
-            std::make_unique<RelaxedLogBarrier>(mu_, relaxation_delta_);
-      } else {
-        relaxed_log_barrier_->setBarrierCoeff(mu_);
-        relaxed_log_barrier_->setRelaxationDelta(relaxation_delta_);
-      }
-
-      // Initialize regularization
       context.regularization_ = options.regularization.initial_value;
-
-      // Initialize line search parameters
-      context.alphas_.clear();
-      double alpha = options.line_search.initial_step_size;
-      for (int i = 0; i < options.line_search.max_iterations; ++i) {
-        context.alphas_.push_back(alpha);
-        alpha *= options.line_search.step_reduction_factor;
-      }
-      context.alpha_pr_ = options.line_search.initial_step_size;
-      dV_ = Eigen::Vector2d::Zero();
-
-      constraint_violation_ = std::numeric_limits<double>::infinity();
-      ms_segment_length_ = options.log_barrier.segment_length;
-
-      // Evaluate current trajectory and reset filter
+      initialize_line_search();
+      initialize_barrier();
       evaluateTrajectory(context);
       resetFilter(context);
       return;
-    } else if (options.verbose) {
-      std::cout << "LogDDP: Warning - warm start requested but no valid solver "
-                   "state found. "
-                << "Falling back to cold start initialization." << std::endl;
-    }
-  }
-
-  // Cold start: Initialize trajectories with interpolation between initial and
-  // reference states
-  if (context.X_.size() != static_cast<size_t>(horizon + 1) ||
-      context.U_.size() != static_cast<size_t>(horizon)) {
-    context.X_.resize(horizon + 1);
-    context.U_.resize(horizon);
-
-    // Create X_ initial guess using initial_state and reference_state by
-    // interpolating between them
-    for (int t = 0; t <= horizon; ++t) {
-      context.X_[t] =
-          context.getInitialState() +
-          t * (context.getReferenceState() - context.getInitialState()) /
-              horizon;
     }
 
-    for (int t = 0; t < horizon; ++t) {
-      context.U_[t] = Eigen::VectorXd::Zero(control_dim);
+    if (options.verbose) {
+      std::cout << "LogDDP: Warning - warm start requested but no valid solver "
+                   "state found. Falling back to cold start initialization."
+                << std::endl;
     }
   }
 
-  // Resize dynamics storage
-  F_.resize(horizon);
-  for (int t = 0; t < horizon; ++t) {
-    F_[t] = Eigen::VectorXd::Zero(state_dim);
-  }
-
-  // Use base class helper for gains
   initializeGains(horizon, control_dim, state_dim);
 
-  G_.clear();
-
-  const auto &constraint_set = context.getConstraintSet();
-  for (const auto &constraint_pair : constraint_set) {
-    const std::string &constraint_name = constraint_pair.first;
-    int dual_dim = constraint_pair.second->getDualDim();
-
-    G_[constraint_name].resize(horizon);
-    for (int t = 0; t < horizon; ++t) {
-      G_[constraint_name][t] = Eigen::VectorXd::Zero(dual_dim);
+  context.U_.resize(horizon);
+  for (int t = 0; t < horizon; ++t) {
+    if (context.U_[t].size() != control_dim) {
+      context.U_[t] = Eigen::VectorXd::Zero(control_dim);
     }
   }
 
-  // Initialize cost using objective evaluation
-  context.cost_ = context.getObjective().evaluate(context.X_, context.U_);
-
-  // Initialize line search parameters
-  context.alphas_.clear();
-  double alpha = options.line_search.initial_step_size;
-  for (int i = 0; i < options.line_search.max_iterations; ++i) {
-    context.alphas_.push_back(alpha);
-    alpha *= options.line_search.step_reduction_factor;
-  }
-  context.alpha_pr_ = options.line_search.initial_step_size;
-  dV_ = Eigen::Vector2d::Zero();
-
-  // Initialize regularization
   context.regularization_ = options.regularization.initial_value;
-
-  constraint_violation_ = std::numeric_limits<double>::infinity();
-
-  ms_segment_length_ = options.log_barrier.segment_length;
-
-  // Check if ms_segment_length_ is valid
-  if (ms_segment_length_ < 0) {
-    std::cerr << "LogDDP: ms_segment_length_ must be non-negative" << std::endl;
-    throw std::runtime_error("LogDDP: ms_segment_length_ must be non-negative");
-  }
-
-  const std::string &rollout_type = options.log_barrier.rollout_type;
-  if (rollout_type != "linear" && rollout_type != "nonlinear" &&
-      rollout_type != "hybrid") {
-    std::cerr << "LogDDP: Invalid ms_rollout_type: " << rollout_type
-              << std::endl;
-    throw std::runtime_error("LogDDP: Invalid ms_rollout_type");
-  }
-
-  // Initialize log barrier object
-  mu_ = options.log_barrier.barrier.mu_initial;
-  relaxation_delta_ = options.log_barrier.relaxed_log_barrier_delta;
-  if (!relaxed_log_barrier_) {
-    relaxed_log_barrier_ =
-        std::make_unique<RelaxedLogBarrier>(mu_, relaxation_delta_);
-  }
-
-  // Evaluate initial trajectory
+  initialize_line_search();
+  initialize_barrier();
   evaluateTrajectory(context);
   resetFilter(context);
 }
 
 std::string LogDDPSolver::getSolverName() const { return "LogDDP"; }
 
-// === Hook implementations ===
-
 void LogDDPSolver::preIterationSetup(CDDP &context) {
   evaluateTrajectory(context);
   resetFilter(context);
@@ -213,22 +136,15 @@ void LogDDPSolver::preIterationSetup(CDDP &context) {
 
 bool LogDDPSolver::handleBackwardPassRegularizationLimit(
     CDDP & /*context*/, std::string &termination_reason) {
-  // LogDDP treats regularization exhaustion as converged — the current
-  // barrier-penalized trajectory is typically still usable.
   termination_reason = "RegularizationLimitReached_Converged";
-  return true; // converged
+  return true;
 }
 
 void LogDDPSolver::applyForwardPassResult(CDDP &context,
                                           const ForwardPassResult &result) {
-  // Call base to update X_, U_, cost_, merit_function_, alpha_pr_, alpha_du_
   CDDPSolverBase::applyForwardPassResult(context, result);
-
-  // Also update LogDDP-specific state
-  if (result.dynamics_trajectory) {
-    F_ = *result.dynamics_trajectory;
-  }
   constraint_violation_ = result.constraint_violation;
+  context.inf_pr_ = result.constraint_violation;
 }
 
 bool LogDDPSolver::checkConvergence(CDDP &context, double dJ, double dL,
@@ -264,23 +180,20 @@ void LogDDPSolver::postIterationUpdate(CDDP &context,
                                        bool forward_pass_success) {
   const CDDPOptions &options = context.getOptions();
 
-  // Barrier update logic
   if (forward_pass_success) {
     mu_ = std::max(options.log_barrier.barrier.mu_min_value,
                    mu_ * options.log_barrier.barrier.mu_update_factor);
   } else {
     mu_ = std::min(options.log_barrier.barrier.mu_initial, mu_ * 5.0);
   }
+
   relaxed_log_barrier_->setBarrierCoeff(mu_);
   relaxed_log_barrier_->setRelaxationDelta(relaxation_delta_);
   resetFilter(context);
 }
 
 void LogDDPSolver::recordIterationHistory(const CDDP &context) {
-  // Call base to record common metrics
   CDDPSolverBase::recordIterationHistory(context);
-
-  // Add LogDDP-specific metrics
   history_.barrier_mu.push_back(mu_);
 }
 
@@ -302,258 +215,165 @@ void LogDDPSolver::printIteration(int iter, const CDDP &context) const {
 
   std::cout << std::setw(4) << iter << " " << std::setw(12) << std::scientific
             << std::setprecision(4) << context.cost_ << " " << std::setw(12)
-            << std::scientific << std::setprecision(4) << context.merit_function_
-            << " " << std::setw(10) << std::scientific << std::setprecision(2)
-            << context.inf_du_ << " " << std::setw(8) << std::fixed
-            << std::setprecision(1) << std::log10(context.regularization_)
-            << " " << std::setw(8) << std::fixed << std::setprecision(4)
-            << context.alpha_pr_ << " " << std::setw(8) << std::fixed
-            << std::setprecision(1) << std::log10(mu_) << " " << std::setw(10)
-            << std::scientific << std::setprecision(2) << constraint_violation_
-            << std::endl;
+            << std::scientific << std::setprecision(4)
+            << context.merit_function_ << " " << std::setw(10)
+            << std::scientific << std::setprecision(2) << context.inf_du_
+            << " " << std::setw(8) << std::fixed << std::setprecision(1)
+            << std::log10(context.regularization_) << " " << std::setw(8)
+            << std::fixed << std::setprecision(4) << context.alpha_pr_ << " "
+            << std::setw(8) << std::fixed << std::setprecision(1)
+            << std::log10(mu_) << " " << std::setw(10) << std::scientific
+            << std::setprecision(2) << constraint_violation_ << std::endl;
 }
 
-// === Private helper methods ===
-
 void LogDDPSolver::evaluateTrajectory(CDDP &context) {
   const int horizon = context.getHorizon();
-  double cost = 0.0;
 
-  // Rollout dynamics and calculate cost
+  context.X_.resize(horizon + 1);
+  context.X_[0] = context.getInitialState();
+  context.cost_ = 0.0;
+
   for (int t = 0; t < horizon; ++t) {
     const Eigen::VectorXd &x_t = context.X_[t];
     const Eigen::VectorXd &u_t = context.U_[t];
 
-    // Compute stage cost using the guessed state/control
-    cost += context.getObjective().running_cost(x_t, u_t, t);
-
-    // Compute dynamics
-    F_[t] = context.getSystem().getDiscreteDynamics(x_t, u_t,
-                                                    t * context.getTimestep());
+    context.cost_ += context.getObjective().running_cost(x_t, u_t, t);
+    context.X_[t + 1] = context.getSystem().getDiscreteDynamics(
+        x_t, u_t, t * context.getTimestep());
   }
 
-  // Add terminal cost based on the final guessed state
-  cost += context.getObjective().terminal_cost(context.X_.back());
-
-  // Store the initial total cost
-  context.cost_ = cost;
+  context.cost_ += context.getObjective().terminal_cost(context.X_.back());
 }
 
 void LogDDPSolver::resetFilter(CDDP &context) {
-  // Evaluate log-barrier cost (includes path constraints)
-  context.merit_function_ = context.cost_;
-  constraint_violation_ = 0.0;
-  double defect_violation = 0.0;
+  context.merit_function_ =
+      computeBarrierMerit(context, context.X_, context.U_, &constraint_violation_);
+  context.inf_pr_ = constraint_violation_;
+}
 
-  const auto &constraint_set = context.getConstraintSet();
+void LogDDPSolver::augmentRunningCostDerivatives(
+    const CDDP &context, int t, const Eigen::VectorXd &x,
+    const Eigen::VectorXd &u, Eigen::VectorXd &l_x, Eigen::VectorXd &l_u,
+    Eigen::MatrixXd &l_xx, Eigen::MatrixXd &l_uu,
+    Eigen::MatrixXd &l_ux) const {
+  for (const auto &[name, constraint] : context.getConstraintSet()) {
+    auto [L_x_relaxed, L_u_relaxed] =
+        relaxed_log_barrier_->getGradients(*constraint, x, u, t);
+    l_x += L_x_relaxed;
+    l_u += L_u_relaxed;
+
+    auto [L_xx_relaxed, L_uu_relaxed, L_ux_relaxed] =
+        relaxed_log_barrier_->getHessians(*constraint, x, u, t);
+    l_xx += L_xx_relaxed;
+    l_uu += L_uu_relaxed;
+    l_ux += L_ux_relaxed;
+  }
+}
+
+void LogDDPSolver::augmentTerminalCostDerivatives(const CDDP &context,
+                                                  const Eigen::VectorXd &x_N,
+                                                  Eigen::VectorXd &V_x,
+                                                  Eigen::MatrixXd &V_xx) const {
+  Eigen::VectorXd dummy_u = Eigen::VectorXd::Zero(context.getControlDim());
+  const int horizon = context.getHorizon();
+
+  for (const auto &[name, constraint] : context.getTerminalConstraintSet()) {
+    auto [L_x_relaxed, L_u_relaxed] =
+        relaxed_log_barrier_->getGradients(*constraint, x_N, dummy_u, horizon);
+    (void)L_u_relaxed;
+    V_x += L_x_relaxed;
+
+    auto [L_xx_relaxed, L_uu_relaxed, L_ux_relaxed] =
+        relaxed_log_barrier_->getHessians(*constraint, x_N, dummy_u, horizon);
+    (void)L_uu_relaxed;
+    (void)L_ux_relaxed;
+    V_xx += L_xx_relaxed;
+  }
+}
+
+double LogDDPSolver::computeBarrierMerit(
+    const CDDP &context, const std::vector<Eigen::VectorXd> &X,
+    const std::vector<Eigen::VectorXd> &U, double *max_constraint_violation) const {
+  double merit = 0.0;
+  double max_violation = 0.0;
 
-  // Calculate path constraint terms and violation
   for (int t = 0; t < context.getHorizon(); ++t) {
-    for (const auto &constraint_pair : constraint_set) {
-      const std::string &constraint_name = constraint_pair.first;
-      Eigen::VectorXd g_t =
-          constraint_pair.second->evaluate(context.X_[t], context.U_[t]) -
-          constraint_pair.second->getUpperBound();
-      G_[constraint_name][t] = g_t;
-      context.merit_function_ += relaxed_log_barrier_->evaluate(
-          *constraint_pair.second, context.X_[t], context.U_[t]);
-
-      for (int i = 0; i < g_t.size(); ++i) {
-        if (g_t(i) > 0.0) {
-          constraint_violation_ += g_t(i);
-        }
-      }
+    merit += context.getObjective().running_cost(X[t], U[t], t);
+
+    for (const auto &[name, constraint] : context.getConstraintSet()) {
+      merit += relaxed_log_barrier_->evaluate(*constraint, X[t], U[t], t);
+      max_violation = std::max(
+          max_violation, constraint->computeViolation(X[t], U[t], t));
     }
+  }
+
+  merit += context.getObjective().terminal_cost(X.back());
 
-    // Add defect violation penalty
-    Eigen::VectorXd d = F_[t] - context.X_[t + 1];
-    defect_violation += d.lpNorm<1>();
+  Eigen::VectorXd dummy_u = Eigen::VectorXd::Zero(context.getControlDim());
+  for (const auto &[name, constraint] : context.getTerminalConstraintSet()) {
+    merit +=
+        relaxed_log_barrier_->evaluate(*constraint, X.back(), dummy_u, context.getHorizon());
+    max_violation = std::max(max_violation, constraint->computeViolation(
+                                                X.back(), dummy_u,
+                                                context.getHorizon()));
+  }
+
+  if (max_constraint_violation != nullptr) {
+    *max_constraint_violation = max_violation;
   }
-  constraint_violation_ += defect_violation;
-  context.inf_pr_ = constraint_violation_;
-}
 
-// === Backward pass ===
+  return merit;
+}
 
 bool LogDDPSolver::backwardPass(CDDP &context) {
   const CDDPOptions &options = context.getOptions();
   const int state_dim = context.getStateDim();
   const int control_dim = context.getControlDim();
   const int horizon = context.getHorizon();
-  const double timestep = context.getTimestep();
-  const auto &constraint_set = context.getConstraintSet();
-
-  // Pre-compute dynamics jacobians and hessians for all time steps
-  // Note: LogDDP needs continuous-time Jacobians for the backward pass
-  // because it handles defect terms (d = F[t] - X[t+1]) which require
-  // the A = dt*Fx + I, B = dt*Fu formulation. We compute these inline
-  // since the base precomputeDynamicsDerivatives stores discrete-time versions.
-  const int MIN_HORIZON_FOR_PARALLEL = 20;
-  const bool use_parallel =
-      options.enable_parallel && horizon >= MIN_HORIZON_FOR_PARALLEL;
-
-  // Resize storage
-  F_x_.resize(horizon);
-  F_u_.resize(horizon);
-  F_xx_.resize(horizon);
-  F_uu_.resize(horizon);
-  F_ux_.resize(horizon);
-
-  if (!use_parallel) {
-    for (int t = 0; t < horizon; ++t) {
-      const Eigen::VectorXd &x = context.X_[t];
-      const Eigen::VectorXd &u = context.U_[t];
-
-      const auto [Fx, Fu] =
-          context.getSystem().getJacobians(x, u, t * timestep);
-      F_x_[t] = Fx;
-      F_u_[t] = Fu;
-
-      if (!options.use_ilqr) {
-        const auto hessians =
-            context.getSystem().getHessians(x, u, t * timestep);
-        F_xx_[t] = std::get<0>(hessians);
-        F_uu_[t] = std::get<1>(hessians);
-        F_ux_[t] = std::get<2>(hessians);
-      } else {
-        F_xx_[t] = std::vector<Eigen::MatrixXd>();
-        F_uu_[t] = std::vector<Eigen::MatrixXd>();
-        F_ux_[t] = std::vector<Eigen::MatrixXd>();
-      }
-    }
-  } else {
-    const int num_threads =
-        std::min(options.num_threads,
-                 static_cast<int>(std::thread::hardware_concurrency()));
-    const int chunk_size = std::max(1, horizon / num_threads);
-
-    std::vector<std::future<void>> futures;
-    futures.reserve(num_threads);
-
-    for (int thread_id = 0; thread_id < num_threads; ++thread_id) {
-      int start_t = thread_id * chunk_size;
-      int end_t = (thread_id == num_threads - 1) ? horizon
-                                                 : (thread_id + 1) * chunk_size;
-
-      if (start_t >= horizon)
-        break;
-
-      futures.push_back(
-          std::async(std::launch::async, [this, &context, &options, start_t,
-                                          end_t, timestep]() {
-            for (int t = start_t; t < end_t; ++t) {
-              const Eigen::VectorXd &x = context.X_[t];
-              const Eigen::VectorXd &u = context.U_[t];
-
-              const auto [Fx, Fu] =
-                  context.getSystem().getJacobians(x, u, t * timestep);
-              F_x_[t] = Fx;
-              F_u_[t] = Fu;
-
-              if (!options.use_ilqr) {
-                const auto hessians =
-                    context.getSystem().getHessians(x, u, t * timestep);
-                F_xx_[t] = std::get<0>(hessians);
-                F_uu_[t] = std::get<1>(hessians);
-                F_ux_[t] = std::get<2>(hessians);
-              } else {
-                F_xx_[t] = std::vector<Eigen::MatrixXd>();
-                F_uu_[t] = std::vector<Eigen::MatrixXd>();
-                F_ux_[t] = std::vector<Eigen::MatrixXd>();
-              }
-            }
-          }));
-    }
 
-    for (auto &future : futures) {
-      try {
-        if (future.valid()) {
-          future.get();
-        }
-      } catch (const std::exception &e) {
-        if (options.verbose) {
-          std::cerr
-              << "LogDDP: Dynamics derivatives computation thread failed: "
-              << e.what() << std::endl;
-        }
-        throw;
-      }
-    }
-  }
-
-  if (options.debug) {
-    std::cout << "[LogDDP] Pre-computed dynamics derivatives for " << horizon
-              << " time steps using "
-              << (use_parallel ? "parallel" : "sequential") << " computation"
-              << std::endl;
-  }
+  precomputeDynamicsDerivatives(context, 20);
 
-  // Terminal cost and derivatives (V_x, V_xx at t=N)
   Eigen::VectorXd V_x =
       context.getObjective().getFinalCostGradient(context.X_.back());
   Eigen::MatrixXd V_xx =
       context.getObjective().getFinalCostHessian(context.X_.back());
-  V_xx = 0.5 * (V_xx + V_xx.transpose()); // Symmetrize
+  augmentTerminalCostDerivatives(context, context.X_.back(), V_x, V_xx);
+  V_xx = 0.5 * (V_xx + V_xx.transpose());
 
   dV_ = Eigen::Vector2d::Zero();
   double Qu_err = 0.0;
+  double norm_Vx = V_x.lpNorm<1>();
 
-  // Backward Riccati recursion
   for (int t = horizon - 1; t >= 0; --t) {
     const Eigen::VectorXd &x = context.X_[t];
     const Eigen::VectorXd &u = context.U_[t];
-    const Eigen::VectorXd &f = F_[t];
-    const Eigen::VectorXd &d = f - context.X_[t + 1]; // Defect
+    const Eigen::MatrixXd &A = F_x_[t];
+    const Eigen::MatrixXd &B = F_u_[t];
 
-    // Use pre-computed continuous-time dynamics Jacobians
-    const Eigen::MatrixXd &Fx = F_x_[t];
-    const Eigen::MatrixXd &Fu = F_u_[t];
-    const Eigen::MatrixXd &A =
-        timestep * Fx + Eigen::MatrixXd::Identity(state_dim, state_dim);
-    const Eigen::MatrixXd &B = timestep * Fu;
-
-    // Cost derivatives at (x_t, u_t)
     auto [l_x, l_u] = context.getObjective().getRunningCostGradients(x, u, t);
     auto [l_xx, l_uu, l_ux] =
         context.getObjective().getRunningCostHessians(x, u, t);
+    augmentRunningCostDerivatives(context, t, x, u, l_x, l_u, l_xx, l_uu, l_ux);
 
-    Eigen::VectorXd Q_x = l_x + A.transpose() * (V_x + V_xx * d);
-    Eigen::VectorXd Q_u = l_u + B.transpose() * (V_x + V_xx * d);
+    Eigen::VectorXd Q_x = l_x + A.transpose() * V_x;
+    Eigen::VectorXd Q_u = l_u + B.transpose() * V_x;
     Eigen::MatrixXd Q_xx = l_xx + A.transpose() * V_xx * A;
     Eigen::MatrixXd Q_ux = l_ux + B.transpose() * V_xx * A;
     Eigen::MatrixXd Q_uu = l_uu + B.transpose() * V_xx * B;
 
-    // Add state hessian term if not using iLQR
     if (!options.use_ilqr) {
       const auto &Fxx = F_xx_[t];
       const auto &Fuu = F_uu_[t];
       const auto &Fux = F_ux_[t];
-
       for (int i = 0; i < state_dim; ++i) {
-        Q_xx += timestep * V_x(i) * Fxx[i];
-        Q_ux += timestep * V_x(i) * Fux[i];
-        Q_uu += timestep * V_x(i) * Fuu[i];
+        Q_xx += V_x(i) * Fxx[i];
+        Q_ux += V_x(i) * Fux[i];
+        Q_uu += V_x(i) * Fuu[i];
       }
     }
 
-    // Apply Log-barrier cost gradients and Hessians
-    for (const auto &constraint_pair : constraint_set) {
-      auto [L_x_relaxed, L_u_relaxed] =
-          relaxed_log_barrier_->getGradients(*constraint_pair.second, x, u);
-      Q_x += L_x_relaxed;
-      Q_u += L_u_relaxed;
-
-      auto [L_xx_relaxed, L_uu_relaxed, L_ux_relaxed] =
-          relaxed_log_barrier_->getHessians(*constraint_pair.second, x, u);
-      Q_xx += L_xx_relaxed;
-      Q_uu += L_uu_relaxed;
-      Q_ux += L_ux_relaxed;
-    }
-
-    // Regularization
-    Eigen::MatrixXd Q_uu_reg = Q_uu;
+    Eigen::MatrixXd Q_uu_reg = 0.5 * (Q_uu + Q_uu.transpose());
     Q_uu_reg.diagonal().array() += context.regularization_;
-    Q_uu_reg = 0.5 * (Q_uu_reg + Q_uu_reg.transpose()); // symmetrize
 
     Eigen::LDLT<Eigen::MatrixXd> ldlt(Q_uu_reg);
     if (ldlt.info() != Eigen::Success) {
@@ -563,45 +383,39 @@ bool LogDDPSolver::backwardPass(CDDP &context) {
       return false;
     }
 
-    Eigen::MatrixXd bigRHS(control_dim, 1 + state_dim);
-    bigRHS.col(0) = Q_u;
-    Eigen::MatrixXd M = Q_ux;
-    for (int col = 0; col < state_dim; col++) {
-      bigRHS.col(col + 1) = M.col(col);
-    }
-
-    Eigen::MatrixXd kK = -ldlt.solve(bigRHS);
+    Eigen::MatrixXd rhs(control_dim, 1 + state_dim);
+    rhs.col(0) = Q_u;
+    rhs.rightCols(state_dim) = Q_ux;
 
-    // parse out feedforward (ku) and feedback (Ku)
-    Eigen::VectorXd k_u = kK.col(0);
-    Eigen::MatrixXd K_u(control_dim, state_dim);
-    for (int col = 0; col < state_dim; col++) {
-      K_u.col(col) = kK.col(col + 1);
-    }
+    Eigen::MatrixXd gains = -ldlt.solve(rhs);
+    const Eigen::VectorXd k_u = gains.col(0);
+    const Eigen::MatrixXd K_u = gains.rightCols(state_dim);
 
-    // Save gains
     k_u_[t] = k_u;
     K_u_[t] = K_u;
 
-    // Compute value function approximation
-    Eigen::Vector2d dV_step;
-    dV_step << Q_u.dot(k_u), 0.5 * k_u.dot(Q_uu * k_u);
-    dV_ = dV_ + dV_step;
+    dV_(0) += Q_u.dot(k_u);
+    dV_(1) += 0.5 * k_u.dot(Q_uu * k_u);
+
     V_x = Q_x + K_u.transpose() * Q_uu * k_u + Q_ux.transpose() * k_u +
           K_u.transpose() * Q_u;
     V_xx = Q_xx + K_u.transpose() * Q_uu * K_u + Q_ux.transpose() * K_u +
            K_u.transpose() * Q_ux;
-    V_xx = 0.5 * (V_xx + V_xx.transpose()); // Symmetrize Hessian
+    V_xx = 0.5 * (V_xx + V_xx.transpose());
 
-    // Compute optimality gap (Inf-norm) for convergence check
     Qu_err = std::max(Qu_err, Q_u.lpNorm<Eigen::Infinity>());
+    norm_Vx += V_x.lpNorm<1>();
   }
-  context.inf_du_ = Qu_err;
+
+  double scaling_factor = options.termination_scaling_max_factor;
+  scaling_factor = std::max(scaling_factor, norm_Vx / (horizon * state_dim)) /
+                   scaling_factor;
+  context.inf_du_ = Qu_err / scaling_factor;
 
   if (options.debug) {
     std::cout << "[LogDDP Backward Pass]\n"
               << "    Qu_err:  " << std::scientific << std::setprecision(4)
-              << Qu_err << "\n"
+              << context.inf_du_ << "\n"
               << "    rf_err:  " << std::scientific << std::setprecision(4)
               << constraint_violation_ << "\n"
               << "    dV:      " << std::scientific << std::setprecision(4)
@@ -611,11 +425,8 @@ bool LogDDPSolver::backwardPass(CDDP &context) {
   return true;
 }
 
-// === Forward pass ===
-
 ForwardPassResult LogDDPSolver::forwardPass(CDDP &context, double alpha) {
   const CDDPOptions &options = context.getOptions();
-  const auto &constraint_set = context.getConstraintSet();
 
   ForwardPassResult result;
   result.success = false;
@@ -623,120 +434,50 @@ ForwardPassResult LogDDPSolver::forwardPass(CDDP &context, double alpha) {
   result.merit_function = std::numeric_limits<double>::infinity();
   result.alpha_pr = alpha;
 
-  const int horizon = context.getHorizon();
-  const int state_dim = context.getStateDim();
-
-  // Initialize trajectories
   result.state_trajectory = context.X_;
   result.control_trajectory = context.U_;
   result.state_trajectory[0] = context.getInitialState();
 
-  std::vector<Eigen::VectorXd> F_new = F_;
-
   double cost_new = 0.0;
-  double merit_function_new = 0.0;
-  double rp_err = 0.0;
-  double rf_err = 0.0;
-
-  // Rollout loop with multi-shooting logic from original
-  for (int t = 0; t < horizon; ++t) {
-    const Eigen::VectorXd delta_x_t =
-        result.state_trajectory[t] - context.X_[t];
 
-    // Update control
+  for (int t = 0; t < context.getHorizon(); ++t) {
+    const Eigen::VectorXd delta_x_t = result.state_trajectory[t] - context.X_[t];
     result.control_trajectory[t] =
         context.U_[t] + alpha * k_u_[t] + K_u_[t] * delta_x_t;
 
-    // --- Rollout Logic from original ---
-    Eigen::VectorXd dynamics_eval_for_F_new_t;
-
-    // Determine if the *next* step (t+1) starts a new segment boundary
-    bool is_segment_boundary = (ms_segment_length_ > 0) &&
-                               ((t + 1) % ms_segment_length_ == 0) &&
-                               (t + 1 < horizon);
-    bool apply_gap_closing_strategy = is_segment_boundary;
-
-    if (apply_gap_closing_strategy) {
-      if (options.log_barrier.rollout_type == "nonlinear") {
-        F_new[t] = context.getSystem().getDiscreteDynamics(
-            result.state_trajectory[t], result.control_trajectory[t],
-            t * context.getTimestep());
-        result.state_trajectory[t + 1] = context.X_[t + 1] +
-                                         (F_new[t] - F_[t]) +
-                                         alpha * (F_[t] - context.X_[t + 1]);
-      } else if (options.log_barrier.rollout_type == "hybrid") {
-        F_new[t] = context.getSystem().getDiscreteDynamics(
-            result.state_trajectory[t], result.control_trajectory[t],
-            t * context.getTimestep());
-        const auto [Fx, Fu] = context.getSystem().getJacobians(
-            context.X_[t], context.U_[t], t * context.getTimestep());
-        const Eigen::MatrixXd A =
-            Eigen::MatrixXd::Identity(state_dim, state_dim) +
-            context.getTimestep() * Fx;
-        const Eigen::MatrixXd B = context.getTimestep() * Fu;
-        result.state_trajectory[t + 1] =
-            context.X_[t + 1] + (A + B * K_u_[t]) * delta_x_t +
-            alpha * (B * k_u_[t] + F_[t] - context.X_[t + 1]);
-      }
-    } else {
-      result.state_trajectory[t + 1] = context.getSystem().getDiscreteDynamics(
-          result.state_trajectory[t], result.control_trajectory[t],
-          t * context.getTimestep());
-      F_new[t] = result.state_trajectory[t + 1];
+    if (!result.control_trajectory[t].allFinite()) {
+      return result;
     }
 
-    // Robustness check
-    if (!result.state_trajectory[t + 1].allFinite() ||
-        !result.control_trajectory[t].allFinite()) {
+    cost_new += context.getObjective().running_cost(result.state_trajectory[t],
+                                                    result.control_trajectory[t], t);
+    result.state_trajectory[t + 1] = context.getSystem().getDiscreteDynamics(
+        result.state_trajectory[t], result.control_trajectory[t],
+        t * context.getTimestep());
+
+    if (!result.state_trajectory[t + 1].allFinite()) {
       if (options.debug) {
         std::cerr
             << "[LogDDP Forward Pass] NaN/Inf detected during rollout at t="
             << t << " for alpha=" << alpha << std::endl;
       }
-      result.success = false;
       return result;
     }
   }
 
-  // Cost computation and filter line-search from original
-  for (int t = 0; t < horizon; ++t) {
-    cost_new += context.getObjective().running_cost(
-        result.state_trajectory[t], result.control_trajectory[t], t);
-
-    for (const auto &constraint_pair : constraint_set) {
-      const std::string &constraint_name = constraint_pair.first;
-      Eigen::VectorXd g_t =
-          constraint_pair.second->evaluate(result.state_trajectory[t],
-                                           result.control_trajectory[t]) -
-          constraint_pair.second->getUpperBound();
-      merit_function_new += relaxed_log_barrier_->evaluate(
-          *constraint_pair.second, result.state_trajectory[t],
-          result.control_trajectory[t]);
-
-      for (int i = 0; i < g_t.size(); ++i) {
-        if (g_t(i) > 0.0) {
-          rp_err += g_t(i);
-        }
-      }
-    }
-
-    Eigen::VectorXd d = F_new[t] - result.state_trajectory[t + 1];
-    rf_err += d.lpNorm<1>();
-  }
+  cost_new += context.getObjective().terminal_cost(result.state_trajectory.back());
 
-  cost_new +=
-      context.getObjective().terminal_cost(result.state_trajectory.back());
-  merit_function_new += cost_new;
-
-  // Filter-based acceptance using original logic with new options structure
-  double constraint_violation_old = constraint_violation_;
-  double constraint_violation_new = rf_err + rp_err;
-  double merit_function_old = context.merit_function_;
-  bool filter_acceptance = false;
-  double expected_improvement = alpha * dV_(0);
+  double constraint_violation_new = 0.0;
+  double merit_function_new = computeBarrierMerit(
+      context, result.state_trajectory, result.control_trajectory,
+      &constraint_violation_new);
 
+  const double constraint_violation_old = constraint_violation_;
+  const double merit_function_old = context.merit_function_;
+  const double expected_improvement = alpha * dV_(0);
   const auto &filter_opts = options.filter;
 
+  bool filter_acceptance = false;
   if (constraint_violation_new > filter_opts.max_violation_threshold) {
     if (constraint_violation_new <
         (1.0 - filter_opts.violation_acceptance_threshold) *
@@ -745,9 +486,10 @@ ForwardPassResult LogDDPSolver::forwardPass(CDDP &context, double alpha) {
     }
   } else if (std::max(constraint_violation_new, constraint_violation_old) <
                  filter_opts.min_violation_for_armijo_check &&
-             expected_improvement < 0) {
-    if (merit_function_new < merit_function_old + filter_opts.armijo_constant *
-                                                      expected_improvement) {
+             expected_improvement < 0.0) {
+    if (merit_function_new < merit_function_old +
+                                 filter_opts.armijo_constant *
+                                     expected_improvement) {
       filter_acceptance = true;
     }
   } else {
@@ -766,7 +508,6 @@ ForwardPassResult LogDDPSolver::forwardPass(CDDP &context, double alpha) {
     result.cost = cost_new;
     result.merit_function = merit_function_new;
     result.constraint_violation = constraint_violation_new;
-    result.dynamics_trajectory = F_new;
   }
 
   return result;
diff --git a/tests/cddp_core/test_alddp_solver.cpp b/tests/cddp_core/test_alddp_solver.cpp
new file mode 100644
index 0000000..ee8bf15
--- /dev/null
+++ b/tests/cddp_core/test_alddp_solver.cpp
@@ -0,0 +1,660 @@
+/*
+ Copyright 2025 Tomo Sasaki
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      https://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+#include <cmath>
+#include <iostream>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include "cddp.hpp"
+
+// Helper: create CDDP options for ALDDP tests
+static cddp::CDDPOptions makeALDDPOptions() {
+  cddp::CDDPOptions options;
+  options.max_iterations = 500;
+  options.tolerance = 1e-3;
+  options.acceptable_tolerance = 1e-4;
+  options.verbose = true;
+  options.debug = false;
+
+  options.alddp.penalty_init = 1.0;
+  options.alddp.penalty_max = 1e6;
+  options.alddp.penalty_update_factor = 10.0;
+  options.alddp.max_inner_iterations = 30;
+  options.alddp.max_outer_iterations = 15;
+  options.alddp.constraint_tolerance = 1e-4;
+  options.alddp.inner_tolerance_init = 1e-2;
+  options.alddp.use_sqrt_backward_pass = false;
+  return options;
+}
+
+// ============================================================================
+// CartPole tests
+// ============================================================================
+
+TEST(ALDDPTest, SolveCartPole) {
+  // CartPole swing-up with control bounds only (BoxQP path)
+  const int state_dim = 4;
+  const int control_dim = 1;
+  const int horizon = 200;
+  const double timestep = 0.02;
+
+  auto system = std::make_unique<cddp::CartPole>(timestep, "rk4");
+
+  Eigen::MatrixXd Q = Eigen::MatrixXd::Zero(state_dim, state_dim);
+  Eigen::MatrixXd R = 0.01 * Eigen::MatrixXd::Identity(control_dim, control_dim);
+  Eigen::MatrixXd Qf = 100.0 * Eigen::MatrixXd::Identity(state_dim, state_dim);
+
+  Eigen::VectorXd goal_state = Eigen::VectorXd::Zero(state_dim);
+  goal_state << 0.0, M_PI, 0.0, 0.0; // Cart at origin, pole upright
+
+  std::vector<Eigen::VectorXd> empty_ref;
+  auto objective = std::make_unique<cddp::QuadraticObjective>(
+      Q, R, Qf, goal_state, empty_ref, timestep);
+
+  Eigen::VectorXd initial_state = Eigen::VectorXd::Zero(state_dim);
+  // Cart at origin, pole down
+
+  cddp::CDDP solver(initial_state, goal_state, horizon, timestep);
+  solver.setDynamicalSystem(std::move(system));
+  solver.setObjective(std::move(objective));
+
+  // Control bounds
+  Eigen::VectorXd u_lb(control_dim), u_ub(control_dim);
+  u_lb << -20.0;
+  u_ub << 20.0;
+  solver.addPathConstraint("ControlConstraint",
+      std::make_unique<cddp::ControlConstraint>(u_lb, u_ub));
+
+  auto options = makeALDDPOptions();
+  solver.setOptions(options);
+  solver.setInitialTrajectory(
+      std::vector<Eigen::VectorXd>(horizon + 1, initial_state),
+      std::vector<Eigen::VectorXd>(horizon, Eigen::VectorXd::Zero(control_dim)));
+
+  cddp::CDDPSolution solution = solver.solve(cddp::SolverType::ALDDP);
+
+  EXPECT_TRUE(solution.status_message.find("Optimal") != std::string::npos ||
+              solution.status_message.find("Acceptable") != std::string::npos ||
+              solution.status_message.find("RegularizationLimit") != std::string::npos);
+  EXPECT_GT(solution.iterations_completed, 0);
+
+  // Check that final state is near goal
+  const auto &x_final = solution.state_trajectory.back();
+  EXPECT_NEAR(x_final(1), M_PI, 0.5); // Pole angle near upright
+}
+
+TEST(ALDDPTest, SolveCartPoleWithStateBounds) {
+  // CartPole with state constraints (cart position limits)
+  const int state_dim = 4;
+  const int control_dim = 1;
+  const int horizon = 200;
+  const double timestep = 0.02;
+
+  auto system = std::make_unique<cddp::CartPole>(timestep, "rk4");
+
+  Eigen::MatrixXd Q = Eigen::MatrixXd::Zero(state_dim, state_dim);
+  Eigen::MatrixXd R = 0.01 * Eigen::MatrixXd::Identity(control_dim, control_dim);
+  Eigen::MatrixXd Qf = 100.0 * Eigen::MatrixXd::Identity(state_dim, state_dim);
+
+  Eigen::VectorXd goal_state = Eigen::VectorXd::Zero(state_dim);
+  goal_state << 0.0, M_PI, 0.0, 0.0;
+
+  std::vector<Eigen::VectorXd> empty_ref;
+  auto objective = std::make_unique<cddp::QuadraticObjective>(
+      Q, R, Qf, goal_state, empty_ref, timestep);
+
+  Eigen::VectorXd initial_state = Eigen::VectorXd::Zero(state_dim);
+
+  cddp::CDDP solver(initial_state, goal_state, horizon, timestep);
+  solver.setDynamicalSystem(std::move(system));
+  solver.setObjective(std::move(objective));
+
+  // Control bounds
+  Eigen::VectorXd u_lb(control_dim), u_ub(control_dim);
+  u_lb << -20.0;
+  u_ub << 20.0;
+  solver.addPathConstraint("ControlConstraint",
+      std::make_unique<cddp::ControlConstraint>(u_lb, u_ub));
+
+  // State bounds: cart position [-2, 2]
+  Eigen::VectorXd x_lb(state_dim), x_ub(state_dim);
+  x_lb << -2.0, -1e6, -1e6, -1e6;
+  x_ub << 2.0, 1e6, 1e6, 1e6;
+  solver.addPathConstraint("StateConstraint",
+      std::make_unique<cddp::StateConstraint>(x_lb, x_ub));
+
+  auto options = makeALDDPOptions();
+  solver.setOptions(options);
+  solver.setInitialTrajectory(
+      std::vector<Eigen::VectorXd>(horizon + 1, initial_state),
+      std::vector<Eigen::VectorXd>(horizon, Eigen::VectorXd::Zero(control_dim)));
+
+  cddp::CDDPSolution solution = solver.solve(cddp::SolverType::ALDDP);
+
+  EXPECT_GT(solution.iterations_completed, 0);
+
+  // Verify cart position stays within bounds (with some tolerance)
+  for (const auto &x : solution.state_trajectory) {
+    EXPECT_GE(x(0), -2.1);
+    EXPECT_LE(x(0), 2.1);
+  }
+}
+
+// ============================================================================
+// Unicycle tests
+// ============================================================================
+
+TEST(ALDDPTest, SolveUnicycle) {
+  // Unicycle path following with control bounds
+  const int state_dim = 3; // x, y, theta
+  const int control_dim = 2; // v, omega
+  const int horizon = 100;
+  const double timestep = 0.1;
+
+  auto system = std::make_unique<cddp::Unicycle>(timestep, "euler");
+
+  Eigen::MatrixXd Q = 0.1 * Eigen::MatrixXd::Identity(state_dim, state_dim);
+  Eigen::MatrixXd R = 0.01 * Eigen::MatrixXd::Identity(control_dim, control_dim);
+  Eigen::MatrixXd Qf = 100.0 * Eigen::MatrixXd::Identity(state_dim, state_dim);
+
+  Eigen::VectorXd goal_state(state_dim);
+  goal_state << 5.0, 5.0, 0.0;
+
+  std::vector<Eigen::VectorXd> empty_ref;
+  auto objective = std::make_unique<cddp::QuadraticObjective>(
+      Q, R, Qf, goal_state, empty_ref, timestep);
+
+  Eigen::VectorXd initial_state = Eigen::VectorXd::Zero(state_dim);
+
+  cddp::CDDP solver(initial_state, goal_state, horizon, timestep);
+  solver.setDynamicalSystem(std::move(system));
+  solver.setObjective(std::move(objective));
+
+  // Control bounds
+  Eigen::VectorXd u_lb(control_dim), u_ub(control_dim);
+  u_lb << -2.0, -2.0;
+  u_ub << 2.0, 2.0;
+  solver.addPathConstraint("ControlConstraint",
+      std::make_unique<cddp::ControlConstraint>(u_lb, u_ub));
+
+  auto options = makeALDDPOptions();
+  solver.setOptions(options);
+  solver.setInitialTrajectory(
+      std::vector<Eigen::VectorXd>(horizon + 1, initial_state),
+      std::vector<Eigen::VectorXd>(horizon, Eigen::VectorXd::Zero(control_dim)));
+
+  cddp::CDDPSolution solution = solver.solve(cddp::SolverType::ALDDP);
+
+  EXPECT_GT(solution.iterations_completed, 0);
+
+  // Check that final state approaches goal
+  const auto &x_final = solution.state_trajectory.back();
+  EXPECT_NEAR(x_final(0), 5.0, 1.0);
+  EXPECT_NEAR(x_final(1), 5.0, 1.0);
+}
+
+TEST(ALDDPTest, SolveUnicycleWithStateBounds) {
+  // Unicycle with state constraints (position bounds)
+  const int state_dim = 3;
+  const int control_dim = 2;
+  const int horizon = 100;
+  const double timestep = 0.1;
+
+  auto system = std::make_unique<cddp::Unicycle>(timestep, "euler");
+
+  Eigen::MatrixXd Q = 0.1 * Eigen::MatrixXd::Identity(state_dim, state_dim);
+  Eigen::MatrixXd R = 0.01 * Eigen::MatrixXd::Identity(control_dim, control_dim);
+  Eigen::MatrixXd Qf = 100.0 * Eigen::MatrixXd::Identity(state_dim, state_dim);
+
+  Eigen::VectorXd goal_state(state_dim);
+  goal_state << 5.0, 5.0, 0.0;
+
+  std::vector<Eigen::VectorXd> empty_ref;
+  auto objective = std::make_unique<cddp::QuadraticObjective>(
+      Q, R, Qf, goal_state, empty_ref, timestep);
+
+  Eigen::VectorXd initial_state = Eigen::VectorXd::Zero(state_dim);
+
+  cddp::CDDP solver(initial_state, goal_state, horizon, timestep);
+  solver.setDynamicalSystem(std::move(system));
+  solver.setObjective(std::move(objective));
+
+  Eigen::VectorXd u_lb(control_dim), u_ub(control_dim);
+  u_lb << -2.0, -2.0;
+  u_ub << 2.0, 2.0;
+  solver.addPathConstraint("ControlConstraint",
+      std::make_unique<cddp::ControlConstraint>(u_lb, u_ub));
+
+  // State bounds: y position [0, 8]
+  Eigen::VectorXd x_lb(state_dim), x_ub(state_dim);
+  x_lb << -1e6, 0.0, -1e6;
+  x_ub << 1e6, 8.0, 1e6;
+  solver.addPathConstraint("StateConstraint",
+      std::make_unique<cddp::StateConstraint>(x_lb, x_ub));
+
+  auto options = makeALDDPOptions();
+  solver.setOptions(options);
+  solver.setInitialTrajectory(
+      std::vector<Eigen::VectorXd>(horizon + 1, initial_state),
+      std::vector<Eigen::VectorXd>(horizon, Eigen::VectorXd::Zero(control_dim)));
+
+  cddp::CDDPSolution solution = solver.solve(cddp::SolverType::ALDDP);
+
+  EXPECT_GT(solution.iterations_completed, 0);
+
+  // Verify y-position stays within bounds
+  for (const auto &x : solution.state_trajectory) {
+    EXPECT_GE(x(1), -0.2);
+    EXPECT_LE(x(1), 8.2);
+  }
+}
+
+// ============================================================================
+// Car parking with obstacles
+// ============================================================================
+
+TEST(ALDDPTest, SolveCarParking) {
+  // Car parallel parking with control bounds
+  const int state_dim = 4; // x, y, theta, v
+  const int control_dim = 2; // acceleration, steering rate
+  const int horizon = 100;
+  const double timestep = 0.1;
+
+  auto system = std::make_unique<cddp::Car>(timestep, 2.0, "rk4");
+
+  Eigen::MatrixXd Q = Eigen::MatrixXd::Zero(state_dim, state_dim);
+  Eigen::MatrixXd R = 0.1 * Eigen::MatrixXd::Identity(control_dim, control_dim);
+  Eigen::MatrixXd Qf = 100.0 * Eigen::MatrixXd::Identity(state_dim, state_dim);
+
+  Eigen::VectorXd goal_state(state_dim);
+  goal_state << 5.0, 2.0, 0.0, 0.0; // Park at (5,2), heading=0, stopped
+
+  std::vector<Eigen::VectorXd> empty_ref;
+  auto objective = std::make_unique<cddp::QuadraticObjective>(
+      Q, R, Qf, goal_state, empty_ref, timestep);
+
+  Eigen::VectorXd initial_state = Eigen::VectorXd::Zero(state_dim);
+
+  cddp::CDDP solver(initial_state, goal_state, horizon, timestep);
+  solver.setDynamicalSystem(std::move(system));
+  solver.setObjective(std::move(objective));
+
+  Eigen::VectorXd u_lb(control_dim), u_ub(control_dim);
+  u_lb << -3.0, -1.0;
+  u_ub << 3.0, 1.0;
+  solver.addPathConstraint("ControlConstraint",
+      std::make_unique<cddp::ControlConstraint>(u_lb, u_ub));
+
+  auto options = makeALDDPOptions();
+  solver.setOptions(options);
+  solver.setInitialTrajectory(
+      std::vector<Eigen::VectorXd>(horizon + 1, initial_state),
+      std::vector<Eigen::VectorXd>(horizon, Eigen::VectorXd::Zero(control_dim)));
+
+  cddp::CDDPSolution solution = solver.solve(cddp::SolverType::ALDDP);
+
+  EXPECT_GT(solution.iterations_completed, 0);
+}
+
+// ============================================================================
+// Infeasible start
+// ============================================================================
+
+TEST(ALDDPTest, SolveWithInfeasibleStart) {
+  // CartPole with interpolated (infeasible) state trajectory
+  const int state_dim = 4;
+  const int control_dim = 1;
+  const int horizon = 200;
+  const double timestep = 0.02;
+
+  auto system = std::make_unique<cddp::CartPole>(timestep, "rk4");
+
+  Eigen::MatrixXd Q = Eigen::MatrixXd::Zero(state_dim, state_dim);
+  Eigen::MatrixXd R = 0.01 * Eigen::MatrixXd::Identity(control_dim, control_dim);
+  Eigen::MatrixXd Qf = 100.0 * Eigen::MatrixXd::Identity(state_dim, state_dim);
+
+  Eigen::VectorXd goal_state = Eigen::VectorXd::Zero(state_dim);
+  goal_state << 0.0, M_PI, 0.0, 0.0;
+
+  std::vector<Eigen::VectorXd> empty_ref;
+  auto objective = std::make_unique<cddp::QuadraticObjective>(
+      Q, R, Qf, goal_state, empty_ref, timestep);
+
+  Eigen::VectorXd initial_state = Eigen::VectorXd::Zero(state_dim);
+
+  cddp::CDDP solver(initial_state, goal_state, horizon, timestep);
+  solver.setDynamicalSystem(std::move(system));
+  solver.setObjective(std::move(objective));
+
+  Eigen::VectorXd u_lb(control_dim), u_ub(control_dim);
+  u_lb << -20.0;
+  u_ub << 20.0;
+  solver.addPathConstraint("ControlConstraint",
+      std::make_unique<cddp::ControlConstraint>(u_lb, u_ub));
+
+  // Create an interpolated (dynamically infeasible) initial trajectory
+  std::vector<Eigen::VectorXd> X_init(horizon + 1);
+  for (int t = 0; t <= horizon; ++t) {
+    double alpha = static_cast<double>(t) / horizon;
+    X_init[t] = (1.0 - alpha) * initial_state + alpha * goal_state;
+  }
+
+  auto options = makeALDDPOptions();
+  options.alddp.slack_penalty = 100.0;
+  solver.setOptions(options);
+  solver.setInitialTrajectory(
+      X_init,
+      std::vector<Eigen::VectorXd>(horizon, Eigen::VectorXd::Zero(control_dim)));
+
+  cddp::CDDPSolution solution = solver.solve(cddp::SolverType::ALDDP);
+
+  EXPECT_GT(solution.iterations_completed, 0);
+  // The solver should produce a valid trajectory
+  EXPECT_FALSE(solution.state_trajectory.empty());
+}
+
+// ============================================================================
+// Sqrt backward pass comparison
+// ============================================================================
+
+TEST(ALDDPTest, SolveSqrtBackwardPass) {
+  // Same unicycle problem, compare sqrt vs standard backward pass
+  const int state_dim = 3;
+  const int control_dim = 2;
+  const int horizon = 50;
+  const double timestep = 0.1;
+
+  Eigen::VectorXd goal_state(state_dim);
+  goal_state << 3.0, 3.0, 0.0;
+  Eigen::VectorXd initial_state = Eigen::VectorXd::Zero(state_dim);
+
+  Eigen::MatrixXd Q = 0.1 * Eigen::MatrixXd::Identity(state_dim, state_dim);
+  Eigen::MatrixXd R = 0.01 * Eigen::MatrixXd::Identity(control_dim, control_dim);
+  Eigen::MatrixXd Qf = 100.0 * Eigen::MatrixXd::Identity(state_dim, state_dim);
+
+  std::vector<Eigen::VectorXd> empty_ref;
+
+  Eigen::VectorXd u_lb(control_dim), u_ub(control_dim);
+  u_lb << -2.0, -2.0;
+  u_ub << 2.0, 2.0;
+
+  // Standard backward pass
+  double cost_standard;
+  {
+    auto system = std::make_unique<cddp::Unicycle>(timestep, "euler");
+    auto objective = std::make_unique<cddp::QuadraticObjective>(
+        Q, R, Qf, goal_state, empty_ref, timestep);
+
+    cddp::CDDP solver(initial_state, goal_state, horizon, timestep);
+    solver.setDynamicalSystem(std::move(system));
+    solver.setObjective(std::move(objective));
+    solver.addPathConstraint("ControlConstraint",
+        std::make_unique<cddp::ControlConstraint>(u_lb, u_ub));
+
+    auto opts = makeALDDPOptions();
+    opts.alddp.use_sqrt_backward_pass = false;
+    opts.max_iterations = 200;
+    opts.verbose = false;
+    opts.print_solver_header = false;
+    solver.setOptions(opts);
+    solver.setInitialTrajectory(
+        std::vector<Eigen::VectorXd>(horizon + 1, initial_state),
+        std::vector<Eigen::VectorXd>(horizon, Eigen::VectorXd::Zero(control_dim)));
+
+    auto sol = solver.solve(cddp::SolverType::ALDDP);
+    cost_standard = sol.final_objective;
+  }
+
+  // Sqrt backward pass
+  double cost_sqrt;
+  {
+    auto system = std::make_unique<cddp::Unicycle>(timestep, "euler");
+    auto objective = std::make_unique<cddp::QuadraticObjective>(
+        Q, R, Qf, goal_state, empty_ref, timestep);
+
+    cddp::CDDP solver(initial_state, goal_state, horizon, timestep);
+    solver.setDynamicalSystem(std::move(system));
+    solver.setObjective(std::move(objective));
+    solver.addPathConstraint("ControlConstraint",
+        std::make_unique<cddp::ControlConstraint>(u_lb, u_ub));
+
+    auto opts = makeALDDPOptions();
+    opts.alddp.use_sqrt_backward_pass = true;
+    opts.max_iterations = 200;
+    opts.max_cpu_time = 5.0; // 5 second timeout
+    opts.verbose = false;
+    opts.print_solver_header = false;
+    solver.setOptions(opts);
+    solver.setInitialTrajectory(
+        std::vector<Eigen::VectorXd>(horizon + 1, initial_state),
+        std::vector<Eigen::VectorXd>(horizon, Eigen::VectorXd::Zero(control_dim)));
+
+    auto sol = solver.solve(cddp::SolverType::ALDDP);
+    cost_sqrt = sol.final_objective;
+  }
+
+  // Both should produce finite costs
+  EXPECT_TRUE(std::isfinite(cost_standard));
+  EXPECT_TRUE(std::isfinite(cost_sqrt));
+}
+
+// ============================================================================
+// Convergence failure
+// ============================================================================
+
+TEST(ALDDPTest, ConvergenceFailure) {
+  // Overly constrained problem -- should terminate gracefully
+  const int state_dim = 3;
+  const int control_dim = 2;
+  const int horizon = 50;
+  const double timestep = 0.1;
+
+  auto system = std::make_unique<cddp::Unicycle>(timestep, "euler");
+
+  Eigen::MatrixXd Q = Eigen::MatrixXd::Identity(state_dim, state_dim);
+  Eigen::MatrixXd R = 0.01 * Eigen::MatrixXd::Identity(control_dim, control_dim);
+  Eigen::MatrixXd Qf = 100.0 * Eigen::MatrixXd::Identity(state_dim, state_dim);
+
+  Eigen::VectorXd goal_state(state_dim);
+  goal_state << 100.0, 100.0, 0.0; // Very far goal
+
+  std::vector<Eigen::VectorXd> empty_ref;
+  auto objective = std::make_unique<cddp::QuadraticObjective>(
+      Q, R, Qf, goal_state, empty_ref, timestep);
+
+  Eigen::VectorXd initial_state = Eigen::VectorXd::Zero(state_dim);
+
+  cddp::CDDP solver(initial_state, goal_state, horizon, timestep);
+  solver.setDynamicalSystem(std::move(system));
+  solver.setObjective(std::move(objective));
+
+  // Very tight control bounds
+  Eigen::VectorXd u_lb(control_dim), u_ub(control_dim);
+  u_lb << -0.1, -0.1;
+  u_ub << 0.1, 0.1;
+  solver.addPathConstraint("ControlConstraint",
+      std::make_unique<cddp::ControlConstraint>(u_lb, u_ub));
+
+  // Very tight state bounds (impossible to reach goal)
+  Eigen::VectorXd x_lb(state_dim), x_ub(state_dim);
+  x_lb << -1.0, -1.0, -1e6;
+  x_ub << 1.0, 1.0, 1e6;
+  solver.addPathConstraint("StateConstraint",
+      std::make_unique<cddp::StateConstraint>(x_lb, x_ub));
+
+  auto options = makeALDDPOptions();
+  options.alddp.max_outer_iterations = 3;
+  options.max_iterations = 100;
+  options.verbose = false;
+  solver.setOptions(options);
+  solver.setInitialTrajectory(
+      std::vector<Eigen::VectorXd>(horizon + 1, initial_state),
+      std::vector<Eigen::VectorXd>(horizon, Eigen::VectorXd::Zero(control_dim)));
+
+  cddp::CDDPSolution solution = solver.solve(cddp::SolverType::ALDDP);
+
+  // Should terminate, not crash
+  EXPECT_FALSE(solution.state_trajectory.empty());
+}
+
+// ============================================================================
+// Constraint-free comparison with CLDDP
+// ============================================================================
+
+TEST(ALDDPTest, ConstraintFreeMatchesCLDDP) {
+  // Same problem solved by ALDDP and CLDDP should give similar results
+  const int state_dim = 4;
+  const int control_dim = 1;
+  const int horizon = 100;
+  const double timestep = 0.02;
+
+  Eigen::VectorXd goal_state = Eigen::VectorXd::Zero(state_dim);
+  goal_state << 0.0, M_PI, 0.0, 0.0;
+  Eigen::VectorXd initial_state = Eigen::VectorXd::Zero(state_dim);
+
+  Eigen::MatrixXd Q = Eigen::MatrixXd::Zero(state_dim, state_dim);
+  Eigen::MatrixXd R = 0.1 * Eigen::MatrixXd::Identity(control_dim, control_dim);
+  Eigen::MatrixXd Qf = 100.0 * Eigen::MatrixXd::Identity(state_dim, state_dim);
+
+  std::vector<Eigen::VectorXd> empty_ref;
+
+  Eigen::VectorXd u_lb(control_dim), u_ub(control_dim);
+  u_lb << -20.0;
+  u_ub << 20.0;
+
+  // CLDDP solve
+  double cost_clddp;
+  {
+    auto system = std::make_unique<cddp::CartPole>(timestep, "rk4");
+    auto objective = std::make_unique<cddp::QuadraticObjective>(
+        Q, R, Qf, goal_state, empty_ref, timestep);
+
+    cddp::CDDP solver(initial_state, goal_state, horizon, timestep);
+    solver.setDynamicalSystem(std::move(system));
+    solver.setObjective(std::move(objective));
+    solver.addPathConstraint("ControlConstraint",
+        std::make_unique<cddp::ControlConstraint>(u_lb, u_ub));
+
+    cddp::CDDPOptions opts;
+    opts.max_iterations = 200;
+    opts.tolerance = 1e-4;
+    opts.acceptable_tolerance = 1e-5;
+    opts.verbose = false;
+    opts.print_solver_header = false;
+    solver.setOptions(opts);
+    solver.setInitialTrajectory(
+        std::vector<Eigen::VectorXd>(horizon + 1, initial_state),
+        std::vector<Eigen::VectorXd>(horizon, Eigen::VectorXd::Zero(control_dim)));
+
+    auto sol = solver.solve(cddp::SolverType::CLDDP);
+    cost_clddp = sol.final_objective;
+  }
+
+  // ALDDP solve (no AL constraints, only BoxQP control bounds)
+  double cost_alddp;
+  {
+    auto system = std::make_unique<cddp::CartPole>(timestep, "rk4");
+    auto objective = std::make_unique<cddp::QuadraticObjective>(
+        Q, R, Qf, goal_state, empty_ref, timestep);
+
+    cddp::CDDP solver(initial_state, goal_state, horizon, timestep);
+    solver.setDynamicalSystem(std::move(system));
+    solver.setObjective(std::move(objective));
+    solver.addPathConstraint("ControlConstraint",
+        std::make_unique<cddp::ControlConstraint>(u_lb, u_ub));
+
+    auto opts = makeALDDPOptions();
+    opts.verbose = false;
+    opts.print_solver_header = false;
+    solver.setOptions(opts);
+    solver.setInitialTrajectory(
+        std::vector<Eigen::VectorXd>(horizon + 1, initial_state),
+        std::vector<Eigen::VectorXd>(horizon, Eigen::VectorXd::Zero(control_dim)));
+
+    auto sol = solver.solve(cddp::SolverType::ALDDP);
+    cost_alddp = sol.final_objective;
+  }
+
+  // Costs should be in the same ballpark (within 50%)
+  if (cost_clddp > 0 && cost_alddp > 0) {
+    double ratio = cost_alddp / cost_clddp;
+    EXPECT_GT(ratio, 0.5);
+    EXPECT_LT(ratio, 2.0);
+  }
+}
+
+TEST(ALDDPTest, WarmStartRefreshesSlackControlsForReseededTrajectory) {
+  const int state_dim = 4;
+  const int control_dim = 1;
+  const int horizon = 100;
+  const double timestep = 0.02;
+
+  auto system = std::make_unique<cddp::CartPole>(timestep, "rk4");
+
+  Eigen::MatrixXd Q = Eigen::MatrixXd::Zero(state_dim, state_dim);
+  Eigen::MatrixXd R = 0.01 * Eigen::MatrixXd::Identity(control_dim, control_dim);
+  Eigen::MatrixXd Qf = 100.0 * Eigen::MatrixXd::Identity(state_dim, state_dim);
+
+  Eigen::VectorXd goal_state = Eigen::VectorXd::Zero(state_dim);
+  goal_state << 0.0, M_PI, 0.0, 0.0;
+
+  std::vector<Eigen::VectorXd> empty_ref;
+  auto objective = std::make_unique<cddp::QuadraticObjective>(
+      Q, R, Qf, goal_state, empty_ref, timestep);
+
+  Eigen::VectorXd initial_state = Eigen::VectorXd::Zero(state_dim);
+
+  cddp::CDDP solver(initial_state, goal_state, horizon, timestep);
+  solver.setDynamicalSystem(std::move(system));
+  solver.setObjective(std::move(objective));
+
+  Eigen::VectorXd u_lb(control_dim), u_ub(control_dim);
+  u_lb << -20.0;
+  u_ub << 20.0;
+  solver.addPathConstraint("ControlConstraint",
+      std::make_unique<cddp::ControlConstraint>(u_lb, u_ub));
+
+  auto options = makeALDDPOptions();
+  options.verbose = false;
+  options.print_solver_header = false;
+  solver.setOptions(options);
+  solver.setInitialTrajectory(
+      std::vector<Eigen::VectorXd>(horizon + 1, initial_state),
+      std::vector<Eigen::VectorXd>(horizon, Eigen::VectorXd::Zero(control_dim)));
+
+  auto first_solution = solver.solve(cddp::SolverType::ALDDP);
+  ASSERT_FALSE(first_solution.state_trajectory.empty());
+
+  std::vector<Eigen::VectorXd> X_infeasible(horizon + 1);
+  for (int t = 0; t <= horizon; ++t) {
+    double alpha = static_cast<double>(t) / horizon;
+    X_infeasible[t] = (1.0 - alpha) * initial_state + alpha * goal_state;
+  }
+  std::vector<Eigen::VectorXd> U_seed(horizon, Eigen::VectorXd::Zero(control_dim));
+
+  options.warm_start = true;
+  options.max_iterations = 100;
+  solver.setOptions(options);
+  solver.setInitialTrajectory(X_infeasible, U_seed);
+
+  auto reseeded_solution = solver.solve(cddp::SolverType::ALDDP);
+
+  EXPECT_GT(reseeded_solution.iterations_completed, 0);
+  EXPECT_FALSE(reseeded_solution.state_trajectory.empty());
+  EXPECT_TRUE(std::isfinite(reseeded_solution.final_objective));
+}
diff --git a/tests/cddp_core/test_logddp_solver.cpp b/tests/cddp_core/test_logddp_solver.cpp
index a028d34..96ddbbb 100644
--- a/tests/cddp_core/test_logddp_solver.cpp
+++ b/tests/cddp_core/test_logddp_solver.cpp
@@ -99,6 +99,8 @@ TEST(LogDDPTest, SolvePendulum)
     options.verbose = true;
     options.debug = false;
     options.regularization.initial_value = 1e-6;
+    options.log_barrier.segment_length = horizon;
+    options.log_barrier.rollout_type = "nonlinear";
     options.return_iteration_info = true; // Get detailed iteration history
 
     // Set options
@@ -229,6 +231,94 @@ TEST(LogDDPTest, SolvePendulum)
     EXPECT_LE(warm_iterations, iterations_completed + 5) << "Warm start should not take significantly more iterations";
 }
 
+TEST(LogDDPTest, RecomputesStateTrajectoryFromSeededControlsOnColdStart) {
+    int state_dim = 2;
+    int control_dim = 1;
+    int horizon = 5;
+    double timestep = 0.05;
+
+    auto system = std::make_unique<cddp::Pendulum>(timestep, 1.0, 1.0, 0.0, "euler");
+
+    Eigen::MatrixXd Q = Eigen::MatrixXd::Zero(state_dim, state_dim);
+    Eigen::MatrixXd R = Eigen::MatrixXd::Identity(control_dim, control_dim);
+    Eigen::MatrixXd Qf = Eigen::MatrixXd::Identity(state_dim, state_dim);
+    Eigen::VectorXd goal_state(state_dim);
+    goal_state << 0.0, 0.0;
+
+    std::vector<Eigen::VectorXd> empty_reference_states;
+    auto objective = std::make_unique<cddp::QuadraticObjective>(
+        Q, R, Qf, goal_state, empty_reference_states, timestep);
+
+    Eigen::VectorXd initial_state(state_dim);
+    initial_state << M_PI, 0.0;
+
+    cddp::CDDP solver(initial_state, goal_state, horizon, timestep);
+    solver.setDynamicalSystem(std::move(system));
+    solver.setObjective(std::move(objective));
+
+    std::vector<Eigen::VectorXd> X_seed(horizon + 1, Eigen::VectorXd::Zero(state_dim));
+    std::vector<Eigen::VectorXd> U_seed(horizon, Eigen::VectorXd::Zero(control_dim));
+    X_seed[0] = initial_state;
+    for (int t = 1; t <= horizon; ++t) {
+        X_seed[t] << initial_state(0) - 0.1 * t, 0.2 * t;
+    }
+    solver.setInitialTrajectory(X_seed, U_seed);
+
+    cddp::CDDPOptions options;
+    options.max_iterations = 0;
+    options.verbose = false;
+    options.debug = false;
+    options.log_barrier.segment_length = horizon;
+    options.log_barrier.rollout_type = "nonlinear";
+    solver.setOptions(options);
+
+    cddp::CDDPSolution solution = solver.solve("LogDDP");
+    Eigen::VectorXd expected_next_state =
+        solver.getSystem().getDiscreteDynamics(initial_state, U_seed[0], 0.0);
+
+    ASSERT_EQ(solution.state_trajectory.size(), X_seed.size());
+    EXPECT_NEAR(solution.state_trajectory[1](0), expected_next_state(0), 1e-12);
+    EXPECT_NEAR(solution.state_trajectory[1](1), expected_next_state(1), 1e-12);
+    EXPECT_GT((solution.state_trajectory[1] - X_seed[1]).norm(), 1e-6);
+    EXPECT_EQ(solution.status_message, "MaxIterationsReached");
+}
+
+TEST(LogDDPTest, RejectsSegmentedRolloutOptions) {
+    int state_dim = 2;
+    int control_dim = 1;
+    int horizon = 5;
+    double timestep = 0.05;
+
+    auto system = std::make_unique<cddp::Pendulum>(timestep, 1.0, 1.0, 0.0, "euler");
+
+    Eigen::MatrixXd Q = Eigen::MatrixXd::Zero(state_dim, state_dim);
+    Eigen::MatrixXd R = Eigen::MatrixXd::Identity(control_dim, control_dim);
+    Eigen::MatrixXd Qf = Eigen::MatrixXd::Identity(state_dim, state_dim);
+    Eigen::VectorXd goal_state(state_dim);
+    goal_state << 0.0, 0.0;
+
+    std::vector<Eigen::VectorXd> empty_reference_states;
+    auto objective = std::make_unique<cddp::QuadraticObjective>(
+        Q, R, Qf, goal_state, empty_reference_states, timestep);
+
+    Eigen::VectorXd initial_state(state_dim);
+    initial_state << M_PI, 0.0;
+
+    cddp::CDDP solver(initial_state, goal_state, horizon, timestep);
+    solver.setDynamicalSystem(std::move(system));
+    solver.setObjective(std::move(objective));
+
+    cddp::CDDPOptions options;
+    options.max_iterations = 0;
+    options.verbose = false;
+    options.debug = false;
+    options.log_barrier.segment_length = horizon - 1;
+    options.log_barrier.rollout_type = "hybrid";
+    solver.setOptions(options);
+
+    EXPECT_THROW(solver.solve("LogDDP"), std::runtime_error);
+}
+
 TEST(LogDDPTest, SolveUnicycle) {
     // Problem parameters
     int state_dim = 3;
@@ -267,6 +357,8 @@ TEST(LogDDPTest, SolveUnicycle) {
     options.num_threads = 10;
     options.verbose = true;
     options.debug = false;
+    options.log_barrier.segment_length = horizon;
+    options.log_barrier.rollout_type = "nonlinear";
 
     // Create CDDP solver
     cddp::CDDP cddp_solver(initial_state, goal_state, horizon, timestep);
@@ -495,26 +587,18 @@ TEST(LogDDPTest, SolveCar)
     warm_options.warm_start = true;
     warm_options.max_iterations = 100; // Allow sufficient iterations for warm start convergence
     warm_options.verbose = false;     // Less verbose for warm start test
+    warm_options.tolerance = 1e-3;
+    warm_options.acceptable_tolerance = 1e-3;
+    warm_options.regularization.initial_value = 1e-4;
 
-    // Create a new solver for warm start test
-    auto car_system_warmstart = std::make_unique<cddp::Car>(timestep, wheelbase, integration_type);
-
-    // Create new objective
-    auto objective_warmstart = std::make_unique<cddp::CarParkingObjective>(goal_state, timestep);
-
-    cddp::CDDP warm_solver(initial_state, goal_state, horizon, timestep);
-    warm_solver.setDynamicalSystem(std::move(car_system_warmstart));
-    warm_solver.setObjective(std::move(objective_warmstart));
-    warm_solver.addPathConstraint("ControlConstraint",
-                              std::make_unique<cddp::ControlConstraint>(control_lower_bound, control_upper_bound));
-    warm_solver.setOptions(warm_options);
-
-    // Use previous solution as warm start
-    warm_solver.setInitialTrajectory(X_sol, U_sol);
+    // Reuse the same solver instance so LogDDP can preserve its warm-start
+    // gains, matching the documented contract of the option.
+    cddp_solver.setOptions(warm_options);
+    cddp_solver.setInitialTrajectory(X_sol, U_sol);
 
     // Solve with warm start
     auto start_time = std::chrono::high_resolution_clock::now();
-    cddp::CDDPSolution warm_solution = warm_solver.solve("LogDDP");
+    cddp::CDDPSolution warm_solution = cddp_solver.solve("LogDDP");
     auto end_time = std::chrono::high_resolution_clock::now();
     auto warm_duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);