diff --git a/Project.toml b/Project.toml
index 743f6d1..29bdc43 100644
--- a/Project.toml
+++ b/Project.toml
@@ -9,6 +9,7 @@ AtomsCalculators = "a3e0e189-c65a-42c1-833c-339540406eb1"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+NLSolversBase = "d41bc354-129a-5804-8e4c-c37616107c6c"
 Optim = "429524aa-4258-5aef-a3af-852621145aeb"
 PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
@@ -29,7 +30,8 @@ AtomsBuilder = "0.2.2"
 AtomsCalculators = "0.2.3"
 DocStringExtensions = "0.9"
 LineSearches = "7"
-Optim = "1.11.0"
+NLSolversBase = "8.0.0"
+Optim = "2"
 Optimization = "3, 4"
 PrettyTables = "3"
 StaticArrays = "1"
diff --git a/src/GeometryOptimization.jl b/src/GeometryOptimization.jl
index 38bbfab..bfd9dc4 100644
--- a/src/GeometryOptimization.jl
+++ b/src/GeometryOptimization.jl
@@ -5,7 +5,6 @@ using AtomsCalculators
 using DocStringExtensions
 using LinearAlgebra
 using LineSearches
-using Optim
 using StaticArrays
 using Unitful
 using UnitfulAtomic
diff --git a/src/dof_management.jl b/src/dof_management.jl
index c560049..af614ee 100644
--- a/src/dof_management.jl
+++ b/src/dof_management.jl
@@ -21,8 +21,8 @@ In addition set at most one of the kwargs:
 On call to the constructor, `DofManager` stores positions and cell
 `X0, C0`, dofs are understood *relative* to this initial configuration.
 `get_dofs(sys, dm::DofManager)` returns a vector that represents the
-non-dimensional displacement and a deformation matrix `(U, F)`. The new configuration extracted from a dof vector
-is understood as
+non-dimensional displacement and a deformation matrix `(U, F)`.
+The new configuration extracted from a dof vector is understood as
 * The new cell: `C = F * C0`
 * The new positions: `𝐫[i] = F * (X0[i] + U[i] * r0)`
 One aspect of this definition is that clamped atom positions still change via
diff --git a/src/minimize_energy.jl b/src/minimize_energy.jl
index b64424b..55d487e 100644
--- a/src/minimize_energy.jl
+++ b/src/minimize_energy.jl
@@ -45,12 +45,12 @@ function eval_objective_gradient!(G, prob::GeoOptProblem, ps, x)
     objective = res.energy_unitless
     energy = res.energy
 
-    gradnorm = nothing
-    forces   = nothing
-    virial   = nothing
+    grad   = nothing
+    forces = nothing
+    virial = nothing
     if !isnothing(G)
         res = eval_gradient(prob.system, prob.calculator, prob.dofmgr, x, ps, res.state)
-        gradnorm = maximum(abs, res.grad)
+        grad = res.grad
         haskey(res, :forces) && (forces = res.forces)
         haskey(res, :virial) && (virial = res.virial)
         copy!(G, res.grad)
@@ -62,7 +62,7 @@ function eval_objective_gradient!(G, prob::GeoOptProblem, ps, x)
     if energy ≤ min_energy
         geoopt_state.calc_state = res.state
     end
-    push!(geoopt_state.cache_evaluations, (; energy, forces, virial, objective, gradnorm))
+    push!(geoopt_state.cache_evaluations, (; energy, forces, virial, objective, grad))
 
     objective
 end
diff --git a/src/optim.jl b/src/optim.jl
index 3a38d8a..13b6718 100644
--- a/src/optim.jl
+++ b/src/optim.jl
@@ -1,3 +1,6 @@
+using Optim
+using NLSolversBase: only_fg!
+
 #
 # Solvers with sane defaults
 #
@@ -42,28 +45,28 @@ function solve_problem(prob::GeoOptProblem, solver::Optim.AbstractOptimizer, cvg
     end
 
     geoopt_state = prob.geoopt_state
-    inner_callback = function(ts)
+    inner_callback = function(optim_state)
         cache_evaluations = geoopt_state.cache_evaluations
-
-        geoopt_state.n_iter = ts.iteration
+        n_iter = optim_state.pseudo_iteration
+        geoopt_state.n_iter = n_iter
         if isempty(cache_evaluations)
             # Find out if we already added the current state (if optim cannot
             # make progress it keeps printing iterations, but does not run further
             # function evaluations ... in this case we have no new forces and virials).
             # Also it sometimes does an extra call to the callback even though
             # convergence has already been flagged.
-            tol = 10eps(typeof(ts.value))
-            is_match = abs(austrip(geoopt_state.history_energy[end]) - ts.value) < tol
+            tol = 10eps(typeof(optim_state.f_x))
+            is_match = abs(austrip(geoopt_state.history_energy[end]) - optim_state.f_x) < tol
             if !geoopt_state.converged && !is_match
-                @warn "Discarding optimisation step of iteration $(ts.iteration)"
+                @warn "Discarding optimisation step of iteration $(n_iter)"
             end
         else
             # Find position in the cache matching Optim's current state
             i_match = findlast(cache_evaluations) do eval
-                isnothing(eval.gradnorm) && return false
-                tol = 10eps(typeof(ts.value))
-                (   abs(eval.objective - ts.value)  < tol
-                 && abs(eval.gradnorm  - ts.g_norm) < tol)
+                isnothing(eval.grad) && return false
+                tol = 10eps(typeof(optim_state.f_x))
+                (   abs(eval.objective - optim_state.f_x)     < tol
+                 && maximum(abs, eval.grad - optim_state.g_x) < tol)
             end
             i_match = @something i_match length(cache_evaluations)
 
@@ -82,7 +85,7 @@ function solve_problem(prob::GeoOptProblem, solver::Optim.AbstractOptimizer, cvg
         end
 
         # Callback and possible abortion
-        halt = callback(ts, geoopt_state)
+        halt = callback(optim_state, geoopt_state)
         halt && return true
 
         geoopt_state.converged
@@ -95,19 +98,20 @@ function solve_problem(prob::GeoOptProblem, solver::Optim.AbstractOptimizer, cvg
         allow_f_increases=true,
         successive_f_tol=2,
         callback=inner_callback,
-        x_abstol=-1, f_abstol=-1, g_tol=10eps(T),
-        x_reltol=-1, f_reltol=-1,
+        x_abstol=NaN, f_abstol=NaN, g_tol=10eps(T),
+        x_reltol=NaN, f_reltol=NaN,
         iterations=maxiters,
         time_limit=maxtime,
         kwargs...
     )
-    optimres = Optim.optimize(Optim.only_fg!(fg!), x0, solver, options)
+    optimres = Optim.optimize(only_fg!(fg!), x0, solver, options)
 
     (; minimizer=Optim.minimizer(optimres), minimum=Optim.minimum(optimres), optimres)
 end
 
 function solve_problem(prob, solver::Optim.ZerothOrderOptimizer, cvg;
                        callback, maxiters, maxtime, kwargs...)
-    # TODO Supporting this needs more fiddeling with the callbacks and convergence checks
+    # TODO: Supporting this needs more fiddling with the callbacks and convergence checks
+    #       and it's generally not very useful as forces / stresses are usually available
     throw(ArgumentError("Zeroth-order optimizers are currently not supported."))
 end