diff --git a/Project.toml b/Project.toml
index 9afd3d558..ca81165b0 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,10 +1,11 @@
 name = "NonlinearSolve"
 uuid = "8913a72c-1f9b-4ce2-8d82-65094dcecaec"
 authors = ["SciML"]
-version = "3.5.4"
+version = "3.6.0"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
+Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697"
 ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
 ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471"
 DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e"
@@ -55,12 +56,13 @@ NonlinearSolveZygoteExt = "Zygote"
 
 [compat]
 ADTypes = "0.2.6"
+Accessors = "0.1.32"
 Aqua = "0.8"
 ArrayInterface = "7.7"
 BandedMatrices = "1.4"
 BenchmarkTools = "1.4"
-ConcreteStructs = "0.2.3"
 CUDA = "5.1"
+ConcreteStructs = "0.2.3"
 DiffEqBase = "6.146.0"
 Enzyme = "0.11.11"
 FastBroadcast = "0.2.8"
diff --git a/docs/src/devdocs/internal_interfaces.md b/docs/src/devdocs/internal_interfaces.md
index 843054cc8..97762e18b 100644
--- a/docs/src/devdocs/internal_interfaces.md
+++ b/docs/src/devdocs/internal_interfaces.md
@@ -13,6 +13,7 @@ NonlinearSolve.AbstractNonlinearSolveCache
 ```@docs
 NonlinearSolve.AbstractDescentAlgorithm
 NonlinearSolve.AbstractDescentCache
+NonlinearSolve.DescentResult
 ```
 
 ## Approximate Jacobian
diff --git a/docs/src/tutorials/large_systems.md b/docs/src/tutorials/large_systems.md
index aedd58445..4788a99af 100644
--- a/docs/src/tutorials/large_systems.md
+++ b/docs/src/tutorials/large_systems.md
@@ -2,8 +2,8 @@
 
 This tutorial is for getting into the extra features of using NonlinearSolve.jl. Solving
 ill-conditioned nonlinear systems requires specializing the linear solver on properties of
-the Jacobian in order to cut down on the ``\mathcal{O}(n^3)`` linear solve and the
-``\mathcal{O}(n^2)`` back-solves. This tutorial is designed to explain the advanced usage of
+the Jacobian in order to cut down on the `\mathcal{O}(n^3)` linear solve and the
+`\mathcal{O}(n^2)` back-solves. This tutorial is designed to explain the advanced usage of
 NonlinearSolve.jl by solving the steady state stiff Brusselator partial differential
 equation (BRUSS) using NonlinearSolve.jl.
 
diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
index de46fe153..784b00b76 100644
--- a/src/NonlinearSolve.jl
+++ b/src/NonlinearSolve.jl
@@ -8,9 +8,9 @@ import Reexport: @reexport
 import PrecompileTools: @recompile_invalidations, @compile_workload, @setup_workload
 
 @recompile_invalidations begin
-    using ADTypes, ConcreteStructs, DiffEqBase, FastBroadcast, FastClosures, LazyArrays,
-          LineSearches, LinearAlgebra, LinearSolve, MaybeInplace, Preferences, Printf,
-          SciMLBase, SimpleNonlinearSolve, SparseArrays, SparseDiffTools
+    using Accessors, ADTypes, ConcreteStructs, DiffEqBase, FastBroadcast, FastClosures,
+          LazyArrays, LineSearches, LinearAlgebra, LinearSolve, MaybeInplace, Preferences,
+          Printf, SciMLBase, SimpleNonlinearSolve, SparseArrays, SparseDiffTools
 
     import ArrayInterface: undefmatrix, can_setindex, restructure, fast_scalar_indexing
     import DiffEqBase: AbstractNonlinearTerminationMode,
@@ -45,11 +45,13 @@ include("adtypes.jl")
 include("timer_outputs.jl")
 include("internal/helpers.jl")
 
+include("descent/common.jl")
 include("descent/newton.jl")
 include("descent/steepest.jl")
 include("descent/dogleg.jl")
 include("descent/damped_newton.jl")
 include("descent/geodesic_acceleration.jl")
+include("descent/multistep.jl")
 
 include("internal/operators.jl")
 include("internal/jacobian.jl")
@@ -69,6 +71,7 @@ include("core/spectral_methods.jl")
 
 include("algorithms/raphson.jl")
 include("algorithms/pseudo_transient.jl")
+include("algorithms/multistep.jl")
 include("algorithms/broyden.jl")
 include("algorithms/klement.jl")
 include("algorithms/lbroyden.jl")
@@ -138,7 +141,8 @@ include("default.jl")
 end
 
 # Core Algorithms
-export NewtonRaphson, PseudoTransient, Klement, Broyden, LimitedMemoryBroyden, DFSane
+export NewtonRaphson, PseudoTransient, Klement, Broyden, LimitedMemoryBroyden, DFSane,
+       MultiStepNonlinearSolver
 export GaussNewton, LevenbergMarquardt, TrustRegion
 export NonlinearSolvePolyAlgorithm,
        RobustMultiNewton, FastShortcutNonlinearPolyalg, FastShortcutNLLSPolyalg
@@ -152,7 +156,9 @@ export GeneralizedFirstOrderAlgorithm, ApproximateJacobianSolveAlgorithm, Genera
 
 # Descent Algorithms
 export NewtonDescent, SteepestDescent, Dogleg, DampedNewtonDescent,
-       GeodesicAcceleration
+       GeodesicAcceleration, GenericMultiStepDescent
+## Multistep Algorithms
+export MultiStepSchemes
 
 # Globalization
 ## Line Search Algorithms
diff --git a/src/abstract_types.jl b/src/abstract_types.jl
index 1b30f2b9f..6209359f8 100644
--- a/src/abstract_types.jl
+++ b/src/abstract_types.jl
@@ -66,8 +66,8 @@ Abstract Type for all Descent Caches.
 ### `__internal_solve!` specification
 
 ```julia
-δu, success, intermediates = __internal_solve!(cache::AbstractDescentCache, J, fu, u,
-    idx::Val; skip_solve::Bool = false, kwargs...)
+descent_result = __internal_solve!(cache::AbstractDescentCache, J, fu, u, idx::Val;
+    skip_solve::Bool = false, kwargs...)
 ```
 
   - `J`: Jacobian or Inverse Jacobian (if `pre_inverted = Val(true)`).
@@ -79,14 +79,7 @@ Abstract Type for all Descent Caches.
     direction was rejected and we want to try with a modified trust region.
   - `kwargs`: keyword arguments to pass to the linear solver if there is one.
 
-#### Returned values
-
-  - `δu`: the descent direction.
-  - `success`: Certain Descent Algorithms can reject a descent direction for example
-    `GeodesicAcceleration`.
-  - `intermediates`: A named tuple containing intermediates computed during the solve.
-    For example, `GeodesicAcceleration` returns `NamedTuple{(:v, :a)}` containing the
-    "velocity" and "acceleration" terms.
+Returns a result of type [`DescentResult`](@ref).
 
 ### Interface Functions
 
@@ -94,6 +87,11 @@ Abstract Type for all Descent Caches.
   - `get_du(cache, ::Val{N})`: get the `N`th descent direction.
   - `set_du!(cache, δu)`: set the descent direction.
   - `set_du!(cache, δu, ::Val{N})`: set the `N`th descent direction.
+  - `get_internal_cache(cache, ::Val{field})`: get the internal cache field.
+  - `get_internal_cache(cache, field::Val, ::Val{N})`: get the `N`th internal cache field.
+  - `set_internal_cache!(cache, value, ::Val{field})`: set the internal cache field.
+  - `set_internal_cache!(cache, value, field::Val, ::Val{N})`: set the `N`th internal cache
+    field.
   - `last_step_accepted(cache)`: whether or not the last step was accepted. Checks if the
     cache has a `last_step_accepted` field and returns it if it does, else returns `true`.
 """
@@ -105,6 +103,29 @@ SciMLBase.get_du(cache::AbstractDescentCache, ::Val{N}) where {N} = cache.δus[N
 set_du!(cache::AbstractDescentCache, δu) = (cache.δu = δu)
 set_du!(cache::AbstractDescentCache, δu, ::Val{1}) = set_du!(cache, δu)
 set_du!(cache::AbstractDescentCache, δu, ::Val{N}) where {N} = (cache.δus[N - 1] = δu)
+function get_internal_cache(cache::AbstractDescentCache, ::Val{field}) where {field}
+    return getproperty(cache, field)
+end
+function get_internal_cache(cache::AbstractDescentCache, field::Val, ::Val{1})
+    return get_internal_cache(cache, field)
+end
+function get_internal_cache(
+        cache::AbstractDescentCache, ::Val{field}, ::Val{N}) where {field, N}
+    true_field = Symbol(string(field), "s")  # Julia 1.10 compiles this away
+    return getproperty(cache, true_field)[N]
+end
+function set_internal_cache!(cache::AbstractDescentCache, value, ::Val{field}) where {field}
+    return setproperty!(cache, field, value)
+end
+function set_internal_cache!(
+        cache::AbstractDescentCache, value, field::Val, ::Val{1})
+    return set_internal_cache!(cache, value, field)
+end
+function set_internal_cache!(
+        cache::AbstractDescentCache, value, ::Val{field}, ::Val{N}) where {field, N}
+    true_field = Symbol(string(field), "s")  # Julia 1.10 compiles this away
+    return setproperty!(cache, true_field, value, N)
+end
 
 function last_step_accepted(cache::AbstractDescentCache)
     hasfield(typeof(cache), :last_step_accepted) && return cache.last_step_accepted
diff --git a/src/algorithms/multistep.jl b/src/algorithms/multistep.jl
new file mode 100644
index 000000000..cd5a31890
--- /dev/null
+++ b/src/algorithms/multistep.jl
@@ -0,0 +1,10 @@
+function MultiStepNonlinearSolver(; concrete_jac = nothing, linsolve = nothing,
+        scheme = MSS.PotraPtak3, precs = DEFAULT_PRECS, autodiff = nothing,
+        vjp_autodiff = nothing, linesearch = NoLineSearch())
+    forward_ad = ifelse(autodiff isa ADTypes.AbstractForwardMode, autodiff, nothing)
+    scheme_concrete = apply_patch(
+        scheme, (; autodiff, vjp_autodiff, jvp_autodiff = forward_ad))
+    descent = GenericMultiStepDescent(; scheme = scheme_concrete, linsolve, precs)
+    return GeneralizedFirstOrderAlgorithm(; concrete_jac, name = MSS.display_name(scheme),
+        descent, jacobian_ad = autodiff, linesearch, reverse_ad = vjp_autodiff, forward_ad)
+end
diff --git a/src/core/approximate_jacobian.jl b/src/core/approximate_jacobian.jl
index ffc77cea8..22a192e41 100644
--- a/src/core/approximate_jacobian.jl
+++ b/src/core/approximate_jacobian.jl
@@ -163,8 +163,7 @@ function SciMLBase.__init(prob::AbstractNonlinearProblem{uType, iip},
 
         linsolve = get_linear_solver(alg.descent)
         initialization_cache = __internal_init(prob, alg.initialization, alg, f, fu, u, p;
-            linsolve,
-            maxiters, internalnorm)
+            linsolve, maxiters, internalnorm)
 
         abstol, reltol, termination_cache = init_termination_cache(abstol, reltol, fu, u,
             termination_condition)
@@ -222,9 +221,7 @@ function __step!(cache::ApproximateJacobianSolveCache{INV, GB, iip};
     new_jacobian = true
     @static_timeit cache.timer "jacobian init/reinit" begin
         if get_nsteps(cache) == 0  # First Step is special ignore kwargs
-            J_init = __internal_solve!(cache.initialization_cache,
-                cache.fu,
-                cache.u,
+            J_init = __internal_solve!(cache.initialization_cache, cache.fu, cache.u,
                 Val(false))
             if INV
                 if jacobian_initialized_preinverted(cache.initialization_cache.alg)
@@ -283,54 +280,65 @@ function __step!(cache::ApproximateJacobianSolveCache{INV, GB, iip};
     @static_timeit cache.timer "descent" begin
         if cache.trustregion_cache !== nothing &&
            hasfield(typeof(cache.trustregion_cache), :trust_region)
-            δu, descent_success, descent_intermediates = __internal_solve!(
-                cache.descent_cache,
-                J, cache.fu, cache.u; new_jacobian,
-                trust_region = cache.trustregion_cache.trust_region)
+            descent_result = __internal_solve!(cache.descent_cache, J, cache.fu, cache.u;
+                new_jacobian, trust_region = cache.trustregion_cache.trust_region)
         else
-            δu, descent_success, descent_intermediates = __internal_solve!(
-                cache.descent_cache,
-                J, cache.fu, cache.u; new_jacobian)
+            descent_result = __internal_solve!(cache.descent_cache, J, cache.fu, cache.u;
+                new_jacobian)
         end
     end
 
-    if descent_success
-        if GB === :LineSearch
-            @static_timeit cache.timer "linesearch" begin
-                needs_reset, α = __internal_solve!(cache.linesearch_cache, cache.u, δu)
-            end
-            if needs_reset && cache.steps_since_last_reset > 5 # Reset after a burn-in period
-                cache.force_reinit = true
-            else
-                @static_timeit cache.timer "step" begin
-                    @bb axpy!(α, δu, cache.u)
-                    evaluate_f!(cache, cache.u, cache.p)
-                end
-            end
-        elseif GB === :TrustRegion
-            @static_timeit cache.timer "trustregion" begin
-                tr_accepted, u_new, fu_new = __internal_solve!(cache.trustregion_cache, J,
-                    cache.fu, cache.u, δu, descent_intermediates)
-                if tr_accepted
-                    @bb copyto!(cache.u, u_new)
-                    @bb copyto!(cache.fu, fu_new)
-                end
-                if hasfield(typeof(cache.trustregion_cache), :shrink_counter) &&
-                   cache.trustregion_cache.shrink_counter > cache.max_shrink_times
-                    cache.retcode = ReturnCode.ShrinkThresholdExceeded
-                    cache.force_stop = true
-                end
-            end
-            α = true
-        elseif GB === :None
+    if descent_result.success
+        if GB === :None
             @static_timeit cache.timer "step" begin
-                @bb axpy!(1, δu, cache.u)
+                if descent_result.u !== missing
+                    @bb copyto!(cache.u, descent_result.u)
+                elseif descent_result.δu !== missing
+                    @bb axpy!(1, descent_result.δu, cache.u)
+                else
+                    error("This shouldn't occur. `$(cache.alg.descent)` is incorrectly \
+                           specified.")
+                end
                 evaluate_f!(cache, cache.u, cache.p)
             end
             α = true
         else
-            error("Unknown Globalization Strategy: $(GB). Allowed values are (:LineSearch, \
-                :TrustRegion, :None)")
+            δu = descent_result.δu
+            @assert δu!==missing "Descent Supporting LineSearch or TrustRegion must return a `δu`."
+
+            if GB === :LineSearch
+                @static_timeit cache.timer "linesearch" begin
+                    needs_reset, α = __internal_solve!(cache.linesearch_cache, cache.u, δu)
+                end
+                if needs_reset && cache.steps_since_last_reset > 5 # Reset after a burn-in period
+                    cache.force_reinit = true
+                else
+                    @static_timeit cache.timer "step" begin
+                        @bb axpy!(α, δu, cache.u)
+                        evaluate_f!(cache, cache.u, cache.p)
+                    end
+                end
+            elseif GB === :TrustRegion
+                @static_timeit cache.timer "trustregion" begin
+                    tr_accepted, u_new, fu_new = __internal_solve!(cache.trustregion_cache,
+                        J, cache.fu, cache.u, δu, descent_result.extras)
+                    if tr_accepted
+                        @bb copyto!(cache.u, u_new)
+                        @bb copyto!(cache.fu, fu_new)
+                        α = true
+                    else
+                        α = false
+                    end
+                    if hasfield(typeof(cache.trustregion_cache), :shrink_counter) &&
+                       cache.trustregion_cache.shrink_counter > cache.max_shrink_times
+                        cache.retcode = ReturnCode.ShrinkThresholdExceeded
+                        cache.force_stop = true
+                    end
+                end
+            else
+                error("Unknown Globalization Strategy: $(GB). Allowed values are \
+                       (:LineSearch, :TrustRegion, :None)")
+            end
         end
         check_and_update!(cache, cache.fu, cache.u, cache.u_cache)
     else
diff --git a/src/core/generalized_first_order.jl b/src/core/generalized_first_order.jl
index 0812e7f05..fb7580319 100644
--- a/src/core/generalized_first_order.jl
+++ b/src/core/generalized_first_order.jl
@@ -215,59 +215,67 @@ function __step!(cache::GeneralizedFirstOrderAlgorithmCache{iip, GB};
     @static_timeit cache.timer "descent" begin
         if cache.trustregion_cache !== nothing &&
            hasfield(typeof(cache.trustregion_cache), :trust_region)
-            δu, descent_success, descent_intermediates = __internal_solve!(
-                cache.descent_cache,
-                J, cache.fu, cache.u; new_jacobian,
-                trust_region = cache.trustregion_cache.trust_region)
+            descent_result = __internal_solve!(cache.descent_cache, J, cache.fu, cache.u;
+                new_jacobian, trust_region = cache.trustregion_cache.trust_region)
         else
-            δu, descent_success, descent_intermediates = __internal_solve!(
-                cache.descent_cache,
-                J, cache.fu, cache.u; new_jacobian)
+            descent_result = __internal_solve!(cache.descent_cache, J, cache.fu, cache.u;
+                new_jacobian)
         end
     end
 
-    if descent_success
+    if descent_result.success
         cache.make_new_jacobian = true
-        if GB === :LineSearch
-            @static_timeit cache.timer "linesearch" begin
-                linesearch_failed, α = __internal_solve!(cache.linesearch_cache,
-                    cache.u, δu)
-            end
-            if linesearch_failed
-                cache.retcode = ReturnCode.InternalLineSearchFailed
-                cache.force_stop = true
-            end
+        if GB === :None
             @static_timeit cache.timer "step" begin
-                @bb axpy!(α, δu, cache.u)
-                evaluate_f!(cache, cache.u, cache.p)
-            end
-        elseif GB === :TrustRegion
-            @static_timeit cache.timer "trustregion" begin
-                tr_accepted, u_new, fu_new = __internal_solve!(cache.trustregion_cache, J,
-                    cache.fu, cache.u, δu, descent_intermediates)
-                if tr_accepted
-                    @bb copyto!(cache.u, u_new)
-                    @bb copyto!(cache.fu, fu_new)
-                    α = true
+                if descent_result.u !== missing
+                    @bb copyto!(cache.u, descent_result.u)
+                elseif descent_result.δu !== missing
+                    @bb axpy!(1, descent_result.δu, cache.u)
                 else
-                    α = false
-                    cache.make_new_jacobian = false
+                    error("This shouldn't occur. `$(cache.alg.descent)` is incorrectly \
+                           specified.")
                 end
-                if hasfield(typeof(cache.trustregion_cache), :shrink_counter) &&
-                   cache.trustregion_cache.shrink_counter > cache.max_shrink_times
-                    cache.retcode = ReturnCode.ShrinkThresholdExceeded
-                    cache.force_stop = true
-                end
-            end
-        elseif GB === :None
-            @static_timeit cache.timer "step" begin
-                @bb axpy!(1, δu, cache.u)
                 evaluate_f!(cache, cache.u, cache.p)
             end
             α = true
         else
-            error("Unknown Globalization Strategy: $(GB). Allowed values are (:LineSearch, \
-                  :TrustRegion, :None)")
+            δu = descent_result.δu
+            @assert δu!==missing "Descent Supporting LineSearch or TrustRegion must return a `δu`."
+
+            if GB === :LineSearch
+                @static_timeit cache.timer "linesearch" begin
+                    failed, α = __internal_solve!(cache.linesearch_cache, cache.u, δu)
+                end
+                if failed
+                    cache.retcode = ReturnCode.InternalLineSearchFailed
+                    cache.force_stop = true
+                else
+                    @static_timeit cache.timer "step" begin
+                        @bb axpy!(α, δu, cache.u)
+                        evaluate_f!(cache, cache.u, cache.p)
+                    end
+                end
+            elseif GB === :TrustRegion
+                @static_timeit cache.timer "trustregion" begin
+                    tr_accepted, u_new, fu_new = __internal_solve!(cache.trustregion_cache,
+                        J, cache.fu, cache.u, δu, descent_result.extras)
+                    if tr_accepted
+                        @bb copyto!(cache.u, u_new)
+                        @bb copyto!(cache.fu, fu_new)
+                        α = true
+                    else
+                        α = false
+                    end
+                    if hasfield(typeof(cache.trustregion_cache), :shrink_counter) &&
+                       cache.trustregion_cache.shrink_counter > cache.max_shrink_times
+                        cache.retcode = ReturnCode.ShrinkThresholdExceeded
+                        cache.force_stop = true
+                    end
+                end
+            else
+                error("Unknown Globalization Strategy: $(GB). Allowed values are \
+                       (:LineSearch, :TrustRegion, :None)")
+            end
         end
         check_and_update!(cache, cache.fu, cache.u, cache.u_cache)
     else
diff --git a/src/descent/common.jl b/src/descent/common.jl
new file mode 100644
index 000000000..2a614d84a
--- /dev/null
+++ b/src/descent/common.jl
@@ -0,0 +1,26 @@
+"""
+    DescentResult(; δu = missing, u = missing, success::Bool = true, extras = (;))
+
+Construct a `DescentResult` object.
+
+### Keyword Arguments
+
+  - `δu`: The descent direction.
+  - `u`: The new iterate. This is provided only for multi-step methods currently.
+  - `success`: Certain Descent Algorithms can reject a descent direction for example
+    [`GeodesicAcceleration`](@ref).
+  - `extras`: A named tuple containing intermediates computed during the solve.
+    For example, [`GeodesicAcceleration`](@ref) returns `NamedTuple{(:v, :a)}` containing
+    the "velocity" and "acceleration" terms.
+"""
+@concrete struct DescentResult
+    δu
+    u
+    success::Bool
+    extras
+end
+
+function DescentResult(; δu = missing, u = missing, success::Bool = true, extras = (;))
+    @assert δu !== missing || u !== missing
+    return DescentResult(δu, u, success, extras)
+end
diff --git a/src/descent/damped_newton.jl b/src/descent/damped_newton.jl
index 77ad95b54..cee437d7e 100644
--- a/src/descent/damped_newton.jl
+++ b/src/descent/damped_newton.jl
@@ -58,11 +58,7 @@ function __internal_init(
         shared::Val{N} = Val(1), kwargs...) where {INV, N}
     length(fu) != length(u) &&
         @assert !INV "Precomputed Inverse for Non-Square Jacobian doesn't make sense."
-    @bb δu = similar(u)
-    δus = N ≤ 1 ? nothing : map(2:N) do i
-        @bb δu_ = similar(u)
-    end
-
+    δu, δus = @shared_caches N (@bb δu = similar(u))
     normal_form_damping = returns_norm_form_damping(alg.damping_fn)
     normal_form_linsolve = __needs_square_A(alg.linsolve, u)
     if u isa Number
@@ -138,7 +134,7 @@ function __internal_solve!(cache::DampedNewtonDescentCache{INV, mode}, J, fu, u,
         idx::Val{N} = Val(1); skip_solve::Bool = false, new_jacobian::Bool = true,
         kwargs...) where {INV, N, mode}
     δu = get_du(cache, idx)
-    skip_solve && return δu, true, (;)
+    skip_solve && return DescentResult(; δu)
 
     recompute_A = idx === Val(1)
 
@@ -203,15 +199,14 @@ function __internal_solve!(cache::DampedNewtonDescentCache{INV, mode}, J, fu, u,
     end
 
     @static_timeit cache.timer "linear solve" begin
-        δu = cache.lincache(; A, b,
-            reuse_A_if_factorization = !new_jacobian && !recompute_A,
-            kwargs..., linu = _vec(δu))
+        δu = cache.lincache(; A, b, linu = _vec(δu),
+            reuse_A_if_factorization = !new_jacobian && !recompute_A, kwargs...)
         δu = _restructure(get_du(cache, idx), δu)
     end
 
     @bb @. δu *= -1
     set_du!(cache, δu, idx)
-    return δu, true, (;)
+    return DescentResult(; δu)
 end
 
 # Define special concatenation for certain Array combinations
diff --git a/src/descent/dogleg.jl b/src/descent/dogleg.jl
index e1a50832f..ca7314760 100644
--- a/src/descent/dogleg.jl
+++ b/src/descent/dogleg.jl
@@ -40,7 +40,7 @@ end
     newton_cache
     cauchy_cache
     internalnorm
-    JᵀJ_cache
+    Jᵀδu_cache
     δu_cache_1
     δu_cache_2
     δu_cache_mul
@@ -56,10 +56,7 @@ function __internal_init(prob::AbstractNonlinearProblem, alg::Dogleg, J, fu, u;
         linsolve_kwargs, abstol, reltol, shared, kwargs...)
     cauchy_cache = __internal_init(prob, alg.steepest_descent, J, fu, u; pre_inverted,
         linsolve_kwargs, abstol, reltol, shared, kwargs...)
-    @bb δu = similar(u)
-    δus = N ≤ 1 ? nothing : map(2:N) do i
-        @bb δu_ = similar(u)
-    end
+    δu, δus = @shared_caches N (@bb δu = similar(u))
     @bb δu_cache_1 = similar(u)
     @bb δu_cache_2 = similar(u)
     @bb δu_cache_mul = similar(u)
@@ -68,10 +65,10 @@ function __internal_init(prob::AbstractNonlinearProblem, alg::Dogleg, J, fu, u;
 
     normal_form = prob isa NonlinearLeastSquaresProblem &&
                   __needs_square_A(alg.newton_descent.linsolve, u)
-    JᵀJ_cache = !normal_form ? J * _vec(δu) : nothing  # TODO: Rename
+    Jᵀδu_cache = !normal_form ? J * _vec(δu) : nothing
 
     return DoglegCache{INV, normal_form}(δu, δus, newton_cache, cauchy_cache, internalnorm,
-        JᵀJ_cache, δu_cache_1, δu_cache_2, δu_cache_mul)
+        Jᵀδu_cache, δu_cache_1, δu_cache_2, δu_cache_mul)
 end
 
 # If TrustRegion is not specified, then use a Gauss-Newton step
@@ -82,14 +79,16 @@ function __internal_solve!(cache::DoglegCache{INV, NF}, J, fu, u, idx::Val{N} =
                                     want to use a Trust Region."
     δu = get_du(cache, idx)
     T = promote_type(eltype(u), eltype(fu))
-    δu_newton, _, _ = __internal_solve!(cache.newton_cache, J, fu, u, idx; skip_solve,
+
+    res_newton = __internal_solve!(cache.newton_cache, J, fu, u, idx; skip_solve,
         kwargs...)
+    δu_newton = res_newton.δu
 
     # Newton's Step within the trust region
     if cache.internalnorm(δu_newton) ≤ trust_region
         @bb copyto!(δu, δu_newton)
         set_du!(cache, δu, idx)
-        return δu, true, (; δuJᵀJδu = T(NaN))
+        return DescentResult(; δu, extras = (; δuJᵀJδu = T(NaN)))
     end
 
     # Take intersection of steepest descent direction and trust region if Cauchy point lies
@@ -103,12 +102,13 @@ function __internal_solve!(cache::DoglegCache{INV, NF}, J, fu, u, idx::Val{N} =
         @bb cache.δu_cache_mul = JᵀJ × vec(δu_cauchy)
         δuJᵀJδu = __dot(δu_cauchy, cache.δu_cache_mul)
     else
-        δu_cauchy, _, _ = __internal_solve!(cache.cauchy_cache, J, fu, u, idx; skip_solve,
+        res_cauchy = __internal_solve!(cache.cauchy_cache, J, fu, u, idx; skip_solve,
             kwargs...)
+        δu_cauchy = res_cauchy.δu
         J_ = INV ? inv(J) : J
         l_grad = cache.internalnorm(δu_cauchy)
-        @bb cache.JᵀJ_cache = J × vec(δu_cauchy)  # TODO: Rename
-        δuJᵀJδu = __dot(cache.JᵀJ_cache, cache.JᵀJ_cache)
+        @bb cache.Jᵀδu_cache = J × vec(δu_cauchy)
+        δuJᵀJδu = __dot(cache.Jᵀδu_cache, cache.Jᵀδu_cache)
     end
     d_cauchy = (l_grad^3) / δuJᵀJδu
 
@@ -116,7 +116,7 @@ function __internal_solve!(cache::DoglegCache{INV, NF}, J, fu, u, idx::Val{N} =
         λ = trust_region / l_grad
         @bb @. δu = λ * δu_cauchy
         set_du!(cache, δu, idx)
-        return δu, true, (; δuJᵀJδu = λ^2 * δuJᵀJδu)
+        return DescentResult(; δu, extras = (; δuJᵀJδu = λ^2 * δuJᵀJδu))
     end
 
     # FIXME: For anything other than 2-norm a quadratic root will give incorrect results
@@ -134,5 +134,5 @@ function __internal_solve!(cache::DoglegCache{INV, NF}, J, fu, u, idx::Val{N} =
 
     @bb @. δu = cache.δu_cache_1 + τ * cache.δu_cache_2
     set_du!(cache, δu, idx)
-    return δu, true, (; δuJᵀJδu = T(NaN))
+    return DescentResult(; δu, extras = (; δuJᵀJδu = τ^2 * δuJᵀJδu))
 end
diff --git a/src/descent/geodesic_acceleration.jl b/src/descent/geodesic_acceleration.jl
index 35764783c..a989c0376 100644
--- a/src/descent/geodesic_acceleration.jl
+++ b/src/descent/geodesic_acceleration.jl
@@ -89,10 +89,7 @@ function __internal_init(prob::AbstractNonlinearProblem, alg::GeodesicAccelerati
         abstol = nothing, reltol = nothing, internalnorm::F = DEFAULT_NORM,
         kwargs...) where {INV, N, F}
     T = promote_type(eltype(u), eltype(fu))
-    @bb δu = similar(u)
-    δus = N ≤ 1 ? nothing : map(2:N) do i
-        @bb δu_ = similar(u)
-    end
+    δu, δus = @shared_caches N (@bb δu = similar(u))
     descent_cache = __internal_init(prob, alg.descent, J, fu, u; shared = Val(N * 2),
         pre_inverted, linsolve_kwargs, abstol, reltol, kwargs...)
     @bb Jv = similar(fu)
@@ -106,9 +103,11 @@ function __internal_solve!(
         cache::GeodesicAccelerationCache, J, fu, u, idx::Val{N} = Val(1);
         skip_solve::Bool = false, kwargs...) where {N}
     a, v, δu = get_acceleration(cache, idx), get_velocity(cache, idx), get_du(cache, idx)
-    skip_solve && return δu, true, (; a, v)
-    v, _, _ = __internal_solve!(cache.descent_cache, J, fu, u, Val(2N - 1); skip_solve,
+    skip_solve && return DescentResult(; δu, extras = (; a, v))
+
+    res_v = __internal_solve!(cache.descent_cache, J, fu, u, Val(2N - 1); skip_solve,
         kwargs...)
+    v = res_v.δu
 
     @bb @. cache.u_cache = u + cache.h * v
     cache.fu_cache = evaluate_f!!(cache.f, cache.fu_cache, cache.u_cache, cache.p)
@@ -117,8 +116,9 @@ function __internal_solve!(
     Jv = _restructure(cache.fu_cache, cache.Jv)
     @bb @. cache.fu_cache = (2 / cache.h) * ((cache.fu_cache - fu) / cache.h - Jv)
 
-    a, _, _ = __internal_solve!(cache.descent_cache, J, cache.fu_cache, u, Val(2N);
+    res_a = __internal_solve!(cache.descent_cache, J, cache.fu_cache, u, Val(2N);
         skip_solve, kwargs..., reuse_A_if_factorization = true)
+    a = res_a.δu
 
     norm_v = cache.internalnorm(v)
     norm_a = cache.internalnorm(a)
@@ -131,5 +131,5 @@ function __internal_solve!(
         cache.last_step_accepted = false
     end
 
-    return δu, cache.last_step_accepted, (; a, v)
+    return DescentResult(; δu, success = cache.last_step_accepted, extras = (; a, v))
 end
diff --git a/src/descent/multistep.jl b/src/descent/multistep.jl
new file mode 100644
index 000000000..eae086493
--- /dev/null
+++ b/src/descent/multistep.jl
@@ -0,0 +1,162 @@
+"""
+    MultiStepSchemes
+
+This module defines the multistep schemes used in the multistep descent algorithms. The
+naming convention follows <name of method><order of convergence>. The name of method is
+typically the last names of the authors of the paper that introduced the method.
+"""
+module MultiStepSchemes
+
+using ConcreteStructs
+
+abstract type AbstractMultiStepScheme end
+
+function Base.show(io::IO, mss::AbstractMultiStepScheme)
+    print(io, "MultiStepSchemes.$(string(nameof(typeof(mss)))[3:end])")
+end
+
+newton_steps(::Type{T}) where {T <: AbstractMultiStepScheme} = newton_steps(T())
+
+struct __PotraPtak3 <: AbstractMultiStepScheme end
+const PotraPtak3 = __PotraPtak3()
+
+newton_steps(::__PotraPtak3) = 2
+nintermediates(::__PotraPtak3) = 1
+
+@kwdef @concrete struct __SinghSharma4 <: AbstractMultiStepScheme
+    jvp_autodiff = nothing
+end
+const SinghSharma4 = __SinghSharma4()
+
+newton_steps(::__SinghSharma4) = 4
+nintermediates(::__SinghSharma4) = 2
+
+@kwdef @concrete struct __SinghSharma5 <: AbstractMultiStepScheme
+    jvp_autodiff = nothing
+end
+const SinghSharma5 = __SinghSharma5()
+
+newton_steps(::__SinghSharma5) = 4
+nintermediates(::__SinghSharma5) = 2
+
+@kwdef @concrete struct __SinghSharma7 <: AbstractMultiStepScheme
+    jvp_autodiff = nothing
+end
+const SinghSharma7 = __SinghSharma7()
+
+newton_steps(::__SinghSharma7) = 6
+
+@generated function display_name(alg::T) where {T <: AbstractMultiStepScheme}
+    res = Symbol(first(split(last(split(string(T), ".")), "{"; limit = 2))[3:end])
+    return :($(Meta.quot(res)))
+end
+
+end
+
+const MSS = MultiStepSchemes
+
+@kwdef @concrete struct GenericMultiStepDescent <: AbstractDescentAlgorithm
+    scheme
+    linsolve = nothing
+    precs = DEFAULT_PRECS
+end
+
+Base.show(io::IO, alg::GenericMultiStepDescent) = print(io, "$(alg.scheme)()")
+
+supports_line_search(::GenericMultiStepDescent) = true
+supports_trust_region(::GenericMultiStepDescent) = false
+
+@concrete mutable struct GenericMultiStepDescentCache{S} <: AbstractDescentCache
+    f
+    p
+    δu
+    δus
+    u
+    us
+    fu
+    fus
+    internal_cache
+    internal_caches
+    extra
+    extras
+    scheme::S
+    timer
+    nf::Int
+end
+
+# FIXME: @internal_caches needs to be updated to support tuples and namedtuples
+# @internal_caches GenericMultiStepDescentCache :internal_caches
+
+function __reinit_internal!(cache::GenericMultiStepDescentCache, args...; p = cache.p,
+        kwargs...)
+    cache.nf = 0
+    cache.p = p
+    reset_timer!(cache.timer)
+end
+
+function __internal_multistep_caches(
+        scheme::Union{MSS.__PotraPtak3, MSS.__SinghSharma4, MSS.__SinghSharma5},
+        alg::GenericMultiStepDescent, prob, args...;
+        shared::Val{N} = Val(1), kwargs...) where {N}
+    internal_descent = NewtonDescent(; alg.linsolve, alg.precs)
+    return @shared_caches N __internal_init(
+        prob, internal_descent, args...; kwargs..., shared = Val(2))
+end
+
+__extras_cache(::MSS.AbstractMultiStepScheme, args...; kwargs...) = nothing, nothing
+
+function __internal_init(prob::Union{NonlinearProblem, NonlinearLeastSquaresProblem},
+        alg::GenericMultiStepDescent, J, fu, u; shared::Val{N} = Val(1),
+        pre_inverted::Val{INV} = False, linsolve_kwargs = (;),
+        abstol = nothing, reltol = nothing, timer = get_timer_output(),
+        kwargs...) where {INV, N}
+    δu, δus = @shared_caches N (@bb δu = similar(u))
+    fu_cache, fus_cache = @shared_caches N (ntuple(MSS.nintermediates(alg.scheme)) do i
+        @bb xx = similar(fu)
+    end)
+    u_cache, us_cache = @shared_caches N (ntuple(MSS.nintermediates(alg.scheme)) do i
+        @bb xx = similar(u)
+    end)
+    internal_cache, internal_caches = __internal_multistep_caches(
+        alg.scheme, alg, prob, J, fu, u; shared, pre_inverted, linsolve_kwargs,
+        abstol, reltol, timer, kwargs...)
+    extra, extras = __extras_cache(
+        alg.scheme, alg, prob, J, fu, u; shared, pre_inverted, linsolve_kwargs,
+        abstol, reltol, timer, kwargs...)
+    return GenericMultiStepDescentCache(
+        prob.f, prob.p, δu, δus, u_cache, us_cache, fu_cache, fus_cache,
+        internal_cache, internal_caches, extra, extras, alg.scheme, timer, 0)
+end
+
+function __internal_solve!(cache::GenericMultiStepDescentCache{MSS.__PotraPtak3, INV}, J,
+        fu, u, idx::Val = Val(1); skip_solve::Bool = false, new_jacobian::Bool = true,
+        kwargs...) where {INV}
+    δu = get_du(cache, idx)
+    skip_solve && return DescentResult(; δu)
+
+    (y,) = get_internal_cache(cache, Val(:u), idx)
+    (fy,) = get_internal_cache(cache, Val(:fu), idx)
+    internal_cache = get_internal_cache(cache, Val(:internal_cache), idx)
+
+    @static_timeit cache.timer "descent step" begin
+        result_1 = __internal_solve!(
+            internal_cache, J, fu, u, Val(1); new_jacobian, kwargs...)
+        δx = result_1.δu
+
+        @bb @. y = u + δx
+        fy = evaluate_f!!(cache.f, fy, y, cache.p)
+        cache.nf += 1
+
+        result_2 = __internal_solve!(
+            internal_cache, J, fy, y, Val(2); kwargs...)
+        δy = result_2.δu
+
+        @bb @. δu = δx + δy
+    end
+
+    set_du!(cache, δu, idx)
+    set_internal_cache!(cache, (y,), Val(:u), idx)
+    set_internal_cache!(cache, (fy,), Val(:fu), idx)
+    set_internal_cache!(cache, internal_cache, Val(:internal_cache), idx)
+    return DescentResult(; δu)
+end
diff --git a/src/descent/newton.jl b/src/descent/newton.jl
index c8ba35ed9..52f8e9743 100644
--- a/src/descent/newton.jl
+++ b/src/descent/newton.jl
@@ -36,10 +36,7 @@ function __internal_init(prob::NonlinearProblem, alg::NewtonDescent, J, fu, u;
         shared::Val{N} = Val(1), pre_inverted::Val{INV} = False, linsolve_kwargs = (;),
         abstol = nothing, reltol = nothing, timer = get_timer_output(),
         kwargs...) where {INV, N}
-    @bb δu = similar(u)
-    δus = N ≤ 1 ? nothing : map(2:N) do i
-        @bb δu_ = similar(u)
-    end
+    δu, δus = @shared_caches N (@bb δu = similar(u))
     INV && return NewtonDescentCache{true, false}(δu, δus, nothing, nothing, nothing, timer)
     lincache = LinearSolverCache(alg, alg.linsolve, J, _vec(fu), _vec(u); abstol, reltol,
         linsolve_kwargs...)
@@ -64,10 +61,7 @@ function __internal_init(prob::NonlinearLeastSquaresProblem, alg::NewtonDescent,
     end
     lincache = LinearSolverCache(alg, alg.linsolve, A, b, _vec(u); abstol, reltol,
         linsolve_kwargs...)
-    @bb δu = similar(u)
-    δus = N ≤ 1 ? nothing : map(2:N) do i
-        @bb δu_ = similar(u)
-    end
+    δu, δus = @shared_caches N (@bb δu = similar(u))
     return NewtonDescentCache{false, normal_form}(δu, δus, lincache, JᵀJ, Jᵀfu, timer)
 end
 
@@ -75,7 +69,7 @@ function __internal_solve!(cache::NewtonDescentCache{INV, false}, J, fu, u,
         idx::Val = Val(1); skip_solve::Bool = false, new_jacobian::Bool = true,
         kwargs...) where {INV}
     δu = get_du(cache, idx)
-    skip_solve && return δu, true, (;)
+    skip_solve && return DescentResult(; δu)
     if INV
         @assert J!==nothing "`J` must be provided when `pre_inverted = Val(true)`."
         @bb δu = J × vec(fu)
@@ -88,13 +82,13 @@ function __internal_solve!(cache::NewtonDescentCache{INV, false}, J, fu, u,
     end
     @bb @. δu *= -1
     set_du!(cache, δu, idx)
-    return δu, true, (;)
+    return DescentResult(; δu)
 end
 
 function __internal_solve!(cache::NewtonDescentCache{false, true}, J, fu, u,
         idx::Val = Val(1); skip_solve::Bool = false, new_jacobian::Bool = true, kwargs...)
     δu = get_du(cache, idx)
-    skip_solve && return δu, true, (;)
+    skip_solve && return DescentResult(; δu)
     if idx === Val(1)
         @bb cache.JᵀJ_cache = transpose(J) × J
     end
@@ -107,5 +101,5 @@ function __internal_solve!(cache::NewtonDescentCache{false, true}, J, fu, u,
     end
     @bb @. δu *= -1
     set_du!(cache, δu, idx)
-    return δu, true, (;)
+    return DescentResult(; δu)
 end
diff --git a/src/descent/steepest.jl b/src/descent/steepest.jl
index d19505a86..9fd7cc9a9 100644
--- a/src/descent/steepest.jl
+++ b/src/descent/steepest.jl
@@ -34,10 +34,7 @@ end
         linsolve_kwargs = (;), abstol = nothing, reltol = nothing,
         timer = get_timer_output(), kwargs...) where {INV, N}
     INV && @assert length(fu)==length(u) "Non-Square Jacobian Inverse doesn't make sense."
-    @bb δu = similar(u)
-    δus = N ≤ 1 ? nothing : map(2:N) do i
-        @bb δu_ = similar(u)
-    end
+    δu, δus = @shared_caches N (@bb δu = similar(u))
     if INV
         lincache = LinearSolverCache(alg, alg.linsolve, transpose(J), _vec(fu), _vec(u);
             abstol, reltol, linsolve_kwargs...)
@@ -63,5 +60,5 @@ function __internal_solve!(cache::SteepestDescentCache{INV}, J, fu, u, idx::Val
     end
     @bb @. δu *= -1
     set_du!(cache, δu, idx)
-    return δu, true, (;)
+    return DescentResult(; δu)
 end
diff --git a/src/internal/helpers.jl b/src/internal/helpers.jl
index 4f475214b..8226f6cf7 100644
--- a/src/internal/helpers.jl
+++ b/src/internal/helpers.jl
@@ -268,3 +268,41 @@ function __internal_caches(__source__, __module__, cType, internal_cache_names::
         end
     end)
 end
+
+"""
+    apply_patch(scheme, patch::NamedTuple{names})
+
+Applies the patch to the scheme, returning the new scheme. If some of the `names` are not,
+present in the scheme, they are ignored.
+"""
+@generated function apply_patch(scheme, patch::NamedTuple{names}) where {names}
+    exprs = []
+    for name in names
+        hasfield(scheme, name) || continue
+        push!(exprs, quote
+            lens = PropertyLens{$(Meta.quot(name))}()
+            return set(scheme, lens, getfield(patch, $(Meta.quot(name))))
+        end)
+    end
+    push!(exprs, :(return scheme))
+    return Expr(:block, exprs...)
+end
+
+"""
+    @shared_caches N expr
+
+Create a shared cache and a vector of caches. If `N` is 1, then the vector of caches is
+`nothing`.
+"""
+macro shared_caches(N, expr)
+    @gensym cache caches
+    return esc(quote
+        begin
+            $(cache) = $(expr)
+            $(caches) = $(N) ≤ 1 ? nothing : map(2:($(N))) do i
+                $(expr)
+            end
+            ($cache, $caches)
+        end
+    end)
+end
diff --git a/src/internal/tracing.jl b/src/internal/tracing.jl
index 667c6ce07..bfb93c6d7 100644
--- a/src/internal/tracing.jl
+++ b/src/internal/tracing.jl
@@ -187,6 +187,7 @@ function update_trace!(cache::AbstractNonlinearSolveCache, α = true)
     trace === nothing && return nothing
 
     J = __getproperty(cache, Val(:J))
+    # TODO: fix tracing for multi-step methods where du is not aliased properly
     if J === nothing
         update_trace!(trace, get_nsteps(cache) + 1, get_u(cache), get_fu(cache),
             nothing, cache.du, α)