From 8579fde6d6577bb4102626558795d51f510e3671 Mon Sep 17 00:00:00 2001 From: "Vandenplas, Jeremie" Date: Thu, 9 May 2024 22:11:00 +0200 Subject: [PATCH 1/2] Example to move the optimizer to the layer level --- src/nf/nf_dense_layer.f90 | 22 ++++++++++++++ src/nf/nf_dense_layer_submodule.f90 | 43 +++++++++++++++++++++++++++ src/nf/nf_network.f90 | 13 +++++++++ src/nf/nf_network_submodule.f90 | 45 +++++++++++++++++++++++++++-- 4 files changed, 120 insertions(+), 3 deletions(-) diff --git a/src/nf/nf_dense_layer.f90 b/src/nf/nf_dense_layer.f90 index ae523ccb..32c7f878 100644 --- a/src/nf/nf_dense_layer.f90 +++ b/src/nf/nf_dense_layer.f90 @@ -4,6 +4,7 @@ module nf_dense_layer !! It is used internally by the layer type. !! It is not intended to be used directly by the user. + use nf_optimizers, only: optimizer_base_type use nf_activation, only: activation_function use nf_base_layer, only: base_layer @@ -28,6 +29,8 @@ module nf_dense_layer real, allocatable :: db(:) ! bias gradients class(activation_function), allocatable :: activation + class(optimizer_base_type), allocatable :: optimizer_1d + class(optimizer_base_type), allocatable :: optimizer_2d contains @@ -38,6 +41,8 @@ module nf_dense_layer procedure :: get_params procedure :: init procedure :: set_params + procedure :: apply_optimizer + procedure :: set_optimizer end type dense_layer @@ -122,8 +127,25 @@ module subroutine init(self, input_shape) !! Dense layer instance integer, intent(in) :: input_shape(:) !! Shape of the input layer + end subroutine init + module subroutine apply_optimizer(self, batch_size) + class(dense_layer), intent(in out), target :: self + integer, intent(in) :: batch_size + end subroutine apply_optimizer + + module subroutine set_optimizer(self, optimizer) + !! Set the optimizers used to update this layer's biases and weights. + !! + !! If `optimizer` is absent, Adam is used; already-allocated optimizers are left unchanged. + class(dense_layer), intent(in out) :: self + !! 
Dense layer instance + class(optimizer_base_type), intent(in), optional :: optimizer + + end subroutine set_optimizer + + end interface end module nf_dense_layer diff --git a/src/nf/nf_dense_layer_submodule.f90 b/src/nf/nf_dense_layer_submodule.f90 index 4be23e33..e7145a3e 100644 --- a/src/nf/nf_dense_layer_submodule.f90 +++ b/src/nf/nf_dense_layer_submodule.f90 @@ -1,5 +1,6 @@ submodule(nf_dense_layer) nf_dense_layer_submodule + use nf_optimizers, only: adam use nf_activation, only: activation_function use nf_base_layer, only: base_layer use nf_random, only: random_normal @@ -144,4 +145,46 @@ module subroutine init(self, input_shape) end subroutine init + + module subroutine set_optimizer(self, optimizer) + class(dense_layer), intent(in out) :: self + class(optimizer_base_type), intent(in), optional:: optimizer + + if (.not. allocated(self % optimizer_1d)) then + if (present(optimizer)) then + self % optimizer_1d = optimizer + else + self % optimizer_1d = adam(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1.e-7) + end if + call self % optimizer_1d % init(self % output_size) + end if + if (.not. 
allocated(self % optimizer_2d)) then + if (present(optimizer)) then + self % optimizer_2d = optimizer + else + self % optimizer_2d = adam(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1.e-7) + end if + call self % optimizer_2d % init(self % input_size * self % output_size) + end if + + end subroutine set_optimizer + + module subroutine apply_optimizer(self, batch_size) + class(dense_layer), intent(in out), target :: self + integer, intent(in) :: batch_size + + real, pointer :: w_(:), dw_(:) + + call self % optimizer_1d % minimize( self % biases, self % db / batch_size) + + associate(n => self % input_size * self % output_size) + w_(1:n) => self % weights + dw_(1:n) => self % dw + call self % optimizer_2d % minimize( w_, dw_ / batch_size) + end associate + + + end subroutine apply_optimizer + + end submodule nf_dense_layer_submodule diff --git a/src/nf/nf_network.f90 b/src/nf/nf_network.f90 index 4346f4e3..52a9a792 100644 --- a/src/nf/nf_network.f90 +++ b/src/nf/nf_network.f90 @@ -25,6 +25,8 @@ module nf_network procedure :: get_params procedure :: print_info procedure :: set_params + procedure :: apply_optimizer + procedure :: set_optimizers procedure :: train procedure :: update @@ -229,6 +231,17 @@ module subroutine update(self, optimizer, batch_size) !! Set to `size(input_data, dim=2)` for a batch gradient descent. 
end subroutine update + module subroutine set_optimizers(self, optimizer) + class(network), intent(in out) :: self + class(optimizer_base_type), intent(in) :: optimizer + end subroutine set_optimizers + + module subroutine apply_optimizer(self, batch_size) + class(network), intent(in out) :: self + integer, intent(in) :: batch_size + end subroutine + + end interface end module nf_network diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 59abcf7d..d288e4e1 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -556,6 +556,25 @@ module subroutine set_params(self, params) end subroutine set_params + module subroutine set_optimizers(self, optimizer) + class(network), intent(in out) :: self + class(optimizer_base_type), intent(in) :: optimizer + + integer :: n + + do n = 1, size(self % layers) + + select type (this_layer => self % layers(n) % p) + + type is (dense_layer) + call this_layer % set_optimizer(optimizer) + + end select + + end do + + end subroutine set_optimizers + module subroutine train(self, input_data, output_data, batch_size, & epochs, optimizer, loss) @@ -584,6 +603,8 @@ module subroutine train(self, input_data, output_data, batch_size, & call self % optimizer % init(self % get_num_params()) + call self % set_optimizers(optimizer) + ! Passing the loss instance is optional. ! If not provided, we default to quadratic(). if (present(loss)) then @@ -667,9 +688,11 @@ module subroutine update(self, optimizer, batch_size) end select end do - params = self % get_params() - call self % optimizer % minimize(params, self % get_gradients() / batch_size_) - call self % set_params(params) +! params = self % get_params() +! call self % optimizer % minimize(params, self % get_gradients() / batch_size_) +! call self % set_params(params) + + call self % apply_optimizer(batch_size_) ! Flush network gradients to zero. 
do concurrent(n = 2:size(self % layers)) @@ -685,4 +708,20 @@ module subroutine update(self, optimizer, batch_size) end subroutine update + + module subroutine apply_optimizer(self, batch_size) + class(network), intent(in out) :: self + integer, intent(in) :: batch_size + + integer :: n + + do n = 2, size(self % layers) ! plain loop: the dense-layer apply_optimizer is not pure + select type(this_layer => self % layers(n) % p) + type is(dense_layer) + call this_layer % apply_optimizer(batch_size) + end select + end do + + end subroutine apply_optimizer + end submodule nf_network_submodule From 1dbcb802525fb508f0abf1ccb8efad08b7e5d7ce Mon Sep 17 00:00:00 2001 From: "Vandenplas, Jeremie" Date: Fri, 14 Jun 2024 19:24:11 +0200 Subject: [PATCH 2/2] format --- src/nf/nf_dense_layer.f90 | 3 --- src/nf/nf_dense_layer_submodule.f90 | 2 -- src/nf/nf_network.f90 | 1 - 3 files changed, 6 deletions(-) diff --git a/src/nf/nf_dense_layer.f90 b/src/nf/nf_dense_layer.f90 index 911589c1..1d4f6b9c 100644 --- a/src/nf/nf_dense_layer.f90 +++ b/src/nf/nf_dense_layer.f90 @@ -127,7 +127,6 @@ module subroutine init(self, input_shape) !! Dense layer instance integer, intent(in) :: input_shape(:) !! 
Shape of the input layer - end subroutine init module subroutine apply_optimizer(self, batch_size) @@ -144,8 +143,6 @@ module subroutine set_optimizer(self, optimizer) class(optimizer_base_type), intent(in), optional :: optimizer end subroutine set_optimizer - - end interface end module nf_dense_layer diff --git a/src/nf/nf_dense_layer_submodule.f90 b/src/nf/nf_dense_layer_submodule.f90 index 3fb80e32..26e9b3fe 100644 --- a/src/nf/nf_dense_layer_submodule.f90 +++ b/src/nf/nf_dense_layer_submodule.f90 @@ -152,7 +152,6 @@ module subroutine init(self, input_shape) end subroutine init - module subroutine set_optimizer(self, optimizer) class(dense_layer), intent(in out) :: self class(optimizer_base_type), intent(in), optional:: optimizer @@ -193,5 +192,4 @@ module subroutine apply_optimizer(self, batch_size) end subroutine apply_optimizer - end submodule nf_dense_layer_submodule diff --git a/src/nf/nf_network.f90 b/src/nf/nf_network.f90 index 0ce4b8ee..2eaf01e3 100644 --- a/src/nf/nf_network.f90 +++ b/src/nf/nf_network.f90 @@ -254,7 +254,6 @@ module subroutine apply_optimizer(self, batch_size) integer, intent(in) :: batch_size end subroutine - end interface end module nf_network