Fc2d layer #208

Open: wants to merge 8 commits into main
2 changes: 2 additions & 0 deletions CMakeLists.txt
@@ -27,6 +27,8 @@ add_library(neural-fortran
src/nf/nf_datasets_mnist_submodule.f90
src/nf/nf_dense_layer.f90
src/nf/nf_dense_layer_submodule.f90
src/nf/nf_fc2d_layer.f90
src/nf/nf_fc2d_layer_submodule.f90
src/nf/nf_flatten_layer.f90
src/nf/nf_flatten_layer_submodule.f90
src/nf/nf_input1d_layer.f90
1 change: 1 addition & 0 deletions example/CMakeLists.txt
@@ -7,6 +7,7 @@ foreach(execid
sine
quadratic
mha_simple
simple_2d_mlp
)
add_executable(${execid} ${execid}.f90)
target_link_libraries(${execid} PRIVATE
35 changes: 35 additions & 0 deletions example/simple_2d_mlp.f90
@@ -0,0 +1,35 @@
program simple
use nf, only: dense, fc2d, flatten, linear2d, input, network, sgd, relu, tanhf
implicit none
type(network) :: net
real, allocatable :: x(:, :), y(:)
integer, parameter :: num_iterations = 25
integer :: n

print '("Simple")'
print '(60("="))'

net = network([ &
input(4, 5), &
fc2d(3, 2, activation=relu()), &
flatten(), &
dense(4, activation=tanhf()) &
])

call net % print_info()

allocate(x(4, 5))
call random_number(x)
y = [0.123456, 0.246802, 0.9, 0.001]

do n = 0, num_iterations

call net % forward(x)
call net % backward(y)
call net % update(optimizer=sgd(learning_rate=0.05))

if (mod(n, 5) == 0) print *, n, net % predict(x)

end do

end program simple
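A note on the shapes in this example: input(4, 5) produces a rank-2 output of shape (4, 5); fc2d(3, 2, activation=relu()) projects it first to a hidden size of 3 and then to an output size of 2, giving (4, 2); flatten() turns that into a vector of 8 elements; and dense(4) maps it to the 4 targets in y.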
3 changes: 2 additions & 1 deletion src/nf.f90
@@ -11,7 +11,8 @@ module nf
linear2d, &
maxpool2d, &
reshape, &
self_attention
self_attention, &
fc2d
use nf_loss, only: mse, quadratic
use nf_metrics, only: corr, maxabs
use nf_network, only: network
84 changes: 84 additions & 0 deletions src/nf/nf_fc2d_layer.f90
@@ -0,0 +1,84 @@
module nf_fc2d_layer
use iso_fortran_env, only: stderr => error_unit
use nf_activation, only: activation_function
use nf_base_layer, only: base_layer
use nf_linear2d_layer, only: linear2d_layer

implicit none

private
public :: fc2d_layer

type, extends(base_layer) :: fc2d_layer
!! Fully Connected 2D Layer
!! Two Linear layers with an activation function in between
integer :: sequence_length, model_dimension, hidden_size, output_size

type(linear2d_layer) :: in_proj
type(linear2d_layer) :: out_proj

class(activation_function), allocatable :: activation

real, allocatable :: gradient(:, :)
real, allocatable :: in_proj_input(:, :)
real, allocatable :: out_proj_input(:, :)

real, allocatable :: output(:, :)

contains
procedure :: backward
procedure :: forward
procedure :: get_num_params
procedure :: get_params
procedure :: get_gradients
procedure :: set_params
procedure :: init
end type fc2d_layer

interface fc2d_layer
module function fc2d_layer_cons(hidden_size, output_size, activation) result(res)
!! This function returns the `fc2d_layer` instance.
integer, intent(in) :: hidden_size, output_size
class(activation_function), intent(in) :: activation
type(fc2d_layer) :: res
end function fc2d_layer_cons
end interface fc2d_layer

interface
module subroutine init(self, input_shape)
class(fc2d_layer), intent(in out) :: self
integer, intent(in) :: input_shape(:)
end subroutine init

pure module subroutine forward(self, input)
class(fc2d_layer), intent(in out) :: self
real, intent(in) :: input(:, :)
end subroutine forward

pure module subroutine backward(self, input, gradient)
class(fc2d_layer), intent(in out) :: self
real, intent(in) :: input(:, :)
real, intent(in) :: gradient(:, :)
end subroutine backward

elemental module function get_num_params(self) result(num_params)
class(fc2d_layer), intent(in) :: self
integer :: num_params
end function get_num_params

module function get_params(self) result(params)
class(fc2d_layer), intent(in) :: self
real, allocatable :: params(:)
end function get_params

module function get_gradients(self) result(gradients)
class(fc2d_layer), intent(in), target :: self
real, allocatable :: gradients(:)
end function get_gradients

module subroutine set_params(self, params)
class(fc2d_layer), intent(in out) :: self
real, intent(in) :: params(:)
end subroutine set_params
end interface
end module nf_fc2d_layer
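In equation form, and assuming the usual linear2d convention of output = matmul(input, weights) + biases, with W_in of shape (model_dimension, hidden_size) and W_out of shape (hidden_size, output_size) as implied by set_params in the submodule below, the layer computes

H = activation(X W_in + b_in)
Y = H W_out + b_out

where X has shape (sequence_length, model_dimension), the activation is applied row by row, and Y has shape (sequence_length, output_size).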
140 changes: 140 additions & 0 deletions src/nf/nf_fc2d_layer_submodule.f90
@@ -0,0 +1,140 @@
submodule(nf_fc2d_layer) nf_fc2d_layer_submodule
use nf_activation, only: activation_function
use nf_base_layer, only: base_layer
use nf_linear2d_layer, only: linear2d_layer

implicit none

contains
module function fc2d_layer_cons(hidden_size, output_size, activation) result(res)
!! This function returns the `fc2d_layer` instance.
integer, intent(in) :: hidden_size, output_size
class(activation_function), intent(in) :: activation
type(fc2d_layer) :: res

res % hidden_size = hidden_size
res % output_size = output_size
res % activation_name = activation % get_name()
! FIXME: implement correct derivative for `softmax`
if (res % activation_name == 'softmax') then
write(stderr, '(a)') '`softmax` activation is temporarily unavailable'
error stop 1
end if
allocate(res % activation, source = activation)
end function fc2d_layer_cons

module subroutine init(self, input_shape)
class(fc2d_layer), intent(in out) :: self
integer, intent(in) :: input_shape(:)

if (size(input_shape) /= 2) then
error stop "fc2d_layer accepts 2D input"
end if

self % sequence_length = input_shape(1)
self % model_dimension = input_shape(2)

self % in_proj = linear2d_layer(self % hidden_size)
call self % in_proj % init([self % sequence_length, self % model_dimension])

self % out_proj = linear2d_layer(self % output_size)
call self % out_proj % init([self % sequence_length, self % hidden_size])

allocate(self % in_proj_input(self % sequence_length, self % model_dimension))
allocate(self % out_proj_input(self % sequence_length, self % hidden_size))

allocate(self % output(self % sequence_length, self % output_size))

allocate(self % gradient, mold=self % in_proj % gradient)
end subroutine init

pure module subroutine forward(self, input)
class(fc2d_layer), intent(in out) :: self
real, intent(in) :: input(:, :)
integer :: i

self % in_proj_input = input
call self % in_proj % forward(input)

do concurrent(i = 1: self % sequence_length)
self % out_proj_input(i, :) = self % activation % eval_1d(self % in_proj % output(i, :))
end do

call self % out_proj % forward(self % out_proj_input)
self % output = self % out_proj % output
end subroutine forward

pure module subroutine backward(self, input, gradient)
class(fc2d_layer), intent(in out) :: self
real, intent(in) :: input(:, :)
real, intent(in) :: gradient(:, :)
integer :: i

call self % out_proj % backward(self % out_proj_input, gradient)
! Chain rule: the gradient w.r.t. the in_proj output is the gradient w.r.t. the
! out_proj input multiplied elementwise by activation'(in_proj output)
do concurrent(i = 1: self % sequence_length)
self % out_proj % gradient(i, :) = &
self % out_proj % gradient(i, :) &
* (self % activation % eval_1d_prime(self % in_proj % output(i, :)))
end do
call self % in_proj % backward(self % in_proj_input, self % out_proj % gradient)

self % gradient = self % in_proj % gradient
end subroutine backward

elemental module function get_num_params(self) result(num_params)
class(fc2d_layer), intent(in) :: self
integer :: num_params

num_params = self % in_proj % get_num_params() + self % out_proj % get_num_params()
end function get_num_params

module function get_params(self) result(params)
class(fc2d_layer), intent(in) :: self
real, allocatable :: params(:)

params = [&
self % in_proj % weights,&
self % out_proj % weights,&
self % in_proj % biases,&
self % out_proj % biases&
]
end function get_params

module function get_gradients(self) result(gradients)
class(fc2d_layer), intent(in), target :: self
real, allocatable :: gradients(:)

gradients = [ &
self % in_proj % dw,&
self % out_proj % dw,&
self % in_proj % db,&
self % out_proj % db&
]
end function get_gradients

module subroutine set_params(self, params)
class(fc2d_layer), intent(in out) :: self
real, intent(in) :: params(:)
integer :: i, j, window

! check if the number of parameters is correct
if (size(params) /= self % get_num_params()) then
error stop 'Error: number of parameters does not match'
end if

! FIXME: looks clumsy, better ideas?
i = 1
j = self % model_dimension * self % hidden_size
self % in_proj % weights = reshape(params(i: j), [self % model_dimension, self % hidden_size])
i = j + 1
j = i + self % hidden_size * self % output_size - 1
self % out_proj % weights = reshape(params(i: j), [self % hidden_size, self % output_size])
i = j + 1
j = i + self % hidden_size - 1
self % in_proj % biases = params(i: j)
i = j + 1
j = i + self % output_size - 1
self % out_proj % biases = params(i: j)
end subroutine set_params
end submodule nf_fc2d_layer_submodule
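For reference, get_params and get_gradients pack values in the order [in_proj weights, out_proj weights, in_proj biases, out_proj biases], and set_params expects the same flat layout. Below is a minimal sketch of the expected parameter count; the program name fc2d_param_count is hypothetical and the dimensions are taken from the simple_2d_mlp example.

program fc2d_param_count
  ! Sketch only: computes the flat parameter count that
  ! fc2d_layer % get_params() returns and set_params() expects,
  ! i.e. both weight matrices followed by both bias vectors.
  implicit none
  integer, parameter :: model_dimension = 5, hidden_size = 3, output_size = 2
  integer :: n_params

  n_params = model_dimension * hidden_size   & ! in_proj % weights
           + hidden_size * output_size       & ! out_proj % weights
           + hidden_size                     & ! in_proj % biases
           + output_size                       ! out_proj % biases

  ! For these dimensions: 5*3 + 3*2 + 3 + 2 = 26
  print '("expected size(params) = ", i0)', n_params
end program fc2d_param_count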
26 changes: 16 additions & 10 deletions src/nf/nf_layer_constructors.f90
@@ -17,7 +17,8 @@ module nf_layer_constructors
linear2d, &
maxpool2d, &
reshape, &
self_attention
self_attention, &
fc2d

interface input

@@ -222,16 +223,21 @@ module function linear2d(out_features) result(res)
!! Resulting layer instance
end function linear2d

module function self_attention(num_heads) result(res)
!! Rank-2 (sequence_length, out_features) self attention constructor.
!! sequence_length and model_dimension are determined at layer initialization, based on the
!! output shape of the previous layer.
integer, intent(in) :: num_heads
!! Number of attention heads
type(layer) :: res
!! Resulting layer instance
end function self_attention

module function fc2d(hidden_size, output_size, activation) result(res)
!! Rank-2 (sequence_length, output_size) fully connected layer constructor:
!! two linear2d projections with an activation function in between.
integer, intent(in) :: hidden_size, output_size
!! Hidden and output projection sizes
class(activation_function), intent(in) :: activation
!! Activation function applied between the two projections
type(layer) :: res
!! Resulting layer instance
end function fc2d
end interface

end module nf_layer_constructors
10 changes: 10 additions & 0 deletions src/nf/nf_layer_constructors_submodule.f90
@@ -12,6 +12,7 @@
use nf_reshape_layer, only: reshape3d_layer
use nf_linear2d_layer, only: linear2d_layer
use nf_self_attention_layer, only: self_attention_layer
use nf_fc2d_layer, only: fc2d_layer
use nf_activation, only: activation_function, relu, sigmoid

implicit none
@@ -179,4 +180,13 @@ module function self_attention(num_heads) result(res)
allocate(res % p, source=self_attention_layer(num_heads))
end function self_attention

module function fc2d(hidden_size, output_size, activation) result(res)
integer, intent(in) :: hidden_size, output_size
class(activation_function), intent(in) :: activation
type(layer) :: res

res % name = 'fc2d'
allocate(res % p, source=fc2d_layer(hidden_size, output_size, activation))
end function fc2d

end submodule nf_layer_constructors_submodule