diff --git a/CMakeLists.txt b/CMakeLists.txt
index 490f7ff1..91aad008 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -49,6 +49,8 @@ add_library(neural-fortran
   src/nf/nf_random.f90
   src/nf/nf_reshape_layer.f90
   src/nf/nf_reshape_layer_submodule.f90
+  src/nf/nf_rnn_layer.f90
+  src/nf/nf_rnn_layer_submodule.f90
   src/nf/io/nf_io_binary.f90
   src/nf/io/nf_io_binary_submodule.f90
 )
diff --git a/example/simple_rnn.f90 b/example/simple_rnn.f90
new file mode 100644
index 00000000..dcb1db7f
--- /dev/null
+++ b/example/simple_rnn.f90
@@ -0,0 +1,48 @@
+program simple_rnn
+  use nf, only: dense, input, network, rnn, sgd
+  implicit none
+  type(network) :: net
+  real, allocatable :: x(:), y(:), p(:)
+  integer, parameter :: num_iterations = 1000
+  integer :: n, l
+
+  allocate(p(2))
+
+  print '("Simple RNN")'
+  print '(60("="))'
+
+  net = network([ &
+    input(3), &
+    rnn(5), &
+    rnn(1) &
+  ])
+
+  call net % print_info()
+
+  x = [0.2, 0.4, 0.6]
+  y = [0.123456, 0.246802]
+
+  do n = 0, num_iterations
+
+    do l = 1, size(net % layers)
+      if (net % layers(l) % name == 'rnn') call net % layers(l) % set_state()
+    end do
+
+    if (mod(n, 100) == 0) then
+      p(1:1) = net % predict(x)
+      p(2:2) = net % predict(x)
+      print '(i4,2(3x,f8.6))', n, p
+
+    else
+
+      call net % forward(x)
+      call net % backward(y(1:1))
+      call net % update(optimizer=sgd(learning_rate=.001))
+      call net % forward(x)
+      call net % backward(y(2:2))
+      call net % update(optimizer=sgd(learning_rate=.001))
+    end if
+
+  end do
+
+end program simple_rnn
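The example above drives the network with a single input vector. For sequence data, each time step would go through forward/backward in order, with the hidden state reset only between sequences. A minimal sketch of such a loop using the same nf API as the example; the arrays xs and ys, their shapes, and the learning rate are illustrative only, not part of the patch:

program rnn_sequence_sketch
  ! Sketch only, not part of the patch: drive the network over one
  ! sequence, resetting the hidden state before the sequence starts.
  use nf, only: input, network, rnn, sgd
  implicit none
  type(network) :: net
  real :: xs(3,4), ys(1,4)   ! illustrative sequence: 4 steps, 3 inputs, 1 target
  integer :: l, t

  net = network([input(3), rnn(5), rnn(1)])
  call random_number(xs)
  call random_number(ys)

  ! Reset the hidden state of all recurrent layers before a new sequence
  do l = 1, size(net % layers)
    if (net % layers(l) % name == 'rnn') call net % layers(l) % set_state()
  end do

  ! One gradient step per time step; state carries over between steps
  do t = 1, size(xs, dim=2)
    call net % forward(xs(:,t))
    call net % backward(ys(:,t))
    call net % update(optimizer=sgd(learning_rate=0.001))
  end do

end program rnn_sequence_sketch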
diff --git a/src/nf.f90 b/src/nf.f90
index b97d9e62..670f815e 100644
--- a/src/nf.f90
+++ b/src/nf.f90
@@ -3,7 +3,7 @@ module nf
   use nf_datasets_mnist, only: label_digits, load_mnist
   use nf_layer, only: layer
   use nf_layer_constructors, only: &
-    conv2d, dense, flatten, input, maxpool2d, reshape
+    conv2d, dense, flatten, input, maxpool2d, reshape, rnn
   use nf_loss, only: mse, quadratic
   use nf_metrics, only: corr, maxabs
   use nf_network, only: network
diff --git a/src/nf/nf_layer.f90 b/src/nf/nf_layer.f90
index ca5e9606..487e112e 100644
--- a/src/nf/nf_layer.f90
+++ b/src/nf/nf_layer.f90
@@ -30,6 +30,7 @@ module nf_layer
     procedure :: get_params
     procedure :: get_gradients
     procedure :: set_params
+    procedure :: set_state
     procedure :: init
     procedure :: print_info
 
@@ -153,6 +154,11 @@ module subroutine set_params(self, params)
         !! Parameters of this layer
     end subroutine set_params
 
+    pure module subroutine set_state(self, state)
+      class(layer), intent(inout) :: self
+      real, intent(in), optional :: state(:)
+    end subroutine set_state
+
   end interface
 
 end module nf_layer
diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90
index 309be6e4..956ffac5 100644
--- a/src/nf/nf_layer_constructors.f90
+++ b/src/nf/nf_layer_constructors.f90
@@ -8,7 +8,7 @@ module nf_layer_constructors
   implicit none
 
   private
-  public :: conv2d, dense, flatten, input, maxpool2d, reshape
+  public :: conv2d, dense, flatten, input, maxpool2d, reshape, rnn
 
   interface input
 
@@ -166,6 +166,29 @@ module function reshape(output_shape) result(res)
         !! Resulting layer instance
     end function reshape
 
+    pure module function rnn(layer_size, activation) result(res)
+      !! Recurrent (fully-connected) layer constructor.
+      !!
+      !! This layer is a building block for recurrent, fully-connected
+      !! networks.
+      !! A recurrent layer must not be the first layer in the network.
+      !!
+      !! Example:
+      !!
+      !! ```
+      !! use nf, only: rnn, layer, relu
+      !! type(layer) :: rnn_layer
+      !! rnn_layer = rnn(10)
+      !! rnn_layer = rnn(10, activation=relu())
+      !! ```
+      integer, intent(in) :: layer_size
+        !! The number of neurons in this layer
+      class(activation_function), intent(in), optional :: activation
+        !! Activation function instance (default tanh)
+      type(layer) :: res
+        !! Resulting layer instance
+    end function rnn
+
   end interface
 
 end module nf_layer_constructors
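The set_state binding declared in nf_layer.f90 above takes an optional state vector; omitting the argument resets the state to zero. A small sketch of both forms, with illustrative layer sizes and values:

program rnn_set_state_sketch
  ! Sketch only: exercise the optional argument of set_state().
  use nf, only: input, network, rnn
  implicit none
  type(network) :: net

  net = network([input(3), rnn(5), rnn(1)])

  ! Seed the hidden state of the rnn(5) layer with an explicit vector...
  call net % layers(2) % set_state([0.1, 0.2, 0.3, 0.4, 0.5])

  ! ...and clear it again by omitting the argument.
  call net % layers(2) % set_state()

end program rnn_set_state_sketch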
diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90
index 234b20b1..9c450a2a 100644
--- a/src/nf/nf_layer_constructors_submodule.f90
+++ b/src/nf/nf_layer_constructors_submodule.f90
@@ -8,7 +8,8 @@
   use nf_input3d_layer, only: input3d_layer
   use nf_maxpool2d_layer, only: maxpool2d_layer
   use nf_reshape_layer, only: reshape3d_layer
-  use nf_activation, only: activation_function, relu, sigmoid
+  use nf_rnn_layer, only: rnn_layer
+  use nf_activation, only: activation_function, relu, sigmoid, tanhf
 
   implicit none
 
@@ -134,4 +135,27 @@ module function reshape(output_shape) result(res)
 
   end function reshape
 
+  pure module function rnn(layer_size, activation) result(res)
+    integer, intent(in) :: layer_size
+    class(activation_function), intent(in), optional :: activation
+    type(layer) :: res
+
+    class(activation_function), allocatable :: activation_tmp
+
+    res % name = 'rnn'
+    res % layer_shape = [layer_size]
+
+    if (present(activation)) then
+      allocate(activation_tmp, source=activation)
+    else
+      allocate(activation_tmp, source=tanhf())
+    end if
+
+    res % activation = activation_tmp % get_name()
+
+    allocate(res % p, source=rnn_layer(layer_size, activation_tmp))
+
+  end function rnn
+
 end submodule nf_layer_constructors_submodule
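As the constructor above shows, tanh is the default activation when none is passed. A sketch of choosing a different one; the network shape is illustrative, and sigmoid is imported directly from nf_activation:

program rnn_activation_sketch
  ! Sketch only: build recurrent layers with and without an explicit activation.
  use nf, only: input, network, rnn
  use nf_activation, only: sigmoid
  implicit none
  type(network) :: net

  net = network([ &
    input(3), &
    rnn(8, activation=sigmoid()), &  ! explicit activation
    rnn(1) &                         ! defaults to tanh
  ])

  call net % print_info()

end program rnn_activation_sketch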
diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90
index c672581a..7fac8c2b 100644
--- a/src/nf/nf_layer_submodule.f90
+++ b/src/nf/nf_layer_submodule.f90
@@ -8,6 +8,7 @@
   use nf_input3d_layer, only: input3d_layer
   use nf_maxpool2d_layer, only: maxpool2d_layer
   use nf_reshape_layer, only: reshape3d_layer
+  use nf_rnn_layer, only: rnn_layer
   use nf_optimizers, only: optimizer_base_type
 
 contains
@@ -32,6 +33,8 @@ pure module subroutine backward_1d(self, previous, gradient)
             call this_layer % backward(prev_layer % output, gradient)
           type is(flatten_layer)
             call this_layer % backward(prev_layer % output, gradient)
+          type is(rnn_layer)
+            call this_layer % backward(prev_layer % output, gradient)
         end select
 
       type is(flatten_layer)
@@ -46,6 +49,19 @@ pure module subroutine backward_1d(self, previous, gradient)
             call this_layer % backward(prev_layer % output, gradient)
         end select
 
+      type is(rnn_layer)
+
+        select type(prev_layer => previous % p)
+          type is(input1d_layer)
+            call this_layer % backward(prev_layer % output, gradient)
+          type is(dense_layer)
+            call this_layer % backward(prev_layer % output, gradient)
+          type is(flatten_layer)
+            call this_layer % backward(prev_layer % output, gradient)
+          type is(rnn_layer)
+            call this_layer % backward(prev_layer % output, gradient)
+        end select
+
     end select
 
   end subroutine backward_1d
@@ -123,6 +139,8 @@ pure module subroutine forward(self, input)
             call this_layer % forward(prev_layer % output)
           type is(flatten_layer)
             call this_layer % forward(prev_layer % output)
+          type is(rnn_layer)
+            call this_layer % forward(prev_layer % output)
         end select
 
       type is(conv2d_layer)
@@ -179,6 +197,19 @@ pure module subroutine forward(self, input)
            call this_layer % forward(prev_layer % output)
         end select
 
+      type is(rnn_layer)
+
+        ! Upstream layers permitted: input1d, dense, rnn
+        select type(prev_layer => input % p)
+          type is(input1d_layer)
+            call this_layer % forward(prev_layer % output)
+          type is(dense_layer)
+            call this_layer % forward(prev_layer % output)
+          type is(rnn_layer)
+            call this_layer % forward(prev_layer % output)
+        end select
+
+
     end select
 
   end subroutine forward
@@ -197,6 +228,8 @@ pure module subroutine get_output_1d(self, output)
         allocate(output, source=this_layer % output)
       type is(flatten_layer)
         allocate(output, source=this_layer % output)
+      type is(rnn_layer)
+        allocate(output, source=this_layer % output)
       class default
-        error stop '1-d output can only be read from an input1d, dense, or flatten layer.'
+        error stop '1-d output can only be read from an input1d, dense, flatten, or rnn layer.'
 
@@ -292,8 +325,10 @@ elemental module function get_num_params(self) result(num_params)
         num_params = 0
       type is (reshape3d_layer)
         num_params = 0
+      type is (rnn_layer)
+        num_params = this_layer % get_num_params()
       class default
-        error stop 'Unknown layer type.'
+        error stop 'get_num_params() with unknown layer type.'
     end select
 
   end function get_num_params
@@ -317,8 +352,10 @@ module function get_params(self) result(params)
         ! No parameters to get.
       type is (reshape3d_layer)
         ! No parameters to get.
+      type is (rnn_layer)
+        params = this_layer % get_params()
       class default
-        error stop 'Unknown layer type.'
+        error stop 'get_params() with unknown layer type.'
     end select
 
   end function get_params
@@ -342,8 +379,10 @@ module function get_gradients(self) result(gradients)
         ! No gradients to get.
      type is (reshape3d_layer)
         ! No gradients to get.
+      type is (rnn_layer)
+        gradients = this_layer % get_gradients()
       class default
-        error stop 'Unknown layer type.'
+        error stop 'get_gradients() with unknown layer type.'
     end select
 
   end function get_gradients
@@ -399,10 +438,27 @@ module subroutine set_params(self, params)
         write(stderr, '(a)') 'Warning: calling set_params() ' &
           // 'on a zero-parameter layer; nothing to do.'
 
+      type is (rnn_layer)
+        call this_layer % set_params(params)
+
       class default
-        error stop 'Unknown layer type.'
+        error stop 'set_params() with unknown layer type.'
     end select
 
   end subroutine set_params
 
+  pure module subroutine set_state(self, state)
+    class(layer), intent(inout) :: self
+    real, intent(in), optional :: state(:)
+
+    select type (this_layer => self % p)
+      type is (rnn_layer)
+        if (present(state)) then
+          this_layer % state = state
+        else
+          this_layer % state = 0
+        end if
+    end select
+  end subroutine set_state
+
 end submodule nf_layer_submodule
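With the dispatch added above, the generic layer API (get_num_params, get_params, set_params) now covers recurrent layers. A sketch of querying and round-tripping the parameters of one layer; the layer sizes are illustrative:

program rnn_params_sketch
  ! Sketch only: parameter round trip through the generic layer API.
  use nf, only: input, network, rnn
  implicit none
  type(network) :: net
  real, allocatable :: params(:)

  net = network([input(3), rnn(5), rnn(1)])

  ! rnn(5): 3*5 input weights + 5*5 recurrent weights + 5 biases = 45
  print *, net % layers(2) % get_num_params()

  params = net % layers(2) % get_params()
  call net % layers(2) % set_params(params)

end program rnn_params_sketch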
diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90
index 140c9226..1bc03773 100644
--- a/src/nf/nf_network_submodule.f90
+++ b/src/nf/nf_network_submodule.f90
@@ -7,8 +7,9 @@
   use nf_input3d_layer, only: input3d_layer
   use nf_maxpool2d_layer, only: maxpool2d_layer
   use nf_reshape_layer, only: reshape3d_layer
+  use nf_rnn_layer, only: rnn_layer
   use nf_layer, only: layer
-  use nf_layer_constructors, only: conv2d, dense, flatten, input, maxpool2d, reshape
+  use nf_layer_constructors, only: conv2d, dense, flatten, input, maxpool2d, reshape, rnn
   use nf_loss, only: quadratic
   use nf_optimizers, only: optimizer_base_type, sgd
   use nf_parallel, only: tile_indices
@@ -93,6 +94,59 @@ module function network_from_layers(layers) result(res)
 
   end function network_from_layers
 
+  pure function get_activation_by_name(activation_name) result(res)
+    ! Workaround to construct an activation_function instance, with
+    ! hardcoded default parameters, from its name.
+    ! Needed because only the activation name is available when
+    ! reading Keras model files.
+    character(len=*), intent(in) :: activation_name
+    class(activation_function), allocatable :: res
+
+    select case(trim(activation_name))
+      case('elu')
+        allocate ( res, source = elu(alpha = 0.1) )
+
+      case('exponential')
+        allocate ( res, source = exponential() )
+
+      case('gaussian')
+        allocate ( res, source = gaussian() )
+
+      case('linear')
+        allocate ( res, source = linear() )
+
+      case('relu')
+        allocate ( res, source = relu() )
+
+      case('leaky_relu')
+        allocate ( res, source = leaky_relu(alpha = 0.1) )
+
+      case('sigmoid')
+        allocate ( res, source = sigmoid() )
+
+      case('softmax')
+        allocate ( res, source = softmax() )
+
+      case('softplus')
+        allocate ( res, source = softplus() )
+
+      case('step')
+        allocate ( res, source = step() )
+
+      case('tanh')
+        allocate ( res, source = tanhf() )
+
+      case('celu')
+        allocate ( res, source = celu() )
+
+      case default
+        error stop 'activation_name must be one of: ' // &
+          '"elu", "exponential", "gaussian", "linear", "relu", ' // &
+          '"leaky_relu", "sigmoid", "softmax", "softplus", "step", "tanh" or "celu".'
+    end select
+
+  end function get_activation_by_name
+
   module subroutine backward(self, output, loss)
     class(network), intent(in out) :: self
     real, intent(in) :: output(:)
@@ -128,6 +182,11 @@ module subroutine backward(self, output, loss)
            self % layers(n - 1), &
            self % loss % derivative(output, this_layer % output) &
          )
+        type is(rnn_layer)
+          call self % layers(n) % backward( &
+            self % layers(n - 1), &
+            self % loss % derivative(output, this_layer % output) &
+          )
       end select
     else
       ! Hidden layer; take the gradient from the next layer
@@ -523,6 +582,9 @@ module subroutine update(self, optimizer, batch_size)
           type is(conv2d_layer)
             this_layer % dw = 0
            this_layer % db = 0
+          type is(rnn_layer)
+            this_layer % dw = 0
+            this_layer % db = 0
         end select
       end do
 
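The private helper above maps an activation name to an activation_function instance with hardcoded defaults. A standalone sketch of the same idea at user level; it does not call the private helper, and only a few names are handled:

program activation_name_sketch
  ! Sketch only: name-to-activation mapping, in the style of the helper above.
  use nf_activation, only: activation_function, relu, sigmoid, tanhf
  implicit none
  class(activation_function), allocatable :: act

  allocate(act, source=by_name('tanh'))
  print *, act % get_name()

contains

  function by_name(name) result(res)
    character(len=*), intent(in) :: name
    class(activation_function), allocatable :: res
    select case (trim(name))
      case ('relu')
        allocate(res, source=relu())
      case ('sigmoid')
        allocate(res, source=sigmoid())
      case ('tanh')
        allocate(res, source=tanhf())
      case default
        error stop 'unknown activation name'
    end select
  end function by_name

end program activation_name_sketch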
diff --git a/src/nf/nf_rnn_layer.f90 b/src/nf/nf_rnn_layer.f90
new file mode 100644
index 00000000..a4f55971
--- /dev/null
+++ b/src/nf/nf_rnn_layer.f90
@@ -0,0 +1,137 @@
+module nf_rnn_layer
+
+  !! This module provides the concrete RNN layer type.
+  !! It is used internally by the layer type.
+  !! It is not intended to be used directly by the user.
+
+  use nf_activation, only: activation_function
+  use nf_base_layer, only: base_layer
+
+  implicit none
+
+  private
+  public :: rnn_layer
+
+  type, extends(base_layer) :: rnn_layer
+
+    !! Concrete implementation of an RNN (fully-connected) layer type
+
+    integer :: input_size
+    integer :: output_size
+
+    real, allocatable :: weights(:,:)
+    real, allocatable :: recurrent(:,:)
+    real, allocatable :: biases(:)
+    real, allocatable :: z(:) ! matmul(x, weights) + matmul(state, recurrent) + biases
+    real, allocatable :: state(:)
+    real, allocatable :: output(:) ! activation(z)
+    real, allocatable :: gradient(:) ! matmul(w, db)
+    real, allocatable :: dw(:,:) ! weight gradients
+    real, allocatable :: db(:) ! bias gradients
+
+    class(activation_function), allocatable :: activation
+
+  contains
+
+    procedure :: backward
+    procedure :: forward
+    procedure :: get_gradients
+    procedure :: get_num_params
+    procedure :: get_params
+    procedure :: init
+    procedure :: set_params
+    ! procedure :: set_state
+
+  end type rnn_layer
+
+  interface rnn_layer
+    elemental module function rnn_layer_cons(output_size, activation) &
+      result(res)
+      !! This function returns the `rnn_layer` instance.
+      integer, intent(in) :: output_size
+        !! Number of neurons in this layer
+      class(activation_function), intent(in) :: activation
+        !! Instance of the activation_function to use;
+        !! see nf_activation.f90 for available functions.
+      type(rnn_layer) :: res
+        !! rnn_layer instance
+    end function rnn_layer_cons
+  end interface rnn_layer
+
+  interface
+
+    pure module subroutine backward(self, input, gradient)
+      !! Apply the backward gradient descent pass.
+      !! Only weight and bias gradients are updated in this subroutine,
+      !! while the weights and biases themselves are untouched.
+      class(rnn_layer), intent(in out) :: self
+        !! RNN layer instance
+      real, intent(in) :: input(:)
+        !! Input from the previous layer
+      real, intent(in) :: gradient(:)
+        !! Gradient from the next layer
+    end subroutine backward
+
+    pure module subroutine forward(self, input)
+      !! Propagate forward the layer.
+      !! Calling this subroutine updates the values of a few data components
+      !! of `rnn_layer` that are needed for the backward pass.
+      class(rnn_layer), intent(in out) :: self
+        !! RNN layer instance
+      real, intent(in) :: input(:)
+        !! Input from the previous layer
+    end subroutine forward
+
+    pure module function get_num_params(self) result(num_params)
+      !! Return the number of parameters in this layer.
+      class(rnn_layer), intent(in) :: self
+        !! RNN layer instance
+      integer :: num_params
+        !! Number of parameters in this layer
+    end function get_num_params
+
+    pure module function get_params(self) result(params)
+      !! Return the parameters (weights and biases) of this layer.
+      !! The parameters are ordered as input weights, recurrent weights, then biases.
+      class(rnn_layer), intent(in) :: self
+        !! RNN layer instance
+      real, allocatable :: params(:)
+        !! Parameters of this layer
+    end function get_params
+
+    pure module function get_gradients(self) result(gradients)
+      !! Return the gradients of this layer.
+      !! The gradients are ordered as weights first, biases second.
+      class(rnn_layer), intent(in) :: self
+        !! RNN layer instance
+      real, allocatable :: gradients(:)
+        !! Gradients of this layer
+    end function get_gradients
+
+    module subroutine set_params(self, params)
+      !! Set the parameters of this layer.
+      !! The parameters are ordered as input weights, recurrent weights, then biases.
+      class(rnn_layer), intent(in out) :: self
+        !! RNN layer instance
+      real, intent(in) :: params(:)
+        !! Parameters of this layer
+    end subroutine set_params
+
+    module subroutine init(self, input_shape)
+      !! Initialize the layer data structures.
+      !!
+      !! This is a deferred procedure from the `base_layer` abstract type.
+      class(rnn_layer), intent(in out) :: self
+        !! RNN layer instance
+      integer, intent(in) :: input_shape(:)
+        !! Shape of the input layer
+    end subroutine init
+
+    !module subroutine set_state(self, state)
+    !  type(rnn_layer), intent(inout) :: self
+    !  real, intent(in), optional :: state(:)
+    !end subroutine set_state
+
+  end interface
+
+end module nf_rnn_layer
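For reference, the single-step computation that forward implements, spelled out with plain arrays and the default tanh activation; the sizes and random values are illustrative:

program rnn_step_sketch
  ! Sketch only: one recurrent step, z = x*W + state*U + b, state = tanh(z).
  ! Shapes follow the layer components: weights(input_size, output_size),
  ! recurrent(output_size, output_size), biases(output_size).
  implicit none
  integer, parameter :: n_in = 3, n_out = 2
  real :: x(n_in), w(n_in, n_out), u(n_out, n_out), b(n_out)
  real :: state(n_out), z(n_out)

  call random_number(x)
  call random_number(w)
  call random_number(u)
  call random_number(b)
  state = 0

  z = matmul(x, w) + matmul(state, u) + b
  state = tanh(z)

  print *, state

end program rnn_step_sketch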
diff --git a/src/nf/nf_rnn_layer_submodule.f90 b/src/nf/nf_rnn_layer_submodule.f90
new file mode 100644
index 00000000..fe2992aa
--- /dev/null
+++ b/src/nf/nf_rnn_layer_submodule.f90
@@ -0,0 +1,176 @@
+submodule(nf_rnn_layer) nf_rnn_layer_submodule
+
+  use nf_activation, only: activation_function
+  use nf_base_layer, only: base_layer
+  use nf_random, only: random_normal
+
+  implicit none
+
+contains
+
+  elemental module function rnn_layer_cons(output_size, activation) &
+    result(res)
+    integer, intent(in) :: output_size
+    class(activation_function), intent(in) :: activation
+    type(rnn_layer) :: res
+
+    res % output_size = output_size
+    res % activation_name = activation % get_name()
+    allocate( res % activation, source = activation )
+
+  end function rnn_layer_cons
+
+
+  pure module subroutine backward(self, input, gradient)
+    class(rnn_layer), intent(in out) :: self
+    real, intent(in) :: input(:)
+    real, intent(in) :: gradient(:)
+    real :: db(self % output_size)
+    real :: dw(self % input_size, self % output_size)
+
+    db = gradient * self % activation % eval_prime(self % z)
+    dw = matmul(reshape(input, [size(input), 1]), reshape(db, [1, size(db)]))
+    self % gradient = matmul(self % weights, db)
+    self % dw = self % dw + dw
+    self % db = self % db + db
+
+  end subroutine backward
+
+
+  pure module subroutine forward(self, input)
+    class(rnn_layer), intent(in out) :: self
+    real, intent(in) :: input(:)
+
+    self % z = matmul(input, self % weights) &
+      + matmul(self % state, self % recurrent) &
+      + self % biases
+    self % state = self % activation % eval(self % z)
+    self % output = self % state
+
+  end subroutine forward
+
+
+  pure module function get_num_params(self) result(num_params)
+    class(rnn_layer), intent(in) :: self
+    integer :: num_params
+
+    ! Number of input weights + recurrent weights + biases
+    num_params = self % input_size * self % output_size &
+      + self % output_size * self % output_size &
+      + self % output_size
+
+  end function get_num_params
+
+
+  pure module function get_params(self) result(params)
+    class(rnn_layer), intent(in) :: self
+    real, allocatable :: params(:)
+
+    params = [ &
+      pack(self % weights, .true.), &
+      pack(self % recurrent, .true.), &
+      pack(self % biases, .true.) &
+    ]
+
+  end function get_params
+
+
+  pure module function get_gradients(self) result(gradients)
+    class(rnn_layer), intent(in) :: self
+    real, allocatable :: gradients(:)
+
+    gradients = [ &
+      pack(self % dw, .true.), &
+      pack(self % db, .true.) &
+    ]
+
+  end function get_gradients
+
+
+  module subroutine set_params(self, params)
+    class(rnn_layer), intent(in out) :: self
+    real, intent(in) :: params(:)
+    integer :: first, last
+
+    ! check if the number of parameters is correct
+    if (size(params) /= self % get_num_params()) then
+      error stop 'Error: number of parameters does not match'
+    end if
+
+    ! reshape the weights
+    last = self % input_size * self % output_size
+    self % weights = reshape( &
+      params(:last), &
+      [self % input_size, self % output_size] &
+    )
+
+    ! reshape the recurrent weights
+    first = last + 1
+    last = first + self % output_size * self % output_size - 1
+    self % recurrent = reshape( &
+      params(first:last), &
+      [self % output_size, self % output_size] &
+    )
+
+    ! reshape the biases
+    first = last + 1
+    last = first + self % output_size - 1
+    self % biases = reshape( &
+      params(first:last), &
+      [self % output_size] &
+    )
+
+  end subroutine set_params
+
+
+  module subroutine init(self, input_shape)
+    class(rnn_layer), intent(in out) :: self
+    integer, intent(in) :: input_shape(:)
+
+    self % input_size = input_shape(1)
+
+    ! Weights are a 2-d array of shape previous layer size
+    ! times this layer size.
+    allocate(self % weights(self % input_size, self % output_size))
+    call random_normal(self % weights)
+    self % weights = self % weights / self % input_size
+
+    ! Broadcast weights to all other images, if any.
+    call co_broadcast(self % weights, 1)
+
+    allocate(self % recurrent(self % output_size, self % output_size))
+    call random_normal(self % recurrent)
+    self % recurrent = self % recurrent / self % input_size
+    call co_broadcast(self % recurrent, 1)
+
+    allocate(self % biases(self % output_size))
+    self % biases = 0
+
+    allocate(self % output(self % output_size))
+    self % output = 0
+
+    allocate(self % z(self % output_size))
+    self % z = 0
+
+    allocate(self % state(self % output_size))
+    self % state = 0
+
+    allocate(self % dw(self % input_size, self % output_size))
+    self % dw = 0
+
+    allocate(self % db(self % output_size))
+    self % db = 0
+
+    allocate(self % gradient(self % output_size))
+    self % gradient = 0
+
+  end subroutine init
+
+  subroutine reset(self)
+    class(rnn_layer), intent(in out) :: self
+
+    self % state = 0
+
+  end subroutine reset
+
+end submodule nf_rnn_layer_submodule
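The packing order used by get_params and set_params above is input weights, then recurrent weights, then biases; get_gradients packs only dw and db, so the gradient vector is shorter than the parameter vector. A sketch of the slice arithmetic for a layer with 3 inputs and 5 neurons (sizes are illustrative):

program rnn_packing_sketch
  ! Sketch only: slice boundaries of the packed parameter vector.
  implicit none
  integer, parameter :: n_in = 3, n_out = 5
  integer :: first, last

  last = n_in * n_out                ! input weights:     params(1:15)
  print *, 'weights   1 to', last

  first = last + 1
  last = first + n_out * n_out - 1   ! recurrent weights: params(16:40)
  print *, 'recurrent', first, 'to', last

  first = last + 1
  last = first + n_out - 1           ! biases:            params(41:45)
  print *, 'biases   ', first, 'to', last

end program rnn_packing_sketch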