Below are two helper functions that convert integers to binary format (read left to right, i.e. least-significant bit first): i2b for a single integer, and int2bin to apply it across a whole vector.
# basic conversion
i2b <- function(integer, length=8)
  as.numeric(intToBits(integer))[1:length]

# apply to entire vectors
int2bin <- function(integer, length=8)
  t(sapply(integer, i2b, length=length))
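As a quick sanity check (these calls are illustrative, not part of the original post): intToBits() returns bits least-significant first, which is why the encoding reads left to right.

i2b(3)            # 1 1 0 0 0 0 0 0  (3 = 1*1 + 1*2)
int2bin(c(3, 6))  # one 8-bit row per integer, least-significant bit first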
First we generate the data:
# set training data length
training_data_size = 20000

# create sample inputs
X1 = sample(0:127, training_data_size, replace=TRUE)
X2 = sample(0:127, training_data_size, replace=TRUE)

# create sample output
Y <- X1 + X2

# convert to binary
X1 <- int2bin(X1)
X2 <- int2bin(X2)
Y  <- int2bin(Y)

# create 3d array: dim 1: samples; dim 2: time; dim 3: variables
X <- array( c(X1,X2), dim=c(dim(X1),2) )
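Because the inputs are drawn from 0:127, the largest possible sum is 254, which still fits in 8 bits. As a quick shape check (not in the original; note that the training loop below indexes X1 and X2 directly and reuses the name X for the per-bit input):

dim(X1)  # 20000 8     one row of bits per sample
dim(X)   # 20000 8 2   samples x time steps x input variables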
Next we define the sigmoid activation and its derivative:
sigmoid <- function(x)
  1 / ( 1+exp(-x) )

sig_to_der <- function(x)
  x*(1-x)
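Note that sig_to_der takes the sigmoid's output rather than its input: if s = sigmoid(x), the derivative of the sigmoid at x is s*(1-s), so the activations stored during the forward pass can be reused directly in backpropagation. A quick check (illustrative):

s <- sigmoid(0)  # 0.5
sig_to_der(s)    # 0.25, the sigmoid's maximum slope, at x = 0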
In this example we use the following hyperparameters:

binary_dim = 8      # bits per number; sums of two values from 0:127 fit in 8 bits
alpha      = 0.5    # learning rate
input_dim  = 2      # one bit from each addend per time step
hidden_dim = 6      # size of the recurrent hidden layer
output_dim = 1      # one bit of the sum per time step
# initialize weights randomly between -1 and 1, with mean 0
# weights_0 maps input -> hidden, weights_h is the hidden -> hidden
# recurrence, and weights_1 maps hidden -> output
weights_0 = matrix(runif(n = input_dim *hidden_dim, min=-1, max=1),
                   nrow=input_dim,
                   ncol=hidden_dim )
weights_h = matrix(runif(n = hidden_dim*hidden_dim, min=-1, max=1),
                   nrow=hidden_dim,
                   ncol=hidden_dim )
weights_1 = matrix(runif(n = hidden_dim*output_dim, min=-1, max=1),
                   nrow=hidden_dim,
                   ncol=output_dim )
# create matrices to store updates, to be used in backpropagation
weights_0_update = matrix(0, nrow = input_dim,  ncol = hidden_dim)
weights_h_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
weights_1_update = matrix(0, nrow = hidden_dim, ncol = output_dim)
# training logic
for (j in 1:training_data_size) {

  # select data
  a = X1[j,]
  b = X2[j,]

  # select true answer
  c = Y[j,]

  # where we'll store our best guess (binary encoded)
  d = matrix(0, nrow = 1, ncol = binary_dim)

  overallError = 0

  layer_2_deltas = matrix(0)
  layer_1_values = matrix(0, nrow=1, ncol = hidden_dim)

  # moving along the positions in the binary encoding
  for (position in 1:binary_dim) {

    # generate input and output
    X = cbind( a[position], b[position] ) # rename X to layer_0?
    y = c[position]

    # hidden layer
    layer_1 = sigmoid( (X %*% weights_0) +
                       (layer_1_values[dim(layer_1_values)[1],] %*% weights_h) )

    # output layer
    layer_2 = sigmoid(layer_1 %*% weights_1)

    # did we miss?... if so, by how much?
    layer_2_error  = y - layer_2
    layer_2_deltas = rbind(layer_2_deltas, layer_2_error * sig_to_der(layer_2))
    overallError   = overallError + abs(layer_2_error)

    # decode estimate so we can print it out
    d[position] = round(layer_2)

    # store hidden layer
    layer_1_values = rbind(layer_1_values, layer_1)
  }

  future_layer_1_delta = matrix(0, nrow = 1, ncol = hidden_dim)

  # backpropagate through time, from the last bit to the first
  for (position in binary_dim:1) {

    X = cbind(a[position], b[position])
    layer_1      = layer_1_values[dim(layer_1_values)[1]-(binary_dim-position),]
    prev_layer_1 = layer_1_values[dim(layer_1_values)[1]-((binary_dim-position)+1),]

    # error at output layer
    layer_2_delta = layer_2_deltas[dim(layer_2_deltas)[1]-(binary_dim-position),]
    # error at hidden layer
    layer_1_delta = (future_layer_1_delta %*% t(weights_h) +
                     layer_2_delta %*% t(weights_1)) * sig_to_der(layer_1)

    # let's update all our weights so we can try again
    weights_1_update = weights_1_update + matrix(layer_1) %*% layer_2_delta
    weights_h_update = weights_h_update + matrix(prev_layer_1) %*% layer_1_delta
    weights_0_update = weights_0_update + t(X) %*% layer_1_delta

    future_layer_1_delta = layer_1_delta
  }

  # apply the accumulated updates, scaled by the learning rate
  weights_0 = weights_0 + ( weights_0_update * alpha )
  weights_1 = weights_1 + ( weights_1_update * alpha )
  weights_h = weights_h + ( weights_h_update * alpha )

  # reset the update accumulators
  weights_0_update = weights_0_update * 0
  weights_1_update = weights_1_update * 0
  weights_h_update = weights_h_update * 0

  # print the error every 10% of training
  if(j %% (training_data_size/10) == 0)
    print(paste("Error:", overallError))
}
## [1] "Error: 2.18291493644676"
## [1] "Error: 0.178499782196946"
## [1] "Error: 0.116320487679996"
## [1] "Error: 0.174290835574415"
## [1] "Error: 0.198828057558919"
## [1] "Error: 0.083769322080405"
## [1] "Error: 0.13625691557453"
## [1] "Error: 0.0660248641004516"
## [1] "Error: 0.147126292653413"
## [1] "Error: 0.0483486390229478"