As of 2018-06-17 the elmNN package was archived. Because it was one of the machine learning functions that I used when I started learning R (it also returns the output results pretty fast), and because I had to utilize the package last week for a personal task, I decided to reimplement the R code in Rcpp. It didn't take long because the R package was initially written by its author in a clear way. In the next lines I'll explain the differences and the functionality just for reference.
The functions included in the elmNNRcpp package are the following and details for each parameter can be found in the package documentation,
elmNNRcpp |
---|
elm_train(x, y, nhid, actfun, init_weights = "normal_gaussian", bias = FALSE, ...) |
elm_predict(elm_train_object, newdata, normalize = FALSE) |
onehot_encode(y) |
The following code chunk gives some details on how to use the elm_train in case of regression and compares the results with the lm ( linear model ) base function,
# load the data and split it in two parts
#----------------------------------------
data(Boston, package = 'KernelKnn')
library(elmNNRcpp)
## Loading required package: KernelKnn

# work on a plain matrix without dimnames
Boston <- as.matrix(Boston)
dimnames(Boston) <- NULL

# predictors: every column except the last one (the last column is used as
# the response below); rows 1-350 are the train part, the rest the test part
X <- Boston[, -dim(Boston)[2]]
xtr <- X[1:350, ]
xte <- X[351:nrow(X), ]

# prepare / convert the train-data-response to a one-column matrix
#-----------------------------------------------------------------
ytr <- matrix(Boston[1:350, dim(Boston)[2]], ncol = 1)
# perform a fit and predict [ elmNNRcpp ]
#----------------------------------------
fit_elm <- elm_train(xtr, ytr, nhid = 1000, actfun = 'purelin',
                     init_weights = "uniform_negative", bias = TRUE,
                     verbose = TRUE)
## Input weights will be initialized ...
## Dot product of input weights and data starts ...
## Bias will be added to the dot product ...
## 'purelin' activation function will be utilized ...
## The computation of the Moore-Pseudo-inverse starts ...
## The computation is finished!
##
## Time to complete : 0.07127595 secs

# predictions for the held-out rows
pr_te_elm <- elm_predict(fit_elm, xte)
# perform a fit and predict [ lm ]
#----------------------------------------
# reload the original 'Boston' object (the copy used for the elm fit had its
# dimnames removed) so that the formula 'medv ~ .' can refer to columns by name
data(Boston, package = 'KernelKnn')
fit_lm <- lm(medv~., data = Boston[1:350, ])
pr_te_lm <- predict(fit_lm, newdata = Boston[351:nrow(X), ])
# evaluation metric
#------------------
# Root-mean-squared error between observed and predicted values.
#   y_true : numeric vector of observed responses
#   y_pred : numeric vector of predictions, matched element-wise to y_true
# Returns a single numeric value: sqrt(mean((y_true - y_pred)^2)).
rmse <- function(y_true, y_pred) {
  sqrt(mean((y_true - y_pred)^2))
}
# test data response variable
#----------------------------
# last column of 'Boston' for the rows that were held out of the training set
yte <- Boston[351:nrow(X), dim(Boston)[2]]

# root-mean-squared-error for 'elm' and 'lm'
#-------------------------------------------
# 'pr_te_elm' is indexed as a matrix, so its first column is extracted
cat('the rmse error for extreme-learning-machine is :', rmse(yte, pr_te_elm[, 1]), '\n')
## the rmse error for extreme-learning-machine is : 23.36543
cat('the rmse error for liner-model is :', rmse(yte, pr_te_lm), '\n')
## the rmse error for liner-model is : 23.36543
The following code script illustrates how elm_train can be used in classification and compares the results with the glm ( Generalized Linear Models ) base function,
# load the data
#--------------
data(ionosphere, package = 'KernelKnn')

# the response is taken from the last column
y_class <- ionosphere[, ncol(ionosphere)]

x_class <- ionosphere[, -c(2, ncol(ionosphere))]     # second column has 1 unique value

# NOTE(review): the response column was already removed on the previous line,
# so '-ncol(x_class)' drops one more (predictor) column before scaling -
# confirm that this is intended
x_class <- scale(x_class[, -ncol(x_class)])

x_class <- as.matrix(x_class)                        # convert to matrix
dimnames(x_class) <- NULL
# split data in train-test
#-------------------------
# rows 1-200 are used for training, the remaining rows for testing
xtr_class <- x_class[1:200, ]
xte_class <- x_class[201:nrow(ionosphere), ]

ytr_class <- as.numeric(y_class[1:200])
yte_class <- as.numeric(y_class[201:nrow(ionosphere)])

ytr_class <- onehot_encode(ytr_class - 1)     # class labels should begin from 0 (subtract 1)
# perform a fit and predict [ elmNNRcpp ]
#----------------------------------------
fit_elm_class <- elm_train(xtr_class, ytr_class, nhid = 1000, actfun = 'relu',
                           init_weights = "uniform_negative", bias = TRUE,
                           verbose = TRUE)
## Input weights will be initialized ...
## Dot product of input weights and data starts ...
## Bias will be added to the dot product ...
## 'relu' activation function will be utilized ...
## The computation of the Moore-Pseudo-inverse starts ...
## The computation is finished!
##
## Time to complete : 0.03828526 secs
pr_elm_class <- elm_predict(fit_elm_class, xte_class, normalize = FALSE)

# predicted class = index of the column with the largest value in each row
pr_elm_class <- max.col(pr_elm_class, ties.method = "random")
# perform a fit and predict [ glm ]
#----------------------------------------
data(ionosphere, package = 'KernelKnn')

# the second column is excluded (it has 1 unique value)
fit_glm <- glm(class~., data = ionosphere[1:200, -2], family = binomial(link = 'logit'))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
pr_glm <- predict(fit_glm, newdata = ionosphere[201:nrow(ionosphere), -2], type = 'response')

# threshold the predicted probabilities at 0.5 to obtain the labels 1 / 2,
# which are compared with the numeric test labels afterwards
pr_glm <- as.vector(ifelse(pr_glm < 0.5, 1, 2))
# share of correctly classified test observations for 'elm' and 'glm'
#---------------------------------------------------------------------
cat('the accuracy for extreme-learning-machine is :', mean(pr_elm_class == yte_class), '\n')
## the accuracy for extreme-learning-machine is : 0.9006623
cat('the accuracy for glm is :', mean(pr_glm == yte_class), '\n')
## the accuracy for glm is : 0.8940397
I found an interesting Python implementation / Code on the web and I thought I'd give it a try to reproduce the results. I downloaded the MNIST data from my Github repository and I used the following parameter setting,
# using system('wget..') on a linux OS
#-------------------------------------
system("wget https://raw.githubusercontent.com/mlampros/DataSets/master/mnist.zip")

mnist <- read.table(unz("mnist.zip", "mnist.csv"), nrows = 70000, header = TRUE,
                    quote = "\"", sep = ",")

# every column except the last one is used as input data
x <- mnist[, -ncol(mnist)]

# the last column holds the labels; they are shifted by 1 here and shifted
# back ('y - 1') right before the one-hot encoding
y <- mnist[, ncol(mnist)] + 1
# use the hog-features as input data
#-----------------------------------
hog <- OpenImageR::HOG_apply(x, cells = 6, orientations = 9, rows = 28, columns = 28,
                             threads = 6)

# one-hot encode the labels (shifted back so that they begin from 0)
y_expand <- elmNNRcpp::onehot_encode(y - 1)
# 4-fold cross-validation
#------------------------
folds <- KernelKnn:::class_folds(folds = 4, as.factor(y))
str(folds)

START <- Sys.time()

# one model per fold: fold 'x' is left out and the remaining folds are used for training
fit <- lapply(seq_along(folds), function(x) {
  cat('\n'); cat('fold', x, 'starts ....', '\n')
  train_idx <- unlist(folds[-x])
  tmp_fit <- elmNNRcpp::elm_train(as.matrix(hog[train_idx, ]), y_expand[train_idx, ],
                                  nhid = 2500, actfun = 'relu',
                                  init_weights = 'uniform_negative',
                                  bias = TRUE, verbose = TRUE)
  cat('******************************************', '\n')
  tmp_fit
})

END <- Sys.time()
END - START
# Time difference of 5.698552 mins
str(fit)
# predictions for 4-fold cross validation
#----------------------------------------
# for each fitted model, predict on the fold that was left out during training
# and compute the share of correctly predicted labels
test_acc <- vapply(seq_along(fit), function(x) {
  pr_te <- elmNNRcpp::elm_predict(fit[[x]], newdata = as.matrix(hog[folds[[x]], ]))
  # predicted / true class = index of the column with the largest value per row
  pr_max_col <- max.col(pr_te, ties.method = "random")
  y_true <- max.col(y_expand[folds[[x]], ])
  mean(pr_max_col == y_true)
}, numeric(1))
test_acc
# [1] 0.9825143 0.9848571 0.9824571 0.9822857
cat('Accuracy ( Mnist data ) :', round(mean(test_acc) * 100, 2), '\n')
# Accuracy ( Mnist data ) : 98.3