The package abclass provides implementations of the multi-category angle-based classifiers (Zhang & Liu, 2014) with the large-margin unified machines (Liu et al., 2011) for high-dimensional data. Note that the package is still experimental and under active development.
One can install the released version from CRAN:
install.packages("abclass")
Alternatively, the version under development can be installed as follows:
if (! require(remotes)) install.packages("remotes")
remotes::install_github("wenjie2wang/abclass", upgrade = "never")
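Either way, the installed version can be confirmed afterwards with plain base R:

packageVersion("abclass")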
A toy example is as follows:
library(abclass)
set.seed(123)
## toy examples for demonstration purpose
## reference: example 1 in Zhang and Liu (2014)
ntrain <- 100    # size of training set
ntest <- 1000    # size of testing set
p0 <- 10         # number of actual predictors
p1 <- 100        # number of random predictors
k <- 5           # number of categories

n <- ntrain + ntest; p <- p0 + p1
train_idx <- seq_len(ntrain)
y <- sample(k, size = n, replace = TRUE)         # response
mu <- matrix(rnorm(p0 * k), nrow = k, ncol = p0) # mean vector
## normalize the mean vector so that they are distributed on the unit circle
mu <- mu / apply(mu, 1, function(a) sqrt(sum(a ^ 2)))
x0 <- t(sapply(y, function(i) rnorm(p0, mean = mu[i, ], sd = 0.25)))
x1 <- matrix(rnorm(p1 * n, sd = 0.3), nrow = n, ncol = p1)
x <- cbind(x0, x1)
train_x <- x[train_idx, ]
test_x <- x[- train_idx, ]
y <- factor(paste0("label_", y))
train_y <- y[train_idx]
test_y <- y[- train_idx]
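Before fitting any model, a quick sanity check of the simulated data can catch mistakes early (base R only; the expected dimensions follow from the settings above):

dim(train_x)   # 100 x 110: ntrain rows, p0 + p1 columns
dim(test_x)    # 1000 x 110
table(train_y) # counts of the five labels in the training set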
### regularization through elastic-net penalty
## logistic deviance loss
model1 <- abclass(train_x, train_y, nlambda = 100,
                  nfolds = 3, loss = "logistic")
pred1 <- predict(model1, test_x)
table(test_y, pred1)
## pred1
## test_y label_1 label_2 label_3 label_4 label_5
## label_1 179 1 26 0 0
## label_2 1 200 1 1 0
## label_3 4 0 195 0 0
## label_4 0 2 4 183 3
## label_5 1 0 3 2 194
mean(test_y == pred1) # accuracy
## [1] 0.951
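Beyond accuracy, one may want to inspect the estimated coefficients of the selected model. The sketch below assumes the fitted object supports the usual coef() accessor returning estimates with one row per predictor; both the accessor and the return shape are assumptions here, so check the package documentation:

## hypothetical sketch: count predictors with any nonzero estimate
beta1 <- as.matrix(coef(model1))
sum(apply(beta1, 1, function(b) any(b != 0)))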
## exponential loss approximating AdaBoost
model2 <- abclass(train_x, train_y, nlambda = 100,
                  nfolds = 3, loss = "boost")
pred2 <- predict(model2, test_x, s = "cv_1se")
table(test_y, pred2)
## pred2
## test_y label_1 label_2 label_3 label_4 label_5
## label_1 184 0 22 0 0
## label_2 0 202 0 1 0
## label_3 18 1 176 3 1
## label_4 1 5 3 177 6
## label_5 1 0 0 1 198
mean(test_y == pred2) # accuracy
## [1] 0.937
## hybrid hinge-boost loss
model3 <- abclass(train_x, train_y, nlambda = 100,
                  nfolds = 3, loss = "hinge-boost")
pred3 <- predict(model3, test_x)
table(test_y, pred3)
## pred3
## test_y label_1 label_2 label_3 label_4 label_5
## label_1 179 1 26 0 0
## label_2 1 201 0 1 0
## label_3 5 0 194 0 0
## label_4 0 2 3 185 2
## label_5 1 0 2 2 195
mean(test_y == pred3) # accuracy
## [1] 0.954
## large-margin unified loss
model4 <- abclass(train_x, train_y, nlambda = 100,
                  nfolds = 3, loss = "lum")
pred4 <- predict(model4, test_x)
table(test_y, pred4)
## pred4
## test_y label_1 label_2 label_3 label_4 label_5
## label_1 179 1 26 0 0
## label_2 1 201 0 1 0
## label_3 4 0 194 0 1
## label_4 0 2 3 185 2
## label_5 1 0 1 0 198
mean(test_y == pred4) # accuracy
## [1] 0.957
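The four fits above differ only in the loss argument, so they can be written as a compact loop that reuses exactly the abclass() and predict() calls already shown:

## fit all four losses and collect test accuracies
losses <- c("logistic", "boost", "hinge-boost", "lum")
sapply(losses, function(l) {
    fit <- abclass(train_x, train_y, nlambda = 100,
                   nfolds = 3, loss = l)
    mean(test_y == predict(fit, test_x))
})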
### variable selection via group lasso
## logistic deviance loss
model1 <- abclass(train_x, train_y, nlambda = 100, nfolds = 3,
                  grouped = TRUE, loss = "logistic")
pred1 <- predict(model1, test_x, s = "cv_1se")
table(test_y, pred1)
## pred1
## test_y label_1 label_2 label_3 label_4 label_5
## label_1 173 1 32 0 0
## label_2 2 197 3 1 0
## label_3 1 1 197 0 0
## label_4 0 2 8 180 2
## label_5 2 0 5 3 190
mean(test_y == pred1) # accuracy
## [1] 0.937
## exponential loss approximating AdaBoost
model2 <- abclass(train_x, train_y, nlambda = 100, nfolds = 3,
                  grouped = TRUE, loss = "boost")
pred2 <- predict(model2, test_x, s = "cv_1se")
table(test_y, pred2)
## pred2
## test_y label_1 label_2 label_3 label_4 label_5
## label_1 189 0 17 0 0
## label_2 1 202 0 0 0
## label_3 11 0 187 0 1
## label_4 0 1 2 181 8
## label_5 1 0 0 0 199
mean(test_y == pred2) # accuracy
## [1] 0.958
## hybrid hinge-boost loss
model3 <- abclass(train_x, train_y, nlambda = 100, nfolds = 3,
                  grouped = TRUE, loss = "hinge-boost")
pred3 <- predict(model3, test_x)
table(test_y, pred3)
## pred3
## test_y label_1 label_2 label_3 label_4 label_5
## label_1 174 1 31 0 0
## label_2 0 202 0 1 0
## label_3 10 0 188 0 1
## label_4 0 3 7 181 1
## label_5 1 0 1 3 195
mean(test_y == pred3) # accuracy
## [1] 0.94
## large-margin unified loss
model4 <- abclass(train_x, train_y, nlambda = 100, nfolds = 3,
                  grouped = TRUE, loss = "lum")
pred4 <- predict(model4, test_x)
table(test_y, pred4)
## pred4
## test_y label_1 label_2 label_3 label_4 label_5
## label_1 181 1 24 0 0
## label_2 0 202 0 1 0
## label_3 5 0 193 0 1
## label_4 0 2 5 183 2
## label_5 1 0 1 0 198
mean(test_y == pred4) # accuracy
## [1] 0.957
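Since the group lasso is intended for variable selection, it is natural to ask which of the 110 predictors survive. The following sketch again assumes a coef() accessor with one row per predictor (an assumption, as noted above); ideally only the first p0 = 10 actual predictors remain:

## hypothetical sketch: indices of predictors kept by the group lasso
beta4 <- as.matrix(coef(model4))
which(apply(beta4, 1, function(b) any(b != 0)))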
License: GNU General Public License (≥ 3)
Copyright holder: Eli Lilly and Company