Accuracy comparison
Load data
data(package="mlbench")
data(Sonar, package="mlbench")
data(DNA, package="mlbench")
data.list <- list(
Sonar=list(
input.mat=as.matrix(Sonar[,1:60]),
output.vec=ifelse(Sonar$Class=="R", 1, -1)),
DNA=list(
input.mat=ifelse(as.matrix(DNA[,1:180])==0, 0, 1),
output.vec=ifelse(DNA$Class=="n", -1, 1)))
Define functions for computing loss and gradient
N <- 10
set.seed(1)
rand.pred.vec <- rnorm(N)
subtrain.output.vec <- rep(c(-1, 1), l=N)
subtrain.diff.count.dt <- aum::aum_diffs_binary(subtrain.output.vec, denominator="count")
subtrain.diff.rate.dt <- aum::aum_diffs_binary(subtrain.output.vec, denominator="rate")
library(data.table)
PairsDT <- function(output.vec){
is.positive <- output.vec == 1
data.table(expand.grid(
positive=which(is.positive),
negative=which(!is.positive)))
}
subtrain.pairs.dt <- PairsDT(subtrain.output.vec)
margin <- 1
## Note: for efficiency subtrain labels are assumed to be pre-computed
## in the enclosing environment, once before the optimization starts.
equal.class.weights <- function(output.vec){
otab <- table(output.vec)
as.numeric(1/otab[paste(output.vec)])
}
subtrain.obs.weights <- equal.class.weights(subtrain.output.vec)
Logistic <- function(pred.vec, output.vec, obs.weights){
list(
gradient=-obs.weights*output.vec/(1+exp(output.vec*pred.vec)),
loss=sum(obs.weights*log(1+exp(-output.vec*pred.vec))))
}
AUM <- function(pred.vec, diff.dt){
L <- aum::aum(diff.dt, pred.vec)
d <- L$derivative_mat
non.diff <- abs(d[,1] - d[,2]) > 1e-6
if(any(non.diff)){
cat(sprintf("%d non-diff points\n", sum(non.diff)))
print(d[non.diff, ])
}
with(L, list(gradient=derivative_mat[,1], loss=aum))
}
loss.list <- list(
logistic=function(pred.vec, output.vec=subtrain.output.vec, ...){
Logistic(pred.vec, output.vec, 1/length(pred.vec))
},
logistic.weighted=
function(pred.vec, output.vec=subtrain.output.vec,
obs.weights=subtrain.obs.weights, ...){
Logistic(pred.vec, output.vec, obs.weights)
},
aum.count=function(pred.vec, diff.count.dt=subtrain.diff.count.dt, ...){
AUM(pred.vec, diff.count.dt)
},
aum.rate=function(pred.vec, diff.rate.dt=subtrain.diff.rate.dt, ...){
AUM(pred.vec, diff.rate.dt)
},
squared.hinge.all.pairs=function(pred.vec, pairs.dt=subtrain.pairs.dt, ...){
pairs.dt[, diff := pred.vec[positive]-pred.vec[negative]-margin]
pairs.dt[, diff.clipped := ifelse(diff<0, diff, 0)]
pairs.tall <- data.table::melt(
pairs.dt,
measure.vars=c("positive", "negative"),
value.name="pred.i",
variable.name="label")
## d/dx (x - y - m)^2 = x - y - m
## d/dy (x - y - m)^2 = -(x - y - m)
pairs.tall[, grad.sign := ifelse(label=="positive", 1, -1)]
N.pairs <- nrow(pairs.dt)
grad.dt <- pairs.tall[, .(
gradient=sum(grad.sign*diff.clipped)
), keyby=pred.i]
list(
gradient=grad.dt$gradient/N.pairs,
loss=sum(pairs.dt$diff.clipped^2)/N.pairs)
}
)
result.list <- list()
for(loss.name in names(loss.list)){
fun <- loss.list[[loss.name]]
result.list[[loss.name]] <- fun(rand.pred.vec)
}
str(result.list)
#> List of 5
#> $ logistic :List of 2
#> ..$ gradient: num [1:10] 0.0348 -0.0454 0.0302 -0.0169 0.0582 ...
#> ..$ loss : num 0.687
#> $ logistic.weighted :List of 2
#> ..$ gradient: num [1:10] 0.0697 -0.0908 0.0605 -0.0337 0.1163 ...
#> ..$ loss : num 1.37
#> $ aum.count :List of 2
#> ..$ gradient: num [1:10] 0 -1 0 0 1 -1 1 0 1 -1
#> ..$ loss : num 2.33
#> $ aum.rate :List of 2
#> ..$ gradient: num [1:10] 0 -0.2 0 0 0.2 -0.2 0.2 0 0.2 -0.2
#> ..$ loss : num 0.467
#> $ squared.hinge.all.pairs:List of 2
#> ..$ gradient: num [1:10] 0.0825 -0.1613 0.0582 0 0.2209 ...
#> ..$ loss : num 1.39
sapply(result.list, "[[", "gradient")
#> logistic logistic.weighted aum.count aum.rate squared.hinge.all.pairs
#> [1,] 0.03483151 0.06966301 0 0.0 0.08251408
#> [2,] -0.04542178 -0.09084355 -1 -0.2 -0.16126764
#> [3,] 0.03024562 0.06049125 0 0.0 0.05818398
#> [4,] -0.01686422 -0.03372844 0 0.0 0.00000000
#> [5,] 0.05816396 0.11632792 1 0.2 0.22087679
#> [6,] -0.06943358 -0.13886715 -1 -0.2 -0.36131911
#> [7,] 0.06195006 0.12390012 1 0.2 0.24614420
#> [8,] -0.03233706 -0.06467412 0 0.0 -0.08710976
#> [9,] 0.06400961 0.12801922 1 0.2 0.26028057
#> [10,] -0.05757592 -0.11515184 -1 -0.2 -0.25830311
Optimization
out.loss.list <- list()
data.name <- "Sonar"
input.output.list <- data.list[[data.name]]
input.mat <- input.output.list[["input.mat"]]
full.input.mat <- scale(input.mat)
full.output.vec <- input.output.list[["output.vec"]]
stopifnot(full.output.vec %in% c(-1, 1))
set.seed(1)
n.folds <- 2
unique.folds <- 1:n.folds
fold.vec <- sample(rep(unique.folds, l=length(full.output.vec)))
validation.fold <- 1
is.set.list <- list(
validation=fold.vec == validation.fold,
subtrain=fold.vec != validation.fold)
set.data.list <- list()
for(set.name in names(is.set.list)){
is.set <- is.set.list[[set.name]]
output.vec <- full.output.vec[is.set]
set.data.list[[set.name]] <- list(
output.vec=output.vec,
obs.weights=equal.class.weights(output.vec),
input.mat=full.input.mat[is.set,],
diff.rate.dt=aum::aum_diffs_binary(output.vec, denominator="rate"),
diff.count.dt=aum::aum_diffs_binary(output.vec, denominator="count"),
pairs.dt=PairsDT(output.vec))
}
X.mat <- set.data.list$subtrain$input.mat
for(loss.name in names(loss.list)){
loss.grad.fun <- loss.list[[loss.name]]
step.size <- 0.1
cat(sprintf("loss=%s\n", loss.name))
set.seed(1)
weight.vec <- last.w <- rnorm(ncol(X.mat))
done <- FALSE
iteration <- 0
prev.set.loss.vec <- rep(1e10, 2)
while(!done){
iteration <- iteration+1
loss.for.weight <- function(w, set.data=set.data.list$subtrain){
pred <- set.data$input.mat %*% w
set.data$pred.vec <- pred
out <- do.call(loss.grad.fun, set.data)
out$pred <- pred
out
}
loss.before.step <- loss.for.weight(weight.vec)
direction <- -t(X.mat) %*% loss.before.step[["gradient"]]
loss.for.step <- function(step.size){
new.weight <- weight.vec + step.size * direction
out <- loss.for.weight(new.weight)
out$new.weight <- new.weight
out$step.size <- step.size
out
}
loss.after.step <- loss.for.step(step.size)
weight.vec <- loss.after.step[["new.weight"]]
diff.w <- sum(abs(weight.vec-last.w))
last.w <- weight.vec
set.loss.vec <- numeric()
for(set.name in names(set.data.list)){
set.data <- set.data.list[[set.name]]
set.loss <- loss.for.weight(weight.vec, set.data)
set.loss.vec[[set.name]] <- set.loss[["loss"]]
roc.df <- WeightedROC::WeightedROC(
set.loss[["pred"]],
set.data[["output.vec"]])
auc <- WeightedROC::WeightedAUC(roc.df)
out.dt <- data.table(
loss.name,
iteration,
set.name,
auc,
loss.value=set.loss$loss)
for(aum.type in c("count", "rate")){
diff.name <- paste0("diff.", aum.type, ".dt")
aum.list <- aum::aum(set.data[[diff.name]], set.loss[["pred"]])
out.col <- paste0("aum.", aum.type)
out.dt[[out.col]] <- aum.list[["aum"]]
}
out.loss.list[[paste(
loss.name,
iteration,
set.name
)]] <- out.dt
if(2000 < iteration || diff.w < 1e-6){
done <- TRUE
}
}#for(set.name
}#while(!done
}#for(loss.name
#> loss=logistic
#> loss=logistic.weighted
#> loss=aum.count
#> loss=aum.rate
#> loss=squared.hinge.all.pairs
out.loss <- do.call(rbind, out.loss.list)
(max.valid.auc <- out.loss[
set.name=="validation",
.SD[which.max(auc), .(iteration, auc, set.name)],
by=.(loss.name)])
#> loss.name iteration auc set.name
#> <char> <num> <num> <char>
#> 1: logistic 1421 0.8731399 validation
#> 2: logistic.weighted 1181 0.8723958 validation
#> 3: aum.count 9 0.9002976 validation
#> 4: aum.rate 237 0.7663690 validation
#> 5: squared.hinge.all.pairs 1086 0.8121280 validation
library(ggplot2)
ggplot(,aes(
iteration, auc, color=set.name))+
geom_line(
data=out.loss)+
geom_point(
data=max.valid.auc)+
facet_grid(
. ~ loss.name,
labeller="label_both",
scales='free',
space='fixed')