source(system.file("Rsource", "GIG.R", package="copula"))## ../inst/Rsource/GIG.R
require(copula)
require(bbmle)
require(lattice)
require(grid)
source(system.file("Rsource", "utils.R", package="copula", mustWork=TRUE))##-> showProc.time() ..
FALSE ## set 'do.profile' below -- *visibly* doPDF <-
##' Initial interval for GIG
##' @title Initial interval for GIG
##' @param U (n x d)-matrix of simulated data
##' @param h non-negative auxiliary parameter for computing initial intervals
##' @param method "etau" via sample version of Kendall's tau
##' "dmle.G" via DMLE of Gumbel
##' @return (2 x 2)-matrix containing the initial interval [1st row: lower,
##' 2nd row: upper; 2 parameters => 2 cols]
##' @author Marius Hofert
function(U, h, method=c("etau","dmle.G")){
ii.GIG <-stopifnot(h >= 0, length(h) >= 2)
matrix(, nrow=2, ncol=2, dimnames=list(c("lower", "upper"), c("nu", "theta")))
I <-## estimate Kendall's tau
match.arg(method)
method <- switch(method,
tau.hat <-"etau" = { # uses sample version of tau, more accurate but slower
cor(U, method="kendall")
tau.hat.mat <-mean(tau.hat.mat[upper.tri(tau.hat.mat)])
},"dmle.G" = { # uses DMLE for Gumbel to estimate tau
apply(U, 1, max)
Z <- log(ncol(U))/(log(length(Z))-log(sum(-log(Z))))
theta.hat.G <-@tau(theta.hat.G)
copGumbel
},stop("wrong method:", method))
## compute largest value of theta (for upper left endpoint of the inital interval)
stopifnot(tau.hat > 0)
0
nu.min <-1,1] <- nu.min # smallest value for nu
I[ iTau.GIG(max(tau.hat-h[1],0.005), theta=c(nu.min, NA))
th.max <-2,2] <- th.max[2] # largest value for theta
I[## compute smallest theta (for lower left endpoint of the inital interval)
iTau.GIG(min(tau.hat+h[2],0.995), theta=c(nu.min, NA)) # largest attainable tau with 1e-30 is one.m.eps=0.9602
th.min <-1,2] <- th.min[2]
I[## compute largest nu (for lower right endpoint of the inital interval)
iTau.GIG(max(tau.hat-h[1],0.005), theta=c(NA, th.min[2]))
nu.max <-2,1] <- nu.max[1]
I[## result
I }
##' -log-likelihood
##' @title -log-likelihood
##' @param nu parameter of the generator/copula
##' @param theta parameter of the generator/copula
##' @param u (n x d)-matrix of simulated data
##' @return -sum(log(density))
##' @author Marius Hofert
function(nu, theta, u){
nlogl.GIG <-if(!is.matrix(u)) u <- rbind(u)
if((d <- ncol(u)) < 2) stop("u should be at least bivariate") # check that d >= 2
-sum(dacopula.GIG(u, theta=c(nu, theta), n.MC=0, log=TRUE))
} function(theta, u) nlogl.GIG(theta[1], theta=theta[2], u=u) # vectorized version nlogl.GIG. <-
Note: The GIG family is two-parametric.
c(0, 0.1, 0.5, 1, 5, 10)
th1 <- colorRampPalette(c("red", "orange", "darkgreen", "turquoise", "blue"),
cols <-space="Lab")(length(th1))
par(pty="s")
for(i in seq_along(th1))
curve(tau.GIG(cbind(th1[i],x)), 1e-12, 2,
main="Kendall's tau for the GIG family", ylim=c(0,1),
xlab=expression(theta), ylab=expression(tau(nu,theta)), add=(i>1),
lwd=1.4, col=cols[i])
as.expression(lapply(1:length(th1), function(i) substitute(nu==nu., list(nu.=th1[i]))))
label <-legend("topright", label, bty="n", lwd=1.4, col=cols)
Let’s specify some parameters.
100 # sample size
n <- 10 # dimension
d <- 0.2 # fix nu
nu <- 0.5 # => psi(t)=(1+t)^(-nu/2)besselK(theta*sqrt(1+t), nu=nu)/besselK(theta, nu=nu) with (nu, theta)=(0.2, 0.0838)
tau <- c(0.15, 0.15) # h_-, h_+ (for initial value) h <-
iTau.GIG(tau, c(nu, NA)) # determine theta such that tau is matched (for given nu)
theta <-set.seed(1000)
rnacopula.GIG(n, d, theta)
U <-par(pty="s")
splom2(U, cex=0.4, pscales=0, main=paste("Sample of size",n,
"from a GIG copula"))
ii.GIG(U, h)
I <- colMeans(I) # initial interval
start <-
## 1) Without profiling: optim with method="L-BFGS-B"
if(FALSE) # << don't do it if won't look at it -- takes ca. 16.5 sec
system.time(optim(par=start, method="L-BFGS-B",
fn=function(x) nlogl.GIG(x[1], theta=x[2], u=U),
lower=c(I[1,1], I[1,2]), upper=c(I[2,1], I[2,2])))
## 2) With profiling: via mle (uses optimizer="optim" with method="L-BFGS-B")
function(nu, theta) nlogl.GIG(nu, theta, u=U)
nLL <-system.time(ml <- mle(nLL, method="L-BFGS-B",
start=list(nu=mean(I[,1]), theta=mean(I[,2])),
lower=c(nu=I[1,1], theta=I[1,2]),
upper=c(nu=I[2,1], theta=I[2,2])))
## user system elapsed
## 12.215 0.020 12.394
summary(ml)
## Maximum likelihood estimation
##
## Call:
## mle(minuslogl = nLL, start = list(nu = mean(I[, 1]), theta = mean(I[,
## 2])), method = "L-BFGS-B", lower = c(nu = I[1, 1], theta = I[1,
## 2]), upper = c(nu = I[2, 1], theta = I[2, 2]))
##
## Coefficients:
## Estimate Std. Error
## nu 0.19074320 0.06197105
## theta 0.09566594 0.01386170
##
## -2 log L: -994.5633
str(ml@details)
## List of 6
## $ par : Named num [1:2] 0.1907 0.0957
## ..- attr(*, "names")= chr [1:2] "nu" "theta"
## $ value : num -497
## $ counts : Named int [1:2] 30 30
## ..- attr(*, "names")= chr [1:2] "function" "gradient"
## $ convergence: int 0
## $ message : chr "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
## $ hessian : num [1:2, 1:2] 264 139 139 5277
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "nu" "theta"
## .. ..$ : chr [1:2] "nu" "theta"
## 3) With profiling: via mle2 (uses optimizer="optim" with method="L-BFGS-B")
system.time(ml2 <- mle2(nlogl.GIG, data=list(u=U), method="L-BFGS-B",
start=list(nu=mean(I[,1]), theta=mean(I[,2])),
lower=c(nu=I[1,1], theta=I[1,2]),
upper=c(nu=I[2,1], theta=I[2,2])))
## user system elapsed
## 14.830 0.007 15.041
summary(ml2)
## Maximum likelihood estimation
##
## Call:
## mle2(minuslogl = nlogl.GIG, start = list(nu = mean(I[, 1]), theta = mean(I[,
## 2])), method = "L-BFGS-B", data = list(u = U), lower = c(nu = I[1,
## 1], theta = I[1, 2]), upper = c(nu = I[2, 1], theta = I[2,
## 2]))
##
## Coefficients:
## Estimate Std. Error z value Pr(z)
## nu 0.190743 0.060871 3.1336 0.001727 **
## theta 0.095666 0.013684 6.9910 2.729e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## -2 log L: -994.5633
str(ml2@details)
## List of 8
## $ par : Named num [1:2] 0.1907 0.0957
## ..- attr(*, "names")= chr [1:2] "nu" "theta"
## $ value : num -497
## $ counts : Named int [1:2] 30 30
## ..- attr(*, "names")= chr [1:2] "function" "gradient"
## $ convergence: int 0
## $ message : chr "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
## $ hessian : num [1:2, 1:2] 272 112 112 5387
## $ maxgrad : num 9.36
## $ eratio : num 0.0501
FALSE # set this to TRUE to compute profile-likelihood plots (time-consuming)
do.profile <-if(do.profile){
system.time(prof <- profile(ml))
if(FALSE) { ## FIXME (?)
## maybe this helps: https://stat.ethz.ch/pipermail/r-help/2005-July/076003.html
confint(prof)
ci <-
ciplot(prof)
}system.time(prof2 <- profile(ml2)) # profiling (time-consuming)
confint(prof2))
(ci <-plot(prof2) # => for adjusting stepsize etc., see ?profile.mle2
}showProc.time()
## Time (user system elapsed): 28.391 0.049 28.825
## Build grid
20 # number of grid points = number of intervals + 1
m <- seq(I[1,1], I[2,1], length.out=m) # grid points for nu
th <- seq(I[1,2], I[2,2], length.out=m) # grid points for theta
beta <- expand.grid(theta=th, beta=beta) # grid
grid <- "GIG_vign-nlogl-gr.rds"
base.saveF <- system.file("rData", base.saveF, package = "copula")
saveF <-if(nzchar(saveF) && file.exists(saveF)) { # save time, also on CRAN
readRDS(saveF)
val.grid <-else { ## takes around 45 sec
} print(system.time(
## val.grid := values of the -log-likelihood on the grid
apply(grid, 1, nlogl.GIG., u=U)
val.grid <-
)) file.path(if(dir.exists(sd <- "~/R/Pkgs/copula/inst/rData")) sd
saveF <-else tempdir(), base.saveF)
saveRDS(val.grid, file = saveF)
cat("saved to saveFile = ", dQuote(saveF), "\n")
}showProc.time()
## Time (user system elapsed): 0.016 0.002 0.029
theta
true.theta <- c(true.theta, nlogl.GIG.(true.theta, u=U)) # theoretical optimum
true.val <- ml@coef # optimizer-optimum
opt <- c(opt, nlogl.GIG.(opt, u=U)) # optimizer-optimum and its value
opt.val <- rbind(true.val, opt.val) # points to add to wireframe plot
pts <- "-log-likelihood of an Archimedean GIG copula" # title
title <- substitute(italic(n) == N ~~~ italic(d)== D ~~~
sub <- tau == TAU ~~~ "#{eval}:" ~ NIT,
list(N=n, D=d, TAU= tau, NIT= ml@details$counts[[1]]))
as.expression(sub) # lattice bug
sub <-wireframe(val.grid ~ grid[,1] * grid[,2], screen=list(z=70, x=-55), zoom=0.95,
xlab = expression(italic(theta)), ylab = expression(italic(beta)),
zlab = list(as.expression(-log~L * group("(",list(theta,beta),")")), rot=90),
main=title, sub=sub, pts=pts, scales=list(col=1, arrows=FALSE),
par.settings=list(axis.line=list(col="transparent"),
clip=list(panel="off")), zlim=c(min(val.grid, pts[,3]),
max(val.grid, pts[,3])), aspect=1,
panel.3d.wireframe = function(x,y,z,xlim,ylim,zlim,xlim.scaled,
ylim.scaled,zlim.scaled,pts,...) {panel.3dwire(x=x, y=y, z=z, xlim=xlim, ylim=ylim, zlim=zlim,
xlim.scaled=xlim.scaled, ylim.scaled=ylim.scaled,
zlim.scaled=zlim.scaled, alpha.regions=0.8, ...)
panel.3dscatter(x=pts[,1], y=pts[,2], z=pts[,3],
xlim=xlim, ylim=ylim, zlim=zlim,
xlim.scaled=xlim.scaled, ylim.scaled=ylim.scaled,
zlim.scaled=zlim.scaled, type="p", col=c("red","blue"),
pch=c(3,4), lex=2, cex=1.4, .scale=TRUE, ...)
},key = list(x=0.64, y=1.01,
points = list(pch=c(3,4), col=c("red","blue"), lwd=2, cex=1.4),
text = list(c("True value", "Optimum of optimizer")), padding.text=3,
cex=1, align=TRUE, transparent=TRUE))
c(min(grid[,1]),max(grid[,1]))
xlim. <- c(min(grid[,2]),max(grid[,2]))
ylim. <- (xlim.[2] - xlim.[1]) * 0.04
xeps <- (ylim.[2] - ylim.[1]) * 0.04
yeps <- adjustcolor(colorRampPalette(c("darkgreen", "green", "orange", "yellow"),
cols <-space="Lab")(100), 0.8)
levelplot(val.grid ~ grid[,1] * grid[,2],
par.settings = list(layout.heights=list(main=3, sub=2),
regions=list(col=cols)),
xlim = c(xlim.[1]-xeps, xlim.[2]+xeps),
ylim = c(ylim.[1]-yeps, ylim.[2]+yeps),
xlab = expression(italic(theta)), ylab=expression(italic(beta)),
main=title, sub=sub, pts=pts, aspect=1,
scales=list(alternating=c(1,1), tck=c(1,0)), contour=TRUE,
panel = function(x, y, z, pts, ...) {
panel.levelplot(x=x, y=y, z=z, ...)
grid.points(x=pts[1,1], y=pts[1,2], pch=3,
gp=gpar(lwd=2, col="red")) # + true value
grid.points(x=pts[2,1], y=pts[2,2], pch=4,
gp=gpar(lwd=2, col="blue")) # x optimum
},key = list(x=0.18, y=1.08, points = list(pch=c(3,4), col=c("red","blue"),
lwd=2, cex=1.4),
columns=2, text = list(c("True value", "Optimum of optimizer")),
align=TRUE, transparent=TRUE))
showProc.time()
## Time (user system elapsed): 0.476 0.002 0.442