This vignette provides an introduction to the R package GFM, whose function gfm() implements the GFM model, a Generalized Factor Model for ultra-high-dimensional variables with mixed types. The package can be installed with the command:
library(remotes)
remotes::install_github("feiyoung/GFM")
or
install.packages("GFM")
The package can be loaded with the command:
library("GFM")
#> Loading required package: doSNOW
#> Warning: package 'doSNOW' was built under R version 4.0.5
#> Loading required package: foreach
#> Loading required package: iterators
#> Loading required package: snow
#> Warning: package 'snow' was built under R version 4.0.5
#> Loading required package: parallel
#>
#> Attaching package: 'parallel'
#> The following objects are masked from 'package:snow':
#>
#> clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
#> clusterExport, clusterMap, clusterSplit, makeCluster, parApply,
#> parCapply, parLapply, parRapply, parSapply, splitIndices,
#> stopCluster
First, we generate the data with homogeneous normal variables.
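The data-generating call for this example is sketched below; it assumes that gendata() accepts type='homonorm' for homogeneous normal variables, analogous to the 'heternorm', 'pois', and 'pois_bino' calls used later in this vignette, and the other arguments mirror those calls.
# generate homogeneous normal data with q = 2 factors (type = 'homonorm' is assumed here)
dat <- gendata(seed=1, n=100, p=100, type='homonorm', q=2, rho=1)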
Second, we set the algorithm parameters.
# a vector of all ones means there is only one variable type among all the variables.
group <- rep(1,ncol(dat$X))
# set the variable type; 'gaussian' means continuous variables.
type <- 'gaussian'
Third, we fit the GFM model with a user-specified number of factors.
# specify q=2
gfm1 <- gfm(dat$X, group, type, q=2, output = FALSE)
# measure the performance of GFM estimators in terms of canonical correlations
measurefun(gfm1$hH, dat$H0, type='ccor')
measurefun(gfm1$hB, dat$B0, type='ccor')
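For reference, the same canonical-correlation comparison against the linear factor model can be run on this homogeneous data, mirroring the Factorm() calls used in the later sections; lfm0 is a name introduced here for illustration.
# fit a linear factor model with the same number of factors
lfm0 <- Factorm(dat$X, q=2)
measurefun(lfm0$hH, dat$H0, type='ccor')
measurefun(lfm0$hB, dat$B0, type='ccor')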
The number of factors can also be determined in a data-driven manner.
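For example, a call of the following form (a sketch mirroring the data-driven selection used later in this vignette, with gfm_auto a name introduced here) lets gfm() choose the number of factors from a candidate set:
# let gfm() select q from the candidate set 1:4
gfm_auto <- gfm(dat$X, group, type, q=NULL, q_set = 1:4, output = FALSE)
measurefun(gfm_auto$hH, dat$H0, type='ccor')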
First, we generate data with heterogeneous normal variables and set the algorithm parameters.
dat <- gendata(seed=1, n=100, p=100, type='heternorm', q=2, rho=1)
group <- rep(1,ncol(dat$X))
type <- 'gaussian'
Second, we fit the GFM model with a user-specified number of factors and compare the results with those of the linear factor model.
# specify q=2
gfm1 <- gfm(dat$X, group, type, q=2, output = FALSE)
# measure the performance of GFM estimators in terms of canonical correlations
corH_gfm <- measurefun(gfm1$hH, dat$H0, type='ccor')
corB_gfm <- measurefun(gfm1$hB, dat$B0, type='ccor')
lfm1 <- Factorm(dat$X, q=2)
corH_lfm <- measurefun(lfm1$hH, dat$H0, type='ccor')
corB_lfm <- measurefun(lfm1$hB, dat$B0, type='ccor')
library(ggplot2)
df1 <- data.frame(CCor = c(corH_gfm, corH_lfm, corB_gfm, corB_lfm),
                  Method = factor(rep(c('GFM', 'LFM'), times = 2)),
                  Quantity = factor(c(rep('factors', 2), rep('loadings', 2))))
ggplot(data = df1, aes(x = Quantity, y = CCor, fill = Method)) +
  geom_bar(position = "dodge", stat = "identity", width = 0.5)
The number of factors can also be determined in a data-driven manner.
# select q automatically
gfm2 <- gfm(dat$X, group, type, q=NULL, q_set = 1:4, output = FALSE)
# measure the performance of GFM estimators in terms of canonical correlations
corH_gfm <- measurefun(gfm2$hH, dat$H0, type='ccor')
corB_gfm <- measurefun(gfm2$hB, dat$B0, type='ccor')
library(ggplot2)
df1 <- data.frame(CCor = c(corH_gfm, corH_lfm, corB_gfm, corB_lfm),
                  Method = factor(rep(c('GFM', 'LFM'), times = 2)),
                  Quantity = factor(c(rep('factors', 2), rep('loadings', 2))))
ggplot(data = df1, aes(x = Quantity, y = CCor, fill = Method)) +
  geom_bar(position = "dodge", stat = "identity", width = 0.5)
First, we generate data with count (Poisson) variables and set the algorithm parameters.
q <- 3; p <- 200
dat <- gendata(seed=1, n=200, p=p, type='pois', q=q, rho=4)
group <- rep(1,ncol(dat$X))
type <- 'poisson'
Second, we fit the GFM model in parallel.
system.time(
  gfm2 <- gfm(dat$X, group, type, parallel = TRUE, q = NULL, q_set = 1:4,
              output = FALSE, fast_version = TRUE)
)
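To gauge the benefit of the parallel backend, the same fit can be timed without it; this sketch assumes that parallel = FALSE disables the doSNOW-based parallelism, and gfm3 is a name introduced here for illustration.
# time the same fit without the parallel backend (parallel = FALSE is assumed)
system.time(
  gfm3 <- gfm(dat$X, group, type, parallel = FALSE, q = NULL, q_set = 1:4,
              output = FALSE, fast_version = TRUE)
)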
Third, we compare the results with those of the linear factor model.
# measure the performance of GFM estimators in terms of canonical correlations
corH_gfm <- measurefun(gfm2$hH, dat$H0, type='ccor')
corB_gfm <- measurefun(gfm2$hB, dat$B0, type='ccor')
lfm1 <- Factorm(dat$X, q=3)
corH_lfm <- measurefun(lfm1$hH, dat$H0, type='ccor')
corB_lfm <- measurefun(lfm1$hB, dat$B0, type='ccor')
library(ggplot2)
df1 <- data.frame(CCor = c(corH_gfm, corH_lfm, corB_gfm, corB_lfm),
                  Method = factor(rep(c('GFM', 'LFM'), times = 2)),
                  Quantity = factor(c(rep('factors', 2), rep('loadings', 2))))
ggplot(data = df1, aes(x = Quantity, y = CCor, fill = Method)) +
  geom_bar(position = "dodge", stat = "identity", width = 0.5)
First, we generate mixed-type data with count (Poisson) and binomial variables and set the algorithm parameters. Then we fit the GFM model with a user-specified number of factors.
dat <- gendata(seed=1, n=200, p=200, type='pois_bino', q=2, rho=2)
group <- c(rep(1,ncol(dat$X)/2), rep(2,ncol(dat$X)/2))
type <- c('poisson','binomial')
# inspect the first column (a Poisson-type count variable)
table(dat$X[, 1])
# inspect the last column (a binomial-type variable)
table(dat$X[, 200])
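As a quick sanity check of the mixed-type layout encoded by group (a brief base-R sketch; the column split is taken from the group definition above), one can summarise each block:
# number of variables assigned to each type
table(group)
# value range of the Poisson (count) block
range(dat$X[, group == 1])
# value distribution of the binomial block
table(as.vector(dat$X[, group == 2]))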
# user-specified q=2
gfm2 <- gfm(dat$X, group, type, dropout = 2, q=2, output = FALSE, maxIter=5)
measurefun(gfm2$hH, dat$H0, type='ccor')
measurefun(gfm2$hB, dat$B0, type='ccor')
Third, we select the number of factors automatically and compare the results with those of the linear factor model.
# select q automatically
gfm2 <- gfm(dat$X, group, type, dropout = 2, q=NULL, q_set = 1:4, output = FALSE)
# measure the performance of GFM estimators in terms of canonical correlations
corH_gfm <- measurefun(gfm2$hH, dat$H0, type='ccor')
corB_gfm <- measurefun(gfm2$hB, dat$B0, type='ccor')
Then, we compare with the linear factor model.
# use q = 2, matching the number of factors used to generate the data
lfm1 <- Factorm(dat$X, q=2)
corH_lfm <- measurefun(lfm1$hH, dat$H0, type='ccor')
corB_lfm <- measurefun(lfm1$hB, dat$B0, type='ccor')
library(ggplot2)
df1 <- data.frame(CCor = c(corH_gfm, corH_lfm, corB_gfm, corB_lfm),
                  Method = factor(rep(c('GFM', 'LFM'), times = 2)),
                  Quantity = factor(c(rep('factors', 2), rep('loadings', 2))))
ggplot(data = df1, aes(x = Quantity, y = CCor, fill = Method)) +
  geom_bar(position = "dodge", stat = "identity", width = 0.5)
sessionInfo()
#> R version 4.0.3 (2020-10-10)
#> Platform: x86_64-w64-mingw32/x64 (64-bit)
#> Running under: Windows 10 x64 (build 22000)
#>
#> Matrix products: default
#>
#> locale:
#> [1] LC_COLLATE=C
#> [2] LC_CTYPE=Chinese (Simplified)_China.936
#> [3] LC_MONETARY=Chinese (Simplified)_China.936
#> [4] LC_NUMERIC=C
#> [5] LC_TIME=Chinese (Simplified)_China.936
#>
#> attached base packages:
#> [1] parallel stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] GFM_1.1.0 doSNOW_1.0.19 snow_0.4-4 iterators_1.0.13
#> [5] foreach_1.5.1
#>
#> loaded via a namespace (and not attached):
#> [1] codetools_0.2-18 digest_0.6.28 MASS_7.3-53.1 R6_2.5.1
#> [5] jsonlite_1.7.2 magrittr_2.0.1 evaluate_0.14 rlang_0.4.11
#> [9] stringi_1.7.5 jquerylib_0.1.4 bslib_0.3.1 rmarkdown_2.7
#> [13] tools_4.0.3 stringr_1.4.0 xfun_0.28 yaml_2.2.1
#> [17] fastmap_1.1.0 compiler_4.0.3 htmltools_0.5.2 knitr_1.36
#> [21] sass_0.4.0