The document addresses the simulation study that was performed to understand whether cIRT
was able to effectively recover parameters under the proposed model. There are three sections to this document. The first section details the simulation setup and implementation. The second section deals with how we obtained overall measurements for each model realization. The third and final section depicts the output structure we used to create the tables within the publication.
The simulation study below is configured to generate results one might obtain from a pool of 1,000 subjects taking a 20 item test. We obtain summarize the results obtained from running the model 100 times over slightly differing \(\theta\) and \(\eta\).
### Variables
# Y = trial matix
# C = KN vector of binary choices
# N = #of subjects
# J = # of items
# K = # of choices
# atrue = true item discriminations
# btrue = true item locations
# thetatrue = true thetas/latent performance
# gamma = fixed effects coefficients
# Sig = random-effects variance-covariance
# subid = id variable for subjects
# install mvtnorm is necessary
# install.packages("mvtnorm")
# Load the Library
library(cIRT)
# Simulate 2PNO Data
= 1000 # Students
N = 20 # Total numbers of possible items per SA
J
# Set a seed for the random generation of a's and b's.
set.seed(1337)
# Randomly pick a's and b's
# Generate as, bs
=runif(J)+1
atrue=2*runif(J)-1
btrue
save(atrue, btrue, file="a_b_true.rda")
# 2 Level Probit Data
= 30
K
= c(.5,1)
gam_notheta = c(3,.25)
gam_theta = c(gam_notheta,gam_theta)
gamma
= matrix(c(.25,0,0,.125),2,2)
Sig
# Number of replications
= 100
B
# Begin the Simulation Study
for(b in 1:B){
# Provide user with state information
cat(paste0("On Iteration:",b,"\n"))
set.seed(b + 1234)
# True theta and etay
= rnorm(N)
thetatrue = outer(rep(1,N),atrue) * thetatrue - outer(rep(1,N),btrue)
etay
# Generate Y for 2PNO model
= pnorm(etay)
p.correct = matrix(rbinom(N*J, 1, p.correct),N,J)
Y
#################################################
# Simulating 2 level probit data
#################################################
= expand.grid(cid = 1:K,sid = 1:N)[,2]
subid
= rnorm(K*N,0,1) # Pred
pred
= center_matrix(as.matrix(pred))
center_pred
= cbind(1,center_pred)
Xnotheta
= rep(thetatrue,each=K)*Xnotheta
Xtheta = cbind(Xnotheta,Xtheta)
X
= mvtnorm::rmvnorm(N,mean=c(0,0),sigma=Sig) # mvtnorm environment accessed
zetas = apply(Xnotheta*zetas[rep(1:N,each=K),],1,sum)
W_veczeta
= X%*%gamma + W_veczeta
etac = rnorm(N*K,mean=etac,sd=1)
Zc = 1*(Zc>0)
C
# Run the Choice Item Response Model
= cIRT(subid,
out1
Xnotheta,c(1,2),
Xnotheta,
Y,
C, 5000)
= paste0("model_",b)
mname
# Assign the data set name
assign(mname, out1)
# Save the out object
save(list=mname, file=paste0(mname,".rda"))
# Clean up Export
rm(list = c(mname,"out1"))
}
After we obtain the 100 different models, we will need to take averages over each models chain and then obtain an overall average. To do so, we used the following script:
# E[theta] - theta
= function(theta.star, theta.true){
bias matrix(mean(theta.star) - theta.true,ncol=1)
}
= function(theta.star, theta.true){
bias2 matrix(theta.star - theta.true,ncol=1)
}
# sqrt ( 1/n * sum( (y_i - y.hat_i)^2 )
= function(y,y.hat){
RMSE sqrt( (y-y.hat)^2 )
}
# Change true values if needed
# True Values
= c(.5,1)
gam_notheta = c(3,.25)
gam_theta = c(gam_notheta,gam_theta)
gamma # Loads a and b values
load("a_b_true.rda")
= as.numeric(matrix(c(.25,0,0,.125),2,2))[c(1,2,4)]
Sig = 100
B
# Storage to hold bootstrap replications
= matrix(0, B, 20)
a_result
= matrix(0, B, 20)
b_result
= matrix(0, B, 2)
gs0_result
= matrix(0, B, 2)
beta_result
= array(NA, dim=c(2,2,B))
sig_result
for(b in 1:B){
= paste0("model_",b)
mname
load(paste0(mname, ".rda"))
= get(mname)
d
= apply(d$as, 2, FUN = mean)
a_result[i,] = apply(d$bs, 2, FUN = mean)
b_result[i,]
= apply(d$gs0, 2, FUN = mean)
gs0_result[i,]
= apply(d$betas, 2, FUN = mean)
beta_result[i,]
= solve(apply(d$Sigma_zeta_inv, c(1,2), FUN = mean))
sig_result[,,i]
}
# Obtain an overall mean for each of the following:
= apply(a_result, 2, FUN = mean)
m_a_result
= apply(b_result, 2, FUN = mean)
m_b_result
= apply(gs0_result, 2, FUN = mean)
m_gs0_result
= apply(beta_result, 2, FUN = mean)
m_beta_result
= as.numeric(apply(sig_result, c(1,2), FUN = mean))[c(1,2,4)]
m_sig_result
# Perform a bias evaluation given the true values:
= bias2(m_a_result,atrue)
a_bias
= bias2(m_b_result,btrue)
b_bias
= bias2(m_gs0_result,gamma[1:2])
gs0_bias
= bias2(m_beta_result,gamma[3:4])
beta_bias
= bias2(m_sig_result, Sig)
sig_bias
# Perform the RMSE under the supplied results:
= RMSE(m_a_result,atrue)
a_RMSE
= RMSE(m_b_result,btrue)
b_RMSE
= RMSE(m_gs0_result,gamma[1:2])
gs0_RMSE
= RMSE(m_beta_result,gamma[3:4])
beta_RMSE
= RMSE(m_sig_result, Sig) sig_RMSE
After we obtain the different results, we opted to write export code to format the data in a way that the xtable
could process and export it.
# Make a results export
= cbind(m_a_result, atrue, a_bias, a_RMSE)
results_a
rownames(results_a) = paste0("Item ",1:length(atrue))
colnames(results_a) = c("$a$ Estimate", "$a$ True", "$a$ Bias", "$a$ RMSE")
= cbind(m_b_result, btrue, b_bias, b_RMSE)
results_b
rownames(results_b) = paste0("Item ",1:length(btrue))
colnames(results_b) = c("$b$ Estimate", "$b$ True", "$b$ Bias", "$b$ RMSE")
= cbind(m_gs0_result,gamma[1:2],gs0_bias,gs0_RMSE)
results_gs0 rownames(results_gs0) = paste0("$\\gamma_",1:2,"$")
colnames(results_gs0) = c("$\\beta$ Estimate", "$\\beta$ True", "$\\beta$ Bias", "$\\beta$ RMSE")
= cbind(m_beta_result,gamma[3:4],beta_bias,beta_RMSE)
results_beta rownames(results_beta) = paste0("$\\beta_",1:2,"$")
colnames(results_beta) = c("$\\beta$ Estimate", "$\\beta$ True", "$\\beta$ Bias", "$\\beta$ RMSE")
= cbind(m_sig_result,Sig,sig_bias,sig_RMSE)
results_sig_zeta rownames(results_sig_zeta) = c("$\\Zeta_{1,1}$","$\\Zeta_{2,1} = $\\Zeta_{1,2}$", "$\\Zeta_{2,2}$")
colnames(results_sig_zeta) = c("$\\Sigma_{\\Zeta}$ Estimate", "$\\beta$ True", "$\\beta$ Bias", "$\\beta$ RMSE")
save(results_a, results_b, results_gs0, results_beta, results_sig_zeta, file="sim_results.rda")