library(HLMdiag)
library(ggplot2)
library(lme4)
#> Loading required package: Matrix
data("Exam", package = "mlmRev")
head(Exam)
#> school normexam schgend schavg vr intake standLRT sex type
#> 1 1 0.2613242 mixed 0.1661752 mid 50% bottom 25% 0.6190592 F Mxd
#> 2 1 0.1340672 mixed 0.1661752 mid 50% mid 50% 0.2058022 F Mxd
#> 3 1 -1.7238820 mixed 0.1661752 mid 50% top 25% -1.3645760 M Mxd
#> 4 1 0.9675862 mixed 0.1661752 mid 50% mid 50% 0.2058022 F Mxd
#> 5 1 0.5443412 mixed 0.1661752 mid 50% mid 50% 0.3711052 F Mxd
#> 6 1 1.7348992 mixed 0.1661752 mid 50% bottom 25% 2.1894372 M Mxd
#> student
#> 1 143
#> 2 145
#> 3 142
#> 4 141
#> 5 138
#> 6 155
# Model fm1 on page 6
<- lmer(normexam ~ standLRT + (1 | school), Exam, REML = FALSE))
(fm1 #> Linear mixed model fit by maximum likelihood ['lmerMod']
#> Formula: normexam ~ standLRT + (1 | school)
#> Data: Exam
#> AIC BIC logLik deviance df.resid
#> 9365.243 9390.478 -4678.622 9357.243 4055
#> Random effects:
#> Groups Name Std.Dev.
#> school (Intercept) 0.3035
#> Residual 0.7522
#> Number of obs: 4059, groups: school, 65
#> Fixed Effects:
#> (Intercept) standLRT
#> 0.002391 0.563371
# Extract level-1 residuals
# Residual plot from page 7
<- hlm_resid(fm1)
resid_fm1 head(resid_fm1)
#> # A tibble: 6 x 10
#> id normexam standLRT school .resid .fitted .ls.resid .ls.fitted .mar.resid
#> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 0.261 0.619 1 -0.464 0.725 -0.561 0.822 -0.0898
#> 2 2 0.134 0.206 1 -0.358 0.492 -0.395 0.529 0.0157
#> 3 3 -1.72 -1.36 1 -1.33 -0.393 -1.14 -0.585 -0.958
#> 4 4 0.968 0.206 1 0.475 0.492 0.438 0.529 0.849
#> 5 5 0.544 0.371 1 -0.0409 0.585 -0.102 0.647 0.333
#> 6 6 1.73 2.19 1 0.125 1.61 -0.201 1.94 0.499
#> # … with 1 more variable: .mar.fitted <dbl>
ggplot(resid_fm1, aes(x = standLRT, y = .ls.resid)) +
geom_point() +
geom_smooth() +
labs(y = "LS level-1 residuals")
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Model fm2 on page 7
<- lmer(normexam ~ standLRT + I(standLRT^2) + I(standLRT^3) +
fm2 1 | school), Exam, REML = FALSE)
(
<- hlm_resid(fm2)
resid_fm2 #> Warning in LSresids.lmerMod(object, level = 1, standardize = standardize): LS residuals might be inaccurate as one or more groups are rank deficient.
#> Use the 'include.ls = FALSE' parameter to get EB residuals only.
# Figure 2 page 8
ggplot(resid_fm2, aes(x = `I(standLRT^2)`, y = .ls.resid)) +
geom_hline(yintercept = 0, color = "blue") +
geom_point() +
labs( y = "LS residuals", x = "standLRT2")
ggplot_qqnorm()
function is now defunct, Q-Q plots are now much easier to create in ggplot2 directly than when the package was first created.
ggplot(resid_fm2, aes(sample = .ls.resid)) +
geom_qq() +
geom_qq_line() +
labs(x = "Theoretical Quantiles", y = "Sample Quantiles")
A better alternative if found in the qqplotr package
library(qqplotr)
#>
#> Attaching package: 'qqplotr'
#> The following objects are masked from 'package:ggplot2':
#>
#> StatQqLine, stat_qq_line
ggplot(resid_fm2, aes(sample = .ls.resid)) +
stat_qq_band() +
stat_qq_line() +
stat_qq_point() +
labs(x = "Theoretical Quantiles", y = "Sample Quantiles")
EB residuals are now called .ranef.
residuals
# Model fm3, page 11
<- lmer(normexam ~ standLRT + I(standLRT^2) + I(standLRT^3) + sex +
fm3 | school), Exam, REML = FALSE)
(standLRT
## Extract level-2 EB residuals
<- hlm_resid(fm3, level = "school")
resid_fm3 #> Warning in LSresids.lmerMod(object, level = level, stand = standardize): The model matrix is likely rank deficient. Some LS residuals cannot be calculated.
#> It is recommended to use EB (.ranef) group level residuals for this model.
resid_fm3#> # A tibble: 65 x 5
#> school .ranef.intercept .ranef.stand_lrt .ls.intercept .ls.stand_lrt
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 1 0.404 0.127 0.533 0.348
#> 2 2 0.401 0.159 NA NA
#> 3 3 0.495 0.0780 0.523 0.00905
#> 4 4 0.0597 0.120 0.265 0.186
#> 5 5 0.251 0.0711 0.186 0.0503
#> 6 6 0.448 0.0482 NA NA
#> 7 7 0.297 -0.151 NA NA
#> 8 8 -0.0788 0.0131 NA NA
#> 9 9 -0.157 -0.0660 -0.282 -0.251
#> 10 10 -0.282 -0.137 -0.172 -0.451
#> # … with 55 more rows
## Construct school-level data set
library(dplyr)
<- Exam %>%
SchoolExam group_by(school) %>%
::summarize(
dplyrsize = length(school),
schgend = unique(schgend),
schavg = unique(schavg),
type = unique(type),
schLRT = mean(standLRT)
)
<- SchoolExam %>% left_join(resid_fm3, by = "school")
SchoolExam
SchoolExam#> # A tibble: 65 x 10
#> school size schgend schavg type schLRT .ranef.intercept .ranef.stand_lrt
#> <chr> <int> <fct> <dbl> <fct> <dbl> <dbl> <dbl>
#> 1 1 73 mixed 0.166 Mxd 0.166 0.404 0.127
#> 2 2 55 girls 0.395 Sngl 0.395 0.401 0.159
#> 3 3 52 mixed 0.514 Mxd 0.514 0.495 0.0780
#> 4 4 79 mixed 0.0918 Mxd 0.0918 0.0597 0.120
#> 5 5 35 mixed 0.211 Mxd 0.211 0.251 0.0711
#> 6 6 80 girls 0.638 Sngl 0.638 0.448 0.0482
#> 7 7 88 girls -0.0290 Sngl -0.0290 0.297 -0.151
#> 8 8 102 girls -0.0405 Sngl -0.0405 -0.0788 0.0131
#> 9 9 34 mixed -0.494 Mxd -0.494 -0.157 -0.0660
#> 10 10 50 mixed 0.189 Mxd 0.189 -0.282 -0.137
#> # … with 55 more rows, and 2 more variables: .ls.intercept <dbl>,
#> # .ls.stand_lrt <dbl>
## Left panel -- figure 5
ggplot(
SchoolExam, aes(
x = reorder(schgend, .ranef.intercept, median),
y = .ranef.intercept)
+
) geom_boxplot() +
labs(x = "school gender", y = "level-2 residual (Intercept)")
## Right panel -- figure 5
ggplot(SchoolExam, aes(x = schavg, y = .ranef.intercept)) +
geom_point() +
geom_smooth() +
labs(x = "average intake score", y = "level-2 residual (Intercept)")
## Model fm4, page 12
<- lmer(normexam ~ standLRT + I(standLRT^2) + I(standLRT^3) +
fm4 + schgend + schavg + (standLRT | school),
sex data = Exam, REML = FALSE)
<- hlm_resid(fm4, level = "school", include.ls = FALSE)
resid_fm4
## Figure 6, page 13
ggplot(resid_fm4, aes(sample = .ranef.intercept)) +
stat_qq_band() +
stat_qq_line() +
stat_qq_point() +
labs(x = "Theoretical Quantiles", y = "Sample Quantiles")
ggplot(resid_fm4, aes(sample = .ranef.stand_lrt)) +
stat_qq_band() +
stat_qq_line() +
stat_qq_point() +
labs(x = "Theoretical Quantiles", y = "Sample Quantiles")
We can now use hlm_influence
to pull off all of the case-deletion diagnostics for the fixed effects at the specified level:
# Calculating influence diagnostics for model fm4
<- hlm_influence(fm4, level = "school")
influence_fm4
influence_fm4#> # A tibble: 65 x 6
#> school cooksd mdffits covtrace covratio leverage.overall
#> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 0.0216 0.0210 0.141 1.15 0.0217
#> 2 2 0.0144 0.0141 0.137 1.14 0.0267
#> 3 3 0.0384 0.0361 0.140 1.15 0.0257
#> 4 4 0.0165 0.0160 0.133 1.14 0.0201
#> 5 5 0.00889 0.00868 0.0677 1.07 0.0313
#> 6 6 0.0190 0.0171 0.166 1.17 0.0179
#> 7 7 0.0413 0.0394 0.111 1.12 0.0174
#> 8 8 0.00712 0.00668 0.205 1.22 0.0172
#> 9 9 0.00418 0.00408 0.140 1.15 0.0400
#> 10 10 0.00956 0.00935 0.0819 1.08 0.0249
#> # … with 55 more rows
dotplot_diag(influence_fm4$cooksd, cutoff = "internal", name = "cooks.distance")
dotplot_diag(influence_fm4$cooksd, cutoff = "internal", name = "cooks.distance", modify = "dotplot")
#> Warning: Removed 4 rows containing missing values (geom_text_repel).
<- cooks.distance(fm4, level = "school", include.attr = TRUE)
beta_cdd #> Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if `.name_repair` is omitted as of tibble 2.0.0.
#> Using compatibility `.name_repair`.
25,]
beta_cdd[#> # A tibble: 1 x 9
#> cooksd beta_1 beta_2 beta_3 beta_4 beta_5 beta_6 beta_7 beta_8
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0.0863 0.00390 -0.00987 -0.00333 0.00321 0.000980 0.00232 -0.0168 0.0221
To calculate the relative variance change, we use hlm_influence()
, but approx = FALSE
must be specified:
hlm_influence(fm4, approx = FALSE)