knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

library(languageR)
library(lme4)
library(dplyr)
library(tidyr)
library(ggplot2)
library(tibble)
library(DHARMa)
library(emmeans)
library(knitr)

set.seed(1234)
theme_set(theme_minimal(base_size = 12))

1 Overview

This is the second tutorial on frequentist modeling, see the LMER modeling walkthrough for an introduction to this series.

This tutorial fits a generalized linear mixed-effects model with glmer() using the built-in lexdec data from the languageR package. The outcome is binary response accuracy: 1 = correct, 0 = incorrect. The factor of interest is PrevType, which records whether the previous trial was a word or nonword.

The workflow is:

  1. Prepare tidy binary-outcome data.
  2. Set the factor coding scheme.
  3. Start with a maximally specified random-effects structure.
  4. Fit the maximal model and check convergence and singular fit.
  5. Fit a reduced random-effects model when the maximal model is problematic.
  6. Use the reduced random-effects model as the working model.
  7. Fit a reduced model that leaves out the fixed effect of interest.
  8. Use anova(reduced_model, full_model, test = "Chisq") for the likelihood ratio test, which resembles anova(model) in LMER for factor significance checking.
  9. Check GLMM diagnostics with DHARMa.
  10. Interpret log-odds, odds ratios, and predicted probabilities.

2 Prepare the Data

2.1 Load data

data("lexdec", package = "languageR")

dplyr::glimpse(lexdec)
## Rows: 1,659
## Columns: 28
## $ Subject        <fct> A1, A1, A1, A1, A1, A1, A1, A1, A1, A1, A1, A1, A1, A1,…
## $ RT             <dbl> 6.340359, 6.308098, 6.349139, 6.186209, 6.025866, 6.180…
## $ Trial          <int> 23, 27, 29, 30, 32, 33, 34, 38, 41, 42, 45, 46, 47, 48,…
## $ Sex            <fct> F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F…
## $ NativeLanguage <fct> English, English, English, English, English, English, E…
## $ Correct        <fct> correct, correct, correct, correct, correct, correct, c…
## $ PrevType       <fct> word, nonword, nonword, word, nonword, word, word, nonw…
## $ PrevCorrect    <fct> correct, correct, correct, correct, correct, correct, c…
## $ Word           <fct> owl, mole, cherry, pear, dog, blackberry, strawberry, s…
## $ Frequency      <dbl> 4.859812, 4.605170, 4.997212, 4.727388, 7.667626, 4.060…
## $ FamilySize     <dbl> 1.3862944, 1.0986123, 0.6931472, 0.0000000, 3.1354942, …
## $ SynsetCount    <dbl> 0.6931472, 1.9459101, 1.6094379, 1.0986123, 2.0794415, …
## $ Length         <int> 3, 4, 6, 4, 3, 10, 10, 8, 6, 6, 5, 8, 9, 5, 3, 4, 8, 6,…
## $ Class          <fct> animal, animal, plant, plant, animal, plant, plant, ani…
## $ FreqSingular   <int> 54, 69, 83, 44, 1233, 26, 50, 63, 11, 24, 104, 28, 7, 2…
## $ FreqPlural     <int> 74, 30, 49, 68, 828, 31, 65, 47, 9, 42, 95, 28, 4, 45, …
## $ DerivEntropy   <dbl> 0.7912, 0.6968, 0.4754, 0.0000, 1.2129, 0.3492, 0.0000,…
## $ Complex        <fct> simplex, simplex, simplex, simplex, simplex, complex, c…
## $ rInfl          <dbl> -0.31015493, 0.81450804, 0.51879379, -0.42744401, 0.397…
## $ meanRT         <dbl> 6.3582, 6.4150, 6.3426, 6.3353, 6.2956, 6.3959, 6.3475,…
## $ SubjFreq       <dbl> 3.12, 2.40, 3.88, 4.52, 6.04, 3.28, 5.04, 2.80, 3.12, 3…
## $ meanSize       <dbl> 3.4758, 2.9999, 1.6278, 1.9908, 4.6429, 1.5831, 1.8922,…
## $ meanWeight     <dbl> 3.1806, 2.6112, 1.2081, 1.6114, 4.5167, 1.1365, 1.4951,…
## $ BNCw           <dbl> 12.057065, 5.738806, 5.716520, 2.050370, 74.838494, 1.2…
## $ BNCc           <dbl> 0.000000, 4.062251, 3.249801, 1.462410, 50.859385, 0.16…
## $ BNCd           <dbl> 6.175602, 2.850278, 12.588727, 7.363218, 241.561040, 1.…
## $ BNCcRatio      <dbl> 0.000000, 0.707856, 0.568493, 0.713242, 0.679589, 0.127…
## $ BNCdRatio      <dbl> 0.512198, 0.496667, 2.202166, 3.591166, 3.227765, 0.934…

2.2 Data cleaning, data type coercing, etc.

lexdec_glmer <- lexdec %>%
  filter(
    !is.na(Correct),
    !is.na(PrevType),
    !is.na(Trial),
    !is.na(Frequency),
    !is.na(Length),
    !is.na(Subject),
    !is.na(Word)
  ) %>%
  mutate(
    Subject = factor(Subject),
    Word = factor(Word),
    Correct = factor(Correct, levels = c("incorrect", "correct")),
    Correct_binary = if_else(Correct == "correct", 1L, 0L),
    PrevType = factor(PrevType, levels = c("nonword", "word")),
    Trial_z = as.numeric(scale(Trial)),
    Frequency_z = as.numeric(scale(Frequency)),
    Length_z = as.numeric(scale(Length))
  ) %>%
  droplevels()

lexdec_glmer

3 Set the coding scheme

The predictor of interest, PrevType, is sum coded. With a two-level factor, the coefficient is a deviation from the grand mean on the log-odds scale. Estimated marginal means are usually clearer for interpretation.

contrasts(lexdec_glmer$PrevType) <- contr.sum(2)

contrasts(lexdec_glmer$PrevType)
##         [,1]
## nonword    1
## word      -1

4 Summary statistics

lexdec_glmer %>%
  summarise(
    n_observations = n(),
    n_subjects = n_distinct(Subject),
    n_words = n_distinct(Word),
    proportion_correct = mean(Correct_binary)
  )
lexdec_glmer %>%
  count(Correct)
lexdec_glmer %>%
  count(PrevType, Correct)

4.1 Plotting data patterns

accuracy_by_prevtype <- lexdec_glmer %>%
  group_by(PrevType) %>%
  summarise(
    n = n(),
    proportion_correct = mean(Correct_binary),
    se = sqrt(proportion_correct * (1 - proportion_correct) / n),
    lower = pmax(0, proportion_correct - 1.96 * se),
    upper = pmin(1, proportion_correct + 1.96 * se),
    .groups = "drop"
  )

ggplot(accuracy_by_prevtype, aes(x = PrevType, y = proportion_correct, fill = PrevType)) +
  geom_col(width = 0.6) +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.12) +
  scale_y_continuous(limits = c(0, 1)) +
  labs(
    x = "Previous trial type",
    y = "Proportion correct",
    title = "Accuracy by previous trial type"
  ) +
  guides(fill = "none")

5 Start With the Maximal Random Structure

The maximal structure includes random intercepts for subjects and words. It also includes random slopes for predictors that vary within each grouping factor.

For subjects, PrevType, Trial_z, Frequency_z, and Length_z vary across trials. For words, Frequency_z and Length_z are item-level properties, so they do not vary within a word and are not used as word-level random slopes.

If the maximal model does not converge or is singular, simplify the random structure. In a real analysis, this reduction should be theoretically motivated: remove the least important or least stable random slopes first, while keeping the fixed-effects structure aligned with the research question.

6 Fit the Maximal Model

glmer_control <- lme4::glmerControl(
  optimizer = "bobyqa",
  optCtrl = list(maxfun = 200000)
)

glmer_maximal <- lme4::glmer(
  Correct_binary ~ PrevType + Trial_z + Frequency_z + Length_z +
    (1 + PrevType + Trial_z + Frequency_z + Length_z | Subject) +
    (1 + PrevType + Trial_z | Word),
  data = lexdec_glmer,
  family = binomial(link = "logit"),
  control = glmer_control
)

7 Check model convergence and singular fit

cat("Model singular fit: ", lme4::isSingular(glmer_maximal), "\n", sep = "")
## Model singular fit: TRUE
cat(
  "Model failed to converge: ",
  length(glmer_maximal@optinfo$conv$lme4$messages) > 0,
  "\n",
  sep = ""
)
## Model failed to converge: TRUE

8 Fit a Reduced Random-Effects Model

For the rest of this walkthrough, I pretend that the maximal model above is too complex because it has a convergence warning, a singular fit, or both. The next step is to fit a simpler random-effects structure.

Here I simplify to random intercepts for subjects and words. This is a common endpoint when the random-slope structure is too ambitious for a binary outcome, especially when one outcome level is rare.

glmer_reduced_random <- lme4::glmer(
  Correct_binary ~ PrevType + Trial_z + Frequency_z + Length_z +
    (1 | Subject) +
    (1 | Word),
  data = lexdec_glmer,
  family = binomial(link = "logit"),
  control = glmer_control
)

9 Check convergence and fit for the reduced model

cat("Model singular fit: ", lme4::isSingular(glmer_reduced_random), "\n", sep = "")
## Model singular fit: FALSE
cat(
  "Model failed to converge: ",
  length(glmer_reduced_random@optinfo$conv$lme4$messages) > 0,
  "\n",
  sep = ""
)
## Model failed to converge: FALSE

For this walkthrough, I treat the reduced random-effects model as the final working full model. In a real analysis, if this reduced model still had convergence or singularity problems, I would continue simplifying the random-effects structure and document each step.

glmer_fit <- glmer_reduced_random
selected_glmer_name <- "reduced model: subject and word random intercepts"

selected_glmer_name
## [1] "reduced model: subject and word random intercepts"
formula(glmer_fit)
## Correct_binary ~ PrevType + Trial_z + Frequency_z + Length_z + 
##     (1 | Subject) + (1 | Word)

10 Fit the Reduced Main-Effect Model

For the GLMM, test the factor of interest by comparing the full model against a reduced model that leaves out the fixed effect of PrevType. The reduced model keeps the same random-effects structure as the working full model, so the likelihood-ratio test targets the fixed effect.

glmer_without_prevtype <- lme4::glmer(
  Correct_binary ~ Trial_z + Frequency_z + Length_z +
    (1 | Subject) +
    (1 | Word),
  data = lexdec_glmer,
  family = binomial(link = "logit"),
  control = glmer_control
)

summary(glmer_without_prevtype)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Correct_binary ~ Trial_z + Frequency_z + Length_z + (1 | Subject) +  
##     (1 | Word)
##    Data: lexdec_glmer
## Control: glmer_control
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     500.6     533.1    -244.3     488.6      1653 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -8.6078  0.0860  0.1247  0.1852  0.9657 
## 
## Random effects:
##  Groups  Name        Variance Std.Dev.
##  Word    (Intercept) 1.0139   1.007   
##  Subject (Intercept) 0.7604   0.872   
## Number of obs: 1659, groups:  Word, 79; Subject, 21
## 
## Fixed effects:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  4.174183   0.354513  11.774  < 2e-16 ***
## Trial_z      0.003881   0.136700   0.028  0.97735    
## Frequency_z  0.699130   0.215421   3.245  0.00117 ** 
## Length_z    -0.036793   0.209517  -0.176  0.86060    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Tril_z Frqnc_
## Trial_z     -0.010              
## Frequency_z  0.214  0.001       
## Length_z    -0.017 -0.023  0.407

11 Check the sigificance of the variable of interest

glmer_lrt <- anova(
  glmer_without_prevtype,
  glmer_fit,
  test = "Chisq"
)

glmer_lrt

12 Check GLMM Diagnostics With DHARMa

For GLMMs, residual diagnostics are not the same as ordinary linear-model residual checks. DHARMa simulates scaled residuals from the fitted model and tests whether they behave as expected under the model.

This does not mean that you cannot use the performance package to do model checking. It is just that the results are less intuitive and less transparent to interpret whether the model actually meets the assumptions or not.

set.seed(1234)

glmer_dharma_residuals <- DHARMa::simulateResiduals(
  fittedModel = glmer_fit,
  n = 1000
)

plot(glmer_dharma_residuals)

DHARMa::testUniformity(glmer_dharma_residuals)

## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.013807, p-value = 0.9098
## alternative hypothesis: two-sided
DHARMa::testDispersion(glmer_dharma_residuals)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.016, p-value = 0.85
## alternative hypothesis: two.sided
DHARMa::testOutliers(glmer_dharma_residuals)

## 
##  DHARMa outlier test based on exact binomial test with approximate
##  expectations
## 
## data:  glmer_dharma_residuals
## outliers at both margin(s) = 3, observations = 1659, p-value = 1
## alternative hypothesis: true probability of success is not equal to 0.001998002
## 95 percent confidence interval:
##  0.0003730741 0.0052754946
## sample estimates:
## frequency of outliers (expected: 0.001998001998002 ) 
##                                          0.001808318
DHARMa::testZeroInflation(glmer_dharma_residuals)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 1.0182, p-value = 0.858
## alternative hypothesis: two.sided

If these checks look poor, common next steps include a different predictor structure, nonlinear terms, checking separation or rare-event behavior, adding missing grouping structure, or using a different model family.

13 Interpret the Model

The GLMM coefficients are on the log-odds scale. Exponentiating them gives odds ratios. Estimated marginal means with type = "response" give predicted probabilities, which are often easier to explain.

glmer_fixed_effects <- coef(summary(glmer_fit)) %>%
  as.data.frame() %>%
  tibble::rownames_to_column("term") %>%
  as_tibble()

glmer_fixed_effects %>%
  knitr::kable(digits = 3)
term Estimate Std. Error z value Pr(>|z|)
(Intercept) 4.184 0.358 11.702 0.000
PrevType1 0.124 0.139 0.891 0.373
Trial_z 0.007 0.137 0.053 0.958
Frequency_z 0.700 0.215 3.254 0.001
Length_z -0.034 0.209 -0.162 0.871
glmer_wald_ci <- confint(
  glmer_fit,
  parm = "beta_",
  method = "Wald"
)

glmer_odds_ratios <- tibble(
  term = names(lme4::fixef(glmer_fit)),
  log_odds = unname(lme4::fixef(glmer_fit)),
  conf_low_log_odds = glmer_wald_ci[, 1],
  conf_high_log_odds = glmer_wald_ci[, 2]
) %>%
  mutate(
    odds_ratio = exp(log_odds),
    conf_low_odds_ratio = exp(conf_low_log_odds),
    conf_high_odds_ratio = exp(conf_high_log_odds)
  )

glmer_odds_ratios %>%
  knitr::kable(digits = 3)
term log_odds conf_low_log_odds conf_high_log_odds odds_ratio conf_low_odds_ratio conf_high_odds_ratio
(Intercept) 4.184 3.483 4.885 65.618 32.560 132.241
PrevType1 0.124 -0.149 0.397 1.132 0.862 1.487
Trial_z 0.007 -0.261 0.275 1.007 0.770 1.317
Frequency_z 0.700 0.278 1.121 2.013 1.321 3.068
Length_z -0.034 -0.444 0.376 0.967 0.642 1.456
glmer_prevtype_logit <- emmeans::emmeans(glmer_fit, ~ PrevType)
glmer_prevtype_probability <- emmeans::emmeans(
  glmer_fit,
  ~ PrevType,
  type = "response"
)
glmer_prevtype_contrast <- pairs(glmer_prevtype_probability, type = "response")

glmer_prevtype_logit
##  PrevType emmean    SE  df asymp.LCL asymp.UCL
##  nonword    4.31 0.391 Inf      3.54      5.08
##  word       4.06 0.376 Inf      3.32      4.80
## 
## Results are given on the logit (not the response) scale. 
## Confidence level used: 0.95
glmer_prevtype_probability
##  PrevType  prob      SE  df asymp.LCL asymp.UCL
##  nonword  0.987 0.00513 Inf     0.972     0.994
##  word     0.983 0.00626 Inf     0.965     0.992
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
glmer_prevtype_contrast
##  contrast       odds.ratio    SE  df null z.ratio p.value
##  nonword / word       1.28 0.357 Inf    1   0.891  0.3728
## 
## Tests are performed on the log odds ratio scale
glmer_prevtype_probability_df <- as.data.frame(glmer_prevtype_probability)

ggplot(glmer_prevtype_probability_df, aes(x = PrevType, y = prob)) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = asymp.LCL, ymax = asymp.UCL), width = 0.12) +
  scale_y_continuous(limits = c(0, 1)) +
  labs(
    x = "Previous trial type",
    y = "Predicted probability correct",
    title = "Predicted accuracy by previous trial type"
  )

14 Example Reporting Language

We fit a logistic mixed-effects model predicting response accuracy from previous trial type, trial order, word frequency, and word length. Factors were sum coded. I started with the maximal theoretically relevant random-effects structure and checked whether it had convergence warnings or a singular fit. To demonstrate the usual reduction workflow, I then fit a reduced random-effects structure and used it as the working full model. The working random-effects structure was reduced model: subject and word random intercepts. The fixed effect of previous trial type was evaluated by comparing the full model to a reduced model without that fixed effect using a likelihood-ratio test. GLMM diagnostics were checked with simulated residuals from DHARMa. Coefficients are reported on the log-odds scale, odds ratios are provided for fixed effects, and the effect of previous trial type is interpreted with predicted probabilities.