Regression with Interaction

Two-way Interaction : Cont and Categorical

Author

Kamarul Imran Musa

Published

November 10, 2024

Create simulated data

library(tidyverse)
library(stargazer)
library(modelsummary)

Load required packages for prediction, estimated marginal means and interaction

library(emmeans)
library(effects)

Create sample data with stronger interaction

set.seed(123)
n <- 200
stress <- rnorm(n, mean = 5, sd = 1)
sex <- factor(rep(c("male", "female"), each = n/2))
loneliness <- rnorm(n, mean = 3, sd = 1)

# Generate QOL with stronger interaction effect
# Base intercept of 80
# Stress coefficient -2 for males
# Sex coefficient -10 for females
# Stronger interaction effect (beta_stress_sex = -3)
# This means females will have a stress coefficient of -5 (-2 + -3)
beta_stress_sex <- -3  # stronger interaction coefficient
qol <- 80 + 
  (-2 * stress) + 
  (-10 * (sex == "female")) + 
  (beta_stress_sex * stress * (sex == "female")) +
  (-1 * loneliness) + 
  rnorm(n, 0, 1.5)  # reduced error variance for clearer effect

# Create data frame
data <- data.frame(
  qol = qol,
  stress = stress,
  sex = sex,
  loneliness = loneliness
)

Fit linear regression model

model <- lm(qol ~ stress + sex + stress:sex + loneliness, data = data)
modela <- lm(qol ~ stress + sex , data = data)
modelb <- lm(qol ~ stress + sex + loneliness, data = data)

Model summary

stargazer(modela, modelb, model, type = "html")
Dependent variable:
qol
(1) (2) (3)
stress -3.607*** -3.654*** -4.828***
(0.167) (0.140) (0.149)
sexmale 25.061*** 25.259*** 12.747***
(0.314) (0.264) (1.110)
loneliness -1.207*** -1.106***
(0.132) (0.103)
stress:sexmale 2.501***
(0.218)
Constant 60.160*** 63.965*** 69.412***
(0.845) (0.823) (0.795)
Observations 200 200 200
R2 0.971 0.980 0.988
Adjusted R2 0.971 0.979 0.988
Residual Std. Error 2.205 (df = 197) 1.852 (df = 196) 1.435 (df = 195)
F Statistic 3,282.249*** (df = 2; 197) 3,129.781*** (df = 3; 196) 3,943.416*** (df = 4; 195)
Note: p<0.1; p<0.05; p<0.01
panels <- list(
  "A" = modela,
  "B" = modelb,
  "Interaction" = model)

modelsummary(panels)
A B Interaction
(Intercept) 60.160 63.965 69.412
(0.845) (0.823) (0.795)
stress -3.607 -3.654 -4.828
(0.167) (0.140) (0.149)
sexmale 25.061 25.259 12.747
(0.314) (0.264) (1.110)
loneliness -1.207 -1.106
(0.132) (0.103)
stress × sexmale 2.501
(0.218)
Num.Obs. 200 200 200
R2 0.971 0.980 0.988
R2 Adj. 0.971 0.979 0.988
AIC 888.8 820.0 718.9
BIC 902.0 836.5 738.7
Log.Lik. -440.422 -405.014 -353.464
F 3282.249 3129.781 3943.416
RMSE 2.19 1.83 1.42

Calculate estimated marginal means

  • predicted qol
  • based on stress values for each sex at mean value of loneliness
emm <- emmeans(model, ~ stress | sex, 
               at = list(stress = seq(min(stress), max(stress), length.out = 100),
                         loneliness = mean(loneliness)))
  • which you can also get from
# Create prediction grid for plotting
pred_grid <- expand.grid(
  stress = seq(min(data$stress), max(data$stress), length.out = 100),
  loneliness = mean(data$loneliness),
  sex = c("male", "female")
)

# Get predictions for the grid
pred_grid2 <- pred_grid |>
  mutate(predicted_qol = predict(model, newdata = pred_grid))

Plot 1: Using emmip

  • different slopes for male and female (steeper) means effect of stress on qol differs based on sex
  • sex is a moderator.
p1 <- emmip(emm, sex ~ stress, CI = TRUE) +
  labs(x = "Stress Level", y = "Estimated Quality of Life",
       title = "Interaction between Stress and Sex on Quality of Life",
       subtitle = "Estimated Marginal Means") +
  scale_y_continuous(limits= c(25, 85), breaks = seq(25, 75, by = 5)) +
  theme_minimal() +
  theme(legend.position = "bottom")
p1

Plot 2: Using effects package

plot(effect("stress:sex", model))

Plot 3: Custom ggplot with clearer visualization

p2b <- pred_grid2 |>
  ggplot(aes(x = stress, y = predicted_qol, color = sex)) +
  geom_point(alpha = 0.4) +
  geom_smooth(method = "lm", se = TRUE) +
  scale_y_continuous(limits= c(25, 85), breaks = seq(25, 75, by = 5)) +
  labs(x = "Stress Level", y = "Quality of Life",
       title = "Interaction between Stress and Sex on Quality of Life",
       color = "Sex") +
  theme_minimal() +
  scale_color_brewer(palette = "Set1") +
  theme(legend.position = "bottom")
p2b

Simple slopes analysis at meaningful stress levels

stress_levels <- c(mean(stress) - sd(stress), mean(stress), mean(stress) + sd(stress))
names(stress_levels) <- c("Low Stress", "Average Stress", "High Stress")
emm_slopes <- emmeans(model, ~ sex | stress, 
                      at = list(stress = stress_levels,
                                loneliness = mean(loneliness)))
pairs(emm_slopes)
stress = 4.05:
 contrast      estimate    SE  df  t.ratio p.value
 female - male    -22.9 0.292 195  -78.294  <.0001

stress = 4.99:
 contrast      estimate    SE  df  t.ratio p.value
 female - male    -25.2 0.205 195 -123.212  <.0001

stress = 5.93:
 contrast      estimate    SE  df  t.ratio p.value
 female - male    -27.6 0.288 195  -95.691  <.0001