library(tidyverse)
library(stargazer)
library(modelsummary)Regression with Interaction
Two-way Interaction : Cont and Categorical
Create simulated data
Load required packages for prediction, estimated marginal means and interaction
library(emmeans)
library(effects)Create sample data with stronger interaction
set.seed(123)
n <- 200
stress <- rnorm(n, mean = 5, sd = 1)
sex <- factor(rep(c("male", "female"), each = n/2))
loneliness <- rnorm(n, mean = 3, sd = 1)
# Generate QOL with stronger interaction effect
# Base intercept of 80
# Stress coefficient -2 for males
# Sex coefficient -10 for females
# Stronger interaction effect (beta_stress_sex = -3)
# This means females will have a stress coefficient of -5 (-2 + -3)
beta_stress_sex <- -3 # stronger interaction coefficient
qol <- 80 +
(-2 * stress) +
(-10 * (sex == "female")) +
(beta_stress_sex * stress * (sex == "female")) +
(-1 * loneliness) +
rnorm(n, 0, 1.5) # reduced error variance for clearer effect
# Create data frame
data <- data.frame(
qol = qol,
stress = stress,
sex = sex,
loneliness = loneliness
)Fit linear regression model
model <- lm(qol ~ stress + sex + stress:sex + loneliness, data = data)
modela <- lm(qol ~ stress + sex , data = data)
modelb <- lm(qol ~ stress + sex + loneliness, data = data)Model summary
stargazer(modela, modelb, model, type = "html")| Dependent variable: | |||
| qol | |||
| (1) | (2) | (3) | |
| stress | -3.607*** | -3.654*** | -4.828*** |
| (0.167) | (0.140) | (0.149) | |
| sexmale | 25.061*** | 25.259*** | 12.747*** |
| (0.314) | (0.264) | (1.110) | |
| loneliness | -1.207*** | -1.106*** | |
| (0.132) | (0.103) | ||
| stress:sexmale | 2.501*** | ||
| (0.218) | |||
| Constant | 60.160*** | 63.965*** | 69.412*** |
| (0.845) | (0.823) | (0.795) | |
| Observations | 200 | 200 | 200 |
| R2 | 0.971 | 0.980 | 0.988 |
| Adjusted R2 | 0.971 | 0.979 | 0.988 |
| Residual Std. Error | 2.205 (df = 197) | 1.852 (df = 196) | 1.435 (df = 195) |
| F Statistic | 3,282.249*** (df = 2; 197) | 3,129.781*** (df = 3; 196) | 3,943.416*** (df = 4; 195) |
| Note: | p<0.1; p<0.05; p<0.01 | ||
panels <- list(
"A" = modela,
"B" = modelb,
"Interaction" = model)
modelsummary(panels)| A | B | Interaction | |
|---|---|---|---|
| (Intercept) | 60.160 | 63.965 | 69.412 |
| (0.845) | (0.823) | (0.795) | |
| stress | -3.607 | -3.654 | -4.828 |
| (0.167) | (0.140) | (0.149) | |
| sexmale | 25.061 | 25.259 | 12.747 |
| (0.314) | (0.264) | (1.110) | |
| loneliness | -1.207 | -1.106 | |
| (0.132) | (0.103) | ||
| stress × sexmale | 2.501 | ||
| (0.218) | |||
| Num.Obs. | 200 | 200 | 200 |
| R2 | 0.971 | 0.980 | 0.988 |
| R2 Adj. | 0.971 | 0.979 | 0.988 |
| AIC | 888.8 | 820.0 | 718.9 |
| BIC | 902.0 | 836.5 | 738.7 |
| Log.Lik. | -440.422 | -405.014 | -353.464 |
| F | 3282.249 | 3129.781 | 3943.416 |
| RMSE | 2.19 | 1.83 | 1.42 |
Calculate estimated marginal means
- predicted qol
- based on stress values for each sex at mean value of loneliness
emm <- emmeans(model, ~ stress | sex,
at = list(stress = seq(min(stress), max(stress), length.out = 100),
loneliness = mean(loneliness)))- which you can also get from
# Create prediction grid for plotting
pred_grid <- expand.grid(
stress = seq(min(data$stress), max(data$stress), length.out = 100),
loneliness = mean(data$loneliness),
sex = c("male", "female")
)
# Get predictions for the grid
pred_grid2 <- pred_grid |>
mutate(predicted_qol = predict(model, newdata = pred_grid))Plot 1: Using emmip
- different slopes for male and female (steeper) means effect of stress on qol differs based on sex
- sex is a moderator.
p1 <- emmip(emm, sex ~ stress, CI = TRUE) +
labs(x = "Stress Level", y = "Estimated Quality of Life",
title = "Interaction between Stress and Sex on Quality of Life",
subtitle = "Estimated Marginal Means") +
scale_y_continuous(limits= c(25, 85), breaks = seq(25, 75, by = 5)) +
theme_minimal() +
theme(legend.position = "bottom")
p1Plot 2: Using effects package
plot(effect("stress:sex", model))Plot 3: Custom ggplot with clearer visualization
p2b <- pred_grid2 |>
ggplot(aes(x = stress, y = predicted_qol, color = sex)) +
geom_point(alpha = 0.4) +
geom_smooth(method = "lm", se = TRUE) +
scale_y_continuous(limits= c(25, 85), breaks = seq(25, 75, by = 5)) +
labs(x = "Stress Level", y = "Quality of Life",
title = "Interaction between Stress and Sex on Quality of Life",
color = "Sex") +
theme_minimal() +
scale_color_brewer(palette = "Set1") +
theme(legend.position = "bottom")
p2bSimple slopes analysis at meaningful stress levels
stress_levels <- c(mean(stress) - sd(stress), mean(stress), mean(stress) + sd(stress))
names(stress_levels) <- c("Low Stress", "Average Stress", "High Stress")
emm_slopes <- emmeans(model, ~ sex | stress,
at = list(stress = stress_levels,
loneliness = mean(loneliness)))
pairs(emm_slopes)stress = 4.05:
contrast estimate SE df t.ratio p.value
female - male -22.9 0.292 195 -78.294 <.0001
stress = 4.99:
contrast estimate SE df t.ratio p.value
female - male -25.2 0.205 195 -123.212 <.0001
stress = 5.93:
contrast estimate SE df t.ratio p.value
female - male -27.6 0.288 195 -95.691 <.0001