Regression with Interaction

Two-way Interaction : Cont and Cont

Author

Kamarul Imran Musa

Published

November 10, 2024

Create simulated data

library(tidyverse)
library(stargazer)
library(modelsummary)
library(broom)

Load required packages for interaction check and plots

library(emmeans)
library(effects)

Create simulated data

set.seed(123)
n <- 1000

# Generate predictor variables
stress <- rnorm(n, mean = 5, sd = 1)
loneliness <- rnorm(n, mean = 4, sd = 1)
sex <- factor(sample(c("male", "female"), n, replace = TRUE))

# Center continuous predictors for better interpretation
stress_c <- scale(stress, center = TRUE, scale = FALSE)
loneliness_c <- scale(loneliness, center = TRUE, scale = FALSE)

# Generate outcome with specified coefficients and interaction
qol <- 80 + (-2 * stress_c) + (-3 * loneliness_c) + 
  (-10 * (sex == "female")) + (-0.5 * stress_c * loneliness_c) + 
  rnorm(n, mean = 0, sd = 2)

# Create data frame
data <- data.frame(qol, stress_c, loneliness_c, sex)

Fit linear regression model

model <- lm(qol ~ stress_c + loneliness_c + sex + stress_c:loneliness_c, data = data)
modela <- lm(qol ~ stress_c + loneliness_c , data = data)
modelb <- lm(qol ~ stress_c + sex + loneliness_c, data = data)

Model summary

stargazer(modela, modelb, model, type = "html")
Dependent variable:
qol
(1) (2) (3)
stress_c -2.080*** -2.052*** -2.059***
(0.174) (0.067) (0.064)
sexmale 10.030*** 10.022***
(0.132) (0.127)
stress_c:loneliness_c -0.591***
(0.063)
loneliness_c -2.745*** -2.912*** -2.908***
(0.171) (0.066) (0.063)
Constant 74.628*** 69.944*** 69.999***
(0.172) (0.090) (0.087)
Observations 1,000 1,000 1,000
R2 0.306 0.897 0.906
Adjusted R2 0.304 0.897 0.905
Residual Std. Error 5.426 (df = 997) 2.088 (df = 996) 2.003 (df = 995)
F Statistic 219.302*** (df = 2; 997) 2,898.894*** (df = 3; 996) 2,384.190*** (df = 4; 995)
Note: p<0.1; p<0.05; p<0.01
panels <- list(
  "A" = modela,
  "B" = modelb,
  "Interaction" = model)

modelsummary(panels)
A B Interaction
(Intercept) 74.628 69.944 69.999
(0.172) (0.090) (0.087)
stress_c -2.080 -2.052 -2.059
(0.174) (0.067) (0.064)
loneliness_c -2.745 -2.912 -2.908
(0.171) (0.066) (0.063)
sexmale 10.030 10.022
(0.132) (0.127)
stress_c × loneliness_c -0.591
(0.063)
Num.Obs. 1000 1000 1000
R2 0.306 0.897 0.906
R2 Adj. 0.304 0.897 0.905
AIC 6225.5 4316.7 4234.6
BIC 6245.1 4341.2 4264.1
Log.Lik. -3108.729 -2153.335 -2111.320
F 219.302 2898.894 2384.190
RMSE 5.42 2.08 2.00

Generate new data for prediction

# Create prediction grid for plotting
new_data <- expand.grid(
  stress_c = seq(min(data$stress_c), max(data$stress_c), length.out = 3),
  loneliness_c = seq(min(data$loneliness_c), max(data$loneliness_c), length.out = 3),
  sex = c("male", "female")
)

# Get predictions for the grid
new_data_pred <- new_data |>
  mutate(predicted_qol = predict(model, newdata = new_data))
new_data_pred |>
  slice_head(n=6)
    stress_c loneliness_c  sex predicted_qol
1 -2.8259025   -3.0903261 male      89.66624
2  0.1995048   -3.0903261 male      88.96200
3  3.2249121   -3.0903261 male      88.25777
4 -2.8259025    0.1287897 male      85.67988
5  0.1995048    0.1287897 male      79.22022
6  3.2249121    0.1287897 male      72.76056

Plot interaction in ggplot

new_data_pred |>
  ggplot(aes(x = stress_c, y = predicted_qol, colour = factor(loneliness_c))) +
  geom_point(alpha = 0.4) +
  geom_smooth(method = "lm", se = FALSE) +
  facet_grid(cols = vars(sex)) +
  scale_y_continuous(limits= c(40, 90), breaks = seq(40, 90, by = 5)) +
  labs(x = "Stress Level", y = "Quality of Life",
       title = "Interaction between Stress and Loneliness on Quality of Life",
       color = "Sex") +
  theme_minimal() +
  scale_color_brewer(palette = "Set1") +
  theme(legend.position = "bottom")

EMmeans analysis

Create grid of predictor values for stress and loneliness

stress_vals <- seq(min(data$stress_c), max(data$stress_c), length.out = 3)
loneliness_vals <- seq(min(data$loneliness_c), max(data$loneliness_c), length.out = 3)

EMmeans for interaction

emm <- emmeans(model, ~ stress_c + loneliness_c, 
               at = list(stress_c = stress_vals, 
                         loneliness_c = loneliness_vals))

Plot 1: Using emmip

emmip_plot <- emmip(model, ~ stress_c | loneliness_c, 
                    at = list(loneliness_c = loneliness_vals, 
                              stress_c = stress_vals)) +
  scale_y_continuous(limits= c(50, 85), breaks = seq(50, 85, by = 5)) +
  labs(title = "Interaction between Stress and Loneliness",
       x = "Stress Level",
       y = "Quality of Life") +
  theme_minimal() +
  theme(text = element_text(size = 12),
        plot.title = element_text(hjust = 0.5))
emmip_plot

emmip_plot2 <- emmip(model, loneliness_c ~ stress_c, 
                    at = list(loneliness_c = loneliness_vals,
                              stress_c = stress_vals)) +
  scale_y_continuous(limits= c(50, 85), breaks = seq(50, 85, by = 5)) +
  labs(title = "Interaction between Stress and Loneliness",
       x = "Stress Level",
       y = "Quality of Life") +
  theme_minimal() +
  theme(text = element_text(size = 12),
        plot.title = element_text(hjust = 0.5))
emmip_plot2

Plot 2: Using effects package

stress_loneliness_effect <- effect("stress_c:loneliness_c", model)
plot(stress_loneliness_effect, 
     main = "Effect Plot: Stress x Loneliness Interaction",
     xlab = "Stress Level",
     ylab = "Quality of Life")