Regression with No Interaction

Prediction and Plots

Author

Kamarul Imran Musa

Published

November 10, 2024

Create simulated data

library(tidyverse)
library(stargazer)
library(modelsummary)
set.seed(123)
n <- 200 # Sample size

# Generate predictors
stress <- rnorm(n, mean = 5, sd = 1)
loneliness <- rnorm(n, mean = 4, sd = 1) 
sex <- factor(rep(c("male", "female"), each = n/2))

# Generate outcome based on specified coefficients
# Assuming some base intercept and adding random noise
qol <- 100 - 2*stress - 3*loneliness - 10*(sex == "female") + rnorm(n, 0, 5)

# Create data frame
data <- data.frame(qol, stress, loneliness, sex)

Fit linear regression model

model <- lm(qol ~ stress + loneliness + sex, data = data)
modela <- lm(qol ~ stress, data = data)
modelb <- lm(qol ~ stress + loneliness, data = data)
modelc <- lm(qol ~ stress + sex, data = data)



# Model summary
summary(model)

Call:
lm(formula = qol ~ stress + loneliness + sex, data = data)

Residuals:
    Min      1Q  Median      3Q     Max 
-13.945  -2.870   0.199   3.181  12.555 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  91.9515     2.3421  39.260  < 2e-16 ***
stress       -2.2093     0.3656  -6.043  7.5e-09 ***
loneliness   -3.2874     0.3454  -9.518  < 2e-16 ***
sexmale      10.8272     0.6898  15.696  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 4.834 on 196 degrees of freedom
Multiple R-squared:  0.6286,    Adjusted R-squared:  0.6229 
F-statistic: 110.6 on 3 and 196 DF,  p-value: < 2.2e-16
stargazer(modela, modelb, modelc, model, type = "html")
Dependent variable:
qol
(1) (2) (3) (4)
stress -1.508** -1.591*** -2.083*** -2.209***
(0.583) (0.545) (0.441) (0.366)
loneliness -2.841*** -3.287***
(0.516) (0.345)
sexmale 10.287*** 10.827***
(0.829) (0.690)
Constant 80.575*** 92.476*** 78.303*** 91.951***
(2.963) (3.509) (2.233) (2.342)
Observations 200 200 200 200
R2 0.033 0.162 0.457 0.629
Adjusted R2 0.028 0.153 0.451 0.623
Residual Std. Error 7.762 (df = 198) 7.244 (df = 197) 5.831 (df = 197) 4.834 (df = 196)
F Statistic 6.679** (df = 1; 198) 19.006*** (df = 2; 197) 82.876*** (df = 2; 197) 110.571*** (df = 3; 196)
Note: p<0.1; p<0.05; p<0.01
panels <- list(
  "A" = modela,
  "B" = modelb,
  "C" = modelc,
  "D" = model)

modelsummary(panels)
A B C D
(Intercept) 80.575 92.476 78.303 91.951
(2.963) (3.509) (2.233) (2.342)
stress -1.508 -1.591 -2.083 -2.209
(0.583) (0.545) (0.441) (0.366)
loneliness -2.841 -3.287
(0.516) (0.345)
sexmale 10.287 10.827
(0.829) (0.690)
Num.Obs. 200 200 200 200
R2 0.033 0.162 0.457 0.629
R2 Adj. 0.028 0.153 0.451 0.623
AIC 1391.3 1364.6 1277.8 1203.8
BIC 1401.2 1377.8 1291.0 1220.3
Log.Lik. -692.638 -678.312 -634.904 -596.912
F 6.679 19.006 82.876 110.571
RMSE 7.72 7.19 5.79 4.79

Create predictions using emmeans

library(emmeans)

# Get predictions at specific values of stress and loneliness for each sex
emm <- emmeans(model, ~ sex, 
               at = list(stress = c(4, 5, 6),
                         loneliness = c(3, 4, 5)))
# Create prediction grid for plotting
pred_grid <- expand.grid(
  stress = seq(min(stress), max(stress), length.out = 100),
  loneliness = mean(loneliness),
  sex = c("male", "female")
)

# Get predictions for the grid
pred_grid2 <- pred_grid |>
  mutate(predicted_qol = predict(model, newdata = pred_grid))

Plot predictions using ggplot2

# Create plot
pred_grid2 |>
ggplot(aes(x = stress, y = predicted_qol, color = sex)) +
  geom_line(linewidth = 1) +
  geom_point(data = data, aes(y = qol), alpha = 0.3) +
  theme_minimal() +
  labs(
    title = "Predicted Quality of Life by Stress Levels and Sex",
    x = "Stress Level",
    y = "Quality of Life",
    color = "Sex"
  ) +
  scale_color_manual(values = c("blue", "red")) +
  theme(legend.position = "bottom")

Create another plot showing the relationship with loneliness

pred_grid2b <- expand.grid(
  loneliness = seq(min(loneliness), max(loneliness), length.out = 100),
  stress = mean(stress),
  sex = c("male", "female")
)

pred_grid2b <- pred_grid2b |>
  mutate(predicted_qol = predict(model, newdata = pred_grid2b))
pred_grid2b |>
ggplot(aes(x = loneliness, y = predicted_qol, color = sex)) +
  geom_line(linewidth = 1) +
  geom_point(data = data, aes(y = qol), alpha = 0.3) +
  theme_minimal() +
  labs(
    title = "Predicted Quality of Life by Loneliness Levels and Sex",
    x = "Loneliness Level",
    y = "Quality of Life",
    color = "Sex"
  ) +
  scale_color_manual(values = c("blue", "red")) +
  theme(legend.position = "bottom")