library(tidyverse)
library(stargazer)
library(modelsummary)Regression with No Interaction
Prediction and Plots
Create simulated data
set.seed(123)
n <- 200 # Sample size
# Generate predictors
stress <- rnorm(n, mean = 5, sd = 1)
loneliness <- rnorm(n, mean = 4, sd = 1)
sex <- factor(rep(c("male", "female"), each = n/2))
# Generate outcome based on specified coefficients
# Assuming some base intercept and adding random noise
qol <- 100 - 2*stress - 3*loneliness - 10*(sex == "female") + rnorm(n, 0, 5)
# Create data frame
data <- data.frame(qol, stress, loneliness, sex)Fit linear regression model
model <- lm(qol ~ stress + loneliness + sex, data = data)
modela <- lm(qol ~ stress, data = data)
modelb <- lm(qol ~ stress + loneliness, data = data)
modelc <- lm(qol ~ stress + sex, data = data)
# Model summary
summary(model)
Call:
lm(formula = qol ~ stress + loneliness + sex, data = data)
Residuals:
Min 1Q Median 3Q Max
-13.945 -2.870 0.199 3.181 12.555
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 91.9515 2.3421 39.260 < 2e-16 ***
stress -2.2093 0.3656 -6.043 7.5e-09 ***
loneliness -3.2874 0.3454 -9.518 < 2e-16 ***
sexmale 10.8272 0.6898 15.696 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 4.834 on 196 degrees of freedom
Multiple R-squared: 0.6286, Adjusted R-squared: 0.6229
F-statistic: 110.6 on 3 and 196 DF, p-value: < 2.2e-16
stargazer(modela, modelb, modelc, model, type = "html")| Dependent variable: | ||||
| qol | ||||
| (1) | (2) | (3) | (4) | |
| stress | -1.508** | -1.591*** | -2.083*** | -2.209*** |
| (0.583) | (0.545) | (0.441) | (0.366) | |
| loneliness | -2.841*** | -3.287*** | ||
| (0.516) | (0.345) | |||
| sexmale | 10.287*** | 10.827*** | ||
| (0.829) | (0.690) | |||
| Constant | 80.575*** | 92.476*** | 78.303*** | 91.951*** |
| (2.963) | (3.509) | (2.233) | (2.342) | |
| Observations | 200 | 200 | 200 | 200 |
| R2 | 0.033 | 0.162 | 0.457 | 0.629 |
| Adjusted R2 | 0.028 | 0.153 | 0.451 | 0.623 |
| Residual Std. Error | 7.762 (df = 198) | 7.244 (df = 197) | 5.831 (df = 197) | 4.834 (df = 196) |
| F Statistic | 6.679** (df = 1; 198) | 19.006*** (df = 2; 197) | 82.876*** (df = 2; 197) | 110.571*** (df = 3; 196) |
| Note: | p<0.1; p<0.05; p<0.01 | |||
panels <- list(
"A" = modela,
"B" = modelb,
"C" = modelc,
"D" = model)
modelsummary(panels)| A | B | C | D | |
|---|---|---|---|---|
| (Intercept) | 80.575 | 92.476 | 78.303 | 91.951 |
| (2.963) | (3.509) | (2.233) | (2.342) | |
| stress | -1.508 | -1.591 | -2.083 | -2.209 |
| (0.583) | (0.545) | (0.441) | (0.366) | |
| loneliness | -2.841 | -3.287 | ||
| (0.516) | (0.345) | |||
| sexmale | 10.287 | 10.827 | ||
| (0.829) | (0.690) | |||
| Num.Obs. | 200 | 200 | 200 | 200 |
| R2 | 0.033 | 0.162 | 0.457 | 0.629 |
| R2 Adj. | 0.028 | 0.153 | 0.451 | 0.623 |
| AIC | 1391.3 | 1364.6 | 1277.8 | 1203.8 |
| BIC | 1401.2 | 1377.8 | 1291.0 | 1220.3 |
| Log.Lik. | -692.638 | -678.312 | -634.904 | -596.912 |
| F | 6.679 | 19.006 | 82.876 | 110.571 |
| RMSE | 7.72 | 7.19 | 5.79 | 4.79 |
Create predictions using emmeans
library(emmeans)
# Get predictions at specific values of stress and loneliness for each sex
emm <- emmeans(model, ~ sex,
at = list(stress = c(4, 5, 6),
loneliness = c(3, 4, 5)))# Create prediction grid for plotting
pred_grid <- expand.grid(
stress = seq(min(stress), max(stress), length.out = 100),
loneliness = mean(loneliness),
sex = c("male", "female")
)
# Get predictions for the grid
pred_grid2 <- pred_grid |>
mutate(predicted_qol = predict(model, newdata = pred_grid))Plot predictions using ggplot2
# Create plot
pred_grid2 |>
ggplot(aes(x = stress, y = predicted_qol, color = sex)) +
geom_line(linewidth = 1) +
geom_point(data = data, aes(y = qol), alpha = 0.3) +
theme_minimal() +
labs(
title = "Predicted Quality of Life by Stress Levels and Sex",
x = "Stress Level",
y = "Quality of Life",
color = "Sex"
) +
scale_color_manual(values = c("blue", "red")) +
theme(legend.position = "bottom")Create another plot showing the relationship with loneliness
pred_grid2b <- expand.grid(
loneliness = seq(min(loneliness), max(loneliness), length.out = 100),
stress = mean(stress),
sex = c("male", "female")
)
pred_grid2b <- pred_grid2b |>
mutate(predicted_qol = predict(model, newdata = pred_grid2b))pred_grid2b |>
ggplot(aes(x = loneliness, y = predicted_qol, color = sex)) +
geom_line(linewidth = 1) +
geom_point(data = data, aes(y = qol), alpha = 0.3) +
theme_minimal() +
labs(
title = "Predicted Quality of Life by Loneliness Levels and Sex",
x = "Loneliness Level",
y = "Quality of Life",
color = "Sex"
) +
scale_color_manual(values = c("blue", "red")) +
theme(legend.position = "bottom")